1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "hash-table.h"
56 #include "vec.h"
57 #include "basic-block.h"
58 #include "tree-ssa-alias.h"
59 #include "internal-fn.h"
60 #include "gimple-fold.h"
61 #include "tree-eh.h"
62 #include "gimple-expr.h"
63 #include "is-a.h"
64 #include "gimple.h"
65 #include "gimplify.h"
66 #include "cfgloop.h"
67 #include "dwarf2.h"
68 #include "df.h"
69 #include "tm-constrs.h"
70 #include "params.h"
71 #include "cselib.h"
72 #include "debug.h"
73 #include "sched-int.h"
74 #include "sbitmap.h"
75 #include "fibheap.h"
76 #include "opts.h"
77 #include "diagnostic.h"
78 #include "dumpfile.h"
79 #include "tree-pass.h"
80 #include "wide-int.h"
81 #include "context.h"
82 #include "pass_manager.h"
83 #include "target-globals.h"
84 #include "tree-vectorizer.h"
85 #include "shrink-wrap.h"
86 #include "builtins.h"
87
88 static rtx legitimize_dllimport_symbol (rtx, bool);
89 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
90 static rtx legitimize_pe_coff_symbol (rtx, bool);
91
92 #ifndef CHECK_STACK_LIMIT
93 #define CHECK_STACK_LIMIT (-1)
94 #endif
95
96 /* Return index of given mode in mult and division cost tables. */
97 #define MODE_INDEX(mode) \
98 ((mode) == QImode ? 0 \
99 : (mode) == HImode ? 1 \
100 : (mode) == SImode ? 2 \
101 : (mode) == DImode ? 3 \
102 : 4)
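/* A minimal usage sketch for MODE_INDEX, assuming the mult_init[]/mult_bit
   field names of struct processor_costs from i386.h and the kind of use
   ix86_rtx_costs makes of them; example_mult_cost and nbits (the number of
   set bits in a constant multiplier) are illustrative names only:

     static int
     example_mult_cost (const struct processor_costs *cost,
                        enum machine_mode mode, int nbits)
     {
       return cost->mult_init[MODE_INDEX (mode)] + nbits * cost->mult_bit;
     }  */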
103
104 /* Processor costs (relative to an add) */
105 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
106 #define COSTS_N_BYTES(N) ((N) * 2)
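/* Concretely, under those assumptions COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1),
   so a 2-byte add in the size-tuned table below carries the same weight as a
   one-insn add does in the speed-tuned tables.  */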
107
108 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
109
110 static stringop_algs ix86_size_memcpy[2] = {
111 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
113 static stringop_algs ix86_size_memset[2] = {
114 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
115 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
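/* Each *_memcpy[2]/*_memset[2] pair gives the 32-bit strategy in element 0
   and the 64-bit strategy in element 1 (indexed by TARGET_64BIT).  A reading
   sketch, assuming the stringop_algs layout from i386.h, where the first
   member is the algorithm used when the size is unknown and each
   {max, alg, noalign} entry applies to blocks of at most max bytes
   (-1 meaning no upper bound):

     ix86_size_memcpy[0] == {rep_prefix_1_byte,
                             {{-1, rep_prefix_1_byte, false}}}

   i.e. when tuning for size, rep movsb is used for every block, whatever its
   size; DUMMY_STRINGOP_ALGS simply fills the 64-bit slot for tunings that are
   only relevant in 32-bit mode.  */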
116
117 const
118 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
119 COSTS_N_BYTES (2), /* cost of an add instruction */
120 COSTS_N_BYTES (3), /* cost of a lea instruction */
121 COSTS_N_BYTES (2), /* variable shift costs */
122 COSTS_N_BYTES (3), /* constant shift costs */
123 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
124 COSTS_N_BYTES (3), /* HI */
125 COSTS_N_BYTES (3), /* SI */
126 COSTS_N_BYTES (3), /* DI */
127 COSTS_N_BYTES (5)}, /* other */
128 0, /* cost of multiply per each bit set */
129 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
130 COSTS_N_BYTES (3), /* HI */
131 COSTS_N_BYTES (3), /* SI */
132 COSTS_N_BYTES (3), /* DI */
133 COSTS_N_BYTES (5)}, /* other */
134 COSTS_N_BYTES (3), /* cost of movsx */
135 COSTS_N_BYTES (3), /* cost of movzx */
136 0, /* "large" insn */
137 2, /* MOVE_RATIO */
138 2, /* cost for loading QImode using movzbl */
139 {2, 2, 2}, /* cost of loading integer registers
140 in QImode, HImode and SImode.
141 Relative to reg-reg move (2). */
142 {2, 2, 2}, /* cost of storing integer registers */
143 2, /* cost of reg,reg fld/fst */
144 {2, 2, 2}, /* cost of loading fp registers
145 in SFmode, DFmode and XFmode */
146 {2, 2, 2}, /* cost of storing fp registers
147 in SFmode, DFmode and XFmode */
148 3, /* cost of moving MMX register */
149 {3, 3}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {3, 3}, /* cost of storing MMX registers
152 in SImode and DImode */
153 3, /* cost of moving SSE register */
154 {3, 3, 3}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {3, 3, 3}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of l1 cache */
160 0, /* size of l2 cache */
161 0, /* size of prefetch block */
162 0, /* number of parallel prefetches */
163 2, /* Branch cost */
164 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
165 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
166 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
167 COSTS_N_BYTES (2), /* cost of FABS instruction. */
168 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
169 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
170 ix86_size_memcpy,
171 ix86_size_memset,
172 1, /* scalar_stmt_cost. */
173 1, /* scalar load_cost. */
174 1, /* scalar_store_cost. */
175 1, /* vec_stmt_cost. */
176 1, /* vec_to_scalar_cost. */
177 1, /* scalar_to_vec_cost. */
178 1, /* vec_align_load_cost. */
179 1, /* vec_unalign_load_cost. */
180 1, /* vec_store_cost. */
181 1, /* cond_taken_branch_cost. */
182 1, /* cond_not_taken_branch_cost. */
183 };
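/* The cost tables in this file are consulted through the ix86_cost pointer.
   A sketch of how the active table is typically picked, assuming the
   ix86_cost pointer and the processor_target_table used by the
   option-override code later in this file:

     if (optimize_function_for_size_p (cfun))
       ix86_cost = &ix86_size_cost;
     else
       ix86_cost = processor_target_table[ix86_tune].cost;

   so the COSTS_N_BYTES entries above only matter when optimizing for size.  */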
184
185 /* Processor costs (relative to an add) */
186 static stringop_algs i386_memcpy[2] = {
187 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
188 DUMMY_STRINGOP_ALGS};
189 static stringop_algs i386_memset[2] = {
190 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
191 DUMMY_STRINGOP_ALGS};
192
193 static const
194 struct processor_costs i386_cost = { /* 386 specific costs */
195 COSTS_N_INSNS (1), /* cost of an add instruction */
196 COSTS_N_INSNS (1), /* cost of a lea instruction */
197 COSTS_N_INSNS (3), /* variable shift costs */
198 COSTS_N_INSNS (2), /* constant shift costs */
199 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
200 COSTS_N_INSNS (6), /* HI */
201 COSTS_N_INSNS (6), /* SI */
202 COSTS_N_INSNS (6), /* DI */
203 COSTS_N_INSNS (6)}, /* other */
204 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
205 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
206 COSTS_N_INSNS (23), /* HI */
207 COSTS_N_INSNS (23), /* SI */
208 COSTS_N_INSNS (23), /* DI */
209 COSTS_N_INSNS (23)}, /* other */
210 COSTS_N_INSNS (3), /* cost of movsx */
211 COSTS_N_INSNS (2), /* cost of movzx */
212 15, /* "large" insn */
213 3, /* MOVE_RATIO */
214 4, /* cost for loading QImode using movzbl */
215 {2, 4, 2}, /* cost of loading integer registers
216 in QImode, HImode and SImode.
217 Relative to reg-reg move (2). */
218 {2, 4, 2}, /* cost of storing integer registers */
219 2, /* cost of reg,reg fld/fst */
220 {8, 8, 8}, /* cost of loading fp registers
221 in SFmode, DFmode and XFmode */
222 {8, 8, 8}, /* cost of storing fp registers
223 in SFmode, DFmode and XFmode */
224 2, /* cost of moving MMX register */
225 {4, 8}, /* cost of loading MMX registers
226 in SImode and DImode */
227 {4, 8}, /* cost of storing MMX registers
228 in SImode and DImode */
229 2, /* cost of moving SSE register */
230 {4, 8, 16}, /* cost of loading SSE registers
231 in SImode, DImode and TImode */
232 {4, 8, 16}, /* cost of storing SSE registers
233 in SImode, DImode and TImode */
234 3, /* MMX or SSE register to integer */
235 0, /* size of l1 cache */
236 0, /* size of l2 cache */
237 0, /* size of prefetch block */
238 0, /* number of parallel prefetches */
239 1, /* Branch cost */
240 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
241 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
242 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
243 COSTS_N_INSNS (22), /* cost of FABS instruction. */
244 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
245 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
246 i386_memcpy,
247 i386_memset,
248 1, /* scalar_stmt_cost. */
249 1, /* scalar load_cost. */
250 1, /* scalar_store_cost. */
251 1, /* vec_stmt_cost. */
252 1, /* vec_to_scalar_cost. */
253 1, /* scalar_to_vec_cost. */
254 1, /* vec_align_load_cost. */
255 2, /* vec_unalign_load_cost. */
256 1, /* vec_store_cost. */
257 3, /* cond_taken_branch_cost. */
258 1, /* cond_not_taken_branch_cost. */
259 };
260
261 static stringop_algs i486_memcpy[2] = {
262 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
263 DUMMY_STRINGOP_ALGS};
264 static stringop_algs i486_memset[2] = {
265 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
266 DUMMY_STRINGOP_ALGS};
267
268 static const
269 struct processor_costs i486_cost = { /* 486 specific costs */
270 COSTS_N_INSNS (1), /* cost of an add instruction */
271 COSTS_N_INSNS (1), /* cost of a lea instruction */
272 COSTS_N_INSNS (3), /* variable shift costs */
273 COSTS_N_INSNS (2), /* constant shift costs */
274 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
275 COSTS_N_INSNS (12), /* HI */
276 COSTS_N_INSNS (12), /* SI */
277 COSTS_N_INSNS (12), /* DI */
278 COSTS_N_INSNS (12)}, /* other */
279 1, /* cost of multiply per each bit set */
280 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
281 COSTS_N_INSNS (40), /* HI */
282 COSTS_N_INSNS (40), /* SI */
283 COSTS_N_INSNS (40), /* DI */
284 COSTS_N_INSNS (40)}, /* other */
285 COSTS_N_INSNS (3), /* cost of movsx */
286 COSTS_N_INSNS (2), /* cost of movzx */
287 15, /* "large" insn */
288 3, /* MOVE_RATIO */
289 4, /* cost for loading QImode using movzbl */
290 {2, 4, 2}, /* cost of loading integer registers
291 in QImode, HImode and SImode.
292 Relative to reg-reg move (2). */
293 {2, 4, 2}, /* cost of storing integer registers */
294 2, /* cost of reg,reg fld/fst */
295 {8, 8, 8}, /* cost of loading fp registers
296 in SFmode, DFmode and XFmode */
297 {8, 8, 8}, /* cost of storing fp registers
298 in SFmode, DFmode and XFmode */
299 2, /* cost of moving MMX register */
300 {4, 8}, /* cost of loading MMX registers
301 in SImode and DImode */
302 {4, 8}, /* cost of storing MMX registers
303 in SImode and DImode */
304 2, /* cost of moving SSE register */
305 {4, 8, 16}, /* cost of loading SSE registers
306 in SImode, DImode and TImode */
307 {4, 8, 16}, /* cost of storing SSE registers
308 in SImode, DImode and TImode */
309 3, /* MMX or SSE register to integer */
310 4, /* size of l1 cache. 486 has 8kB cache
311 shared for code and data, so 4kB is
312 not really precise. */
313 4, /* size of l2 cache */
314 0, /* size of prefetch block */
315 0, /* number of parallel prefetches */
316 1, /* Branch cost */
317 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
318 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
319 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
320 COSTS_N_INSNS (3), /* cost of FABS instruction. */
321 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
322 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
323 i486_memcpy,
324 i486_memset,
325 1, /* scalar_stmt_cost. */
326 1, /* scalar load_cost. */
327 1, /* scalar_store_cost. */
328 1, /* vec_stmt_cost. */
329 1, /* vec_to_scalar_cost. */
330 1, /* scalar_to_vec_cost. */
331 1, /* vec_align_load_cost. */
332 2, /* vec_unalign_load_cost. */
333 1, /* vec_store_cost. */
334 3, /* cond_taken_branch_cost. */
335 1, /* cond_not_taken_branch_cost. */
336 };
337
338 static stringop_algs pentium_memcpy[2] = {
339 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
340 DUMMY_STRINGOP_ALGS};
341 static stringop_algs pentium_memset[2] = {
342 {libcall, {{-1, rep_prefix_4_byte, false}}},
343 DUMMY_STRINGOP_ALGS};
344
345 static const
346 struct processor_costs pentium_cost = {
347 COSTS_N_INSNS (1), /* cost of an add instruction */
348 COSTS_N_INSNS (1), /* cost of a lea instruction */
349 COSTS_N_INSNS (4), /* variable shift costs */
350 COSTS_N_INSNS (1), /* constant shift costs */
351 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
352 COSTS_N_INSNS (11), /* HI */
353 COSTS_N_INSNS (11), /* SI */
354 COSTS_N_INSNS (11), /* DI */
355 COSTS_N_INSNS (11)}, /* other */
356 0, /* cost of multiply per each bit set */
357 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
358 COSTS_N_INSNS (25), /* HI */
359 COSTS_N_INSNS (25), /* SI */
360 COSTS_N_INSNS (25), /* DI */
361 COSTS_N_INSNS (25)}, /* other */
362 COSTS_N_INSNS (3), /* cost of movsx */
363 COSTS_N_INSNS (2), /* cost of movzx */
364 8, /* "large" insn */
365 6, /* MOVE_RATIO */
366 6, /* cost for loading QImode using movzbl */
367 {2, 4, 2}, /* cost of loading integer registers
368 in QImode, HImode and SImode.
369 Relative to reg-reg move (2). */
370 {2, 4, 2}, /* cost of storing integer registers */
371 2, /* cost of reg,reg fld/fst */
372 {2, 2, 6}, /* cost of loading fp registers
373 in SFmode, DFmode and XFmode */
374 {4, 4, 6}, /* cost of storing fp registers
375 in SFmode, DFmode and XFmode */
376 8, /* cost of moving MMX register */
377 {8, 8}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {8, 8}, /* cost of storing MMX registers
380 in SImode and DImode */
381 2, /* cost of moving SSE register */
382 {4, 8, 16}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {4, 8, 16}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 3, /* MMX or SSE register to integer */
387 8, /* size of l1 cache. */
388 8, /* size of l2 cache */
389 0, /* size of prefetch block */
390 0, /* number of parallel prefetches */
391 2, /* Branch cost */
392 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
393 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
394 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
395 COSTS_N_INSNS (1), /* cost of FABS instruction. */
396 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
397 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
398 pentium_memcpy,
399 pentium_memset,
400 1, /* scalar_stmt_cost. */
401 1, /* scalar load_cost. */
402 1, /* scalar_store_cost. */
403 1, /* vec_stmt_cost. */
404 1, /* vec_to_scalar_cost. */
405 1, /* scalar_to_vec_cost. */
406 1, /* vec_align_load_cost. */
407 2, /* vec_unalign_load_cost. */
408 1, /* vec_store_cost. */
409 3, /* cond_taken_branch_cost. */
410 1, /* cond_not_taken_branch_cost. */
411 };
412
413 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
414 (we ensure the alignment). For small blocks an inline loop is still a
415 noticeable win; for bigger blocks either rep movsl or rep movsb is the
416 way to go. Rep movsb apparently has a more expensive startup time in the CPU,
417 but after 4K the difference is down in the noise. */
418 static stringop_algs pentiumpro_memcpy[2] = {
419 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
420 {8192, rep_prefix_4_byte, false},
421 {-1, rep_prefix_1_byte, false}}},
422 DUMMY_STRINGOP_ALGS};
423 static stringop_algs pentiumpro_memset[2] = {
424 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
425 {8192, rep_prefix_4_byte, false},
426 {-1, libcall, false}}},
427 DUMMY_STRINGOP_ALGS};
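/* Tying the table to the comment above: for the 32-bit pentiumpro_memcpy
   entry, blocks of at most 128 bytes use an inline loop, up to 1024 bytes an
   unrolled loop, up to 8192 bytes rep movsl (rep_prefix_4_byte), and anything
   larger rep movsb (rep_prefix_1_byte); rep_prefix_4_byte is also the
   fallback when the block size is unknown at compile time.  */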
428 static const
429 struct processor_costs pentiumpro_cost = {
430 COSTS_N_INSNS (1), /* cost of an add instruction */
431 COSTS_N_INSNS (1), /* cost of a lea instruction */
432 COSTS_N_INSNS (1), /* variable shift costs */
433 COSTS_N_INSNS (1), /* constant shift costs */
434 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
435 COSTS_N_INSNS (4), /* HI */
436 COSTS_N_INSNS (4), /* SI */
437 COSTS_N_INSNS (4), /* DI */
438 COSTS_N_INSNS (4)}, /* other */
439 0, /* cost of multiply per each bit set */
440 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
441 COSTS_N_INSNS (17), /* HI */
442 COSTS_N_INSNS (17), /* SI */
443 COSTS_N_INSNS (17), /* DI */
444 COSTS_N_INSNS (17)}, /* other */
445 COSTS_N_INSNS (1), /* cost of movsx */
446 COSTS_N_INSNS (1), /* cost of movzx */
447 8, /* "large" insn */
448 6, /* MOVE_RATIO */
449 2, /* cost for loading QImode using movzbl */
450 {4, 4, 4}, /* cost of loading integer registers
451 in QImode, HImode and SImode.
452 Relative to reg-reg move (2). */
453 {2, 2, 2}, /* cost of storing integer registers */
454 2, /* cost of reg,reg fld/fst */
455 {2, 2, 6}, /* cost of loading fp registers
456 in SFmode, DFmode and XFmode */
457 {4, 4, 6}, /* cost of storing fp registers
458 in SFmode, DFmode and XFmode */
459 2, /* cost of moving MMX register */
460 {2, 2}, /* cost of loading MMX registers
461 in SImode and DImode */
462 {2, 2}, /* cost of storing MMX registers
463 in SImode and DImode */
464 2, /* cost of moving SSE register */
465 {2, 2, 8}, /* cost of loading SSE registers
466 in SImode, DImode and TImode */
467 {2, 2, 8}, /* cost of storing SSE registers
468 in SImode, DImode and TImode */
469 3, /* MMX or SSE register to integer */
470 8, /* size of l1 cache. */
471 256, /* size of l2 cache */
472 32, /* size of prefetch block */
473 6, /* number of parallel prefetches */
474 2, /* Branch cost */
475 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
476 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
477 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
478 COSTS_N_INSNS (2), /* cost of FABS instruction. */
479 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
480 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
481 pentiumpro_memcpy,
482 pentiumpro_memset,
483 1, /* scalar_stmt_cost. */
484 1, /* scalar load_cost. */
485 1, /* scalar_store_cost. */
486 1, /* vec_stmt_cost. */
487 1, /* vec_to_scalar_cost. */
488 1, /* scalar_to_vec_cost. */
489 1, /* vec_align_load_cost. */
490 2, /* vec_unalign_load_cost. */
491 1, /* vec_store_cost. */
492 3, /* cond_taken_branch_cost. */
493 1, /* cond_not_taken_branch_cost. */
494 };
495
496 static stringop_algs geode_memcpy[2] = {
497 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
498 DUMMY_STRINGOP_ALGS};
499 static stringop_algs geode_memset[2] = {
500 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
501 DUMMY_STRINGOP_ALGS};
502 static const
503 struct processor_costs geode_cost = {
504 COSTS_N_INSNS (1), /* cost of an add instruction */
505 COSTS_N_INSNS (1), /* cost of a lea instruction */
506 COSTS_N_INSNS (2), /* variable shift costs */
507 COSTS_N_INSNS (1), /* constant shift costs */
508 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
509 COSTS_N_INSNS (4), /* HI */
510 COSTS_N_INSNS (7), /* SI */
511 COSTS_N_INSNS (7), /* DI */
512 COSTS_N_INSNS (7)}, /* other */
513 0, /* cost of multiply per each bit set */
514 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
515 COSTS_N_INSNS (23), /* HI */
516 COSTS_N_INSNS (39), /* SI */
517 COSTS_N_INSNS (39), /* DI */
518 COSTS_N_INSNS (39)}, /* other */
519 COSTS_N_INSNS (1), /* cost of movsx */
520 COSTS_N_INSNS (1), /* cost of movzx */
521 8, /* "large" insn */
522 4, /* MOVE_RATIO */
523 1, /* cost for loading QImode using movzbl */
524 {1, 1, 1}, /* cost of loading integer registers
525 in QImode, HImode and SImode.
526 Relative to reg-reg move (2). */
527 {1, 1, 1}, /* cost of storing integer registers */
528 1, /* cost of reg,reg fld/fst */
529 {1, 1, 1}, /* cost of loading fp registers
530 in SFmode, DFmode and XFmode */
531 {4, 6, 6}, /* cost of storing fp registers
532 in SFmode, DFmode and XFmode */
533
534 1, /* cost of moving MMX register */
535 {1, 1}, /* cost of loading MMX registers
536 in SImode and DImode */
537 {1, 1}, /* cost of storing MMX registers
538 in SImode and DImode */
539 1, /* cost of moving SSE register */
540 {1, 1, 1}, /* cost of loading SSE registers
541 in SImode, DImode and TImode */
542 {1, 1, 1}, /* cost of storing SSE registers
543 in SImode, DImode and TImode */
544 1, /* MMX or SSE register to integer */
545 64, /* size of l1 cache. */
546 128, /* size of l2 cache. */
547 32, /* size of prefetch block */
548 1, /* number of parallel prefetches */
549 1, /* Branch cost */
550 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
551 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
552 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
553 COSTS_N_INSNS (1), /* cost of FABS instruction. */
554 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
555 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
556 geode_memcpy,
557 geode_memset,
558 1, /* scalar_stmt_cost. */
559 1, /* scalar load_cost. */
560 1, /* scalar_store_cost. */
561 1, /* vec_stmt_cost. */
562 1, /* vec_to_scalar_cost. */
563 1, /* scalar_to_vec_cost. */
564 1, /* vec_align_load_cost. */
565 2, /* vec_unalign_load_cost. */
566 1, /* vec_store_cost. */
567 3, /* cond_taken_branch_cost. */
568 1, /* cond_not_taken_branch_cost. */
569 };
570
571 static stringop_algs k6_memcpy[2] = {
572 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
573 DUMMY_STRINGOP_ALGS};
574 static stringop_algs k6_memset[2] = {
575 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
576 DUMMY_STRINGOP_ALGS};
577 static const
578 struct processor_costs k6_cost = {
579 COSTS_N_INSNS (1), /* cost of an add instruction */
580 COSTS_N_INSNS (2), /* cost of a lea instruction */
581 COSTS_N_INSNS (1), /* variable shift costs */
582 COSTS_N_INSNS (1), /* constant shift costs */
583 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
584 COSTS_N_INSNS (3), /* HI */
585 COSTS_N_INSNS (3), /* SI */
586 COSTS_N_INSNS (3), /* DI */
587 COSTS_N_INSNS (3)}, /* other */
588 0, /* cost of multiply per each bit set */
589 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
590 COSTS_N_INSNS (18), /* HI */
591 COSTS_N_INSNS (18), /* SI */
592 COSTS_N_INSNS (18), /* DI */
593 COSTS_N_INSNS (18)}, /* other */
594 COSTS_N_INSNS (2), /* cost of movsx */
595 COSTS_N_INSNS (2), /* cost of movzx */
596 8, /* "large" insn */
597 4, /* MOVE_RATIO */
598 3, /* cost for loading QImode using movzbl */
599 {4, 5, 4}, /* cost of loading integer registers
600 in QImode, HImode and SImode.
601 Relative to reg-reg move (2). */
602 {2, 3, 2}, /* cost of storing integer registers */
603 4, /* cost of reg,reg fld/fst */
604 {6, 6, 6}, /* cost of loading fp registers
605 in SFmode, DFmode and XFmode */
606 {4, 4, 4}, /* cost of storing fp registers
607 in SFmode, DFmode and XFmode */
608 2, /* cost of moving MMX register */
609 {2, 2}, /* cost of loading MMX registers
610 in SImode and DImode */
611 {2, 2}, /* cost of storing MMX registers
612 in SImode and DImode */
613 2, /* cost of moving SSE register */
614 {2, 2, 8}, /* cost of loading SSE registers
615 in SImode, DImode and TImode */
616 {2, 2, 8}, /* cost of storing SSE registers
617 in SImode, DImode and TImode */
618 6, /* MMX or SSE register to integer */
619 32, /* size of l1 cache. */
620 32, /* size of l2 cache. Some models
621 have integrated l2 cache, but
622 optimizing for k6 is not important
623 enough to worry about that. */
624 32, /* size of prefetch block */
625 1, /* number of parallel prefetches */
626 1, /* Branch cost */
627 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
628 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
629 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
630 COSTS_N_INSNS (2), /* cost of FABS instruction. */
631 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
632 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
633 k6_memcpy,
634 k6_memset,
635 1, /* scalar_stmt_cost. */
636 1, /* scalar load_cost. */
637 1, /* scalar_store_cost. */
638 1, /* vec_stmt_cost. */
639 1, /* vec_to_scalar_cost. */
640 1, /* scalar_to_vec_cost. */
641 1, /* vec_align_load_cost. */
642 2, /* vec_unalign_load_cost. */
643 1, /* vec_store_cost. */
644 3, /* cond_taken_branch_cost. */
645 1, /* cond_not_taken_branch_cost. */
646 };
647
648 /* For some reason, Athlon deals better with the REP prefix (relative to
649 loops) than K8 does. Alignment becomes important after 8 bytes for memcpy
650 and 128 bytes for memset. */
651 static stringop_algs athlon_memcpy[2] = {
652 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
653 DUMMY_STRINGOP_ALGS};
654 static stringop_algs athlon_memset[2] = {
655 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
656 DUMMY_STRINGOP_ALGS};
657 static const
658 struct processor_costs athlon_cost = {
659 COSTS_N_INSNS (1), /* cost of an add instruction */
660 COSTS_N_INSNS (2), /* cost of a lea instruction */
661 COSTS_N_INSNS (1), /* variable shift costs */
662 COSTS_N_INSNS (1), /* constant shift costs */
663 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
664 COSTS_N_INSNS (5), /* HI */
665 COSTS_N_INSNS (5), /* SI */
666 COSTS_N_INSNS (5), /* DI */
667 COSTS_N_INSNS (5)}, /* other */
668 0, /* cost of multiply per each bit set */
669 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
670 COSTS_N_INSNS (26), /* HI */
671 COSTS_N_INSNS (42), /* SI */
672 COSTS_N_INSNS (74), /* DI */
673 COSTS_N_INSNS (74)}, /* other */
674 COSTS_N_INSNS (1), /* cost of movsx */
675 COSTS_N_INSNS (1), /* cost of movzx */
676 8, /* "large" insn */
677 9, /* MOVE_RATIO */
678 4, /* cost for loading QImode using movzbl */
679 {3, 4, 3}, /* cost of loading integer registers
680 in QImode, HImode and SImode.
681 Relative to reg-reg move (2). */
682 {3, 4, 3}, /* cost of storing integer registers */
683 4, /* cost of reg,reg fld/fst */
684 {4, 4, 12}, /* cost of loading fp registers
685 in SFmode, DFmode and XFmode */
686 {6, 6, 8}, /* cost of storing fp registers
687 in SFmode, DFmode and XFmode */
688 2, /* cost of moving MMX register */
689 {4, 4}, /* cost of loading MMX registers
690 in SImode and DImode */
691 {4, 4}, /* cost of storing MMX registers
692 in SImode and DImode */
693 2, /* cost of moving SSE register */
694 {4, 4, 6}, /* cost of loading SSE registers
695 in SImode, DImode and TImode */
696 {4, 4, 5}, /* cost of storing SSE registers
697 in SImode, DImode and TImode */
698 5, /* MMX or SSE register to integer */
699 64, /* size of l1 cache. */
700 256, /* size of l2 cache. */
701 64, /* size of prefetch block */
702 6, /* number of parallel prefetches */
703 5, /* Branch cost */
704 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
705 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
706 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
707 COSTS_N_INSNS (2), /* cost of FABS instruction. */
708 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
709 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
710 athlon_memcpy,
711 athlon_memset,
712 1, /* scalar_stmt_cost. */
713 1, /* scalar load_cost. */
714 1, /* scalar_store_cost. */
715 1, /* vec_stmt_cost. */
716 1, /* vec_to_scalar_cost. */
717 1, /* scalar_to_vec_cost. */
718 1, /* vec_align_load_cost. */
719 2, /* vec_unalign_load_cost. */
720 1, /* vec_store_cost. */
721 3, /* cond_taken_branch_cost. */
722 1, /* cond_not_taken_branch_cost. */
723 };
724
725 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
726 small blocks it is better to use a loop. For large blocks, a libcall can
727 do nontemporal accesses and beat inlined code considerably. */
728 static stringop_algs k8_memcpy[2] = {
729 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
730 {-1, rep_prefix_4_byte, false}}},
731 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
732 {-1, libcall, false}}}};
733 static stringop_algs k8_memset[2] = {
734 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
735 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
736 {libcall, {{48, unrolled_loop, false},
737 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
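/* For the 64-bit k8_memcpy entry above this means: an inline loop up to 16
   bytes, rep movsq (rep_prefix_8_byte) up to 8192 bytes, and a libcall beyond
   that, matching the comment about medium-sized blocks versus large
   nontemporal copies.  */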
738 static const
739 struct processor_costs k8_cost = {
740 COSTS_N_INSNS (1), /* cost of an add instruction */
741 COSTS_N_INSNS (2), /* cost of a lea instruction */
742 COSTS_N_INSNS (1), /* variable shift costs */
743 COSTS_N_INSNS (1), /* constant shift costs */
744 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
745 COSTS_N_INSNS (4), /* HI */
746 COSTS_N_INSNS (3), /* SI */
747 COSTS_N_INSNS (4), /* DI */
748 COSTS_N_INSNS (5)}, /* other */
749 0, /* cost of multiply per each bit set */
750 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
751 COSTS_N_INSNS (26), /* HI */
752 COSTS_N_INSNS (42), /* SI */
753 COSTS_N_INSNS (74), /* DI */
754 COSTS_N_INSNS (74)}, /* other */
755 COSTS_N_INSNS (1), /* cost of movsx */
756 COSTS_N_INSNS (1), /* cost of movzx */
757 8, /* "large" insn */
758 9, /* MOVE_RATIO */
759 4, /* cost for loading QImode using movzbl */
760 {3, 4, 3}, /* cost of loading integer registers
761 in QImode, HImode and SImode.
762 Relative to reg-reg move (2). */
763 {3, 4, 3}, /* cost of storing integer registers */
764 4, /* cost of reg,reg fld/fst */
765 {4, 4, 12}, /* cost of loading fp registers
766 in SFmode, DFmode and XFmode */
767 {6, 6, 8}, /* cost of storing fp registers
768 in SFmode, DFmode and XFmode */
769 2, /* cost of moving MMX register */
770 {3, 3}, /* cost of loading MMX registers
771 in SImode and DImode */
772 {4, 4}, /* cost of storing MMX registers
773 in SImode and DImode */
774 2, /* cost of moving SSE register */
775 {4, 3, 6}, /* cost of loading SSE registers
776 in SImode, DImode and TImode */
777 {4, 4, 5}, /* cost of storing SSE registers
778 in SImode, DImode and TImode */
779 5, /* MMX or SSE register to integer */
780 64, /* size of l1 cache. */
781 512, /* size of l2 cache. */
782 64, /* size of prefetch block */
783 /* New AMD processors never drop prefetches; if they cannot be performed
784 immediately, they are queued. We set the number of simultaneous prefetches
785 to a large constant to reflect this (it probably is not a good idea not
786 to limit the number of prefetches at all, as their execution also takes some
787 time). */
788 100, /* number of parallel prefetches */
789 3, /* Branch cost */
790 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
791 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
792 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
793 COSTS_N_INSNS (2), /* cost of FABS instruction. */
794 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
795 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
796
797 k8_memcpy,
798 k8_memset,
799 4, /* scalar_stmt_cost. */
800 2, /* scalar load_cost. */
801 2, /* scalar_store_cost. */
802 5, /* vec_stmt_cost. */
803 0, /* vec_to_scalar_cost. */
804 2, /* scalar_to_vec_cost. */
805 2, /* vec_align_load_cost. */
806 3, /* vec_unalign_load_cost. */
807 3, /* vec_store_cost. */
808 3, /* cond_taken_branch_cost. */
809 2, /* cond_not_taken_branch_cost. */
810 };
811
812 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
813 very small blocks it is better to use a loop. For large blocks, a libcall
814 can do nontemporal accesses and beat inlined code considerably. */
815 static stringop_algs amdfam10_memcpy[2] = {
816 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
817 {-1, rep_prefix_4_byte, false}}},
818 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
819 {-1, libcall, false}}}};
820 static stringop_algs amdfam10_memset[2] = {
821 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
822 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
823 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
824 {-1, libcall, false}}}};
825 struct processor_costs amdfam10_cost = {
826 COSTS_N_INSNS (1), /* cost of an add instruction */
827 COSTS_N_INSNS (2), /* cost of a lea instruction */
828 COSTS_N_INSNS (1), /* variable shift costs */
829 COSTS_N_INSNS (1), /* constant shift costs */
830 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
831 COSTS_N_INSNS (4), /* HI */
832 COSTS_N_INSNS (3), /* SI */
833 COSTS_N_INSNS (4), /* DI */
834 COSTS_N_INSNS (5)}, /* other */
835 0, /* cost of multiply per each bit set */
836 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
837 COSTS_N_INSNS (35), /* HI */
838 COSTS_N_INSNS (51), /* SI */
839 COSTS_N_INSNS (83), /* DI */
840 COSTS_N_INSNS (83)}, /* other */
841 COSTS_N_INSNS (1), /* cost of movsx */
842 COSTS_N_INSNS (1), /* cost of movzx */
843 8, /* "large" insn */
844 9, /* MOVE_RATIO */
845 4, /* cost for loading QImode using movzbl */
846 {3, 4, 3}, /* cost of loading integer registers
847 in QImode, HImode and SImode.
848 Relative to reg-reg move (2). */
849 {3, 4, 3}, /* cost of storing integer registers */
850 4, /* cost of reg,reg fld/fst */
851 {4, 4, 12}, /* cost of loading fp registers
852 in SFmode, DFmode and XFmode */
853 {6, 6, 8}, /* cost of storing fp registers
854 in SFmode, DFmode and XFmode */
855 2, /* cost of moving MMX register */
856 {3, 3}, /* cost of loading MMX registers
857 in SImode and DImode */
858 {4, 4}, /* cost of storing MMX registers
859 in SImode and DImode */
860 2, /* cost of moving SSE register */
861 {4, 4, 3}, /* cost of loading SSE registers
862 in SImode, DImode and TImode */
863 {4, 4, 5}, /* cost of storing SSE registers
864 in SImode, DImode and TImode */
865 3, /* MMX or SSE register to integer */
866 /* On K8:
867 MOVD reg64, xmmreg Double FSTORE 4
868 MOVD reg32, xmmreg Double FSTORE 4
869 On AMDFAM10:
870 MOVD reg64, xmmreg Double FADD 3
871 1/1 1/1
872 MOVD reg32, xmmreg Double FADD 3
873 1/1 1/1 */
874 64, /* size of l1 cache. */
875 512, /* size of l2 cache. */
876 64, /* size of prefetch block */
877 /* New AMD processors never drop prefetches; if they cannot be performed
878 immediately, they are queued. We set the number of simultaneous prefetches
879 to a large constant to reflect this (it probably is not a good idea not
880 to limit the number of prefetches at all, as their execution also takes some
881 time). */
882 100, /* number of parallel prefetches */
883 2, /* Branch cost */
884 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
885 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
886 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
887 COSTS_N_INSNS (2), /* cost of FABS instruction. */
888 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
889 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
890
891 amdfam10_memcpy,
892 amdfam10_memset,
893 4, /* scalar_stmt_cost. */
894 2, /* scalar load_cost. */
895 2, /* scalar_store_cost. */
896 6, /* vec_stmt_cost. */
897 0, /* vec_to_scalar_cost. */
898 2, /* scalar_to_vec_cost. */
899 2, /* vec_align_load_cost. */
900 2, /* vec_unalign_load_cost. */
901 2, /* vec_store_cost. */
902 2, /* cond_taken_branch_cost. */
903 1, /* cond_not_taken_branch_cost. */
904 };
905
906 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
907 very small blocks it is better to use a loop. For large blocks, a libcall
908 can do nontemporal accesses and beat inlined code considerably. */
909 static stringop_algs bdver1_memcpy[2] = {
910 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
911 {-1, rep_prefix_4_byte, false}}},
912 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
913 {-1, libcall, false}}}};
914 static stringop_algs bdver1_memset[2] = {
915 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
916 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
917 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
918 {-1, libcall, false}}}};
919
920 const struct processor_costs bdver1_cost = {
921 COSTS_N_INSNS (1), /* cost of an add instruction */
922 COSTS_N_INSNS (1), /* cost of a lea instruction */
923 COSTS_N_INSNS (1), /* variable shift costs */
924 COSTS_N_INSNS (1), /* constant shift costs */
925 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
926 COSTS_N_INSNS (4), /* HI */
927 COSTS_N_INSNS (4), /* SI */
928 COSTS_N_INSNS (6), /* DI */
929 COSTS_N_INSNS (6)}, /* other */
930 0, /* cost of multiply per each bit set */
931 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
932 COSTS_N_INSNS (35), /* HI */
933 COSTS_N_INSNS (51), /* SI */
934 COSTS_N_INSNS (83), /* DI */
935 COSTS_N_INSNS (83)}, /* other */
936 COSTS_N_INSNS (1), /* cost of movsx */
937 COSTS_N_INSNS (1), /* cost of movzx */
938 8, /* "large" insn */
939 9, /* MOVE_RATIO */
940 4, /* cost for loading QImode using movzbl */
941 {5, 5, 4}, /* cost of loading integer registers
942 in QImode, HImode and SImode.
943 Relative to reg-reg move (2). */
944 {4, 4, 4}, /* cost of storing integer registers */
945 2, /* cost of reg,reg fld/fst */
946 {5, 5, 12}, /* cost of loading fp registers
947 in SFmode, DFmode and XFmode */
948 {4, 4, 8}, /* cost of storing fp registers
949 in SFmode, DFmode and XFmode */
950 2, /* cost of moving MMX register */
951 {4, 4}, /* cost of loading MMX registers
952 in SImode and DImode */
953 {4, 4}, /* cost of storing MMX registers
954 in SImode and DImode */
955 2, /* cost of moving SSE register */
956 {4, 4, 4}, /* cost of loading SSE registers
957 in SImode, DImode and TImode */
958 {4, 4, 4}, /* cost of storing SSE registers
959 in SImode, DImode and TImode */
960 2, /* MMX or SSE register to integer */
961 /* On K8:
962 MOVD reg64, xmmreg Double FSTORE 4
963 MOVD reg32, xmmreg Double FSTORE 4
964 On AMDFAM10:
965 MOVD reg64, xmmreg Double FADD 3
966 1/1 1/1
967 MOVD reg32, xmmreg Double FADD 3
968 1/1 1/1 */
969 16, /* size of l1 cache. */
970 2048, /* size of l2 cache. */
971 64, /* size of prefetch block */
972 /* New AMD processors never drop prefetches; if they cannot be performed
973 immediately, they are queued. We set the number of simultaneous prefetches
974 to a large constant to reflect this (it probably is not a good idea not
975 to limit the number of prefetches at all, as their execution also takes some
976 time). */
977 100, /* number of parallel prefetches */
978 2, /* Branch cost */
979 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
980 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
981 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
982 COSTS_N_INSNS (2), /* cost of FABS instruction. */
983 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
984 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
985
986 bdver1_memcpy,
987 bdver1_memset,
988 6, /* scalar_stmt_cost. */
989 4, /* scalar load_cost. */
990 4, /* scalar_store_cost. */
991 6, /* vec_stmt_cost. */
992 0, /* vec_to_scalar_cost. */
993 2, /* scalar_to_vec_cost. */
994 4, /* vec_align_load_cost. */
995 4, /* vec_unalign_load_cost. */
996 4, /* vec_store_cost. */
997 2, /* cond_taken_branch_cost. */
998 1, /* cond_not_taken_branch_cost. */
999 };
1000
1001 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1002 very small blocks it is better to use a loop. For large blocks, a libcall
1003 can do nontemporal accesses and beat inlined code considerably. */
1004
1005 static stringop_algs bdver2_memcpy[2] = {
1006 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1007 {-1, rep_prefix_4_byte, false}}},
1008 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1009 {-1, libcall, false}}}};
1010 static stringop_algs bdver2_memset[2] = {
1011 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1012 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1013 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1014 {-1, libcall, false}}}};
1015
1016 const struct processor_costs bdver2_cost = {
1017 COSTS_N_INSNS (1), /* cost of an add instruction */
1018 COSTS_N_INSNS (1), /* cost of a lea instruction */
1019 COSTS_N_INSNS (1), /* variable shift costs */
1020 COSTS_N_INSNS (1), /* constant shift costs */
1021 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1022 COSTS_N_INSNS (4), /* HI */
1023 COSTS_N_INSNS (4), /* SI */
1024 COSTS_N_INSNS (6), /* DI */
1025 COSTS_N_INSNS (6)}, /* other */
1026 0, /* cost of multiply per each bit set */
1027 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1028 COSTS_N_INSNS (35), /* HI */
1029 COSTS_N_INSNS (51), /* SI */
1030 COSTS_N_INSNS (83), /* DI */
1031 COSTS_N_INSNS (83)}, /* other */
1032 COSTS_N_INSNS (1), /* cost of movsx */
1033 COSTS_N_INSNS (1), /* cost of movzx */
1034 8, /* "large" insn */
1035 9, /* MOVE_RATIO */
1036 4, /* cost for loading QImode using movzbl */
1037 {5, 5, 4}, /* cost of loading integer registers
1038 in QImode, HImode and SImode.
1039 Relative to reg-reg move (2). */
1040 {4, 4, 4}, /* cost of storing integer registers */
1041 2, /* cost of reg,reg fld/fst */
1042 {5, 5, 12}, /* cost of loading fp registers
1043 in SFmode, DFmode and XFmode */
1044 {4, 4, 8}, /* cost of storing fp registers
1045 in SFmode, DFmode and XFmode */
1046 2, /* cost of moving MMX register */
1047 {4, 4}, /* cost of loading MMX registers
1048 in SImode and DImode */
1049 {4, 4}, /* cost of storing MMX registers
1050 in SImode and DImode */
1051 2, /* cost of moving SSE register */
1052 {4, 4, 4}, /* cost of loading SSE registers
1053 in SImode, DImode and TImode */
1054 {4, 4, 4}, /* cost of storing SSE registers
1055 in SImode, DImode and TImode */
1056 2, /* MMX or SSE register to integer */
1057 /* On K8:
1058 MOVD reg64, xmmreg Double FSTORE 4
1059 MOVD reg32, xmmreg Double FSTORE 4
1060 On AMDFAM10:
1061 MOVD reg64, xmmreg Double FADD 3
1062 1/1 1/1
1063 MOVD reg32, xmmreg Double FADD 3
1064 1/1 1/1 */
1065 16, /* size of l1 cache. */
1066 2048, /* size of l2 cache. */
1067 64, /* size of prefetch block */
1068 /* New AMD processors never drop prefetches; if they cannot be performed
1069 immediately, they are queued. We set the number of simultaneous prefetches
1070 to a large constant to reflect this (it probably is not a good idea not
1071 to limit the number of prefetches at all, as their execution also takes some
1072 time). */
1073 100, /* number of parallel prefetches */
1074 2, /* Branch cost */
1075 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1076 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1077 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1078 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1079 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1080 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1081
1082 bdver2_memcpy,
1083 bdver2_memset,
1084 6, /* scalar_stmt_cost. */
1085 4, /* scalar load_cost. */
1086 4, /* scalar_store_cost. */
1087 6, /* vec_stmt_cost. */
1088 0, /* vec_to_scalar_cost. */
1089 2, /* scalar_to_vec_cost. */
1090 4, /* vec_align_load_cost. */
1091 4, /* vec_unalign_load_cost. */
1092 4, /* vec_store_cost. */
1093 2, /* cond_taken_branch_cost. */
1094 1, /* cond_not_taken_branch_cost. */
1095 };
1096
1097
1098 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1099 very small blocks it is better to use a loop. For large blocks, a libcall
1100 can do nontemporal accesses and beat inlined code considerably. */
1101 static stringop_algs bdver3_memcpy[2] = {
1102 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1103 {-1, rep_prefix_4_byte, false}}},
1104 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1105 {-1, libcall, false}}}};
1106 static stringop_algs bdver3_memset[2] = {
1107 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1108 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1109 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1110 {-1, libcall, false}}}};
1111 struct processor_costs bdver3_cost = {
1112 COSTS_N_INSNS (1), /* cost of an add instruction */
1113 COSTS_N_INSNS (1), /* cost of a lea instruction */
1114 COSTS_N_INSNS (1), /* variable shift costs */
1115 COSTS_N_INSNS (1), /* constant shift costs */
1116 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1117 COSTS_N_INSNS (4), /* HI */
1118 COSTS_N_INSNS (4), /* SI */
1119 COSTS_N_INSNS (6), /* DI */
1120 COSTS_N_INSNS (6)}, /* other */
1121 0, /* cost of multiply per each bit set */
1122 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1123 COSTS_N_INSNS (35), /* HI */
1124 COSTS_N_INSNS (51), /* SI */
1125 COSTS_N_INSNS (83), /* DI */
1126 COSTS_N_INSNS (83)}, /* other */
1127 COSTS_N_INSNS (1), /* cost of movsx */
1128 COSTS_N_INSNS (1), /* cost of movzx */
1129 8, /* "large" insn */
1130 9, /* MOVE_RATIO */
1131 4, /* cost for loading QImode using movzbl */
1132 {5, 5, 4}, /* cost of loading integer registers
1133 in QImode, HImode and SImode.
1134 Relative to reg-reg move (2). */
1135 {4, 4, 4}, /* cost of storing integer registers */
1136 2, /* cost of reg,reg fld/fst */
1137 {5, 5, 12}, /* cost of loading fp registers
1138 in SFmode, DFmode and XFmode */
1139 {4, 4, 8}, /* cost of storing fp registers
1140 in SFmode, DFmode and XFmode */
1141 2, /* cost of moving MMX register */
1142 {4, 4}, /* cost of loading MMX registers
1143 in SImode and DImode */
1144 {4, 4}, /* cost of storing MMX registers
1145 in SImode and DImode */
1146 2, /* cost of moving SSE register */
1147 {4, 4, 4}, /* cost of loading SSE registers
1148 in SImode, DImode and TImode */
1149 {4, 4, 4}, /* cost of storing SSE registers
1150 in SImode, DImode and TImode */
1151 2, /* MMX or SSE register to integer */
1152 16, /* size of l1 cache. */
1153 2048, /* size of l2 cache. */
1154 64, /* size of prefetch block */
1155 /* New AMD processors never drop prefetches; if they cannot be performed
1156 immediately, they are queued. We set the number of simultaneous prefetches
1157 to a large constant to reflect this (it probably is not a good idea not
1158 to limit the number of prefetches at all, as their execution also takes some
1159 time). */
1160 100, /* number of parallel prefetches */
1161 2, /* Branch cost */
1162 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1168
1169 bdver3_memcpy,
1170 bdver3_memset,
1171 6, /* scalar_stmt_cost. */
1172 4, /* scalar load_cost. */
1173 4, /* scalar_store_cost. */
1174 6, /* vec_stmt_cost. */
1175 0, /* vec_to_scalar_cost. */
1176 2, /* scalar_to_vec_cost. */
1177 4, /* vec_align_load_cost. */
1178 4, /* vec_unalign_load_cost. */
1179 4, /* vec_store_cost. */
1180 2, /* cond_taken_branch_cost. */
1181 1, /* cond_not_taken_branch_cost. */
1182 };
1183
1184 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1185 very small blocks it is better to use a loop. For large blocks, a libcall
1186 can do nontemporal accesses and beat inlined code considerably. */
1187 static stringop_algs bdver4_memcpy[2] = {
1188 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1189 {-1, rep_prefix_4_byte, false}}},
1190 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1191 {-1, libcall, false}}}};
1192 static stringop_algs bdver4_memset[2] = {
1193 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1194 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1195 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1196 {-1, libcall, false}}}};
1197 struct processor_costs bdver4_cost = {
1198 COSTS_N_INSNS (1), /* cost of an add instruction */
1199 COSTS_N_INSNS (1), /* cost of a lea instruction */
1200 COSTS_N_INSNS (1), /* variable shift costs */
1201 COSTS_N_INSNS (1), /* constant shift costs */
1202 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1203 COSTS_N_INSNS (4), /* HI */
1204 COSTS_N_INSNS (4), /* SI */
1205 COSTS_N_INSNS (6), /* DI */
1206 COSTS_N_INSNS (6)}, /* other */
1207 0, /* cost of multiply per each bit set */
1208 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1209 COSTS_N_INSNS (35), /* HI */
1210 COSTS_N_INSNS (51), /* SI */
1211 COSTS_N_INSNS (83), /* DI */
1212 COSTS_N_INSNS (83)}, /* other */
1213 COSTS_N_INSNS (1), /* cost of movsx */
1214 COSTS_N_INSNS (1), /* cost of movzx */
1215 8, /* "large" insn */
1216 9, /* MOVE_RATIO */
1217 4, /* cost for loading QImode using movzbl */
1218 {5, 5, 4}, /* cost of loading integer registers
1219 in QImode, HImode and SImode.
1220 Relative to reg-reg move (2). */
1221 {4, 4, 4}, /* cost of storing integer registers */
1222 2, /* cost of reg,reg fld/fst */
1223 {5, 5, 12}, /* cost of loading fp registers
1224 in SFmode, DFmode and XFmode */
1225 {4, 4, 8}, /* cost of storing fp registers
1226 in SFmode, DFmode and XFmode */
1227 2, /* cost of moving MMX register */
1228 {4, 4}, /* cost of loading MMX registers
1229 in SImode and DImode */
1230 {4, 4}, /* cost of storing MMX registers
1231 in SImode and DImode */
1232 2, /* cost of moving SSE register */
1233 {4, 4, 4}, /* cost of loading SSE registers
1234 in SImode, DImode and TImode */
1235 {4, 4, 4}, /* cost of storing SSE registers
1236 in SImode, DImode and TImode */
1237 2, /* MMX or SSE register to integer */
1238 16, /* size of l1 cache. */
1239 2048, /* size of l2 cache. */
1240 64, /* size of prefetch block */
1241 /* New AMD processors never drop prefetches; if they cannot be performed
1242 immediately, they are queued. We set the number of simultaneous prefetches
1243 to a large constant to reflect this (it probably is not a good idea not
1244 to limit the number of prefetches at all, as their execution also takes some
1245 time). */
1246 100, /* number of parallel prefetches */
1247 2, /* Branch cost */
1248 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1249 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1250 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1251 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1252 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1253 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1254
1255 bdver4_memcpy,
1256 bdver4_memset,
1257 6, /* scalar_stmt_cost. */
1258 4, /* scalar load_cost. */
1259 4, /* scalar_store_cost. */
1260 6, /* vec_stmt_cost. */
1261 0, /* vec_to_scalar_cost. */
1262 2, /* scalar_to_vec_cost. */
1263 4, /* vec_align_load_cost. */
1264 4, /* vec_unalign_load_cost. */
1265 4, /* vec_store_cost. */
1266 2, /* cond_taken_branch_cost. */
1267 1, /* cond_not_taken_branch_cost. */
1268 };
1269
1270 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1271 very small blocks it is better to use a loop. For large blocks, a libcall
1272 can do nontemporal accesses and beat inlined code considerably. */
1273 static stringop_algs btver1_memcpy[2] = {
1274 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1275 {-1, rep_prefix_4_byte, false}}},
1276 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1277 {-1, libcall, false}}}};
1278 static stringop_algs btver1_memset[2] = {
1279 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1280 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1281 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1282 {-1, libcall, false}}}};
1283 const struct processor_costs btver1_cost = {
1284 COSTS_N_INSNS (1), /* cost of an add instruction */
1285 COSTS_N_INSNS (2), /* cost of a lea instruction */
1286 COSTS_N_INSNS (1), /* variable shift costs */
1287 COSTS_N_INSNS (1), /* constant shift costs */
1288 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1289 COSTS_N_INSNS (4), /* HI */
1290 COSTS_N_INSNS (3), /* SI */
1291 COSTS_N_INSNS (4), /* DI */
1292 COSTS_N_INSNS (5)}, /* other */
1293 0, /* cost of multiply per each bit set */
1294 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1295 COSTS_N_INSNS (35), /* HI */
1296 COSTS_N_INSNS (51), /* SI */
1297 COSTS_N_INSNS (83), /* DI */
1298 COSTS_N_INSNS (83)}, /* other */
1299 COSTS_N_INSNS (1), /* cost of movsx */
1300 COSTS_N_INSNS (1), /* cost of movzx */
1301 8, /* "large" insn */
1302 9, /* MOVE_RATIO */
1303 4, /* cost for loading QImode using movzbl */
1304 {3, 4, 3}, /* cost of loading integer registers
1305 in QImode, HImode and SImode.
1306 Relative to reg-reg move (2). */
1307 {3, 4, 3}, /* cost of storing integer registers */
1308 4, /* cost of reg,reg fld/fst */
1309 {4, 4, 12}, /* cost of loading fp registers
1310 in SFmode, DFmode and XFmode */
1311 {6, 6, 8}, /* cost of storing fp registers
1312 in SFmode, DFmode and XFmode */
1313 2, /* cost of moving MMX register */
1314 {3, 3}, /* cost of loading MMX registers
1315 in SImode and DImode */
1316 {4, 4}, /* cost of storing MMX registers
1317 in SImode and DImode */
1318 2, /* cost of moving SSE register */
1319 {4, 4, 3}, /* cost of loading SSE registers
1320 in SImode, DImode and TImode */
1321 {4, 4, 5}, /* cost of storing SSE registers
1322 in SImode, DImode and TImode */
1323 3, /* MMX or SSE register to integer */
1324 /* On K8:
1325 MOVD reg64, xmmreg Double FSTORE 4
1326 MOVD reg32, xmmreg Double FSTORE 4
1327 On AMDFAM10:
1328 MOVD reg64, xmmreg Double FADD 3
1329 1/1 1/1
1330 MOVD reg32, xmmreg Double FADD 3
1331 1/1 1/1 */
1332 32, /* size of l1 cache. */
1333 512, /* size of l2 cache. */
1334 64, /* size of prefetch block */
1335 100, /* number of parallel prefetches */
1336 2, /* Branch cost */
1337 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1338 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1339 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1340 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1341 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1342 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1343
1344 btver1_memcpy,
1345 btver1_memset,
1346 4, /* scalar_stmt_cost. */
1347 2, /* scalar load_cost. */
1348 2, /* scalar_store_cost. */
1349 6, /* vec_stmt_cost. */
1350 0, /* vec_to_scalar_cost. */
1351 2, /* scalar_to_vec_cost. */
1352 2, /* vec_align_load_cost. */
1353 2, /* vec_unalign_load_cost. */
1354 2, /* vec_store_cost. */
1355 2, /* cond_taken_branch_cost. */
1356 1, /* cond_not_taken_branch_cost. */
1357 };
1358
1359 static stringop_algs btver2_memcpy[2] = {
1360 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1361 {-1, rep_prefix_4_byte, false}}},
1362 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1363 {-1, libcall, false}}}};
1364 static stringop_algs btver2_memset[2] = {
1365 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1366 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1367 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1368 {-1, libcall, false}}}};
1369 const struct processor_costs btver2_cost = {
1370 COSTS_N_INSNS (1), /* cost of an add instruction */
1371 COSTS_N_INSNS (2), /* cost of a lea instruction */
1372 COSTS_N_INSNS (1), /* variable shift costs */
1373 COSTS_N_INSNS (1), /* constant shift costs */
1374 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1375 COSTS_N_INSNS (4), /* HI */
1376 COSTS_N_INSNS (3), /* SI */
1377 COSTS_N_INSNS (4), /* DI */
1378 COSTS_N_INSNS (5)}, /* other */
1379 0, /* cost of multiply per each bit set */
1380 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1381 COSTS_N_INSNS (35), /* HI */
1382 COSTS_N_INSNS (51), /* SI */
1383 COSTS_N_INSNS (83), /* DI */
1384 COSTS_N_INSNS (83)}, /* other */
1385 COSTS_N_INSNS (1), /* cost of movsx */
1386 COSTS_N_INSNS (1), /* cost of movzx */
1387 8, /* "large" insn */
1388 9, /* MOVE_RATIO */
1389 4, /* cost for loading QImode using movzbl */
1390 {3, 4, 3}, /* cost of loading integer registers
1391 in QImode, HImode and SImode.
1392 Relative to reg-reg move (2). */
1393 {3, 4, 3}, /* cost of storing integer registers */
1394 4, /* cost of reg,reg fld/fst */
1395 {4, 4, 12}, /* cost of loading fp registers
1396 in SFmode, DFmode and XFmode */
1397 {6, 6, 8}, /* cost of storing fp registers
1398 in SFmode, DFmode and XFmode */
1399 2, /* cost of moving MMX register */
1400 {3, 3}, /* cost of loading MMX registers
1401 in SImode and DImode */
1402 {4, 4}, /* cost of storing MMX registers
1403 in SImode and DImode */
1404 2, /* cost of moving SSE register */
1405 {4, 4, 3}, /* cost of loading SSE registers
1406 in SImode, DImode and TImode */
1407 {4, 4, 5}, /* cost of storing SSE registers
1408 in SImode, DImode and TImode */
1409 3, /* MMX or SSE register to integer */
1410 /* On K8:
1411 MOVD reg64, xmmreg Double FSTORE 4
1412 MOVD reg32, xmmreg Double FSTORE 4
1413 On AMDFAM10:
1414 MOVD reg64, xmmreg Double FADD 3
1415 1/1 1/1
1416 MOVD reg32, xmmreg Double FADD 3
1417 1/1 1/1 */
1418 32, /* size of l1 cache. */
1419 2048, /* size of l2 cache. */
1420 64, /* size of prefetch block */
1421 100, /* number of parallel prefetches */
1422 2, /* Branch cost */
1423 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1424 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1425 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1426 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1427 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1428 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1429 btver2_memcpy,
1430 btver2_memset,
1431 4, /* scalar_stmt_cost. */
1432 2, /* scalar load_cost. */
1433 2, /* scalar_store_cost. */
1434 6, /* vec_stmt_cost. */
1435 0, /* vec_to_scalar_cost. */
1436 2, /* scalar_to_vec_cost. */
1437 2, /* vec_align_load_cost. */
1438 2, /* vec_unalign_load_cost. */
1439 2, /* vec_store_cost. */
1440 2, /* cond_taken_branch_cost. */
1441 1, /* cond_not_taken_branch_cost. */
1442 };
1443
1444 static stringop_algs pentium4_memcpy[2] = {
1445 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1446 DUMMY_STRINGOP_ALGS};
1447 static stringop_algs pentium4_memset[2] = {
1448 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1449 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1450 DUMMY_STRINGOP_ALGS};
1451
1452 static const
1453 struct processor_costs pentium4_cost = {
1454 COSTS_N_INSNS (1), /* cost of an add instruction */
1455 COSTS_N_INSNS (3), /* cost of a lea instruction */
1456 COSTS_N_INSNS (4), /* variable shift costs */
1457 COSTS_N_INSNS (4), /* constant shift costs */
1458 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1459 COSTS_N_INSNS (15), /* HI */
1460 COSTS_N_INSNS (15), /* SI */
1461 COSTS_N_INSNS (15), /* DI */
1462 COSTS_N_INSNS (15)}, /* other */
1463 0, /* cost of multiply per each bit set */
1464 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1465 COSTS_N_INSNS (56), /* HI */
1466 COSTS_N_INSNS (56), /* SI */
1467 COSTS_N_INSNS (56), /* DI */
1468 COSTS_N_INSNS (56)}, /* other */
1469 COSTS_N_INSNS (1), /* cost of movsx */
1470 COSTS_N_INSNS (1), /* cost of movzx */
1471 16, /* "large" insn */
1472 6, /* MOVE_RATIO */
1473 2, /* cost for loading QImode using movzbl */
1474 {4, 5, 4}, /* cost of loading integer registers
1475 in QImode, HImode and SImode.
1476 Relative to reg-reg move (2). */
1477 {2, 3, 2}, /* cost of storing integer registers */
1478 2, /* cost of reg,reg fld/fst */
1479 {2, 2, 6}, /* cost of loading fp registers
1480 in SFmode, DFmode and XFmode */
1481 {4, 4, 6}, /* cost of storing fp registers
1482 in SFmode, DFmode and XFmode */
1483 2, /* cost of moving MMX register */
1484 {2, 2}, /* cost of loading MMX registers
1485 in SImode and DImode */
1486 {2, 2}, /* cost of storing MMX registers
1487 in SImode and DImode */
1488 12, /* cost of moving SSE register */
1489 {12, 12, 12}, /* cost of loading SSE registers
1490 in SImode, DImode and TImode */
1491 {2, 2, 8}, /* cost of storing SSE registers
1492 in SImode, DImode and TImode */
1493 10, /* MMX or SSE register to integer */
1494 8, /* size of l1 cache. */
1495 256, /* size of l2 cache. */
1496 64, /* size of prefetch block */
1497 6, /* number of parallel prefetches */
1498 2, /* Branch cost */
1499 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1500 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1501 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1502 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1503 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1504 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1505 pentium4_memcpy,
1506 pentium4_memset,
1507 1, /* scalar_stmt_cost. */
1508 1, /* scalar load_cost. */
1509 1, /* scalar_store_cost. */
1510 1, /* vec_stmt_cost. */
1511 1, /* vec_to_scalar_cost. */
1512 1, /* scalar_to_vec_cost. */
1513 1, /* vec_align_load_cost. */
1514 2, /* vec_unalign_load_cost. */
1515 1, /* vec_store_cost. */
1516 3, /* cond_taken_branch_cost. */
1517 1, /* cond_not_taken_branch_cost. */
1518 };
1519
1520 static stringop_algs nocona_memcpy[2] = {
1521 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1522 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1523 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1524
1525 static stringop_algs nocona_memset[2] = {
1526 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1527 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1528 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1529 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1530
1531 static const
1532 struct processor_costs nocona_cost = {
1533 COSTS_N_INSNS (1), /* cost of an add instruction */
1534 COSTS_N_INSNS (1), /* cost of a lea instruction */
1535 COSTS_N_INSNS (1), /* variable shift costs */
1536 COSTS_N_INSNS (1), /* constant shift costs */
1537 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1538 COSTS_N_INSNS (10), /* HI */
1539 COSTS_N_INSNS (10), /* SI */
1540 COSTS_N_INSNS (10), /* DI */
1541 COSTS_N_INSNS (10)}, /* other */
1542 0, /* cost of multiply per each bit set */
1543 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1544 COSTS_N_INSNS (66), /* HI */
1545 COSTS_N_INSNS (66), /* SI */
1546 COSTS_N_INSNS (66), /* DI */
1547 COSTS_N_INSNS (66)}, /* other */
1548 COSTS_N_INSNS (1), /* cost of movsx */
1549 COSTS_N_INSNS (1), /* cost of movzx */
1550 16, /* "large" insn */
1551 17, /* MOVE_RATIO */
1552 4, /* cost for loading QImode using movzbl */
1553 {4, 4, 4}, /* cost of loading integer registers
1554 in QImode, HImode and SImode.
1555 Relative to reg-reg move (2). */
1556 {4, 4, 4}, /* cost of storing integer registers */
1557 3, /* cost of reg,reg fld/fst */
1558 {12, 12, 12}, /* cost of loading fp registers
1559 in SFmode, DFmode and XFmode */
1560 {4, 4, 4}, /* cost of storing fp registers
1561 in SFmode, DFmode and XFmode */
1562 6, /* cost of moving MMX register */
1563 {12, 12}, /* cost of loading MMX registers
1564 in SImode and DImode */
1565 {12, 12}, /* cost of storing MMX registers
1566 in SImode and DImode */
1567 6, /* cost of moving SSE register */
1568 {12, 12, 12}, /* cost of loading SSE registers
1569 in SImode, DImode and TImode */
1570 {12, 12, 12}, /* cost of storing SSE registers
1571 in SImode, DImode and TImode */
1572 8, /* MMX or SSE register to integer */
1573 8, /* size of l1 cache. */
1574 1024, /* size of l2 cache. */
1575 64, /* size of prefetch block */
1576 8, /* number of parallel prefetches */
1577 1, /* Branch cost */
1578 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1579 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1580 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1581 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1582 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1583 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1584 nocona_memcpy,
1585 nocona_memset,
1586 1, /* scalar_stmt_cost. */
1587 1, /* scalar load_cost. */
1588 1, /* scalar_store_cost. */
1589 1, /* vec_stmt_cost. */
1590 1, /* vec_to_scalar_cost. */
1591 1, /* scalar_to_vec_cost. */
1592 1, /* vec_align_load_cost. */
1593 2, /* vec_unalign_load_cost. */
1594 1, /* vec_store_cost. */
1595 3, /* cond_taken_branch_cost. */
1596 1, /* cond_not_taken_branch_cost. */
1597 };
1598
1599 static stringop_algs atom_memcpy[2] = {
1600 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1601 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1602 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1603 static stringop_algs atom_memset[2] = {
1604 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1605 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1606 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1607 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1608 static const
1609 struct processor_costs atom_cost = {
1610 COSTS_N_INSNS (1), /* cost of an add instruction */
1611 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1612 COSTS_N_INSNS (1), /* variable shift costs */
1613 COSTS_N_INSNS (1), /* constant shift costs */
1614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1615 COSTS_N_INSNS (4), /* HI */
1616 COSTS_N_INSNS (3), /* SI */
1617 COSTS_N_INSNS (4), /* DI */
1618 COSTS_N_INSNS (2)}, /* other */
1619 0, /* cost of multiply per each bit set */
1620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1621 COSTS_N_INSNS (26), /* HI */
1622 COSTS_N_INSNS (42), /* SI */
1623 COSTS_N_INSNS (74), /* DI */
1624 COSTS_N_INSNS (74)}, /* other */
1625 COSTS_N_INSNS (1), /* cost of movsx */
1626 COSTS_N_INSNS (1), /* cost of movzx */
1627 8, /* "large" insn */
1628 17, /* MOVE_RATIO */
1629 4, /* cost for loading QImode using movzbl */
1630 {4, 4, 4}, /* cost of loading integer registers
1631 in QImode, HImode and SImode.
1632 Relative to reg-reg move (2). */
1633 {4, 4, 4}, /* cost of storing integer registers */
1634 4, /* cost of reg,reg fld/fst */
1635 {12, 12, 12}, /* cost of loading fp registers
1636 in SFmode, DFmode and XFmode */
1637 {6, 6, 8}, /* cost of storing fp registers
1638 in SFmode, DFmode and XFmode */
1639 2, /* cost of moving MMX register */
1640 {8, 8}, /* cost of loading MMX registers
1641 in SImode and DImode */
1642 {8, 8}, /* cost of storing MMX registers
1643 in SImode and DImode */
1644 2, /* cost of moving SSE register */
1645 {8, 8, 8}, /* cost of loading SSE registers
1646 in SImode, DImode and TImode */
1647 {8, 8, 8}, /* cost of storing SSE registers
1648 in SImode, DImode and TImode */
1649 5, /* MMX or SSE register to integer */
1650 32, /* size of l1 cache. */
1651 256, /* size of l2 cache. */
1652 64, /* size of prefetch block */
1653 6, /* number of parallel prefetches */
1654 3, /* Branch cost */
1655 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1656 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1657 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1658 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1659 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1660 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1661 atom_memcpy,
1662 atom_memset,
1663 1, /* scalar_stmt_cost. */
1664 1, /* scalar load_cost. */
1665 1, /* scalar_store_cost. */
1666 1, /* vec_stmt_cost. */
1667 1, /* vec_to_scalar_cost. */
1668 1, /* scalar_to_vec_cost. */
1669 1, /* vec_align_load_cost. */
1670 2, /* vec_unalign_load_cost. */
1671 1, /* vec_store_cost. */
1672 3, /* cond_taken_branch_cost. */
1673 1, /* cond_not_taken_branch_cost. */
1674 };
1675
1676 static stringop_algs slm_memcpy[2] = {
1677 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1678 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1679 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1680 static stringop_algs slm_memset[2] = {
1681 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1682 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1683 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1684 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1685 static const
1686 struct processor_costs slm_cost = {
1687 COSTS_N_INSNS (1), /* cost of an add instruction */
1688 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1689 COSTS_N_INSNS (1), /* variable shift costs */
1690 COSTS_N_INSNS (1), /* constant shift costs */
1691 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1692 COSTS_N_INSNS (3), /* HI */
1693 COSTS_N_INSNS (3), /* SI */
1694 COSTS_N_INSNS (4), /* DI */
1695 COSTS_N_INSNS (2)}, /* other */
1696 0, /* cost of multiply per each bit set */
1697 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1698 COSTS_N_INSNS (26), /* HI */
1699 COSTS_N_INSNS (42), /* SI */
1700 COSTS_N_INSNS (74), /* DI */
1701 COSTS_N_INSNS (74)}, /* other */
1702 COSTS_N_INSNS (1), /* cost of movsx */
1703 COSTS_N_INSNS (1), /* cost of movzx */
1704 8, /* "large" insn */
1705 17, /* MOVE_RATIO */
1706 4, /* cost for loading QImode using movzbl */
1707 {4, 4, 4}, /* cost of loading integer registers
1708 in QImode, HImode and SImode.
1709 Relative to reg-reg move (2). */
1710 {4, 4, 4}, /* cost of storing integer registers */
1711 4, /* cost of reg,reg fld/fst */
1712 {12, 12, 12}, /* cost of loading fp registers
1713 in SFmode, DFmode and XFmode */
1714 {6, 6, 8}, /* cost of storing fp registers
1715 in SFmode, DFmode and XFmode */
1716 2, /* cost of moving MMX register */
1717 {8, 8}, /* cost of loading MMX registers
1718 in SImode and DImode */
1719 {8, 8}, /* cost of storing MMX registers
1720 in SImode and DImode */
1721 2, /* cost of moving SSE register */
1722 {8, 8, 8}, /* cost of loading SSE registers
1723 in SImode, DImode and TImode */
1724 {8, 8, 8}, /* cost of storing SSE registers
1725 in SImode, DImode and TImode */
1726 5, /* MMX or SSE register to integer */
1727 32, /* size of l1 cache. */
1728 256, /* size of l2 cache. */
1729 64, /* size of prefetch block */
1730 6, /* number of parallel prefetches */
1731 3, /* Branch cost */
1732 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1733 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1734 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1735 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1736 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1737 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1738 slm_memcpy,
1739 slm_memset,
1740 1, /* scalar_stmt_cost. */
1741 1, /* scalar load_cost. */
1742 1, /* scalar_store_cost. */
1743 1, /* vec_stmt_cost. */
1744 4, /* vec_to_scalar_cost. */
1745 1, /* scalar_to_vec_cost. */
1746 1, /* vec_align_load_cost. */
1747 2, /* vec_unalign_load_cost. */
1748 1, /* vec_store_cost. */
1749 3, /* cond_taken_branch_cost. */
1750 1, /* cond_not_taken_branch_cost. */
1751 };
1752
1753 static stringop_algs intel_memcpy[2] = {
1754 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1755 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1756 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1757 static stringop_algs intel_memset[2] = {
1758 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1759 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1760 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1761 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1762 static const
1763 struct processor_costs intel_cost = {
1764 COSTS_N_INSNS (1), /* cost of an add instruction */
1765 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1766 COSTS_N_INSNS (1), /* variable shift costs */
1767 COSTS_N_INSNS (1), /* constant shift costs */
1768 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1769 COSTS_N_INSNS (3), /* HI */
1770 COSTS_N_INSNS (3), /* SI */
1771 COSTS_N_INSNS (4), /* DI */
1772 COSTS_N_INSNS (2)}, /* other */
1773 0, /* cost of multiply per each bit set */
1774 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1775 COSTS_N_INSNS (26), /* HI */
1776 COSTS_N_INSNS (42), /* SI */
1777 COSTS_N_INSNS (74), /* DI */
1778 COSTS_N_INSNS (74)}, /* other */
1779 COSTS_N_INSNS (1), /* cost of movsx */
1780 COSTS_N_INSNS (1), /* cost of movzx */
1781 8, /* "large" insn */
1782 17, /* MOVE_RATIO */
1783 4, /* cost for loading QImode using movzbl */
1784 {4, 4, 4}, /* cost of loading integer registers
1785 in QImode, HImode and SImode.
1786 Relative to reg-reg move (2). */
1787 {4, 4, 4}, /* cost of storing integer registers */
1788 4, /* cost of reg,reg fld/fst */
1789 {12, 12, 12}, /* cost of loading fp registers
1790 in SFmode, DFmode and XFmode */
1791 {6, 6, 8}, /* cost of storing fp registers
1792 in SFmode, DFmode and XFmode */
1793 2, /* cost of moving MMX register */
1794 {8, 8}, /* cost of loading MMX registers
1795 in SImode and DImode */
1796 {8, 8}, /* cost of storing MMX registers
1797 in SImode and DImode */
1798 2, /* cost of moving SSE register */
1799 {8, 8, 8}, /* cost of loading SSE registers
1800 in SImode, DImode and TImode */
1801 {8, 8, 8}, /* cost of storing SSE registers
1802 in SImode, DImode and TImode */
1803 5, /* MMX or SSE register to integer */
1804 32, /* size of l1 cache. */
1805 256, /* size of l2 cache. */
1806 64, /* size of prefetch block */
1807 6, /* number of parallel prefetches */
1808 3, /* Branch cost */
1809 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1810 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1811 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1812 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1813 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1814 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1815 intel_memcpy,
1816 intel_memset,
1817 1, /* scalar_stmt_cost. */
1818 1, /* scalar load_cost. */
1819 1, /* scalar_store_cost. */
1820 1, /* vec_stmt_cost. */
1821 4, /* vec_to_scalar_cost. */
1822 1, /* scalar_to_vec_cost. */
1823 1, /* vec_align_load_cost. */
1824 2, /* vec_unalign_load_cost. */
1825 1, /* vec_store_cost. */
1826 3, /* cond_taken_branch_cost. */
1827 1, /* cond_not_taken_branch_cost. */
1828 };
1829
1830 /* Generic should produce code tuned for Core-i7 (and newer chips)
1831 and btver1 (and newer chips). */
1832
1833 static stringop_algs generic_memcpy[2] = {
1834 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1835 {-1, libcall, false}}},
1836 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1837 {-1, libcall, false}}}};
1838 static stringop_algs generic_memset[2] = {
1839 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1840 {-1, libcall, false}}},
1841 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1842 {-1, libcall, false}}}};
1843 static const
1844 struct processor_costs generic_cost = {
1845 COSTS_N_INSNS (1), /* cost of an add instruction */
1846 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1847 this cost, however, our current implementation of synth_mult uses
1848 unnecessary temporary registers, causing regressions on several
1849 SPECfp benchmarks. */
1850 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1851 COSTS_N_INSNS (1), /* variable shift costs */
1852 COSTS_N_INSNS (1), /* constant shift costs */
1853 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1854 COSTS_N_INSNS (4), /* HI */
1855 COSTS_N_INSNS (3), /* SI */
1856 COSTS_N_INSNS (4), /* DI */
1857 COSTS_N_INSNS (2)}, /* other */
1858 0, /* cost of multiply per each bit set */
1859 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1860 COSTS_N_INSNS (26), /* HI */
1861 COSTS_N_INSNS (42), /* SI */
1862 COSTS_N_INSNS (74), /* DI */
1863 COSTS_N_INSNS (74)}, /* other */
1864 COSTS_N_INSNS (1), /* cost of movsx */
1865 COSTS_N_INSNS (1), /* cost of movzx */
1866 8, /* "large" insn */
1867 17, /* MOVE_RATIO */
1868 4, /* cost for loading QImode using movzbl */
1869 {4, 4, 4}, /* cost of loading integer registers
1870 in QImode, HImode and SImode.
1871 Relative to reg-reg move (2). */
1872 {4, 4, 4}, /* cost of storing integer registers */
1873 4, /* cost of reg,reg fld/fst */
1874 {12, 12, 12}, /* cost of loading fp registers
1875 in SFmode, DFmode and XFmode */
1876 {6, 6, 8}, /* cost of storing fp registers
1877 in SFmode, DFmode and XFmode */
1878 2, /* cost of moving MMX register */
1879 {8, 8}, /* cost of loading MMX registers
1880 in SImode and DImode */
1881 {8, 8}, /* cost of storing MMX registers
1882 in SImode and DImode */
1883 2, /* cost of moving SSE register */
1884 {8, 8, 8}, /* cost of loading SSE registers
1885 in SImode, DImode and TImode */
1886 {8, 8, 8}, /* cost of storing SSE registers
1887 in SImode, DImode and TImode */
1888 5, /* MMX or SSE register to integer */
1889 32, /* size of l1 cache. */
1890 512, /* size of l2 cache. */
1891 64, /* size of prefetch block */
1892 6, /* number of parallel prefetches */
1893 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1894 value is increased to the perhaps more appropriate value of 5. */
1895 3, /* Branch cost */
1896 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1897 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1898 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1899 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1900 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1901 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1902 generic_memcpy,
1903 generic_memset,
1904 1, /* scalar_stmt_cost. */
1905 1, /* scalar load_cost. */
1906 1, /* scalar_store_cost. */
1907 1, /* vec_stmt_cost. */
1908 1, /* vec_to_scalar_cost. */
1909 1, /* scalar_to_vec_cost. */
1910 1, /* vec_align_load_cost. */
1911 2, /* vec_unalign_load_cost. */
1912 1, /* vec_store_cost. */
1913 3, /* cond_taken_branch_cost. */
1914 1, /* cond_not_taken_branch_cost. */
1915 };
1916
1917 /* core_cost should produce code tuned for the Core family of CPUs. */
1918 static stringop_algs core_memcpy[2] = {
1919 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1920 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1921 {-1, libcall, false}}}};
1922 static stringop_algs core_memset[2] = {
1923 {libcall, {{6, loop_1_byte, true},
1924 {24, loop, true},
1925 {8192, rep_prefix_4_byte, true},
1926 {-1, libcall, false}}},
1927 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1928 {-1, libcall, false}}}};
1929
1930 static const
1931 struct processor_costs core_cost = {
1932 COSTS_N_INSNS (1), /* cost of an add instruction */
1933 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1934 this cost, however, our current implementation of synth_mult uses
1935 unnecessary temporary registers, causing regressions on several
1936 SPECfp benchmarks. */
1937 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1938 COSTS_N_INSNS (1), /* variable shift costs */
1939 COSTS_N_INSNS (1), /* constant shift costs */
1940 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1941 COSTS_N_INSNS (4), /* HI */
1942 COSTS_N_INSNS (3), /* SI */
1943 COSTS_N_INSNS (4), /* DI */
1944 COSTS_N_INSNS (2)}, /* other */
1945 0, /* cost of multiply per each bit set */
1946 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1947 COSTS_N_INSNS (26), /* HI */
1948 COSTS_N_INSNS (42), /* SI */
1949 COSTS_N_INSNS (74), /* DI */
1950 COSTS_N_INSNS (74)}, /* other */
1951 COSTS_N_INSNS (1), /* cost of movsx */
1952 COSTS_N_INSNS (1), /* cost of movzx */
1953 8, /* "large" insn */
1954 17, /* MOVE_RATIO */
1955 4, /* cost for loading QImode using movzbl */
1956 {4, 4, 4}, /* cost of loading integer registers
1957 in QImode, HImode and SImode.
1958 Relative to reg-reg move (2). */
1959 {4, 4, 4}, /* cost of storing integer registers */
1960 4, /* cost of reg,reg fld/fst */
1961 {12, 12, 12}, /* cost of loading fp registers
1962 in SFmode, DFmode and XFmode */
1963 {6, 6, 8}, /* cost of storing fp registers
1964 in SFmode, DFmode and XFmode */
1965 2, /* cost of moving MMX register */
1966 {8, 8}, /* cost of loading MMX registers
1967 in SImode and DImode */
1968 {8, 8}, /* cost of storing MMX registers
1969 in SImode and DImode */
1970 2, /* cost of moving SSE register */
1971 {8, 8, 8}, /* cost of loading SSE registers
1972 in SImode, DImode and TImode */
1973 {8, 8, 8}, /* cost of storing SSE registers
1974 in SImode, DImode and TImode */
1975 5, /* MMX or SSE register to integer */
1976 64, /* size of l1 cache. */
1977 512, /* size of l2 cache. */
1978 64, /* size of prefetch block */
1979 6, /* number of parallel prefetches */
1980 /* FIXME perhaps more appropriate value is 5. */
1981 3, /* Branch cost */
1982 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1983 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1984 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1985 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1986 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1987 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1988 core_memcpy,
1989 core_memset,
1990 1, /* scalar_stmt_cost. */
1991 1, /* scalar load_cost. */
1992 1, /* scalar_store_cost. */
1993 1, /* vec_stmt_cost. */
1994 1, /* vec_to_scalar_cost. */
1995 1, /* scalar_to_vec_cost. */
1996 1, /* vec_align_load_cost. */
1997 2, /* vec_unalign_load_cost. */
1998 1, /* vec_store_cost. */
1999 3, /* cond_taken_branch_cost. */
2000 1, /* cond_not_taken_branch_cost. */
2001 };
2002
2003
2004 /* Set by -mtune. */
2005 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2006
2007 /* Set by -mtune or -Os. */
2008 const struct processor_costs *ix86_cost = &pentium_cost;
2009
2010 /* Processor feature/optimization bitmasks. */
2011 #define m_386 (1<<PROCESSOR_I386)
2012 #define m_486 (1<<PROCESSOR_I486)
2013 #define m_PENT (1<<PROCESSOR_PENTIUM)
2014 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2015 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2016 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2017 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2018 #define m_CORE2 (1<<PROCESSOR_CORE2)
2019 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2020 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2021 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2022 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2023 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2024 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2025 #define m_INTEL (1<<PROCESSOR_INTEL)
2026
2027 #define m_GEODE (1<<PROCESSOR_GEODE)
2028 #define m_K6 (1<<PROCESSOR_K6)
2029 #define m_K6_GEODE (m_K6 | m_GEODE)
2030 #define m_K8 (1<<PROCESSOR_K8)
2031 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2032 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2033 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2034 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2035 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2036 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2037 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2038 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2039 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2040 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2041 #define m_BTVER (m_BTVER1 | m_BTVER2)
2042 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2043
2044 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2045
2046 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2047 #undef DEF_TUNE
2048 #define DEF_TUNE(tune, name, selector) name,
2049 #include "x86-tune.def"
2050 #undef DEF_TUNE
2051 };
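/* Each DEF_TUNE (tune, name, selector) entry in x86-tune.def contributes
   its NAME string to the array above and its SELECTOR mask to
   initial_ix86_tune_features below.  A hypothetical entry such as
   DEF_TUNE (X86_TUNE_FOO, "foo", m_CORE_ALL | m_GENERIC) would therefore
   enable the "foo" tuning only for the Core and generic tunings. */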
2052
2053 /* Feature tests against the various tunings. */
2054 unsigned char ix86_tune_features[X86_TUNE_LAST];
2055
2056 /* Feature tests against the various tunings used to create ix86_tune_features
2057 based on the processor mask. */
2058 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2059 #undef DEF_TUNE
2060 #define DEF_TUNE(tune, name, selector) selector,
2061 #include "x86-tune.def"
2062 #undef DEF_TUNE
2063 };
2064
2065 /* Feature tests against the various architecture variations. */
2066 unsigned char ix86_arch_features[X86_ARCH_LAST];
2067
2068 /* Feature tests against the various architecture variations, used to create
2069 ix86_arch_features based on the processor mask. */
2070 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2071 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2072 ~(m_386 | m_486 | m_PENT | m_K6),
2073
2074 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2075 ~m_386,
2076
2077 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2078 ~(m_386 | m_486),
2079
2080 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2081 ~m_386,
2082
2083 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2084 ~m_386,
2085 };
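/* ix86_arch_features is later derived from these selectors and the
   processor mask chosen by -march, analogously to set_ix86_tune_features
   below; e.g. the X86_ARCH_CMOV selector leaves conditional moves
   disabled only for -march=i386, i486, pentium and k6. */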
2086
2087 /* If the average insn count for a single function invocation is
2088 lower than this constant, emit fast (but longer) prologue and
2089 epilogue code. */
2090 #define FAST_PROLOGUE_INSN_COUNT 20
2091
2092 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2093 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2094 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2095 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2096
2097 /* Array of the smallest class containing reg number REGNO, indexed by
2098 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2099
2100 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2101 {
2102 /* ax, dx, cx, bx */
2103 AREG, DREG, CREG, BREG,
2104 /* si, di, bp, sp */
2105 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2106 /* FP registers */
2107 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2108 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2109 /* arg pointer */
2110 NON_Q_REGS,
2111 /* flags, fpsr, fpcr, frame */
2112 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2113 /* SSE registers */
2114 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2115 SSE_REGS, SSE_REGS,
2116 /* MMX registers */
2117 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2118 MMX_REGS, MMX_REGS,
2119 /* REX registers */
2120 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2121 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2122 /* SSE REX registers */
2123 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2124 SSE_REGS, SSE_REGS,
2125 /* AVX-512 SSE registers */
2126 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2127 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2128 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2129 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2130 /* Mask registers. */
2131 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2132 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2133 };
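/* For example, gcc register 0 (%eax) is placed in AREG above, while %esp
   and %ebp are only in NON_Q_REGS because they have no addressable low
   byte outside 64-bit mode. */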
2134
2135 /* The "default" register map used in 32bit mode. */
2136
2137 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2138 {
2139 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2140 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2141 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2142 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2143 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2144 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2145 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2146 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2147 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2148 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2149 };
2150
2151 /* The "default" register map used in 64bit mode. */
2152
2153 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2154 {
2155 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2156 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2157 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2158 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2159 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2160 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
2161 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2162 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2163 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2164 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2165 };
2166
2167 /* Define the register numbers to be used in Dwarf debugging information.
2168 The SVR4 reference port C compiler uses the following register numbers
2169 in its Dwarf output code:
2170 0 for %eax (gcc regno = 0)
2171 1 for %ecx (gcc regno = 2)
2172 2 for %edx (gcc regno = 1)
2173 3 for %ebx (gcc regno = 3)
2174 4 for %esp (gcc regno = 7)
2175 5 for %ebp (gcc regno = 6)
2176 6 for %esi (gcc regno = 4)
2177 7 for %edi (gcc regno = 5)
2178 The following three DWARF register numbers are never generated by
2179 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2180 believes these numbers have these meanings.
2181 8 for %eip (no gcc equivalent)
2182 9 for %eflags (gcc regno = 17)
2183 10 for %trapno (no gcc equivalent)
2184 It is not at all clear how we should number the FP stack registers
2185 for the x86 architecture. If the version of SDB on x86/svr4 were
2186 a bit less brain dead with respect to floating-point then we would
2187 have a precedent to follow with respect to DWARF register numbers
2188 for x86 FP registers, but the SDB on x86/svr4 is so completely
2189 broken with respect to FP registers that it is hardly worth thinking
2190 of it as something to strive for compatibility with.
2191 The version of x86/svr4 SDB I have at the moment does (partially)
2192 seem to believe that DWARF register number 11 is associated with
2193 the x86 register %st(0), but that's about all. Higher DWARF
2194 register numbers don't seem to be associated with anything in
2195 particular, and even for DWARF regno 11, SDB only seems to under-
2196 stand that it should say that a variable lives in %st(0) (when
2197 asked via an `=' command) if we said it was in DWARF regno 11,
2198 but SDB still prints garbage when asked for the value of the
2199 variable in question (via a `/' command).
2200 (Also note that the labels SDB prints for various FP stack regs
2201 when doing an `x' command are all wrong.)
2202 Note that these problems generally don't affect the native SVR4
2203 C compiler because it doesn't allow the use of -O with -g and
2204 because when it is *not* optimizing, it allocates a memory
2205 location for each floating-point variable, and the memory
2206 location is what gets described in the DWARF AT_location
2207 attribute for the variable in question.
2208 Regardless of the severe mental illness of the x86/svr4 SDB, we
2209 do something sensible here and we use the following DWARF
2210 register numbers. Note that these are all stack-top-relative
2211 numbers.
2212 11 for %st(0) (gcc regno = 8)
2213 12 for %st(1) (gcc regno = 9)
2214 13 for %st(2) (gcc regno = 10)
2215 14 for %st(3) (gcc regno = 11)
2216 15 for %st(4) (gcc regno = 12)
2217 16 for %st(5) (gcc regno = 13)
2218 17 for %st(6) (gcc regno = 14)
2219 18 for %st(7) (gcc regno = 15)
2220 */
2221 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2222 {
2223 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2224 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2225 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2226 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2227 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2228 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2229 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2230 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2231 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2232 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2233 };
2234
2235 /* Define parameter passing and return registers. */
2236
2237 static int const x86_64_int_parameter_registers[6] =
2238 {
2239 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2240 };
2241
2242 static int const x86_64_ms_abi_int_parameter_registers[4] =
2243 {
2244 CX_REG, DX_REG, R8_REG, R9_REG
2245 };
2246
2247 static int const x86_64_int_return_registers[4] =
2248 {
2249 AX_REG, DX_REG, DI_REG, SI_REG
2250 };
2251
2252 /* Additional registers that are clobbered by SYSV calls. */
2253
2254 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2255 {
2256 SI_REG, DI_REG,
2257 XMM6_REG, XMM7_REG,
2258 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2259 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2260 };
2261
2262 /* Define the structure for the machine field in struct function. */
2263
2264 struct GTY(()) stack_local_entry {
2265 unsigned short mode;
2266 unsigned short n;
2267 rtx rtl;
2268 struct stack_local_entry *next;
2269 };
2270
2271 /* Structure describing stack frame layout.
2272 Stack grows downward:
2273
2274 [arguments]
2275 <- ARG_POINTER
2276 saved pc
2277
2278 saved static chain if ix86_static_chain_on_stack
2279
2280 saved frame pointer if frame_pointer_needed
2281 <- HARD_FRAME_POINTER
2282 [saved regs]
2283 <- regs_save_offset
2284 [padding0]
2285
2286 [saved SSE regs]
2287 <- sse_regs_save_offset
2288 [padding1] |
2289 | <- FRAME_POINTER
2290 [va_arg registers] |
2291 |
2292 [frame] |
2293 |
2294 [padding2] | = to_allocate
2295 <- STACK_POINTER
2296 */
2297 struct ix86_frame
2298 {
2299 int nsseregs;
2300 int nregs;
2301 int va_arg_size;
2302 int red_zone_size;
2303 int outgoing_arguments_size;
2304
2305 /* The offsets relative to ARG_POINTER. */
2306 HOST_WIDE_INT frame_pointer_offset;
2307 HOST_WIDE_INT hard_frame_pointer_offset;
2308 HOST_WIDE_INT stack_pointer_offset;
2309 HOST_WIDE_INT hfp_save_offset;
2310 HOST_WIDE_INT reg_save_offset;
2311 HOST_WIDE_INT sse_reg_save_offset;
2312
2313 /* When save_regs_using_mov is set, emit prologue using
2314 move instead of push instructions. */
2315 bool save_regs_using_mov;
2316 };
2317
2318 /* Which cpu are we scheduling for. */
2319 enum attr_cpu ix86_schedule;
2320
2321 /* Which cpu are we optimizing for. */
2322 enum processor_type ix86_tune;
2323
2324 /* Which instruction set architecture to use. */
2325 enum processor_type ix86_arch;
2326
2327 /* True if processor has SSE prefetch instruction. */
2328 unsigned char x86_prefetch_sse;
2329
2330 /* -mstackrealign option */
2331 static const char ix86_force_align_arg_pointer_string[]
2332 = "force_align_arg_pointer";
2333
2334 static rtx (*ix86_gen_leave) (void);
2335 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2336 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2337 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2338 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2339 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2340 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2342 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2343 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2344 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2345 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2346
2347 /* Preferred alignment for stack boundary in bits. */
2348 unsigned int ix86_preferred_stack_boundary;
2349
2350 /* Alignment for incoming stack boundary in bits specified at
2351 command line. */
2352 static unsigned int ix86_user_incoming_stack_boundary;
2353
2354 /* Default alignment for incoming stack boundary in bits. */
2355 static unsigned int ix86_default_incoming_stack_boundary;
2356
2357 /* Alignment for incoming stack boundary in bits. */
2358 unsigned int ix86_incoming_stack_boundary;
2359
2360 /* Calling abi specific va_list type nodes. */
2361 static GTY(()) tree sysv_va_list_type_node;
2362 static GTY(()) tree ms_va_list_type_node;
2363
2364 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2365 char internal_label_prefix[16];
2366 int internal_label_prefix_len;
2367
2368 /* Fence to use after loop using movnt. */
2369 tree x86_mfence;
2370
2371 /* Register class used for passing a given 64-bit part of the argument.
2372 These represent classes as documented by the psABI, with the exception
2373 of the SSESF and SSEDF classes, which are basically the SSE class; gcc
2374 just uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
2375
2376 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2377 whenever possible (the upper half then contains only padding). */
2378 enum x86_64_reg_class
2379 {
2380 X86_64_NO_CLASS,
2381 X86_64_INTEGER_CLASS,
2382 X86_64_INTEGERSI_CLASS,
2383 X86_64_SSE_CLASS,
2384 X86_64_SSESF_CLASS,
2385 X86_64_SSEDF_CLASS,
2386 X86_64_SSEUP_CLASS,
2387 X86_64_X87_CLASS,
2388 X86_64_X87UP_CLASS,
2389 X86_64_COMPLEX_X87_CLASS,
2390 X86_64_MEMORY_CLASS
2391 };
2392
2393 #define MAX_CLASSES 8
2394
2395 /* Table of constants used by fldpi, fldln2, etc.... */
2396 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2397 static bool ext_80387_constants_init = 0;
2398
2399 \f
2400 static struct machine_function * ix86_init_machine_status (void);
2401 static rtx ix86_function_value (const_tree, const_tree, bool);
2402 static bool ix86_function_value_regno_p (const unsigned int);
2403 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2404 const_tree);
2405 static rtx ix86_static_chain (const_tree, bool);
2406 static int ix86_function_regparm (const_tree, const_tree);
2407 static void ix86_compute_frame_layout (struct ix86_frame *);
2408 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2409 rtx, rtx, int);
2410 static void ix86_add_new_builtins (HOST_WIDE_INT);
2411 static tree ix86_canonical_va_list_type (tree);
2412 static void predict_jump (int);
2413 static unsigned int split_stack_prologue_scratch_regno (void);
2414 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2415
2416 enum ix86_function_specific_strings
2417 {
2418 IX86_FUNCTION_SPECIFIC_ARCH,
2419 IX86_FUNCTION_SPECIFIC_TUNE,
2420 IX86_FUNCTION_SPECIFIC_MAX
2421 };
2422
2423 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2424 const char *, enum fpmath_unit, bool);
2425 static void ix86_function_specific_save (struct cl_target_option *,
2426 struct gcc_options *opts);
2427 static void ix86_function_specific_restore (struct gcc_options *opts,
2428 struct cl_target_option *);
2429 static void ix86_function_specific_print (FILE *, int,
2430 struct cl_target_option *);
2431 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2432 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2433 struct gcc_options *,
2434 struct gcc_options *,
2435 struct gcc_options *);
2436 static bool ix86_can_inline_p (tree, tree);
2437 static void ix86_set_current_function (tree);
2438 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2439
2440 static enum calling_abi ix86_function_abi (const_tree);
2441
2442 \f
2443 #ifndef SUBTARGET32_DEFAULT_CPU
2444 #define SUBTARGET32_DEFAULT_CPU "i386"
2445 #endif
2446
2447 /* Whether -mtune= or -march= were specified */
2448 static int ix86_tune_defaulted;
2449 static int ix86_arch_specified;
2450
2451 /* Vectorization library interface and handlers. */
2452 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2453
2454 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2455 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2456
2457 /* Processor target table, indexed by processor number */
2458 struct ptt
2459 {
2460 const char *const name; /* processor name */
2461 const struct processor_costs *cost; /* Processor costs */
2462 const int align_loop; /* Default alignments. */
2463 const int align_loop_max_skip;
2464 const int align_jump;
2465 const int align_jump_max_skip;
2466 const int align_func;
2467 };
2468
2469 /* This table must be in sync with enum processor_type in i386.h. */
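/* As an illustration, the "generic" entry below requests 16-byte alignment
   of loops, jumps and functions, skipping at most 10 bytes of padding for
   loops and jumps. */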
2470 static const struct ptt processor_target_table[PROCESSOR_max] =
2471 {
2472 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2473 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2474 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2475 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2476 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2477 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2478 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2479 {"core2", &core_cost, 16, 10, 16, 10, 16},
2480 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2481 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2482 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2483 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2484 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2485 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2486 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2487 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2488 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2489 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2490 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2491 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2492 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2493 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2494 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2495 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2496 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2497 };
2498 \f
2499 static unsigned int
2500 rest_of_handle_insert_vzeroupper (void)
2501 {
2502 int i;
2503
2504 /* vzeroupper instructions are inserted immediately after reload to
2505 account for possible spills from 256-bit registers. The pass
2506 reuses the mode switching infrastructure by re-running the mode
2507 insertion pass, so disable the entities that have already been processed. */
2508 for (i = 0; i < MAX_386_ENTITIES; i++)
2509 ix86_optimize_mode_switching[i] = 0;
2510
2511 ix86_optimize_mode_switching[AVX_U128] = 1;
2512
2513 /* Call optimize_mode_switching. */
2514 g->get_passes ()->execute_pass_mode_switching ();
2515 return 0;
2516 }
2517
2518 namespace {
2519
2520 const pass_data pass_data_insert_vzeroupper =
2521 {
2522 RTL_PASS, /* type */
2523 "vzeroupper", /* name */
2524 OPTGROUP_NONE, /* optinfo_flags */
2525 TV_NONE, /* tv_id */
2526 0, /* properties_required */
2527 0, /* properties_provided */
2528 0, /* properties_destroyed */
2529 0, /* todo_flags_start */
2530 TODO_df_finish, /* todo_flags_finish */
2531 };
2532
2533 class pass_insert_vzeroupper : public rtl_opt_pass
2534 {
2535 public:
2536 pass_insert_vzeroupper(gcc::context *ctxt)
2537 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2538 {}
2539
2540 /* opt_pass methods: */
2541 virtual bool gate (function *)
2542 {
2543 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2544 }
2545
2546 virtual unsigned int execute (function *)
2547 {
2548 return rest_of_handle_insert_vzeroupper ();
2549 }
2550
2551 }; // class pass_insert_vzeroupper
2552
2553 } // anon namespace
2554
2555 rtl_opt_pass *
2556 make_pass_insert_vzeroupper (gcc::context *ctxt)
2557 {
2558 return new pass_insert_vzeroupper (ctxt);
2559 }
2560
2561 /* Return true if a red-zone is in use. */
2562
2563 static inline bool
2564 ix86_using_red_zone (void)
2565 {
2566 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2567 }
2568 \f
2569 /* Return a string that documents the current -m options. The caller is
2570 responsible for freeing the string. */
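/* As an illustration only, for a plain 64-bit SSE2 target the returned
   string might look roughly like
   "-march=x86-64 -mtune=generic -m64 -msse2 -msse -mmmx -mfxsr -mfpmath=sse";
   the exact set and order of options is determined by the tables below. */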
2571
2572 static char *
2573 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2574 const char *tune, enum fpmath_unit fpmath,
2575 bool add_nl_p)
2576 {
2577 struct ix86_target_opts
2578 {
2579 const char *option; /* option string */
2580 HOST_WIDE_INT mask; /* isa mask options */
2581 };
2582
2583 /* This table is ordered so that options like -msse4.2 that imply
2584 preceding options are matched first. */
2585 static struct ix86_target_opts isa_opts[] =
2586 {
2587 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2588 { "-mfma", OPTION_MASK_ISA_FMA },
2589 { "-mxop", OPTION_MASK_ISA_XOP },
2590 { "-mlwp", OPTION_MASK_ISA_LWP },
2591 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2592 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2593 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2594 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2595 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2596 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2597 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2598 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2599 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2600 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2601 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2602 { "-msse3", OPTION_MASK_ISA_SSE3 },
2603 { "-msse2", OPTION_MASK_ISA_SSE2 },
2604 { "-msse", OPTION_MASK_ISA_SSE },
2605 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2606 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2607 { "-mmmx", OPTION_MASK_ISA_MMX },
2608 { "-mabm", OPTION_MASK_ISA_ABM },
2609 { "-mbmi", OPTION_MASK_ISA_BMI },
2610 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2611 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2612 { "-mhle", OPTION_MASK_ISA_HLE },
2613 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2614 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2615 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2616 { "-madx", OPTION_MASK_ISA_ADX },
2617 { "-mtbm", OPTION_MASK_ISA_TBM },
2618 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2619 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2620 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2621 { "-maes", OPTION_MASK_ISA_AES },
2622 { "-msha", OPTION_MASK_ISA_SHA },
2623 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2624 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2625 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2626 { "-mf16c", OPTION_MASK_ISA_F16C },
2627 { "-mrtm", OPTION_MASK_ISA_RTM },
2628 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2629 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2630 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2631 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2632 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2633 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2634 };
2635
2636 /* Flag options. */
2637 static struct ix86_target_opts flag_opts[] =
2638 {
2639 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2640 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2641 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2642 { "-m80387", MASK_80387 },
2643 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2644 { "-malign-double", MASK_ALIGN_DOUBLE },
2645 { "-mcld", MASK_CLD },
2646 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2647 { "-mieee-fp", MASK_IEEE_FP },
2648 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2649 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2650 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2651 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2652 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2653 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2654 { "-mno-red-zone", MASK_NO_RED_ZONE },
2655 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2656 { "-mrecip", MASK_RECIP },
2657 { "-mrtd", MASK_RTD },
2658 { "-msseregparm", MASK_SSEREGPARM },
2659 { "-mstack-arg-probe", MASK_STACK_PROBE },
2660 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2661 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2662 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2663 { "-mvzeroupper", MASK_VZEROUPPER },
2664 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2665 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2666 { "-mprefer-avx128", MASK_PREFER_AVX128},
2667 };
2668
2669 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2670
2671 char isa_other[40];
2672 char target_other[40];
2673 unsigned num = 0;
2674 unsigned i, j;
2675 char *ret;
2676 char *ptr;
2677 size_t len;
2678 size_t line_len;
2679 size_t sep_len;
2680 const char *abi;
2681
2682 memset (opts, '\0', sizeof (opts));
2683
2684 /* Add -march= option. */
2685 if (arch)
2686 {
2687 opts[num][0] = "-march=";
2688 opts[num++][1] = arch;
2689 }
2690
2691 /* Add -mtune= option. */
2692 if (tune)
2693 {
2694 opts[num][0] = "-mtune=";
2695 opts[num++][1] = tune;
2696 }
2697
2698 /* Add -m32/-m64/-mx32. */
2699 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2700 {
2701 if ((isa & OPTION_MASK_ABI_64) != 0)
2702 abi = "-m64";
2703 else
2704 abi = "-mx32";
2705 isa &= ~ (OPTION_MASK_ISA_64BIT
2706 | OPTION_MASK_ABI_64
2707 | OPTION_MASK_ABI_X32);
2708 }
2709 else
2710 abi = "-m32";
2711 opts[num++][0] = abi;
2712
2713 /* Pick out the options in isa options. */
2714 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2715 {
2716 if ((isa & isa_opts[i].mask) != 0)
2717 {
2718 opts[num++][0] = isa_opts[i].option;
2719 isa &= ~ isa_opts[i].mask;
2720 }
2721 }
2722
2723 if (isa && add_nl_p)
2724 {
2725 opts[num++][0] = isa_other;
2726 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2727 isa);
2728 }
2729
2730 /* Add flag options. */
2731 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2732 {
2733 if ((flags & flag_opts[i].mask) != 0)
2734 {
2735 opts[num++][0] = flag_opts[i].option;
2736 flags &= ~ flag_opts[i].mask;
2737 }
2738 }
2739
2740 if (flags && add_nl_p)
2741 {
2742 opts[num++][0] = target_other;
2743 sprintf (target_other, "(other flags: %#x)", flags);
2744 }
2745
2746 /* Add -fpmath= option. */
2747 if (fpmath)
2748 {
2749 opts[num][0] = "-mfpmath=";
2750 switch ((int) fpmath)
2751 {
2752 case FPMATH_387:
2753 opts[num++][1] = "387";
2754 break;
2755
2756 case FPMATH_SSE:
2757 opts[num++][1] = "sse";
2758 break;
2759
2760 case FPMATH_387 | FPMATH_SSE:
2761 opts[num++][1] = "sse+387";
2762 break;
2763
2764 default:
2765 gcc_unreachable ();
2766 }
2767 }
2768
2769 /* Any options? */
2770 if (num == 0)
2771 return NULL;
2772
2773 gcc_assert (num < ARRAY_SIZE (opts));
2774
2775 /* Size the string. */
2776 len = 0;
2777 sep_len = (add_nl_p) ? 3 : 1;
2778 for (i = 0; i < num; i++)
2779 {
2780 len += sep_len;
2781 for (j = 0; j < 2; j++)
2782 if (opts[i][j])
2783 len += strlen (opts[i][j]);
2784 }
2785
2786 /* Build the string. */
2787 ret = ptr = (char *) xmalloc (len);
2788 line_len = 0;
2789
2790 for (i = 0; i < num; i++)
2791 {
2792 size_t len2[2];
2793
2794 for (j = 0; j < 2; j++)
2795 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2796
2797 if (i != 0)
2798 {
2799 *ptr++ = ' ';
2800 line_len++;
2801
2802 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2803 {
2804 *ptr++ = '\\';
2805 *ptr++ = '\n';
2806 line_len = 0;
2807 }
2808 }
2809
2810 for (j = 0; j < 2; j++)
2811 if (opts[i][j])
2812 {
2813 memcpy (ptr, opts[i][j], len2[j]);
2814 ptr += len2[j];
2815 line_len += len2[j];
2816 }
2817 }
2818
2819 *ptr = '\0';
2820 gcc_assert (ret + len >= ptr);
2821
2822 return ret;
2823 }
2824
2825 /* Return true if profiling code should be emitted before the
2826 prologue, and false otherwise. On x86 this is the case when
2827 -mfentry (used e.g. for hot-patching) is in effect. */
2828 static bool
2829 ix86_profile_before_prologue (void)
2830 {
2831 return flag_fentry != 0;
2832 }
2833
2834 /* Function that is callable from the debugger to print the current
2835 options. */
2836 void ATTRIBUTE_UNUSED
2837 ix86_debug_options (void)
2838 {
2839 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2840 ix86_arch_string, ix86_tune_string,
2841 ix86_fpmath, true);
2842
2843 if (opts)
2844 {
2845 fprintf (stderr, "%s\n\n", opts);
2846 free (opts);
2847 }
2848 else
2849 fputs ("<no options>\n\n", stderr);
2850
2851 return;
2852 }
2853
2854 static const char *stringop_alg_names[] = {
2855 #define DEF_ENUM
2856 #define DEF_ALG(alg, name) #name,
2857 #include "stringop.def"
2858 #undef DEF_ENUM
2859 #undef DEF_ALG
2860 };
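/* stringop.def supplies one DEF_ALG (alg, name) line per algorithm, so the
   include above expands to the textual names indexed by enum stringop_alg;
   these are the names that -mmemcpy-strategy= and -mmemset-strategy= are
   matched against below. */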
2861
2862 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2863 The string has the following form (or is a comma-separated list of such entries):
2864
2865 strategy_alg:max_size:[align|noalign]
2866
2867 where the full size range for the strategy is either [0, max_size] or
2868 [min_size, max_size], in which min_size is the max_size + 1 of the
2869 preceding range. The last size range must have max_size == -1.
2870
2871 Examples:
2872
2873 1.
2874 -mmemcpy-strategy=libcall:-1:noalign
2875
2876 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2877
2878
2879 2.
2880 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2881
2882 This is to tell the compiler to use the following strategy for memset
2883 1) when the expected size is between [1, 16], use rep_8byte strategy;
2884 2) when the size is between [17, 2048], use vector_loop;
2885 3) when the size is > 2048, use libcall. */
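/* As a concrete illustration of the second example above, the parser below
   would produce three ranges: {16, rep_8byte, noalign=true},
   {2048, vector_loop, noalign=false} and {-1, libcall, noalign=true}, which
   then overwrite the default algs table for memset. */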
2886
2887 struct stringop_size_range
2888 {
2889 int max;
2890 stringop_alg alg;
2891 bool noalign;
2892 };
2893
2894 static void
2895 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2896 {
2897 const struct stringop_algs *default_algs;
2898 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2899 char *curr_range_str, *next_range_str;
2900 int i = 0, n = 0;
2901
2902 if (is_memset)
2903 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2904 else
2905 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2906
2907 curr_range_str = strategy_str;
2908
2909 do
2910 {
2911 int maxs;
2912 char alg_name[128];
2913 char align[16];
2914 next_range_str = strchr (curr_range_str, ',');
2915 if (next_range_str)
2916 *next_range_str++ = '\0';
2917
2918 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2919 alg_name, &maxs, align))
2920 {
2921 error ("wrong arg %s to option %s", curr_range_str,
2922 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2923 return;
2924 }
2925
2926 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2927 {
2928 error ("size ranges of option %s should be increasing",
2929 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2930 return;
2931 }
2932
2933 for (i = 0; i < last_alg; i++)
2934 if (!strcmp (alg_name, stringop_alg_names[i]))
2935 break;
2936
2937 if (i == last_alg)
2938 {
2939 error ("wrong stringop strategy name %s specified for option %s",
2940 alg_name,
2941 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2942 return;
2943 }
2944
2945 input_ranges[n].max = maxs;
2946 input_ranges[n].alg = (stringop_alg) i;
2947 if (!strcmp (align, "align"))
2948 input_ranges[n].noalign = false;
2949 else if (!strcmp (align, "noalign"))
2950 input_ranges[n].noalign = true;
2951 else
2952 {
2953 error ("unknown alignment %s specified for option %s",
2954 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2955 return;
2956 }
2957 n++;
2958 curr_range_str = next_range_str;
2959 }
2960 while (curr_range_str);
2961
2962 if (input_ranges[n - 1].max != -1)
2963 {
2964 error ("the max value for the last size range should be -1"
2965 " for option %s",
2966 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2967 return;
2968 }
2969
2970 if (n > MAX_STRINGOP_ALGS)
2971 {
2972 error ("too many size ranges specified in option %s",
2973 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2974 return;
2975 }
2976
2977 /* Now override the default algs array. */
2978 for (i = 0; i < n; i++)
2979 {
2980 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2981 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2982 = input_ranges[i].alg;
2983 *const_cast<int *>(&default_algs->size[i].noalign)
2984 = input_ranges[i].noalign;
2985 }
2986 }
2987
2988 \f
2989 /* Parse the -mtune-ctrl= option. When DUMP is true,
2990 print the features that are explicitly set. */
2991
2992 static void
2993 parse_mtune_ctrl_str (bool dump)
2994 {
2995 if (!ix86_tune_ctrl_string)
2996 return;
2997
2998 char *next_feature_string = NULL;
2999 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3000 char *orig = curr_feature_string;
3001 int i;
3002 do
3003 {
3004 bool clear = false;
3005
3006 next_feature_string = strchr (curr_feature_string, ',');
3007 if (next_feature_string)
3008 *next_feature_string++ = '\0';
3009 if (*curr_feature_string == '^')
3010 {
3011 curr_feature_string++;
3012 clear = true;
3013 }
3014 for (i = 0; i < X86_TUNE_LAST; i++)
3015 {
3016 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3017 {
3018 ix86_tune_features[i] = !clear;
3019 if (dump)
3020 fprintf (stderr, "Explicitly %s feature %s\n",
3021 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3022 break;
3023 }
3024 }
3025 if (i == X86_TUNE_LAST)
3026 error ("Unknown parameter to option -mtune-ctrl: %s",
3027 clear ? curr_feature_string - 1 : curr_feature_string);
3028 curr_feature_string = next_feature_string;
3029 }
3030 while (curr_feature_string);
3031 free (orig);
3032 }
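/* Usage sketch; the feature names come from ix86_tune_feature_names and the
   ones shown here are only illustrative:

       -mtune-ctrl=use_leave,^avx256_unaligned_load_optimal

   sets the first feature and, because of the leading '^', clears the second,
   overriding whatever set_ix86_tune_features chose for the current -mtune.  */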
3033
3034 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3035 processor type. */
3036
3037 static void
3038 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3039 {
3040 unsigned int ix86_tune_mask = 1u << ix86_tune;
3041 int i;
3042
3043 for (i = 0; i < X86_TUNE_LAST; ++i)
3044 {
3045 if (ix86_tune_no_default)
3046 ix86_tune_features[i] = 0;
3047 else
3048 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3049 }
3050
3051 if (dump)
3052 {
3053 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3054 for (i = 0; i < X86_TUNE_LAST; i++)
3055 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3056 ix86_tune_features[i] ? "on" : "off");
3057 }
3058
3059 parse_mtune_ctrl_str (dump);
3060 }
3061
3062
3063 /* Override various settings based on options. If MAIN_ARGS_P, the
3064 options are from the command line, otherwise they are from
3065 attributes. */
3066
3067 static void
3068 ix86_option_override_internal (bool main_args_p,
3069 struct gcc_options *opts,
3070 struct gcc_options *opts_set)
3071 {
3072 int i;
3073 unsigned int ix86_arch_mask;
3074 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3075 const char *prefix;
3076 const char *suffix;
3077 const char *sw;
3078
3079 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3080 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3081 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3082 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3083 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3084 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3085 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3086 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3087 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3088 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3089 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3090 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3091 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3092 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3093 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3094 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3095 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3096 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3097 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3098 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3099 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3100 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3101 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3102 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3103 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3104 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3105 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3106 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3107 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3108 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3109 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3110 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3111 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3112 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3113 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3114 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3115 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3116 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3117 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3118 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3119 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3120 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3121 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3122 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3123 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3124 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3125 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3126 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3127 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3128 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3129 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3130 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3131
3132 #define PTA_CORE2 \
3133 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3134 | PTA_CX16 | PTA_FXSR)
3135 #define PTA_NEHALEM \
3136 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3137 #define PTA_WESTMERE \
3138 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3139 #define PTA_SANDYBRIDGE \
3140 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3141 #define PTA_IVYBRIDGE \
3142 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3143 #define PTA_HASWELL \
3144 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3145 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3146 #define PTA_BROADWELL \
3147 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3148 #define PTA_BONNELL \
3149 (PTA_CORE2 | PTA_MOVBE)
3150 #define PTA_SILVERMONT \
3151 (PTA_WESTMERE | PTA_MOVBE)
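/* Reading the composite masks above: each newer core is the previous one
   plus its new ISA bits, so for example
       PTA_NEHALEM == PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT
                   == PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
                      | PTA_SSSE3 | PTA_CX16 | PTA_FXSR
                      | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT.  */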
3152
3153 /* If this reaches 64, the flags field of struct pta below needs to be widened. */
3154
3155 static struct pta
3156 {
3157 const char *const name; /* processor name or nickname. */
3158 const enum processor_type processor;
3159 const enum attr_cpu schedule;
3160 const unsigned HOST_WIDE_INT flags;
3161 }
3162 const processor_alias_table[] =
3163 {
3164 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3165 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3166 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3167 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3168 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3169 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3170 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3171 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3172 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3173 PTA_MMX | PTA_SSE | PTA_FXSR},
3174 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3175 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3176 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3177 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3178 PTA_MMX | PTA_SSE | PTA_FXSR},
3179 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3180 PTA_MMX | PTA_SSE | PTA_FXSR},
3181 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3182 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3183 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3184 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3185 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3186 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3187 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3188 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3189 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3190 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3191 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3192 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3193 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3194 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3195 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3196 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3197 PTA_SANDYBRIDGE},
3198 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3199 PTA_SANDYBRIDGE},
3200 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3201 PTA_IVYBRIDGE},
3202 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3203 PTA_IVYBRIDGE},
3204 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3205 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3206 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3207 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3208 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3209 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3210 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3211 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3212 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3213 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3214 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3215 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3216 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3217 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3218 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3219 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3220 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3221 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3222 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3223 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3224 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3225 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3226 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3227 {"x86-64", PROCESSOR_K8, CPU_K8,
3228 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3229 {"k8", PROCESSOR_K8, CPU_K8,
3230 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3231 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3232 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3233 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3234 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3235 {"opteron", PROCESSOR_K8, CPU_K8,
3236 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3237 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3238 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3239 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3240 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3241 {"athlon64", PROCESSOR_K8, CPU_K8,
3242 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3243 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3244 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3245 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3246 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3247 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3248 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3249 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3250 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3251 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3252 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3253 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3254 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3255 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3256 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3257 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3258 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3259 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3260 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3261 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3262 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3263 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3264 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3265 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3266 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3267 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3268 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3269 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3270 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3271 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3272 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3273 | PTA_XSAVEOPT | PTA_FSGSBASE},
3274 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3275 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3276 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3277 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3278 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3279 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3280 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3281 | PTA_MOVBE},
3282 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3283 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3284 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3285 | PTA_FXSR | PTA_XSAVE},
3286 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3287 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3288 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3289 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3290 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3291 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3292
3293 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3294 PTA_64BIT
3295 | PTA_HLE /* flags are only used for -march switch. */ },
3296 };
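/* Example of how this table is consumed by the -march lookup loop further
   below: -march=core-avx2 matches the entry mapping to PROCESSOR_HASWELL
   with PTA_HASWELL flags, and each PTA_* bit then enables the corresponding
   OPTION_MASK_ISA_* flag unless the user already set that ISA flag
   explicitly.  */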
3297
3298 /* -mrecip options. */
3299 static struct
3300 {
3301 const char *string; /* option name */
3302 unsigned int mask; /* mask bits to set */
3303 }
3304 const recip_options[] =
3305 {
3306 { "all", RECIP_MASK_ALL },
3307 { "none", RECIP_MASK_NONE },
3308 { "div", RECIP_MASK_DIV },
3309 { "sqrt", RECIP_MASK_SQRT },
3310 { "vec-div", RECIP_MASK_VEC_DIV },
3311 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3312 };
3313
3314 int const pta_size = ARRAY_SIZE (processor_alias_table);
3315
3316 /* Set up prefix/suffix so the error messages refer to either the command
3317 line argument, or the attribute(target). */
3318 if (main_args_p)
3319 {
3320 prefix = "-m";
3321 suffix = "";
3322 sw = "switch";
3323 }
3324 else
3325 {
3326 prefix = "option(\"";
3327 suffix = "\")";
3328 sw = "attribute";
3329 }
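/* With these settings a diagnostic template such as
   "bad value (%s) for %sarch=%s %s" renders as, for example,
   "bad value (foo) for -march= switch" for a command-line option, or
   "bad value (foo) for option("arch=") attribute" when the string came
   from attribute((target(...))).  */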
3330
3331 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3332 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3333 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3334 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3335 #ifdef TARGET_BI_ARCH
3336 else
3337 {
3338 #if TARGET_BI_ARCH == 1
3339 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3340 is on and OPTION_MASK_ABI_X32 is off. We turn off
3341 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3342 -mx32. */
3343 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3344 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3345 #else
3346 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3347 on and OPTION_MASK_ABI_64 is off. We turn off
3348 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3349 -m64. */
3350 if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3351 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3352 #endif
3353 }
3354 #endif
3355
3356 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3357 {
3358 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3359 OPTION_MASK_ABI_64 for TARGET_X32. */
3360 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3361 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3362 }
3363 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3364 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3365 | OPTION_MASK_ABI_X32
3366 | OPTION_MASK_ABI_64);
3367 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3368 {
3369 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3370 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3371 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3372 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3373 }
3374
3375 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3376 SUBTARGET_OVERRIDE_OPTIONS;
3377 #endif
3378
3379 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3380 SUBSUBTARGET_OVERRIDE_OPTIONS;
3381 #endif
3382
3383 /* -fPIC is the default for x86_64. */
3384 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3385 opts->x_flag_pic = 2;
3386
3387 /* Need to check -mtune=generic first. */
3388 if (opts->x_ix86_tune_string)
3389 {
3390 /* As special support for cross compilers we read -mtune=native
3391 as -mtune=generic. With native compilers we won't see the
3392 -mtune=native, as it was changed by the driver. */
3393 if (!strcmp (opts->x_ix86_tune_string, "native"))
3394 {
3395 opts->x_ix86_tune_string = "generic";
3396 }
3397 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3398 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3399 "%stune=k8%s or %stune=generic%s instead as appropriate",
3400 prefix, suffix, prefix, suffix, prefix, suffix);
3401 }
3402 else
3403 {
3404 if (opts->x_ix86_arch_string)
3405 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3406 if (!opts->x_ix86_tune_string)
3407 {
3408 opts->x_ix86_tune_string
3409 = processor_target_table[TARGET_CPU_DEFAULT].name;
3410 ix86_tune_defaulted = 1;
3411 }
3412
3413 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3414 or defaulted. We need to use a sensible tune option. */
3415 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3416 {
3417 opts->x_ix86_tune_string = "generic";
3418 }
3419 }
3420
3421 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3422 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3423 {
3424 /* rep; movq isn't available in 32-bit code. */
3425 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3426 opts->x_ix86_stringop_alg = no_stringop;
3427 }
3428
3429 if (!opts->x_ix86_arch_string)
3430 opts->x_ix86_arch_string
3431 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3432 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3433 else
3434 ix86_arch_specified = 1;
3435
3436 if (opts_set->x_ix86_pmode)
3437 {
3438 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3439 && opts->x_ix86_pmode == PMODE_SI)
3440 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3441 && opts->x_ix86_pmode == PMODE_DI))
3442 error ("address mode %qs not supported in the %s bit mode",
3443 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3444 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3445 }
3446 else
3447 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3448 ? PMODE_DI : PMODE_SI;
3449
3450 if (!opts_set->x_ix86_abi)
3451 opts->x_ix86_abi = DEFAULT_ABI;
3452
3453 /* For targets using the MS ABI, enable ms-extensions unless it has
3454 been explicitly turned off. For non-MS ABIs we turn this option
3455 off. */
3456 if (!opts_set->x_flag_ms_extensions)
3457 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3458
3459 if (opts_set->x_ix86_cmodel)
3460 {
3461 switch (opts->x_ix86_cmodel)
3462 {
3463 case CM_SMALL:
3464 case CM_SMALL_PIC:
3465 if (opts->x_flag_pic)
3466 opts->x_ix86_cmodel = CM_SMALL_PIC;
3467 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3468 error ("code model %qs not supported in the %s bit mode",
3469 "small", "32");
3470 break;
3471
3472 case CM_MEDIUM:
3473 case CM_MEDIUM_PIC:
3474 if (opts->x_flag_pic)
3475 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3476 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3477 error ("code model %qs not supported in the %s bit mode",
3478 "medium", "32");
3479 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3480 error ("code model %qs not supported in x32 mode",
3481 "medium");
3482 break;
3483
3484 case CM_LARGE:
3485 case CM_LARGE_PIC:
3486 if (opts->x_flag_pic)
3487 opts->x_ix86_cmodel = CM_LARGE_PIC;
3488 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3489 error ("code model %qs not supported in the %s bit mode",
3490 "large", "32");
3491 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3492 error ("code model %qs not supported in x32 mode",
3493 "large");
3494 break;
3495
3496 case CM_32:
3497 if (opts->x_flag_pic)
3498 error ("code model %s does not support PIC mode", "32");
3499 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3500 error ("code model %qs not supported in the %s bit mode",
3501 "32", "64");
3502 break;
3503
3504 case CM_KERNEL:
3505 if (opts->x_flag_pic)
3506 {
3507 error ("code model %s does not support PIC mode", "kernel");
3508 opts->x_ix86_cmodel = CM_32;
3509 }
3510 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3511 error ("code model %qs not supported in the %s bit mode",
3512 "kernel", "32");
3513 break;
3514
3515 default:
3516 gcc_unreachable ();
3517 }
3518 }
3519 else
3520 {
3521 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3522 use of rip-relative addressing. This eliminates fixups that
3523 would otherwise be needed if this object is to be placed in a
3524 DLL, and is essentially just as efficient as direct addressing. */
3525 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3526 && (TARGET_RDOS || TARGET_PECOFF))
3527 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3528 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3529 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3530 else
3531 opts->x_ix86_cmodel = CM_32;
3532 }
3533 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3534 {
3535 error ("-masm=intel not supported in this configuration");
3536 opts->x_ix86_asm_dialect = ASM_ATT;
3537 }
3538 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3539 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3540 sorry ("%i-bit mode not compiled in",
3541 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3542
3543 for (i = 0; i < pta_size; i++)
3544 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3545 {
3546 ix86_schedule = processor_alias_table[i].schedule;
3547 ix86_arch = processor_alias_table[i].processor;
3548 /* Default cpu tuning to the architecture. */
3549 ix86_tune = ix86_arch;
3550
3551 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3552 && !(processor_alias_table[i].flags & PTA_64BIT))
3553 error ("CPU you selected does not support x86-64 "
3554 "instruction set");
3555
3556 if (processor_alias_table[i].flags & PTA_MMX
3557 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3558 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3559 if (processor_alias_table[i].flags & PTA_3DNOW
3560 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3561 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3562 if (processor_alias_table[i].flags & PTA_3DNOW_A
3563 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3564 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3565 if (processor_alias_table[i].flags & PTA_SSE
3566 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3567 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3568 if (processor_alias_table[i].flags & PTA_SSE2
3569 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3570 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3571 if (processor_alias_table[i].flags & PTA_SSE3
3572 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3573 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3574 if (processor_alias_table[i].flags & PTA_SSSE3
3575 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3576 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3577 if (processor_alias_table[i].flags & PTA_SSE4_1
3578 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3579 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3580 if (processor_alias_table[i].flags & PTA_SSE4_2
3581 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3582 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3583 if (processor_alias_table[i].flags & PTA_AVX
3584 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3585 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3586 if (processor_alias_table[i].flags & PTA_AVX2
3587 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3588 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3589 if (processor_alias_table[i].flags & PTA_FMA
3590 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3591 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3592 if (processor_alias_table[i].flags & PTA_SSE4A
3593 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3594 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3595 if (processor_alias_table[i].flags & PTA_FMA4
3596 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3597 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3598 if (processor_alias_table[i].flags & PTA_XOP
3599 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3600 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3601 if (processor_alias_table[i].flags & PTA_LWP
3602 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3603 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3604 if (processor_alias_table[i].flags & PTA_ABM
3605 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3606 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3607 if (processor_alias_table[i].flags & PTA_BMI
3608 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3609 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3610 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3611 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3612 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3613 if (processor_alias_table[i].flags & PTA_TBM
3614 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3615 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3616 if (processor_alias_table[i].flags & PTA_BMI2
3617 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3618 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3619 if (processor_alias_table[i].flags & PTA_CX16
3620 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3621 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3622 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3623 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3624 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3625 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3626 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3627 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3628 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3629 if (processor_alias_table[i].flags & PTA_MOVBE
3630 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3631 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3632 if (processor_alias_table[i].flags & PTA_AES
3633 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3634 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3635 if (processor_alias_table[i].flags & PTA_SHA
3636 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3637 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3638 if (processor_alias_table[i].flags & PTA_PCLMUL
3639 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3640 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3641 if (processor_alias_table[i].flags & PTA_FSGSBASE
3642 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3643 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3644 if (processor_alias_table[i].flags & PTA_RDRND
3645 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3646 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3647 if (processor_alias_table[i].flags & PTA_F16C
3648 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3649 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3650 if (processor_alias_table[i].flags & PTA_RTM
3651 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3652 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3653 if (processor_alias_table[i].flags & PTA_HLE
3654 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3655 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3656 if (processor_alias_table[i].flags & PTA_PRFCHW
3657 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3658 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3659 if (processor_alias_table[i].flags & PTA_RDSEED
3660 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3661 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3662 if (processor_alias_table[i].flags & PTA_ADX
3663 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3664 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3665 if (processor_alias_table[i].flags & PTA_FXSR
3666 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3667 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3668 if (processor_alias_table[i].flags & PTA_XSAVE
3669 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3670 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3671 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3672 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3673 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3674 if (processor_alias_table[i].flags & PTA_AVX512F
3675 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3676 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3677 if (processor_alias_table[i].flags & PTA_AVX512ER
3678 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3679 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3680 if (processor_alias_table[i].flags & PTA_AVX512PF
3681 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3682 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3683 if (processor_alias_table[i].flags & PTA_AVX512CD
3684 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3685 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3686 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3687 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3688 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3689 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3690 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3691 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3692 if (processor_alias_table[i].flags & PTA_XSAVEC
3693 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3694 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3695 if (processor_alias_table[i].flags & PTA_XSAVES
3696 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3697 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3698 if (processor_alias_table[i].flags & PTA_AVX512DQ
3699 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3700 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3701 if (processor_alias_table[i].flags & PTA_AVX512BW
3702 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3703 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3704 if (processor_alias_table[i].flags & PTA_AVX512VL
3705 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3706 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3707 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3708 x86_prefetch_sse = true;
3709
3710 break;
3711 }
3712
3713 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3714 error ("generic CPU can be used only for %stune=%s %s",
3715 prefix, suffix, sw);
3716 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3717 error ("intel CPU can be used only for %stune=%s %s",
3718 prefix, suffix, sw);
3719 else if (i == pta_size)
3720 error ("bad value (%s) for %sarch=%s %s",
3721 opts->x_ix86_arch_string, prefix, suffix, sw);
3722
3723 ix86_arch_mask = 1u << ix86_arch;
3724 for (i = 0; i < X86_ARCH_LAST; ++i)
3725 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3726
3727 for (i = 0; i < pta_size; i++)
3728 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3729 {
3730 ix86_schedule = processor_alias_table[i].schedule;
3731 ix86_tune = processor_alias_table[i].processor;
3732 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3733 {
3734 if (!(processor_alias_table[i].flags & PTA_64BIT))
3735 {
3736 if (ix86_tune_defaulted)
3737 {
3738 opts->x_ix86_tune_string = "x86-64";
3739 for (i = 0; i < pta_size; i++)
3740 if (! strcmp (opts->x_ix86_tune_string,
3741 processor_alias_table[i].name))
3742 break;
3743 ix86_schedule = processor_alias_table[i].schedule;
3744 ix86_tune = processor_alias_table[i].processor;
3745 }
3746 else
3747 error ("CPU you selected does not support x86-64 "
3748 "instruction set");
3749 }
3750 }
3751 /* Intel CPUs have always interpreted SSE prefetch instructions as
3752 NOPs; so, we can enable SSE prefetch instructions even when
3753 -mtune (rather than -march) points us to a processor that has them.
3754 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3755 higher processors. */
3756 if (TARGET_CMOV
3757 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3758 x86_prefetch_sse = true;
3759 break;
3760 }
3761
3762 if (ix86_tune_specified && i == pta_size)
3763 error ("bad value (%s) for %stune=%s %s",
3764 opts->x_ix86_tune_string, prefix, suffix, sw);
3765
3766 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3767
3768 #ifndef USE_IX86_FRAME_POINTER
3769 #define USE_IX86_FRAME_POINTER 0
3770 #endif
3771
3772 #ifndef USE_X86_64_FRAME_POINTER
3773 #define USE_X86_64_FRAME_POINTER 0
3774 #endif
3775
3776 /* Set the default values for switches whose default depends on TARGET_64BIT
3777 in case they weren't overwritten by command line options. */
3778 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3779 {
3780 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3781 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3782 if (opts->x_flag_asynchronous_unwind_tables
3783 && !opts_set->x_flag_unwind_tables
3784 && TARGET_64BIT_MS_ABI)
3785 opts->x_flag_unwind_tables = 1;
3786 if (opts->x_flag_asynchronous_unwind_tables == 2)
3787 opts->x_flag_unwind_tables
3788 = opts->x_flag_asynchronous_unwind_tables = 1;
3789 if (opts->x_flag_pcc_struct_return == 2)
3790 opts->x_flag_pcc_struct_return = 0;
3791 }
3792 else
3793 {
3794 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3795 opts->x_flag_omit_frame_pointer
3796 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3797 if (opts->x_flag_asynchronous_unwind_tables == 2)
3798 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3799 if (opts->x_flag_pcc_struct_return == 2)
3800 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3801 }
3802
3803 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3804 if (opts->x_optimize_size)
3805 ix86_cost = &ix86_size_cost;
3806 else
3807 ix86_cost = ix86_tune_cost;
3808
3809 /* Arrange to set up i386_stack_locals for all functions. */
3810 init_machine_status = ix86_init_machine_status;
3811
3812 /* Validate -mregparm= value. */
3813 if (opts_set->x_ix86_regparm)
3814 {
3815 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3816 warning (0, "-mregparm is ignored in 64-bit mode");
3817 if (opts->x_ix86_regparm > REGPARM_MAX)
3818 {
3819 error ("-mregparm=%d is not between 0 and %d",
3820 opts->x_ix86_regparm, REGPARM_MAX);
3821 opts->x_ix86_regparm = 0;
3822 }
3823 }
3824 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3825 opts->x_ix86_regparm = REGPARM_MAX;
3826
3827 /* Default align_* from the processor table. */
3828 if (opts->x_align_loops == 0)
3829 {
3830 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3831 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3832 }
3833 if (opts->x_align_jumps == 0)
3834 {
3835 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3836 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3837 }
3838 if (opts->x_align_functions == 0)
3839 {
3840 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3841 }
3842
3843 /* Provide default for -mbranch-cost= value. */
3844 if (!opts_set->x_ix86_branch_cost)
3845 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3846
3847 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3848 {
3849 opts->x_target_flags
3850 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3851
3852 /* Enable by default the SSE and MMX builtins. Do allow the user to
3853 explicitly disable any of these. In particular, disabling SSE and
3854 MMX for kernel code is extremely useful. */
3855 if (!ix86_arch_specified)
3856 opts->x_ix86_isa_flags
3857 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3858 | TARGET_SUBTARGET64_ISA_DEFAULT)
3859 & ~opts->x_ix86_isa_flags_explicit);
3860
3861 if (TARGET_RTD_P (opts->x_target_flags))
3862 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3863 }
3864 else
3865 {
3866 opts->x_target_flags
3867 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3868
3869 if (!ix86_arch_specified)
3870 opts->x_ix86_isa_flags
3871 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3872
3873 /* The i386 ABI does not specify a red zone. It still makes sense to use
3874 one when the programmer takes care to keep the stack from being clobbered. */
3875 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3876 opts->x_target_flags |= MASK_NO_RED_ZONE;
3877 }
3878
3879 /* Keep nonleaf frame pointers. */
3880 if (opts->x_flag_omit_frame_pointer)
3881 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3882 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3883 opts->x_flag_omit_frame_pointer = 1;
3884
3885 /* If we're doing fast math, we don't care about comparison order
3886 wrt NaNs. This lets us use a shorter comparison sequence. */
3887 if (opts->x_flag_finite_math_only)
3888 opts->x_target_flags &= ~MASK_IEEE_FP;
3889
3890 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3891 since the insns won't need emulation. */
3892 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3893 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3894
3895 /* Likewise, if the target doesn't have a 387, or we've specified
3896 software floating point, don't use 387 inline intrinsics. */
3897 if (!TARGET_80387_P (opts->x_target_flags))
3898 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3899
3900 /* Turn on MMX builtins for -msse. */
3901 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3902 opts->x_ix86_isa_flags
3903 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3904
3905 /* Enable SSE prefetch. */
3906 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3907 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3908 x86_prefetch_sse = true;
3909
3910 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3911 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3912 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3913 opts->x_ix86_isa_flags
3914 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3915
3916 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3917 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3918 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3919 opts->x_ix86_isa_flags
3920 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3921
3922 /* Enable lzcnt instruction for -mabm. */
3923 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3924 opts->x_ix86_isa_flags
3925 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
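/* Taken together, the two blocks above encode the ISA implications
   -msse4.2 -> popcnt and -mabm -> popcnt + lzcnt; e.g. plain -mabm behaves
   like -mabm -mpopcnt -mlzcnt unless the user explicitly disabled one of
   the implied ISAs (hence the "& ~..._explicit" terms).  */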
3926
3927 /* Validate -mpreferred-stack-boundary= value or default it to
3928 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3929 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3930 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3931 {
3932 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3933 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3934 int max = (TARGET_SEH ? 4 : 12);
3935
3936 if (opts->x_ix86_preferred_stack_boundary_arg < min
3937 || opts->x_ix86_preferred_stack_boundary_arg > max)
3938 {
3939 if (min == max)
3940 error ("-mpreferred-stack-boundary is not supported "
3941 "for this target");
3942 else
3943 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3944 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3945 }
3946 else
3947 ix86_preferred_stack_boundary
3948 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3949 }
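/* Worked example of the computation above: -mpreferred-stack-boundary=4
   gives (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a preferred
   stack alignment of 16 bytes (BITS_PER_UNIT is 8 here).  */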
3950
3951 /* Set the default value for -mstackrealign. */
3952 if (opts->x_ix86_force_align_arg_pointer == -1)
3953 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3954
3955 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3956
3957 /* Validate -mincoming-stack-boundary= value or default it to
3958 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3959 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3960 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3961 {
3962 if (opts->x_ix86_incoming_stack_boundary_arg
3963 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3964 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3965 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3966 opts->x_ix86_incoming_stack_boundary_arg,
3967 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3968 else
3969 {
3970 ix86_user_incoming_stack_boundary
3971 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3972 ix86_incoming_stack_boundary
3973 = ix86_user_incoming_stack_boundary;
3974 }
3975 }
3976
3977 /* Accept -msseregparm only if at least SSE support is enabled. */
3978 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3979 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3980 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3981
3982 if (opts_set->x_ix86_fpmath)
3983 {
3984 if (opts->x_ix86_fpmath & FPMATH_SSE)
3985 {
3986 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3987 {
3988 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3989 opts->x_ix86_fpmath = FPMATH_387;
3990 }
3991 else if ((opts->x_ix86_fpmath & FPMATH_387)
3992 && !TARGET_80387_P (opts->x_target_flags))
3993 {
3994 warning (0, "387 instruction set disabled, using SSE arithmetics");
3995 opts->x_ix86_fpmath = FPMATH_SSE;
3996 }
3997 }
3998 }
3999 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4000 -mfpmath=387. The latter is nevertheless the default on many targets,
4001 since the extra 80-bit precision of temporaries is considered part of
4002 the ABI. Override the default at least for -ffast-math.
4003 TODO: -mfpmath=both seems to produce equally fast code with slightly
4004 smaller binaries. It is, however, not clear whether register allocation
4005 is ready for this setting.
4006 Also, -mfpmath=387 codegen is overall a lot more compact (about 4-5%)
4007 than SSE codegen. We may switch to 387 with -ffast-math for
4008 size-optimized functions. */
4009 else if (fast_math_flags_set_p (&global_options)
4010 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4011 opts->x_ix86_fpmath = FPMATH_SSE;
4012 else
4013 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4014
4015 /* If the i387 is disabled, then do not return values in it. */
4016 if (!TARGET_80387_P (opts->x_target_flags))
4017 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4018
4019 /* Use external vectorized library in vectorizing intrinsics. */
4020 if (opts_set->x_ix86_veclibabi_type)
4021 switch (opts->x_ix86_veclibabi_type)
4022 {
4023 case ix86_veclibabi_type_svml:
4024 ix86_veclib_handler = ix86_veclibabi_svml;
4025 break;
4026
4027 case ix86_veclibabi_type_acml:
4028 ix86_veclib_handler = ix86_veclibabi_acml;
4029 break;
4030
4031 default:
4032 gcc_unreachable ();
4033 }
4034
4035 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4036 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4037 && !opts->x_optimize_size)
4038 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4039
4040 /* If stack probes are required, the space used for large function
4041 arguments on the stack must also be probed, so enable
4042 -maccumulate-outgoing-args so this happens in the prologue. */
4043 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4044 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4045 {
4046 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4047 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4048 "for correctness", prefix, suffix);
4049 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4050 }
4051
4052 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4053 {
4054 char *p;
4055 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4056 p = strchr (internal_label_prefix, 'X');
4057 internal_label_prefix_len = p - internal_label_prefix;
4058 *p = '\0';
4059 }
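/* Illustration only; the exact label spelling is target-dependent.  If
   ASM_GENERATE_INTERNAL_LABEL were to build, say, ".LX0" from the "LX"
   template, the code above would stop at the 'X' and be left with
   internal_label_prefix == ".L" and internal_label_prefix_len == 2.  */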
4060
4061 /* When the scheduling description is not available, disable the scheduler
4062 pass so it won't slow down compilation and make x87 code slower. */
4063 if (!TARGET_SCHEDULE)
4064 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4065
4066 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4067 ix86_tune_cost->simultaneous_prefetches,
4068 opts->x_param_values,
4069 opts_set->x_param_values);
4070 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4071 ix86_tune_cost->prefetch_block,
4072 opts->x_param_values,
4073 opts_set->x_param_values);
4074 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4075 ix86_tune_cost->l1_cache_size,
4076 opts->x_param_values,
4077 opts_set->x_param_values);
4078 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4079 ix86_tune_cost->l2_cache_size,
4080 opts->x_param_values,
4081 opts_set->x_param_values);
4082
4083 /* Enable software prefetching at -O3 for CPUs where prefetching is beneficial. */
4084 if (opts->x_flag_prefetch_loop_arrays < 0
4085 && HAVE_prefetch
4086 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4087 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4088 opts->x_flag_prefetch_loop_arrays = 1;
4089
4090 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4091 can be optimized to ap = __builtin_next_arg (0). */
4092 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4093 targetm.expand_builtin_va_start = NULL;
4094
4095 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4096 {
4097 ix86_gen_leave = gen_leave_rex64;
4098 if (Pmode == DImode)
4099 {
4100 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4101 ix86_gen_tls_local_dynamic_base_64
4102 = gen_tls_local_dynamic_base_64_di;
4103 }
4104 else
4105 {
4106 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4107 ix86_gen_tls_local_dynamic_base_64
4108 = gen_tls_local_dynamic_base_64_si;
4109 }
4110 }
4111 else
4112 ix86_gen_leave = gen_leave;
4113
4114 if (Pmode == DImode)
4115 {
4116 ix86_gen_add3 = gen_adddi3;
4117 ix86_gen_sub3 = gen_subdi3;
4118 ix86_gen_sub3_carry = gen_subdi3_carry;
4119 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4120 ix86_gen_andsp = gen_anddi3;
4121 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4122 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4123 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4124 ix86_gen_monitor = gen_sse3_monitor_di;
4125 }
4126 else
4127 {
4128 ix86_gen_add3 = gen_addsi3;
4129 ix86_gen_sub3 = gen_subsi3;
4130 ix86_gen_sub3_carry = gen_subsi3_carry;
4131 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4132 ix86_gen_andsp = gen_andsi3;
4133 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4134 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4135 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4136 ix86_gen_monitor = gen_sse3_monitor_si;
4137 }
4138
4139 #ifdef USE_IX86_CLD
4140 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4141 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4142 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4143 #endif
4144
4145 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4146 {
4147 if (opts->x_flag_fentry > 0)
4148 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4149 "with -fpic");
4150 opts->x_flag_fentry = 0;
4151 }
4152 else if (TARGET_SEH)
4153 {
4154 if (opts->x_flag_fentry == 0)
4155 sorry ("-mno-fentry isn%'t compatible with SEH");
4156 opts->x_flag_fentry = 1;
4157 }
4158 else if (opts->x_flag_fentry < 0)
4159 {
4160 #if defined(PROFILE_BEFORE_PROLOGUE)
4161 opts->x_flag_fentry = 1;
4162 #else
4163 opts->x_flag_fentry = 0;
4164 #endif
4165 }
4166
4167 /* When not optimizing for size, enable vzeroupper optimization for
4168 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4169 AVX unaligned loads/stores. */
4170 if (!opts->x_optimize_size)
4171 {
4172 if (flag_expensive_optimizations
4173 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4174 opts->x_target_flags |= MASK_VZEROUPPER;
4175 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4176 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4177 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4178 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4179 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4180 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4181 /* Enable 128-bit AVX instruction generation
4182 for the auto-vectorizer. */
4183 if (TARGET_AVX128_OPTIMAL
4184 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4185 opts->x_target_flags |= MASK_PREFER_AVX128;
4186 }
4187
4188 if (opts->x_ix86_recip_name)
4189 {
4190 char *p = ASTRDUP (opts->x_ix86_recip_name);
4191 char *q;
4192 unsigned int mask, i;
4193 bool invert;
4194
4195 while ((q = strtok (p, ",")) != NULL)
4196 {
4197 p = NULL;
4198 if (*q == '!')
4199 {
4200 invert = true;
4201 q++;
4202 }
4203 else
4204 invert = false;
4205
4206 if (!strcmp (q, "default"))
4207 mask = RECIP_MASK_ALL;
4208 else
4209 {
4210 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4211 if (!strcmp (q, recip_options[i].string))
4212 {
4213 mask = recip_options[i].mask;
4214 break;
4215 }
4216
4217 if (i == ARRAY_SIZE (recip_options))
4218 {
4219 error ("unknown option for -mrecip=%s", q);
4220 invert = false;
4221 mask = RECIP_MASK_NONE;
4222 }
4223 }
4224
4225 opts->x_recip_mask_explicit |= mask;
4226 if (invert)
4227 opts->x_recip_mask &= ~mask;
4228 else
4229 opts->x_recip_mask |= mask;
4230 }
4231 }
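/* Worked example of the parsing above: -mrecip=all,!sqrt first ORs
   RECIP_MASK_ALL into the mask, then the "!sqrt" token clears
   RECIP_MASK_SQRT again, leaving reciprocal approximations enabled for
   everything except the "sqrt" component.  Both tokens are also recorded
   in recip_mask_explicit, so the TARGET_RECIP handling just below only
   touches components the user did not name.  */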
4232
4233 if (TARGET_RECIP_P (opts->x_target_flags))
4234 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4235 else if (opts_set->x_target_flags & MASK_RECIP)
4236 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4237
4238 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4239 for 64-bit Bionic. */
4240 if (TARGET_HAS_BIONIC
4241 && !(opts_set->x_target_flags
4242 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4243 opts->x_target_flags |= (TARGET_64BIT
4244 ? MASK_LONG_DOUBLE_128
4245 : MASK_LONG_DOUBLE_64);
4246
4247 /* Only one of them can be active. */
4248 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4249 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4250
4251 /* Save the initial options in case the user does function specific
4252 options. */
4253 if (main_args_p)
4254 target_option_default_node = target_option_current_node
4255 = build_target_option_node (opts);
4256
4257 /* Handle stack protector */
4258 if (!opts_set->x_ix86_stack_protector_guard)
4259 opts->x_ix86_stack_protector_guard
4260 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4261
4262 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4263 if (opts->x_ix86_tune_memcpy_strategy)
4264 {
4265 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4266 ix86_parse_stringop_strategy_string (str, false);
4267 free (str);
4268 }
4269
4270 if (opts->x_ix86_tune_memset_strategy)
4271 {
4272 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4273 ix86_parse_stringop_strategy_string (str, true);
4274 free (str);
4275 }
4276 }
4277
4278 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4279
4280 static void
4281 ix86_option_override (void)
4282 {
4283 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4284 static struct register_pass_info insert_vzeroupper_info
4285 = { pass_insert_vzeroupper, "reload",
4286 1, PASS_POS_INSERT_AFTER
4287 };
4288
4289 ix86_option_override_internal (true, &global_options, &global_options_set);
4290
4291
4292 /* This needs to be done at start up. It's convenient to do it here. */
4293 register_pass (&insert_vzeroupper_info);
4294 }
4295
4296 /* Update register usage after having seen the compiler flags. */
4297
4298 static void
4299 ix86_conditional_register_usage (void)
4300 {
4301 int i, c_mask;
4302 unsigned int j;
4303
4304 /* The PIC register, if it exists, is fixed. */
4305 j = PIC_OFFSET_TABLE_REGNUM;
4306 if (j != INVALID_REGNUM)
4307 fixed_regs[j] = call_used_regs[j] = 1;
4308
4309 /* For 32-bit targets, squash the REX registers. */
4310 if (! TARGET_64BIT)
4311 {
4312 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4313 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4314 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4315 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4316 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4317 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4318 }
4319
4320 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4321 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4322 : TARGET_64BIT ? (1 << 2)
4323 : (1 << 1));
4324
4325 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4326
4327 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4328 {
4329 /* Set/reset conditionally defined registers from
4330 CALL_USED_REGISTERS initializer. */
4331 if (call_used_regs[i] > 1)
4332 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4333
4334 /* Calculate registers of CLOBBERED_REGS register set
4335 as call used registers from GENERAL_REGS register set. */
4336 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4337 && call_used_regs[i])
4338 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4339 }
4340
4341 /* If MMX is disabled, squash the registers. */
4342 if (! TARGET_MMX)
4343 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4344 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4345 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4346
4347 /* If SSE is disabled, squash the registers. */
4348 if (! TARGET_SSE)
4349 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4350 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4351 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4352
4353 /* If the FPU is disabled, squash the registers. */
4354 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4355 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4356 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4357 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4358
4359 /* If AVX512F is disabled, squash the registers. */
4360 if (! TARGET_AVX512F)
4361 {
4362 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4363 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4364
4365 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4366 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4367 }
4368 }
4369
4370 \f
4371 /* Save the current options */
4372
4373 static void
4374 ix86_function_specific_save (struct cl_target_option *ptr,
4375 struct gcc_options *opts)
4376 {
4377 ptr->arch = ix86_arch;
4378 ptr->schedule = ix86_schedule;
4379 ptr->tune = ix86_tune;
4380 ptr->branch_cost = ix86_branch_cost;
4381 ptr->tune_defaulted = ix86_tune_defaulted;
4382 ptr->arch_specified = ix86_arch_specified;
4383 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4384 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4385 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4386 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4387 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4388 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4389 ptr->x_ix86_abi = opts->x_ix86_abi;
4390 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4391 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4392 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4393 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4394 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4395 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4396 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4397 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4398 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4399 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4400 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4401 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4402 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4403 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4404 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4405 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4406 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4407 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4408 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4409 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4410
4411 /* The fields are char but the variables are not; make sure the
4412 values fit in the fields. */
4413 gcc_assert (ptr->arch == ix86_arch);
4414 gcc_assert (ptr->schedule == ix86_schedule);
4415 gcc_assert (ptr->tune == ix86_tune);
4416 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4417 }
4418
4419 /* Restore the current options */
4420
4421 static void
4422 ix86_function_specific_restore (struct gcc_options *opts,
4423 struct cl_target_option *ptr)
4424 {
4425 enum processor_type old_tune = ix86_tune;
4426 enum processor_type old_arch = ix86_arch;
4427 unsigned int ix86_arch_mask;
4428 int i;
4429
4430 /* We don't change -fPIC. */
4431 opts->x_flag_pic = flag_pic;
4432
4433 ix86_arch = (enum processor_type) ptr->arch;
4434 ix86_schedule = (enum attr_cpu) ptr->schedule;
4435 ix86_tune = (enum processor_type) ptr->tune;
4436 opts->x_ix86_branch_cost = ptr->branch_cost;
4437 ix86_tune_defaulted = ptr->tune_defaulted;
4438 ix86_arch_specified = ptr->arch_specified;
4439 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4440 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4441 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4442 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4443 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4444 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4445 opts->x_ix86_abi = ptr->x_ix86_abi;
4446 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4447 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4448 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4449 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4450 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4451 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4452 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4453 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4454 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4455 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4456 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4457 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4458 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4459 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4460 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4461 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4462 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4463 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4464 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4465 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4466
4467 /* Recreate the arch feature tests if the arch changed */
4468 if (old_arch != ix86_arch)
4469 {
4470 ix86_arch_mask = 1u << ix86_arch;
4471 for (i = 0; i < X86_ARCH_LAST; ++i)
4472 ix86_arch_features[i]
4473 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4474 }
4475
4476 /* Recreate the tune optimization tests */
4477 if (old_tune != ix86_tune)
4478 set_ix86_tune_features (ix86_tune, false);
4479 }
4480
4481 /* Print the current options */
4482
4483 static void
4484 ix86_function_specific_print (FILE *file, int indent,
4485 struct cl_target_option *ptr)
4486 {
4487 char *target_string
4488 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4489 NULL, NULL, ptr->x_ix86_fpmath, false);
4490
4491 gcc_assert (ptr->arch < PROCESSOR_max);
4492 fprintf (file, "%*sarch = %d (%s)\n",
4493 indent, "",
4494 ptr->arch, processor_target_table[ptr->arch].name);
4495
4496 gcc_assert (ptr->tune < PROCESSOR_max);
4497 fprintf (file, "%*stune = %d (%s)\n",
4498 indent, "",
4499 ptr->tune, processor_target_table[ptr->tune].name);
4500
4501 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4502
4503 if (target_string)
4504 {
4505 fprintf (file, "%*s%s\n", indent, "", target_string);
4506 free (target_string);
4507 }
4508 }
4509
4510 \f
4511 /* Inner function to process the attribute((target(...))); take an argument
4512 and set the current options from it. If we have a list, recursively go
4513 over the list. */
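
/* Illustrative (hypothetical) use of the attribute this routine parses; the
   ISA, enum and string names come from the attrs[] table below, and a "no-"
   prefix clears the corresponding option:

	__attribute__((target ("avx2,no-sse4a,fpmath=sse,arch=core-avx2")))
	void fn (void);

   The comma-separated string above is split and each piece is dispatched as
   an isa, enum, string or flag option.  */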
4514
4515 static bool
4516 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4517 struct gcc_options *opts,
4518 struct gcc_options *opts_set,
4519 struct gcc_options *enum_opts_set)
4520 {
4521 char *next_optstr;
4522 bool ret = true;
4523
4524 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4525 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4526 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4527 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4528 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4529
4530 enum ix86_opt_type
4531 {
4532 ix86_opt_unknown,
4533 ix86_opt_yes,
4534 ix86_opt_no,
4535 ix86_opt_str,
4536 ix86_opt_enum,
4537 ix86_opt_isa
4538 };
4539
4540 static const struct
4541 {
4542 const char *string;
4543 size_t len;
4544 enum ix86_opt_type type;
4545 int opt;
4546 int mask;
4547 } attrs[] = {
4548 /* isa options */
4549 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4550 IX86_ATTR_ISA ("abm", OPT_mabm),
4551 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4552 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4553 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4554 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4555 IX86_ATTR_ISA ("aes", OPT_maes),
4556 IX86_ATTR_ISA ("sha", OPT_msha),
4557 IX86_ATTR_ISA ("avx", OPT_mavx),
4558 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4559 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4560 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4561 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4562 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4563 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4564 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4565 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4566 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4567 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4568 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4569 IX86_ATTR_ISA ("sse", OPT_msse),
4570 IX86_ATTR_ISA ("sse2", OPT_msse2),
4571 IX86_ATTR_ISA ("sse3", OPT_msse3),
4572 IX86_ATTR_ISA ("sse4", OPT_msse4),
4573 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4574 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4575 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4576 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4577 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4578 IX86_ATTR_ISA ("fma", OPT_mfma),
4579 IX86_ATTR_ISA ("xop", OPT_mxop),
4580 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4581 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4582 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4583 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4584 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4585 IX86_ATTR_ISA ("hle", OPT_mhle),
4586 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4587 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4588 IX86_ATTR_ISA ("adx", OPT_madx),
4589 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4590 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4591 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4592 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4593 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4594 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4595 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4596
4597 /* enum options */
4598 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4599
4600 /* string options */
4601 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4602 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4603
4604 /* flag options */
4605 IX86_ATTR_YES ("cld",
4606 OPT_mcld,
4607 MASK_CLD),
4608
4609 IX86_ATTR_NO ("fancy-math-387",
4610 OPT_mfancy_math_387,
4611 MASK_NO_FANCY_MATH_387),
4612
4613 IX86_ATTR_YES ("ieee-fp",
4614 OPT_mieee_fp,
4615 MASK_IEEE_FP),
4616
4617 IX86_ATTR_YES ("inline-all-stringops",
4618 OPT_minline_all_stringops,
4619 MASK_INLINE_ALL_STRINGOPS),
4620
4621 IX86_ATTR_YES ("inline-stringops-dynamically",
4622 OPT_minline_stringops_dynamically,
4623 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4624
4625 IX86_ATTR_NO ("align-stringops",
4626 OPT_mno_align_stringops,
4627 MASK_NO_ALIGN_STRINGOPS),
4628
4629 IX86_ATTR_YES ("recip",
4630 OPT_mrecip,
4631 MASK_RECIP),
4632
4633 };
4634
4635 /* If this is a list, recurse to get the options. */
4636 if (TREE_CODE (args) == TREE_LIST)
4637 {
4638 bool ret = true;
4639
4640 for (; args; args = TREE_CHAIN (args))
4641 if (TREE_VALUE (args)
4642 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4643 p_strings, opts, opts_set,
4644 enum_opts_set))
4645 ret = false;
4646
4647 return ret;
4648 }
4649
4650 else if (TREE_CODE (args) != STRING_CST)
4651 {
4652 error ("attribute %<target%> argument not a string");
4653 return false;
4654 }
4655
4656 /* Handle multiple arguments separated by commas. */
4657 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4658
4659 while (next_optstr && *next_optstr != '\0')
4660 {
4661 char *p = next_optstr;
4662 char *orig_p = p;
4663 char *comma = strchr (next_optstr, ',');
4664 const char *opt_string;
4665 size_t len, opt_len;
4666 int opt;
4667 bool opt_set_p;
4668 char ch;
4669 unsigned i;
4670 enum ix86_opt_type type = ix86_opt_unknown;
4671 int mask = 0;
4672
4673 if (comma)
4674 {
4675 *comma = '\0';
4676 len = comma - next_optstr;
4677 next_optstr = comma + 1;
4678 }
4679 else
4680 {
4681 len = strlen (p);
4682 next_optstr = NULL;
4683 }
4684
4685 /* Recognize no-xxx. */
4686 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4687 {
4688 opt_set_p = false;
4689 p += 3;
4690 len -= 3;
4691 }
4692 else
4693 opt_set_p = true;
4694
4695 /* Find the option. */
4696 ch = *p;
4697 opt = N_OPTS;
4698 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4699 {
4700 type = attrs[i].type;
4701 opt_len = attrs[i].len;
4702 if (ch == attrs[i].string[0]
4703 && ((type != ix86_opt_str && type != ix86_opt_enum)
4704 ? len == opt_len
4705 : len > opt_len)
4706 && memcmp (p, attrs[i].string, opt_len) == 0)
4707 {
4708 opt = attrs[i].opt;
4709 mask = attrs[i].mask;
4710 opt_string = attrs[i].string;
4711 break;
4712 }
4713 }
4714
4715 /* Process the option. */
4716 if (opt == N_OPTS)
4717 {
4718 error ("attribute(target(\"%s\")) is unknown", orig_p);
4719 ret = false;
4720 }
4721
4722 else if (type == ix86_opt_isa)
4723 {
4724 struct cl_decoded_option decoded;
4725
4726 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4727 ix86_handle_option (opts, opts_set,
4728 &decoded, input_location);
4729 }
4730
4731 else if (type == ix86_opt_yes || type == ix86_opt_no)
4732 {
4733 if (type == ix86_opt_no)
4734 opt_set_p = !opt_set_p;
4735
4736 if (opt_set_p)
4737 opts->x_target_flags |= mask;
4738 else
4739 opts->x_target_flags &= ~mask;
4740 }
4741
4742 else if (type == ix86_opt_str)
4743 {
4744 if (p_strings[opt])
4745 {
4746 error ("option(\"%s\") was already specified", opt_string);
4747 ret = false;
4748 }
4749 else
4750 p_strings[opt] = xstrdup (p + opt_len);
4751 }
4752
4753 else if (type == ix86_opt_enum)
4754 {
4755 bool arg_ok;
4756 int value;
4757
4758 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4759 if (arg_ok)
4760 set_option (opts, enum_opts_set, opt, value,
4761 p + opt_len, DK_UNSPECIFIED, input_location,
4762 global_dc);
4763 else
4764 {
4765 error ("attribute(target(\"%s\")) is unknown", orig_p);
4766 ret = false;
4767 }
4768 }
4769
4770 else
4771 gcc_unreachable ();
4772 }
4773
4774 return ret;
4775 }
4776
4777 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4778
4779 tree
4780 ix86_valid_target_attribute_tree (tree args,
4781 struct gcc_options *opts,
4782 struct gcc_options *opts_set)
4783 {
4784 const char *orig_arch_string = opts->x_ix86_arch_string;
4785 const char *orig_tune_string = opts->x_ix86_tune_string;
4786 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4787 int orig_tune_defaulted = ix86_tune_defaulted;
4788 int orig_arch_specified = ix86_arch_specified;
4789 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4790 tree t = NULL_TREE;
4791 int i;
4792 struct cl_target_option *def
4793 = TREE_TARGET_OPTION (target_option_default_node);
4794 struct gcc_options enum_opts_set;
4795
4796 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4797
4798 /* Process each of the options on the chain. */
4799 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4800 opts_set, &enum_opts_set))
4801 return error_mark_node;
4802
4803 /* If the changed options are different from the default, rerun
4804 ix86_option_override_internal, and then save the options away.
4805 The string options are attribute options, and will be undone
4806 when we copy the save structure. */
4807 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4808 || opts->x_target_flags != def->x_target_flags
4809 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4810 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4811 || enum_opts_set.x_ix86_fpmath)
4812 {
4813 /* If we are using the default tune= or arch=, undo the string assigned,
4814 and use the default. */
4815 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4816 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4817 else if (!orig_arch_specified)
4818 opts->x_ix86_arch_string = NULL;
4819
4820 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4821 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4822 else if (orig_tune_defaulted)
4823 opts->x_ix86_tune_string = NULL;
4824
4825 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4826 if (enum_opts_set.x_ix86_fpmath)
4827 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4828 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4829 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4830 {
4831 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4832 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4833 }
4834
4835 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4836 ix86_option_override_internal (false, opts, opts_set);
4837
4838 /* Add any builtin functions with the new isa if any. */
4839 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4840
4841 /* Save the current options unless we are validating options for
4842 #pragma. */
4843 t = build_target_option_node (opts);
4844
4845 opts->x_ix86_arch_string = orig_arch_string;
4846 opts->x_ix86_tune_string = orig_tune_string;
4847 opts_set->x_ix86_fpmath = orig_fpmath_set;
4848
4849 /* Free up memory allocated to hold the strings */
4850 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4851 free (option_strings[i]);
4852 }
4853
4854 return t;
4855 }
4856
4857 /* Hook to validate attribute((target("string"))). */
4858
4859 static bool
4860 ix86_valid_target_attribute_p (tree fndecl,
4861 tree ARG_UNUSED (name),
4862 tree args,
4863 int ARG_UNUSED (flags))
4864 {
4865 struct gcc_options func_options;
4866 tree new_target, new_optimize;
4867 bool ret = true;
4868
4869 /* attribute((target("default"))) does nothing, beyond
4870 affecting multi-versioning. */
4871 if (TREE_VALUE (args)
4872 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4873 && TREE_CHAIN (args) == NULL_TREE
4874 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4875 return true;
4876
4877 tree old_optimize = build_optimization_node (&global_options);
4878
4879 /* Get the optimization options of the current function. */
4880 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4881
4882 if (!func_optimize)
4883 func_optimize = old_optimize;
4884
4885 /* Init func_options. */
4886 memset (&func_options, 0, sizeof (func_options));
4887 init_options_struct (&func_options, NULL);
4888 lang_hooks.init_options_struct (&func_options);
4889
4890 cl_optimization_restore (&func_options,
4891 TREE_OPTIMIZATION (func_optimize));
4892
4893 /* Initialize func_options to the default before its target options can
4894 be set. */
4895 cl_target_option_restore (&func_options,
4896 TREE_TARGET_OPTION (target_option_default_node));
4897
4898 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4899 &global_options_set);
4900
4901 new_optimize = build_optimization_node (&func_options);
4902
4903 if (new_target == error_mark_node)
4904 ret = false;
4905
4906 else if (fndecl && new_target)
4907 {
4908 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4909
4910 if (old_optimize != new_optimize)
4911 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4912 }
4913
4914 return ret;
4915 }
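
/* For reference, a sketch (not from this file) of how the "default" case
   above shows up when functions are multi-versioned, e.g. in C++:

	__attribute__((target ("default"))) int foo (void) { return 0; }
	__attribute__((target ("avx2")))    int foo (void) { return 1; }

   The "default" string is accepted here without changing any options; the
   dispatch between the versions is handled elsewhere.  */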
4916
4917 \f
4918 /* Hook to determine if one function can safely inline another. */
4919
4920 static bool
4921 ix86_can_inline_p (tree caller, tree callee)
4922 {
4923 bool ret = false;
4924 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4925 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4926
4927 /* If callee has no option attributes, then it is ok to inline. */
4928 if (!callee_tree)
4929 ret = true;
4930
4931 /* If caller has no option attributes, but callee does then it is not ok to
4932 inline. */
4933 else if (!caller_tree)
4934 ret = false;
4935
4936 else
4937 {
4938 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4939 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4940
4941 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4942 function can inline an SSE2 function but an SSE2 function can't inline
4943 an SSE4 function. */
4944 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4945 != callee_opts->x_ix86_isa_flags)
4946 ret = false;
4947
4948 /* See if we have the same non-isa options. */
4949 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4950 ret = false;
4951
4952 /* See if arch, tune, etc. are the same. */
4953 else if (caller_opts->arch != callee_opts->arch)
4954 ret = false;
4955
4956 else if (caller_opts->tune != callee_opts->tune)
4957 ret = false;
4958
4959 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4960 ret = false;
4961
4962 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4963 ret = false;
4964
4965 else
4966 ret = true;
4967 }
4968
4969 return ret;
4970 }
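
/* Illustrative example (not part of the source) of the subset rule above:

	__attribute__((target ("sse4.2"))) int callee (void);
	__attribute__((target ("sse2")))   int caller (void);

   caller cannot inline callee, because callee's ISA flags are not a subset
   of caller's; swapping the two attributes would make inlining acceptable.  */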
4971
4972 \f
4973 /* Remember the last target of ix86_set_current_function. */
4974 static GTY(()) tree ix86_previous_fndecl;
4975
4976 /* Invalidate ix86_previous_fndecl cache. */
4977 void
4978 ix86_reset_previous_fndecl (void)
4979 {
4980 ix86_previous_fndecl = NULL_TREE;
4981 }
4982
4983 /* Establish appropriate back-end context for processing the function
4984 FNDECL. The argument might be NULL to indicate processing at top
4985 level, outside of any function scope. */
4986 static void
4987 ix86_set_current_function (tree fndecl)
4988 {
4989 /* Only change the context if the function changes. This hook is called
4990 several times in the course of compiling a function, and we don't want to
4991 slow things down too much or call target_reinit when it isn't safe. */
4992 if (fndecl && fndecl != ix86_previous_fndecl)
4993 {
4994 tree old_tree = (ix86_previous_fndecl
4995 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4996 : NULL_TREE);
4997
4998 tree new_tree = (fndecl
4999 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
5000 : NULL_TREE);
5001
5002 ix86_previous_fndecl = fndecl;
5003 if (old_tree == new_tree)
5004 ;
5005
5006 else if (new_tree)
5007 {
5008 cl_target_option_restore (&global_options,
5009 TREE_TARGET_OPTION (new_tree));
5010 if (TREE_TARGET_GLOBALS (new_tree))
5011 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5012 else
5013 TREE_TARGET_GLOBALS (new_tree)
5014 = save_target_globals_default_opts ();
5015 }
5016
5017 else if (old_tree)
5018 {
5019 new_tree = target_option_current_node;
5020 cl_target_option_restore (&global_options,
5021 TREE_TARGET_OPTION (new_tree));
5022 if (TREE_TARGET_GLOBALS (new_tree))
5023 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5024 else if (new_tree == target_option_default_node)
5025 restore_target_globals (&default_target_globals);
5026 else
5027 TREE_TARGET_GLOBALS (new_tree)
5028 = save_target_globals_default_opts ();
5029 }
5030 }
5031 }
5032
5033 \f
5034 /* Return true if this goes in large data/bss. */
5035
5036 static bool
5037 ix86_in_large_data_p (tree exp)
5038 {
5039 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5040 return false;
5041
5042 /* Functions are never large data. */
5043 if (TREE_CODE (exp) == FUNCTION_DECL)
5044 return false;
5045
5046 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5047 {
5048 const char *section = DECL_SECTION_NAME (exp);
5049 if (strcmp (section, ".ldata") == 0
5050 || strcmp (section, ".lbss") == 0)
5051 return true;
5052 return false;
5053 }
5054 else
5055 {
5056 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5057
5058 /* If this is an incomplete type with size 0, then we can't put it
5059 in data because it might be too big when completed. Also,
5060 int_size_in_bytes returns -1 if the size can vary or is larger than
5061 an integer, in which case it is also safer to assume that it goes in
5062 large data. */
5063 if (size <= 0 || size > ix86_section_threshold)
5064 return true;
5065 }
5066
5067 return false;
5068 }
5069
5070 /* Switch to the appropriate section for output of DECL.
5071 DECL is either a `VAR_DECL' node or a constant of some sort.
5072 RELOC indicates whether forming the initial value of DECL requires
5073 link-time relocations. */
5074
5075 ATTRIBUTE_UNUSED static section *
5076 x86_64_elf_select_section (tree decl, int reloc,
5077 unsigned HOST_WIDE_INT align)
5078 {
5079 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5080 && ix86_in_large_data_p (decl))
5081 {
5082 const char *sname = NULL;
5083 unsigned int flags = SECTION_WRITE;
5084 switch (categorize_decl_for_section (decl, reloc))
5085 {
5086 case SECCAT_DATA:
5087 sname = ".ldata";
5088 break;
5089 case SECCAT_DATA_REL:
5090 sname = ".ldata.rel";
5091 break;
5092 case SECCAT_DATA_REL_LOCAL:
5093 sname = ".ldata.rel.local";
5094 break;
5095 case SECCAT_DATA_REL_RO:
5096 sname = ".ldata.rel.ro";
5097 break;
5098 case SECCAT_DATA_REL_RO_LOCAL:
5099 sname = ".ldata.rel.ro.local";
5100 break;
5101 case SECCAT_BSS:
5102 sname = ".lbss";
5103 flags |= SECTION_BSS;
5104 break;
5105 case SECCAT_RODATA:
5106 case SECCAT_RODATA_MERGE_STR:
5107 case SECCAT_RODATA_MERGE_STR_INIT:
5108 case SECCAT_RODATA_MERGE_CONST:
5109 sname = ".lrodata";
5110 flags = 0;
5111 break;
5112 case SECCAT_SRODATA:
5113 case SECCAT_SDATA:
5114 case SECCAT_SBSS:
5115 gcc_unreachable ();
5116 case SECCAT_TEXT:
5117 case SECCAT_TDATA:
5118 case SECCAT_TBSS:
5119 /* We don't split these for the medium model. Place them into
5120 default sections and hope for the best. */
5121 break;
5122 }
5123 if (sname)
5124 {
5125 /* We might get called with string constants, but get_named_section
5126 doesn't like them as they are not DECLs. Also, we need to set
5127 flags in that case. */
5128 if (!DECL_P (decl))
5129 return get_section (sname, flags, NULL);
5130 return get_named_section (decl, sname, reloc);
5131 }
5132 }
5133 return default_elf_select_section (decl, reloc, align);
5134 }
5135
5136 /* Select a set of attributes for section NAME based on the properties
5137 of DECL and whether or not RELOC indicates that DECL's initializer
5138 might contain runtime relocations. */
5139
5140 static unsigned int ATTRIBUTE_UNUSED
5141 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5142 {
5143 unsigned int flags = default_section_type_flags (decl, name, reloc);
5144
5145 if (decl == NULL_TREE
5146 && (strcmp (name, ".ldata.rel.ro") == 0
5147 || strcmp (name, ".ldata.rel.ro.local") == 0))
5148 flags |= SECTION_RELRO;
5149
5150 if (strcmp (name, ".lbss") == 0
5151 || strncmp (name, ".lbss.", 6) == 0
5152 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5153 flags |= SECTION_BSS;
5154
5155 return flags;
5156 }
5157
5158 /* Build up a unique section name, expressed as a
5159 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5160 RELOC indicates whether the initial value of EXP requires
5161 link-time relocations. */
5162
5163 static void ATTRIBUTE_UNUSED
5164 x86_64_elf_unique_section (tree decl, int reloc)
5165 {
5166 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5167 && ix86_in_large_data_p (decl))
5168 {
5169 const char *prefix = NULL;
5170 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5171 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5172
5173 switch (categorize_decl_for_section (decl, reloc))
5174 {
5175 case SECCAT_DATA:
5176 case SECCAT_DATA_REL:
5177 case SECCAT_DATA_REL_LOCAL:
5178 case SECCAT_DATA_REL_RO:
5179 case SECCAT_DATA_REL_RO_LOCAL:
5180 prefix = one_only ? ".ld" : ".ldata";
5181 break;
5182 case SECCAT_BSS:
5183 prefix = one_only ? ".lb" : ".lbss";
5184 break;
5185 case SECCAT_RODATA:
5186 case SECCAT_RODATA_MERGE_STR:
5187 case SECCAT_RODATA_MERGE_STR_INIT:
5188 case SECCAT_RODATA_MERGE_CONST:
5189 prefix = one_only ? ".lr" : ".lrodata";
5190 break;
5191 case SECCAT_SRODATA:
5192 case SECCAT_SDATA:
5193 case SECCAT_SBSS:
5194 gcc_unreachable ();
5195 case SECCAT_TEXT:
5196 case SECCAT_TDATA:
5197 case SECCAT_TBSS:
5198 /* We don't split these for the medium model. Place them into
5199 default sections and hope for the best. */
5200 break;
5201 }
5202 if (prefix)
5203 {
5204 const char *name, *linkonce;
5205 char *string;
5206
5207 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5208 name = targetm.strip_name_encoding (name);
5209
5210 /* If we're using one_only, then there needs to be a .gnu.linkonce
5211 prefix to the section name. */
5212 linkonce = one_only ? ".gnu.linkonce" : "";
5213
5214 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5215
5216 set_decl_section_name (decl, string);
5217 return;
5218 }
5219 }
5220 default_unique_section (decl, reloc);
5221 }
5222
5223 #ifdef COMMON_ASM_OP
5224 /* This says how to output assembler code to declare an
5225 uninitialized external linkage data object.
5226
5227 For medium model x86-64 we need to use .largecomm opcode for
5228 large objects. */
5229 void
5230 x86_elf_aligned_common (FILE *file,
5231 const char *name, unsigned HOST_WIDE_INT size,
5232 int align)
5233 {
5234 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5235 && size > (unsigned int)ix86_section_threshold)
5236 fputs (".largecomm\t", file);
5237 else
5238 fputs (COMMON_ASM_OP, file);
5239 assemble_name (file, name);
5240 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5241 size, align / BITS_PER_UNIT);
5242 }
5243 #endif
5244
5245 /* Utility function for targets to use in implementing
5246 ASM_OUTPUT_ALIGNED_BSS. */
5247
5248 void
5249 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5250 unsigned HOST_WIDE_INT size, int align)
5251 {
5252 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5253 && size > (unsigned int)ix86_section_threshold)
5254 switch_to_section (get_named_section (decl, ".lbss", 0));
5255 else
5256 switch_to_section (bss_section);
5257 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5258 #ifdef ASM_DECLARE_OBJECT_NAME
5259 last_assemble_variable_decl = decl;
5260 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5261 #else
5262 /* Standard thing is just output label for the object. */
5263 ASM_OUTPUT_LABEL (file, name);
5264 #endif /* ASM_DECLARE_OBJECT_NAME */
5265 ASM_OUTPUT_SKIP (file, size ? size : 1);
5266 }
5267 \f
5268 /* Decide whether we must probe the stack before any space allocation
5269 on this target. It's essentially TARGET_STACK_PROBE except when
5270 -fstack-check causes the stack to be already probed differently. */
5271
5272 bool
5273 ix86_target_stack_probe (void)
5274 {
5275 /* Do not probe the stack twice if static stack checking is enabled. */
5276 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5277 return false;
5278
5279 return TARGET_STACK_PROBE;
5280 }
5281 \f
5282 /* Decide whether we can make a sibling call to a function. DECL is the
5283 declaration of the function being targeted by the call and EXP is the
5284 CALL_EXPR representing the call. */
5285
5286 static bool
5287 ix86_function_ok_for_sibcall (tree decl, tree exp)
5288 {
5289 tree type, decl_or_type;
5290 rtx a, b;
5291
5292 /* If we are generating position-independent code, we cannot sibcall
5293 optimize any indirect call, or a direct call to a global function,
5294 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5295 if (!TARGET_MACHO
5296 && !TARGET_64BIT
5297 && flag_pic
5298 && (!decl || !targetm.binds_local_p (decl)))
5299 return false;
5300
5301 /* If we need to align the outgoing stack, then sibcalling would
5302 unalign the stack, which may break the called function. */
5303 if (ix86_minimum_incoming_stack_boundary (true)
5304 < PREFERRED_STACK_BOUNDARY)
5305 return false;
5306
5307 if (decl)
5308 {
5309 decl_or_type = decl;
5310 type = TREE_TYPE (decl);
5311 }
5312 else
5313 {
5314 /* We're looking at the CALL_EXPR, we need the type of the function. */
5315 type = CALL_EXPR_FN (exp); /* pointer expression */
5316 type = TREE_TYPE (type); /* pointer type */
5317 type = TREE_TYPE (type); /* function type */
5318 decl_or_type = type;
5319 }
5320
5321 /* Check that the return value locations are the same. For example,
5322 if we are returning floats on the 80387 register stack, we cannot
5323 make a sibcall from a function that doesn't return a float to a
5324 function that does or, conversely, from a function that does return
5325 a float to a function that doesn't; the necessary stack adjustment
5326 would not be executed. This is also the place we notice
5327 differences in the return value ABI. Note that it is ok for one
5328 of the functions to have void return type as long as the return
5329 value of the other is passed in a register. */
5330 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5331 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5332 cfun->decl, false);
5333 if (STACK_REG_P (a) || STACK_REG_P (b))
5334 {
5335 if (!rtx_equal_p (a, b))
5336 return false;
5337 }
5338 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5339 ;
5340 else if (!rtx_equal_p (a, b))
5341 return false;
5342
5343 if (TARGET_64BIT)
5344 {
5345 /* The SYSV ABI has more call-clobbered registers;
5346 disallow sibcalls from MS to SYSV. */
5347 if (cfun->machine->call_abi == MS_ABI
5348 && ix86_function_type_abi (type) == SYSV_ABI)
5349 return false;
5350 }
5351 else
5352 {
5353 /* If this call is indirect, we'll need to be able to use a
5354 call-clobbered register for the address of the target function.
5355 Make sure that all such registers are not used for passing
5356 parameters. Note that DLLIMPORT functions are indirect. */
5357 if (!decl
5358 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5359 {
5360 if (ix86_function_regparm (type, NULL) >= 3)
5361 {
5362 /* ??? Need to count the actual number of registers to be used,
5363 not the possible number of registers. Fix later. */
5364 return false;
5365 }
5366 }
5367 }
5368
5369 /* Otherwise okay. That also includes certain types of indirect calls. */
5370 return true;
5371 }
5372
5373 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5374 and "sseregparm" calling convention attributes;
5375 arguments as in struct attribute_spec.handler. */
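
/* Hypothetical examples of the attributes handled below (32-bit only):

	int __attribute__((regparm (3))) f (int, int, int);
	int __attribute__((fastcall))    g (int, int);
	int __attribute__((stdcall))     h (int);

   Combining fastcall with regparm, for instance, is rejected with an error
   by the checks in this handler.  */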
5376
5377 static tree
5378 ix86_handle_cconv_attribute (tree *node, tree name,
5379 tree args,
5380 int,
5381 bool *no_add_attrs)
5382 {
5383 if (TREE_CODE (*node) != FUNCTION_TYPE
5384 && TREE_CODE (*node) != METHOD_TYPE
5385 && TREE_CODE (*node) != FIELD_DECL
5386 && TREE_CODE (*node) != TYPE_DECL)
5387 {
5388 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5389 name);
5390 *no_add_attrs = true;
5391 return NULL_TREE;
5392 }
5393
5394 /* Can combine regparm with all attributes but fastcall and thiscall. */
5395 if (is_attribute_p ("regparm", name))
5396 {
5397 tree cst;
5398
5399 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5400 {
5401 error ("fastcall and regparm attributes are not compatible");
5402 }
5403
5404 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5405 {
5406 error ("regparm and thiscall attributes are not compatible");
5407 }
5408
5409 cst = TREE_VALUE (args);
5410 if (TREE_CODE (cst) != INTEGER_CST)
5411 {
5412 warning (OPT_Wattributes,
5413 "%qE attribute requires an integer constant argument",
5414 name);
5415 *no_add_attrs = true;
5416 }
5417 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5418 {
5419 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5420 name, REGPARM_MAX);
5421 *no_add_attrs = true;
5422 }
5423
5424 return NULL_TREE;
5425 }
5426
5427 if (TARGET_64BIT)
5428 {
5429 /* Do not warn when emulating the MS ABI. */
5430 if ((TREE_CODE (*node) != FUNCTION_TYPE
5431 && TREE_CODE (*node) != METHOD_TYPE)
5432 || ix86_function_type_abi (*node) != MS_ABI)
5433 warning (OPT_Wattributes, "%qE attribute ignored",
5434 name);
5435 *no_add_attrs = true;
5436 return NULL_TREE;
5437 }
5438
5439 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5440 if (is_attribute_p ("fastcall", name))
5441 {
5442 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5443 {
5444 error ("fastcall and cdecl attributes are not compatible");
5445 }
5446 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5447 {
5448 error ("fastcall and stdcall attributes are not compatible");
5449 }
5450 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5451 {
5452 error ("fastcall and regparm attributes are not compatible");
5453 }
5454 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5455 {
5456 error ("fastcall and thiscall attributes are not compatible");
5457 }
5458 }
5459
5460 /* Can combine stdcall with fastcall (redundant), regparm and
5461 sseregparm. */
5462 else if (is_attribute_p ("stdcall", name))
5463 {
5464 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5465 {
5466 error ("stdcall and cdecl attributes are not compatible");
5467 }
5468 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5469 {
5470 error ("stdcall and fastcall attributes are not compatible");
5471 }
5472 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5473 {
5474 error ("stdcall and thiscall attributes are not compatible");
5475 }
5476 }
5477
5478 /* Can combine cdecl with regparm and sseregparm. */
5479 else if (is_attribute_p ("cdecl", name))
5480 {
5481 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5482 {
5483 error ("stdcall and cdecl attributes are not compatible");
5484 }
5485 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5486 {
5487 error ("fastcall and cdecl attributes are not compatible");
5488 }
5489 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5490 {
5491 error ("cdecl and thiscall attributes are not compatible");
5492 }
5493 }
5494 else if (is_attribute_p ("thiscall", name))
5495 {
5496 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5497 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5498 name);
5499 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5500 {
5501 error ("stdcall and thiscall attributes are not compatible");
5502 }
5503 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5504 {
5505 error ("fastcall and thiscall attributes are not compatible");
5506 }
5507 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5508 {
5509 error ("cdecl and thiscall attributes are not compatible");
5510 }
5511 }
5512
5513 /* Can combine sseregparm with all attributes. */
5514
5515 return NULL_TREE;
5516 }
5517
5518 /* The transactional memory builtins are implicitly regparm or fastcall
5519 depending on the ABI. Override the generic do-nothing attribute that
5520 these builtins were declared with, and replace it with one of the two
5521 attributes that we expect elsewhere. */
5522
5523 static tree
5524 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5525 int flags, bool *no_add_attrs)
5526 {
5527 tree alt;
5528
5529 /* In no case do we want to add the placeholder attribute. */
5530 *no_add_attrs = true;
5531
5532 /* The 64-bit ABI is unchanged for transactional memory. */
5533 if (TARGET_64BIT)
5534 return NULL_TREE;
5535
5536 /* ??? Is there a better way to validate 32-bit Windows? We have
5537 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5538 if (CHECK_STACK_LIMIT > 0)
5539 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5540 else
5541 {
5542 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5543 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5544 }
5545 decl_attributes (node, alt, flags);
5546
5547 return NULL_TREE;
5548 }
5549
5550 /* This function determines from TYPE the calling-convention. */
5551
5552 unsigned int
5553 ix86_get_callcvt (const_tree type)
5554 {
5555 unsigned int ret = 0;
5556 bool is_stdarg;
5557 tree attrs;
5558
5559 if (TARGET_64BIT)
5560 return IX86_CALLCVT_CDECL;
5561
5562 attrs = TYPE_ATTRIBUTES (type);
5563 if (attrs != NULL_TREE)
5564 {
5565 if (lookup_attribute ("cdecl", attrs))
5566 ret |= IX86_CALLCVT_CDECL;
5567 else if (lookup_attribute ("stdcall", attrs))
5568 ret |= IX86_CALLCVT_STDCALL;
5569 else if (lookup_attribute ("fastcall", attrs))
5570 ret |= IX86_CALLCVT_FASTCALL;
5571 else if (lookup_attribute ("thiscall", attrs))
5572 ret |= IX86_CALLCVT_THISCALL;
5573
5574 /* Regparm isn't allowed for thiscall and fastcall. */
5575 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5576 {
5577 if (lookup_attribute ("regparm", attrs))
5578 ret |= IX86_CALLCVT_REGPARM;
5579 if (lookup_attribute ("sseregparm", attrs))
5580 ret |= IX86_CALLCVT_SSEREGPARM;
5581 }
5582
5583 if (IX86_BASE_CALLCVT(ret) != 0)
5584 return ret;
5585 }
5586
5587 is_stdarg = stdarg_p (type);
5588 if (TARGET_RTD && !is_stdarg)
5589 return IX86_CALLCVT_STDCALL | ret;
5590
5591 if (ret != 0
5592 || is_stdarg
5593 || TREE_CODE (type) != METHOD_TYPE
5594 || ix86_function_type_abi (type) != MS_ABI)
5595 return IX86_CALLCVT_CDECL | ret;
5596
5597 return IX86_CALLCVT_THISCALL;
5598 }
5599
5600 /* Return 0 if the attributes for two types are incompatible, 1 if they
5601 are compatible, and 2 if they are nearly compatible (which causes a
5602 warning to be generated). */
5603
5604 static int
5605 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5606 {
5607 unsigned int ccvt1, ccvt2;
5608
5609 if (TREE_CODE (type1) != FUNCTION_TYPE
5610 && TREE_CODE (type1) != METHOD_TYPE)
5611 return 1;
5612
5613 ccvt1 = ix86_get_callcvt (type1);
5614 ccvt2 = ix86_get_callcvt (type2);
5615 if (ccvt1 != ccvt2)
5616 return 0;
5617 if (ix86_function_regparm (type1, NULL)
5618 != ix86_function_regparm (type2, NULL))
5619 return 0;
5620
5621 return 1;
5622 }
5623 \f
5624 /* Return the regparm value for a function with the indicated TYPE and DECL.
5625 DECL may be NULL when calling function indirectly
5626 or considering a libcall. */
5627
5628 static int
5629 ix86_function_regparm (const_tree type, const_tree decl)
5630 {
5631 tree attr;
5632 int regparm;
5633 unsigned int ccvt;
5634
5635 if (TARGET_64BIT)
5636 return (ix86_function_type_abi (type) == SYSV_ABI
5637 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5638 ccvt = ix86_get_callcvt (type);
5639 regparm = ix86_regparm;
5640
5641 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5642 {
5643 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5644 if (attr)
5645 {
5646 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5647 return regparm;
5648 }
5649 }
5650 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5651 return 2;
5652 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5653 return 1;
5654
5655 /* Use register calling convention for local functions when possible. */
5656 if (decl
5657 && TREE_CODE (decl) == FUNCTION_DECL
5658 /* Caller and callee must agree on the calling convention, so
5659 checking just optimize here would mean that with
5660 __attribute__((optimize (...))) the caller could use the regparm convention
5661 and the callee not, or vice versa. Instead look at whether the callee
5662 is optimized or not. */
5663 && opt_for_fn (decl, optimize)
5664 && !(profile_flag && !flag_fentry))
5665 {
5666 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5667 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
5668 if (i && i->local && i->can_change_signature)
5669 {
5670 int local_regparm, globals = 0, regno;
5671
5672 /* Make sure no regparm register is taken by a
5673 fixed register variable. */
5674 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5675 if (fixed_regs[local_regparm])
5676 break;
5677
5678 /* We don't want to use regparm(3) for nested functions as
5679 these use a static chain pointer in the third argument. */
5680 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5681 local_regparm = 2;
5682
5683 /* In 32-bit mode save a register for the split stack. */
5684 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5685 local_regparm = 2;
5686
5687 /* Each fixed register usage increases register pressure,
5688 so fewer registers should be used for argument passing.
5689 This functionality can be overridden by an explicit
5690 regparm value. */
5691 for (regno = AX_REG; regno <= DI_REG; regno++)
5692 if (fixed_regs[regno])
5693 globals++;
5694
5695 local_regparm
5696 = globals < local_regparm ? local_regparm - globals : 0;
5697
5698 if (local_regparm > regparm)
5699 regparm = local_regparm;
5700 }
5701 }
5702
5703 return regparm;
5704 }
5705
5706 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5707 DFmode (2) arguments in SSE registers for a function with the
5708 indicated TYPE and DECL. DECL may be NULL when calling function
5709 indirectly or considering a libcall. Otherwise return 0. */
5710
5711 static int
5712 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5713 {
5714 gcc_assert (!TARGET_64BIT);
5715
5716 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5717 by the sseregparm attribute. */
5718 if (TARGET_SSEREGPARM
5719 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5720 {
5721 if (!TARGET_SSE)
5722 {
5723 if (warn)
5724 {
5725 if (decl)
5726 error ("calling %qD with attribute sseregparm without "
5727 "SSE/SSE2 enabled", decl);
5728 else
5729 error ("calling %qT with attribute sseregparm without "
5730 "SSE/SSE2 enabled", type);
5731 }
5732 return 0;
5733 }
5734
5735 return 2;
5736 }
5737
5738 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5739 (and DFmode for SSE2) arguments in SSE registers. */
5740 if (decl && TARGET_SSE_MATH && optimize
5741 && !(profile_flag && !flag_fentry))
5742 {
5743 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5744 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5745 if (i && i->local && i->can_change_signature)
5746 return TARGET_SSE2 ? 2 : 1;
5747 }
5748
5749 return 0;
5750 }
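
/* Illustrative (hypothetical) use of the attribute checked above:

	__attribute__((sseregparm)) float dotp (float a, float b);

   With SSE enabled this directs SFmode/DFmode arguments into SSE registers
   for the 32-bit ABI; without SSE the error above is emitted instead.  */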
5751
5752 /* Return true if EAX is live at the start of the function. Used by
5753 ix86_expand_prologue to determine if we need special help before
5754 calling allocate_stack_worker. */
5755
5756 static bool
5757 ix86_eax_live_at_start_p (void)
5758 {
5759 /* Cheat. Don't bother working forward from ix86_function_regparm
5760 to the function type to whether an actual argument is located in
5761 eax. Instead just look at cfg info, which is still close enough
5762 to correct at this point. This gives false positives for broken
5763 functions that might use uninitialized data that happens to be
5764 allocated in eax, but who cares? */
5765 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5766 }
5767
5768 static bool
5769 ix86_keep_aggregate_return_pointer (tree fntype)
5770 {
5771 tree attr;
5772
5773 if (!TARGET_64BIT)
5774 {
5775 attr = lookup_attribute ("callee_pop_aggregate_return",
5776 TYPE_ATTRIBUTES (fntype));
5777 if (attr)
5778 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5779
5780 /* For the 32-bit MS-ABI the default is to keep the aggregate
5781 return pointer. */
5782 if (ix86_function_type_abi (fntype) == MS_ABI)
5783 return true;
5784 }
5785 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5786 }
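
/* Illustrative (hypothetical) use of the attribute looked up above:

	struct big { int v[4]; };
	__attribute__((callee_pop_aggregate_return (0))) struct big make_big (void);

   An argument of 0 asks that the hidden aggregate-return pointer be left for
   the caller to pop; a nonzero argument requests the callee-pop behaviour.  */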
5787
5788 /* Value is the number of bytes of arguments automatically
5789 popped when returning from a subroutine call.
5790 FUNDECL is the declaration node of the function (as a tree),
5791 FUNTYPE is the data type of the function (as a tree),
5792 or for a library call it is an identifier node for the subroutine name.
5793 SIZE is the number of bytes of arguments passed on the stack.
5794
5795 On the 80386, the RTD insn may be used to pop them if the number
5796 of args is fixed, but if the number is variable then the caller
5797 must pop them all. RTD can't be used for library calls now
5798 because the library is compiled with the Unix compiler.
5799 Use of RTD is a selectable option, since it is incompatible with
5800 standard Unix calling sequences. If the option is not selected,
5801 the caller must always pop the args.
5802
5803 The attribute stdcall is equivalent to RTD on a per module basis. */
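
/* Worked example (illustrative): for a 32-bit prototyped stdcall function

	void __attribute__((stdcall)) f (int a, int b);

   SIZE is 8 and the function is not stdarg, so this hook returns 8 and the
   callee pops both arguments itself on return; a plain cdecl function yields
   0 and leaves the cleanup to the caller.  */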
5804
5805 static int
5806 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5807 {
5808 unsigned int ccvt;
5809
5810 /* None of the 64-bit ABIs pop arguments. */
5811 if (TARGET_64BIT)
5812 return 0;
5813
5814 ccvt = ix86_get_callcvt (funtype);
5815
5816 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5817 | IX86_CALLCVT_THISCALL)) != 0
5818 && ! stdarg_p (funtype))
5819 return size;
5820
5821 /* Lose any fake structure return argument if it is passed on the stack. */
5822 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5823 && !ix86_keep_aggregate_return_pointer (funtype))
5824 {
5825 int nregs = ix86_function_regparm (funtype, fundecl);
5826 if (nregs == 0)
5827 return GET_MODE_SIZE (Pmode);
5828 }
5829
5830 return 0;
5831 }
5832
5833 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5834
5835 static bool
5836 ix86_legitimate_combined_insn (rtx insn)
5837 {
5838 /* Check operand constraints in case hard registers were propagated
5839 into insn pattern. This check prevents combine pass from
5840 generating insn patterns with invalid hard register operands.
5841 These invalid insns can eventually confuse reload to error out
5842 with a spill failure. See also PRs 46829 and 46843. */
5843 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5844 {
5845 int i;
5846
5847 extract_insn (insn);
5848 preprocess_constraints (insn);
5849
5850 int n_operands = recog_data.n_operands;
5851 int n_alternatives = recog_data.n_alternatives;
5852 for (i = 0; i < n_operands; i++)
5853 {
5854 rtx op = recog_data.operand[i];
5855 enum machine_mode mode = GET_MODE (op);
5856 const operand_alternative *op_alt;
5857 int offset = 0;
5858 bool win;
5859 int j;
5860
5861 /* For pre-AVX disallow unaligned loads/stores where the
5862 instructions don't support it. */
5863 if (!TARGET_AVX
5864 && VECTOR_MODE_P (GET_MODE (op))
5865 && misaligned_operand (op, GET_MODE (op)))
5866 {
5867 int min_align = get_attr_ssememalign (insn);
5868 if (min_align == 0)
5869 return false;
5870 }
5871
5872 /* A unary operator may be accepted by the predicate, but it
5873 is irrelevant for matching constraints. */
5874 if (UNARY_P (op))
5875 op = XEXP (op, 0);
5876
5877 if (GET_CODE (op) == SUBREG)
5878 {
5879 if (REG_P (SUBREG_REG (op))
5880 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5881 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5882 GET_MODE (SUBREG_REG (op)),
5883 SUBREG_BYTE (op),
5884 GET_MODE (op));
5885 op = SUBREG_REG (op);
5886 }
5887
5888 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5889 continue;
5890
5891 op_alt = recog_op_alt;
5892
5893 /* Operand has no constraints, anything is OK. */
5894 win = !n_alternatives;
5895
5896 alternative_mask enabled = recog_data.enabled_alternatives;
5897 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
5898 {
5899 if (!TEST_BIT (enabled, j))
5900 continue;
5901 if (op_alt[i].anything_ok
5902 || (op_alt[i].matches != -1
5903 && operands_match_p
5904 (recog_data.operand[i],
5905 recog_data.operand[op_alt[i].matches]))
5906 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
5907 {
5908 win = true;
5909 break;
5910 }
5911 }
5912
5913 if (!win)
5914 return false;
5915 }
5916 }
5917
5918 return true;
5919 }
5920 \f
5921 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5922
5923 static unsigned HOST_WIDE_INT
5924 ix86_asan_shadow_offset (void)
5925 {
5926 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5927 : HOST_WIDE_INT_C (0x7fff8000))
5928 : (HOST_WIDE_INT_1 << 29);
5929 }
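
/* For reference (not part of the original source): AddressSanitizer maps an
   application address to shadow memory as

	shadow = (addr >> 3) + ix86_asan_shadow_offset ()

   so on x86_64 Linux (LP64, non-Mach-O) the shadow of address A lives at
   (A >> 3) + 0x7fff8000.  */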
5930 \f
5931 /* Argument support functions. */
5932
5933 /* Return true when register may be used to pass function parameters. */
5934 bool
5935 ix86_function_arg_regno_p (int regno)
5936 {
5937 int i;
5938 const int *parm_regs;
5939
5940 if (!TARGET_64BIT)
5941 {
5942 if (TARGET_MACHO)
5943 return (regno < REGPARM_MAX
5944 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5945 else
5946 return (regno < REGPARM_MAX
5947 || (TARGET_MMX && MMX_REGNO_P (regno)
5948 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5949 || (TARGET_SSE && SSE_REGNO_P (regno)
5950 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5951 }
5952
5953 if (TARGET_SSE && SSE_REGNO_P (regno)
5954 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5955 return true;
5956
5957 /* TODO: The function should depend on current function ABI but
5958 builtins.c would need updating then. Therefore we use the
5959 default ABI. */
5960
5961 /* RAX is used as hidden argument to va_arg functions. */
5962 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5963 return true;
5964
5965 if (ix86_abi == MS_ABI)
5966 parm_regs = x86_64_ms_abi_int_parameter_registers;
5967 else
5968 parm_regs = x86_64_int_parameter_registers;
5969 for (i = 0; i < (ix86_abi == MS_ABI
5970 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5971 if (regno == parm_regs[i])
5972 return true;
5973 return false;
5974 }
5975
5976 /* Return true if we do not know how to pass TYPE solely in registers. */
5977
5978 static bool
5979 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5980 {
5981 if (must_pass_in_stack_var_size_or_pad (mode, type))
5982 return true;
5983
5984 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5985 The layout_type routine is crafty and tries to trick us into passing
5986 currently unsupported vector types on the stack by using TImode. */
5987 return (!TARGET_64BIT && mode == TImode
5988 && type && TREE_CODE (type) != VECTOR_TYPE);
5989 }
5990
5991 /* Return the size, in bytes, of the area reserved for arguments passed
5992 in registers for the function represented by FNDECL, depending on the
5993 ABI used. */
5994 int
5995 ix86_reg_parm_stack_space (const_tree fndecl)
5996 {
5997 enum calling_abi call_abi = SYSV_ABI;
5998 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5999 call_abi = ix86_function_abi (fndecl);
6000 else
6001 call_abi = ix86_function_type_abi (fndecl);
6002 if (TARGET_64BIT && call_abi == MS_ABI)
6003 return 32;
6004 return 0;
6005 }
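
/* Example: for a 64-bit MS-ABI function the 32 bytes returned above are the
   four 8-byte "home" slots the caller reserves for RCX, RDX, R8 and R9, as
   required by the Microsoft x64 calling convention.  */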
6006
6007 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
6008 call ABI used. */
6009 enum calling_abi
6010 ix86_function_type_abi (const_tree fntype)
6011 {
6012 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6013 {
6014 enum calling_abi abi = ix86_abi;
6015 if (abi == SYSV_ABI)
6016 {
6017 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6018 abi = MS_ABI;
6019 }
6020 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6021 abi = SYSV_ABI;
6022 return abi;
6023 }
6024 return ix86_abi;
6025 }
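
/* Illustrative (hypothetical) uses of the attributes looked up above:

	void __attribute__((ms_abi))   win_style (int, int, int, int, int);
	void __attribute__((sysv_abi)) unix_style (int, int, int, int, int);

   On a SYSV-default target the first declaration switches that one function
   to the Microsoft x64 convention; the second does the reverse on an
   MS-default target.  */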
6026
6027 /* We add this as a workaround in order to use libc_has_function
6028 hook in i386.md. */
6029 bool
6030 ix86_libc_has_function (enum function_class fn_class)
6031 {
6032 return targetm.libc_has_function (fn_class);
6033 }
6034
6035 static bool
6036 ix86_function_ms_hook_prologue (const_tree fn)
6037 {
6038 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6039 {
6040 if (decl_function_context (fn) != NULL_TREE)
6041 error_at (DECL_SOURCE_LOCATION (fn),
6042 "ms_hook_prologue is not compatible with nested function");
6043 else
6044 return true;
6045 }
6046 return false;
6047 }
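
/* Illustrative (hypothetical) use of the attribute checked above:

	void __attribute__((ms_hook_prologue)) hookable (void);

   marks the function for the hot-patchable prologue emitted by
   ix86_asm_output_function_label below; nesting such a function inside
   another is rejected with the error above.  */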
6048
6049 static enum calling_abi
6050 ix86_function_abi (const_tree fndecl)
6051 {
6052 if (! fndecl)
6053 return ix86_abi;
6054 return ix86_function_type_abi (TREE_TYPE (fndecl));
6055 }
6056
6057 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
6058 call ABI used. */
6059 enum calling_abi
6060 ix86_cfun_abi (void)
6061 {
6062 if (! cfun)
6063 return ix86_abi;
6064 return cfun->machine->call_abi;
6065 }
6066
6067 /* Write the extra assembler code needed to declare a function properly. */
6068
6069 void
6070 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6071 tree decl)
6072 {
6073 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6074
6075 if (is_ms_hook)
6076 {
6077 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6078 unsigned int filler_cc = 0xcccccccc;
6079
6080 for (i = 0; i < filler_count; i += 4)
6081 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6082 }
6083
6084 #ifdef SUBTARGET_ASM_UNWIND_INIT
6085 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6086 #endif
6087
6088 ASM_OUTPUT_LABEL (asm_out_file, fname);
6089
6090 /* Output magic byte marker, if hot-patch attribute is set. */
6091 if (is_ms_hook)
6092 {
6093 if (TARGET_64BIT)
6094 {
6095 /* leaq [%rsp + 0], %rsp */
6096 asm_fprintf (asm_out_file, ASM_BYTE
6097 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6098 }
6099 else
6100 {
6101 /* movl.s %edi, %edi
6102 push %ebp
6103 movl.s %esp, %ebp */
6104 asm_fprintf (asm_out_file, ASM_BYTE
6105 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6106 }
6107 }
6108 }
6109
6110 /* regclass.c */
6111 extern void init_regs (void);
6112
6113 /* Implementation of call abi switching target hook. Specific to FNDECL
6114 the specific call register sets are set. See also
6115 ix86_conditional_register_usage for more details. */
6116 void
6117 ix86_call_abi_override (const_tree fndecl)
6118 {
6119 if (fndecl == NULL_TREE)
6120 cfun->machine->call_abi = ix86_abi;
6121 else
6122 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6123 }
6124
6125 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6126 expensive re-initialization of init_regs each time we switch function context
6127 since this is needed only during RTL expansion. */
6128 static void
6129 ix86_maybe_switch_abi (void)
6130 {
6131 if (TARGET_64BIT
6132 && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6133 reinit_regs ();
6134 }
6135
6136 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6137 for a call to a function whose data type is FNTYPE.
6138 For a library call, FNTYPE is 0. */
6139
6140 void
6141 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6142 tree fntype, /* tree ptr for function decl */
6143 rtx libname, /* SYMBOL_REF of library name or 0 */
6144 tree fndecl,
6145 int caller)
6146 {
6147 struct cgraph_local_info *i;
6148
6149 memset (cum, 0, sizeof (*cum));
6150
6151 if (fndecl)
6152 {
6153 i = cgraph_node::local_info (fndecl);
6154 cum->call_abi = ix86_function_abi (fndecl);
6155 }
6156 else
6157 {
6158 i = NULL;
6159 cum->call_abi = ix86_function_type_abi (fntype);
6160 }
6161
6162 cum->caller = caller;
6163
6164 /* Set up the number of registers to use for passing arguments. */
6165 cum->nregs = ix86_regparm;
6166 if (TARGET_64BIT)
6167 {
6168 cum->nregs = (cum->call_abi == SYSV_ABI
6169 ? X86_64_REGPARM_MAX
6170 : X86_64_MS_REGPARM_MAX);
6171 }
6172 if (TARGET_SSE)
6173 {
6174 cum->sse_nregs = SSE_REGPARM_MAX;
6175 if (TARGET_64BIT)
6176 {
6177 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6178 ? X86_64_SSE_REGPARM_MAX
6179 : X86_64_MS_SSE_REGPARM_MAX);
6180 }
6181 }
6182 if (TARGET_MMX)
6183 cum->mmx_nregs = MMX_REGPARM_MAX;
6184 cum->warn_avx512f = true;
6185 cum->warn_avx = true;
6186 cum->warn_sse = true;
6187 cum->warn_mmx = true;
6188
6189 /* Because the type might mismatch between caller and callee, we need to
6190 use the actual type of the function for local calls.
6191 FIXME: cgraph_analyze can be told to actually record whether a function
6192 uses va_start, so for local functions maybe_vaarg can be made more
6193 aggressive, helping K&R code.
6194 FIXME: once the type system is fixed, we won't need this code anymore. */
6195 if (i && i->local && i->can_change_signature)
6196 fntype = TREE_TYPE (fndecl);
6197 cum->maybe_vaarg = (fntype
6198 ? (!prototype_p (fntype) || stdarg_p (fntype))
6199 : !libname);
6200
6201 if (!TARGET_64BIT)
6202 {
6203 /* If there are variable arguments, then we won't pass anything
6204 in registers in 32-bit mode. */
6205 if (stdarg_p (fntype))
6206 {
6207 cum->nregs = 0;
6208 cum->sse_nregs = 0;
6209 cum->mmx_nregs = 0;
6210 cum->warn_avx512f = false;
6211 cum->warn_avx = false;
6212 cum->warn_sse = false;
6213 cum->warn_mmx = false;
6214 return;
6215 }
6216
6217 /* Use ecx and edx registers if function has fastcall attribute,
6218 else look for regparm information. */
6219 if (fntype)
6220 {
6221 unsigned int ccvt = ix86_get_callcvt (fntype);
6222 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6223 {
6224 cum->nregs = 1;
6225 cum->fastcall = 1; /* Same first register as in fastcall. */
6226 }
6227 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6228 {
6229 cum->nregs = 2;
6230 cum->fastcall = 1;
6231 }
6232 else
6233 cum->nregs = ix86_function_regparm (fntype, fndecl);
6234 }
6235
6236 /* Set up the number of SSE registers used for passing SFmode
6237 and DFmode arguments. Warn for mismatching ABI. */
6238 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6239 }
6240 }
6241
6242 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6243 But in the case of vector types, it is some vector mode.
6244
6245 When we have only some of our vector isa extensions enabled, then there
6246 are some modes for which vector_mode_supported_p is false. For these
6247 modes, the generic vector support in gcc will choose some non-vector mode
6248 in order to implement the type. By computing the natural mode, we'll
6249 select the proper ABI location for the operand and not depend on whatever
6250 the middle-end decides to do with these vector types.
6251
6252 The middle-end can't deal with vector types larger than 16 bytes. In
6253 this case, we return the original mode and warn about the ABI change if
6254 CUM isn't NULL.
6255
6256 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6257 available for the function return value. */
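/* An editorial example (not from the original sources): given

     typedef int v8si __attribute__ ((vector_size (32)));

   the matching vector mode is V8SImode.  When compiled without -mavx the
   middle-end may have chosen some non-vector mode for the type; in that
   case the code below returns TYPE_MODE and emits the -Wpsabi note about
   the ABI change instead of silently picking a different location.  */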
6258
6259 static enum machine_mode
6260 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6261 bool in_return)
6262 {
6263 enum machine_mode mode = TYPE_MODE (type);
6264
6265 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6266 {
6267 HOST_WIDE_INT size = int_size_in_bytes (type);
6268 if ((size == 8 || size == 16 || size == 32 || size == 64)
6269 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6270 && TYPE_VECTOR_SUBPARTS (type) > 1)
6271 {
6272 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6273
6274 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6275 mode = MIN_MODE_VECTOR_FLOAT;
6276 else
6277 mode = MIN_MODE_VECTOR_INT;
6278
6279 /* Get the mode which has this inner mode and number of units. */
6280 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6281 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6282 && GET_MODE_INNER (mode) == innermode)
6283 {
6284 if (size == 64 && !TARGET_AVX512F)
6285 {
6286 static bool warnedavx512f;
6287 static bool warnedavx512f_ret;
6288
6289 if (cum && cum->warn_avx512f && !warnedavx512f)
6290 {
6291 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6292 "without AVX512F enabled changes the ABI"))
6293 warnedavx512f = true;
6294 }
6295 else if (in_return && !warnedavx512f_ret)
6296 {
6297 if (warning (OPT_Wpsabi, "AVX512F vector return "
6298 "without AVX512F enabled changes the ABI"))
6299 warnedavx512f_ret = true;
6300 }
6301
6302 return TYPE_MODE (type);
6303 }
6304 else if (size == 32 && !TARGET_AVX)
6305 {
6306 static bool warnedavx;
6307 static bool warnedavx_ret;
6308
6309 if (cum && cum->warn_avx && !warnedavx)
6310 {
6311 if (warning (OPT_Wpsabi, "AVX vector argument "
6312 "without AVX enabled changes the ABI"))
6313 warnedavx = true;
6314 }
6315 else if (in_return && !warnedavx_ret)
6316 {
6317 if (warning (OPT_Wpsabi, "AVX vector return "
6318 "without AVX enabled changes the ABI"))
6319 warnedavx_ret = true;
6320 }
6321
6322 return TYPE_MODE (type);
6323 }
6324 else if (((size == 8 && TARGET_64BIT) || size == 16)
6325 && !TARGET_SSE)
6326 {
6327 static bool warnedsse;
6328 static bool warnedsse_ret;
6329
6330 if (cum && cum->warn_sse && !warnedsse)
6331 {
6332 if (warning (OPT_Wpsabi, "SSE vector argument "
6333 "without SSE enabled changes the ABI"))
6334 warnedsse = true;
6335 }
6336 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6337 {
6338 if (warning (OPT_Wpsabi, "SSE vector return "
6339 "without SSE enabled changes the ABI"))
6340 warnedsse_ret = true;
6341 }
6342 }
6343 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6344 {
6345 static bool warnedmmx;
6346 static bool warnedmmx_ret;
6347
6348 if (cum && cum->warn_mmx && !warnedmmx)
6349 {
6350 if (warning (OPT_Wpsabi, "MMX vector argument "
6351 "without MMX enabled changes the ABI"))
6352 warnedmmx = true;
6353 }
6354 else if (in_return && !warnedmmx_ret)
6355 {
6356 if (warning (OPT_Wpsabi, "MMX vector return "
6357 "without MMX enabled changes the ABI"))
6358 warnedmmx_ret = true;
6359 }
6360 }
6361 return mode;
6362 }
6363
6364 gcc_unreachable ();
6365 }
6366 }
6367
6368 return mode;
6369 }
6370
6371 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6372 this may not agree with the mode that the type system has chosen for the
6373 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6374 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
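/* Editorial sketch of the BLKmode case handled below: the result is

     (parallel:BLK [(expr_list (reg:MODE REGNO) (const_int 0))])

   i.e. a one-element PARALLEL saying that the register holds the data
   starting at byte offset 0 of the object.  */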
6375
6376 static rtx
6377 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6378 unsigned int regno)
6379 {
6380 rtx tmp;
6381
6382 if (orig_mode != BLKmode)
6383 tmp = gen_rtx_REG (orig_mode, regno);
6384 else
6385 {
6386 tmp = gen_rtx_REG (mode, regno);
6387 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6388 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6389 }
6390
6391 return tmp;
6392 }
6393
6394 /* x86-64 register passing implementation. See the x86-64 ABI for details.
6395 The goal of this code is to classify each eightbyte of an incoming argument
6396 by register class and assign registers accordingly. */
6397
6398 /* Return the union class of CLASS1 and CLASS2.
6399 See the x86-64 PS ABI for details. */
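/* A few editorial examples of the merge rules implemented below:
   merging NO_CLASS with SSE gives SSE (rule #2), INTEGERSI with SSESF
   gives INTEGERSI (rule #4), and X87 combined with an SSE class gives
   MEMORY (rule #5).  */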
6400
6401 static enum x86_64_reg_class
6402 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6403 {
6404 /* Rule #1: If both classes are equal, this is the resulting class. */
6405 if (class1 == class2)
6406 return class1;
6407
6408 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6409 the other class. */
6410 if (class1 == X86_64_NO_CLASS)
6411 return class2;
6412 if (class2 == X86_64_NO_CLASS)
6413 return class1;
6414
6415 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6416 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6417 return X86_64_MEMORY_CLASS;
6418
6419 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6420 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6421 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6422 return X86_64_INTEGERSI_CLASS;
6423 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6424 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6425 return X86_64_INTEGER_CLASS;
6426
6427 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6428 MEMORY is used. */
6429 if (class1 == X86_64_X87_CLASS
6430 || class1 == X86_64_X87UP_CLASS
6431 || class1 == X86_64_COMPLEX_X87_CLASS
6432 || class2 == X86_64_X87_CLASS
6433 || class2 == X86_64_X87UP_CLASS
6434 || class2 == X86_64_COMPLEX_X87_CLASS)
6435 return X86_64_MEMORY_CLASS;
6436
6437 /* Rule #6: Otherwise class SSE is used. */
6438 return X86_64_SSE_CLASS;
6439 }
6440
6441 /* Classify the argument of type TYPE and mode MODE.
6442 CLASSES will be filled by the register class used to pass each word
6443 of the operand. The number of words is returned. In case the parameter
6444 should be passed in memory, 0 is returned. As a special case for zero
6445 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6446
6447 BIT_OFFSET is used internally for handling records; it specifies the
6448 offset of the argument in bits modulo 512 to avoid overflow cases.
6449
6450 See the x86-64 PS ABI for details.
6451 */
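/* An editorial example of the interface: for a 16-byte struct such as

     struct s { double d; int a; int b; };

   the classifier below fills CLASSES[0] with an SSE class (for the
   double) and CLASSES[1] with X86_64_INTEGER_CLASS (for the two ints)
   and returns 2; a struct larger than 64 bytes makes it return 0,
   i.e. "pass in memory".  */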
6452
6453 static int
6454 classify_argument (enum machine_mode mode, const_tree type,
6455 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6456 {
6457 HOST_WIDE_INT bytes =
6458 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6459 int words
6460 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6461
6462 /* Variable sized entities are always passed/returned in memory. */
6463 if (bytes < 0)
6464 return 0;
6465
6466 if (mode != VOIDmode
6467 && targetm.calls.must_pass_in_stack (mode, type))
6468 return 0;
6469
6470 if (type && AGGREGATE_TYPE_P (type))
6471 {
6472 int i;
6473 tree field;
6474 enum x86_64_reg_class subclasses[MAX_CLASSES];
6475
6476 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6477 if (bytes > 64)
6478 return 0;
6479
6480 for (i = 0; i < words; i++)
6481 classes[i] = X86_64_NO_CLASS;
6482
6483 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
6484 signal the memory class, so handle them as a special case. */
6485 if (!words)
6486 {
6487 classes[0] = X86_64_NO_CLASS;
6488 return 1;
6489 }
6490
6491 /* Classify each field of record and merge classes. */
6492 switch (TREE_CODE (type))
6493 {
6494 case RECORD_TYPE:
6495 /* And now merge the fields of structure. */
6496 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6497 {
6498 if (TREE_CODE (field) == FIELD_DECL)
6499 {
6500 int num;
6501
6502 if (TREE_TYPE (field) == error_mark_node)
6503 continue;
6504
6505 /* Bitfields are always classified as integer. Handle them
6506 early, since later code would consider them to be
6507 misaligned integers. */
6508 if (DECL_BIT_FIELD (field))
6509 {
6510 for (i = (int_bit_position (field)
6511 + (bit_offset % 64)) / 8 / 8;
6512 i < ((int_bit_position (field) + (bit_offset % 64))
6513 + tree_to_shwi (DECL_SIZE (field))
6514 + 63) / 8 / 8; i++)
6515 classes[i] =
6516 merge_classes (X86_64_INTEGER_CLASS,
6517 classes[i]);
6518 }
6519 else
6520 {
6521 int pos;
6522
6523 type = TREE_TYPE (field);
6524
6525 /* Flexible array member is ignored. */
6526 if (TYPE_MODE (type) == BLKmode
6527 && TREE_CODE (type) == ARRAY_TYPE
6528 && TYPE_SIZE (type) == NULL_TREE
6529 && TYPE_DOMAIN (type) != NULL_TREE
6530 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6531 == NULL_TREE))
6532 {
6533 static bool warned;
6534
6535 if (!warned && warn_psabi)
6536 {
6537 warned = true;
6538 inform (input_location,
6539 "the ABI of passing struct with"
6540 " a flexible array member has"
6541 " changed in GCC 4.4");
6542 }
6543 continue;
6544 }
6545 num = classify_argument (TYPE_MODE (type), type,
6546 subclasses,
6547 (int_bit_position (field)
6548 + bit_offset) % 512);
6549 if (!num)
6550 return 0;
6551 pos = (int_bit_position (field)
6552 + (bit_offset % 64)) / 8 / 8;
6553 for (i = 0; i < num && (i + pos) < words; i++)
6554 classes[i + pos] =
6555 merge_classes (subclasses[i], classes[i + pos]);
6556 }
6557 }
6558 }
6559 break;
6560
6561 case ARRAY_TYPE:
6562 /* Arrays are handled as small records. */
6563 {
6564 int num;
6565 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6566 TREE_TYPE (type), subclasses, bit_offset);
6567 if (!num)
6568 return 0;
6569
6570 /* The partial classes are now full classes. */
6571 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6572 subclasses[0] = X86_64_SSE_CLASS;
6573 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6574 && !((bit_offset % 64) == 0 && bytes == 4))
6575 subclasses[0] = X86_64_INTEGER_CLASS;
6576
6577 for (i = 0; i < words; i++)
6578 classes[i] = subclasses[i % num];
6579
6580 break;
6581 }
6582 case UNION_TYPE:
6583 case QUAL_UNION_TYPE:
6584 /* Unions are similar to RECORD_TYPE but offset is always 0.
6585 */
6586 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6587 {
6588 if (TREE_CODE (field) == FIELD_DECL)
6589 {
6590 int num;
6591
6592 if (TREE_TYPE (field) == error_mark_node)
6593 continue;
6594
6595 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6596 TREE_TYPE (field), subclasses,
6597 bit_offset);
6598 if (!num)
6599 return 0;
6600 for (i = 0; i < num && i < words; i++)
6601 classes[i] = merge_classes (subclasses[i], classes[i]);
6602 }
6603 }
6604 break;
6605
6606 default:
6607 gcc_unreachable ();
6608 }
6609
6610 if (words > 2)
6611 {
6612 /* When the size is larger than 16 bytes, if the first class isn't
6613 X86_64_SSE_CLASS or any of the remaining classes isn't
6614 X86_64_SSEUP_CLASS, everything should be passed in
6615 memory. */
6616 if (classes[0] != X86_64_SSE_CLASS)
6617 return 0;
6618
6619 for (i = 1; i < words; i++)
6620 if (classes[i] != X86_64_SSEUP_CLASS)
6621 return 0;
6622 }
6623
6624 /* Final merger cleanup. */
6625 for (i = 0; i < words; i++)
6626 {
6627 /* If one class is MEMORY, everything should be passed in
6628 memory. */
6629 if (classes[i] == X86_64_MEMORY_CLASS)
6630 return 0;
6631
6632 /* The X86_64_SSEUP_CLASS should be always preceded by
6633 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6634 if (classes[i] == X86_64_SSEUP_CLASS
6635 && classes[i - 1] != X86_64_SSE_CLASS
6636 && classes[i - 1] != X86_64_SSEUP_CLASS)
6637 {
6638 /* The first one should never be X86_64_SSEUP_CLASS. */
6639 gcc_assert (i != 0);
6640 classes[i] = X86_64_SSE_CLASS;
6641 }
6642
6643 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6644 everything should be passed in memory. */
6645 if (classes[i] == X86_64_X87UP_CLASS
6646 && (classes[i - 1] != X86_64_X87_CLASS))
6647 {
6648 static bool warned;
6649
6650 /* The first one should never be X86_64_X87UP_CLASS. */
6651 gcc_assert (i != 0);
6652 if (!warned && warn_psabi)
6653 {
6654 warned = true;
6655 inform (input_location,
6656 "the ABI of passing union with long double"
6657 " has changed in GCC 4.4");
6658 }
6659 return 0;
6660 }
6661 }
6662 return words;
6663 }
6664
6665 /* Compute the alignment needed. We align all types to their natural
6666 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
6667 if (mode != VOIDmode && mode != BLKmode)
6668 {
6669 int mode_alignment = GET_MODE_BITSIZE (mode);
6670
6671 if (mode == XFmode)
6672 mode_alignment = 128;
6673 else if (mode == XCmode)
6674 mode_alignment = 256;
6675 if (COMPLEX_MODE_P (mode))
6676 mode_alignment /= 2;
6677 /* Misaligned fields are always returned in memory. */
6678 if (bit_offset % mode_alignment)
6679 return 0;
6680 }
6681
6682 /* For V1xx modes, just use the base mode. */
6683 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6684 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6685 mode = GET_MODE_INNER (mode);
6686
6687 /* Classification of atomic types. */
6688 switch (mode)
6689 {
6690 case SDmode:
6691 case DDmode:
6692 classes[0] = X86_64_SSE_CLASS;
6693 return 1;
6694 case TDmode:
6695 classes[0] = X86_64_SSE_CLASS;
6696 classes[1] = X86_64_SSEUP_CLASS;
6697 return 2;
6698 case DImode:
6699 case SImode:
6700 case HImode:
6701 case QImode:
6702 case CSImode:
6703 case CHImode:
6704 case CQImode:
6705 {
6706 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6707
6708 /* Analyze last 128 bits only. */
6709 size = (size - 1) & 0x7f;
6710
6711 if (size < 32)
6712 {
6713 classes[0] = X86_64_INTEGERSI_CLASS;
6714 return 1;
6715 }
6716 else if (size < 64)
6717 {
6718 classes[0] = X86_64_INTEGER_CLASS;
6719 return 1;
6720 }
6721 else if (size < 64+32)
6722 {
6723 classes[0] = X86_64_INTEGER_CLASS;
6724 classes[1] = X86_64_INTEGERSI_CLASS;
6725 return 2;
6726 }
6727 else if (size < 64+64)
6728 {
6729 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6730 return 2;
6731 }
6732 else
6733 gcc_unreachable ();
6734 }
6735 case CDImode:
6736 case TImode:
6737 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6738 return 2;
6739 case COImode:
6740 case OImode:
6741 /* OImode shouldn't be used directly. */
6742 gcc_unreachable ();
6743 case CTImode:
6744 return 0;
6745 case SFmode:
6746 if (!(bit_offset % 64))
6747 classes[0] = X86_64_SSESF_CLASS;
6748 else
6749 classes[0] = X86_64_SSE_CLASS;
6750 return 1;
6751 case DFmode:
6752 classes[0] = X86_64_SSEDF_CLASS;
6753 return 1;
6754 case XFmode:
6755 classes[0] = X86_64_X87_CLASS;
6756 classes[1] = X86_64_X87UP_CLASS;
6757 return 2;
6758 case TFmode:
6759 classes[0] = X86_64_SSE_CLASS;
6760 classes[1] = X86_64_SSEUP_CLASS;
6761 return 2;
6762 case SCmode:
6763 classes[0] = X86_64_SSE_CLASS;
6764 if (!(bit_offset % 64))
6765 return 1;
6766 else
6767 {
6768 static bool warned;
6769
6770 if (!warned && warn_psabi)
6771 {
6772 warned = true;
6773 inform (input_location,
6774 "the ABI of passing structure with complex float"
6775 " member has changed in GCC 4.4");
6776 }
6777 classes[1] = X86_64_SSESF_CLASS;
6778 return 2;
6779 }
6780 case DCmode:
6781 classes[0] = X86_64_SSEDF_CLASS;
6782 classes[1] = X86_64_SSEDF_CLASS;
6783 return 2;
6784 case XCmode:
6785 classes[0] = X86_64_COMPLEX_X87_CLASS;
6786 return 1;
6787 case TCmode:
6788 /* This mode is larger than 16 bytes. */
6789 return 0;
6790 case V8SFmode:
6791 case V8SImode:
6792 case V32QImode:
6793 case V16HImode:
6794 case V4DFmode:
6795 case V4DImode:
6796 classes[0] = X86_64_SSE_CLASS;
6797 classes[1] = X86_64_SSEUP_CLASS;
6798 classes[2] = X86_64_SSEUP_CLASS;
6799 classes[3] = X86_64_SSEUP_CLASS;
6800 return 4;
6801 case V8DFmode:
6802 case V16SFmode:
6803 case V8DImode:
6804 case V16SImode:
6805 case V32HImode:
6806 case V64QImode:
6807 classes[0] = X86_64_SSE_CLASS;
6808 classes[1] = X86_64_SSEUP_CLASS;
6809 classes[2] = X86_64_SSEUP_CLASS;
6810 classes[3] = X86_64_SSEUP_CLASS;
6811 classes[4] = X86_64_SSEUP_CLASS;
6812 classes[5] = X86_64_SSEUP_CLASS;
6813 classes[6] = X86_64_SSEUP_CLASS;
6814 classes[7] = X86_64_SSEUP_CLASS;
6815 return 8;
6816 case V4SFmode:
6817 case V4SImode:
6818 case V16QImode:
6819 case V8HImode:
6820 case V2DFmode:
6821 case V2DImode:
6822 classes[0] = X86_64_SSE_CLASS;
6823 classes[1] = X86_64_SSEUP_CLASS;
6824 return 2;
6825 case V1TImode:
6826 case V1DImode:
6827 case V2SFmode:
6828 case V2SImode:
6829 case V4HImode:
6830 case V8QImode:
6831 classes[0] = X86_64_SSE_CLASS;
6832 return 1;
6833 case BLKmode:
6834 case VOIDmode:
6835 return 0;
6836 default:
6837 gcc_assert (VECTOR_MODE_P (mode));
6838
6839 if (bytes > 16)
6840 return 0;
6841
6842 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6843
6844 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6845 classes[0] = X86_64_INTEGERSI_CLASS;
6846 else
6847 classes[0] = X86_64_INTEGER_CLASS;
6848 classes[1] = X86_64_INTEGER_CLASS;
6849 return 1 + (bytes > 8);
6850 }
6851 }
6852
6853 /* Examine the argument and set the number of registers required in each
6854 class. Return true iff the parameter should be passed in memory. */
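/* Editorial example: for a struct containing one double followed by two
   ints, the call below yields *INT_NREGS == 1 and *SSE_NREGS == 1 and
   returns false; a type that classify_argument rejects makes it return
   true, i.e. the parameter lives in memory.  */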
6855
6856 static bool
6857 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6858 int *int_nregs, int *sse_nregs)
6859 {
6860 enum x86_64_reg_class regclass[MAX_CLASSES];
6861 int n = classify_argument (mode, type, regclass, 0);
6862
6863 *int_nregs = 0;
6864 *sse_nregs = 0;
6865
6866 if (!n)
6867 return true;
6868 for (n--; n >= 0; n--)
6869 switch (regclass[n])
6870 {
6871 case X86_64_INTEGER_CLASS:
6872 case X86_64_INTEGERSI_CLASS:
6873 (*int_nregs)++;
6874 break;
6875 case X86_64_SSE_CLASS:
6876 case X86_64_SSESF_CLASS:
6877 case X86_64_SSEDF_CLASS:
6878 (*sse_nregs)++;
6879 break;
6880 case X86_64_NO_CLASS:
6881 case X86_64_SSEUP_CLASS:
6882 break;
6883 case X86_64_X87_CLASS:
6884 case X86_64_X87UP_CLASS:
6885 case X86_64_COMPLEX_X87_CLASS:
6886 if (!in_return)
6887 return true;
6888 break;
6889 case X86_64_MEMORY_CLASS:
6890 gcc_unreachable ();
6891 }
6892
6893 return false;
6894 }
6895
6896 /* Construct container for the argument used by GCC interface. See
6897 FUNCTION_ARG for the detailed description. */
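/* Editorial sketch, assuming the first SYSV argument slot is still free:
   for the double-plus-two-ints struct used in the examples above, the
   container built below is roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   so the double travels in an SSE register and the two ints share one
   integer register at byte offset 8.  */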
6898
6899 static rtx
6900 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6901 const_tree type, int in_return, int nintregs, int nsseregs,
6902 const int *intreg, int sse_regno)
6903 {
6904 /* The following variables hold the static issued_error state. */
6905 static bool issued_sse_arg_error;
6906 static bool issued_sse_ret_error;
6907 static bool issued_x87_ret_error;
6908
6909 enum machine_mode tmpmode;
6910 int bytes =
6911 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6912 enum x86_64_reg_class regclass[MAX_CLASSES];
6913 int n;
6914 int i;
6915 int nexps = 0;
6916 int needed_sseregs, needed_intregs;
6917 rtx exp[MAX_CLASSES];
6918 rtx ret;
6919
6920 n = classify_argument (mode, type, regclass, 0);
6921 if (!n)
6922 return NULL;
6923 if (examine_argument (mode, type, in_return, &needed_intregs,
6924 &needed_sseregs))
6925 return NULL;
6926 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6927 return NULL;
6928
6929 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6930 some less clueful developer tries to use floating-point anyway. */
6931 if (needed_sseregs && !TARGET_SSE)
6932 {
6933 if (in_return)
6934 {
6935 if (!issued_sse_ret_error)
6936 {
6937 error ("SSE register return with SSE disabled");
6938 issued_sse_ret_error = true;
6939 }
6940 }
6941 else if (!issued_sse_arg_error)
6942 {
6943 error ("SSE register argument with SSE disabled");
6944 issued_sse_arg_error = true;
6945 }
6946 return NULL;
6947 }
6948
6949 /* Likewise, error if the ABI requires us to return values in the
6950 x87 registers and the user specified -mno-80387. */
6951 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6952 for (i = 0; i < n; i++)
6953 if (regclass[i] == X86_64_X87_CLASS
6954 || regclass[i] == X86_64_X87UP_CLASS
6955 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6956 {
6957 if (!issued_x87_ret_error)
6958 {
6959 error ("x87 register return with x87 disabled");
6960 issued_x87_ret_error = true;
6961 }
6962 return NULL;
6963 }
6964
6965 /* First construct the simple cases. Avoid SCmode, since we want to use
6966 a single register to pass this type. */
6967 if (n == 1 && mode != SCmode)
6968 switch (regclass[0])
6969 {
6970 case X86_64_INTEGER_CLASS:
6971 case X86_64_INTEGERSI_CLASS:
6972 return gen_rtx_REG (mode, intreg[0]);
6973 case X86_64_SSE_CLASS:
6974 case X86_64_SSESF_CLASS:
6975 case X86_64_SSEDF_CLASS:
6976 if (mode != BLKmode)
6977 return gen_reg_or_parallel (mode, orig_mode,
6978 SSE_REGNO (sse_regno));
6979 break;
6980 case X86_64_X87_CLASS:
6981 case X86_64_COMPLEX_X87_CLASS:
6982 return gen_rtx_REG (mode, FIRST_STACK_REG);
6983 case X86_64_NO_CLASS:
6984 /* Zero sized array, struct or class. */
6985 return NULL;
6986 default:
6987 gcc_unreachable ();
6988 }
6989 if (n == 2
6990 && regclass[0] == X86_64_SSE_CLASS
6991 && regclass[1] == X86_64_SSEUP_CLASS
6992 && mode != BLKmode)
6993 return gen_reg_or_parallel (mode, orig_mode,
6994 SSE_REGNO (sse_regno));
6995 if (n == 4
6996 && regclass[0] == X86_64_SSE_CLASS
6997 && regclass[1] == X86_64_SSEUP_CLASS
6998 && regclass[2] == X86_64_SSEUP_CLASS
6999 && regclass[3] == X86_64_SSEUP_CLASS
7000 && mode != BLKmode)
7001 return gen_reg_or_parallel (mode, orig_mode,
7002 SSE_REGNO (sse_regno));
7003 if (n == 8
7004 && regclass[0] == X86_64_SSE_CLASS
7005 && regclass[1] == X86_64_SSEUP_CLASS
7006 && regclass[2] == X86_64_SSEUP_CLASS
7007 && regclass[3] == X86_64_SSEUP_CLASS
7008 && regclass[4] == X86_64_SSEUP_CLASS
7009 && regclass[5] == X86_64_SSEUP_CLASS
7010 && regclass[6] == X86_64_SSEUP_CLASS
7011 && regclass[7] == X86_64_SSEUP_CLASS
7012 && mode != BLKmode)
7013 return gen_reg_or_parallel (mode, orig_mode,
7014 SSE_REGNO (sse_regno));
7015 if (n == 2
7016 && regclass[0] == X86_64_X87_CLASS
7017 && regclass[1] == X86_64_X87UP_CLASS)
7018 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7019
7020 if (n == 2
7021 && regclass[0] == X86_64_INTEGER_CLASS
7022 && regclass[1] == X86_64_INTEGER_CLASS
7023 && (mode == CDImode || mode == TImode)
7024 && intreg[0] + 1 == intreg[1])
7025 return gen_rtx_REG (mode, intreg[0]);
7026
7027 /* Otherwise figure out the entries of the PARALLEL. */
7028 for (i = 0; i < n; i++)
7029 {
7030 int pos;
7031
7032 switch (regclass[i])
7033 {
7034 case X86_64_NO_CLASS:
7035 break;
7036 case X86_64_INTEGER_CLASS:
7037 case X86_64_INTEGERSI_CLASS:
7038 /* Merge TImodes on aligned occasions here too. */
7039 if (i * 8 + 8 > bytes)
7040 tmpmode
7041 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7042 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7043 tmpmode = SImode;
7044 else
7045 tmpmode = DImode;
7046 /* We've requested 24 bytes for which we
7047 don't have a mode. Use DImode. */
7048 if (tmpmode == BLKmode)
7049 tmpmode = DImode;
7050 exp [nexps++]
7051 = gen_rtx_EXPR_LIST (VOIDmode,
7052 gen_rtx_REG (tmpmode, *intreg),
7053 GEN_INT (i*8));
7054 intreg++;
7055 break;
7056 case X86_64_SSESF_CLASS:
7057 exp [nexps++]
7058 = gen_rtx_EXPR_LIST (VOIDmode,
7059 gen_rtx_REG (SFmode,
7060 SSE_REGNO (sse_regno)),
7061 GEN_INT (i*8));
7062 sse_regno++;
7063 break;
7064 case X86_64_SSEDF_CLASS:
7065 exp [nexps++]
7066 = gen_rtx_EXPR_LIST (VOIDmode,
7067 gen_rtx_REG (DFmode,
7068 SSE_REGNO (sse_regno)),
7069 GEN_INT (i*8));
7070 sse_regno++;
7071 break;
7072 case X86_64_SSE_CLASS:
7073 pos = i;
7074 switch (n)
7075 {
7076 case 1:
7077 tmpmode = DImode;
7078 break;
7079 case 2:
7080 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7081 {
7082 tmpmode = TImode;
7083 i++;
7084 }
7085 else
7086 tmpmode = DImode;
7087 break;
7088 case 4:
7089 gcc_assert (i == 0
7090 && regclass[1] == X86_64_SSEUP_CLASS
7091 && regclass[2] == X86_64_SSEUP_CLASS
7092 && regclass[3] == X86_64_SSEUP_CLASS);
7093 tmpmode = OImode;
7094 i += 3;
7095 break;
7096 case 8:
7097 gcc_assert (i == 0
7098 && regclass[1] == X86_64_SSEUP_CLASS
7099 && regclass[2] == X86_64_SSEUP_CLASS
7100 && regclass[3] == X86_64_SSEUP_CLASS
7101 && regclass[4] == X86_64_SSEUP_CLASS
7102 && regclass[5] == X86_64_SSEUP_CLASS
7103 && regclass[6] == X86_64_SSEUP_CLASS
7104 && regclass[7] == X86_64_SSEUP_CLASS);
7105 tmpmode = XImode;
7106 i += 7;
7107 break;
7108 default:
7109 gcc_unreachable ();
7110 }
7111 exp [nexps++]
7112 = gen_rtx_EXPR_LIST (VOIDmode,
7113 gen_rtx_REG (tmpmode,
7114 SSE_REGNO (sse_regno)),
7115 GEN_INT (pos*8));
7116 sse_regno++;
7117 break;
7118 default:
7119 gcc_unreachable ();
7120 }
7121 }
7122
7123 /* Empty aligned struct, union or class. */
7124 if (nexps == 0)
7125 return NULL;
7126
7127 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7128 for (i = 0; i < nexps; i++)
7129 XVECEXP (ret, 0, i) = exp [i];
7130 return ret;
7131 }
7132
7133 /* Update the data in CUM to advance over an argument of mode MODE
7134 and data type TYPE. (TYPE is null for libcalls where that information
7135 may not be available.) */
7136
7137 static void
7138 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7139 const_tree type, HOST_WIDE_INT bytes,
7140 HOST_WIDE_INT words)
7141 {
7142 switch (mode)
7143 {
7144 default:
7145 break;
7146
7147 case BLKmode:
7148 if (bytes < 0)
7149 break;
7150 /* FALLTHRU */
7151
7152 case DImode:
7153 case SImode:
7154 case HImode:
7155 case QImode:
7156 cum->words += words;
7157 cum->nregs -= words;
7158 cum->regno += words;
7159
7160 if (cum->nregs <= 0)
7161 {
7162 cum->nregs = 0;
7163 cum->regno = 0;
7164 }
7165 break;
7166
7167 case OImode:
7168 /* OImode shouldn't be used directly. */
7169 gcc_unreachable ();
7170
7171 case DFmode:
7172 if (cum->float_in_sse < 2)
7173 break;
7174 case SFmode:
7175 if (cum->float_in_sse < 1)
7176 break;
7177 /* FALLTHRU */
7178
7179 case V8SFmode:
7180 case V8SImode:
7181 case V64QImode:
7182 case V32HImode:
7183 case V16SImode:
7184 case V8DImode:
7185 case V16SFmode:
7186 case V8DFmode:
7187 case V32QImode:
7188 case V16HImode:
7189 case V4DFmode:
7190 case V4DImode:
7191 case TImode:
7192 case V16QImode:
7193 case V8HImode:
7194 case V4SImode:
7195 case V2DImode:
7196 case V4SFmode:
7197 case V2DFmode:
7198 if (!type || !AGGREGATE_TYPE_P (type))
7199 {
7200 cum->sse_words += words;
7201 cum->sse_nregs -= 1;
7202 cum->sse_regno += 1;
7203 if (cum->sse_nregs <= 0)
7204 {
7205 cum->sse_nregs = 0;
7206 cum->sse_regno = 0;
7207 }
7208 }
7209 break;
7210
7211 case V8QImode:
7212 case V4HImode:
7213 case V2SImode:
7214 case V2SFmode:
7215 case V1TImode:
7216 case V1DImode:
7217 if (!type || !AGGREGATE_TYPE_P (type))
7218 {
7219 cum->mmx_words += words;
7220 cum->mmx_nregs -= 1;
7221 cum->mmx_regno += 1;
7222 if (cum->mmx_nregs <= 0)
7223 {
7224 cum->mmx_nregs = 0;
7225 cum->mmx_regno = 0;
7226 }
7227 }
7228 break;
7229 }
7230 }
7231
7232 static void
7233 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7234 const_tree type, HOST_WIDE_INT words, bool named)
7235 {
7236 int int_nregs, sse_nregs;
7237
7238 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
7239 if (!named && (VALID_AVX512F_REG_MODE (mode)
7240 || VALID_AVX256_REG_MODE (mode)))
7241 return;
7242
7243 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7244 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7245 {
7246 cum->nregs -= int_nregs;
7247 cum->sse_nregs -= sse_nregs;
7248 cum->regno += int_nregs;
7249 cum->sse_regno += sse_nregs;
7250 }
7251 else
7252 {
7253 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7254 cum->words = (cum->words + align - 1) & ~(align - 1);
7255 cum->words += words;
7256 }
7257 }
7258
7259 static void
7260 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7261 HOST_WIDE_INT words)
7262 {
7263 /* Sizes other than 1, 2, 4 or 8 bytes should have been passed indirectly. */
7264 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7265
7266 cum->words += words;
7267 if (cum->nregs > 0)
7268 {
7269 cum->nregs -= 1;
7270 cum->regno += 1;
7271 }
7272 }
7273
7274 /* Update the data in CUM to advance over an argument of mode MODE and
7275 data type TYPE. (TYPE is null for libcalls where that information
7276 may not be available.) */
7277
7278 static void
7279 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7280 const_tree type, bool named)
7281 {
7282 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7283 HOST_WIDE_INT bytes, words;
7284
7285 if (mode == BLKmode)
7286 bytes = int_size_in_bytes (type);
7287 else
7288 bytes = GET_MODE_SIZE (mode);
7289 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7290
7291 if (type)
7292 mode = type_natural_mode (type, NULL, false);
7293
7294 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7295 function_arg_advance_ms_64 (cum, bytes, words);
7296 else if (TARGET_64BIT)
7297 function_arg_advance_64 (cum, mode, type, words, named);
7298 else
7299 function_arg_advance_32 (cum, mode, type, bytes, words);
7300 }
7301
7302 /* Define where to put the arguments to a function.
7303 Value is zero to push the argument on the stack,
7304 or a hard register in which to store the argument.
7305
7306 MODE is the argument's machine mode.
7307 TYPE is the data type of the argument (as a tree).
7308 This is null for libcalls where that information may
7309 not be available.
7310 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7311 the preceding args and about the function being called.
7312 NAMED is nonzero if this argument is a named parameter
7313 (otherwise it is an extra parameter matching an ellipsis). */
7314
7315 static rtx
7316 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7317 enum machine_mode orig_mode, const_tree type,
7318 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7319 {
7320 /* Avoid the AL settings for the Unix64 ABI. */
7321 if (mode == VOIDmode)
7322 return constm1_rtx;
7323
7324 switch (mode)
7325 {
7326 default:
7327 break;
7328
7329 case BLKmode:
7330 if (bytes < 0)
7331 break;
7332 /* FALLTHRU */
7333 case DImode:
7334 case SImode:
7335 case HImode:
7336 case QImode:
7337 if (words <= cum->nregs)
7338 {
7339 int regno = cum->regno;
7340
7341 /* Fastcall allocates the first two DWORD (SImode) or
7342 smaller arguments in ECX and EDX if the argument isn't an
7343 aggregate type. */
7344 if (cum->fastcall)
7345 {
7346 if (mode == BLKmode
7347 || mode == DImode
7348 || (type && AGGREGATE_TYPE_P (type)))
7349 break;
7350
7351 /* ECX, not EAX, is the first allocated register. */
7352 if (regno == AX_REG)
7353 regno = CX_REG;
7354 }
7355 return gen_rtx_REG (mode, regno);
7356 }
7357 break;
7358
7359 case DFmode:
7360 if (cum->float_in_sse < 2)
7361 break;
7362 case SFmode:
7363 if (cum->float_in_sse < 1)
7364 break;
7365 /* FALLTHRU */
7366 case TImode:
7367 /* In 32bit, we pass TImode in xmm registers. */
7368 case V16QImode:
7369 case V8HImode:
7370 case V4SImode:
7371 case V2DImode:
7372 case V4SFmode:
7373 case V2DFmode:
7374 if (!type || !AGGREGATE_TYPE_P (type))
7375 {
7376 if (cum->sse_nregs)
7377 return gen_reg_or_parallel (mode, orig_mode,
7378 cum->sse_regno + FIRST_SSE_REG);
7379 }
7380 break;
7381
7382 case OImode:
7383 case XImode:
7384 /* OImode and XImode shouldn't be used directly. */
7385 gcc_unreachable ();
7386
7387 case V64QImode:
7388 case V32HImode:
7389 case V16SImode:
7390 case V8DImode:
7391 case V16SFmode:
7392 case V8DFmode:
7393 case V8SFmode:
7394 case V8SImode:
7395 case V32QImode:
7396 case V16HImode:
7397 case V4DFmode:
7398 case V4DImode:
7399 if (!type || !AGGREGATE_TYPE_P (type))
7400 {
7401 if (cum->sse_nregs)
7402 return gen_reg_or_parallel (mode, orig_mode,
7403 cum->sse_regno + FIRST_SSE_REG);
7404 }
7405 break;
7406
7407 case V8QImode:
7408 case V4HImode:
7409 case V2SImode:
7410 case V2SFmode:
7411 case V1TImode:
7412 case V1DImode:
7413 if (!type || !AGGREGATE_TYPE_P (type))
7414 {
7415 if (cum->mmx_nregs)
7416 return gen_reg_or_parallel (mode, orig_mode,
7417 cum->mmx_regno + FIRST_MMX_REG);
7418 }
7419 break;
7420 }
7421
7422 return NULL_RTX;
7423 }
7424
7425 static rtx
7426 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7427 enum machine_mode orig_mode, const_tree type, bool named)
7428 {
7429 /* Handle the hidden AL argument containing the number of SSE registers
7430 used by varargs x86-64 functions. */
7431 if (mode == VOIDmode)
7432 return GEN_INT (cum->maybe_vaarg
7433 ? (cum->sse_nregs < 0
7434 ? X86_64_SSE_REGPARM_MAX
7435 : cum->sse_regno)
7436 : -1);
7437
7438 switch (mode)
7439 {
7440 default:
7441 break;
7442
7443 case V8SFmode:
7444 case V8SImode:
7445 case V32QImode:
7446 case V16HImode:
7447 case V4DFmode:
7448 case V4DImode:
7449 case V16SFmode:
7450 case V16SImode:
7451 case V64QImode:
7452 case V32HImode:
7453 case V8DFmode:
7454 case V8DImode:
7455 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7456 if (!named)
7457 return NULL;
7458 break;
7459 }
7460
7461 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7462 cum->sse_nregs,
7463 &x86_64_int_parameter_registers [cum->regno],
7464 cum->sse_regno);
7465 }
7466
7467 static rtx
7468 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7469 enum machine_mode orig_mode, bool named,
7470 HOST_WIDE_INT bytes)
7471 {
7472 unsigned int regno;
7473
7474 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7475 We use the value -2 to specify that the current function call is MS ABI. */
7476 if (mode == VOIDmode)
7477 return GEN_INT (-2);
7478
7479 /* If we've run out of registers, it goes on the stack. */
7480 if (cum->nregs == 0)
7481 return NULL_RTX;
7482
7483 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7484
7485 /* Only floating point modes are passed in anything but integer regs. */
7486 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7487 {
7488 if (named)
7489 regno = cum->regno + FIRST_SSE_REG;
7490 else
7491 {
7492 rtx t1, t2;
7493
7494 /* Unnamed floating parameters are passed in both the
7495 SSE and integer registers. */
7496 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7497 t2 = gen_rtx_REG (mode, regno);
7498 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7499 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7500 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7501 }
7502 }
7503 /* Handle aggregate types passed in registers. */
7504 if (orig_mode == BLKmode)
7505 {
7506 if (bytes > 0 && bytes <= 8)
7507 mode = (bytes > 4 ? DImode : SImode);
7508 if (mode == BLKmode)
7509 mode = DImode;
7510 }
7511
7512 return gen_reg_or_parallel (mode, orig_mode, regno);
7513 }
7514
7515 /* Return where to put the arguments to a function.
7516 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7517
7518 MODE is the argument's machine mode. TYPE is the data type of the
7519 argument. It is null for libcalls where that information may not be
7520 available. CUM gives information about the preceding args and about
7521 the function being called. NAMED is nonzero if this argument is a
7522 named parameter (otherwise it is an extra parameter matching an
7523 ellipsis). */
7524
7525 static rtx
7526 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7527 const_tree type, bool named)
7528 {
7529 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7530 enum machine_mode mode = omode;
7531 HOST_WIDE_INT bytes, words;
7532 rtx arg;
7533
7534 if (mode == BLKmode)
7535 bytes = int_size_in_bytes (type);
7536 else
7537 bytes = GET_MODE_SIZE (mode);
7538 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7539
7540 /* To simplify the code below, represent vector types with a vector mode
7541 even if MMX/SSE are not active. */
7542 if (type && TREE_CODE (type) == VECTOR_TYPE)
7543 mode = type_natural_mode (type, cum, false);
7544
7545 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7546 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7547 else if (TARGET_64BIT)
7548 arg = function_arg_64 (cum, mode, omode, type, named);
7549 else
7550 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7551
7552 return arg;
7553 }
7554
7555 /* A C expression that indicates when an argument must be passed by
7556 reference. If nonzero for an argument, a copy of that argument is
7557 made in memory and a pointer to the argument is passed instead of
7558 the argument itself. The pointer is passed in whatever way is
7559 appropriate for passing a pointer to that type. */
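/* Editorial examples for the MS ABI branch below: a 5-byte struct or a
   16-byte __m128 argument is passed by reference because its size is not
   1, 2, 4 or 8 bytes, while a plain int or an 8-byte struct is passed by
   value.  On the 64-bit SYSV side only variable-sized types take this
   path.  */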
7560
7561 static bool
7562 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7563 const_tree type, bool)
7564 {
7565 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7566
7567 /* See Windows x64 Software Convention. */
7568 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7569 {
7570 int msize = (int) GET_MODE_SIZE (mode);
7571 if (type)
7572 {
7573 /* Arrays are passed by reference. */
7574 if (TREE_CODE (type) == ARRAY_TYPE)
7575 return true;
7576
7577 if (AGGREGATE_TYPE_P (type))
7578 {
7579 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7580 are passed by reference. */
7581 msize = int_size_in_bytes (type);
7582 }
7583 }
7584
7585 /* __m128 is passed by reference. */
7586 switch (msize) {
7587 case 1: case 2: case 4: case 8:
7588 break;
7589 default:
7590 return true;
7591 }
7592 }
7593 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7594 return 1;
7595
7596 return 0;
7597 }
7598
7599 /* Return true when TYPE should be 128bit aligned for 32bit argument
7600 passing ABI. XXX: This function is obsolete and is only used for
7601 checking psABI compatibility with previous versions of GCC. */
7602
7603 static bool
7604 ix86_compat_aligned_value_p (const_tree type)
7605 {
7606 enum machine_mode mode = TYPE_MODE (type);
7607 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7608 || mode == TDmode
7609 || mode == TFmode
7610 || mode == TCmode)
7611 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7612 return true;
7613 if (TYPE_ALIGN (type) < 128)
7614 return false;
7615
7616 if (AGGREGATE_TYPE_P (type))
7617 {
7618 /* Walk the aggregates recursively. */
7619 switch (TREE_CODE (type))
7620 {
7621 case RECORD_TYPE:
7622 case UNION_TYPE:
7623 case QUAL_UNION_TYPE:
7624 {
7625 tree field;
7626
7627 /* Walk all the structure fields. */
7628 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7629 {
7630 if (TREE_CODE (field) == FIELD_DECL
7631 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7632 return true;
7633 }
7634 break;
7635 }
7636
7637 case ARRAY_TYPE:
7638 /* Just for use if some languages pass arrays by value. */
7639 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7640 return true;
7641 break;
7642
7643 default:
7644 gcc_unreachable ();
7645 }
7646 }
7647 return false;
7648 }
7649
7650 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7651 XXX: This function is obsolete and is only used for checking psABI
7652 compatibility with previous versions of GCC. */
7653
7654 static unsigned int
7655 ix86_compat_function_arg_boundary (enum machine_mode mode,
7656 const_tree type, unsigned int align)
7657 {
7658 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7659 natural boundaries. */
7660 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7661 {
7662 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7663 make an exception for SSE modes since these require 128bit
7664 alignment.
7665
7666 The handling here differs from field_alignment. ICC aligns MMX
7667 arguments to 4 byte boundaries, while structure fields are aligned
7668 to 8 byte boundaries. */
7669 if (!type)
7670 {
7671 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7672 align = PARM_BOUNDARY;
7673 }
7674 else
7675 {
7676 if (!ix86_compat_aligned_value_p (type))
7677 align = PARM_BOUNDARY;
7678 }
7679 }
7680 if (align > BIGGEST_ALIGNMENT)
7681 align = BIGGEST_ALIGNMENT;
7682 return align;
7683 }
7684
7685 /* Return true when TYPE should be 128bit aligned for 32bit argument
7686 passing ABI. */
7687
7688 static bool
7689 ix86_contains_aligned_value_p (const_tree type)
7690 {
7691 enum machine_mode mode = TYPE_MODE (type);
7692
7693 if (mode == XFmode || mode == XCmode)
7694 return false;
7695
7696 if (TYPE_ALIGN (type) < 128)
7697 return false;
7698
7699 if (AGGREGATE_TYPE_P (type))
7700 {
7701 /* Walk the aggregates recursively. */
7702 switch (TREE_CODE (type))
7703 {
7704 case RECORD_TYPE:
7705 case UNION_TYPE:
7706 case QUAL_UNION_TYPE:
7707 {
7708 tree field;
7709
7710 /* Walk all the structure fields. */
7711 for (field = TYPE_FIELDS (type);
7712 field;
7713 field = DECL_CHAIN (field))
7714 {
7715 if (TREE_CODE (field) == FIELD_DECL
7716 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7717 return true;
7718 }
7719 break;
7720 }
7721
7722 case ARRAY_TYPE:
7723 /* Just for use if some languages pass arrays by value. */
7724 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7725 return true;
7726 break;
7727
7728 default:
7729 gcc_unreachable ();
7730 }
7731 }
7732 else
7733 return TYPE_ALIGN (type) >= 128;
7734
7735 return false;
7736 }
7737
7738 /* Gives the alignment boundary, in bits, of an argument with the
7739 specified mode and type. */
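/* Editorial examples: on ia32 a plain int argument gets PARM_BOUNDARY
   (32 bits) while an __m128 argument keeps its natural 128-bit
   alignment; the -Wpsabi note below fires when this result differs from
   what GCC 4.5 and earlier would have computed.  */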
7740
7741 static unsigned int
7742 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7743 {
7744 unsigned int align;
7745 if (type)
7746 {
7747 /* Since the main variant type is used for the call, convert TYPE
7748 to its main variant. */
7749 type = TYPE_MAIN_VARIANT (type);
7750 align = TYPE_ALIGN (type);
7751 }
7752 else
7753 align = GET_MODE_ALIGNMENT (mode);
7754 if (align < PARM_BOUNDARY)
7755 align = PARM_BOUNDARY;
7756 else
7757 {
7758 static bool warned;
7759 unsigned int saved_align = align;
7760
7761 if (!TARGET_64BIT)
7762 {
7763 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7764 if (!type)
7765 {
7766 if (mode == XFmode || mode == XCmode)
7767 align = PARM_BOUNDARY;
7768 }
7769 else if (!ix86_contains_aligned_value_p (type))
7770 align = PARM_BOUNDARY;
7771
7772 if (align < 128)
7773 align = PARM_BOUNDARY;
7774 }
7775
7776 if (warn_psabi
7777 && !warned
7778 && align != ix86_compat_function_arg_boundary (mode, type,
7779 saved_align))
7780 {
7781 warned = true;
7782 inform (input_location,
7783 "The ABI for passing parameters with %d-byte"
7784 " alignment has changed in GCC 4.6",
7785 align / BITS_PER_UNIT);
7786 }
7787 }
7788
7789 return align;
7790 }
7791
7792 /* Return true if N is a possible register number of function value. */
7793
7794 static bool
7795 ix86_function_value_regno_p (const unsigned int regno)
7796 {
7797 switch (regno)
7798 {
7799 case AX_REG:
7800 return true;
7801 case DX_REG:
7802 return (!TARGET_64BIT || ix86_abi != MS_ABI);
7803 case DI_REG:
7804 case SI_REG:
7805 return TARGET_64BIT && ix86_abi != MS_ABI;
7806
7807 /* Complex values are returned in %st(0)/%st(1) pair. */
7808 case ST0_REG:
7809 case ST1_REG:
7810 /* TODO: The function should depend on current function ABI but
7811 builtins.c would need updating then. Therefore we use the
7812 default ABI. */
7813 if (TARGET_64BIT && ix86_abi == MS_ABI)
7814 return false;
7815 return TARGET_FLOAT_RETURNS_IN_80387;
7816
7817 /* Complex values are returned in %xmm0/%xmm1 pair. */
7818 case XMM0_REG:
7819 case XMM1_REG:
7820 return TARGET_SSE;
7821
7822 case MM0_REG:
7823 if (TARGET_MACHO || TARGET_64BIT)
7824 return false;
7825 return TARGET_MMX;
7826 }
7827
7828 return false;
7829 }
7830
7831 /* Define how to find the value returned by a function.
7832 VALTYPE is the data type of the value (as a tree).
7833 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7834 otherwise, FUNC is 0. */
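/* Editorial examples for the 32-bit case below: a function returning int
   uses %eax, one returning double normally uses %st(0), and for local or
   sseregparm functions with SSE math enabled a float/double return is
   redirected to %xmm0.  */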
7835
7836 static rtx
7837 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7838 const_tree fntype, const_tree fn)
7839 {
7840 unsigned int regno;
7841
7842 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7843 we normally prevent this case when mmx is not available. However
7844 some ABIs may require the result to be returned like DImode. */
7845 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7846 regno = FIRST_MMX_REG;
7847
7848 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7849 we prevent this case when sse is not available. However some ABIs
7850 may require the result to be returned like integer TImode. */
7851 else if (mode == TImode
7852 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7853 regno = FIRST_SSE_REG;
7854
7855 /* 32-byte vector modes in %ymm0. */
7856 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7857 regno = FIRST_SSE_REG;
7858
7859 /* 64-byte vector modes in %zmm0. */
7860 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7861 regno = FIRST_SSE_REG;
7862
7863 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7864 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7865 regno = FIRST_FLOAT_REG;
7866 else
7867 /* Most things go in %eax. */
7868 regno = AX_REG;
7869
7870 /* Override FP return register with %xmm0 for local functions when
7871 SSE math is enabled or for functions with sseregparm attribute. */
7872 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7873 {
7874 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7875 if ((sse_level >= 1 && mode == SFmode)
7876 || (sse_level == 2 && mode == DFmode))
7877 regno = FIRST_SSE_REG;
7878 }
7879
7880 /* OImode shouldn't be used directly. */
7881 gcc_assert (mode != OImode);
7882
7883 return gen_rtx_REG (orig_mode, regno);
7884 }
7885
7886 static rtx
7887 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7888 const_tree valtype)
7889 {
7890 rtx ret;
7891
7892 /* Handle libcalls, which don't provide a type node. */
7893 if (valtype == NULL)
7894 {
7895 unsigned int regno;
7896
7897 switch (mode)
7898 {
7899 case SFmode:
7900 case SCmode:
7901 case DFmode:
7902 case DCmode:
7903 case TFmode:
7904 case SDmode:
7905 case DDmode:
7906 case TDmode:
7907 regno = FIRST_SSE_REG;
7908 break;
7909 case XFmode:
7910 case XCmode:
7911 regno = FIRST_FLOAT_REG;
7912 break;
7913 case TCmode:
7914 return NULL;
7915 default:
7916 regno = AX_REG;
7917 }
7918
7919 return gen_rtx_REG (mode, regno);
7920 }
7921 else if (POINTER_TYPE_P (valtype))
7922 {
7923 /* Pointers are always returned in word_mode. */
7924 mode = word_mode;
7925 }
7926
7927 ret = construct_container (mode, orig_mode, valtype, 1,
7928 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7929 x86_64_int_return_registers, 0);
7930
7931 /* For zero-sized structures, construct_container returns NULL, but we
7932 need to keep the rest of the compiler happy by returning a meaningful value. */
7933 if (!ret)
7934 ret = gen_rtx_REG (orig_mode, AX_REG);
7935
7936 return ret;
7937 }
7938
7939 static rtx
7940 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7941 const_tree valtype)
7942 {
7943 unsigned int regno = AX_REG;
7944
7945 if (TARGET_SSE)
7946 {
7947 switch (GET_MODE_SIZE (mode))
7948 {
7949 case 16:
7950 if (valtype != NULL_TREE
7951 && !VECTOR_INTEGER_TYPE_P (valtype)
7953 && !INTEGRAL_TYPE_P (valtype)
7954 && !VECTOR_FLOAT_TYPE_P (valtype))
7955 break;
7956 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7957 && !COMPLEX_MODE_P (mode))
7958 regno = FIRST_SSE_REG;
7959 break;
7960 case 8:
7961 case 4:
7962 if (mode == SFmode || mode == DFmode)
7963 regno = FIRST_SSE_REG;
7964 break;
7965 default:
7966 break;
7967 }
7968 }
7969 return gen_rtx_REG (orig_mode, regno);
7970 }
7971
7972 static rtx
7973 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7974 enum machine_mode orig_mode, enum machine_mode mode)
7975 {
7976 const_tree fn, fntype;
7977
7978 fn = NULL_TREE;
7979 if (fntype_or_decl && DECL_P (fntype_or_decl))
7980 fn = fntype_or_decl;
7981 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7982
7983 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7984 return function_value_ms_64 (orig_mode, mode, valtype);
7985 else if (TARGET_64BIT)
7986 return function_value_64 (orig_mode, mode, valtype);
7987 else
7988 return function_value_32 (orig_mode, mode, fntype, fn);
7989 }
7990
7991 static rtx
7992 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
7993 {
7994 enum machine_mode mode, orig_mode;
7995
7996 orig_mode = TYPE_MODE (valtype);
7997 mode = type_natural_mode (valtype, NULL, true);
7998 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7999 }
8000
8001 /* Pointer function arguments and return values are promoted to
8002 word_mode. */
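/* Editorial example: with -mx32 pointers are SImode while word_mode is
   DImode, so a pointer argument or return value is zero-extended to
   64 bits (POINTERS_EXTEND_UNSIGNED).  */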
8003
8004 static enum machine_mode
8005 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
8006 int *punsignedp, const_tree fntype,
8007 int for_return)
8008 {
8009 if (type != NULL_TREE && POINTER_TYPE_P (type))
8010 {
8011 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8012 return word_mode;
8013 }
8014 return default_promote_function_mode (type, mode, punsignedp, fntype,
8015 for_return);
8016 }
8017
8018 /* Return true if a structure, union or array with MODE containing FIELD
8019 should be accessed using BLKmode. */
8020
8021 static bool
8022 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
8023 {
8024 /* Union with XFmode must be in BLKmode. */
8025 return (mode == XFmode
8026 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8027 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8028 }
8029
8030 rtx
8031 ix86_libcall_value (enum machine_mode mode)
8032 {
8033 return ix86_function_value_1 (NULL, NULL, mode, mode);
8034 }
8035
8036 /* Return true iff type is returned in memory. */
8037
8038 static bool
8039 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8040 {
8041 #ifdef SUBTARGET_RETURN_IN_MEMORY
8042 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8043 #else
8044 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8045 HOST_WIDE_INT size;
8046
8047 if (TARGET_64BIT)
8048 {
8049 if (ix86_function_type_abi (fntype) == MS_ABI)
8050 {
8051 size = int_size_in_bytes (type);
8052
8053 /* __m128 is returned in xmm0. */
8054 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8055 || INTEGRAL_TYPE_P (type)
8056 || VECTOR_FLOAT_TYPE_P (type))
8057 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8058 && !COMPLEX_MODE_P (mode)
8059 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8060 return false;
8061
8062 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8063 return size != 1 && size != 2 && size != 4 && size != 8;
8064 }
8065 else
8066 {
8067 int needed_intregs, needed_sseregs;
8068
8069 return examine_argument (mode, type, 1,
8070 &needed_intregs, &needed_sseregs);
8071 }
8072 }
8073 else
8074 {
8075 if (mode == BLKmode)
8076 return true;
8077
8078 size = int_size_in_bytes (type);
8079
8080 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8081 return false;
8082
8083 if (VECTOR_MODE_P (mode) || mode == TImode)
8084 {
8085 /* User-created vectors small enough to fit in EAX. */
8086 if (size < 8)
8087 return false;
8088
8089 /* Unless the ABI prescribes otherwise,
8090 MMX/3dNow values are returned in MM0 if available. */
8091
8092 if (size == 8)
8093 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8094
8095 /* SSE values are returned in XMM0 if available. */
8096 if (size == 16)
8097 return !TARGET_SSE;
8098
8099 /* AVX values are returned in YMM0 if available. */
8100 if (size == 32)
8101 return !TARGET_AVX;
8102
8103 /* AVX512F values are returned in ZMM0 if available. */
8104 if (size == 64)
8105 return !TARGET_AVX512F;
8106 }
8107
8108 if (mode == XFmode)
8109 return false;
8110
8111 if (size > 12)
8112 return true;
8113
8114 /* OImode shouldn't be used directly. */
8115 gcc_assert (mode != OImode);
8116
8117 return false;
8118 }
8119 #endif
8120 }
8121
8122 \f
8123 /* Create the va_list data type. */
8124
8125 /* Returns the calling-convention-specific va_list data type.
8126 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
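/* For reference (editorial note): the 64-bit SYSV record built below
   matches the psABI layout

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag[1];

   while the i386 and MS ABIs use a plain character pointer instead.  */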
8127
8128 static tree
8129 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8130 {
8131 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8132
8133 /* For i386 we use plain pointer to argument area. */
8134 if (!TARGET_64BIT || abi == MS_ABI)
8135 return build_pointer_type (char_type_node);
8136
8137 record = lang_hooks.types.make_type (RECORD_TYPE);
8138 type_decl = build_decl (BUILTINS_LOCATION,
8139 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8140
8141 f_gpr = build_decl (BUILTINS_LOCATION,
8142 FIELD_DECL, get_identifier ("gp_offset"),
8143 unsigned_type_node);
8144 f_fpr = build_decl (BUILTINS_LOCATION,
8145 FIELD_DECL, get_identifier ("fp_offset"),
8146 unsigned_type_node);
8147 f_ovf = build_decl (BUILTINS_LOCATION,
8148 FIELD_DECL, get_identifier ("overflow_arg_area"),
8149 ptr_type_node);
8150 f_sav = build_decl (BUILTINS_LOCATION,
8151 FIELD_DECL, get_identifier ("reg_save_area"),
8152 ptr_type_node);
8153
8154 va_list_gpr_counter_field = f_gpr;
8155 va_list_fpr_counter_field = f_fpr;
8156
8157 DECL_FIELD_CONTEXT (f_gpr) = record;
8158 DECL_FIELD_CONTEXT (f_fpr) = record;
8159 DECL_FIELD_CONTEXT (f_ovf) = record;
8160 DECL_FIELD_CONTEXT (f_sav) = record;
8161
8162 TYPE_STUB_DECL (record) = type_decl;
8163 TYPE_NAME (record) = type_decl;
8164 TYPE_FIELDS (record) = f_gpr;
8165 DECL_CHAIN (f_gpr) = f_fpr;
8166 DECL_CHAIN (f_fpr) = f_ovf;
8167 DECL_CHAIN (f_ovf) = f_sav;
8168
8169 layout_type (record);
8170
8171 /* The correct type is an array type of one element. */
8172 return build_array_type (record, build_index_type (size_zero_node));
8173 }
8174
8175 /* Setup the builtin va_list data type and for 64-bit the additional
8176 calling convention specific va_list data types. */
8177
8178 static tree
8179 ix86_build_builtin_va_list (void)
8180 {
8181 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8182
8183 /* Initialize abi specific va_list builtin types. */
8184 if (TARGET_64BIT)
8185 {
8186 tree t;
8187 if (ix86_abi == MS_ABI)
8188 {
8189 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8190 if (TREE_CODE (t) != RECORD_TYPE)
8191 t = build_variant_type_copy (t);
8192 sysv_va_list_type_node = t;
8193 }
8194 else
8195 {
8196 t = ret;
8197 if (TREE_CODE (t) != RECORD_TYPE)
8198 t = build_variant_type_copy (t);
8199 sysv_va_list_type_node = t;
8200 }
8201 if (ix86_abi != MS_ABI)
8202 {
8203 t = ix86_build_builtin_va_list_abi (MS_ABI);
8204 if (TREE_CODE (t) != RECORD_TYPE)
8205 t = build_variant_type_copy (t);
8206 ms_va_list_type_node = t;
8207 }
8208 else
8209 {
8210 t = ret;
8211 if (TREE_CODE (t) != RECORD_TYPE)
8212 t = build_variant_type_copy (t);
8213 ms_va_list_type_node = t;
8214 }
8215 }
8216
8217 return ret;
8218 }
8219
8220 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8221
8222 static void
8223 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8224 {
8225 rtx save_area, mem;
8226 alias_set_type set;
8227 int i, max;
8228
8229 /* GPR size of varargs save area. */
8230 if (cfun->va_list_gpr_size)
8231 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8232 else
8233 ix86_varargs_gpr_size = 0;
8234
8235 /* FPR size of varargs save area. We don't need it if we don't pass
8236 anything in SSE registers. */
8237 if (TARGET_SSE && cfun->va_list_fpr_size)
8238 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8239 else
8240 ix86_varargs_fpr_size = 0;
8241
8242 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8243 return;
8244
8245 save_area = frame_pointer_rtx;
8246 set = get_varargs_alias_set ();
8247
8248 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8249 if (max > X86_64_REGPARM_MAX)
8250 max = X86_64_REGPARM_MAX;
8251
8252 for (i = cum->regno; i < max; i++)
8253 {
8254 mem = gen_rtx_MEM (word_mode,
8255 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8256 MEM_NOTRAP_P (mem) = 1;
8257 set_mem_alias_set (mem, set);
8258 emit_move_insn (mem,
8259 gen_rtx_REG (word_mode,
8260 x86_64_int_parameter_registers[i]));
8261 }
8262
8263 if (ix86_varargs_fpr_size)
8264 {
8265 enum machine_mode smode;
8266 rtx_code_label *label;
8267 rtx test;
8268
8269 /* Now emit code to save SSE registers. The AX parameter contains the
8270 number of SSE parameter registers used to call this function, though
8271 all we actually check here is its zero/non-zero status. */
8272
8273 label = gen_label_rtx ();
8274 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8275 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8276 label));
8277
8278 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8279 we used movdqa (i.e. TImode) instead? Perhaps even better would
8280 be if we could determine the real mode of the data, via a hook
8281 into pass_stdarg. Ignore all that for now. */
8282 smode = V4SFmode;
8283 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8284 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8285
8286 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8287 if (max > X86_64_SSE_REGPARM_MAX)
8288 max = X86_64_SSE_REGPARM_MAX;
8289
8290 for (i = cum->sse_regno; i < max; ++i)
8291 {
8292 mem = plus_constant (Pmode, save_area,
8293 i * 16 + ix86_varargs_gpr_size);
8294 mem = gen_rtx_MEM (smode, mem);
8295 MEM_NOTRAP_P (mem) = 1;
8296 set_mem_alias_set (mem, set);
8297 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8298
8299 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8300 }
8301
8302 emit_label (label);
8303 }
8304 }
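/* A sketch of the register save area laid out above, assuming every
   argument register is dumped (X86_64_REGPARM_MAX == 6 integer and
   X86_64_SSE_REGPARM_MAX == 8 SSE registers on this target):

     save_area +   0 ..  47   rdi, rsi, rdx, rcx, r8, r9   (8 bytes each)
     save_area +  48 .. 175   xmm0 .. xmm7                 (16 bytes each)

   Only the slots from cum->regno / cum->sse_regno upward are written,
   and the SSE block is skipped entirely when AL is zero at entry.  */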
8305
8306 static void
8307 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8308 {
8309 alias_set_type set = get_varargs_alias_set ();
8310 int i;
8311
8312 /* Reset to zero, as a SysV va_arg may have been used
8313 before. */
8314 ix86_varargs_gpr_size = 0;
8315 ix86_varargs_fpr_size = 0;
8316
8317 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8318 {
8319 rtx reg, mem;
8320
8321 mem = gen_rtx_MEM (Pmode,
8322 plus_constant (Pmode, virtual_incoming_args_rtx,
8323 i * UNITS_PER_WORD));
8324 MEM_NOTRAP_P (mem) = 1;
8325 set_mem_alias_set (mem, set);
8326
8327 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8328 emit_move_insn (mem, reg);
8329 }
8330 }
8331
8332 static void
8333 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8334 tree type, int *, int no_rtl)
8335 {
8336 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8337 CUMULATIVE_ARGS next_cum;
8338 tree fntype;
8339
8340 /* This argument doesn't appear to be used anymore, which is good,
8341 because the old code here didn't suppress rtl generation. */
8342 gcc_assert (!no_rtl);
8343
8344 if (!TARGET_64BIT)
8345 return;
8346
8347 fntype = TREE_TYPE (current_function_decl);
8348
8349 /* For varargs, we do not want to skip the dummy va_dcl argument.
8350 For stdargs, we do want to skip the last named argument. */
8351 next_cum = *cum;
8352 if (stdarg_p (fntype))
8353 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8354 true);
8355
8356 if (cum->call_abi == MS_ABI)
8357 setup_incoming_varargs_ms_64 (&next_cum);
8358 else
8359 setup_incoming_varargs_64 (&next_cum);
8360 }
8361
8362 /* Check whether TYPE is a va_list of kind char *. */
8363
8364 static bool
8365 is_va_list_char_pointer (tree type)
8366 {
8367 tree canonic;
8368
8369 /* For 32-bit it is always true. */
8370 if (!TARGET_64BIT)
8371 return true;
8372 canonic = ix86_canonical_va_list_type (type);
8373 return (canonic == ms_va_list_type_node
8374 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8375 }
8376
8377 /* Implement va_start. */
8378
8379 static void
8380 ix86_va_start (tree valist, rtx nextarg)
8381 {
8382 HOST_WIDE_INT words, n_gpr, n_fpr;
8383 tree f_gpr, f_fpr, f_ovf, f_sav;
8384 tree gpr, fpr, ovf, sav, t;
8385 tree type;
8386 rtx ovf_rtx;
8387
8388 if (flag_split_stack
8389 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8390 {
8391 unsigned int scratch_regno;
8392
8393 /* When we are splitting the stack, we can't refer to the stack
8394 arguments using internal_arg_pointer, because they may be on
8395 the old stack. The split stack prologue will arrange to
8396 leave a pointer to the old stack arguments in a scratch
8397 register, which we here copy to a pseudo-register. The split
8398 stack prologue can't set the pseudo-register directly because
8399 it (the prologue) runs before any registers have been saved. */
8400
8401 scratch_regno = split_stack_prologue_scratch_regno ();
8402 if (scratch_regno != INVALID_REGNUM)
8403 {
8404 rtx reg;
8405 rtx_insn *seq;
8406
8407 reg = gen_reg_rtx (Pmode);
8408 cfun->machine->split_stack_varargs_pointer = reg;
8409
8410 start_sequence ();
8411 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8412 seq = get_insns ();
8413 end_sequence ();
8414
8415 push_topmost_sequence ();
8416 emit_insn_after (seq, entry_of_function ());
8417 pop_topmost_sequence ();
8418 }
8419 }
8420
8421 /* Only the 64-bit target needs something special. */
8422 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8423 {
8424 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8425 std_expand_builtin_va_start (valist, nextarg);
8426 else
8427 {
8428 rtx va_r, next;
8429
8430 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8431 next = expand_binop (ptr_mode, add_optab,
8432 cfun->machine->split_stack_varargs_pointer,
8433 crtl->args.arg_offset_rtx,
8434 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8435 convert_move (va_r, next, 0);
8436 }
8437 return;
8438 }
8439
8440 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8441 f_fpr = DECL_CHAIN (f_gpr);
8442 f_ovf = DECL_CHAIN (f_fpr);
8443 f_sav = DECL_CHAIN (f_ovf);
8444
8445 valist = build_simple_mem_ref (valist);
8446 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8447 /* The following should be folded into the MEM_REF offset. */
8448 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8449 f_gpr, NULL_TREE);
8450 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8451 f_fpr, NULL_TREE);
8452 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8453 f_ovf, NULL_TREE);
8454 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8455 f_sav, NULL_TREE);
8456
8457 /* Count number of gp and fp argument registers used. */
8458 words = crtl->args.info.words;
8459 n_gpr = crtl->args.info.regno;
8460 n_fpr = crtl->args.info.sse_regno;
8461
8462 if (cfun->va_list_gpr_size)
8463 {
8464 type = TREE_TYPE (gpr);
8465 t = build2 (MODIFY_EXPR, type,
8466 gpr, build_int_cst (type, n_gpr * 8));
8467 TREE_SIDE_EFFECTS (t) = 1;
8468 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8469 }
8470
8471 if (TARGET_SSE && cfun->va_list_fpr_size)
8472 {
8473 type = TREE_TYPE (fpr);
8474 t = build2 (MODIFY_EXPR, type, fpr,
8475 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8476 TREE_SIDE_EFFECTS (t) = 1;
8477 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8478 }
8479
8480 /* Find the overflow area. */
8481 type = TREE_TYPE (ovf);
8482 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8483 ovf_rtx = crtl->args.internal_arg_pointer;
8484 else
8485 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8486 t = make_tree (type, ovf_rtx);
8487 if (words != 0)
8488 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8489 t = build2 (MODIFY_EXPR, type, ovf, t);
8490 TREE_SIDE_EFFECTS (t) = 1;
8491 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8492
8493 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8494 {
8495 /* Find the register save area.
8496 The function prologue saves it right above the stack frame. */
8497 type = TREE_TYPE (sav);
8498 t = make_tree (type, frame_pointer_rtx);
8499 if (!ix86_varargs_gpr_size)
8500 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8501 t = build2 (MODIFY_EXPR, type, sav, t);
8502 TREE_SIDE_EFFECTS (t) = 1;
8503 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8504 }
8505 }
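/* In effect, the SysV 64-bit expansion above initializes the va_list
   fields roughly as follows (a sketch; n_gpr, n_fpr and words are the
   named-argument counts gathered above):

     gp_offset         = n_gpr * 8;
     fp_offset         = X86_64_REGPARM_MAX * 8 + n_fpr * 16;
     overflow_arg_area = incoming argument pointer + words * UNITS_PER_WORD;
     reg_save_area     = frame pointer, biased down when no GPRs are saved.  */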
8506
8507 /* Implement va_arg. */
8508
8509 static tree
8510 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8511 gimple_seq *post_p)
8512 {
8513 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8514 tree f_gpr, f_fpr, f_ovf, f_sav;
8515 tree gpr, fpr, ovf, sav, t;
8516 int size, rsize;
8517 tree lab_false, lab_over = NULL_TREE;
8518 tree addr, t2;
8519 rtx container;
8520 int indirect_p = 0;
8521 tree ptrtype;
8522 enum machine_mode nat_mode;
8523 unsigned int arg_boundary;
8524
8525 /* Only the 64-bit target needs something special. */
8526 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8527 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8528
8529 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8530 f_fpr = DECL_CHAIN (f_gpr);
8531 f_ovf = DECL_CHAIN (f_fpr);
8532 f_sav = DECL_CHAIN (f_ovf);
8533
8534 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8535 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8536 valist = build_va_arg_indirect_ref (valist);
8537 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8538 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8539 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8540
8541 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8542 if (indirect_p)
8543 type = build_pointer_type (type);
8544 size = int_size_in_bytes (type);
8545 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8546
8547 nat_mode = type_natural_mode (type, NULL, false);
8548 switch (nat_mode)
8549 {
8550 case V8SFmode:
8551 case V8SImode:
8552 case V32QImode:
8553 case V16HImode:
8554 case V4DFmode:
8555 case V4DImode:
8556 case V16SFmode:
8557 case V16SImode:
8558 case V64QImode:
8559 case V32HImode:
8560 case V8DFmode:
8561 case V8DImode:
8562 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
8563 if (!TARGET_64BIT_MS_ABI)
8564 {
8565 container = NULL;
8566 break;
8567 }
8568
8569 default:
8570 container = construct_container (nat_mode, TYPE_MODE (type),
8571 type, 0, X86_64_REGPARM_MAX,
8572 X86_64_SSE_REGPARM_MAX, intreg,
8573 0);
8574 break;
8575 }
8576
8577 /* Pull the value out of the saved registers. */
8578
8579 addr = create_tmp_var (ptr_type_node, "addr");
8580
8581 if (container)
8582 {
8583 int needed_intregs, needed_sseregs;
8584 bool need_temp;
8585 tree int_addr, sse_addr;
8586
8587 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8588 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8589
8590 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8591
8592 need_temp = (!REG_P (container)
8593 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8594 || TYPE_ALIGN (type) > 128));
8595
8596 /* In case we are passing a structure, verify that it is a consecutive
8597 block in the register save area. If not, we need to do moves. */
8598 if (!need_temp && !REG_P (container))
8599 {
8600 /* Verify that all registers are strictly consecutive */
8601 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8602 {
8603 int i;
8604
8605 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8606 {
8607 rtx slot = XVECEXP (container, 0, i);
8608 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8609 || INTVAL (XEXP (slot, 1)) != i * 16)
8610 need_temp = 1;
8611 }
8612 }
8613 else
8614 {
8615 int i;
8616
8617 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8618 {
8619 rtx slot = XVECEXP (container, 0, i);
8620 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8621 || INTVAL (XEXP (slot, 1)) != i * 8)
8622 need_temp = 1;
8623 }
8624 }
8625 }
8626 if (!need_temp)
8627 {
8628 int_addr = addr;
8629 sse_addr = addr;
8630 }
8631 else
8632 {
8633 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8634 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8635 }
8636
8637 /* First ensure that we fit completely in registers. */
8638 if (needed_intregs)
8639 {
8640 t = build_int_cst (TREE_TYPE (gpr),
8641 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8642 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8643 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8644 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8645 gimplify_and_add (t, pre_p);
8646 }
8647 if (needed_sseregs)
8648 {
8649 t = build_int_cst (TREE_TYPE (fpr),
8650 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8651 + X86_64_REGPARM_MAX * 8);
8652 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8653 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8654 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8655 gimplify_and_add (t, pre_p);
8656 }
8657
8658 /* Compute index to start of area used for integer regs. */
8659 if (needed_intregs)
8660 {
8661 /* int_addr = gpr + sav; */
8662 t = fold_build_pointer_plus (sav, gpr);
8663 gimplify_assign (int_addr, t, pre_p);
8664 }
8665 if (needed_sseregs)
8666 {
8667 /* sse_addr = fpr + sav; */
8668 t = fold_build_pointer_plus (sav, fpr);
8669 gimplify_assign (sse_addr, t, pre_p);
8670 }
8671 if (need_temp)
8672 {
8673 int i, prev_size = 0;
8674 tree temp = create_tmp_var (type, "va_arg_tmp");
8675
8676 /* addr = &temp; */
8677 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8678 gimplify_assign (addr, t, pre_p);
8679
8680 for (i = 0; i < XVECLEN (container, 0); i++)
8681 {
8682 rtx slot = XVECEXP (container, 0, i);
8683 rtx reg = XEXP (slot, 0);
8684 enum machine_mode mode = GET_MODE (reg);
8685 tree piece_type;
8686 tree addr_type;
8687 tree daddr_type;
8688 tree src_addr, src;
8689 int src_offset;
8690 tree dest_addr, dest;
8691 int cur_size = GET_MODE_SIZE (mode);
8692
8693 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8694 prev_size = INTVAL (XEXP (slot, 1));
8695 if (prev_size + cur_size > size)
8696 {
8697 cur_size = size - prev_size;
8698 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8699 if (mode == BLKmode)
8700 mode = QImode;
8701 }
8702 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8703 if (mode == GET_MODE (reg))
8704 addr_type = build_pointer_type (piece_type);
8705 else
8706 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8707 true);
8708 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8709 true);
8710
8711 if (SSE_REGNO_P (REGNO (reg)))
8712 {
8713 src_addr = sse_addr;
8714 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8715 }
8716 else
8717 {
8718 src_addr = int_addr;
8719 src_offset = REGNO (reg) * 8;
8720 }
8721 src_addr = fold_convert (addr_type, src_addr);
8722 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8723
8724 dest_addr = fold_convert (daddr_type, addr);
8725 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8726 if (cur_size == GET_MODE_SIZE (mode))
8727 {
8728 src = build_va_arg_indirect_ref (src_addr);
8729 dest = build_va_arg_indirect_ref (dest_addr);
8730
8731 gimplify_assign (dest, src, pre_p);
8732 }
8733 else
8734 {
8735 tree copy
8736 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8737 3, dest_addr, src_addr,
8738 size_int (cur_size));
8739 gimplify_and_add (copy, pre_p);
8740 }
8741 prev_size += cur_size;
8742 }
8743 }
8744
8745 if (needed_intregs)
8746 {
8747 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8748 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8749 gimplify_assign (gpr, t, pre_p);
8750 }
8751
8752 if (needed_sseregs)
8753 {
8754 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8755 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8756 gimplify_assign (fpr, t, pre_p);
8757 }
8758
8759 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8760
8761 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8762 }
8763
8764 /* ... otherwise out of the overflow area. */
8765
8766 /* When the caller aligns a parameter on the stack, any parameter
8767 alignment beyond MAX_SUPPORTED_STACK_ALIGNMENT is capped at
8768 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee with the caller
8769 here. */
8770 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8771 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8772 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8773
8774 /* Care for on-stack alignment if needed. */
8775 if (arg_boundary <= 64 || size == 0)
8776 t = ovf;
8777 else
8778 {
8779 HOST_WIDE_INT align = arg_boundary / 8;
8780 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8781 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8782 build_int_cst (TREE_TYPE (t), -align));
8783 }
8784
8785 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8786 gimplify_assign (addr, t, pre_p);
8787
8788 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8789 gimplify_assign (unshare_expr (ovf), t, pre_p);
8790
8791 if (container)
8792 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8793
8794 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8795 addr = fold_convert (ptrtype, addr);
8796
8797 if (indirect_p)
8798 addr = build_va_arg_indirect_ref (addr);
8799 return build_va_arg_indirect_ref (addr);
8800 }
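/* For a TYPE that can live in integer registers, the gimple emitted
   above corresponds roughly to this C sketch (the SSE path is analogous,
   using fp_offset, 16-byte slots and the XMM half of the save area):

     if (gp_offset >= (X86_64_REGPARM_MAX - needed_intregs + 1) * 8)
       goto overflow;
     addr = reg_save_area + gp_offset;
     gp_offset += needed_intregs * 8;
     goto done;
   overflow:
     addr = align (overflow_arg_area, arg_boundary);
     overflow_arg_area = addr + rsize * UNITS_PER_WORD;
   done:
     return *(TYPE *) addr;  */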
8801 \f
8802 /* Return true if OPNUM's MEM should be matched
8803 in movabs* patterns. */
8804
8805 bool
8806 ix86_check_movabs (rtx insn, int opnum)
8807 {
8808 rtx set, mem;
8809
8810 set = PATTERN (insn);
8811 if (GET_CODE (set) == PARALLEL)
8812 set = XVECEXP (set, 0, 0);
8813 gcc_assert (GET_CODE (set) == SET);
8814 mem = XEXP (set, opnum);
8815 while (GET_CODE (mem) == SUBREG)
8816 mem = SUBREG_REG (mem);
8817 gcc_assert (MEM_P (mem));
8818 return volatile_ok || !MEM_VOLATILE_P (mem);
8819 }
8820 \f
8821 /* Initialize the table of extra 80387 mathematical constants. */
8822
8823 static void
8824 init_ext_80387_constants (void)
8825 {
8826 static const char * cst[5] =
8827 {
8828 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8829 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8830 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8831 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8832 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8833 };
8834 int i;
8835
8836 for (i = 0; i < 5; i++)
8837 {
8838 real_from_string (&ext_80387_constants_table[i], cst[i]);
8839 /* Ensure each constant is rounded to XFmode precision. */
8840 real_convert (&ext_80387_constants_table[i],
8841 XFmode, &ext_80387_constants_table[i]);
8842 }
8843
8844 ext_80387_constants_init = 1;
8845 }
8846
8847 /* Return non-zero if the constant is something that
8848 can be loaded with a special instruction. */
8849
8850 int
8851 standard_80387_constant_p (rtx x)
8852 {
8853 enum machine_mode mode = GET_MODE (x);
8854
8855 REAL_VALUE_TYPE r;
8856
8857 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8858 return -1;
8859
8860 if (x == CONST0_RTX (mode))
8861 return 1;
8862 if (x == CONST1_RTX (mode))
8863 return 2;
8864
8865 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8866
8867 /* For XFmode constants, try to find a special 80387 instruction when
8868 optimizing for size or on those CPUs that benefit from them. */
8869 if (mode == XFmode
8870 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8871 {
8872 int i;
8873
8874 if (! ext_80387_constants_init)
8875 init_ext_80387_constants ();
8876
8877 for (i = 0; i < 5; i++)
8878 if (real_identical (&r, &ext_80387_constants_table[i]))
8879 return i + 3;
8880 }
8881
8882 /* A load of the constant -0.0 or -1.0 will be split into a
8883 fldz;fchs or fld1;fchs sequence. */
8884 if (real_isnegzero (&r))
8885 return 8;
8886 if (real_identical (&r, &dconstm1))
8887 return 9;
8888
8889 return 0;
8890 }
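/* Example of the encoding shared with standard_80387_constant_opcode
   below: an XFmode CONST_DOUBLE holding 1.0 yields 2 ("fld1"), pi yields
   7 ("fldpi"), and -1.0 yields 9, which is later split into the
   fld1;fchs sequence mentioned above.  */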
8891
8892 /* Return the opcode of the special instruction to be used to load
8893 the constant X. */
8894
8895 const char *
8896 standard_80387_constant_opcode (rtx x)
8897 {
8898 switch (standard_80387_constant_p (x))
8899 {
8900 case 1:
8901 return "fldz";
8902 case 2:
8903 return "fld1";
8904 case 3:
8905 return "fldlg2";
8906 case 4:
8907 return "fldln2";
8908 case 5:
8909 return "fldl2e";
8910 case 6:
8911 return "fldl2t";
8912 case 7:
8913 return "fldpi";
8914 case 8:
8915 case 9:
8916 return "#";
8917 default:
8918 gcc_unreachable ();
8919 }
8920 }
8921
8922 /* Return the CONST_DOUBLE representing the 80387 constant that is
8923 loaded by the specified special instruction. The argument IDX
8924 matches the return value from standard_80387_constant_p. */
8925
8926 rtx
8927 standard_80387_constant_rtx (int idx)
8928 {
8929 int i;
8930
8931 if (! ext_80387_constants_init)
8932 init_ext_80387_constants ();
8933
8934 switch (idx)
8935 {
8936 case 3:
8937 case 4:
8938 case 5:
8939 case 6:
8940 case 7:
8941 i = idx - 3;
8942 break;
8943
8944 default:
8945 gcc_unreachable ();
8946 }
8947
8948 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8949 XFmode);
8950 }
8951
8952 /* Return 1 if X is all 0s and 2 if X is all 1s
8953 in a supported SSE/AVX vector mode. */
8954
8955 int
8956 standard_sse_constant_p (rtx x)
8957 {
8958 enum machine_mode mode = GET_MODE (x);
8959
8960 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8961 return 1;
8962 if (vector_all_ones_operand (x, mode))
8963 switch (mode)
8964 {
8965 case V16QImode:
8966 case V8HImode:
8967 case V4SImode:
8968 case V2DImode:
8969 if (TARGET_SSE2)
8970 return 2;
8971 case V32QImode:
8972 case V16HImode:
8973 case V8SImode:
8974 case V4DImode:
8975 if (TARGET_AVX2)
8976 return 2;
8977 case V64QImode:
8978 case V32HImode:
8979 case V16SImode:
8980 case V8DImode:
8981 if (TARGET_AVX512F)
8982 return 2;
8983 default:
8984 break;
8985 }
8986
8987 return 0;
8988 }
8989
8990 /* Return the opcode of the special instruction to be used to load
8991 the constant X. */
8992
8993 const char *
8994 standard_sse_constant_opcode (rtx insn, rtx x)
8995 {
8996 switch (standard_sse_constant_p (x))
8997 {
8998 case 1:
8999 switch (get_attr_mode (insn))
9000 {
9001 case MODE_XI:
9002 return "vpxord\t%g0, %g0, %g0";
9003 case MODE_V16SF:
9004 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9005 : "vpxord\t%g0, %g0, %g0";
9006 case MODE_V8DF:
9007 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9008 : "vpxorq\t%g0, %g0, %g0";
9009 case MODE_TI:
9010 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9011 : "%vpxor\t%0, %d0";
9012 case MODE_V2DF:
9013 return "%vxorpd\t%0, %d0";
9014 case MODE_V4SF:
9015 return "%vxorps\t%0, %d0";
9016
9017 case MODE_OI:
9018 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9019 : "vpxor\t%x0, %x0, %x0";
9020 case MODE_V4DF:
9021 return "vxorpd\t%x0, %x0, %x0";
9022 case MODE_V8SF:
9023 return "vxorps\t%x0, %x0, %x0";
9024
9025 default:
9026 break;
9027 }
9028
9029 case 2:
9030 if (TARGET_AVX512VL
9031 || get_attr_mode (insn) == MODE_XI
9032 || get_attr_mode (insn) == MODE_V8DF
9033 || get_attr_mode (insn) == MODE_V16SF)
9034 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9035 if (TARGET_AVX)
9036 return "vpcmpeqd\t%0, %0, %0";
9037 else
9038 return "pcmpeqd\t%0, %0";
9039
9040 default:
9041 break;
9042 }
9043 gcc_unreachable ();
9044 }
9045
9046 /* Returns true if OP contains a symbol reference */
9047
9048 bool
9049 symbolic_reference_mentioned_p (rtx op)
9050 {
9051 const char *fmt;
9052 int i;
9053
9054 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9055 return true;
9056
9057 fmt = GET_RTX_FORMAT (GET_CODE (op));
9058 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9059 {
9060 if (fmt[i] == 'E')
9061 {
9062 int j;
9063
9064 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9065 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9066 return true;
9067 }
9068
9069 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9070 return true;
9071 }
9072
9073 return false;
9074 }
9075
9076 /* Return true if it is appropriate to emit `ret' instructions in the
9077 body of a function. Do this only if the epilogue is simple, needing a
9078 couple of insns. Prior to reloading, we can't tell how many registers
9079 must be saved, so return false then. Return false if there is no frame
9080 marker to de-allocate. */
9081
9082 bool
9083 ix86_can_use_return_insn_p (void)
9084 {
9085 struct ix86_frame frame;
9086
9087 if (! reload_completed || frame_pointer_needed)
9088 return 0;
9089
9090 /* Don't allow more than 32k pop, since that's all we can do
9091 with one instruction. */
9092 if (crtl->args.pops_args && crtl->args.size >= 32768)
9093 return 0;
9094
9095 ix86_compute_frame_layout (&frame);
9096 return (frame.stack_pointer_offset == UNITS_PER_WORD
9097 && (frame.nregs + frame.nsseregs) == 0);
9098 }
9099 \f
9100 /* Value should be nonzero if functions must have frame pointers.
9101 Zero means the frame pointer need not be set up (and parms may
9102 be accessed via the stack pointer) in functions that seem suitable. */
9103
9104 static bool
9105 ix86_frame_pointer_required (void)
9106 {
9107 /* If we accessed previous frames, then the generated code expects
9108 to be able to access the saved ebp value in our frame. */
9109 if (cfun->machine->accesses_prev_frame)
9110 return true;
9111
9112 /* Several x86 OSes need a frame pointer for other reasons,
9113 usually pertaining to setjmp. */
9114 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9115 return true;
9116
9117 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
9118 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9119 return true;
9120
9121 /* With Win64 SEH, very large frames need a frame pointer, as the
9122 maximum stack allocation is 4GB. */
9123 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9124 return true;
9125
9126 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9127 turns off the frame pointer by default. Turn it back on now if
9128 we've not got a leaf function. */
9129 if (TARGET_OMIT_LEAF_FRAME_POINTER
9130 && (!crtl->is_leaf
9131 || ix86_current_function_calls_tls_descriptor))
9132 return true;
9133
9134 if (crtl->profile && !flag_fentry)
9135 return true;
9136
9137 return false;
9138 }
9139
9140 /* Record that the current function accesses previous call frames. */
9141
9142 void
9143 ix86_setup_frame_addresses (void)
9144 {
9145 cfun->machine->accesses_prev_frame = 1;
9146 }
9147 \f
9148 #ifndef USE_HIDDEN_LINKONCE
9149 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9150 # define USE_HIDDEN_LINKONCE 1
9151 # else
9152 # define USE_HIDDEN_LINKONCE 0
9153 # endif
9154 #endif
9155
9156 static int pic_labels_used;
9157
9158 /* Fills in the label name that should be used for a pc thunk for
9159 the given register. */
9160
9161 static void
9162 get_pc_thunk_name (char name[32], unsigned int regno)
9163 {
9164 gcc_assert (!TARGET_64BIT);
9165
9166 if (USE_HIDDEN_LINKONCE)
9167 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9168 else
9169 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9170 }
9171
9172
9173 /* This function generates the pc thunks used for -fpic: each one loads
9174 its register with the return address of the caller and then returns. */
9175
9176 static void
9177 ix86_code_end (void)
9178 {
9179 rtx xops[2];
9180 int regno;
9181
9182 for (regno = AX_REG; regno <= SP_REG; regno++)
9183 {
9184 char name[32];
9185 tree decl;
9186
9187 if (!(pic_labels_used & (1 << regno)))
9188 continue;
9189
9190 get_pc_thunk_name (name, regno);
9191
9192 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9193 get_identifier (name),
9194 build_function_type_list (void_type_node, NULL_TREE));
9195 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9196 NULL_TREE, void_type_node);
9197 TREE_PUBLIC (decl) = 1;
9198 TREE_STATIC (decl) = 1;
9199 DECL_IGNORED_P (decl) = 1;
9200
9201 #if TARGET_MACHO
9202 if (TARGET_MACHO)
9203 {
9204 switch_to_section (darwin_sections[text_coal_section]);
9205 fputs ("\t.weak_definition\t", asm_out_file);
9206 assemble_name (asm_out_file, name);
9207 fputs ("\n\t.private_extern\t", asm_out_file);
9208 assemble_name (asm_out_file, name);
9209 putc ('\n', asm_out_file);
9210 ASM_OUTPUT_LABEL (asm_out_file, name);
9211 DECL_WEAK (decl) = 1;
9212 }
9213 else
9214 #endif
9215 if (USE_HIDDEN_LINKONCE)
9216 {
9217 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9218
9219 targetm.asm_out.unique_section (decl, 0);
9220 switch_to_section (get_named_section (decl, NULL, 0));
9221
9222 targetm.asm_out.globalize_label (asm_out_file, name);
9223 fputs ("\t.hidden\t", asm_out_file);
9224 assemble_name (asm_out_file, name);
9225 putc ('\n', asm_out_file);
9226 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9227 }
9228 else
9229 {
9230 switch_to_section (text_section);
9231 ASM_OUTPUT_LABEL (asm_out_file, name);
9232 }
9233
9234 DECL_INITIAL (decl) = make_node (BLOCK);
9235 current_function_decl = decl;
9236 init_function_start (decl);
9237 first_function_block_is_cold = false;
9238 /* Make sure unwind info is emitted for the thunk if needed. */
9239 final_start_function (emit_barrier (), asm_out_file, 1);
9240
9241 /* Pad stack IP move with 4 instructions (two NOPs count
9242 as one instruction). */
9243 if (TARGET_PAD_SHORT_FUNCTION)
9244 {
9245 int i = 8;
9246
9247 while (i--)
9248 fputs ("\tnop\n", asm_out_file);
9249 }
9250
9251 xops[0] = gen_rtx_REG (Pmode, regno);
9252 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9253 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9254 fputs ("\tret\n", asm_out_file);
9255 final_end_function ();
9256 init_insn_lengths ();
9257 free_after_compilation (cfun);
9258 set_cfun (NULL);
9259 current_function_decl = NULL;
9260 }
9261
9262 if (flag_split_stack)
9263 file_end_indicate_split_stack ();
9264 }
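/* With USE_HIDDEN_LINKONCE on ELF targets, the loop above typically emits
   one comdat thunk per register that needed it; a sketch of the body for
   the EBX case:

     __x86.get_pc_thunk.bx:
             movl    (%esp), %ebx
             ret
*/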
9265
9266 /* Emit code for the SET_GOT patterns. */
9267
9268 const char *
9269 output_set_got (rtx dest, rtx label)
9270 {
9271 rtx xops[3];
9272
9273 xops[0] = dest;
9274
9275 if (TARGET_VXWORKS_RTP && flag_pic)
9276 {
9277 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9278 xops[2] = gen_rtx_MEM (Pmode,
9279 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9280 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9281
9282 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9283 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9284 an unadorned address. */
9285 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9286 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9287 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9288 return "";
9289 }
9290
9291 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9292
9293 if (!flag_pic)
9294 {
9295 if (TARGET_MACHO)
9296 /* We don't need a pic base, we're not producing pic. */
9297 gcc_unreachable ();
9298
9299 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9300 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9301 targetm.asm_out.internal_label (asm_out_file, "L",
9302 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9303 }
9304 else
9305 {
9306 char name[32];
9307 get_pc_thunk_name (name, REGNO (dest));
9308 pic_labels_used |= 1 << REGNO (dest);
9309
9310 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9311 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9312 output_asm_insn ("call\t%X2", xops);
9313
9314 #if TARGET_MACHO
9315 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9316 This is what will be referenced by the Mach-O PIC subsystem. */
9317 if (machopic_should_output_picbase_label () || !label)
9318 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9319
9320 /* When we are restoring the pic base at the site of a nonlocal label,
9321 and we decided to emit the pic base above, we will still output a
9322 local label used for calculating the correction offset (even though
9323 the offset will be 0 in that case). */
9324 if (label)
9325 targetm.asm_out.internal_label (asm_out_file, "L",
9326 CODE_LABEL_NUMBER (label));
9327 #endif
9328 }
9329
9330 if (!TARGET_MACHO)
9331 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9332
9333 return "";
9334 }
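/* For the usual ELF -fpic case the sequence printed above is therefore
   (a sketch, AT&T syntax):

     call    __x86.get_pc_thunk.bx
     addl    $_GLOBAL_OFFSET_TABLE_, %ebx
*/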
9335
9336 /* Generate a "push" pattern for input ARG. */
9337
9338 static rtx
9339 gen_push (rtx arg)
9340 {
9341 struct machine_function *m = cfun->machine;
9342
9343 if (m->fs.cfa_reg == stack_pointer_rtx)
9344 m->fs.cfa_offset += UNITS_PER_WORD;
9345 m->fs.sp_offset += UNITS_PER_WORD;
9346
9347 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9348 arg = gen_rtx_REG (word_mode, REGNO (arg));
9349
9350 return gen_rtx_SET (VOIDmode,
9351 gen_rtx_MEM (word_mode,
9352 gen_rtx_PRE_DEC (Pmode,
9353 stack_pointer_rtx)),
9354 arg);
9355 }
9356
9357 /* Generate a "pop" pattern for input ARG. */
9358
9359 static rtx
9360 gen_pop (rtx arg)
9361 {
9362 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9363 arg = gen_rtx_REG (word_mode, REGNO (arg));
9364
9365 return gen_rtx_SET (VOIDmode,
9366 arg,
9367 gen_rtx_MEM (word_mode,
9368 gen_rtx_POST_INC (Pmode,
9369 stack_pointer_rtx)));
9370 }
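/* For a typical 64-bit target the two helpers above build, respectively,
   rtl of the shape (sketch):

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI <arg>))
     (set (reg:DI <arg>) (mem:DI (post_inc:DI (reg:DI sp))))
*/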
9371
9372 /* Return >= 0 if there is an unused call-clobbered register available
9373 for the entire function. */
9374
9375 static unsigned int
9376 ix86_select_alt_pic_regnum (void)
9377 {
9378 if (crtl->is_leaf
9379 && !crtl->profile
9380 && !ix86_current_function_calls_tls_descriptor)
9381 {
9382 int i, drap;
9383 /* Can't use the same register for both PIC and DRAP. */
9384 if (crtl->drap_reg)
9385 drap = REGNO (crtl->drap_reg);
9386 else
9387 drap = -1;
9388 for (i = 2; i >= 0; --i)
9389 if (i != drap && !df_regs_ever_live_p (i))
9390 return i;
9391 }
9392
9393 return INVALID_REGNUM;
9394 }
9395
9396 /* Return TRUE if we need to save REGNO. */
9397
9398 static bool
9399 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9400 {
9401 if (pic_offset_table_rtx
9402 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9403 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9404 || crtl->profile
9405 || crtl->calls_eh_return
9406 || crtl->uses_const_pool
9407 || cfun->has_nonlocal_label))
9408 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9409
9410 if (crtl->calls_eh_return && maybe_eh_return)
9411 {
9412 unsigned i;
9413 for (i = 0; ; i++)
9414 {
9415 unsigned test = EH_RETURN_DATA_REGNO (i);
9416 if (test == INVALID_REGNUM)
9417 break;
9418 if (test == regno)
9419 return true;
9420 }
9421 }
9422
9423 if (crtl->drap_reg
9424 && regno == REGNO (crtl->drap_reg)
9425 && !cfun->machine->no_drap_save_restore)
9426 return true;
9427
9428 return (df_regs_ever_live_p (regno)
9429 && !call_used_regs[regno]
9430 && !fixed_regs[regno]
9431 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9432 }
9433
9434 /* Return the number of saved general purpose registers. */
9435
9436 static int
9437 ix86_nsaved_regs (void)
9438 {
9439 int nregs = 0;
9440 int regno;
9441
9442 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9443 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9444 nregs ++;
9445 return nregs;
9446 }
9447
9448 /* Return the number of saved SSE registers. */
9449
9450 static int
9451 ix86_nsaved_sseregs (void)
9452 {
9453 int nregs = 0;
9454 int regno;
9455
9456 if (!TARGET_64BIT_MS_ABI)
9457 return 0;
9458 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9459 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9460 nregs ++;
9461 return nregs;
9462 }
9463
9464 /* Given FROM and TO register numbers, say whether this elimination is
9465 allowed. If stack alignment is needed, we can only replace argument
9466 pointer with hard frame pointer, or replace frame pointer with stack
9467 pointer. Otherwise, frame pointer elimination is automatically
9468 handled and all other eliminations are valid. */
9469
9470 static bool
9471 ix86_can_eliminate (const int from, const int to)
9472 {
9473 if (stack_realign_fp)
9474 return ((from == ARG_POINTER_REGNUM
9475 && to == HARD_FRAME_POINTER_REGNUM)
9476 || (from == FRAME_POINTER_REGNUM
9477 && to == STACK_POINTER_REGNUM));
9478 else
9479 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9480 }
9481
9482 /* Return the offset between two registers, one to be eliminated, and the other
9483 its replacement, at the start of a routine. */
9484
9485 HOST_WIDE_INT
9486 ix86_initial_elimination_offset (int from, int to)
9487 {
9488 struct ix86_frame frame;
9489 ix86_compute_frame_layout (&frame);
9490
9491 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9492 return frame.hard_frame_pointer_offset;
9493 else if (from == FRAME_POINTER_REGNUM
9494 && to == HARD_FRAME_POINTER_REGNUM)
9495 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9496 else
9497 {
9498 gcc_assert (to == STACK_POINTER_REGNUM);
9499
9500 if (from == ARG_POINTER_REGNUM)
9501 return frame.stack_pointer_offset;
9502
9503 gcc_assert (from == FRAME_POINTER_REGNUM);
9504 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9505 }
9506 }
9507
9508 /* In a dynamically-aligned function, we can't know the offset from
9509 stack pointer to frame pointer, so we must ensure that setjmp
9510 eliminates fp against the hard fp (%ebp) rather than trying to
9511 index from %esp up to the top of the frame across a gap that is
9512 of unknown (at compile-time) size. */
9513 static rtx
9514 ix86_builtin_setjmp_frame_value (void)
9515 {
9516 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9517 }
9518
9519 /* When using -fsplit-stack, the allocation routines set a field in
9520 the TCB to the bottom of the stack plus this much space, measured
9521 in bytes. */
9522
9523 #define SPLIT_STACK_AVAILABLE 256
9524
9525 /* Fill the ix86_frame structure describing the frame of the function being compiled. */
9526
9527 static void
9528 ix86_compute_frame_layout (struct ix86_frame *frame)
9529 {
9530 unsigned HOST_WIDE_INT stack_alignment_needed;
9531 HOST_WIDE_INT offset;
9532 unsigned HOST_WIDE_INT preferred_alignment;
9533 HOST_WIDE_INT size = get_frame_size ();
9534 HOST_WIDE_INT to_allocate;
9535
9536 frame->nregs = ix86_nsaved_regs ();
9537 frame->nsseregs = ix86_nsaved_sseregs ();
9538
9539 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
9540 except for function prologues and leaf functions. */
9541 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9542 && (!crtl->is_leaf || cfun->calls_alloca != 0
9543 || ix86_current_function_calls_tls_descriptor))
9544 {
9545 crtl->preferred_stack_boundary = 128;
9546 crtl->stack_alignment_needed = 128;
9547 }
9548 /* preferred_stack_boundary is never updated for calls
9549 expanded from the tls descriptor. Update it here. We don't update it in
9550 the expand stage because, according to the comments before
9551 ix86_current_function_calls_tls_descriptor, tls calls may be optimized
9552 away. */
9553 else if (ix86_current_function_calls_tls_descriptor
9554 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9555 {
9556 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9557 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9558 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9559 }
9560
9561 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9562 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9563
9564 gcc_assert (!size || stack_alignment_needed);
9565 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9566 gcc_assert (preferred_alignment <= stack_alignment_needed);
9567
9568 /* For SEH we have to limit the amount of code movement into the prologue.
9569 At present we do this via a BLOCKAGE, at which point there's very little
9570 scheduling that can be done, which means that there's very little point
9571 in doing anything except PUSHs. */
9572 if (TARGET_SEH)
9573 cfun->machine->use_fast_prologue_epilogue = false;
9574
9575 /* During reload iteration the number of registers saved can change.
9576 Recompute the value as needed. Do not recompute when the number of
9577 registers didn't change, as reload makes multiple calls to this function
9578 and does not expect the decision to change within a single iteration. */
9579 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9580 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9581 {
9582 int count = frame->nregs;
9583 struct cgraph_node *node = cgraph_node::get (current_function_decl);
9584
9585 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9586
9587 /* The fast prologue uses move instead of push to save registers. This
9588 is significantly longer, but also executes faster as modern hardware
9589 can execute the moves in parallel, but can't do that for push/pop.
9590
9591 Be careful about choosing which prologue to emit: when the function
9592 takes many instructions to execute, we may use the slow version, as
9593 well as when the function is known to be outside a hot spot (this is
9594 known with feedback only). Weight the size of the function by the
9595 number of registers to save, as it is cheap to use one or two push
9596 instructions but very slow to use many of them. */
9597 if (count)
9598 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9599 if (node->frequency < NODE_FREQUENCY_NORMAL
9600 || (flag_branch_probabilities
9601 && node->frequency < NODE_FREQUENCY_HOT))
9602 cfun->machine->use_fast_prologue_epilogue = false;
9603 else
9604 cfun->machine->use_fast_prologue_epilogue
9605 = !expensive_function_p (count);
9606 }
9607
9608 frame->save_regs_using_mov
9609 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9610 /* If static stack checking is enabled and done with probes,
9611 the registers need to be saved before allocating the frame. */
9612 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9613
9614 /* Skip return address. */
9615 offset = UNITS_PER_WORD;
9616
9617 /* Skip pushed static chain. */
9618 if (ix86_static_chain_on_stack)
9619 offset += UNITS_PER_WORD;
9620
9621 /* Skip saved base pointer. */
9622 if (frame_pointer_needed)
9623 offset += UNITS_PER_WORD;
9624 frame->hfp_save_offset = offset;
9625
9626 /* The traditional frame pointer location is at the top of the frame. */
9627 frame->hard_frame_pointer_offset = offset;
9628
9629 /* Register save area */
9630 offset += frame->nregs * UNITS_PER_WORD;
9631 frame->reg_save_offset = offset;
9632
9633 /* On SEH target, registers are pushed just before the frame pointer
9634 location. */
9635 if (TARGET_SEH)
9636 frame->hard_frame_pointer_offset = offset;
9637
9638 /* Align and set SSE register save area. */
9639 if (frame->nsseregs)
9640 {
9641 /* The only ABI that has saved SSE registers (Win64) also has a
9642 16-byte aligned default stack, and thus we don't need to be
9643 within the re-aligned local stack frame to save them. */
9644 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9645 offset = (offset + 16 - 1) & -16;
9646 offset += frame->nsseregs * 16;
9647 }
9648 frame->sse_reg_save_offset = offset;
9649
9650 /* The re-aligned stack starts here. Values before this point are not
9651 directly comparable with values below this point. In order to make
9652 sure that no value happens to be the same before and after, force
9653 the alignment computation below to add a non-zero value. */
9654 if (stack_realign_fp)
9655 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9656
9657 /* Va-arg area */
9658 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9659 offset += frame->va_arg_size;
9660
9661 /* Align start of frame for local function. */
9662 if (stack_realign_fp
9663 || offset != frame->sse_reg_save_offset
9664 || size != 0
9665 || !crtl->is_leaf
9666 || cfun->calls_alloca
9667 || ix86_current_function_calls_tls_descriptor)
9668 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9669
9670 /* Frame pointer points here. */
9671 frame->frame_pointer_offset = offset;
9672
9673 offset += size;
9674
9675 /* Add the outgoing arguments area. It can be skipped if we eliminated
9676 all the function calls as dead code.
9677 Skipping is however impossible when the function calls alloca, as the
9678 alloca expander assumes that the last crtl->outgoing_args_size bytes
9679 of the stack frame are unused. */
9680 if (ACCUMULATE_OUTGOING_ARGS
9681 && (!crtl->is_leaf || cfun->calls_alloca
9682 || ix86_current_function_calls_tls_descriptor))
9683 {
9684 offset += crtl->outgoing_args_size;
9685 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9686 }
9687 else
9688 frame->outgoing_arguments_size = 0;
9689
9690 /* Align stack boundary. Only needed if we're calling another function
9691 or using alloca. */
9692 if (!crtl->is_leaf || cfun->calls_alloca
9693 || ix86_current_function_calls_tls_descriptor)
9694 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9695
9696 /* We've reached end of stack frame. */
9697 frame->stack_pointer_offset = offset;
9698
9699 /* Size prologue needs to allocate. */
9700 to_allocate = offset - frame->sse_reg_save_offset;
9701
9702 if ((!to_allocate && frame->nregs <= 1)
9703 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9704 frame->save_regs_using_mov = false;
9705
9706 if (ix86_using_red_zone ()
9707 && crtl->sp_is_unchanging
9708 && crtl->is_leaf
9709 && !ix86_current_function_calls_tls_descriptor)
9710 {
9711 frame->red_zone_size = to_allocate;
9712 if (frame->save_regs_using_mov)
9713 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9714 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9715 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9716 }
9717 else
9718 frame->red_zone_size = 0;
9719 frame->stack_pointer_offset -= frame->red_zone_size;
9720
9721 /* The SEH frame pointer location is near the bottom of the frame.
9722 This is enforced by the fact that the difference between the
9723 stack pointer and the frame pointer is limited to 240 bytes in
9724 the unwind data structure. */
9725 if (TARGET_SEH)
9726 {
9727 HOST_WIDE_INT diff;
9728
9729 /* If we can leave the frame pointer where it is, do so. Also, returns
9730 the establisher frame for __builtin_frame_address (0). */
9731 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9732 if (diff <= SEH_MAX_FRAME_SIZE
9733 && (diff > 240 || (diff & 15) != 0)
9734 && !crtl->accesses_prior_frames)
9735 {
9736 /* Ideally we'd determine what portion of the local stack frame
9737 (within the constraint of the lowest 240) is most heavily used.
9738 But without that complication, simply bias the frame pointer
9739 by 128 bytes so as to maximize the amount of the local stack
9740 frame that is addressable with 8-bit offsets. */
9741 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9742 }
9743 }
9744 }
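/* A sketch of the frame laid out above, from the incoming stack pointer
   downward (each label names the offset field computed for it):

     return address                        UNITS_PER_WORD
     [pushed static chain]
     [saved frame pointer]                 hard_frame_pointer_offset (non-SEH)
     saved general registers               reg_save_offset
     16-byte aligned SSE register saves    sse_reg_save_offset
     va_arg register save area
     local variables (get_frame_size ())   frame_pointer_offset
     outgoing argument area
                                           stack_pointer_offset

   When the red zone is usable, its size is subtracted from
   stack_pointer_offset at the very end.  */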
9745
9746 /* This is semi-inlined memory_address_length, but simplified
9747 since we know that we're always dealing with reg+offset, and
9748 to avoid having to create and discard all that rtl. */
9749
9750 static inline int
9751 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9752 {
9753 int len = 4;
9754
9755 if (offset == 0)
9756 {
9757 /* EBP and R13 cannot be encoded without an offset. */
9758 len = (regno == BP_REG || regno == R13_REG);
9759 }
9760 else if (IN_RANGE (offset, -128, 127))
9761 len = 1;
9762
9763 /* ESP and R12 must be encoded with a SIB byte. */
9764 if (regno == SP_REG || regno == R12_REG)
9765 len++;
9766
9767 return len;
9768 }
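/* Worked examples of the estimate above: (ebx, 0) needs neither a
   displacement nor a SIB byte, giving 0; (ebp, 0) must still encode a
   disp8, giving 1; (esp, 4) needs a disp8 plus the mandatory SIB byte,
   giving 2; and (ebx, 200) requires a full 4-byte displacement,
   giving 4.  */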
9769
9770 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9771 The valid base registers are taken from CFUN->MACHINE->FS. */
9772
9773 static rtx
9774 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9775 {
9776 const struct machine_function *m = cfun->machine;
9777 rtx base_reg = NULL;
9778 HOST_WIDE_INT base_offset = 0;
9779
9780 if (m->use_fast_prologue_epilogue)
9781 {
9782 /* Choose the base register most likely to allow the most scheduling
9783 opportunities. Generally FP is valid throughout the function,
9784 while DRAP must be reloaded within the epilogue. But choose either
9785 over the SP due to increased encoding size. */
9786
9787 if (m->fs.fp_valid)
9788 {
9789 base_reg = hard_frame_pointer_rtx;
9790 base_offset = m->fs.fp_offset - cfa_offset;
9791 }
9792 else if (m->fs.drap_valid)
9793 {
9794 base_reg = crtl->drap_reg;
9795 base_offset = 0 - cfa_offset;
9796 }
9797 else if (m->fs.sp_valid)
9798 {
9799 base_reg = stack_pointer_rtx;
9800 base_offset = m->fs.sp_offset - cfa_offset;
9801 }
9802 }
9803 else
9804 {
9805 HOST_WIDE_INT toffset;
9806 int len = 16, tlen;
9807
9808 /* Choose the base register with the smallest address encoding.
9809 With a tie, choose FP > DRAP > SP. */
9810 if (m->fs.sp_valid)
9811 {
9812 base_reg = stack_pointer_rtx;
9813 base_offset = m->fs.sp_offset - cfa_offset;
9814 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9815 }
9816 if (m->fs.drap_valid)
9817 {
9818 toffset = 0 - cfa_offset;
9819 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9820 if (tlen <= len)
9821 {
9822 base_reg = crtl->drap_reg;
9823 base_offset = toffset;
9824 len = tlen;
9825 }
9826 }
9827 if (m->fs.fp_valid)
9828 {
9829 toffset = m->fs.fp_offset - cfa_offset;
9830 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9831 if (tlen <= len)
9832 {
9833 base_reg = hard_frame_pointer_rtx;
9834 base_offset = toffset;
9835 len = tlen;
9836 }
9837 }
9838 }
9839 gcc_assert (base_reg != NULL);
9840
9841 return plus_constant (Pmode, base_reg, base_offset);
9842 }
9843
9844 /* Emit code to save registers in the prologue. */
9845
9846 static void
9847 ix86_emit_save_regs (void)
9848 {
9849 unsigned int regno;
9850 rtx insn;
9851
9852 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9853 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9854 {
9855 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9856 RTX_FRAME_RELATED_P (insn) = 1;
9857 }
9858 }
9859
9860 /* Emit a single register save at CFA - CFA_OFFSET. */
9861
9862 static void
9863 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9864 HOST_WIDE_INT cfa_offset)
9865 {
9866 struct machine_function *m = cfun->machine;
9867 rtx reg = gen_rtx_REG (mode, regno);
9868 rtx mem, addr, base, insn;
9869
9870 addr = choose_baseaddr (cfa_offset);
9871 mem = gen_frame_mem (mode, addr);
9872
9873 /* For SSE saves, we need to indicate the 128-bit alignment. */
9874 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9875
9876 insn = emit_move_insn (mem, reg);
9877 RTX_FRAME_RELATED_P (insn) = 1;
9878
9879 base = addr;
9880 if (GET_CODE (base) == PLUS)
9881 base = XEXP (base, 0);
9882 gcc_checking_assert (REG_P (base));
9883
9884 /* When saving registers into a re-aligned local stack frame, avoid
9885 any tricky guessing by dwarf2out. */
9886 if (m->fs.realigned)
9887 {
9888 gcc_checking_assert (stack_realign_drap);
9889
9890 if (regno == REGNO (crtl->drap_reg))
9891 {
9892 /* A bit of a hack. We force the DRAP register to be saved in
9893 the re-aligned stack frame, which provides us with a copy
9894 of the CFA that will last past the prologue. Install it. */
9895 gcc_checking_assert (cfun->machine->fs.fp_valid);
9896 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9897 cfun->machine->fs.fp_offset - cfa_offset);
9898 mem = gen_rtx_MEM (mode, addr);
9899 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9900 }
9901 else
9902 {
9903 /* The frame pointer is a stable reference within the
9904 aligned frame. Use it. */
9905 gcc_checking_assert (cfun->machine->fs.fp_valid);
9906 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9907 cfun->machine->fs.fp_offset - cfa_offset);
9908 mem = gen_rtx_MEM (mode, addr);
9909 add_reg_note (insn, REG_CFA_EXPRESSION,
9910 gen_rtx_SET (VOIDmode, mem, reg));
9911 }
9912 }
9913
9914 /* The memory may not be relative to the current CFA register,
9915 which means that we may need to generate a new pattern for
9916 use by the unwind info. */
9917 else if (base != m->fs.cfa_reg)
9918 {
9919 addr = plus_constant (Pmode, m->fs.cfa_reg,
9920 m->fs.cfa_offset - cfa_offset);
9921 mem = gen_rtx_MEM (mode, addr);
9922 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9923 }
9924 }
9925
9926 /* Emit code to save registers using MOV insns.
9927 First register is stored at CFA - CFA_OFFSET. */
9928 static void
9929 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9930 {
9931 unsigned int regno;
9932
9933 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9934 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9935 {
9936 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9937 cfa_offset -= UNITS_PER_WORD;
9938 }
9939 }
9940
9941 /* Emit code to save SSE registers using MOV insns.
9942 First register is stored at CFA - CFA_OFFSET. */
9943 static void
9944 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9945 {
9946 unsigned int regno;
9947
9948 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9949 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9950 {
9951 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9952 cfa_offset -= 16;
9953 }
9954 }
9955
9956 static GTY(()) rtx queued_cfa_restores;
9957
9958 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9959 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9960 Don't add the note if the previously saved value will be left untouched
9961 within the stack red zone until return, as unwinders can find the same
9962 value in the register and on the stack. */
9963
9964 static void
9965 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9966 {
9967 if (!crtl->shrink_wrapped
9968 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9969 return;
9970
9971 if (insn)
9972 {
9973 add_reg_note (insn, REG_CFA_RESTORE, reg);
9974 RTX_FRAME_RELATED_P (insn) = 1;
9975 }
9976 else
9977 queued_cfa_restores
9978 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9979 }
9980
9981 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9982
9983 static void
9984 ix86_add_queued_cfa_restore_notes (rtx insn)
9985 {
9986 rtx last;
9987 if (!queued_cfa_restores)
9988 return;
9989 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9990 ;
9991 XEXP (last, 1) = REG_NOTES (insn);
9992 REG_NOTES (insn) = queued_cfa_restores;
9993 queued_cfa_restores = NULL_RTX;
9994 RTX_FRAME_RELATED_P (insn) = 1;
9995 }
9996
9997 /* Expand prologue or epilogue stack adjustment.
9998 The pattern exists to put a dependency on all ebp-based memory accesses.
9999 STYLE should be negative if instructions should be marked as frame related,
10000 zero if %r11 register is live and cannot be freely used and positive
10001 otherwise. */
10002
10003 static void
10004 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10005 int style, bool set_cfa)
10006 {
10007 struct machine_function *m = cfun->machine;
10008 rtx insn;
10009 bool add_frame_related_expr = false;
10010
10011 if (Pmode == SImode)
10012 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10013 else if (x86_64_immediate_operand (offset, DImode))
10014 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10015 else
10016 {
10017 rtx tmp;
10018 /* r11 is used by indirect sibcall return as well, set before the
10019 epilogue and used after the epilogue. */
10020 if (style)
10021 tmp = gen_rtx_REG (DImode, R11_REG);
10022 else
10023 {
10024 gcc_assert (src != hard_frame_pointer_rtx
10025 && dest != hard_frame_pointer_rtx);
10026 tmp = hard_frame_pointer_rtx;
10027 }
10028 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10029 if (style < 0)
10030 add_frame_related_expr = true;
10031
10032 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10033 }
10034
10035 insn = emit_insn (insn);
10036 if (style >= 0)
10037 ix86_add_queued_cfa_restore_notes (insn);
10038
10039 if (set_cfa)
10040 {
10041 rtx r;
10042
10043 gcc_assert (m->fs.cfa_reg == src);
10044 m->fs.cfa_offset += INTVAL (offset);
10045 m->fs.cfa_reg = dest;
10046
10047 r = gen_rtx_PLUS (Pmode, src, offset);
10048 r = gen_rtx_SET (VOIDmode, dest, r);
10049 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10050 RTX_FRAME_RELATED_P (insn) = 1;
10051 }
10052 else if (style < 0)
10053 {
10054 RTX_FRAME_RELATED_P (insn) = 1;
10055 if (add_frame_related_expr)
10056 {
10057 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10058 r = gen_rtx_SET (VOIDmode, dest, r);
10059 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10060 }
10061 }
10062
10063 if (dest == stack_pointer_rtx)
10064 {
10065 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10066 bool valid = m->fs.sp_valid;
10067
10068 if (src == hard_frame_pointer_rtx)
10069 {
10070 valid = m->fs.fp_valid;
10071 ooffset = m->fs.fp_offset;
10072 }
10073 else if (src == crtl->drap_reg)
10074 {
10075 valid = m->fs.drap_valid;
10076 ooffset = 0;
10077 }
10078 else
10079 {
10080 /* Else there are two possibilities: SP itself, which we set
10081 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
10082 taken care of by hand along the eh_return path. */
10083 gcc_checking_assert (src == stack_pointer_rtx
10084 || offset == const0_rtx);
10085 }
10086
10087 m->fs.sp_offset = ooffset - INTVAL (offset);
10088 m->fs.sp_valid = valid;
10089 }
10090 }
10091
10092 /* Find an available register to be used as dynamic realign argument
10093 pointer register. Such a register will be written in the prologue and
10094 used at the beginning of the body, so it must not be
10095 1. a parameter-passing register.
10096 2. the GOT pointer.
10097 We reuse the static-chain register if it is available. Otherwise we
10098 use DI for i386 and R10 for x86-64; functions that need a static
10099 chain or emit tail calls get R13 instead, which also has a short encoding.
10100
10101 Return: the regno of chosen register. */
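/* For instance (summarizing the logic below): an ordinary 64-bit
   function gets R10, a nested 64-bit function or one that emits tail
   calls gets the callee-saved R13, a plain 32-bit cdecl function with
   at most two register parameters reuses CX, and DI is the 32-bit
   fallback otherwise.  */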
10102
10103 static unsigned int
10104 find_drap_reg (void)
10105 {
10106 tree decl = cfun->decl;
10107
10108 if (TARGET_64BIT)
10109 {
10110 /* Use R13 for a nested function or a function that needs a static chain.
10111 Since a function with a tail call may use any caller-saved
10112 register in the epilogue, DRAP must not use a caller-saved
10113 register in such a case. */
10114 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10115 return R13_REG;
10116
10117 return R10_REG;
10118 }
10119 else
10120 {
10121 /* Use DI for a nested function or a function that needs a static chain.
10122 Since a function with a tail call may use any caller-saved
10123 register in the epilogue, DRAP must not use a caller-saved
10124 register in such a case. */
10125 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10126 return DI_REG;
10127
10128 /* Reuse static chain register if it isn't used for parameter
10129 passing. */
10130 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10131 {
10132 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10133 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10134 return CX_REG;
10135 }
10136 return DI_REG;
10137 }
10138 }
10139
10140 /* Return minimum incoming stack alignment. */
10141
10142 static unsigned int
10143 ix86_minimum_incoming_stack_boundary (bool sibcall)
10144 {
10145 unsigned int incoming_stack_boundary;
10146
10147 /* Prefer the one specified at command line. */
10148 if (ix86_user_incoming_stack_boundary)
10149 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10150 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10151 when -mstackrealign is used, this isn't a sibcall check, and the
10152 estimated stack alignment is 128 bits. */
10153 else if (!sibcall
10154 && !TARGET_64BIT
10155 && ix86_force_align_arg_pointer
10156 && crtl->stack_alignment_estimated == 128)
10157 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10158 else
10159 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10160
10161 /* Incoming stack alignment can be changed on individual functions
10162 via force_align_arg_pointer attribute. We use the smallest
10163 incoming stack boundary. */
10164 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10165 && lookup_attribute (ix86_force_align_arg_pointer_string,
10166 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10167 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10168
10169 /* The incoming stack frame has to be aligned at least at
10170 parm_stack_boundary. */
10171 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10172 incoming_stack_boundary = crtl->parm_stack_boundary;
10173
10174 /* The stack at the entry of main is aligned by the runtime. We use
10175 the smallest incoming stack boundary. */
10176 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10177 && DECL_NAME (current_function_decl)
10178 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10179 && DECL_FILE_SCOPE_P (current_function_decl))
10180 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10181
10182 return incoming_stack_boundary;
10183 }
10184
10185 /* Update incoming stack boundary and estimated stack alignment. */
10186
10187 static void
10188 ix86_update_stack_boundary (void)
10189 {
10190 ix86_incoming_stack_boundary
10191 = ix86_minimum_incoming_stack_boundary (false);
10192
10193 /* x86_64 varargs need 16-byte stack alignment for the register save
10194 area. */
10195 if (TARGET_64BIT
10196 && cfun->stdarg
10197 && crtl->stack_alignment_estimated < 128)
10198 crtl->stack_alignment_estimated = 128;
10199 }
10200
10201 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10202 needed or an rtx for DRAP otherwise. */
10203
10204 static rtx
10205 ix86_get_drap_rtx (void)
10206 {
10207 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10208 crtl->need_drap = true;
10209
10210 if (stack_realign_drap)
10211 {
10212 /* Assign DRAP to vDRAP and return vDRAP. */
10213 unsigned int regno = find_drap_reg ();
10214 rtx drap_vreg;
10215 rtx arg_ptr;
10216 rtx_insn *seq, *insn;
10217
10218 arg_ptr = gen_rtx_REG (Pmode, regno);
10219 crtl->drap_reg = arg_ptr;
10220
10221 start_sequence ();
10222 drap_vreg = copy_to_reg (arg_ptr);
10223 seq = get_insns ();
10224 end_sequence ();
10225
10226 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10227 if (!optimize)
10228 {
10229 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10230 RTX_FRAME_RELATED_P (insn) = 1;
10231 }
10232 return drap_vreg;
10233 }
10234 else
10235 return NULL;
10236 }
10237
10238 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10239
10240 static rtx
10241 ix86_internal_arg_pointer (void)
10242 {
10243 return virtual_incoming_args_rtx;
10244 }
10245
10246 struct scratch_reg {
10247 rtx reg;
10248 bool saved;
10249 };
10250
10251 /* Return a short-lived scratch register for use on function entry.
10252 In 32-bit mode, it is valid only after the registers are saved
10253 in the prologue. This register must be released by means of
10254 release_scratch_register_on_entry once it is dead. */
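/* For example (a summary of the choices below): 64-bit code always
   gets R11; a 32-bit cdecl function with no register parameters and
   no DRAP in EAX gets EAX for free; a function that already uses all
   of EAX/ECX/EDX and saves no usable callee-saved register falls back
   to pushing and popping a scratch register around its use.  */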
10255
10256 static void
10257 get_scratch_register_on_entry (struct scratch_reg *sr)
10258 {
10259 int regno;
10260
10261 sr->saved = false;
10262
10263 if (TARGET_64BIT)
10264 {
10265 /* We always use R11 in 64-bit mode. */
10266 regno = R11_REG;
10267 }
10268 else
10269 {
10270 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10271 bool fastcall_p
10272 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10273 bool thiscall_p
10274 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10275 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10276 int regparm = ix86_function_regparm (fntype, decl);
10277 int drap_regno
10278 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10279
10280 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10281 for the static chain register. */
10282 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10283 && drap_regno != AX_REG)
10284 regno = AX_REG;
10285 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10286 for the static chain register. */
10287 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10288 regno = AX_REG;
10289 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10290 regno = DX_REG;
10291 /* ecx is the static chain register. */
10292 else if (regparm < 3 && !fastcall_p && !thiscall_p
10293 && !static_chain_p
10294 && drap_regno != CX_REG)
10295 regno = CX_REG;
10296 else if (ix86_save_reg (BX_REG, true))
10297 regno = BX_REG;
10298 /* esi is the static chain register. */
10299 else if (!(regparm == 3 && static_chain_p)
10300 && ix86_save_reg (SI_REG, true))
10301 regno = SI_REG;
10302 else if (ix86_save_reg (DI_REG, true))
10303 regno = DI_REG;
10304 else
10305 {
10306 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10307 sr->saved = true;
10308 }
10309 }
10310
10311 sr->reg = gen_rtx_REG (Pmode, regno);
10312 if (sr->saved)
10313 {
10314 rtx insn = emit_insn (gen_push (sr->reg));
10315 RTX_FRAME_RELATED_P (insn) = 1;
10316 }
10317 }
10318
10319 /* Release a scratch register obtained from the preceding function. */
10320
10321 static void
10322 release_scratch_register_on_entry (struct scratch_reg *sr)
10323 {
10324 if (sr->saved)
10325 {
10326 struct machine_function *m = cfun->machine;
10327 rtx x, insn = emit_insn (gen_pop (sr->reg));
10328
10329 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10330 RTX_FRAME_RELATED_P (insn) = 1;
10331 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10332 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10333 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10334 m->fs.sp_offset -= UNITS_PER_WORD;
10335 }
10336 }
10337
10338 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
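/* By default STACK_CHECK_PROBE_INTERVAL_EXP is 12, i.e. probes are
   emitted every 4096 bytes unless a target overrides it; the worked
   numbers in the comments below assume that value.  */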
10339
10340 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10341
10342 static void
10343 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10344 {
10345 /* We skip the probe for the first interval + a small dope of 4 words and
10346 probe that many bytes past the specified size to maintain a protection
10347 area at the bottom of the stack. */
10348 const int dope = 4 * UNITS_PER_WORD;
10349 rtx size_rtx = GEN_INT (size), last;
10350
10351 /* See if we have a constant small number of probes to generate. If so,
10352 that's the easy case. The run-time loop is made up of 11 insns in the
10353 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10354 for n # of intervals. */
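/* E.g. assuming the usual 4096-byte PROBE_INTERVAL, a 12 KiB allocation
   uses n = 3 intervals and therefore 3 + 2*(3-1) = 7 inline insns; at
   n = 5 the inline form reaches the 11 insns of the run-time loop,
   which is why sizes up to 5 * PROBE_INTERVAL are expanded without it.  */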
10355 if (size <= 5 * PROBE_INTERVAL)
10356 {
10357 HOST_WIDE_INT i, adjust;
10358 bool first_probe = true;
10359
10360 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10361 values of N from 1 until it exceeds SIZE. If only one probe is
10362 needed, this will not generate any code. Then adjust and probe
10363 to PROBE_INTERVAL + SIZE. */
10364 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10365 {
10366 if (first_probe)
10367 {
10368 adjust = 2 * PROBE_INTERVAL + dope;
10369 first_probe = false;
10370 }
10371 else
10372 adjust = PROBE_INTERVAL;
10373
10374 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10375 plus_constant (Pmode, stack_pointer_rtx,
10376 -adjust)));
10377 emit_stack_probe (stack_pointer_rtx);
10378 }
10379
10380 if (first_probe)
10381 adjust = size + PROBE_INTERVAL + dope;
10382 else
10383 adjust = size + PROBE_INTERVAL - i;
10384
10385 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10386 plus_constant (Pmode, stack_pointer_rtx,
10387 -adjust)));
10388 emit_stack_probe (stack_pointer_rtx);
10389
10390 /* Adjust back to account for the additional first interval. */
10391 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10392 plus_constant (Pmode, stack_pointer_rtx,
10393 PROBE_INTERVAL + dope)));
10394 }
10395
10396 /* Otherwise, do the same as above, but in a loop. Note that we must be
10397 extra careful with variables wrapping around because we might be at
10398 the very top (or the very bottom) of the address space and we have
10399 to be able to handle this case properly; in particular, we use an
10400 equality test for the loop condition. */
10401 else
10402 {
10403 HOST_WIDE_INT rounded_size;
10404 struct scratch_reg sr;
10405
10406 get_scratch_register_on_entry (&sr);
10407
10408
10409 /* Step 1: round SIZE to the previous multiple of the interval. */
10410
10411 rounded_size = size & -PROBE_INTERVAL;
10412
10413
10414 /* Step 2: compute initial and final value of the loop counter. */
10415
10416 /* SP = SP_0 + PROBE_INTERVAL. */
10417 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10418 plus_constant (Pmode, stack_pointer_rtx,
10419 - (PROBE_INTERVAL + dope))));
10420
10421 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10422 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10423 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10424 gen_rtx_PLUS (Pmode, sr.reg,
10425 stack_pointer_rtx)));
10426
10427
10428 /* Step 3: the loop
10429
10430 while (SP != LAST_ADDR)
10431 {
10432 SP = SP + PROBE_INTERVAL
10433 probe at SP
10434 }
10435
10436 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10437 values of N from 1 until it is equal to ROUNDED_SIZE. */
10438
10439 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10440
10441
10442 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10443 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10444
10445 if (size != rounded_size)
10446 {
10447 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10448 plus_constant (Pmode, stack_pointer_rtx,
10449 rounded_size - size)));
10450 emit_stack_probe (stack_pointer_rtx);
10451 }
10452
10453 /* Adjust back to account for the additional first interval. */
10454 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10455 plus_constant (Pmode, stack_pointer_rtx,
10456 PROBE_INTERVAL + dope)));
10457
10458 release_scratch_register_on_entry (&sr);
10459 }
10460
10461 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10462
10463 /* Even if the stack pointer isn't the CFA register, we need to correctly
10464 describe the adjustments made to it, in particular differentiate the
10465 frame-related ones from the frame-unrelated ones. */
10466 if (size > 0)
10467 {
10468 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10469 XVECEXP (expr, 0, 0)
10470 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10471 plus_constant (Pmode, stack_pointer_rtx, -size));
10472 XVECEXP (expr, 0, 1)
10473 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10474 plus_constant (Pmode, stack_pointer_rtx,
10475 PROBE_INTERVAL + dope + size));
10476 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10477 RTX_FRAME_RELATED_P (last) = 1;
10478
10479 cfun->machine->fs.sp_offset += size;
10480 }
10481
10482 /* Make sure nothing is scheduled before we are done. */
10483 emit_insn (gen_blockage ());
10484 }
10485
10486 /* Adjust the stack pointer up to REG while probing it. */
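/* A sketch of the 32-bit AT&T output, assuming PROBE_INTERVAL == 4096
   and REG happens to be %ecx (label numbers are illustrative):

	.LPSRL0:
		cmpl	%ecx, %esp
		je	.LPSRE0
		subl	$4096, %esp
		orl	$0, (%esp)
		jmp	.LPSRL0
	.LPSRE0:
*/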
10487
10488 const char *
10489 output_adjust_stack_and_probe (rtx reg)
10490 {
10491 static int labelno = 0;
10492 char loop_lab[32], end_lab[32];
10493 rtx xops[2];
10494
10495 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10496 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10497
10498 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10499
10500 /* Jump to END_LAB if SP == LAST_ADDR. */
10501 xops[0] = stack_pointer_rtx;
10502 xops[1] = reg;
10503 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10504 fputs ("\tje\t", asm_out_file);
10505 assemble_name_raw (asm_out_file, end_lab);
10506 fputc ('\n', asm_out_file);
10507
10508 /* SP = SP + PROBE_INTERVAL. */
10509 xops[1] = GEN_INT (PROBE_INTERVAL);
10510 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10511
10512 /* Probe at SP. */
10513 xops[1] = const0_rtx;
10514 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10515
10516 fprintf (asm_out_file, "\tjmp\t");
10517 assemble_name_raw (asm_out_file, loop_lab);
10518 fputc ('\n', asm_out_file);
10519
10520 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10521
10522 return "";
10523 }
10524
10525 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10526 inclusive. These are offsets from the current stack pointer. */
10527
10528 static void
10529 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10530 {
10531 /* See if we have a constant small number of probes to generate. If so,
10532 that's the easy case. The run-time loop is made up of 7 insns in the
10533 generic case while the compile-time loop is made up of n insns for n #
10534 of intervals. */
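/* E.g. with the usual 4096-byte PROBE_INTERVAL, probing a 16 KiB range
   inline costs n = 4 probe insns, so ranges up to 7 * PROBE_INTERVAL
   stay cheaper than the 7-insn run-time loop.  */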
10535 if (size <= 7 * PROBE_INTERVAL)
10536 {
10537 HOST_WIDE_INT i;
10538
10539 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10540 it exceeds SIZE. If only one probe is needed, this will not
10541 generate any code. Then probe at FIRST + SIZE. */
10542 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10543 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10544 -(first + i)));
10545
10546 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10547 -(first + size)));
10548 }
10549
10550 /* Otherwise, do the same as above, but in a loop. Note that we must be
10551 extra careful with variables wrapping around because we might be at
10552 the very top (or the very bottom) of the address space and we have
10553 to be able to handle this case properly; in particular, we use an
10554 equality test for the loop condition. */
10555 else
10556 {
10557 HOST_WIDE_INT rounded_size, last;
10558 struct scratch_reg sr;
10559
10560 get_scratch_register_on_entry (&sr);
10561
10562
10563 /* Step 1: round SIZE to the previous multiple of the interval. */
10564
10565 rounded_size = size & -PROBE_INTERVAL;
10566
10567
10568 /* Step 2: compute initial and final value of the loop counter. */
10569
10570 /* TEST_OFFSET = FIRST. */
10571 emit_move_insn (sr.reg, GEN_INT (-first));
10572
10573 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10574 last = first + rounded_size;
10575
10576
10577 /* Step 3: the loop
10578
10579 while (TEST_ADDR != LAST_ADDR)
10580 {
10581 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10582 probe at TEST_ADDR
10583 }
10584
10585 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10586 until it is equal to ROUNDED_SIZE. */
10587
10588 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10589
10590
10591 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10592 that SIZE is equal to ROUNDED_SIZE. */
10593
10594 if (size != rounded_size)
10595 emit_stack_probe (plus_constant (Pmode,
10596 gen_rtx_PLUS (Pmode,
10597 stack_pointer_rtx,
10598 sr.reg),
10599 rounded_size - size));
10600
10601 release_scratch_register_on_entry (&sr);
10602 }
10603
10604 /* Make sure nothing is scheduled before we are done. */
10605 emit_insn (gen_blockage ());
10606 }
10607
10608 /* Probe a range of stack addresses from REG to END, inclusive. These are
10609 offsets from the current stack pointer. */
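/* A sketch of the 32-bit AT&T output, assuming PROBE_INTERVAL == 4096,
   REG in %eax and END in %edx (label numbers are illustrative):

	.LPSRL1:
		cmpl	%edx, %eax
		je	.LPSRE1
		subl	$4096, %eax
		orl	$0, (%esp,%eax)
		jmp	.LPSRL1
	.LPSRE1:

   REG holds the negated probe offset, hence the sub and the
   (%esp,%eax) addressing.  */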
10610
10611 const char *
10612 output_probe_stack_range (rtx reg, rtx end)
10613 {
10614 static int labelno = 0;
10615 char loop_lab[32], end_lab[32];
10616 rtx xops[3];
10617
10618 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10619 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10620
10621 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10622
10623 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10624 xops[0] = reg;
10625 xops[1] = end;
10626 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10627 fputs ("\tje\t", asm_out_file);
10628 assemble_name_raw (asm_out_file, end_lab);
10629 fputc ('\n', asm_out_file);
10630
10631 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10632 xops[1] = GEN_INT (PROBE_INTERVAL);
10633 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10634
10635 /* Probe at TEST_ADDR. */
10636 xops[0] = stack_pointer_rtx;
10637 xops[1] = reg;
10638 xops[2] = const0_rtx;
10639 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10640
10641 fprintf (asm_out_file, "\tjmp\t");
10642 assemble_name_raw (asm_out_file, loop_lab);
10643 fputc ('\n', asm_out_file);
10644
10645 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10646
10647 return "";
10648 }
10649
10650 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10651 to be generated in correct form. */
10652 static void
10653 ix86_finalize_stack_realign_flags (void)
10654 {
10655 /* Check if stack realignment is really needed after reload, and
10656 store the result in cfun. */
10657 unsigned int incoming_stack_boundary
10658 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10659 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10660 unsigned int stack_realign = (incoming_stack_boundary
10661 < (crtl->is_leaf
10662 ? crtl->max_used_stack_slot_alignment
10663 : crtl->stack_alignment_needed));
10664
10665 if (crtl->stack_realign_finalized)
10666 {
10667 /* After stack_realign_needed is finalized, we can no longer
10668 change it. */
10669 gcc_assert (crtl->stack_realign_needed == stack_realign);
10670 return;
10671 }
10672
10673 /* If the only reason for frame_pointer_needed is that we conservatively
10674 assumed stack realignment might be needed, but in the end nothing that
10675 needed the stack alignment had been spilled, clear frame_pointer_needed
10676 and say we don't need stack realignment. */
10677 if (stack_realign
10678 && frame_pointer_needed
10679 && crtl->is_leaf
10680 && flag_omit_frame_pointer
10681 && crtl->sp_is_unchanging
10682 && !ix86_current_function_calls_tls_descriptor
10683 && !crtl->accesses_prior_frames
10684 && !cfun->calls_alloca
10685 && !crtl->calls_eh_return
10686 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10687 && !ix86_frame_pointer_required ()
10688 && get_frame_size () == 0
10689 && ix86_nsaved_sseregs () == 0
10690 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10691 {
10692 HARD_REG_SET set_up_by_prologue, prologue_used;
10693 basic_block bb;
10694
10695 CLEAR_HARD_REG_SET (prologue_used);
10696 CLEAR_HARD_REG_SET (set_up_by_prologue);
10697 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10698 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10699 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10700 HARD_FRAME_POINTER_REGNUM);
10701 FOR_EACH_BB_FN (bb, cfun)
10702 {
10703 rtx_insn *insn;
10704 FOR_BB_INSNS (bb, insn)
10705 if (NONDEBUG_INSN_P (insn)
10706 && requires_stack_frame_p (insn, prologue_used,
10707 set_up_by_prologue))
10708 {
10709 crtl->stack_realign_needed = stack_realign;
10710 crtl->stack_realign_finalized = true;
10711 return;
10712 }
10713 }
10714
10715 /* If drap has been set, but it actually isn't live at the start
10716 of the function, there is no reason to set it up. */
10717 if (crtl->drap_reg)
10718 {
10719 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10720 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10721 {
10722 crtl->drap_reg = NULL_RTX;
10723 crtl->need_drap = false;
10724 }
10725 }
10726 else
10727 cfun->machine->no_drap_save_restore = true;
10728
10729 frame_pointer_needed = false;
10730 stack_realign = false;
10731 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10732 crtl->stack_alignment_needed = incoming_stack_boundary;
10733 crtl->stack_alignment_estimated = incoming_stack_boundary;
10734 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10735 crtl->preferred_stack_boundary = incoming_stack_boundary;
10736 df_finish_pass (true);
10737 df_scan_alloc (NULL);
10738 df_scan_blocks ();
10739 df_compute_regs_ever_live (true);
10740 df_analyze ();
10741 }
10742
10743 crtl->stack_realign_needed = stack_realign;
10744 crtl->stack_realign_finalized = true;
10745 }
10746
10747 /* Expand the prologue into a bunch of separate insns. */
10748
10749 void
10750 ix86_expand_prologue (void)
10751 {
10752 struct machine_function *m = cfun->machine;
10753 rtx insn, t;
10754 bool pic_reg_used;
10755 struct ix86_frame frame;
10756 HOST_WIDE_INT allocate;
10757 bool int_registers_saved;
10758 bool sse_registers_saved;
10759
10760 ix86_finalize_stack_realign_flags ();
10761
10762 /* DRAP should not coexist with stack_realign_fp */
10763 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10764
10765 memset (&m->fs, 0, sizeof (m->fs));
10766
10767 /* Initialize CFA state for before the prologue. */
10768 m->fs.cfa_reg = stack_pointer_rtx;
10769 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10770
10771 /* Track SP offset to the CFA. We continue tracking this after we've
10772 swapped the CFA register away from SP. In the case of re-alignment
10773 this is fudged; we're interested in offsets within the local frame. */
10774 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10775 m->fs.sp_valid = true;
10776
10777 ix86_compute_frame_layout (&frame);
10778
10779 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10780 {
10781 /* We should have already generated an error for any use of
10782 ms_hook on a nested function. */
10783 gcc_checking_assert (!ix86_static_chain_on_stack);
10784
10785 /* Check if profiling is active and we shall use the profiling-before-
10786 prologue variant. If so, issue a sorry. */
10787 if (crtl->profile && flag_fentry != 0)
10788 sorry ("ms_hook_prologue attribute isn%'t compatible "
10789 "with -mfentry for 32-bit");
10790
10791 /* In ix86_asm_output_function_label we emitted:
10792 8b ff movl.s %edi,%edi
10793 55 push %ebp
10794 8b ec movl.s %esp,%ebp
10795
10796 This matches the hookable function prologue in Win32 API
10797 functions in Microsoft Windows XP Service Pack 2 and newer.
10798 Wine uses this to enable Windows apps to hook the Win32 API
10799 functions provided by Wine.
10800
10801 What that means is that we've already set up the frame pointer. */
10802
10803 if (frame_pointer_needed
10804 && !(crtl->drap_reg && crtl->stack_realign_needed))
10805 {
10806 rtx push, mov;
10807
10808 /* We've decided to use the frame pointer already set up.
10809 Describe this to the unwinder by pretending that both
10810 push and mov insns happen right here.
10811
10812 Putting the unwind info here at the end of the ms_hook
10813 is done so that we can make absolutely certain we get
10814 the required byte sequence at the start of the function,
10815 rather than relying on an assembler that can produce
10816 the exact encoding required.
10817
10818 However it does mean (in the unpatched case) that we have
10819 a 1 insn window where the asynchronous unwind info is
10820 incorrect. However, if we placed the unwind info at
10821 its correct location we would have incorrect unwind info
10822 in the patched case. Which is probably all moot since
10823 I don't expect Wine generates dwarf2 unwind info for the
10824 system libraries that use this feature. */
10825
10826 insn = emit_insn (gen_blockage ());
10827
10828 push = gen_push (hard_frame_pointer_rtx);
10829 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10830 stack_pointer_rtx);
10831 RTX_FRAME_RELATED_P (push) = 1;
10832 RTX_FRAME_RELATED_P (mov) = 1;
10833
10834 RTX_FRAME_RELATED_P (insn) = 1;
10835 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10836 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10837
10838 /* Note that gen_push incremented m->fs.cfa_offset, even
10839 though we didn't emit the push insn here. */
10840 m->fs.cfa_reg = hard_frame_pointer_rtx;
10841 m->fs.fp_offset = m->fs.cfa_offset;
10842 m->fs.fp_valid = true;
10843 }
10844 else
10845 {
10846 /* The frame pointer is not needed so pop %ebp again.
10847 This leaves us with a pristine state. */
10848 emit_insn (gen_pop (hard_frame_pointer_rtx));
10849 }
10850 }
10851
10852 /* The first insn of a function that accepts its static chain on the
10853 stack is to push the register that would be filled in by a direct
10854 call. This insn will be skipped by the trampoline. */
10855 else if (ix86_static_chain_on_stack)
10856 {
10857 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10858 emit_insn (gen_blockage ());
10859
10860 /* We don't want to interpret this push insn as a register save,
10861 only as a stack adjustment. The real copy of the register as
10862 a save will be done later, if needed. */
10863 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10864 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10865 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10866 RTX_FRAME_RELATED_P (insn) = 1;
10867 }
10868
10869 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10870 DRAP is needed and stack realignment is really needed after reload. */
10871 if (stack_realign_drap)
10872 {
10873 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10874
10875 /* Only need to push the parameter pointer reg if it is callee saved. */
10876 if (!call_used_regs[REGNO (crtl->drap_reg)])
10877 {
10878 /* Push arg pointer reg */
10879 insn = emit_insn (gen_push (crtl->drap_reg));
10880 RTX_FRAME_RELATED_P (insn) = 1;
10881 }
10882
10883 /* Grab the argument pointer. */
10884 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10885 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10886 RTX_FRAME_RELATED_P (insn) = 1;
10887 m->fs.cfa_reg = crtl->drap_reg;
10888 m->fs.cfa_offset = 0;
10889
10890 /* Align the stack. */
10891 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10892 stack_pointer_rtx,
10893 GEN_INT (-align_bytes)));
10894 RTX_FRAME_RELATED_P (insn) = 1;
10895
10896 /* Replicate the return address on the stack so that return
10897 address can be reached via (argp - 1) slot. This is needed
10898 to implement macro RETURN_ADDR_RTX and intrinsic function
10899 expand_builtin_return_addr etc. */
10900 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10901 t = gen_frame_mem (word_mode, t);
10902 insn = emit_insn (gen_push (t));
10903 RTX_FRAME_RELATED_P (insn) = 1;
10904
10905 /* For the purposes of frame and register save area addressing,
10906 we've started over with a new frame. */
10907 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10908 m->fs.realigned = true;
10909 }
10910
10911 int_registers_saved = (frame.nregs == 0);
10912 sse_registers_saved = (frame.nsseregs == 0);
10913
10914 if (frame_pointer_needed && !m->fs.fp_valid)
10915 {
10916 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10917 slower on all targets. Also sdb doesn't like it. */
10918 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10919 RTX_FRAME_RELATED_P (insn) = 1;
10920
10921 /* Push registers now, before setting the frame pointer
10922 on SEH target. */
10923 if (!int_registers_saved
10924 && TARGET_SEH
10925 && !frame.save_regs_using_mov)
10926 {
10927 ix86_emit_save_regs ();
10928 int_registers_saved = true;
10929 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10930 }
10931
10932 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10933 {
10934 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10935 RTX_FRAME_RELATED_P (insn) = 1;
10936
10937 if (m->fs.cfa_reg == stack_pointer_rtx)
10938 m->fs.cfa_reg = hard_frame_pointer_rtx;
10939 m->fs.fp_offset = m->fs.sp_offset;
10940 m->fs.fp_valid = true;
10941 }
10942 }
10943
10944 if (!int_registers_saved)
10945 {
10946 /* If saving registers via PUSH, do so now. */
10947 if (!frame.save_regs_using_mov)
10948 {
10949 ix86_emit_save_regs ();
10950 int_registers_saved = true;
10951 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10952 }
10953
10954 /* When using the red zone we may start register saving before allocating
10955 the stack frame, saving one cycle of the prologue. However, avoid
10956 doing this if we have to probe the stack; at least on x86_64 the
10957 stack probe can turn into a call that clobbers a red zone location. */
10958 else if (ix86_using_red_zone ()
10959 && (! TARGET_STACK_PROBE
10960 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10961 {
10962 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10963 int_registers_saved = true;
10964 }
10965 }
10966
10967 if (stack_realign_fp)
10968 {
10969 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10970 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10971
10972 /* The computation of the size of the re-aligned stack frame means
10973 that we must allocate the size of the register save area before
10974 performing the actual alignment. Otherwise we cannot guarantee
10975 that there's enough storage above the realignment point. */
10976 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10977 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10978 GEN_INT (m->fs.sp_offset
10979 - frame.sse_reg_save_offset),
10980 -1, false);
10981
10982 /* Align the stack. */
10983 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10984 stack_pointer_rtx,
10985 GEN_INT (-align_bytes)));
10986
10987 /* For the purposes of register save area addressing, the stack
10988 pointer is no longer valid. As for the value of sp_offset,
10989 see ix86_compute_frame_layout, which we need to match in order
10990 to pass verification of stack_pointer_offset at the end. */
10991 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10992 m->fs.sp_valid = false;
10993 }
10994
10995 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10996
10997 if (flag_stack_usage_info)
10998 {
10999 /* We start to count from ARG_POINTER. */
11000 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11001
11002 /* If it was realigned, take into account the fake frame. */
11003 if (stack_realign_drap)
11004 {
11005 if (ix86_static_chain_on_stack)
11006 stack_size += UNITS_PER_WORD;
11007
11008 if (!call_used_regs[REGNO (crtl->drap_reg)])
11009 stack_size += UNITS_PER_WORD;
11010
11011 /* This over-estimates by 1 minimal-stack-alignment-unit but
11012 mitigates that by counting in the new return address slot. */
11013 current_function_dynamic_stack_size
11014 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11015 }
11016
11017 current_function_static_stack_size = stack_size;
11018 }
11019
11020 /* On SEH target with very large frame size, allocate an area to save
11021 SSE registers (as the very large allocation won't be described). */
11022 if (TARGET_SEH
11023 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11024 && !sse_registers_saved)
11025 {
11026 HOST_WIDE_INT sse_size =
11027 frame.sse_reg_save_offset - frame.reg_save_offset;
11028
11029 gcc_assert (int_registers_saved);
11030
11031 /* No need to do stack checking as the area will be immediately
11032 written. */
11033 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11034 GEN_INT (-sse_size), -1,
11035 m->fs.cfa_reg == stack_pointer_rtx);
11036 allocate -= sse_size;
11037 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11038 sse_registers_saved = true;
11039 }
11040
11041 /* The stack has already been decremented by the instruction calling us
11042 so probe if the size is non-negative to preserve the protection area. */
11043 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11044 {
11045 /* We expect the registers to be saved when probes are used. */
11046 gcc_assert (int_registers_saved);
11047
11048 if (STACK_CHECK_MOVING_SP)
11049 {
11050 if (!(crtl->is_leaf && !cfun->calls_alloca
11051 && allocate <= PROBE_INTERVAL))
11052 {
11053 ix86_adjust_stack_and_probe (allocate);
11054 allocate = 0;
11055 }
11056 }
11057 else
11058 {
11059 HOST_WIDE_INT size = allocate;
11060
11061 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11062 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11063
11064 if (TARGET_STACK_PROBE)
11065 {
11066 if (crtl->is_leaf && !cfun->calls_alloca)
11067 {
11068 if (size > PROBE_INTERVAL)
11069 ix86_emit_probe_stack_range (0, size);
11070 }
11071 else
11072 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11073 }
11074 else
11075 {
11076 if (crtl->is_leaf && !cfun->calls_alloca)
11077 {
11078 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11079 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11080 size - STACK_CHECK_PROTECT);
11081 }
11082 else
11083 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11084 }
11085 }
11086 }
11087
11088 if (allocate == 0)
11089 ;
11090 else if (!ix86_target_stack_probe ()
11091 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11092 {
11093 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11094 GEN_INT (-allocate), -1,
11095 m->fs.cfa_reg == stack_pointer_rtx);
11096 }
11097 else
11098 {
11099 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11100 rtx r10 = NULL;
11101 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11102 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11103 bool eax_live = ix86_eax_live_at_start_p ();
11104 bool r10_live = false;
11105
11106 if (TARGET_64BIT)
11107 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11108
11109 if (eax_live)
11110 {
11111 insn = emit_insn (gen_push (eax));
11112 allocate -= UNITS_PER_WORD;
11113 /* Note that SEH directives need to continue tracking the stack
11114 pointer even after the frame pointer has been set up. */
11115 if (sp_is_cfa_reg || TARGET_SEH)
11116 {
11117 if (sp_is_cfa_reg)
11118 m->fs.cfa_offset += UNITS_PER_WORD;
11119 RTX_FRAME_RELATED_P (insn) = 1;
11120 }
11121 }
11122
11123 if (r10_live)
11124 {
11125 r10 = gen_rtx_REG (Pmode, R10_REG);
11126 insn = emit_insn (gen_push (r10));
11127 allocate -= UNITS_PER_WORD;
11128 if (sp_is_cfa_reg || TARGET_SEH)
11129 {
11130 if (sp_is_cfa_reg)
11131 m->fs.cfa_offset += UNITS_PER_WORD;
11132 RTX_FRAME_RELATED_P (insn) = 1;
11133 }
11134 }
11135
11136 emit_move_insn (eax, GEN_INT (allocate));
11137 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11138
11139 /* Use the fact that AX still contains ALLOCATE. */
11140 adjust_stack_insn = (Pmode == DImode
11141 ? gen_pro_epilogue_adjust_stack_di_sub
11142 : gen_pro_epilogue_adjust_stack_si_sub);
11143
11144 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11145 stack_pointer_rtx, eax));
11146
11147 if (sp_is_cfa_reg || TARGET_SEH)
11148 {
11149 if (sp_is_cfa_reg)
11150 m->fs.cfa_offset += allocate;
11151 RTX_FRAME_RELATED_P (insn) = 1;
11152 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11153 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11154 plus_constant (Pmode, stack_pointer_rtx,
11155 -allocate)));
11156 }
11157 m->fs.sp_offset += allocate;
11158
11159 /* Use stack_pointer_rtx for relative addressing so that code
11160 works for realigned stack, too. */
11161 if (r10_live && eax_live)
11162 {
11163 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11164 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11165 gen_frame_mem (word_mode, t));
11166 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11167 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11168 gen_frame_mem (word_mode, t));
11169 }
11170 else if (eax_live || r10_live)
11171 {
11172 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11173 emit_move_insn (gen_rtx_REG (word_mode,
11174 (eax_live ? AX_REG : R10_REG)),
11175 gen_frame_mem (word_mode, t));
11176 }
11177 }
11178 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11179
11180 /* If we haven't already set up the frame pointer, do so now. */
11181 if (frame_pointer_needed && !m->fs.fp_valid)
11182 {
11183 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11184 GEN_INT (frame.stack_pointer_offset
11185 - frame.hard_frame_pointer_offset));
11186 insn = emit_insn (insn);
11187 RTX_FRAME_RELATED_P (insn) = 1;
11188 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11189
11190 if (m->fs.cfa_reg == stack_pointer_rtx)
11191 m->fs.cfa_reg = hard_frame_pointer_rtx;
11192 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11193 m->fs.fp_valid = true;
11194 }
11195
11196 if (!int_registers_saved)
11197 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11198 if (!sse_registers_saved)
11199 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11200
11201 pic_reg_used = false;
11202 /* We don't use pic-register for pe-coff target. */
11203 if (pic_offset_table_rtx
11204 && !TARGET_PECOFF
11205 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11206 || crtl->profile))
11207 {
11208 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11209
11210 if (alt_pic_reg_used != INVALID_REGNUM)
11211 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11212
11213 pic_reg_used = true;
11214 }
11215
11216 if (pic_reg_used)
11217 {
11218 if (TARGET_64BIT)
11219 {
11220 if (ix86_cmodel == CM_LARGE_PIC)
11221 {
11222 rtx_code_label *label;
11223 rtx tmp_reg;
11224
11225 gcc_assert (Pmode == DImode);
11226 label = gen_label_rtx ();
11227 emit_label (label);
11228 LABEL_PRESERVE_P (label) = 1;
11229 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11230 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11231 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11232 label));
11233 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11234 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11235 pic_offset_table_rtx, tmp_reg));
11236 }
11237 else
11238 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11239 }
11240 else
11241 {
11242 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11243 RTX_FRAME_RELATED_P (insn) = 1;
11244 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11245 }
11246 }
11247
11248 /* In the pic_reg_used case, make sure that the got load isn't deleted
11249 when mcount needs it. Blockage to avoid call movement across mcount
11250 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11251 note. */
11252 if (crtl->profile && !flag_fentry && pic_reg_used)
11253 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11254
11255 if (crtl->drap_reg && !crtl->stack_realign_needed)
11256 {
11257 /* vDRAP is set up, but after reload it turns out stack realignment
11258 isn't necessary; here we emit prologue code to set up DRAP
11259 without the stack realignment adjustment. */
11260 t = choose_baseaddr (0);
11261 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11262 }
11263
11264 /* Prevent instructions from being scheduled into register save push
11265 sequence when access to the redzone area is done through frame pointer.
11266 The offset between the frame pointer and the stack pointer is calculated
11267 relative to the value of the stack pointer at the end of the function
11268 prologue, and moving instructions that access redzone area via frame
11269 pointer inside push sequence violates this assumption. */
11270 if (frame_pointer_needed && frame.red_zone_size)
11271 emit_insn (gen_memory_blockage ());
11272
11273 /* Emit cld instruction if stringops are used in the function. */
11274 if (TARGET_CLD && ix86_current_function_needs_cld)
11275 emit_insn (gen_cld ());
11276
11277 /* SEH requires that the prologue end within 256 bytes of the start of
11278 the function. Prevent instruction schedules that would extend that.
11279 Further, prevent alloca modifications to the stack pointer from being
11280 combined with prologue modifications. */
11281 if (TARGET_SEH)
11282 emit_insn (gen_prologue_use (stack_pointer_rtx));
11283 }
11284
11285 /* Emit code to restore REG using a POP insn. */
11286
11287 static void
11288 ix86_emit_restore_reg_using_pop (rtx reg)
11289 {
11290 struct machine_function *m = cfun->machine;
11291 rtx insn = emit_insn (gen_pop (reg));
11292
11293 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11294 m->fs.sp_offset -= UNITS_PER_WORD;
11295
11296 if (m->fs.cfa_reg == crtl->drap_reg
11297 && REGNO (reg) == REGNO (crtl->drap_reg))
11298 {
11299 /* Previously we'd represented the CFA as an expression
11300 like *(%ebp - 8). We've just popped that value from
11301 the stack, which means we need to reset the CFA to
11302 the drap register. This will remain until we restore
11303 the stack pointer. */
11304 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11305 RTX_FRAME_RELATED_P (insn) = 1;
11306
11307 /* This means that the DRAP register is valid for addressing too. */
11308 m->fs.drap_valid = true;
11309 return;
11310 }
11311
11312 if (m->fs.cfa_reg == stack_pointer_rtx)
11313 {
11314 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11315 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11316 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11317 RTX_FRAME_RELATED_P (insn) = 1;
11318
11319 m->fs.cfa_offset -= UNITS_PER_WORD;
11320 }
11321
11322 /* When the frame pointer is the CFA, and we pop it, we are
11323 swapping back to the stack pointer as the CFA. This happens
11324 for stack frames that don't allocate other data, so we assume
11325 the stack pointer is now pointing at the return address, i.e.
11326 the function entry state, which makes the offset 1 word. */
11327 if (reg == hard_frame_pointer_rtx)
11328 {
11329 m->fs.fp_valid = false;
11330 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11331 {
11332 m->fs.cfa_reg = stack_pointer_rtx;
11333 m->fs.cfa_offset -= UNITS_PER_WORD;
11334
11335 add_reg_note (insn, REG_CFA_DEF_CFA,
11336 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11337 GEN_INT (m->fs.cfa_offset)));
11338 RTX_FRAME_RELATED_P (insn) = 1;
11339 }
11340 }
11341 }
11342
11343 /* Emit code to restore saved registers using POP insns. */
11344
11345 static void
11346 ix86_emit_restore_regs_using_pop (void)
11347 {
11348 unsigned int regno;
11349
11350 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11351 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11352 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11353 }
11354
11355 /* Emit code and notes for the LEAVE instruction. */
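/* LEAVE is equivalent to "movl %ebp, %esp" followed by "popl %ebp"
   (or the 64-bit forms), which is why the code below marks SP as valid
   at fp_offset - UNITS_PER_WORD and FP as no longer valid.  */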
11356
11357 static void
11358 ix86_emit_leave (void)
11359 {
11360 struct machine_function *m = cfun->machine;
11361 rtx insn = emit_insn (ix86_gen_leave ());
11362
11363 ix86_add_queued_cfa_restore_notes (insn);
11364
11365 gcc_assert (m->fs.fp_valid);
11366 m->fs.sp_valid = true;
11367 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11368 m->fs.fp_valid = false;
11369
11370 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11371 {
11372 m->fs.cfa_reg = stack_pointer_rtx;
11373 m->fs.cfa_offset = m->fs.sp_offset;
11374
11375 add_reg_note (insn, REG_CFA_DEF_CFA,
11376 plus_constant (Pmode, stack_pointer_rtx,
11377 m->fs.sp_offset));
11378 RTX_FRAME_RELATED_P (insn) = 1;
11379 }
11380 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11381 m->fs.fp_offset);
11382 }
11383
11384 /* Emit code to restore saved registers using MOV insns.
11385 First register is restored from CFA - CFA_OFFSET. */
11386 static void
11387 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11388 bool maybe_eh_return)
11389 {
11390 struct machine_function *m = cfun->machine;
11391 unsigned int regno;
11392
11393 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11394 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11395 {
11396 rtx reg = gen_rtx_REG (word_mode, regno);
11397 rtx insn, mem;
11398
11399 mem = choose_baseaddr (cfa_offset);
11400 mem = gen_frame_mem (word_mode, mem);
11401 insn = emit_move_insn (reg, mem);
11402
11403 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11404 {
11405 /* Previously we'd represented the CFA as an expression
11406 like *(%ebp - 8). We've just popped that value from
11407 the stack, which means we need to reset the CFA to
11408 the drap register. This will remain until we restore
11409 the stack pointer. */
11410 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11411 RTX_FRAME_RELATED_P (insn) = 1;
11412
11413 /* This means that the DRAP register is valid for addressing. */
11414 m->fs.drap_valid = true;
11415 }
11416 else
11417 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11418
11419 cfa_offset -= UNITS_PER_WORD;
11420 }
11421 }
11422
11423 /* Emit code to restore saved SSE registers using MOV insns.
11424 First register is restored from CFA - CFA_OFFSET. */
11425 static void
11426 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11427 bool maybe_eh_return)
11428 {
11429 unsigned int regno;
11430
11431 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11432 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11433 {
11434 rtx reg = gen_rtx_REG (V4SFmode, regno);
11435 rtx mem;
11436
11437 mem = choose_baseaddr (cfa_offset);
11438 mem = gen_rtx_MEM (V4SFmode, mem);
11439 set_mem_align (mem, 128);
11440 emit_move_insn (reg, mem);
11441
11442 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11443
11444 cfa_offset -= 16;
11445 }
11446 }
11447
11448 /* Restore function stack, frame, and registers. */
11449
11450 void
11451 ix86_expand_epilogue (int style)
11452 {
11453 struct machine_function *m = cfun->machine;
11454 struct machine_frame_state frame_state_save = m->fs;
11455 struct ix86_frame frame;
11456 bool restore_regs_via_mov;
11457 bool using_drap;
11458
11459 ix86_finalize_stack_realign_flags ();
11460 ix86_compute_frame_layout (&frame);
11461
11462 m->fs.sp_valid = (!frame_pointer_needed
11463 || (crtl->sp_is_unchanging
11464 && !stack_realign_fp));
11465 gcc_assert (!m->fs.sp_valid
11466 || m->fs.sp_offset == frame.stack_pointer_offset);
11467
11468 /* The FP must be valid if the frame pointer is present. */
11469 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11470 gcc_assert (!m->fs.fp_valid
11471 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11472
11473 /* We must have *some* valid pointer to the stack frame. */
11474 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11475
11476 /* The DRAP is never valid at this point. */
11477 gcc_assert (!m->fs.drap_valid);
11478
11479 /* See the comment about red zone and frame
11480 pointer usage in ix86_expand_prologue. */
11481 if (frame_pointer_needed && frame.red_zone_size)
11482 emit_insn (gen_memory_blockage ());
11483
11484 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11485 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11486
11487 /* Determine the CFA offset of the end of the red-zone. */
11488 m->fs.red_zone_offset = 0;
11489 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11490 {
11491 /* The red-zone begins below the return address. */
11492 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11493
11494 /* When the register save area is in the aligned portion of
11495 the stack, determine the maximum runtime displacement that
11496 matches up with the aligned frame. */
11497 if (stack_realign_drap)
11498 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11499 + UNITS_PER_WORD);
11500 }
11501
11502 /* Special care must be taken for the normal return case of a function
11503 using eh_return: the eax and edx registers are marked as saved, but
11504 not restored along this path. Adjust the save location to match. */
11505 if (crtl->calls_eh_return && style != 2)
11506 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11507
11508 /* EH_RETURN requires the use of moves to function properly. */
11509 if (crtl->calls_eh_return)
11510 restore_regs_via_mov = true;
11511 /* SEH requires the use of pops to identify the epilogue. */
11512 else if (TARGET_SEH)
11513 restore_regs_via_mov = false;
11514 /* If we're only restoring one register and sp is not valid then
11515 use a move instruction to restore the register since it's
11516 less work than reloading sp and popping the register. */
11517 else if (!m->fs.sp_valid && frame.nregs <= 1)
11518 restore_regs_via_mov = true;
11519 else if (TARGET_EPILOGUE_USING_MOVE
11520 && cfun->machine->use_fast_prologue_epilogue
11521 && (frame.nregs > 1
11522 || m->fs.sp_offset != frame.reg_save_offset))
11523 restore_regs_via_mov = true;
11524 else if (frame_pointer_needed
11525 && !frame.nregs
11526 && m->fs.sp_offset != frame.reg_save_offset)
11527 restore_regs_via_mov = true;
11528 else if (frame_pointer_needed
11529 && TARGET_USE_LEAVE
11530 && cfun->machine->use_fast_prologue_epilogue
11531 && frame.nregs == 1)
11532 restore_regs_via_mov = true;
11533 else
11534 restore_regs_via_mov = false;
11535
11536 if (restore_regs_via_mov || frame.nsseregs)
11537 {
11538 /* Ensure that the entire register save area is addressable via
11539 the stack pointer, if we will restore via sp. */
11540 if (TARGET_64BIT
11541 && m->fs.sp_offset > 0x7fffffff
11542 && !(m->fs.fp_valid || m->fs.drap_valid)
11543 && (frame.nsseregs + frame.nregs) != 0)
11544 {
11545 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11546 GEN_INT (m->fs.sp_offset
11547 - frame.sse_reg_save_offset),
11548 style,
11549 m->fs.cfa_reg == stack_pointer_rtx);
11550 }
11551 }
11552
11553 /* If there are any SSE registers to restore, then we have to do it
11554 via moves, since there's obviously no pop for SSE regs. */
11555 if (frame.nsseregs)
11556 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11557 style == 2);
11558
11559 if (restore_regs_via_mov)
11560 {
11561 rtx t;
11562
11563 if (frame.nregs)
11564 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11565
11566 /* eh_return epilogues need %ecx added to the stack pointer. */
11567 if (style == 2)
11568 {
11569 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11570
11571 /* Stack align doesn't work with eh_return. */
11572 gcc_assert (!stack_realign_drap);
11573 /* Neither do regparm nested functions. */
11574 gcc_assert (!ix86_static_chain_on_stack);
11575
11576 if (frame_pointer_needed)
11577 {
11578 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11579 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11580 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11581
11582 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11583 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11584
11585 /* Note that we use SA as a temporary CFA, as the return
11586 address is at the proper place relative to it. We
11587 pretend this happens at the FP restore insn because
11588 prior to this insn the FP would be stored at the wrong
11589 offset relative to SA, and after this insn we have no
11590 other reasonable register to use for the CFA. We don't
11591 bother resetting the CFA to the SP for the duration of
11592 the return insn. */
11593 add_reg_note (insn, REG_CFA_DEF_CFA,
11594 plus_constant (Pmode, sa, UNITS_PER_WORD));
11595 ix86_add_queued_cfa_restore_notes (insn);
11596 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11597 RTX_FRAME_RELATED_P (insn) = 1;
11598
11599 m->fs.cfa_reg = sa;
11600 m->fs.cfa_offset = UNITS_PER_WORD;
11601 m->fs.fp_valid = false;
11602
11603 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11604 const0_rtx, style, false);
11605 }
11606 else
11607 {
11608 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11609 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11610 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11611 ix86_add_queued_cfa_restore_notes (insn);
11612
11613 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11614 if (m->fs.cfa_offset != UNITS_PER_WORD)
11615 {
11616 m->fs.cfa_offset = UNITS_PER_WORD;
11617 add_reg_note (insn, REG_CFA_DEF_CFA,
11618 plus_constant (Pmode, stack_pointer_rtx,
11619 UNITS_PER_WORD));
11620 RTX_FRAME_RELATED_P (insn) = 1;
11621 }
11622 }
11623 m->fs.sp_offset = UNITS_PER_WORD;
11624 m->fs.sp_valid = true;
11625 }
11626 }
11627 else
11628 {
11629 /* SEH requires that the function end with (1) a stack adjustment
11630 if necessary, (2) a sequence of pops, and (3) a return or
11631 jump instruction. Prevent insns from the function body from
11632 being scheduled into this sequence. */
11633 if (TARGET_SEH)
11634 {
11635 /* Prevent a catch region from being adjacent to the standard
11636 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11637 several other flags that would be interesting to test are
11638 set up yet. */
11639 if (flag_non_call_exceptions)
11640 emit_insn (gen_nops (const1_rtx));
11641 else
11642 emit_insn (gen_blockage ());
11643 }
11644
11645 /* First step is to deallocate the stack frame so that we can
11646 pop the registers. Also do it on SEH target for very large
11647 frame as the emitted instructions aren't allowed by the ABI in
11648 epilogues. */
11649 if (!m->fs.sp_valid
11650 || (TARGET_SEH
11651 && (m->fs.sp_offset - frame.reg_save_offset
11652 >= SEH_MAX_FRAME_SIZE)))
11653 {
11654 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11655 GEN_INT (m->fs.fp_offset
11656 - frame.reg_save_offset),
11657 style, false);
11658 }
11659 else if (m->fs.sp_offset != frame.reg_save_offset)
11660 {
11661 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11662 GEN_INT (m->fs.sp_offset
11663 - frame.reg_save_offset),
11664 style,
11665 m->fs.cfa_reg == stack_pointer_rtx);
11666 }
11667
11668 ix86_emit_restore_regs_using_pop ();
11669 }
11670
11671 /* If we used a frame pointer and haven't already got rid of it,
11672 then do so now. */
11673 if (m->fs.fp_valid)
11674 {
11675 /* If the stack pointer is valid and pointing at the frame
11676 pointer store address, then we only need a pop. */
11677 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11678 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11679 /* Leave results in shorter dependency chains on CPUs that are
11680 able to grok it fast. */
11681 else if (TARGET_USE_LEAVE
11682 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11683 || !cfun->machine->use_fast_prologue_epilogue)
11684 ix86_emit_leave ();
11685 else
11686 {
11687 pro_epilogue_adjust_stack (stack_pointer_rtx,
11688 hard_frame_pointer_rtx,
11689 const0_rtx, style, !using_drap);
11690 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11691 }
11692 }
11693
11694 if (using_drap)
11695 {
11696 int param_ptr_offset = UNITS_PER_WORD;
11697 rtx insn;
11698
11699 gcc_assert (stack_realign_drap);
11700
11701 if (ix86_static_chain_on_stack)
11702 param_ptr_offset += UNITS_PER_WORD;
11703 if (!call_used_regs[REGNO (crtl->drap_reg)])
11704 param_ptr_offset += UNITS_PER_WORD;
11705
11706 insn = emit_insn (gen_rtx_SET
11707 (VOIDmode, stack_pointer_rtx,
11708 gen_rtx_PLUS (Pmode,
11709 crtl->drap_reg,
11710 GEN_INT (-param_ptr_offset))));
11711 m->fs.cfa_reg = stack_pointer_rtx;
11712 m->fs.cfa_offset = param_ptr_offset;
11713 m->fs.sp_offset = param_ptr_offset;
11714 m->fs.realigned = false;
11715
11716 add_reg_note (insn, REG_CFA_DEF_CFA,
11717 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11718 GEN_INT (param_ptr_offset)));
11719 RTX_FRAME_RELATED_P (insn) = 1;
11720
11721 if (!call_used_regs[REGNO (crtl->drap_reg)])
11722 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11723 }
11724
11725 /* At this point the stack pointer must be valid, and we must have
11726 restored all of the registers. We may not have deallocated the
11727 entire stack frame. We've delayed this until now because it may
11728 be possible to merge the local stack deallocation with the
11729 deallocation forced by ix86_static_chain_on_stack. */
11730 gcc_assert (m->fs.sp_valid);
11731 gcc_assert (!m->fs.fp_valid);
11732 gcc_assert (!m->fs.realigned);
11733 if (m->fs.sp_offset != UNITS_PER_WORD)
11734 {
11735 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11736 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11737 style, true);
11738 }
11739 else
11740 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11741
11742 /* Sibcall epilogues don't want a return instruction. */
11743 if (style == 0)
11744 {
11745 m->fs = frame_state_save;
11746 return;
11747 }
11748
11749 if (crtl->args.pops_args && crtl->args.size)
11750 {
11751 rtx popc = GEN_INT (crtl->args.pops_args);
11752
11753 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
11754 address, do an explicit add, and jump indirectly to the caller. */
11755
11756 if (crtl->args.pops_args >= 65536)
11757 {
11758 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11759 rtx insn;
11760
11761 /* There is no "pascal" calling convention in any 64bit ABI. */
11762 gcc_assert (!TARGET_64BIT);
11763
11764 insn = emit_insn (gen_pop (ecx));
11765 m->fs.cfa_offset -= UNITS_PER_WORD;
11766 m->fs.sp_offset -= UNITS_PER_WORD;
11767
11768 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11769 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11770 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11771 add_reg_note (insn, REG_CFA_REGISTER,
11772 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11773 RTX_FRAME_RELATED_P (insn) = 1;
11774
11775 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11776 popc, -1, true);
11777 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11778 }
11779 else
11780 emit_jump_insn (gen_simple_return_pop_internal (popc));
11781 }
11782 else
11783 emit_jump_insn (gen_simple_return_internal ());
11784
11785 /* Restore the state back to the state from the prologue,
11786 so that it's correct for the next epilogue. */
11787 m->fs = frame_state_save;
11788 }
11789
11790 /* Reset from the function's potential modifications. */
11791
11792 static void
11793 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
11794 {
11795 if (pic_offset_table_rtx)
11796 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11797 #if TARGET_MACHO
11798 /* Mach-O doesn't support labels at the end of objects, so if
11799 it looks like we might want one, insert a NOP. */
11800 {
11801 rtx_insn *insn = get_last_insn ();
11802 rtx_insn *deleted_debug_label = NULL;
11803 while (insn
11804 && NOTE_P (insn)
11805 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11806 {
11807 /* Don't insert a nop merely because of NOTE_INSN_DELETED_DEBUG_LABEL
11808 notes; instead set their CODE_LABEL_NUMBER to -1,
11809 otherwise there would be code generation differences
11810 between -g and -g0. */
11811 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11812 deleted_debug_label = insn;
11813 insn = PREV_INSN (insn);
11814 }
11815 if (insn
11816 && (LABEL_P (insn)
11817 || (NOTE_P (insn)
11818 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11819 fputs ("\tnop\n", file);
11820 else if (deleted_debug_label)
11821 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11822 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11823 CODE_LABEL_NUMBER (insn) = -1;
11824 }
11825 #endif
11826
11827 }
11828
11829 /* Return a scratch register to use in the split stack prologue. The
11830 split stack prologue is used for -fsplit-stack. It comprises the first
11831 instructions in the function, even before the regular prologue.
11832 The scratch register can be any caller-saved register which is not
11833 used for parameters or for the static chain. */
11834
11835 static unsigned int
11836 split_stack_prologue_scratch_regno (void)
11837 {
11838 if (TARGET_64BIT)
11839 return R11_REG;
11840 else
11841 {
11842 bool is_fastcall, is_thiscall;
11843 int regparm;
11844
11845 is_fastcall = (lookup_attribute ("fastcall",
11846 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11847 != NULL);
11848 is_thiscall = (lookup_attribute ("thiscall",
11849 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11850 != NULL);
11851 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11852
11853 if (is_fastcall)
11854 {
11855 if (DECL_STATIC_CHAIN (cfun->decl))
11856 {
11857 sorry ("-fsplit-stack does not support fastcall with "
11858 "nested function");
11859 return INVALID_REGNUM;
11860 }
11861 return AX_REG;
11862 }
11863 else if (is_thiscall)
11864 {
11865 if (!DECL_STATIC_CHAIN (cfun->decl))
11866 return DX_REG;
11867 return AX_REG;
11868 }
11869 else if (regparm < 3)
11870 {
11871 if (!DECL_STATIC_CHAIN (cfun->decl))
11872 return CX_REG;
11873 else
11874 {
11875 if (regparm >= 2)
11876 {
11877 sorry ("-fsplit-stack does not support 2 register "
11878 "parameters for a nested function");
11879 return INVALID_REGNUM;
11880 }
11881 return DX_REG;
11882 }
11883 }
11884 else
11885 {
11886 /* FIXME: We could make this work by pushing a register
11887 around the addition and comparison. */
11888 sorry ("-fsplit-stack does not support 3 register parameters");
11889 return INVALID_REGNUM;
11890 }
11891 }
11892 }
11893
11894 /* A SYMBOL_REF for the function which allocates new stack space for
11895 -fsplit-stack. */
11896
11897 static GTY(()) rtx split_stack_fn;
11898
11899 /* A SYMBOL_REF for the more-stack function when using the large
11900 model. */
11901
11902 static GTY(()) rtx split_stack_fn_large;
11903
11904 /* Handle -fsplit-stack. These are the first instructions in the
11905 function, even before the regular prologue. */
11906
11907 void
11908 ix86_expand_split_stack_prologue (void)
11909 {
11910 struct ix86_frame frame;
11911 HOST_WIDE_INT allocate;
11912 unsigned HOST_WIDE_INT args_size;
11913 rtx_code_label *label;
11914 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11915 rtx scratch_reg = NULL_RTX;
11916 rtx_code_label *varargs_label = NULL;
11917 rtx fn;
11918
11919 gcc_assert (flag_split_stack && reload_completed);
11920
11921 ix86_finalize_stack_realign_flags ();
11922 ix86_compute_frame_layout (&frame);
11923 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11924
11925 /* This is the label we will branch to if we have enough stack
11926 space. We expect the basic block reordering pass to reverse this
11927 branch if optimizing, so that we branch in the unlikely case. */
11928 label = gen_label_rtx ();
11929
11930 /* We need to compare the stack pointer minus the frame size with
11931 the stack boundary in the TCB. The stack boundary always gives
11932 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11933 can compare directly. Otherwise we need to do an addition. */
11934
11935 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11936 UNSPEC_STACK_CHECK);
11937 limit = gen_rtx_CONST (Pmode, limit);
11938 limit = gen_rtx_MEM (Pmode, limit);
11939 if (allocate < SPLIT_STACK_AVAILABLE)
11940 current = stack_pointer_rtx;
11941 else
11942 {
11943 unsigned int scratch_regno;
11944 rtx offset;
11945
11946 /* We need a scratch register to hold the stack pointer minus
11947 the required frame size. Since this is the very start of the
11948 function, the scratch register can be any caller-saved
11949 register which is not used for parameters. */
11950 offset = GEN_INT (- allocate);
11951 scratch_regno = split_stack_prologue_scratch_regno ();
11952 if (scratch_regno == INVALID_REGNUM)
11953 return;
11954 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11955 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11956 {
11957 /* We don't use ix86_gen_add3 in this case because it will
11958 want to split to lea, but when not optimizing the insn
11959 will not be split after this point. */
11960 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11961 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11962 offset)));
11963 }
11964 else
11965 {
11966 emit_move_insn (scratch_reg, offset);
11967 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11968 stack_pointer_rtx));
11969 }
11970 current = scratch_reg;
11971 }
11972
11973 ix86_expand_branch (GEU, current, limit, label);
11974 jump_insn = get_last_insn ();
11975 JUMP_LABEL (jump_insn) = label;
11976
11977 /* Mark the jump as very likely to be taken. */
11978 add_int_reg_note (jump_insn, REG_BR_PROB,
11979 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
11980
11981 if (split_stack_fn == NULL_RTX)
11982 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11983 fn = split_stack_fn;
11984
11985 /* Get more stack space. We pass in the desired stack space and the
11986 size of the arguments to copy to the new stack. In 32-bit mode
11987 we push the parameters; __morestack will return on a new stack
11988 anyhow. In 64-bit mode we pass the parameters in r10 and
11989 r11. */
11990 allocate_rtx = GEN_INT (allocate);
11991 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11992 call_fusage = NULL_RTX;
11993 if (TARGET_64BIT)
11994 {
11995 rtx reg10, reg11;
11996
11997 reg10 = gen_rtx_REG (Pmode, R10_REG);
11998 reg11 = gen_rtx_REG (Pmode, R11_REG);
11999
12000 /* If this function uses a static chain, it will be in %r10.
12001 Preserve it across the call to __morestack. */
12002 if (DECL_STATIC_CHAIN (cfun->decl))
12003 {
12004 rtx rax;
12005
12006 rax = gen_rtx_REG (word_mode, AX_REG);
12007 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12008 use_reg (&call_fusage, rax);
12009 }
12010
12011 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12012 && !TARGET_PECOFF)
12013 {
12014 HOST_WIDE_INT argval;
12015
12016 gcc_assert (Pmode == DImode);
12017 /* When using the large model we need to load the address
12018 into a register, and we've run out of registers. So we
12019 switch to a different calling convention, and we call a
12020 different function: __morestack_large. We pass the
12021 argument size in the upper 32 bits of r10 and pass the
12022 frame size in the lower 32 bits. */
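 /* Illustration (values are made up): with args_size == 0x20 and
    allocate == 0x1000 the packed value is 0x0000002000001000, so
    __morestack_large_model sees the frame size in the low 32 bits
    of %r10 and the argument size in the high 32 bits. */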
12023 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12024 gcc_assert ((args_size & 0xffffffff) == args_size);
12025
12026 if (split_stack_fn_large == NULL_RTX)
12027 split_stack_fn_large =
12028 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12029
12030 if (ix86_cmodel == CM_LARGE_PIC)
12031 {
12032 rtx_code_label *label;
12033 rtx x;
12034
12035 label = gen_label_rtx ();
12036 emit_label (label);
12037 LABEL_PRESERVE_P (label) = 1;
12038 emit_insn (gen_set_rip_rex64 (reg10, label));
12039 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12040 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12041 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12042 UNSPEC_GOT);
12043 x = gen_rtx_CONST (Pmode, x);
12044 emit_move_insn (reg11, x);
12045 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12046 x = gen_const_mem (Pmode, x);
12047 emit_move_insn (reg11, x);
12048 }
12049 else
12050 emit_move_insn (reg11, split_stack_fn_large);
12051
12052 fn = reg11;
12053
12054 argval = ((args_size << 16) << 16) + allocate;
12055 emit_move_insn (reg10, GEN_INT (argval));
12056 }
12057 else
12058 {
12059 emit_move_insn (reg10, allocate_rtx);
12060 emit_move_insn (reg11, GEN_INT (args_size));
12061 use_reg (&call_fusage, reg11);
12062 }
12063
12064 use_reg (&call_fusage, reg10);
12065 }
12066 else
12067 {
12068 emit_insn (gen_push (GEN_INT (args_size)));
12069 emit_insn (gen_push (allocate_rtx));
12070 }
12071 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12072 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12073 NULL_RTX, false);
12074 add_function_usage_to (call_insn, call_fusage);
12075
12076 /* In order to make call/return prediction work right, we now need
12077 to execute a return instruction. See
12078 libgcc/config/i386/morestack.S for the details on how this works.
12079
12080 For flow purposes gcc must not see this as a return
12081 instruction--we need control flow to continue at the subsequent
12082 label. Therefore, we use an unspec. */
12083 gcc_assert (crtl->args.pops_args < 65536);
12084 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12085
12086 /* If we are in 64-bit mode and this function uses a static chain,
12087 we saved %r10 in %rax before calling __morestack. */
12088 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12089 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12090 gen_rtx_REG (word_mode, AX_REG));
12091
12092 /* If this function calls va_start, we need to store a pointer to
12093 the arguments on the old stack, because they may not have been
12094 all copied to the new stack. At this point the old stack can be
12095 found at the frame pointer value used by __morestack, because
12096 __morestack has set that up before calling back to us. Here we
12097 store that pointer in a scratch register, and in
12098 ix86_expand_prologue we store the scratch register in a stack
12099 slot. */
12100 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12101 {
12102 unsigned int scratch_regno;
12103 rtx frame_reg;
12104 int words;
12105
12106 scratch_regno = split_stack_prologue_scratch_regno ();
12107 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12108 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12109
12110 /* 64-bit:
12111 fp -> old fp value
12112 return address within this function
12113 return address of caller of this function
12114 stack arguments
12115 So we add three words to get to the stack arguments.
12116
12117 32-bit:
12118 fp -> old fp value
12119 return address within this function
12120 first argument to __morestack
12121 second argument to __morestack
12122 return address of caller of this function
12123 stack arguments
12124 So we add five words to get to the stack arguments.
12125 */
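 /* For example, on x86_64 (UNITS_PER_WORD == 8) the scratch register
    ends up pointing at fp + 24, and for 32-bit (UNITS_PER_WORD == 4)
    at fp + 20. */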
12126 words = TARGET_64BIT ? 3 : 5;
12127 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12128 gen_rtx_PLUS (Pmode, frame_reg,
12129 GEN_INT (words * UNITS_PER_WORD))));
12130
12131 varargs_label = gen_label_rtx ();
12132 emit_jump_insn (gen_jump (varargs_label));
12133 JUMP_LABEL (get_last_insn ()) = varargs_label;
12134
12135 emit_barrier ();
12136 }
12137
12138 emit_label (label);
12139 LABEL_NUSES (label) = 1;
12140
12141 /* If this function calls va_start, we now have to set the scratch
12142 register for the case where we do not call __morestack. In this
12143 case we need to set it based on the stack pointer. */
12144 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12145 {
12146 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12147 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12148 GEN_INT (UNITS_PER_WORD))));
12149
12150 emit_label (varargs_label);
12151 LABEL_NUSES (varargs_label) = 1;
12152 }
12153 }
12154
12155 /* We may have to tell the dataflow pass that the split stack prologue
12156 is initializing a scratch register. */
12157
12158 static void
12159 ix86_live_on_entry (bitmap regs)
12160 {
12161 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12162 {
12163 gcc_assert (flag_split_stack);
12164 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12165 }
12166 }
12167 \f
12168 /* Extract the parts of an RTL expression that is a valid memory address
12169 for an instruction. Return 0 if the structure of the address is
12170 grossly off. Return -1 if the address contains ASHIFT, so it is not
12171 strictly valid, but is still used for computing the length of the lea instruction. */
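/* As an illustration (A and B stand for any suitable registers): the Pmode
   address (plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 12))
   decomposes into out->base = A, out->index = B, out->scale = 4 and
   out->disp = (const_int 12), i.e. the hardware form 12(%A,%B,4). */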
12172
12173 int
12174 ix86_decompose_address (rtx addr, struct ix86_address *out)
12175 {
12176 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12177 rtx base_reg, index_reg;
12178 HOST_WIDE_INT scale = 1;
12179 rtx scale_rtx = NULL_RTX;
12180 rtx tmp;
12181 int retval = 1;
12182 enum ix86_address_seg seg = SEG_DEFAULT;
12183
12184 /* Allow zero-extended SImode addresses;
12185 they will be emitted with the addr32 prefix. */
12186 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12187 {
12188 if (GET_CODE (addr) == ZERO_EXTEND
12189 && GET_MODE (XEXP (addr, 0)) == SImode)
12190 {
12191 addr = XEXP (addr, 0);
12192 if (CONST_INT_P (addr))
12193 return 0;
12194 }
12195 else if (GET_CODE (addr) == AND
12196 && const_32bit_mask (XEXP (addr, 1), DImode))
12197 {
12198 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12199 if (addr == NULL_RTX)
12200 return 0;
12201
12202 if (CONST_INT_P (addr))
12203 return 0;
12204 }
12205 }
12206
12207 /* Allow SImode subregs of DImode addresses;
12208 they will be emitted with the addr32 prefix. */
12209 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12210 {
12211 if (GET_CODE (addr) == SUBREG
12212 && GET_MODE (SUBREG_REG (addr)) == DImode)
12213 {
12214 addr = SUBREG_REG (addr);
12215 if (CONST_INT_P (addr))
12216 return 0;
12217 }
12218 }
12219
12220 if (REG_P (addr))
12221 base = addr;
12222 else if (GET_CODE (addr) == SUBREG)
12223 {
12224 if (REG_P (SUBREG_REG (addr)))
12225 base = addr;
12226 else
12227 return 0;
12228 }
12229 else if (GET_CODE (addr) == PLUS)
12230 {
12231 rtx addends[4], op;
12232 int n = 0, i;
12233
12234 op = addr;
12235 do
12236 {
12237 if (n >= 4)
12238 return 0;
12239 addends[n++] = XEXP (op, 1);
12240 op = XEXP (op, 0);
12241 }
12242 while (GET_CODE (op) == PLUS);
12243 if (n >= 4)
12244 return 0;
12245 addends[n] = op;
12246
12247 for (i = n; i >= 0; --i)
12248 {
12249 op = addends[i];
12250 switch (GET_CODE (op))
12251 {
12252 case MULT:
12253 if (index)
12254 return 0;
12255 index = XEXP (op, 0);
12256 scale_rtx = XEXP (op, 1);
12257 break;
12258
12259 case ASHIFT:
12260 if (index)
12261 return 0;
12262 index = XEXP (op, 0);
12263 tmp = XEXP (op, 1);
12264 if (!CONST_INT_P (tmp))
12265 return 0;
12266 scale = INTVAL (tmp);
12267 if ((unsigned HOST_WIDE_INT) scale > 3)
12268 return 0;
12269 scale = 1 << scale;
12270 break;
12271
12272 case ZERO_EXTEND:
12273 op = XEXP (op, 0);
12274 if (GET_CODE (op) != UNSPEC)
12275 return 0;
12276 /* FALLTHRU */
12277
12278 case UNSPEC:
12279 if (XINT (op, 1) == UNSPEC_TP
12280 && TARGET_TLS_DIRECT_SEG_REFS
12281 && seg == SEG_DEFAULT)
12282 seg = DEFAULT_TLS_SEG_REG;
12283 else
12284 return 0;
12285 break;
12286
12287 case SUBREG:
12288 if (!REG_P (SUBREG_REG (op)))
12289 return 0;
12290 /* FALLTHRU */
12291
12292 case REG:
12293 if (!base)
12294 base = op;
12295 else if (!index)
12296 index = op;
12297 else
12298 return 0;
12299 break;
12300
12301 case CONST:
12302 case CONST_INT:
12303 case SYMBOL_REF:
12304 case LABEL_REF:
12305 if (disp)
12306 return 0;
12307 disp = op;
12308 break;
12309
12310 default:
12311 return 0;
12312 }
12313 }
12314 }
12315 else if (GET_CODE (addr) == MULT)
12316 {
12317 index = XEXP (addr, 0); /* index*scale */
12318 scale_rtx = XEXP (addr, 1);
12319 }
12320 else if (GET_CODE (addr) == ASHIFT)
12321 {
12322 /* We're called for lea too, which implements ashift on occasion. */
12323 index = XEXP (addr, 0);
12324 tmp = XEXP (addr, 1);
12325 if (!CONST_INT_P (tmp))
12326 return 0;
12327 scale = INTVAL (tmp);
12328 if ((unsigned HOST_WIDE_INT) scale > 3)
12329 return 0;
12330 scale = 1 << scale;
12331 retval = -1;
12332 }
12333 else
12334 disp = addr; /* displacement */
12335
12336 if (index)
12337 {
12338 if (REG_P (index))
12339 ;
12340 else if (GET_CODE (index) == SUBREG
12341 && REG_P (SUBREG_REG (index)))
12342 ;
12343 else
12344 return 0;
12345 }
12346
12347 /* Extract the integral value of scale. */
12348 if (scale_rtx)
12349 {
12350 if (!CONST_INT_P (scale_rtx))
12351 return 0;
12352 scale = INTVAL (scale_rtx);
12353 }
12354
12355 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12356 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12357
12358 /* Avoid useless 0 displacement. */
12359 if (disp == const0_rtx && (base || index))
12360 disp = NULL_RTX;
12361
12362 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12363 if (base_reg && index_reg && scale == 1
12364 && (index_reg == arg_pointer_rtx
12365 || index_reg == frame_pointer_rtx
12366 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12367 {
12368 rtx tmp;
12369 tmp = base, base = index, index = tmp;
12370 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12371 }
12372
12373 /* Special case: %ebp cannot be encoded as a base without a displacement.
12374 Similarly %r13. */
12375 if (!disp
12376 && base_reg
12377 && (base_reg == hard_frame_pointer_rtx
12378 || base_reg == frame_pointer_rtx
12379 || base_reg == arg_pointer_rtx
12380 || (REG_P (base_reg)
12381 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12382 || REGNO (base_reg) == R13_REG))))
12383 disp = const0_rtx;
12384
12385 /* Special case: on K6, [%esi] makes the instruction vector decoded.
12386 Avoid this by transforming to [%esi+0].
12387 Reload calls address legitimization without cfun defined, so we need
12388 to test cfun for being non-NULL. */
12389 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12390 && base_reg && !index_reg && !disp
12391 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12392 disp = const0_rtx;
12393
12394 /* Special case: encode reg+reg instead of reg*2. */
12395 if (!base && index && scale == 2)
12396 base = index, base_reg = index_reg, scale = 1;
12397
12398 /* Special case: scaling cannot be encoded without base or displacement. */
12399 if (!base && !disp && index && scale != 1)
12400 disp = const0_rtx;
12401
12402 out->base = base;
12403 out->index = index;
12404 out->disp = disp;
12405 out->scale = scale;
12406 out->seg = seg;
12407
12408 return retval;
12409 }
12410 \f
12411 /* Return cost of the memory address x.
12412 For i386, it is better to use a complex address than let gcc copy
12413 the address into a reg and make a new pseudo. But not if the address
12414 requires two regs - that would mean more pseudos with longer
12415 lifetimes. */
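/* In effect: an address built from two distinct pseudo registers, such as
   (plus (reg P1) (reg P2)), costs 3; an address using a single pseudo
   costs 2; and an address using only hard registers costs 1 (before the
   K6 penalty below). */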
12416 static int
12417 ix86_address_cost (rtx x, enum machine_mode, addr_space_t, bool)
12418 {
12419 struct ix86_address parts;
12420 int cost = 1;
12421 int ok = ix86_decompose_address (x, &parts);
12422
12423 gcc_assert (ok);
12424
12425 if (parts.base && GET_CODE (parts.base) == SUBREG)
12426 parts.base = SUBREG_REG (parts.base);
12427 if (parts.index && GET_CODE (parts.index) == SUBREG)
12428 parts.index = SUBREG_REG (parts.index);
12429
12430 /* Attempt to minimize number of registers in the address. */
12431 if ((parts.base
12432 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12433 || (parts.index
12434 && (!REG_P (parts.index)
12435 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12436 cost++;
12437
12438 if (parts.base
12439 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12440 && parts.index
12441 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12442 && parts.base != parts.index)
12443 cost++;
12444
12445 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12446 since its predecode logic can't detect the length of instructions
12447 and it degenerates to vector decoding. Increase the cost of such
12448 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12449 to split such addresses or even refuse such addresses at all.
12450
12451 Following addressing modes are affected:
12452 [base+scale*index]
12453 [scale*index+disp]
12454 [base+index]
12455
12456 The first and last case may be avoidable by explicitly coding the zero in
12457 the memory address, but I don't have an AMD K6 machine handy to check this
12458 theory. */
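 /* For instance, (%eax,%ecx,2) and 4(,%ecx,2) both fall into the
    penalized category below, while 4(%eax,%ecx,2) does not. */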
12459
12460 if (TARGET_K6
12461 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12462 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12463 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12464 cost += 10;
12465
12466 return cost;
12467 }
12468 \f
12469 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12470 this is used to form addresses to local data when -fPIC is in
12471 use. */
12472
12473 static bool
12474 darwin_local_data_pic (rtx disp)
12475 {
12476 return (GET_CODE (disp) == UNSPEC
12477 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12478 }
12479
12480 /* Determine if a given RTX is a valid constant. We already know this
12481 satisfies CONSTANT_P. */
12482
12483 static bool
12484 ix86_legitimate_constant_p (enum machine_mode, rtx x)
12485 {
12486 switch (GET_CODE (x))
12487 {
12488 case CONST:
12489 x = XEXP (x, 0);
12490
12491 if (GET_CODE (x) == PLUS)
12492 {
12493 if (!CONST_INT_P (XEXP (x, 1)))
12494 return false;
12495 x = XEXP (x, 0);
12496 }
12497
12498 if (TARGET_MACHO && darwin_local_data_pic (x))
12499 return true;
12500
12501 /* Only some unspecs are valid as "constants". */
12502 if (GET_CODE (x) == UNSPEC)
12503 switch (XINT (x, 1))
12504 {
12505 case UNSPEC_GOT:
12506 case UNSPEC_GOTOFF:
12507 case UNSPEC_PLTOFF:
12508 return TARGET_64BIT;
12509 case UNSPEC_TPOFF:
12510 case UNSPEC_NTPOFF:
12511 x = XVECEXP (x, 0, 0);
12512 return (GET_CODE (x) == SYMBOL_REF
12513 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12514 case UNSPEC_DTPOFF:
12515 x = XVECEXP (x, 0, 0);
12516 return (GET_CODE (x) == SYMBOL_REF
12517 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12518 default:
12519 return false;
12520 }
12521
12522 /* We must have drilled down to a symbol. */
12523 if (GET_CODE (x) == LABEL_REF)
12524 return true;
12525 if (GET_CODE (x) != SYMBOL_REF)
12526 return false;
12527 /* FALLTHRU */
12528
12529 case SYMBOL_REF:
12530 /* TLS symbols are never valid. */
12531 if (SYMBOL_REF_TLS_MODEL (x))
12532 return false;
12533
12534 /* DLLIMPORT symbols are never valid. */
12535 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12536 && SYMBOL_REF_DLLIMPORT_P (x))
12537 return false;
12538
12539 #if TARGET_MACHO
12540 /* mdynamic-no-pic */
12541 if (MACHO_DYNAMIC_NO_PIC_P)
12542 return machopic_symbol_defined_p (x);
12543 #endif
12544 break;
12545
12546 case CONST_DOUBLE:
12547 if (GET_MODE (x) == TImode
12548 && x != CONST0_RTX (TImode)
12549 && !TARGET_64BIT)
12550 return false;
12551 break;
12552
12553 case CONST_VECTOR:
12554 if (!standard_sse_constant_p (x))
12555 return false;
12556
12557 default:
12558 break;
12559 }
12560
12561 /* Otherwise we handle everything else in the move patterns. */
12562 return true;
12563 }
12564
12565 /* Determine if it's legal to put X into the constant pool. This
12566 is not possible for the address of thread-local symbols, which
12567 is checked above. */
12568
12569 static bool
12570 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12571 {
12572 /* We can always put integral constants and vectors in memory. */
12573 switch (GET_CODE (x))
12574 {
12575 case CONST_INT:
12576 case CONST_DOUBLE:
12577 case CONST_VECTOR:
12578 return false;
12579
12580 default:
12581 break;
12582 }
12583 return !ix86_legitimate_constant_p (mode, x);
12584 }
12585
12586 /* Return true if the symbol is marked as dllimport, or as a stub variable,
12587 false otherwise. */
12588
12589 static bool
12590 is_imported_p (rtx x)
12591 {
12592 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12593 || GET_CODE (x) != SYMBOL_REF)
12594 return false;
12595
12596 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12597 }
12598
12599
12600 /* Nonzero if the constant value X is a legitimate general operand
12601 when generating PIC code. It is given that flag_pic is on and
12602 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12603
12604 bool
12605 legitimate_pic_operand_p (rtx x)
12606 {
12607 rtx inner;
12608
12609 switch (GET_CODE (x))
12610 {
12611 case CONST:
12612 inner = XEXP (x, 0);
12613 if (GET_CODE (inner) == PLUS
12614 && CONST_INT_P (XEXP (inner, 1)))
12615 inner = XEXP (inner, 0);
12616
12617 /* Only some unspecs are valid as "constants". */
12618 if (GET_CODE (inner) == UNSPEC)
12619 switch (XINT (inner, 1))
12620 {
12621 case UNSPEC_GOT:
12622 case UNSPEC_GOTOFF:
12623 case UNSPEC_PLTOFF:
12624 return TARGET_64BIT;
12625 case UNSPEC_TPOFF:
12626 x = XVECEXP (inner, 0, 0);
12627 return (GET_CODE (x) == SYMBOL_REF
12628 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12629 case UNSPEC_MACHOPIC_OFFSET:
12630 return legitimate_pic_address_disp_p (x);
12631 default:
12632 return false;
12633 }
12634 /* FALLTHRU */
12635
12636 case SYMBOL_REF:
12637 case LABEL_REF:
12638 return legitimate_pic_address_disp_p (x);
12639
12640 default:
12641 return true;
12642 }
12643 }
12644
12645 /* Determine if a given CONST RTX is a valid memory displacement
12646 in PIC mode. */
12647
12648 bool
12649 legitimate_pic_address_disp_p (rtx disp)
12650 {
12651 bool saw_plus;
12652
12653 /* In 64bit mode we can allow direct addresses of symbols and labels
12654 when they are not dynamic symbols. */
12655 if (TARGET_64BIT)
12656 {
12657 rtx op0 = disp, op1;
12658
12659 switch (GET_CODE (disp))
12660 {
12661 case LABEL_REF:
12662 return true;
12663
12664 case CONST:
12665 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12666 break;
12667 op0 = XEXP (XEXP (disp, 0), 0);
12668 op1 = XEXP (XEXP (disp, 0), 1);
12669 if (!CONST_INT_P (op1)
12670 || INTVAL (op1) >= 16*1024*1024
12671 || INTVAL (op1) < -16*1024*1024)
12672 break;
12673 if (GET_CODE (op0) == LABEL_REF)
12674 return true;
12675 if (GET_CODE (op0) == CONST
12676 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12677 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12678 return true;
12679 if (GET_CODE (op0) == UNSPEC
12680 && XINT (op0, 1) == UNSPEC_PCREL)
12681 return true;
12682 if (GET_CODE (op0) != SYMBOL_REF)
12683 break;
12684 /* FALLTHRU */
12685
12686 case SYMBOL_REF:
12687 /* TLS references should always be enclosed in UNSPEC.
12688 The dllimported symbol always needs to be resolved. */
12689 if (SYMBOL_REF_TLS_MODEL (op0)
12690 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12691 return false;
12692
12693 if (TARGET_PECOFF)
12694 {
12695 if (is_imported_p (op0))
12696 return true;
12697
12698 if (SYMBOL_REF_FAR_ADDR_P (op0)
12699 || !SYMBOL_REF_LOCAL_P (op0))
12700 break;
12701
12702 /* Function symbols need to be resolved only for
12703 the large model.
12704 For the small model we don't need to resolve anything
12705 here. */
12706 if ((ix86_cmodel != CM_LARGE_PIC
12707 && SYMBOL_REF_FUNCTION_P (op0))
12708 || ix86_cmodel == CM_SMALL_PIC)
12709 return true;
12710 /* Non-external symbols don't need to be resolved for
12711 the large and medium models. */
12712 if ((ix86_cmodel == CM_LARGE_PIC
12713 || ix86_cmodel == CM_MEDIUM_PIC)
12714 && !SYMBOL_REF_EXTERNAL_P (op0))
12715 return true;
12716 }
12717 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12718 && SYMBOL_REF_LOCAL_P (op0)
12719 && ix86_cmodel != CM_LARGE_PIC)
12720 return true;
12721 break;
12722
12723 default:
12724 break;
12725 }
12726 }
12727 if (GET_CODE (disp) != CONST)
12728 return false;
12729 disp = XEXP (disp, 0);
12730
12731 if (TARGET_64BIT)
12732 {
12733 /* It is not safe to allow PLUS expressions here; that would exceed the
12734 limited allowed distance of GOT references. We should not need these anyway. */
12735 if (GET_CODE (disp) != UNSPEC
12736 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12737 && XINT (disp, 1) != UNSPEC_GOTOFF
12738 && XINT (disp, 1) != UNSPEC_PCREL
12739 && XINT (disp, 1) != UNSPEC_PLTOFF))
12740 return false;
12741
12742 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12743 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12744 return false;
12745 return true;
12746 }
12747
12748 saw_plus = false;
12749 if (GET_CODE (disp) == PLUS)
12750 {
12751 if (!CONST_INT_P (XEXP (disp, 1)))
12752 return false;
12753 disp = XEXP (disp, 0);
12754 saw_plus = true;
12755 }
12756
12757 if (TARGET_MACHO && darwin_local_data_pic (disp))
12758 return true;
12759
12760 if (GET_CODE (disp) != UNSPEC)
12761 return false;
12762
12763 switch (XINT (disp, 1))
12764 {
12765 case UNSPEC_GOT:
12766 if (saw_plus)
12767 return false;
12768 /* We need to check for both symbols and labels because VxWorks loads
12769 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12770 details. */
12771 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12772 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12773 case UNSPEC_GOTOFF:
12774 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12775 While the ABI also specifies a 32bit relocation, we don't produce it in
12776 the small PIC model at all. */
12777 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12778 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12779 && !TARGET_64BIT)
12780 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12781 return false;
12782 case UNSPEC_GOTTPOFF:
12783 case UNSPEC_GOTNTPOFF:
12784 case UNSPEC_INDNTPOFF:
12785 if (saw_plus)
12786 return false;
12787 disp = XVECEXP (disp, 0, 0);
12788 return (GET_CODE (disp) == SYMBOL_REF
12789 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12790 case UNSPEC_NTPOFF:
12791 disp = XVECEXP (disp, 0, 0);
12792 return (GET_CODE (disp) == SYMBOL_REF
12793 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12794 case UNSPEC_DTPOFF:
12795 disp = XVECEXP (disp, 0, 0);
12796 return (GET_CODE (disp) == SYMBOL_REF
12797 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12798 }
12799
12800 return false;
12801 }
12802
12803 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns true if
12804 X (or part of it) was reloaded, in which case the calling macro
12805 should goto WIN; returns false if the address should be handled
12806 in the usual way. */
12807
12808 bool
12809 ix86_legitimize_reload_address (rtx x, enum machine_mode, int opnum, int type,
12810 int)
12811 {
12812 /* Reload can generate:
12813
12814 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12815 (reg:DI 97))
12816 (reg:DI 2 cx))
12817
12818 This RTX is rejected by ix86_legitimate_address_p due to
12819 non-strictness of base register 97. Following this rejection,
12820 reload pushes all three components into separate registers,
12821 creating an invalid memory address RTX.
12822
12823 The following code reloads only the invalid part of the
12824 memory address RTX. */
12825
12826 if (GET_CODE (x) == PLUS
12827 && REG_P (XEXP (x, 1))
12828 && GET_CODE (XEXP (x, 0)) == PLUS
12829 && REG_P (XEXP (XEXP (x, 0), 1)))
12830 {
12831 rtx base, index;
12832 bool something_reloaded = false;
12833
12834 base = XEXP (XEXP (x, 0), 1);
12835 if (!REG_OK_FOR_BASE_STRICT_P (base))
12836 {
12837 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12838 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12839 opnum, (enum reload_type) type);
12840 something_reloaded = true;
12841 }
12842
12843 index = XEXP (x, 1);
12844 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12845 {
12846 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12847 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12848 opnum, (enum reload_type) type);
12849 something_reloaded = true;
12850 }
12851
12852 gcc_assert (something_reloaded);
12853 return true;
12854 }
12855
12856 return false;
12857 }
12858
12859 /* Determine if op is a suitable RTX for an address register.
12860 Return the naked register if a register or a register subreg is
12861 found, otherwise return NULL_RTX. */
12862
12863 static rtx
12864 ix86_validate_address_register (rtx op)
12865 {
12866 enum machine_mode mode = GET_MODE (op);
12867
12868 /* Only SImode or DImode registers can form the address. */
12869 if (mode != SImode && mode != DImode)
12870 return NULL_RTX;
12871
12872 if (REG_P (op))
12873 return op;
12874 else if (GET_CODE (op) == SUBREG)
12875 {
12876 rtx reg = SUBREG_REG (op);
12877
12878 if (!REG_P (reg))
12879 return NULL_RTX;
12880
12881 mode = GET_MODE (reg);
12882
12883 /* Don't allow SUBREGs that span more than a word. It can
12884 lead to spill failures when the register is one word out
12885 of a two word structure. */
12886 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12887 return NULL_RTX;
12888
12889 /* Allow only SUBREGs of non-eliminable hard registers. */
12890 if (register_no_elim_operand (reg, mode))
12891 return reg;
12892 }
12893
12894 /* Op is not a register. */
12895 return NULL_RTX;
12896 }
12897
12898 /* Recognizes RTL expressions that are valid memory addresses for an
12899 instruction. The MODE argument is the machine mode for the MEM
12900 expression that wants to use this address.
12901
12902 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12903 convert common non-canonical forms to canonical form so that they will
12904 be recognized. */
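/* For instance, inside a MEM the scaled index (plus (mult (reg) (const_int 4))
   (reg)) is the canonical form, whereas the equivalent
   (plus (ashift (reg) (const_int 2)) (reg)) is not and is rejected here. */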
12905
12906 static bool
12907 ix86_legitimate_address_p (enum machine_mode, rtx addr, bool strict)
12908 {
12909 struct ix86_address parts;
12910 rtx base, index, disp;
12911 HOST_WIDE_INT scale;
12912 enum ix86_address_seg seg;
12913
12914 if (ix86_decompose_address (addr, &parts) <= 0)
12915 /* Decomposition failed. */
12916 return false;
12917
12918 base = parts.base;
12919 index = parts.index;
12920 disp = parts.disp;
12921 scale = parts.scale;
12922 seg = parts.seg;
12923
12924 /* Validate base register. */
12925 if (base)
12926 {
12927 rtx reg = ix86_validate_address_register (base);
12928
12929 if (reg == NULL_RTX)
12930 return false;
12931
12932 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12933 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12934 /* Base is not valid. */
12935 return false;
12936 }
12937
12938 /* Validate index register. */
12939 if (index)
12940 {
12941 rtx reg = ix86_validate_address_register (index);
12942
12943 if (reg == NULL_RTX)
12944 return false;
12945
12946 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12947 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12948 /* Index is not valid. */
12949 return false;
12950 }
12951
12952 /* Index and base should have the same mode. */
12953 if (base && index
12954 && GET_MODE (base) != GET_MODE (index))
12955 return false;
12956
12957 /* Address override works only on the (%reg) part of %fs:(%reg). */
12958 if (seg != SEG_DEFAULT
12959 && ((base && GET_MODE (base) != word_mode)
12960 || (index && GET_MODE (index) != word_mode)))
12961 return false;
12962
12963 /* Validate scale factor. */
12964 if (scale != 1)
12965 {
12966 if (!index)
12967 /* Scale without index. */
12968 return false;
12969
12970 if (scale != 2 && scale != 4 && scale != 8)
12971 /* Scale is not a valid multiplier. */
12972 return false;
12973 }
12974
12975 /* Validate displacement. */
12976 if (disp)
12977 {
12978 if (GET_CODE (disp) == CONST
12979 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12980 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12981 switch (XINT (XEXP (disp, 0), 1))
12982 {
12983 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12984 used. While the ABI also specifies 32bit relocations, we don't produce
12985 them at all and use IP-relative addressing instead. */
12986 case UNSPEC_GOT:
12987 case UNSPEC_GOTOFF:
12988 gcc_assert (flag_pic);
12989 if (!TARGET_64BIT)
12990 goto is_legitimate_pic;
12991
12992 /* 64bit address unspec. */
12993 return false;
12994
12995 case UNSPEC_GOTPCREL:
12996 case UNSPEC_PCREL:
12997 gcc_assert (flag_pic);
12998 goto is_legitimate_pic;
12999
13000 case UNSPEC_GOTTPOFF:
13001 case UNSPEC_GOTNTPOFF:
13002 case UNSPEC_INDNTPOFF:
13003 case UNSPEC_NTPOFF:
13004 case UNSPEC_DTPOFF:
13005 break;
13006
13007 case UNSPEC_STACK_CHECK:
13008 gcc_assert (flag_split_stack);
13009 break;
13010
13011 default:
13012 /* Invalid address unspec. */
13013 return false;
13014 }
13015
13016 else if (SYMBOLIC_CONST (disp)
13017 && (flag_pic
13018 || (TARGET_MACHO
13019 #if TARGET_MACHO
13020 && MACHOPIC_INDIRECT
13021 && !machopic_operand_p (disp)
13022 #endif
13023 )))
13024 {
13025
13026 is_legitimate_pic:
13027 if (TARGET_64BIT && (index || base))
13028 {
13029 /* foo@dtpoff(%rX) is ok. */
13030 if (GET_CODE (disp) != CONST
13031 || GET_CODE (XEXP (disp, 0)) != PLUS
13032 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13033 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13034 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13035 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13036 /* Non-constant pic memory reference. */
13037 return false;
13038 }
13039 else if ((!TARGET_MACHO || flag_pic)
13040 && ! legitimate_pic_address_disp_p (disp))
13041 /* Displacement is an invalid pic construct. */
13042 return false;
13043 #if TARGET_MACHO
13044 else if (MACHO_DYNAMIC_NO_PIC_P
13045 && !ix86_legitimate_constant_p (Pmode, disp))
13046 /* Displacement must be referenced via non_lazy_pointer. */
13047 return false;
13048 #endif
13049
13050 /* This code used to verify that a symbolic pic displacement
13051 includes the pic_offset_table_rtx register.
13052
13053 While this is a good idea, unfortunately these constructs may
13054 be created by the "adds using lea" optimization for incorrect
13055 code like:
13056
13057 int a;
13058 int foo(int i)
13059 {
13060 return *(&a+i);
13061 }
13062
13063 This code is nonsensical, but results in addressing the
13064 GOT table with a pic_offset_table_rtx base. We can't
13065 just refuse it easily, since it gets matched by the
13066 "addsi3" pattern, which later gets split to lea if the
13067 output register differs from the input. While this
13068 could be handled by a separate addsi pattern for this case
13069 that never results in lea, disabling this test seems to be
13070 the easier and correct fix for the crash. */
13071 }
13072 else if (GET_CODE (disp) != LABEL_REF
13073 && !CONST_INT_P (disp)
13074 && (GET_CODE (disp) != CONST
13075 || !ix86_legitimate_constant_p (Pmode, disp))
13076 && (GET_CODE (disp) != SYMBOL_REF
13077 || !ix86_legitimate_constant_p (Pmode, disp)))
13078 /* Displacement is not constant. */
13079 return false;
13080 else if (TARGET_64BIT
13081 && !x86_64_immediate_operand (disp, VOIDmode))
13082 /* Displacement is out of range. */
13083 return false;
13084 /* In x32 mode, constant addresses are sign extended to 64bit, so
13085 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13086 else if (TARGET_X32 && !(index || base)
13087 && CONST_INT_P (disp)
13088 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13089 return false;
13090 }
13091
13092 /* Everything looks valid. */
13093 return true;
13094 }
13095
13096 /* Determine if a given RTX is a valid constant address. */
13097
13098 bool
13099 constant_address_p (rtx x)
13100 {
13101 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13102 }
13103 \f
13104 /* Return a unique alias set for the GOT. */
13105
13106 static alias_set_type
13107 ix86_GOT_alias_set (void)
13108 {
13109 static alias_set_type set = -1;
13110 if (set == -1)
13111 set = new_alias_set ();
13112 return set;
13113 }
13114
13115 /* Return a legitimate reference for ORIG (an address) using the
13116 register REG. If REG is 0, a new pseudo is generated.
13117
13118 There are two types of references that must be handled:
13119
13120 1. Global data references must load the address from the GOT, via
13121 the PIC reg. An insn is emitted to do this load, and the reg is
13122 returned.
13123
13124 2. Static data references, constant pool addresses, and code labels
13125 compute the address as an offset from the GOT, whose base is in
13126 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13127 differentiate them from global data objects. The returned
13128 address is the PIC reg + an unspec constant.
13129
13130 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13131 reg also appears in the address. */
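/* Concretely, on ia32 with -fPIC a global symbol is typically reached via a
   GOT load such as "movl foo@GOT(%ebx), %reg" (case 1 above), while a
   file-local symbol becomes the constant "%ebx + foo@GOTOFF" (case 2). */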
13132
13133 static rtx
13134 legitimize_pic_address (rtx orig, rtx reg)
13135 {
13136 rtx addr = orig;
13137 rtx new_rtx = orig;
13138
13139 #if TARGET_MACHO
13140 if (TARGET_MACHO && !TARGET_64BIT)
13141 {
13142 if (reg == 0)
13143 reg = gen_reg_rtx (Pmode);
13144 /* Use the generic Mach-O PIC machinery. */
13145 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13146 }
13147 #endif
13148
13149 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13150 {
13151 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13152 if (tmp)
13153 return tmp;
13154 }
13155
13156 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13157 new_rtx = addr;
13158 else if (TARGET_64BIT && !TARGET_PECOFF
13159 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13160 {
13161 rtx tmpreg;
13162 /* This symbol may be referenced via a displacement from the PIC
13163 base address (@GOTOFF). */
13164
13165 if (reload_in_progress)
13166 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13167 if (GET_CODE (addr) == CONST)
13168 addr = XEXP (addr, 0);
13169 if (GET_CODE (addr) == PLUS)
13170 {
13171 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13172 UNSPEC_GOTOFF);
13173 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13174 }
13175 else
13176 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13177 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13178 if (!reg)
13179 tmpreg = gen_reg_rtx (Pmode);
13180 else
13181 tmpreg = reg;
13182 emit_move_insn (tmpreg, new_rtx);
13183
13184 if (reg != 0)
13185 {
13186 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13187 tmpreg, 1, OPTAB_DIRECT);
13188 new_rtx = reg;
13189 }
13190 else
13191 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13192 }
13193 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13194 {
13195 /* This symbol may be referenced via a displacement from the PIC
13196 base address (@GOTOFF). */
13197
13198 if (reload_in_progress)
13199 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13200 if (GET_CODE (addr) == CONST)
13201 addr = XEXP (addr, 0);
13202 if (GET_CODE (addr) == PLUS)
13203 {
13204 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13205 UNSPEC_GOTOFF);
13206 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13207 }
13208 else
13209 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13210 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13211 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13212
13213 if (reg != 0)
13214 {
13215 emit_move_insn (reg, new_rtx);
13216 new_rtx = reg;
13217 }
13218 }
13219 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13220 /* We can't use @GOTOFF for text labels on VxWorks;
13221 see gotoff_operand. */
13222 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13223 {
13224 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13225 if (tmp)
13226 return tmp;
13227
13228 /* For x64 PE-COFF there is no GOT table, so we use the address
13229 directly. */
13230 if (TARGET_64BIT && TARGET_PECOFF)
13231 {
13232 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13233 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13234
13235 if (reg == 0)
13236 reg = gen_reg_rtx (Pmode);
13237 emit_move_insn (reg, new_rtx);
13238 new_rtx = reg;
13239 }
13240 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13241 {
13242 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13243 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13244 new_rtx = gen_const_mem (Pmode, new_rtx);
13245 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13246
13247 if (reg == 0)
13248 reg = gen_reg_rtx (Pmode);
13249 /* Use gen_movsi directly, otherwise the address is loaded
13250 into a register for CSE. We don't want to CSE these addresses;
13251 instead we CSE addresses from the GOT table, so skip this. */
13252 emit_insn (gen_movsi (reg, new_rtx));
13253 new_rtx = reg;
13254 }
13255 else
13256 {
13257 /* This symbol must be referenced via a load from the
13258 Global Offset Table (@GOT). */
13259
13260 if (reload_in_progress)
13261 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13262 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13263 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13264 if (TARGET_64BIT)
13265 new_rtx = force_reg (Pmode, new_rtx);
13266 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13267 new_rtx = gen_const_mem (Pmode, new_rtx);
13268 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13269
13270 if (reg == 0)
13271 reg = gen_reg_rtx (Pmode);
13272 emit_move_insn (reg, new_rtx);
13273 new_rtx = reg;
13274 }
13275 }
13276 else
13277 {
13278 if (CONST_INT_P (addr)
13279 && !x86_64_immediate_operand (addr, VOIDmode))
13280 {
13281 if (reg)
13282 {
13283 emit_move_insn (reg, addr);
13284 new_rtx = reg;
13285 }
13286 else
13287 new_rtx = force_reg (Pmode, addr);
13288 }
13289 else if (GET_CODE (addr) == CONST)
13290 {
13291 addr = XEXP (addr, 0);
13292
13293 /* We must match stuff we generated before. Assume the only
13294 unspecs that can get here are ours. Not that we could do
13295 anything with them anyway.... */
13296 if (GET_CODE (addr) == UNSPEC
13297 || (GET_CODE (addr) == PLUS
13298 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13299 return orig;
13300 gcc_assert (GET_CODE (addr) == PLUS);
13301 }
13302 if (GET_CODE (addr) == PLUS)
13303 {
13304 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13305
13306 /* Check first to see if this is a constant offset from a @GOTOFF
13307 symbol reference. */
13308 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13309 && CONST_INT_P (op1))
13310 {
13311 if (!TARGET_64BIT)
13312 {
13313 if (reload_in_progress)
13314 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13315 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13316 UNSPEC_GOTOFF);
13317 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13318 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13319 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13320
13321 if (reg != 0)
13322 {
13323 emit_move_insn (reg, new_rtx);
13324 new_rtx = reg;
13325 }
13326 }
13327 else
13328 {
13329 if (INTVAL (op1) < -16*1024*1024
13330 || INTVAL (op1) >= 16*1024*1024)
13331 {
13332 if (!x86_64_immediate_operand (op1, Pmode))
13333 op1 = force_reg (Pmode, op1);
13334 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13335 }
13336 }
13337 }
13338 else
13339 {
13340 rtx base = legitimize_pic_address (op0, reg);
13341 enum machine_mode mode = GET_MODE (base);
13342 new_rtx
13343 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13344
13345 if (CONST_INT_P (new_rtx))
13346 {
13347 if (INTVAL (new_rtx) < -16*1024*1024
13348 || INTVAL (new_rtx) >= 16*1024*1024)
13349 {
13350 if (!x86_64_immediate_operand (new_rtx, mode))
13351 new_rtx = force_reg (mode, new_rtx);
13352 new_rtx
13353 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13354 }
13355 else
13356 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13357 }
13358 else
13359 {
13360 if (GET_CODE (new_rtx) == PLUS
13361 && CONSTANT_P (XEXP (new_rtx, 1)))
13362 {
13363 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13364 new_rtx = XEXP (new_rtx, 1);
13365 }
13366 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13367 }
13368 }
13369 }
13370 }
13371 return new_rtx;
13372 }
13373 \f
13374 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13375
13376 static rtx
13377 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13378 {
13379 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13380
13381 if (GET_MODE (tp) != tp_mode)
13382 {
13383 gcc_assert (GET_MODE (tp) == SImode);
13384 gcc_assert (tp_mode == DImode);
13385
13386 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13387 }
13388
13389 if (to_reg)
13390 tp = copy_to_mode_reg (tp_mode, tp);
13391
13392 return tp;
13393 }
13394
13395 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13396
13397 static GTY(()) rtx ix86_tls_symbol;
13398
13399 static rtx
13400 ix86_tls_get_addr (void)
13401 {
13402 if (!ix86_tls_symbol)
13403 {
13404 const char *sym
13405 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13406 ? "___tls_get_addr" : "__tls_get_addr");
13407
13408 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13409 }
13410
13411 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13412 {
13413 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13414 UNSPEC_PLTOFF);
13415 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13416 gen_rtx_CONST (Pmode, unspec));
13417 }
13418
13419 return ix86_tls_symbol;
13420 }
13421
13422 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13423
13424 static GTY(()) rtx ix86_tls_module_base_symbol;
13425
13426 rtx
13427 ix86_tls_module_base (void)
13428 {
13429 if (!ix86_tls_module_base_symbol)
13430 {
13431 ix86_tls_module_base_symbol
13432 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13433
13434 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13435 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13436 }
13437
13438 return ix86_tls_module_base_symbol;
13439 }
13440
13441 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13442 false if we expect this to be used for a memory address and true if
13443 we expect to load the address into a register. */
13444
13445 static rtx
13446 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13447 {
13448 rtx dest, base, off;
13449 rtx pic = NULL_RTX, tp = NULL_RTX;
13450 enum machine_mode tp_mode = Pmode;
13451 int type;
13452
13453 /* Fall back to the global dynamic model if the toolchain cannot support local
13454 dynamic. */
13455 if (TARGET_SUN_TLS && !TARGET_64BIT
13456 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13457 && model == TLS_MODEL_LOCAL_DYNAMIC)
13458 model = TLS_MODEL_GLOBAL_DYNAMIC;
13459
13460 switch (model)
13461 {
13462 case TLS_MODEL_GLOBAL_DYNAMIC:
13463 dest = gen_reg_rtx (Pmode);
13464
13465 if (!TARGET_64BIT)
13466 {
13467 if (flag_pic && !TARGET_PECOFF)
13468 pic = pic_offset_table_rtx;
13469 else
13470 {
13471 pic = gen_reg_rtx (Pmode);
13472 emit_insn (gen_set_got (pic));
13473 }
13474 }
13475
13476 if (TARGET_GNU2_TLS)
13477 {
13478 if (TARGET_64BIT)
13479 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13480 else
13481 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13482
13483 tp = get_thread_pointer (Pmode, true);
13484 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13485
13486 if (GET_MODE (x) != Pmode)
13487 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13488
13489 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13490 }
13491 else
13492 {
13493 rtx caddr = ix86_tls_get_addr ();
13494
13495 if (TARGET_64BIT)
13496 {
13497 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13498 rtx_insn *insns;
13499
13500 start_sequence ();
13501 emit_call_insn
13502 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13503 insns = get_insns ();
13504 end_sequence ();
13505
13506 if (GET_MODE (x) != Pmode)
13507 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13508
13509 RTL_CONST_CALL_P (insns) = 1;
13510 emit_libcall_block (insns, dest, rax, x);
13511 }
13512 else
13513 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13514 }
13515 break;
13516
13517 case TLS_MODEL_LOCAL_DYNAMIC:
13518 base = gen_reg_rtx (Pmode);
13519
13520 if (!TARGET_64BIT)
13521 {
13522 if (flag_pic)
13523 pic = pic_offset_table_rtx;
13524 else
13525 {
13526 pic = gen_reg_rtx (Pmode);
13527 emit_insn (gen_set_got (pic));
13528 }
13529 }
13530
13531 if (TARGET_GNU2_TLS)
13532 {
13533 rtx tmp = ix86_tls_module_base ();
13534
13535 if (TARGET_64BIT)
13536 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13537 else
13538 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13539
13540 tp = get_thread_pointer (Pmode, true);
13541 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13542 gen_rtx_MINUS (Pmode, tmp, tp));
13543 }
13544 else
13545 {
13546 rtx caddr = ix86_tls_get_addr ();
13547
13548 if (TARGET_64BIT)
13549 {
13550 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13551 rtx_insn *insns;
13552 rtx eqv;
13553
13554 start_sequence ();
13555 emit_call_insn
13556 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13557 insns = get_insns ();
13558 end_sequence ();
13559
13560 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13561 share the LD_BASE result with other LD model accesses. */
13562 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13563 UNSPEC_TLS_LD_BASE);
13564
13565 RTL_CONST_CALL_P (insns) = 1;
13566 emit_libcall_block (insns, base, rax, eqv);
13567 }
13568 else
13569 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13570 }
13571
13572 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13573 off = gen_rtx_CONST (Pmode, off);
13574
13575 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13576
13577 if (TARGET_GNU2_TLS)
13578 {
13579 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13580
13581 if (GET_MODE (x) != Pmode)
13582 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13583
13584 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13585 }
13586 break;
13587
13588 case TLS_MODEL_INITIAL_EXEC:
13589 if (TARGET_64BIT)
13590 {
13591 if (TARGET_SUN_TLS && !TARGET_X32)
13592 {
13593 /* The Sun linker took the AMD64 TLS spec literally
13594 and can only handle %rax as the destination of the
13595 initial-exec code sequence. */
13596
13597 dest = gen_reg_rtx (DImode);
13598 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13599 return dest;
13600 }
13601
13602 /* Generate DImode references to avoid %fs:(%reg32)
13603 problems and the linker IE->LE relaxation bug. */
13604 tp_mode = DImode;
13605 pic = NULL;
13606 type = UNSPEC_GOTNTPOFF;
13607 }
13608 else if (flag_pic)
13609 {
13610 if (reload_in_progress)
13611 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13612 pic = pic_offset_table_rtx;
13613 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13614 }
13615 else if (!TARGET_ANY_GNU_TLS)
13616 {
13617 pic = gen_reg_rtx (Pmode);
13618 emit_insn (gen_set_got (pic));
13619 type = UNSPEC_GOTTPOFF;
13620 }
13621 else
13622 {
13623 pic = NULL;
13624 type = UNSPEC_INDNTPOFF;
13625 }
13626
13627 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13628 off = gen_rtx_CONST (tp_mode, off);
13629 if (pic)
13630 off = gen_rtx_PLUS (tp_mode, pic, off);
13631 off = gen_const_mem (tp_mode, off);
13632 set_mem_alias_set (off, ix86_GOT_alias_set ());
13633
13634 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13635 {
13636 base = get_thread_pointer (tp_mode,
13637 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13638 off = force_reg (tp_mode, off);
13639 return gen_rtx_PLUS (tp_mode, base, off);
13640 }
13641 else
13642 {
13643 base = get_thread_pointer (Pmode, true);
13644 dest = gen_reg_rtx (Pmode);
13645 emit_insn (ix86_gen_sub3 (dest, base, off));
13646 }
13647 break;
13648
13649 case TLS_MODEL_LOCAL_EXEC:
13650 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13651 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13652 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13653 off = gen_rtx_CONST (Pmode, off);
13654
13655 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13656 {
13657 base = get_thread_pointer (Pmode,
13658 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13659 return gen_rtx_PLUS (Pmode, base, off);
13660 }
13661 else
13662 {
13663 base = get_thread_pointer (Pmode, true);
13664 dest = gen_reg_rtx (Pmode);
13665 emit_insn (ix86_gen_sub3 (dest, base, off));
13666 }
13667 break;
13668
13669 default:
13670 gcc_unreachable ();
13671 }
13672
13673 return dest;
13674 }
13675
13676 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13677 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13678 unique refptr-DECL symbol corresponding to symbol DECL. */
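/* For instance, with BEIMPORT a reference to `foo' is redirected through
   the import-table slot `__imp_foo' (or `__imp__foo' when a user label
   prefix is in use); without BEIMPORT a `refptr.foo' indirection is
   created instead.  The resulting decl is cached in dllimport_map so the
   same symbol is reused on later calls.  */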
13679
13680 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13681 htab_t dllimport_map;
13682
13683 static tree
13684 get_dllimport_decl (tree decl, bool beimport)
13685 {
13686 struct tree_map *h, in;
13687 void **loc;
13688 const char *name;
13689 const char *prefix;
13690 size_t namelen, prefixlen;
13691 char *imp_name;
13692 tree to;
13693 rtx rtl;
13694
13695 if (!dllimport_map)
13696 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13697
13698 in.hash = htab_hash_pointer (decl);
13699 in.base.from = decl;
13700 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13701 h = (struct tree_map *) *loc;
13702 if (h)
13703 return h->to;
13704
13705 *loc = h = ggc_alloc<tree_map> ();
13706 h->hash = in.hash;
13707 h->base.from = decl;
13708 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13709 VAR_DECL, NULL, ptr_type_node);
13710 DECL_ARTIFICIAL (to) = 1;
13711 DECL_IGNORED_P (to) = 1;
13712 DECL_EXTERNAL (to) = 1;
13713 TREE_READONLY (to) = 1;
13714
13715 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13716 name = targetm.strip_name_encoding (name);
13717 if (beimport)
13718 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13719 ? "*__imp_" : "*__imp__";
13720 else
13721 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13722 namelen = strlen (name);
13723 prefixlen = strlen (prefix);
13724 imp_name = (char *) alloca (namelen + prefixlen + 1);
13725 memcpy (imp_name, prefix, prefixlen);
13726 memcpy (imp_name + prefixlen, name, namelen + 1);
13727
13728 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13729 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13730 SET_SYMBOL_REF_DECL (rtl, to);
13731 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13732 if (!beimport)
13733 {
13734 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13735 #ifdef SUB_TARGET_RECORD_STUB
13736 SUB_TARGET_RECORD_STUB (name);
13737 #endif
13738 }
13739
13740 rtl = gen_const_mem (Pmode, rtl);
13741 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13742
13743 SET_DECL_RTL (to, rtl);
13744 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13745
13746 return to;
13747 }
13748
13749 /* Expand SYMBOL into its corresponding far-addressed symbol.
13750 WANT_REG is true if we require the result be a register. */
13751
13752 static rtx
13753 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13754 {
13755 tree imp_decl;
13756 rtx x;
13757
13758 gcc_assert (SYMBOL_REF_DECL (symbol));
13759 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13760
13761 x = DECL_RTL (imp_decl);
13762 if (want_reg)
13763 x = force_reg (Pmode, x);
13764 return x;
13765 }
13766
13767 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13768 true if we require the result be a register. */
13769
13770 static rtx
13771 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13772 {
13773 tree imp_decl;
13774 rtx x;
13775
13776 gcc_assert (SYMBOL_REF_DECL (symbol));
13777 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13778
13779 x = DECL_RTL (imp_decl);
13780 if (want_reg)
13781 x = force_reg (Pmode, x);
13782 return x;
13783 }
13784
13785 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
13786 is true if we require the result be a register. */
13787
13788 static rtx
13789 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13790 {
13791 if (!TARGET_PECOFF)
13792 return NULL_RTX;
13793
13794 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13795 {
13796 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13797 return legitimize_dllimport_symbol (addr, inreg);
13798 if (GET_CODE (addr) == CONST
13799 && GET_CODE (XEXP (addr, 0)) == PLUS
13800 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13801 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13802 {
13803 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13804 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13805 }
13806 }
13807
13808 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13809 return NULL_RTX;
13810 if (GET_CODE (addr) == SYMBOL_REF
13811 && !is_imported_p (addr)
13812 && SYMBOL_REF_EXTERNAL_P (addr)
13813 && SYMBOL_REF_DECL (addr))
13814 return legitimize_pe_coff_extern_decl (addr, inreg);
13815
13816 if (GET_CODE (addr) == CONST
13817 && GET_CODE (XEXP (addr, 0)) == PLUS
13818 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13819 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13820 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13821 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13822 {
13823 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13824 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13825 }
13826 return NULL_RTX;
13827 }
13828
13829 /* Try machine-dependent ways of modifying an illegitimate address
13830 to be legitimate. If we find one, return the new, valid address.
13831 This macro is used in only one place: `memory_address' in explow.c.
13832
13833 OLDX is the address as it was before break_out_memory_refs was called.
13834 In some cases it is useful to look at this to decide what needs to be done.
13835
13836 It is always safe for this macro to do nothing. It exists to recognize
13837 opportunities to optimize the output.
13838
13839 For the 80386, we handle X+REG by loading X into a register R and
13840 using R+REG. R will go in a general reg and indexing will be used.
13841 However, if REG is a broken-out memory address or multiplication,
13842 nothing needs to be done because REG can certainly go in a general reg.
13843
13844 When -fpic is used, special handling is needed for symbolic references.
13845 See comments by legitimize_pic_address in i386.c for details. */
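/* For example, (plus (ashift (reg) (const_int 2)) (reg)) is rewritten
   below into (plus (mult (reg) (const_int 4)) (reg)) so that it matches
   the base + index*scale addressing form.  */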
13846
13847 static rtx
13848 ix86_legitimize_address (rtx x, rtx, enum machine_mode mode)
13849 {
13850 int changed = 0;
13851 unsigned log;
13852
13853 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13854 if (log)
13855 return legitimize_tls_address (x, (enum tls_model) log, false);
13856 if (GET_CODE (x) == CONST
13857 && GET_CODE (XEXP (x, 0)) == PLUS
13858 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13859 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13860 {
13861 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13862 (enum tls_model) log, false);
13863 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13864 }
13865
13866 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13867 {
13868 rtx tmp = legitimize_pe_coff_symbol (x, true);
13869 if (tmp)
13870 return tmp;
13871 }
13872
13873 if (flag_pic && SYMBOLIC_CONST (x))
13874 return legitimize_pic_address (x, 0);
13875
13876 #if TARGET_MACHO
13877 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13878 return machopic_indirect_data_reference (x, 0);
13879 #endif
13880
13881 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13882 if (GET_CODE (x) == ASHIFT
13883 && CONST_INT_P (XEXP (x, 1))
13884 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13885 {
13886 changed = 1;
13887 log = INTVAL (XEXP (x, 1));
13888 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13889 GEN_INT (1 << log));
13890 }
13891
13892 if (GET_CODE (x) == PLUS)
13893 {
13894 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13895
13896 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13897 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13898 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13899 {
13900 changed = 1;
13901 log = INTVAL (XEXP (XEXP (x, 0), 1));
13902 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13903 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13904 GEN_INT (1 << log));
13905 }
13906
13907 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13908 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13909 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13910 {
13911 changed = 1;
13912 log = INTVAL (XEXP (XEXP (x, 1), 1));
13913 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13914 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13915 GEN_INT (1 << log));
13916 }
13917
13918 /* Put multiply first if it isn't already. */
13919 if (GET_CODE (XEXP (x, 1)) == MULT)
13920 {
13921 rtx tmp = XEXP (x, 0);
13922 XEXP (x, 0) = XEXP (x, 1);
13923 XEXP (x, 1) = tmp;
13924 changed = 1;
13925 }
13926
13927 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13928 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13929 created by virtual register instantiation, register elimination, and
13930 similar optimizations. */
13931 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13932 {
13933 changed = 1;
13934 x = gen_rtx_PLUS (Pmode,
13935 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13936 XEXP (XEXP (x, 1), 0)),
13937 XEXP (XEXP (x, 1), 1));
13938 }
13939
13940 /* Canonicalize
13941 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13942 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13943 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13944 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13945 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13946 && CONSTANT_P (XEXP (x, 1)))
13947 {
13948 rtx constant;
13949 rtx other = NULL_RTX;
13950
13951 if (CONST_INT_P (XEXP (x, 1)))
13952 {
13953 constant = XEXP (x, 1);
13954 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13955 }
13956 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13957 {
13958 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13959 other = XEXP (x, 1);
13960 }
13961 else
13962 constant = 0;
13963
13964 if (constant)
13965 {
13966 changed = 1;
13967 x = gen_rtx_PLUS (Pmode,
13968 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13969 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13970 plus_constant (Pmode, other,
13971 INTVAL (constant)));
13972 }
13973 }
13974
13975 if (changed && ix86_legitimate_address_p (mode, x, false))
13976 return x;
13977
13978 if (GET_CODE (XEXP (x, 0)) == MULT)
13979 {
13980 changed = 1;
13981 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
13982 }
13983
13984 if (GET_CODE (XEXP (x, 1)) == MULT)
13985 {
13986 changed = 1;
13987 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
13988 }
13989
13990 if (changed
13991 && REG_P (XEXP (x, 1))
13992 && REG_P (XEXP (x, 0)))
13993 return x;
13994
13995 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13996 {
13997 changed = 1;
13998 x = legitimize_pic_address (x, 0);
13999 }
14000
14001 if (changed && ix86_legitimate_address_p (mode, x, false))
14002 return x;
14003
14004 if (REG_P (XEXP (x, 0)))
14005 {
14006 rtx temp = gen_reg_rtx (Pmode);
14007 rtx val = force_operand (XEXP (x, 1), temp);
14008 if (val != temp)
14009 {
14010 val = convert_to_mode (Pmode, val, 1);
14011 emit_move_insn (temp, val);
14012 }
14013
14014 XEXP (x, 1) = temp;
14015 return x;
14016 }
14017
14018 else if (REG_P (XEXP (x, 1)))
14019 {
14020 rtx temp = gen_reg_rtx (Pmode);
14021 rtx val = force_operand (XEXP (x, 0), temp);
14022 if (val != temp)
14023 {
14024 val = convert_to_mode (Pmode, val, 1);
14025 emit_move_insn (temp, val);
14026 }
14027
14028 XEXP (x, 0) = temp;
14029 return x;
14030 }
14031 }
14032
14033 return x;
14034 }
14035 \f
14036 /* Print an integer constant expression in assembler syntax. Addition
14037 and subtraction are the only arithmetic that may appear in these
14038 expressions. FILE is the stdio stream to write to, X is the rtx, and
14039 CODE is the operand print code from the output string. */
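/* For example, (const (unspec [foo] UNSPEC_GOTOFF)) is printed as
   "foo@GOTOFF", and a non-local SYMBOL_REF printed with CODE 'P' gets
   an "@PLT" suffix; see the individual cases below.  */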
14040
14041 static void
14042 output_pic_addr_const (FILE *file, rtx x, int code)
14043 {
14044 char buf[256];
14045
14046 switch (GET_CODE (x))
14047 {
14048 case PC:
14049 gcc_assert (flag_pic);
14050 putc ('.', file);
14051 break;
14052
14053 case SYMBOL_REF:
14054 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14055 output_addr_const (file, x);
14056 else
14057 {
14058 const char *name = XSTR (x, 0);
14059
14060 /* Mark the decl as referenced so that cgraph will
14061 output the function. */
14062 if (SYMBOL_REF_DECL (x))
14063 mark_decl_referenced (SYMBOL_REF_DECL (x));
14064
14065 #if TARGET_MACHO
14066 if (MACHOPIC_INDIRECT
14067 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14068 name = machopic_indirection_name (x, /*stub_p=*/true);
14069 #endif
14070 assemble_name (file, name);
14071 }
14072 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14073 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14074 fputs ("@PLT", file);
14075 break;
14076
14077 case LABEL_REF:
14078 x = XEXP (x, 0);
14079 /* FALLTHRU */
14080 case CODE_LABEL:
14081 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14082 assemble_name (asm_out_file, buf);
14083 break;
14084
14085 case CONST_INT:
14086 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14087 break;
14088
14089 case CONST:
14090 /* This used to output parentheses around the expression,
14091 but that does not work on the 386 (either ATT or BSD assembler). */
14092 output_pic_addr_const (file, XEXP (x, 0), code);
14093 break;
14094
14095 case CONST_DOUBLE:
14096 if (GET_MODE (x) == VOIDmode)
14097 {
14098 /* We can use %d if the number is <32 bits and positive. */
14099 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14100 fprintf (file, "0x%lx%08lx",
14101 (unsigned long) CONST_DOUBLE_HIGH (x),
14102 (unsigned long) CONST_DOUBLE_LOW (x));
14103 else
14104 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14105 }
14106 else
14107 /* We can't handle floating point constants;
14108 TARGET_PRINT_OPERAND must handle them. */
14109 output_operand_lossage ("floating constant misused");
14110 break;
14111
14112 case PLUS:
14113 /* Some assemblers need integer constants to appear first. */
14114 if (CONST_INT_P (XEXP (x, 0)))
14115 {
14116 output_pic_addr_const (file, XEXP (x, 0), code);
14117 putc ('+', file);
14118 output_pic_addr_const (file, XEXP (x, 1), code);
14119 }
14120 else
14121 {
14122 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14123 output_pic_addr_const (file, XEXP (x, 1), code);
14124 putc ('+', file);
14125 output_pic_addr_const (file, XEXP (x, 0), code);
14126 }
14127 break;
14128
14129 case MINUS:
14130 if (!TARGET_MACHO)
14131 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14132 output_pic_addr_const (file, XEXP (x, 0), code);
14133 putc ('-', file);
14134 output_pic_addr_const (file, XEXP (x, 1), code);
14135 if (!TARGET_MACHO)
14136 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14137 break;
14138
14139 case UNSPEC:
14140 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14141 {
14142 bool f = i386_asm_output_addr_const_extra (file, x);
14143 gcc_assert (f);
14144 break;
14145 }
14146
14147 gcc_assert (XVECLEN (x, 0) == 1);
14148 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14149 switch (XINT (x, 1))
14150 {
14151 case UNSPEC_GOT:
14152 fputs ("@GOT", file);
14153 break;
14154 case UNSPEC_GOTOFF:
14155 fputs ("@GOTOFF", file);
14156 break;
14157 case UNSPEC_PLTOFF:
14158 fputs ("@PLTOFF", file);
14159 break;
14160 case UNSPEC_PCREL:
14161 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14162 "(%rip)" : "[rip]", file);
14163 break;
14164 case UNSPEC_GOTPCREL:
14165 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14166 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14167 break;
14168 case UNSPEC_GOTTPOFF:
14169 /* FIXME: This might be @TPOFF in Sun ld too. */
14170 fputs ("@gottpoff", file);
14171 break;
14172 case UNSPEC_TPOFF:
14173 fputs ("@tpoff", file);
14174 break;
14175 case UNSPEC_NTPOFF:
14176 if (TARGET_64BIT)
14177 fputs ("@tpoff", file);
14178 else
14179 fputs ("@ntpoff", file);
14180 break;
14181 case UNSPEC_DTPOFF:
14182 fputs ("@dtpoff", file);
14183 break;
14184 case UNSPEC_GOTNTPOFF:
14185 if (TARGET_64BIT)
14186 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14187 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14188 else
14189 fputs ("@gotntpoff", file);
14190 break;
14191 case UNSPEC_INDNTPOFF:
14192 fputs ("@indntpoff", file);
14193 break;
14194 #if TARGET_MACHO
14195 case UNSPEC_MACHOPIC_OFFSET:
14196 putc ('-', file);
14197 machopic_output_function_base_name (file);
14198 break;
14199 #endif
14200 default:
14201 output_operand_lossage ("invalid UNSPEC as operand");
14202 break;
14203 }
14204 break;
14205
14206 default:
14207 output_operand_lossage ("invalid expression as operand");
14208 }
14209 }
14210
14211 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14212 We need to emit DTP-relative relocations. */
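/* With the usual ELF definition of ASM_LONG this emits, e.g.,
   "\t.long\tfoo@dtpoff" for SIZE 4, followed by ", 0" for SIZE 8
   (a sketch; ASM_LONG itself comes from the target headers).  */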
14213
14214 static void ATTRIBUTE_UNUSED
14215 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14216 {
14217 fputs (ASM_LONG, file);
14218 output_addr_const (file, x);
14219 fputs ("@dtpoff", file);
14220 switch (size)
14221 {
14222 case 4:
14223 break;
14224 case 8:
14225 fputs (", 0", file);
14226 break;
14227 default:
14228 gcc_unreachable ();
14229 }
14230 }
14231
14232 /* Return true if X is a representation of the PIC register. This copes
14233 with calls from ix86_find_base_term, where the register might have
14234 been replaced by a cselib value. */
14235
14236 static bool
14237 ix86_pic_register_p (rtx x)
14238 {
14239 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14240 return (pic_offset_table_rtx
14241 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14242 else
14243 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14244 }
14245
14246 /* Helper function for ix86_delegitimize_address.
14247 Attempt to delegitimize TLS local-exec accesses. */
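/* E.g. the TLS local-exec address %seg:(base + index*scale + foo@NTPOFF),
   where %seg is DEFAULT_TLS_SEG_REG, is rewritten back to
   base + index*scale + foo.  */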
14248
14249 static rtx
14250 ix86_delegitimize_tls_address (rtx orig_x)
14251 {
14252 rtx x = orig_x, unspec;
14253 struct ix86_address addr;
14254
14255 if (!TARGET_TLS_DIRECT_SEG_REFS)
14256 return orig_x;
14257 if (MEM_P (x))
14258 x = XEXP (x, 0);
14259 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14260 return orig_x;
14261 if (ix86_decompose_address (x, &addr) == 0
14262 || addr.seg != DEFAULT_TLS_SEG_REG
14263 || addr.disp == NULL_RTX
14264 || GET_CODE (addr.disp) != CONST)
14265 return orig_x;
14266 unspec = XEXP (addr.disp, 0);
14267 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14268 unspec = XEXP (unspec, 0);
14269 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14270 return orig_x;
14271 x = XVECEXP (unspec, 0, 0);
14272 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14273 if (unspec != XEXP (addr.disp, 0))
14274 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14275 if (addr.index)
14276 {
14277 rtx idx = addr.index;
14278 if (addr.scale != 1)
14279 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14280 x = gen_rtx_PLUS (Pmode, idx, x);
14281 }
14282 if (addr.base)
14283 x = gen_rtx_PLUS (Pmode, addr.base, x);
14284 if (MEM_P (orig_x))
14285 x = replace_equiv_address_nv (orig_x, x);
14286 return x;
14287 }
14288
14289 /* In the name of slightly smaller debug output, and to cater to
14290 general assembler lossage, recognize PIC+GOTOFF and turn it back
14291 into a direct symbol reference.
14292
14293 On Darwin, this is necessary to avoid a crash, because Darwin
14294 has a different PIC label for each routine but the DWARF debugging
14295 information is not associated with any particular routine, so it's
14296 necessary to remove references to the PIC label from RTL stored by
14297 the DWARF output code. */
14298
14299 static rtx
14300 ix86_delegitimize_address (rtx x)
14301 {
14302 rtx orig_x = delegitimize_mem_from_attrs (x);
14303 /* addend is NULL or some rtx if x is something+GOTOFF where
14304 something doesn't include the PIC register. */
14305 rtx addend = NULL_RTX;
14306 /* reg_addend is NULL or a multiple of some register. */
14307 rtx reg_addend = NULL_RTX;
14308 /* const_addend is NULL or a const_int. */
14309 rtx const_addend = NULL_RTX;
14310 /* This is the result, or NULL. */
14311 rtx result = NULL_RTX;
14312
14313 x = orig_x;
14314
14315 if (MEM_P (x))
14316 x = XEXP (x, 0);
14317
14318 if (TARGET_64BIT)
14319 {
14320 if (GET_CODE (x) == CONST
14321 && GET_CODE (XEXP (x, 0)) == PLUS
14322 && GET_MODE (XEXP (x, 0)) == Pmode
14323 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14324 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14325 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14326 {
14327 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14328 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14329 if (MEM_P (orig_x))
14330 x = replace_equiv_address_nv (orig_x, x);
14331 return x;
14332 }
14333
14334 if (GET_CODE (x) == CONST
14335 && GET_CODE (XEXP (x, 0)) == UNSPEC
14336 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14337 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14338 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14339 {
14340 x = XVECEXP (XEXP (x, 0), 0, 0);
14341 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14342 {
14343 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14344 GET_MODE (x), 0);
14345 if (x == NULL_RTX)
14346 return orig_x;
14347 }
14348 return x;
14349 }
14350
14351 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14352 return ix86_delegitimize_tls_address (orig_x);
14353
14354 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14355 and -mcmodel=medium -fpic. */
14356 }
14357
14358 if (GET_CODE (x) != PLUS
14359 || GET_CODE (XEXP (x, 1)) != CONST)
14360 return ix86_delegitimize_tls_address (orig_x);
14361
14362 if (ix86_pic_register_p (XEXP (x, 0)))
14363 /* %ebx + GOT/GOTOFF */
14364 ;
14365 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14366 {
14367 /* %ebx + %reg * scale + GOT/GOTOFF */
14368 reg_addend = XEXP (x, 0);
14369 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14370 reg_addend = XEXP (reg_addend, 1);
14371 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14372 reg_addend = XEXP (reg_addend, 0);
14373 else
14374 {
14375 reg_addend = NULL_RTX;
14376 addend = XEXP (x, 0);
14377 }
14378 }
14379 else
14380 addend = XEXP (x, 0);
14381
14382 x = XEXP (XEXP (x, 1), 0);
14383 if (GET_CODE (x) == PLUS
14384 && CONST_INT_P (XEXP (x, 1)))
14385 {
14386 const_addend = XEXP (x, 1);
14387 x = XEXP (x, 0);
14388 }
14389
14390 if (GET_CODE (x) == UNSPEC
14391 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14392 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14393 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14394 && !MEM_P (orig_x) && !addend)))
14395 result = XVECEXP (x, 0, 0);
14396
14397 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14398 && !MEM_P (orig_x))
14399 result = XVECEXP (x, 0, 0);
14400
14401 if (! result)
14402 return ix86_delegitimize_tls_address (orig_x);
14403
14404 if (const_addend)
14405 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14406 if (reg_addend)
14407 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14408 if (addend)
14409 {
14410 /* If the rest of original X doesn't involve the PIC register, add
14411 addend and subtract pic_offset_table_rtx. This can happen e.g.
14412 for code like:
14413 leal (%ebx, %ecx, 4), %ecx
14414 ...
14415 movl foo@GOTOFF(%ecx), %edx
14416 in which case we return (%ecx - %ebx) + foo. */
14417 if (pic_offset_table_rtx)
14418 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14419 pic_offset_table_rtx),
14420 result);
14421 else
14422 return orig_x;
14423 }
14424 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14425 {
14426 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14427 if (result == NULL_RTX)
14428 return orig_x;
14429 }
14430 return result;
14431 }
14432
14433 /* If X is a machine specific address (i.e. a symbol or label being
14434 referenced as a displacement from the GOT implemented using an
14435 UNSPEC), then return the base term. Otherwise return X. */
14436
14437 rtx
14438 ix86_find_base_term (rtx x)
14439 {
14440 rtx term;
14441
14442 if (TARGET_64BIT)
14443 {
14444 if (GET_CODE (x) != CONST)
14445 return x;
14446 term = XEXP (x, 0);
14447 if (GET_CODE (term) == PLUS
14448 && (CONST_INT_P (XEXP (term, 1))
14449 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14450 term = XEXP (term, 0);
14451 if (GET_CODE (term) != UNSPEC
14452 || (XINT (term, 1) != UNSPEC_GOTPCREL
14453 && XINT (term, 1) != UNSPEC_PCREL))
14454 return x;
14455
14456 return XVECEXP (term, 0, 0);
14457 }
14458
14459 return ix86_delegitimize_address (x);
14460 }
14461 \f
14462 static void
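/* Print to FILE the assembler condition-code suffix ("e", "ne", "g",
   "b", ...) for comparison CODE in mode MODE.  If REVERSE, print the
   reversed condition.  FP selects the spellings needed by fcmov,
   e.g. "nbe" rather than "a" for GTU.  */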
14463 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14464 bool fp, FILE *file)
14465 {
14466 const char *suffix;
14467
14468 if (mode == CCFPmode || mode == CCFPUmode)
14469 {
14470 code = ix86_fp_compare_code_to_integer (code);
14471 mode = CCmode;
14472 }
14473 if (reverse)
14474 code = reverse_condition (code);
14475
14476 switch (code)
14477 {
14478 case EQ:
14479 switch (mode)
14480 {
14481 case CCAmode:
14482 suffix = "a";
14483 break;
14484
14485 case CCCmode:
14486 suffix = "c";
14487 break;
14488
14489 case CCOmode:
14490 suffix = "o";
14491 break;
14492
14493 case CCSmode:
14494 suffix = "s";
14495 break;
14496
14497 default:
14498 suffix = "e";
14499 }
14500 break;
14501 case NE:
14502 switch (mode)
14503 {
14504 case CCAmode:
14505 suffix = "na";
14506 break;
14507
14508 case CCCmode:
14509 suffix = "nc";
14510 break;
14511
14512 case CCOmode:
14513 suffix = "no";
14514 break;
14515
14516 case CCSmode:
14517 suffix = "ns";
14518 break;
14519
14520 default:
14521 suffix = "ne";
14522 }
14523 break;
14524 case GT:
14525 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14526 suffix = "g";
14527 break;
14528 case GTU:
14529 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14530 Those same assemblers have the same but opposite lossage on cmov. */
14531 if (mode == CCmode)
14532 suffix = fp ? "nbe" : "a";
14533 else
14534 gcc_unreachable ();
14535 break;
14536 case LT:
14537 switch (mode)
14538 {
14539 case CCNOmode:
14540 case CCGOCmode:
14541 suffix = "s";
14542 break;
14543
14544 case CCmode:
14545 case CCGCmode:
14546 suffix = "l";
14547 break;
14548
14549 default:
14550 gcc_unreachable ();
14551 }
14552 break;
14553 case LTU:
14554 if (mode == CCmode)
14555 suffix = "b";
14556 else if (mode == CCCmode)
14557 suffix = "c";
14558 else
14559 gcc_unreachable ();
14560 break;
14561 case GE:
14562 switch (mode)
14563 {
14564 case CCNOmode:
14565 case CCGOCmode:
14566 suffix = "ns";
14567 break;
14568
14569 case CCmode:
14570 case CCGCmode:
14571 suffix = "ge";
14572 break;
14573
14574 default:
14575 gcc_unreachable ();
14576 }
14577 break;
14578 case GEU:
14579 if (mode == CCmode)
14580 suffix = fp ? "nb" : "ae";
14581 else if (mode == CCCmode)
14582 suffix = "nc";
14583 else
14584 gcc_unreachable ();
14585 break;
14586 case LE:
14587 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14588 suffix = "le";
14589 break;
14590 case LEU:
14591 if (mode == CCmode)
14592 suffix = "be";
14593 else
14594 gcc_unreachable ();
14595 break;
14596 case UNORDERED:
14597 suffix = fp ? "u" : "p";
14598 break;
14599 case ORDERED:
14600 suffix = fp ? "nu" : "np";
14601 break;
14602 default:
14603 gcc_unreachable ();
14604 }
14605 fputs (suffix, file);
14606 }
14607
14608 /* Print the name of register X to FILE based on its machine mode and number.
14609 If CODE is 'w', pretend the mode is HImode.
14610 If CODE is 'b', pretend the mode is QImode.
14611 If CODE is 'k', pretend the mode is SImode.
14612 If CODE is 'q', pretend the mode is DImode.
14613 If CODE is 'x', pretend the mode is V4SFmode.
14614 If CODE is 't', pretend the mode is V8SFmode.
14615 If CODE is 'g', pretend the mode is V16SFmode.
14616 If CODE is 'h', pretend the reg is the 'high' byte register.
14617 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
14618 If CODE is 'd', duplicate the operand for an AVX instruction.
14619 */
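/* For example, register AX prints as "al" with CODE 'b', "ah" with 'h',
   "ax" with 'w', "eax" with 'k' and, in 64-bit mode, "rax" with 'q'
   (with a leading '%' in AT&T syntax).  */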
14620
14621 void
14622 print_reg (rtx x, int code, FILE *file)
14623 {
14624 const char *reg;
14625 unsigned int regno;
14626 bool duplicated = code == 'd' && TARGET_AVX;
14627
14628 if (ASSEMBLER_DIALECT == ASM_ATT)
14629 putc ('%', file);
14630
14631 if (x == pc_rtx)
14632 {
14633 gcc_assert (TARGET_64BIT);
14634 fputs ("rip", file);
14635 return;
14636 }
14637
14638 regno = true_regnum (x);
14639 gcc_assert (regno != ARG_POINTER_REGNUM
14640 && regno != FRAME_POINTER_REGNUM
14641 && regno != FLAGS_REG
14642 && regno != FPSR_REG
14643 && regno != FPCR_REG);
14644
14645 if (code == 'w' || MMX_REG_P (x))
14646 code = 2;
14647 else if (code == 'b')
14648 code = 1;
14649 else if (code == 'k')
14650 code = 4;
14651 else if (code == 'q')
14652 code = 8;
14653 else if (code == 'y')
14654 code = 3;
14655 else if (code == 'h')
14656 code = 0;
14657 else if (code == 'x')
14658 code = 16;
14659 else if (code == 't')
14660 code = 32;
14661 else if (code == 'g')
14662 code = 64;
14663 else
14664 code = GET_MODE_SIZE (GET_MODE (x));
14665
14666 /* Irritatingly, AMD extended registers use a different naming convention
14667 from the normal registers: "r%d[bwd]" */
14668 if (REX_INT_REGNO_P (regno))
14669 {
14670 gcc_assert (TARGET_64BIT);
14671 putc ('r', file);
14672 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14673 switch (code)
14674 {
14675 case 0:
14676 error ("extended registers have no high halves");
14677 break;
14678 case 1:
14679 putc ('b', file);
14680 break;
14681 case 2:
14682 putc ('w', file);
14683 break;
14684 case 4:
14685 putc ('d', file);
14686 break;
14687 case 8:
14688 /* no suffix */
14689 break;
14690 default:
14691 error ("unsupported operand size for extended register");
14692 break;
14693 }
14694 return;
14695 }
14696
14697 reg = NULL;
14698 switch (code)
14699 {
14700 case 3:
14701 if (STACK_TOP_P (x))
14702 {
14703 reg = "st(0)";
14704 break;
14705 }
14706 /* FALLTHRU */
14707 case 8:
14708 case 4:
14709 case 12:
14710 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x))
14711 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14712 /* FALLTHRU */
14713 case 16:
14714 case 2:
14715 normal:
14716 reg = hi_reg_name[regno];
14717 break;
14718 case 1:
14719 if (regno >= ARRAY_SIZE (qi_reg_name))
14720 goto normal;
14721 reg = qi_reg_name[regno];
14722 break;
14723 case 0:
14724 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14725 goto normal;
14726 reg = qi_high_reg_name[regno];
14727 break;
14728 case 32:
14729 if (SSE_REG_P (x))
14730 {
14731 gcc_assert (!duplicated);
14732 putc ('y', file);
14733 fputs (hi_reg_name[regno] + 1, file);
14734 return;
14735 }
14736 case 64:
14737 if (SSE_REG_P (x))
14738 {
14739 gcc_assert (!duplicated);
14740 putc ('z', file);
14741 fputs (hi_reg_name[REGNO (x)] + 1, file);
14742 return;
14743 }
14744 break;
14745 default:
14746 gcc_unreachable ();
14747 }
14748
14749 fputs (reg, file);
14750 if (duplicated)
14751 {
14752 if (ASSEMBLER_DIALECT == ASM_ATT)
14753 fprintf (file, ", %%%s", reg);
14754 else
14755 fprintf (file, ", %s", reg);
14756 }
14757 }
14758
14759 /* Locate some local-dynamic symbol still in use by this function
14760 so that we can print its name in some tls_local_dynamic_base
14761 pattern. */
14762
14763 static int
14764 get_some_local_dynamic_name_1 (rtx *px, void *)
14765 {
14766 rtx x = *px;
14767
14768 if (GET_CODE (x) == SYMBOL_REF
14769 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14770 {
14771 cfun->machine->some_ld_name = XSTR (x, 0);
14772 return 1;
14773 }
14774
14775 return 0;
14776 }
14777
14778 static const char *
14779 get_some_local_dynamic_name (void)
14780 {
14781 rtx_insn *insn;
14782
14783 if (cfun->machine->some_ld_name)
14784 return cfun->machine->some_ld_name;
14785
14786 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14787 if (NONDEBUG_INSN_P (insn)
14788 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14789 return cfun->machine->some_ld_name;
14790
14791 return NULL;
14792 }
14793
14794 /* Meaning of CODE:
14795 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14796 C -- print opcode suffix for set/cmov insn.
14797 c -- like C, but print reversed condition
14798 F,f -- likewise, but for floating-point.
14799 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14800 otherwise nothing
14801 R -- print embedded rounding and sae.
14802 r -- print only sae.
14803 z -- print the opcode suffix for the size of the current operand.
14804 Z -- likewise, with special suffixes for x87 instructions.
14805 * -- print a star (in certain assembler syntax)
14806 A -- print an absolute memory reference.
14807 E -- print address with DImode register names if TARGET_64BIT.
14808 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14809 s -- print a shift double count, followed by the assembler's argument
14810 delimiter.
14811 b -- print the QImode name of the register for the indicated operand.
14812 %b0 would print %al if operands[0] is reg 0.
14813 w -- likewise, print the HImode name of the register.
14814 k -- likewise, print the SImode name of the register.
14815 q -- likewise, print the DImode name of the register.
14816 x -- likewise, print the V4SFmode name of the register.
14817 t -- likewise, print the V8SFmode name of the register.
14818 g -- likewise, print the V16SFmode name of the register.
14819 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14820 y -- print "st(0)" instead of "st" as a register.
14821 d -- print duplicated register operand for an AVX instruction.
14822 D -- print condition for SSE cmp instruction.
14823 P -- if PIC, print an @PLT suffix.
14824 p -- print raw symbol name.
14825 X -- don't print any sort of PIC '@' suffix for a symbol.
14826 & -- print some in-use local-dynamic symbol name.
14827 H -- print a memory address offset by 8; used for sse high-parts
14828 Y -- print condition for XOP pcom* instruction.
14829 + -- print a branch hint as 'cs' or 'ds' prefix
14830 ; -- print a semicolon (after prefixes due to bug in older gas).
14831 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14832 @ -- print a segment register of thread base pointer load
14833 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14834 */
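/* The punctuation codes handled here ('@', '*', '+', '&', ';', '~'
   and '^') must also be accepted by ix86_print_operand_punct_valid_p
   below.  */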
14835
14836 void
14837 ix86_print_operand (FILE *file, rtx x, int code)
14838 {
14839 if (code)
14840 {
14841 switch (code)
14842 {
14843 case 'A':
14844 switch (ASSEMBLER_DIALECT)
14845 {
14846 case ASM_ATT:
14847 putc ('*', file);
14848 break;
14849
14850 case ASM_INTEL:
14851 /* Intel syntax. For absolute addresses, registers should not
14852 be surrounded by brackets. */
14853 if (!REG_P (x))
14854 {
14855 putc ('[', file);
14856 ix86_print_operand (file, x, 0);
14857 putc (']', file);
14858 return;
14859 }
14860 break;
14861
14862 default:
14863 gcc_unreachable ();
14864 }
14865
14866 ix86_print_operand (file, x, 0);
14867 return;
14868
14869 case 'E':
14870 /* Wrap address in an UNSPEC to declare special handling. */
14871 if (TARGET_64BIT)
14872 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14873
14874 output_address (x);
14875 return;
14876
14877 case 'L':
14878 if (ASSEMBLER_DIALECT == ASM_ATT)
14879 putc ('l', file);
14880 return;
14881
14882 case 'W':
14883 if (ASSEMBLER_DIALECT == ASM_ATT)
14884 putc ('w', file);
14885 return;
14886
14887 case 'B':
14888 if (ASSEMBLER_DIALECT == ASM_ATT)
14889 putc ('b', file);
14890 return;
14891
14892 case 'Q':
14893 if (ASSEMBLER_DIALECT == ASM_ATT)
14894 putc ('l', file);
14895 return;
14896
14897 case 'S':
14898 if (ASSEMBLER_DIALECT == ASM_ATT)
14899 putc ('s', file);
14900 return;
14901
14902 case 'T':
14903 if (ASSEMBLER_DIALECT == ASM_ATT)
14904 putc ('t', file);
14905 return;
14906
14907 case 'O':
14908 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14909 if (ASSEMBLER_DIALECT != ASM_ATT)
14910 return;
14911
14912 switch (GET_MODE_SIZE (GET_MODE (x)))
14913 {
14914 case 2:
14915 putc ('w', file);
14916 break;
14917
14918 case 4:
14919 putc ('l', file);
14920 break;
14921
14922 case 8:
14923 putc ('q', file);
14924 break;
14925
14926 default:
14927 output_operand_lossage
14928 ("invalid operand size for operand code 'O'");
14929 return;
14930 }
14931
14932 putc ('.', file);
14933 #endif
14934 return;
14935
14936 case 'z':
14937 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14938 {
14939 /* Opcodes don't get size suffixes if using Intel syntax. */
14940 if (ASSEMBLER_DIALECT == ASM_INTEL)
14941 return;
14942
14943 switch (GET_MODE_SIZE (GET_MODE (x)))
14944 {
14945 case 1:
14946 putc ('b', file);
14947 return;
14948
14949 case 2:
14950 putc ('w', file);
14951 return;
14952
14953 case 4:
14954 putc ('l', file);
14955 return;
14956
14957 case 8:
14958 putc ('q', file);
14959 return;
14960
14961 default:
14962 output_operand_lossage
14963 ("invalid operand size for operand code 'z'");
14964 return;
14965 }
14966 }
14967
14968 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14969 warning
14970 (0, "non-integer operand used with operand code 'z'");
14971 /* FALLTHRU */
14972
14973 case 'Z':
14974 /* 387 opcodes don't get size suffixes if using Intel syntax. */
14975 if (ASSEMBLER_DIALECT == ASM_INTEL)
14976 return;
14977
14978 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14979 {
14980 switch (GET_MODE_SIZE (GET_MODE (x)))
14981 {
14982 case 2:
14983 #ifdef HAVE_AS_IX86_FILDS
14984 putc ('s', file);
14985 #endif
14986 return;
14987
14988 case 4:
14989 putc ('l', file);
14990 return;
14991
14992 case 8:
14993 #ifdef HAVE_AS_IX86_FILDQ
14994 putc ('q', file);
14995 #else
14996 fputs ("ll", file);
14997 #endif
14998 return;
14999
15000 default:
15001 break;
15002 }
15003 }
15004 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15005 {
15006 /* 387 opcodes don't get size suffixes
15007 if the operands are registers. */
15008 if (STACK_REG_P (x))
15009 return;
15010
15011 switch (GET_MODE_SIZE (GET_MODE (x)))
15012 {
15013 case 4:
15014 putc ('s', file);
15015 return;
15016
15017 case 8:
15018 putc ('l', file);
15019 return;
15020
15021 case 12:
15022 case 16:
15023 putc ('t', file);
15024 return;
15025
15026 default:
15027 break;
15028 }
15029 }
15030 else
15031 {
15032 output_operand_lossage
15033 ("invalid operand type used with operand code 'Z'");
15034 return;
15035 }
15036
15037 output_operand_lossage
15038 ("invalid operand size for operand code 'Z'");
15039 return;
15040
15041 case 'd':
15042 case 'b':
15043 case 'w':
15044 case 'k':
15045 case 'q':
15046 case 'h':
15047 case 't':
15048 case 'g':
15049 case 'y':
15050 case 'x':
15051 case 'X':
15052 case 'P':
15053 case 'p':
15054 break;
15055
15056 case 's':
15057 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15058 {
15059 ix86_print_operand (file, x, 0);
15060 fputs (", ", file);
15061 }
15062 return;
15063
15064 case 'Y':
15065 switch (GET_CODE (x))
15066 {
15067 case NE:
15068 fputs ("neq", file);
15069 break;
15070 case EQ:
15071 fputs ("eq", file);
15072 break;
15073 case GE:
15074 case GEU:
15075 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15076 break;
15077 case GT:
15078 case GTU:
15079 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15080 break;
15081 case LE:
15082 case LEU:
15083 fputs ("le", file);
15084 break;
15085 case LT:
15086 case LTU:
15087 fputs ("lt", file);
15088 break;
15089 case UNORDERED:
15090 fputs ("unord", file);
15091 break;
15092 case ORDERED:
15093 fputs ("ord", file);
15094 break;
15095 case UNEQ:
15096 fputs ("ueq", file);
15097 break;
15098 case UNGE:
15099 fputs ("nlt", file);
15100 break;
15101 case UNGT:
15102 fputs ("nle", file);
15103 break;
15104 case UNLE:
15105 fputs ("ule", file);
15106 break;
15107 case UNLT:
15108 fputs ("ult", file);
15109 break;
15110 case LTGT:
15111 fputs ("une", file);
15112 break;
15113 default:
15114 output_operand_lossage ("operand is not a condition code, "
15115 "invalid operand code 'Y'");
15116 return;
15117 }
15118 return;
15119
15120 case 'D':
15121 /* A little bit of brain damage here. The SSE compare instructions
15122 use completely different names for the comparisons than the
15123 fp conditional moves do. */
15124 switch (GET_CODE (x))
15125 {
15126 case UNEQ:
15127 if (TARGET_AVX)
15128 {
15129 fputs ("eq_us", file);
15130 break;
15131 }
15132 case EQ:
15133 fputs ("eq", file);
15134 break;
15135 case UNLT:
15136 if (TARGET_AVX)
15137 {
15138 fputs ("nge", file);
15139 break;
15140 }
15141 case LT:
15142 fputs ("lt", file);
15143 break;
15144 case UNLE:
15145 if (TARGET_AVX)
15146 {
15147 fputs ("ngt", file);
15148 break;
15149 }
15150 case LE:
15151 fputs ("le", file);
15152 break;
15153 case UNORDERED:
15154 fputs ("unord", file);
15155 break;
15156 case LTGT:
15157 if (TARGET_AVX)
15158 {
15159 fputs ("neq_oq", file);
15160 break;
15161 }
15162 case NE:
15163 fputs ("neq", file);
15164 break;
15165 case GE:
15166 if (TARGET_AVX)
15167 {
15168 fputs ("ge", file);
15169 break;
15170 }
15171 case UNGE:
15172 fputs ("nlt", file);
15173 break;
15174 case GT:
15175 if (TARGET_AVX)
15176 {
15177 fputs ("gt", file);
15178 break;
15179 }
15180 case UNGT:
15181 fputs ("nle", file);
15182 break;
15183 case ORDERED:
15184 fputs ("ord", file);
15185 break;
15186 default:
15187 output_operand_lossage ("operand is not a condition code, "
15188 "invalid operand code 'D'");
15189 return;
15190 }
15191 return;
15192
15193 case 'F':
15194 case 'f':
15195 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15196 if (ASSEMBLER_DIALECT == ASM_ATT)
15197 putc ('.', file);
15198 #endif
15199
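/* FALLTHRU */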
15200 case 'C':
15201 case 'c':
15202 if (!COMPARISON_P (x))
15203 {
15204 output_operand_lossage ("operand is not a condition code, "
15205 "invalid operand code '%c'", code);
15206 return;
15207 }
15208 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15209 code == 'c' || code == 'f',
15210 code == 'F' || code == 'f',
15211 file);
15212 return;
15213
15214 case 'H':
15215 if (!offsettable_memref_p (x))
15216 {
15217 output_operand_lossage ("operand is not an offsettable memory "
15218 "reference, invalid operand code 'H'");
15219 return;
15220 }
15221 /* It doesn't actually matter what mode we use here, as we're
15222 only going to use this for printing. */
15223 x = adjust_address_nv (x, DImode, 8);
15224 /* Output 'qword ptr' for intel assembler dialect. */
15225 if (ASSEMBLER_DIALECT == ASM_INTEL)
15226 code = 'q';
15227 break;
15228
15229 case 'K':
15230 gcc_assert (CONST_INT_P (x));
15231
15232 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15233 #ifdef HAVE_AS_IX86_HLE
15234 fputs ("xacquire ", file);
15235 #else
15236 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15237 #endif
15238 else if (INTVAL (x) & IX86_HLE_RELEASE)
15239 #ifdef HAVE_AS_IX86_HLE
15240 fputs ("xrelease ", file);
15241 #else
15242 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15243 #endif
15244 /* We do not want to print value of the operand. */
15245 return;
15246
15247 case 'N':
15248 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15249 fputs ("{z}", file);
15250 return;
15251
15252 case 'r':
15253 gcc_assert (CONST_INT_P (x));
15254 gcc_assert (INTVAL (x) == ROUND_SAE);
15255
15256 if (ASSEMBLER_DIALECT == ASM_INTEL)
15257 fputs (", ", file);
15258
15259 fputs ("{sae}", file);
15260
15261 if (ASSEMBLER_DIALECT == ASM_ATT)
15262 fputs (", ", file);
15263
15264 return;
15265
15266 case 'R':
15267 gcc_assert (CONST_INT_P (x));
15268
15269 if (ASSEMBLER_DIALECT == ASM_INTEL)
15270 fputs (", ", file);
15271
15272 switch (INTVAL (x))
15273 {
15274 case ROUND_NEAREST_INT | ROUND_SAE:
15275 fputs ("{rn-sae}", file);
15276 break;
15277 case ROUND_NEG_INF | ROUND_SAE:
15278 fputs ("{rd-sae}", file);
15279 break;
15280 case ROUND_POS_INF | ROUND_SAE:
15281 fputs ("{ru-sae}", file);
15282 break;
15283 case ROUND_ZERO | ROUND_SAE:
15284 fputs ("{rz-sae}", file);
15285 break;
15286 default:
15287 gcc_unreachable ();
15288 }
15289
15290 if (ASSEMBLER_DIALECT == ASM_ATT)
15291 fputs (", ", file);
15292
15293 return;
15294
15295 case '*':
15296 if (ASSEMBLER_DIALECT == ASM_ATT)
15297 putc ('*', file);
15298 return;
15299
15300 case '&':
15301 {
15302 const char *name = get_some_local_dynamic_name ();
15303 if (name == NULL)
15304 output_operand_lossage ("'%%&' used without any "
15305 "local dynamic TLS references");
15306 else
15307 assemble_name (file, name);
15308 return;
15309 }
15310
15311 case '+':
15312 {
15313 rtx x;
15314
15315 if (!optimize
15316 || optimize_function_for_size_p (cfun)
15317 || !TARGET_BRANCH_PREDICTION_HINTS)
15318 return;
15319
15320 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15321 if (x)
15322 {
15323 int pred_val = XINT (x, 0);
15324
15325 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15326 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15327 {
15328 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15329 bool cputaken
15330 = final_forward_branch_p (current_output_insn) == 0;
15331
15332 /* Emit hints only in the case default branch prediction
15333 heuristics would fail. */
15334 if (taken != cputaken)
15335 {
15336 /* We use 3e (DS) prefix for taken branches and
15337 2e (CS) prefix for not taken branches. */
15338 if (taken)
15339 fputs ("ds ; ", file);
15340 else
15341 fputs ("cs ; ", file);
15342 }
15343 }
15344 }
15345 return;
15346 }
15347
15348 case ';':
15349 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15350 putc (';', file);
15351 #endif
15352 return;
15353
15354 case '@':
15355 if (ASSEMBLER_DIALECT == ASM_ATT)
15356 putc ('%', file);
15357
15358 /* The kernel uses a different segment register for performance
15359 reasons; this way a system call does not have to trash the userspace
15360 segment register, which would be expensive. */
15361 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15362 fputs ("fs", file);
15363 else
15364 fputs ("gs", file);
15365 return;
15366
15367 case '~':
15368 putc (TARGET_AVX2 ? 'i' : 'f', file);
15369 return;
15370
15371 case '^':
15372 if (TARGET_64BIT && Pmode != word_mode)
15373 fputs ("addr32 ", file);
15374 return;
15375
15376 default:
15377 output_operand_lossage ("invalid operand code '%c'", code);
15378 }
15379 }
15380
15381 if (REG_P (x))
15382 print_reg (x, code, file);
15383
15384 else if (MEM_P (x))
15385 {
15386 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15387 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15388 && GET_MODE (x) != BLKmode)
15389 {
15390 const char * size;
15391 switch (GET_MODE_SIZE (GET_MODE (x)))
15392 {
15393 case 1: size = "BYTE"; break;
15394 case 2: size = "WORD"; break;
15395 case 4: size = "DWORD"; break;
15396 case 8: size = "QWORD"; break;
15397 case 12: size = "TBYTE"; break;
15398 case 16:
15399 if (GET_MODE (x) == XFmode)
15400 size = "TBYTE";
15401 else
15402 size = "XMMWORD";
15403 break;
15404 case 32: size = "YMMWORD"; break;
15405 case 64: size = "ZMMWORD"; break;
15406 default:
15407 gcc_unreachable ();
15408 }
15409
15410 /* Check for explicit size override (codes 'b', 'w', 'k',
15411 'q' and 'x') */
15412 if (code == 'b')
15413 size = "BYTE";
15414 else if (code == 'w')
15415 size = "WORD";
15416 else if (code == 'k')
15417 size = "DWORD";
15418 else if (code == 'q')
15419 size = "QWORD";
15420 else if (code == 'x')
15421 size = "XMMWORD";
15422
15423 fputs (size, file);
15424 fputs (" PTR ", file);
15425 }
15426
15427 x = XEXP (x, 0);
15428 /* Avoid (%rip) for call operands. */
15429 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15430 && !CONST_INT_P (x))
15431 output_addr_const (file, x);
15432 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15433 output_operand_lossage ("invalid constraints for operand");
15434 else
15435 output_address (x);
15436 }
15437
15438 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15439 {
15440 REAL_VALUE_TYPE r;
15441 long l;
15442
15443 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15444 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15445
15446 if (ASSEMBLER_DIALECT == ASM_ATT)
15447 putc ('$', file);
15448 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15449 if (code == 'q')
15450 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15451 (unsigned long long) (int) l);
15452 else
15453 fprintf (file, "0x%08x", (unsigned int) l);
15454 }
15455
15456 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15457 {
15458 REAL_VALUE_TYPE r;
15459 long l[2];
15460
15461 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15462 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15463
15464 if (ASSEMBLER_DIALECT == ASM_ATT)
15465 putc ('$', file);
15466 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15467 }
15468
15469 /* These float cases don't actually occur as immediate operands. */
15470 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15471 {
15472 char dstr[30];
15473
15474 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15475 fputs (dstr, file);
15476 }
15477
15478 else
15479 {
15480 /* We have patterns that allow zero sets of memory, for instance.
15481 In 64-bit mode, we should probably support all 8-byte vectors,
15482 since we can in fact encode that into an immediate. */
15483 if (GET_CODE (x) == CONST_VECTOR)
15484 {
15485 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15486 x = const0_rtx;
15487 }
15488
15489 if (code != 'P' && code != 'p')
15490 {
15491 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15492 {
15493 if (ASSEMBLER_DIALECT == ASM_ATT)
15494 putc ('$', file);
15495 }
15496 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15497 || GET_CODE (x) == LABEL_REF)
15498 {
15499 if (ASSEMBLER_DIALECT == ASM_ATT)
15500 putc ('$', file);
15501 else
15502 fputs ("OFFSET FLAT:", file);
15503 }
15504 }
15505 if (CONST_INT_P (x))
15506 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15507 else if (flag_pic || MACHOPIC_INDIRECT)
15508 output_pic_addr_const (file, x, code);
15509 else
15510 output_addr_const (file, x);
15511 }
15512 }
15513
15514 static bool
15515 ix86_print_operand_punct_valid_p (unsigned char code)
15516 {
15517 return (code == '@' || code == '*' || code == '+' || code == '&'
15518 || code == ';' || code == '~' || code == '^');
15519 }
15520 \f
15521 /* Print a memory operand whose address is ADDR. */
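/* E.g. a decomposed address is printed as "disp(base,index,scale)" in
   AT&T syntax and as "[base+index*scale+disp]" in Intel syntax, with an
   optional segment override in front.  */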
15522
15523 static void
15524 ix86_print_operand_address (FILE *file, rtx addr)
15525 {
15526 struct ix86_address parts;
15527 rtx base, index, disp;
15528 int scale;
15529 int ok;
15530 bool vsib = false;
15531 int code = 0;
15532
15533 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15534 {
15535 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15536 gcc_assert (parts.index == NULL_RTX);
15537 parts.index = XVECEXP (addr, 0, 1);
15538 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15539 addr = XVECEXP (addr, 0, 0);
15540 vsib = true;
15541 }
15542 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15543 {
15544 gcc_assert (TARGET_64BIT);
15545 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15546 code = 'q';
15547 }
15548 else
15549 ok = ix86_decompose_address (addr, &parts);
15550
15551 gcc_assert (ok);
15552
15553 base = parts.base;
15554 index = parts.index;
15555 disp = parts.disp;
15556 scale = parts.scale;
15557
15558 switch (parts.seg)
15559 {
15560 case SEG_DEFAULT:
15561 break;
15562 case SEG_FS:
15563 case SEG_GS:
15564 if (ASSEMBLER_DIALECT == ASM_ATT)
15565 putc ('%', file);
15566 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15567 break;
15568 default:
15569 gcc_unreachable ();
15570 }
15571
15572 /* Use one byte shorter RIP relative addressing for 64bit mode. */
15573 if (TARGET_64BIT && !base && !index)
15574 {
15575 rtx symbol = disp;
15576
15577 if (GET_CODE (disp) == CONST
15578 && GET_CODE (XEXP (disp, 0)) == PLUS
15579 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15580 symbol = XEXP (XEXP (disp, 0), 0);
15581
15582 if (GET_CODE (symbol) == LABEL_REF
15583 || (GET_CODE (symbol) == SYMBOL_REF
15584 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15585 base = pc_rtx;
15586 }
15587 if (!base && !index)
15588 {
15589 /* A displacement-only address requires special attention. */
15590
15591 if (CONST_INT_P (disp))
15592 {
15593 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15594 fputs ("ds:", file);
15595 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15596 }
15597 else if (flag_pic)
15598 output_pic_addr_const (file, disp, 0);
15599 else
15600 output_addr_const (file, disp);
15601 }
15602 else
15603 {
15604 /* Print SImode register names to force addr32 prefix. */
15605 if (SImode_address_operand (addr, VOIDmode))
15606 {
15607 #ifdef ENABLE_CHECKING
15608 gcc_assert (TARGET_64BIT);
15609 switch (GET_CODE (addr))
15610 {
15611 case SUBREG:
15612 gcc_assert (GET_MODE (addr) == SImode);
15613 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15614 break;
15615 case ZERO_EXTEND:
15616 case AND:
15617 gcc_assert (GET_MODE (addr) == DImode);
15618 break;
15619 default:
15620 gcc_unreachable ();
15621 }
15622 #endif
15623 gcc_assert (!code);
15624 code = 'k';
15625 }
15626 else if (code == 0
15627 && TARGET_X32
15628 && disp
15629 && CONST_INT_P (disp)
15630 && INTVAL (disp) < -16*1024*1024)
15631 {
15632 /* X32 runs in 64-bit mode, where displacement, DISP, in
15633 address DISP(%r64), is encoded as 32-bit immediate sign-
15634 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15635 address is %r64 + 0xffffffffbffffd00. When %r64 <
15636 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15637 which is invalid for x32. The correct address is %r64
15638 - 0x40000300 == 0xf7ffdd64. To properly encode
15639 -0x40000300(%r64) for x32, we zero-extend negative
15640 displacement by forcing addr32 prefix which truncates
15641 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15642 zero-extend all negative displacements, including -1(%rsp).
15643 However, for small negative displacements, sign-extension
15644 won't cause overflow. We only zero-extend negative
15645 displacements if they < -16*1024*1024, which is also used
15646 to check legitimate address displacements for PIC. */
15647 code = 'k';
15648 }
15649
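/* Illustrative note (not part of the original source): for base %rax,
   index %rbx, scale 4 and displacement 16, the AT&T branch below prints
   "16(%rax,%rbx,4)" while the Intel branch prints "[rax+16+rbx*4]".  */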
15650 if (ASSEMBLER_DIALECT == ASM_ATT)
15651 {
15652 if (disp)
15653 {
15654 if (flag_pic)
15655 output_pic_addr_const (file, disp, 0);
15656 else if (GET_CODE (disp) == LABEL_REF)
15657 output_asm_label (disp);
15658 else
15659 output_addr_const (file, disp);
15660 }
15661
15662 putc ('(', file);
15663 if (base)
15664 print_reg (base, code, file);
15665 if (index)
15666 {
15667 putc (',', file);
15668 print_reg (index, vsib ? 0 : code, file);
15669 if (scale != 1 || vsib)
15670 fprintf (file, ",%d", scale);
15671 }
15672 putc (')', file);
15673 }
15674 else
15675 {
15676 rtx offset = NULL_RTX;
15677
15678 if (disp)
15679 {
15680 /* Pull out the offset of a symbol; print any symbol itself. */
15681 if (GET_CODE (disp) == CONST
15682 && GET_CODE (XEXP (disp, 0)) == PLUS
15683 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15684 {
15685 offset = XEXP (XEXP (disp, 0), 1);
15686 disp = gen_rtx_CONST (VOIDmode,
15687 XEXP (XEXP (disp, 0), 0));
15688 }
15689
15690 if (flag_pic)
15691 output_pic_addr_const (file, disp, 0);
15692 else if (GET_CODE (disp) == LABEL_REF)
15693 output_asm_label (disp);
15694 else if (CONST_INT_P (disp))
15695 offset = disp;
15696 else
15697 output_addr_const (file, disp);
15698 }
15699
15700 putc ('[', file);
15701 if (base)
15702 {
15703 print_reg (base, code, file);
15704 if (offset)
15705 {
15706 if (INTVAL (offset) >= 0)
15707 putc ('+', file);
15708 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15709 }
15710 }
15711 else if (offset)
15712 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15713 else
15714 putc ('0', file);
15715
15716 if (index)
15717 {
15718 putc ('+', file);
15719 print_reg (index, vsib ? 0 : code, file);
15720 if (scale != 1 || vsib)
15721 fprintf (file, "*%d", scale);
15722 }
15723 putc (']', file);
15724 }
15725 }
15726 }
15727
15728 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
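/* Background note (assumed, not stated in this file): the UNSPEC cases below
   print TLS relocation operators such as @gottpoff, @tpoff, @ntpoff and
   @dtpoff, which the assembler turns into the relocations used by the
   initial-exec, local-exec and local-dynamic TLS access models.  */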
15729
15730 static bool
15731 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15732 {
15733 rtx op;
15734
15735 if (GET_CODE (x) != UNSPEC)
15736 return false;
15737
15738 op = XVECEXP (x, 0, 0);
15739 switch (XINT (x, 1))
15740 {
15741 case UNSPEC_GOTTPOFF:
15742 output_addr_const (file, op);
15743 /* FIXME: This might be @TPOFF in Sun ld. */
15744 fputs ("@gottpoff", file);
15745 break;
15746 case UNSPEC_TPOFF:
15747 output_addr_const (file, op);
15748 fputs ("@tpoff", file);
15749 break;
15750 case UNSPEC_NTPOFF:
15751 output_addr_const (file, op);
15752 if (TARGET_64BIT)
15753 fputs ("@tpoff", file);
15754 else
15755 fputs ("@ntpoff", file);
15756 break;
15757 case UNSPEC_DTPOFF:
15758 output_addr_const (file, op);
15759 fputs ("@dtpoff", file);
15760 break;
15761 case UNSPEC_GOTNTPOFF:
15762 output_addr_const (file, op);
15763 if (TARGET_64BIT)
15764 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15765 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15766 else
15767 fputs ("@gotntpoff", file);
15768 break;
15769 case UNSPEC_INDNTPOFF:
15770 output_addr_const (file, op);
15771 fputs ("@indntpoff", file);
15772 break;
15773 #if TARGET_MACHO
15774 case UNSPEC_MACHOPIC_OFFSET:
15775 output_addr_const (file, op);
15776 putc ('-', file);
15777 machopic_output_function_base_name (file);
15778 break;
15779 #endif
15780
15781 case UNSPEC_STACK_CHECK:
15782 {
15783 int offset;
15784
15785 gcc_assert (flag_split_stack);
15786
15787 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15788 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15789 #else
15790 gcc_unreachable ();
15791 #endif
15792
15793 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15794 }
15795 break;
15796
15797 default:
15798 return false;
15799 }
15800
15801 return true;
15802 }
15803 \f
15804 /* Split one or more double-mode RTL references into pairs of half-mode
15805 references. The RTL can be REG, offsettable MEM, integer constant, or
15806 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15807 split and "num" is its length. lo_half and hi_half are output arrays
15808 that parallel "operands". */
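/* Illustrative example (not part of the original source): splitting a DImode
   MEM yields two SImode MEMs at byte offsets 0 and 4, while a DImode REG or
   constant is split into its two SImode halves via simplify_gen_subreg.  */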
15809
15810 void
15811 split_double_mode (enum machine_mode mode, rtx operands[],
15812 int num, rtx lo_half[], rtx hi_half[])
15813 {
15814 enum machine_mode half_mode;
15815 unsigned int byte;
15816
15817 switch (mode)
15818 {
15819 case TImode:
15820 half_mode = DImode;
15821 break;
15822 case DImode:
15823 half_mode = SImode;
15824 break;
15825 default:
15826 gcc_unreachable ();
15827 }
15828
15829 byte = GET_MODE_SIZE (half_mode);
15830
15831 while (num--)
15832 {
15833 rtx op = operands[num];
15834
15835 /* simplify_subreg refuses to split volatile memory references,
15836 but we still have to handle them. */
15837 if (MEM_P (op))
15838 {
15839 lo_half[num] = adjust_address (op, half_mode, 0);
15840 hi_half[num] = adjust_address (op, half_mode, byte);
15841 }
15842 else
15843 {
15844 lo_half[num] = simplify_gen_subreg (half_mode, op,
15845 GET_MODE (op) == VOIDmode
15846 ? mode : GET_MODE (op), 0);
15847 hi_half[num] = simplify_gen_subreg (half_mode, op,
15848 GET_MODE (op) == VOIDmode
15849 ? mode : GET_MODE (op), byte);
15850 }
15851 }
15852 }
15853 \f
15854 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15855 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15856 is the expression of the binary operation. The output may either be
15857 emitted here, or returned to the caller, like all output_* functions.
15858
15859 There is no guarantee that the operands are the same mode, as they
15860 might be within FLOAT or FLOAT_EXTEND expressions. */
15861
15862 #ifndef SYSV386_COMPAT
15863 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15864 wants to fix the assemblers because that causes incompatibility
15865 with gcc. No-one wants to fix gcc because that causes
15866 incompatibility with assemblers... You can use the option of
15867 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15868 #define SYSV386_COMPAT 1
15869 #endif
15870
15871 const char *
15872 output_387_binary_op (rtx insn, rtx *operands)
15873 {
15874 static char buf[40];
15875 const char *p;
15876 const char *ssep;
15877 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15878
15879 #ifdef ENABLE_CHECKING
15880 /* Even if we do not want to check the inputs, this documents the input
15881 constraints, which helps in understanding the following code. */
15882 if (STACK_REG_P (operands[0])
15883 && ((REG_P (operands[1])
15884 && REGNO (operands[0]) == REGNO (operands[1])
15885 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15886 || (REG_P (operands[2])
15887 && REGNO (operands[0]) == REGNO (operands[2])
15888 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15889 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15890 ; /* ok */
15891 else
15892 gcc_assert (is_sse);
15893 #endif
15894
15895 switch (GET_CODE (operands[3]))
15896 {
15897 case PLUS:
15898 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15899 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15900 p = "fiadd";
15901 else
15902 p = "fadd";
15903 ssep = "vadd";
15904 break;
15905
15906 case MINUS:
15907 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15908 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15909 p = "fisub";
15910 else
15911 p = "fsub";
15912 ssep = "vsub";
15913 break;
15914
15915 case MULT:
15916 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15917 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15918 p = "fimul";
15919 else
15920 p = "fmul";
15921 ssep = "vmul";
15922 break;
15923
15924 case DIV:
15925 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15926 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15927 p = "fidiv";
15928 else
15929 p = "fdiv";
15930 ssep = "vdiv";
15931 break;
15932
15933 default:
15934 gcc_unreachable ();
15935 }
15936
15937 if (is_sse)
15938 {
15939 if (TARGET_AVX)
15940 {
15941 strcpy (buf, ssep);
15942 if (GET_MODE (operands[0]) == SFmode)
15943 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15944 else
15945 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15946 }
15947 else
15948 {
15949 strcpy (buf, ssep + 1);
15950 if (GET_MODE (operands[0]) == SFmode)
15951 strcat (buf, "ss\t{%2, %0|%0, %2}");
15952 else
15953 strcat (buf, "sd\t{%2, %0|%0, %2}");
15954 }
15955 return buf;
15956 }
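/* Illustrative note: for an AVX SFmode PLUS the code above builds
   "vaddss\t{%2, %1, %0|%0, %1, %2}", the three-operand VEX form in both
   AT&T and Intel syntax; without AVX the leading 'v' is dropped and the
   two-operand "addss\t{%2, %0|%0, %2}" form is used instead.  */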
15957 strcpy (buf, p);
15958
15959 switch (GET_CODE (operands[3]))
15960 {
15961 case MULT:
15962 case PLUS:
15963 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15964 {
15965 rtx temp = operands[2];
15966 operands[2] = operands[1];
15967 operands[1] = temp;
15968 }
15969
15970 /* We know operands[0] == operands[1]. */
15971
15972 if (MEM_P (operands[2]))
15973 {
15974 p = "%Z2\t%2";
15975 break;
15976 }
15977
15978 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15979 {
15980 if (STACK_TOP_P (operands[0]))
15981 /* How is it that we are storing to a dead operand[2]?
15982 Well, presumably operands[1] is dead too. We can't
15983 store the result to st(0) as st(0) gets popped on this
15984 instruction. Instead store to operands[2] (which I
15985 think has to be st(1)). st(1) will be popped later.
15986 gcc <= 2.8.1 didn't have this check and generated
15987 assembly code that the Unixware assembler rejected. */
15988 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15989 else
15990 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15991 break;
15992 }
15993
15994 if (STACK_TOP_P (operands[0]))
15995 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15996 else
15997 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15998 break;
15999
16000 case MINUS:
16001 case DIV:
16002 if (MEM_P (operands[1]))
16003 {
16004 p = "r%Z1\t%1";
16005 break;
16006 }
16007
16008 if (MEM_P (operands[2]))
16009 {
16010 p = "%Z2\t%2";
16011 break;
16012 }
16013
16014 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16015 {
16016 #if SYSV386_COMPAT
16017 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16018 derived assemblers, confusingly reverse the direction of
16019 the operation for fsub{r} and fdiv{r} when the
16020 destination register is not st(0). The Intel assembler
16021 doesn't have this brain damage. Read !SYSV386_COMPAT to
16022 figure out what the hardware really does. */
16023 if (STACK_TOP_P (operands[0]))
16024 p = "{p\t%0, %2|rp\t%2, %0}";
16025 else
16026 p = "{rp\t%2, %0|p\t%0, %2}";
16027 #else
16028 if (STACK_TOP_P (operands[0]))
16029 /* As above for fmul/fadd, we can't store to st(0). */
16030 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16031 else
16032 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16033 #endif
16034 break;
16035 }
16036
16037 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16038 {
16039 #if SYSV386_COMPAT
16040 if (STACK_TOP_P (operands[0]))
16041 p = "{rp\t%0, %1|p\t%1, %0}";
16042 else
16043 p = "{p\t%1, %0|rp\t%0, %1}";
16044 #else
16045 if (STACK_TOP_P (operands[0]))
16046 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16047 else
16048 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16049 #endif
16050 break;
16051 }
16052
16053 if (STACK_TOP_P (operands[0]))
16054 {
16055 if (STACK_TOP_P (operands[1]))
16056 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16057 else
16058 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16059 break;
16060 }
16061 else if (STACK_TOP_P (operands[1]))
16062 {
16063 #if SYSV386_COMPAT
16064 p = "{\t%1, %0|r\t%0, %1}";
16065 #else
16066 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16067 #endif
16068 }
16069 else
16070 {
16071 #if SYSV386_COMPAT
16072 p = "{r\t%2, %0|\t%0, %2}";
16073 #else
16074 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16075 #endif
16076 }
16077 break;
16078
16079 default:
16080 gcc_unreachable ();
16081 }
16082
16083 strcat (buf, p);
16084 return buf;
16085 }
16086
16087 /* Check if a 256bit AVX register is referenced inside of EXP. */
16088
16089 static int
16090 ix86_check_avx256_register (rtx *pexp, void *)
16091 {
16092 rtx exp = *pexp;
16093
16094 if (GET_CODE (exp) == SUBREG)
16095 exp = SUBREG_REG (exp);
16096
16097 if (REG_P (exp)
16098 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16099 return 1;
16100
16101 return 0;
16102 }
16103
16104 /* Return needed mode for entity in optimize_mode_switching pass. */
16105
16106 static int
16107 ix86_avx_u128_mode_needed (rtx insn)
16108 {
16109 if (CALL_P (insn))
16110 {
16111 rtx link;
16112
16113 /* Needed mode is set to AVX_U128_CLEAN if there are
16114 no 256bit modes used in function arguments. */
16115 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16116 link;
16117 link = XEXP (link, 1))
16118 {
16119 if (GET_CODE (XEXP (link, 0)) == USE)
16120 {
16121 rtx arg = XEXP (XEXP (link, 0), 0);
16122
16123 if (ix86_check_avx256_register (&arg, NULL))
16124 return AVX_U128_DIRTY;
16125 }
16126 }
16127
16128 return AVX_U128_CLEAN;
16129 }
16130
16131 /* Require DIRTY mode if a 256bit AVX register is referenced. The hardware
16132 changes state only when a 256bit register is written to, but we need
16133 to prevent the compiler from moving the optimal insertion point above
16134 an eventual read from a 256bit register. */
16135 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16136 return AVX_U128_DIRTY;
16137
16138 return AVX_U128_ANY;
16139 }
16140
16141 /* Return mode that i387 must be switched into
16142 prior to the execution of insn. */
16143
16144 static int
16145 ix86_i387_mode_needed (int entity, rtx insn)
16146 {
16147 enum attr_i387_cw mode;
16148
16149 /* The mode UNINITIALIZED is used to store the control word after a
16150 function call or ASM pattern. The mode ANY specifies that the insn
16151 has no requirements on the control word and makes no changes to the
16152 bits we are interested in. */
16153
16154 if (CALL_P (insn)
16155 || (NONJUMP_INSN_P (insn)
16156 && (asm_noperands (PATTERN (insn)) >= 0
16157 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16158 return I387_CW_UNINITIALIZED;
16159
16160 if (recog_memoized (insn) < 0)
16161 return I387_CW_ANY;
16162
16163 mode = get_attr_i387_cw (insn);
16164
16165 switch (entity)
16166 {
16167 case I387_TRUNC:
16168 if (mode == I387_CW_TRUNC)
16169 return mode;
16170 break;
16171
16172 case I387_FLOOR:
16173 if (mode == I387_CW_FLOOR)
16174 return mode;
16175 break;
16176
16177 case I387_CEIL:
16178 if (mode == I387_CW_CEIL)
16179 return mode;
16180 break;
16181
16182 case I387_MASK_PM:
16183 if (mode == I387_CW_MASK_PM)
16184 return mode;
16185 break;
16186
16187 default:
16188 gcc_unreachable ();
16189 }
16190
16191 return I387_CW_ANY;
16192 }
16193
16194 /* Return mode that entity must be switched into
16195 prior to the execution of insn. */
16196
16197 static int
16198 ix86_mode_needed (int entity, rtx insn)
16199 {
16200 switch (entity)
16201 {
16202 case AVX_U128:
16203 return ix86_avx_u128_mode_needed (insn);
16204 case I387_TRUNC:
16205 case I387_FLOOR:
16206 case I387_CEIL:
16207 case I387_MASK_PM:
16208 return ix86_i387_mode_needed (entity, insn);
16209 default:
16210 gcc_unreachable ();
16211 }
16212 return 0;
16213 }
16214
16215 /* Check if a 256bit AVX register is referenced in stores. */
16216
16217 static void
16218 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16219 {
16220 if (ix86_check_avx256_register (&dest, NULL))
16221 {
16222 bool *used = (bool *) data;
16223 *used = true;
16224 }
16225 }
16226
16227 /* Calculate mode of upper 128bit AVX registers after the insn. */
16228
16229 static int
16230 ix86_avx_u128_mode_after (int mode, rtx insn)
16231 {
16232 rtx pat = PATTERN (insn);
16233
16234 if (vzeroupper_operation (pat, VOIDmode)
16235 || vzeroall_operation (pat, VOIDmode))
16236 return AVX_U128_CLEAN;
16237
16238 /* We know that the state is clean after a CALL insn if the function
16239 return value does not use a 256bit register. */
16240 if (CALL_P (insn))
16241 {
16242 bool avx_reg256_found = false;
16243 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16244
16245 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16246 }
16247
16248 /* Otherwise, return current mode. Remember that if insn
16249 references AVX 256bit registers, the mode was already changed
16250 to DIRTY from MODE_NEEDED. */
16251 return mode;
16252 }
16253
16254 /* Return the mode that an insn results in. */
16255
16256 int
16257 ix86_mode_after (int entity, int mode, rtx insn)
16258 {
16259 switch (entity)
16260 {
16261 case AVX_U128:
16262 return ix86_avx_u128_mode_after (mode, insn);
16263 case I387_TRUNC:
16264 case I387_FLOOR:
16265 case I387_CEIL:
16266 case I387_MASK_PM:
16267 return mode;
16268 default:
16269 gcc_unreachable ();
16270 }
16271 }
16272
16273 static int
16274 ix86_avx_u128_mode_entry (void)
16275 {
16276 tree arg;
16277
16278 /* Entry mode is set to AVX_U128_DIRTY if there are
16279 256bit modes used in function arguments. */
16280 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16281 arg = TREE_CHAIN (arg))
16282 {
16283 rtx incoming = DECL_INCOMING_RTL (arg);
16284
16285 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16286 return AVX_U128_DIRTY;
16287 }
16288
16289 return AVX_U128_CLEAN;
16290 }
16291
16292 /* Return a mode that ENTITY is assumed to be
16293 switched to at function entry. */
16294
16295 static int
16296 ix86_mode_entry (int entity)
16297 {
16298 switch (entity)
16299 {
16300 case AVX_U128:
16301 return ix86_avx_u128_mode_entry ();
16302 case I387_TRUNC:
16303 case I387_FLOOR:
16304 case I387_CEIL:
16305 case I387_MASK_PM:
16306 return I387_CW_ANY;
16307 default:
16308 gcc_unreachable ();
16309 }
16310 }
16311
16312 static int
16313 ix86_avx_u128_mode_exit (void)
16314 {
16315 rtx reg = crtl->return_rtx;
16316
16317 /* Exit mode is set to AVX_U128_DIRTY if there are
16318 256bit modes used in the function return register. */
16319 if (reg && ix86_check_avx256_register (&reg, NULL))
16320 return AVX_U128_DIRTY;
16321
16322 return AVX_U128_CLEAN;
16323 }
16324
16325 /* Return a mode that ENTITY is assumed to be
16326 switched to at function exit. */
16327
16328 static int
16329 ix86_mode_exit (int entity)
16330 {
16331 switch (entity)
16332 {
16333 case AVX_U128:
16334 return ix86_avx_u128_mode_exit ();
16335 case I387_TRUNC:
16336 case I387_FLOOR:
16337 case I387_CEIL:
16338 case I387_MASK_PM:
16339 return I387_CW_ANY;
16340 default:
16341 gcc_unreachable ();
16342 }
16343 }
16344
16345 static int
16346 ix86_mode_priority (int, int n)
16347 {
16348 return n;
16349 }
16350
16351 /* Output code to initialize the control word copies used by the trunc?f?i
16352 and rounding patterns. The current control word is saved, modified
16353 according to MODE, and stored in the stack slot for MODE. */
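/* Background note (x87 architecture, assumed rather than stated here): bits
   10-11 of the control word select the rounding mode (00 nearest, 01 down,
   10 up, 11 truncate) and bit 5 masks the precision exception, which is why
   the code below ORs in 0x0400, 0x0800, 0x0c00 or 0x0020.  */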
16354
16355 static void
16356 emit_i387_cw_initialization (int mode)
16357 {
16358 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16359 rtx new_mode;
16360
16361 enum ix86_stack_slot slot;
16362
16363 rtx reg = gen_reg_rtx (HImode);
16364
16365 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16366 emit_move_insn (reg, copy_rtx (stored_mode));
16367
16368 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16369 || optimize_insn_for_size_p ())
16370 {
16371 switch (mode)
16372 {
16373 case I387_CW_TRUNC:
16374 /* round toward zero (truncate) */
16375 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16376 slot = SLOT_CW_TRUNC;
16377 break;
16378
16379 case I387_CW_FLOOR:
16380 /* round down toward -oo */
16381 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16382 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16383 slot = SLOT_CW_FLOOR;
16384 break;
16385
16386 case I387_CW_CEIL:
16387 /* round up toward +oo */
16388 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16389 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16390 slot = SLOT_CW_CEIL;
16391 break;
16392
16393 case I387_CW_MASK_PM:
16394 /* mask precision exception for nearbyint() */
16395 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16396 slot = SLOT_CW_MASK_PM;
16397 break;
16398
16399 default:
16400 gcc_unreachable ();
16401 }
16402 }
16403 else
16404 {
16405 switch (mode)
16406 {
16407 case I387_CW_TRUNC:
16408 /* round toward zero (truncate) */
16409 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16410 slot = SLOT_CW_TRUNC;
16411 break;
16412
16413 case I387_CW_FLOOR:
16414 /* round down toward -oo */
16415 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16416 slot = SLOT_CW_FLOOR;
16417 break;
16418
16419 case I387_CW_CEIL:
16420 /* round up toward +oo */
16421 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16422 slot = SLOT_CW_CEIL;
16423 break;
16424
16425 case I387_CW_MASK_PM:
16426 /* mask precision exception for nearbyint() */
16427 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16428 slot = SLOT_CW_MASK_PM;
16429 break;
16430
16431 default:
16432 gcc_unreachable ();
16433 }
16434 }
16435
16436 gcc_assert (slot < MAX_386_STACK_LOCALS);
16437
16438 new_mode = assign_386_stack_local (HImode, slot);
16439 emit_move_insn (new_mode, reg);
16440 }
16441
16442 /* Emit vzeroupper. */
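/* Background note (general AVX behaviour, assumed rather than stated here):
   executing legacy SSE code while the upper halves of the YMM registers are
   dirty incurs a transition penalty on several microarchitectures; emitting
   vzeroupper at the points chosen by the AVX_U128 mode-switching pass marks
   the upper halves clean and avoids that penalty.  */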
16443
16444 void
16445 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16446 {
16447 int i;
16448
16449 /* Cancel automatic vzeroupper insertion if there are
16450 live call-saved SSE registers at the insertion point. */
16451
16452 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16453 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16454 return;
16455
16456 if (TARGET_64BIT)
16457 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16458 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16459 return;
16460
16461 emit_insn (gen_avx_vzeroupper ());
16462 }
16463
16464 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE is the
16465 set of hard registers live at the point where the insn(s) are to be
16466 inserted. */
16469
16470 static void
16471 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16472 HARD_REG_SET regs_live)
16473 {
16474 switch (entity)
16475 {
16476 case AVX_U128:
16477 if (mode == AVX_U128_CLEAN)
16478 ix86_avx_emit_vzeroupper (regs_live);
16479 break;
16480 case I387_TRUNC:
16481 case I387_FLOOR:
16482 case I387_CEIL:
16483 case I387_MASK_PM:
16484 if (mode != I387_CW_ANY
16485 && mode != I387_CW_UNINITIALIZED)
16486 emit_i387_cw_initialization (mode);
16487 break;
16488 default:
16489 gcc_unreachable ();
16490 }
16491 }
16492
16493 /* Output code for INSN to convert a float to a signed int. OPERANDS
16494 are the insn operands. The output may be [HSD]Imode and the input
16495 operand may be [SDX]Fmode. */
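/* Background note (x87/SSE3 behaviour, assumed rather than stated here):
   fistp rounds according to the current control word, so the non-FISTTP
   path below temporarily loads the truncating control word (%3) and then
   restores the saved one (%2); SSE3's fisttp always truncates and needs no
   control-word switch.  */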
16496
16497 const char *
16498 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16499 {
16500 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16501 int dimode_p = GET_MODE (operands[0]) == DImode;
16502 int round_mode = get_attr_i387_cw (insn);
16503
16504 /* Jump through a hoop or two for DImode, since the hardware has no
16505 non-popping instruction. We used to do this a different way, but
16506 that was somewhat fragile and broke with post-reload splitters. */
16507 if ((dimode_p || fisttp) && !stack_top_dies)
16508 output_asm_insn ("fld\t%y1", operands);
16509
16510 gcc_assert (STACK_TOP_P (operands[1]));
16511 gcc_assert (MEM_P (operands[0]));
16512 gcc_assert (GET_MODE (operands[1]) != TFmode);
16513
16514 if (fisttp)
16515 output_asm_insn ("fisttp%Z0\t%0", operands);
16516 else
16517 {
16518 if (round_mode != I387_CW_ANY)
16519 output_asm_insn ("fldcw\t%3", operands);
16520 if (stack_top_dies || dimode_p)
16521 output_asm_insn ("fistp%Z0\t%0", operands);
16522 else
16523 output_asm_insn ("fist%Z0\t%0", operands);
16524 if (round_mode != I387_CW_ANY)
16525 output_asm_insn ("fldcw\t%2", operands);
16526 }
16527
16528 return "";
16529 }
16530
16531 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16532 have the values zero or one, indicates the ffreep insn's operand
16533 from the OPERANDS array. */
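/* Note (assumed encoding detail): when the assembler does not know the
   mnemonic, the fallback below emits ASM_SHORT 0xc<N>df, i.e. the raw bytes
   0xdf 0xc0+N, which is the machine encoding of "ffreep %st(N)".  */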
16534
16535 static const char *
16536 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16537 {
16538 if (TARGET_USE_FFREEP)
16539 #ifdef HAVE_AS_IX86_FFREEP
16540 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16541 #else
16542 {
16543 static char retval[32];
16544 int regno = REGNO (operands[opno]);
16545
16546 gcc_assert (STACK_REGNO_P (regno));
16547
16548 regno -= FIRST_STACK_REG;
16549
16550 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16551 return retval;
16552 }
16553 #endif
16554
16555 return opno ? "fstp\t%y1" : "fstp\t%y0";
16556 }
16557
16558
16559 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16560 should be used. UNORDERED_P is true when fucom should be used. */
16561
16562 const char *
16563 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16564 {
16565 int stack_top_dies;
16566 rtx cmp_op0, cmp_op1;
16567 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16568
16569 if (eflags_p)
16570 {
16571 cmp_op0 = operands[0];
16572 cmp_op1 = operands[1];
16573 }
16574 else
16575 {
16576 cmp_op0 = operands[1];
16577 cmp_op1 = operands[2];
16578 }
16579
16580 if (is_sse)
16581 {
16582 if (GET_MODE (operands[0]) == SFmode)
16583 if (unordered_p)
16584 return "%vucomiss\t{%1, %0|%0, %1}";
16585 else
16586 return "%vcomiss\t{%1, %0|%0, %1}";
16587 else
16588 if (unordered_p)
16589 return "%vucomisd\t{%1, %0|%0, %1}";
16590 else
16591 return "%vcomisd\t{%1, %0|%0, %1}";
16592 }
16593
16594 gcc_assert (STACK_TOP_P (cmp_op0));
16595
16596 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16597
16598 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16599 {
16600 if (stack_top_dies)
16601 {
16602 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16603 return output_387_ffreep (operands, 1);
16604 }
16605 else
16606 return "ftst\n\tfnstsw\t%0";
16607 }
16608
16609 if (STACK_REG_P (cmp_op1)
16610 && stack_top_dies
16611 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16612 && REGNO (cmp_op1) != FIRST_STACK_REG)
16613 {
16614 /* If the top of the 387 stack dies, and the other operand is also a
16615 stack register that dies, then this must be a `fcompp' float
16616 compare. */
16617
16618 if (eflags_p)
16619 {
16620 /* There is no double popping fcomi variant. Fortunately,
16621 eflags is immune from the fstp's cc clobbering. */
16622 if (unordered_p)
16623 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16624 else
16625 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16626 return output_387_ffreep (operands, 0);
16627 }
16628 else
16629 {
16630 if (unordered_p)
16631 return "fucompp\n\tfnstsw\t%0";
16632 else
16633 return "fcompp\n\tfnstsw\t%0";
16634 }
16635 }
16636 else
16637 {
16638 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16639
16640 static const char * const alt[16] =
16641 {
16642 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16643 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16644 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16645 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16646
16647 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16648 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16649 NULL,
16650 NULL,
16651
16652 "fcomi\t{%y1, %0|%0, %y1}",
16653 "fcomip\t{%y1, %0|%0, %y1}",
16654 "fucomi\t{%y1, %0|%0, %y1}",
16655 "fucomip\t{%y1, %0|%0, %y1}",
16656
16657 NULL,
16658 NULL,
16659 NULL,
16660 NULL
16661 };
16662
16663 int mask;
16664 const char *ret;
16665
16666 mask = eflags_p << 3;
16667 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16668 mask |= unordered_p << 1;
16669 mask |= stack_top_dies;
16670
16671 gcc_assert (mask < 16);
16672 ret = alt[mask];
16673 gcc_assert (ret);
16674
16675 return ret;
16676 }
16677 }
16678
16679 void
16680 ix86_output_addr_vec_elt (FILE *file, int value)
16681 {
16682 const char *directive = ASM_LONG;
16683
16684 #ifdef ASM_QUAD
16685 if (TARGET_LP64)
16686 directive = ASM_QUAD;
16687 #else
16688 gcc_assert (!TARGET_64BIT);
16689 #endif
16690
16691 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16692 }
16693
16694 void
16695 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16696 {
16697 const char *directive = ASM_LONG;
16698
16699 #ifdef ASM_QUAD
16700 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16701 directive = ASM_QUAD;
16702 #else
16703 gcc_assert (!TARGET_64BIT);
16704 #endif
16705 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16706 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16707 fprintf (file, "%s%s%d-%s%d\n",
16708 directive, LPREFIX, value, LPREFIX, rel);
16709 else if (HAVE_AS_GOTOFF_IN_DATA)
16710 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16711 #if TARGET_MACHO
16712 else if (TARGET_MACHO)
16713 {
16714 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16715 machopic_output_function_base_name (file);
16716 putc ('\n', file);
16717 }
16718 #endif
16719 else
16720 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16721 GOT_SYMBOL_NAME, LPREFIX, value);
16722 }
16723 \f
16724 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16725 for the target. */
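/* Note (standard x86 practice, assumed rather than stated here): "xor reg,reg"
   is shorter than "mov $0,reg" and breaks dependencies on the old register
   value, but it clobbers the flags, which is why a CLOBBER of FLAGS_REG is
   attached below whenever the xor form may be chosen.  */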
16726
16727 void
16728 ix86_expand_clear (rtx dest)
16729 {
16730 rtx tmp;
16731
16732 /* We play register width games, which are only valid after reload. */
16733 gcc_assert (reload_completed);
16734
16735 /* Avoid HImode and its attendant prefix byte. */
16736 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16737 dest = gen_rtx_REG (SImode, REGNO (dest));
16738 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16739
16740 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
16741 {
16742 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16743 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16744 }
16745
16746 emit_insn (tmp);
16747 }
16748
16749 /* X is an unchanging MEM. If it is a constant pool reference, return
16750 the constant pool rtx, else NULL. */
16751
16752 rtx
16753 maybe_get_pool_constant (rtx x)
16754 {
16755 x = ix86_delegitimize_address (XEXP (x, 0));
16756
16757 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16758 return get_pool_constant (x);
16759
16760 return NULL_RTX;
16761 }
16762
16763 void
16764 ix86_expand_move (enum machine_mode mode, rtx operands[])
16765 {
16766 rtx op0, op1;
16767 enum tls_model model;
16768
16769 op0 = operands[0];
16770 op1 = operands[1];
16771
16772 if (GET_CODE (op1) == SYMBOL_REF)
16773 {
16774 rtx tmp;
16775
16776 model = SYMBOL_REF_TLS_MODEL (op1);
16777 if (model)
16778 {
16779 op1 = legitimize_tls_address (op1, model, true);
16780 op1 = force_operand (op1, op0);
16781 if (op1 == op0)
16782 return;
16783 op1 = convert_to_mode (mode, op1, 1);
16784 }
16785 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16786 op1 = tmp;
16787 }
16788 else if (GET_CODE (op1) == CONST
16789 && GET_CODE (XEXP (op1, 0)) == PLUS
16790 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16791 {
16792 rtx addend = XEXP (XEXP (op1, 0), 1);
16793 rtx symbol = XEXP (XEXP (op1, 0), 0);
16794 rtx tmp;
16795
16796 model = SYMBOL_REF_TLS_MODEL (symbol);
16797 if (model)
16798 tmp = legitimize_tls_address (symbol, model, true);
16799 else
16800 tmp = legitimize_pe_coff_symbol (symbol, true);
16801
16802 if (tmp)
16803 {
16804 tmp = force_operand (tmp, NULL);
16805 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16806 op0, 1, OPTAB_DIRECT);
16807 if (tmp == op0)
16808 return;
16809 op1 = convert_to_mode (mode, tmp, 1);
16810 }
16811 }
16812
16813 if ((flag_pic || MACHOPIC_INDIRECT)
16814 && symbolic_operand (op1, mode))
16815 {
16816 if (TARGET_MACHO && !TARGET_64BIT)
16817 {
16818 #if TARGET_MACHO
16819 /* dynamic-no-pic */
16820 if (MACHOPIC_INDIRECT)
16821 {
16822 rtx temp = ((reload_in_progress
16823 || ((op0 && REG_P (op0))
16824 && mode == Pmode))
16825 ? op0 : gen_reg_rtx (Pmode));
16826 op1 = machopic_indirect_data_reference (op1, temp);
16827 if (MACHOPIC_PURE)
16828 op1 = machopic_legitimize_pic_address (op1, mode,
16829 temp == op1 ? 0 : temp);
16830 }
16831 if (op0 != op1 && GET_CODE (op0) != MEM)
16832 {
16833 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16834 emit_insn (insn);
16835 return;
16836 }
16837 if (GET_CODE (op0) == MEM)
16838 op1 = force_reg (Pmode, op1);
16839 else
16840 {
16841 rtx temp = op0;
16842 if (GET_CODE (temp) != REG)
16843 temp = gen_reg_rtx (Pmode);
16844 temp = legitimize_pic_address (op1, temp);
16845 if (temp == op0)
16846 return;
16847 op1 = temp;
16848 }
16849 /* dynamic-no-pic */
16850 #endif
16851 }
16852 else
16853 {
16854 if (MEM_P (op0))
16855 op1 = force_reg (mode, op1);
16856 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16857 {
16858 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16859 op1 = legitimize_pic_address (op1, reg);
16860 if (op0 == op1)
16861 return;
16862 op1 = convert_to_mode (mode, op1, 1);
16863 }
16864 }
16865 }
16866 else
16867 {
16868 if (MEM_P (op0)
16869 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16870 || !push_operand (op0, mode))
16871 && MEM_P (op1))
16872 op1 = force_reg (mode, op1);
16873
16874 if (push_operand (op0, mode)
16875 && ! general_no_elim_operand (op1, mode))
16876 op1 = copy_to_mode_reg (mode, op1);
16877
16878 /* Force large constants in 64bit compilation into a register
16879 to get them CSEed. */
16880 if (can_create_pseudo_p ()
16881 && (mode == DImode) && TARGET_64BIT
16882 && immediate_operand (op1, mode)
16883 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16884 && !register_operand (op0, mode)
16885 && optimize)
16886 op1 = copy_to_mode_reg (mode, op1);
16887
16888 if (can_create_pseudo_p ()
16889 && FLOAT_MODE_P (mode)
16890 && GET_CODE (op1) == CONST_DOUBLE)
16891 {
16892 /* If we are loading a floating point constant to a register,
16893 force the value to memory now, since we'll get better code
16894 out the back end. */
16895
16896 op1 = validize_mem (force_const_mem (mode, op1));
16897 if (!register_operand (op0, mode))
16898 {
16899 rtx temp = gen_reg_rtx (mode);
16900 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16901 emit_move_insn (op0, temp);
16902 return;
16903 }
16904 }
16905 }
16906
16907 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16908 }
16909
16910 void
16911 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16912 {
16913 rtx op0 = operands[0], op1 = operands[1];
16914 unsigned int align = GET_MODE_ALIGNMENT (mode);
16915
16916 if (push_operand (op0, VOIDmode))
16917 op0 = emit_move_resolve_push (mode, op0);
16918
16919 /* Force constants other than zero into memory. We do not know how
16920 the instructions used to build constants modify the upper 64 bits
16921 of the register; once we have that information we may be able
16922 to handle some of them more efficiently. */
16923 if (can_create_pseudo_p ()
16924 && register_operand (op0, mode)
16925 && (CONSTANT_P (op1)
16926 || (GET_CODE (op1) == SUBREG
16927 && CONSTANT_P (SUBREG_REG (op1))))
16928 && !standard_sse_constant_p (op1))
16929 op1 = validize_mem (force_const_mem (mode, op1));
16930
16931 /* We need to check memory alignment for SSE mode since an attribute
16932 can make operands unaligned. */
16933 if (can_create_pseudo_p ()
16934 && SSE_REG_MODE_P (mode)
16935 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16936 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16937 {
16938 rtx tmp[2];
16939
16940 /* ix86_expand_vector_move_misalign() does not like constants ... */
16941 if (CONSTANT_P (op1)
16942 || (GET_CODE (op1) == SUBREG
16943 && CONSTANT_P (SUBREG_REG (op1))))
16944 op1 = validize_mem (force_const_mem (mode, op1));
16945
16946 /* ... nor both arguments in memory. */
16947 if (!register_operand (op0, mode)
16948 && !register_operand (op1, mode))
16949 op1 = force_reg (mode, op1);
16950
16951 tmp[0] = op0; tmp[1] = op1;
16952 ix86_expand_vector_move_misalign (mode, tmp);
16953 return;
16954 }
16955
16956 /* Make operand1 a register if neither operand is a register already. */
16957 if (can_create_pseudo_p ()
16958 && !register_operand (op0, mode)
16959 && !register_operand (op1, mode))
16960 {
16961 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16962 return;
16963 }
16964
16965 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16966 }
16967
16968 /* Split 32-byte AVX unaligned load and store if needed. */
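/* Note (tuning rationale, assumed rather than stated here): on some targets a
   32-byte unaligned access is faster as two 16-byte halves, which is what
   TARGET_AVX256_SPLIT_UNALIGNED_LOAD/STORE request; the load path below
   concatenates the halves with VEC_CONCAT and the store path extracts them
   with vextractf128.  */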
16969
16970 static void
16971 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16972 {
16973 rtx m;
16974 rtx (*extract) (rtx, rtx, rtx);
16975 rtx (*load_unaligned) (rtx, rtx);
16976 rtx (*store_unaligned) (rtx, rtx);
16977 enum machine_mode mode;
16978
16979 switch (GET_MODE (op0))
16980 {
16981 default:
16982 gcc_unreachable ();
16983 case V32QImode:
16984 extract = gen_avx_vextractf128v32qi;
16985 load_unaligned = gen_avx_loaddquv32qi;
16986 store_unaligned = gen_avx_storedquv32qi;
16987 mode = V16QImode;
16988 break;
16989 case V8SFmode:
16990 extract = gen_avx_vextractf128v8sf;
16991 load_unaligned = gen_avx_loadups256;
16992 store_unaligned = gen_avx_storeups256;
16993 mode = V4SFmode;
16994 break;
16995 case V4DFmode:
16996 extract = gen_avx_vextractf128v4df;
16997 load_unaligned = gen_avx_loadupd256;
16998 store_unaligned = gen_avx_storeupd256;
16999 mode = V2DFmode;
17000 break;
17001 }
17002
17003 if (MEM_P (op1))
17004 {
17005 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
17006 {
17007 rtx r = gen_reg_rtx (mode);
17008 m = adjust_address (op1, mode, 0);
17009 emit_move_insn (r, m);
17010 m = adjust_address (op1, mode, 16);
17011 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17012 emit_move_insn (op0, r);
17013 }
17014 /* Normal *mov<mode>_internal pattern will handle
17015 unaligned loads just fine if misaligned_operand
17016 is true, and without the UNSPEC it can be combined
17017 with arithmetic instructions. */
17018 else if (misaligned_operand (op1, GET_MODE (op1)))
17019 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17020 else
17021 emit_insn (load_unaligned (op0, op1));
17022 }
17023 else if (MEM_P (op0))
17024 {
17025 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
17026 {
17027 m = adjust_address (op0, mode, 0);
17028 emit_insn (extract (m, op1, const0_rtx));
17029 m = adjust_address (op0, mode, 16);
17030 emit_insn (extract (m, op1, const1_rtx));
17031 }
17032 else
17033 emit_insn (store_unaligned (op0, op1));
17034 }
17035 else
17036 gcc_unreachable ();
17037 }
17038
17039 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17040 straight to ix86_expand_vector_move. */
17041 /* Code generation for scalar reg-reg moves of single and double precision data:
17042 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17043 movaps reg, reg
17044 else
17045 movss reg, reg
17046 if (x86_sse_partial_reg_dependency == true)
17047 movapd reg, reg
17048 else
17049 movsd reg, reg
17050
17051 Code generation for scalar loads of double precision data:
17052 if (x86_sse_split_regs == true)
17053 movlpd mem, reg (gas syntax)
17054 else
17055 movsd mem, reg
17056
17057 Code generation for unaligned packed loads of single precision data
17058 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17059 if (x86_sse_unaligned_move_optimal)
17060 movups mem, reg
17061
17062 if (x86_sse_partial_reg_dependency == true)
17063 {
17064 xorps reg, reg
17065 movlps mem, reg
17066 movhps mem+8, reg
17067 }
17068 else
17069 {
17070 movlps mem, reg
17071 movhps mem+8, reg
17072 }
17073
17074 Code generation for unaligned packed loads of double precision data
17075 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17076 if (x86_sse_unaligned_move_optimal)
17077 movupd mem, reg
17078
17079 if (x86_sse_split_regs == true)
17080 {
17081 movlpd mem, reg
17082 movhpd mem+8, reg
17083 }
17084 else
17085 {
17086 movsd mem, reg
17087 movhpd mem+8, reg
17088 }
17089 */
17090
17091 void
17092 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17093 {
17094 rtx op0, op1, orig_op0 = NULL_RTX, m;
17095 rtx (*load_unaligned) (rtx, rtx);
17096 rtx (*store_unaligned) (rtx, rtx);
17097
17098 op0 = operands[0];
17099 op1 = operands[1];
17100
17101 if (GET_MODE_SIZE (mode) == 64)
17102 {
17103 switch (GET_MODE_CLASS (mode))
17104 {
17105 case MODE_VECTOR_INT:
17106 case MODE_INT:
17107 if (GET_MODE (op0) != V16SImode)
17108 {
17109 if (!MEM_P (op0))
17110 {
17111 orig_op0 = op0;
17112 op0 = gen_reg_rtx (V16SImode);
17113 }
17114 else
17115 op0 = gen_lowpart (V16SImode, op0);
17116 }
17117 op1 = gen_lowpart (V16SImode, op1);
17118 /* FALLTHRU */
17119
17120 case MODE_VECTOR_FLOAT:
17121 switch (GET_MODE (op0))
17122 {
17123 default:
17124 gcc_unreachable ();
17125 case V16SImode:
17126 load_unaligned = gen_avx512f_loaddquv16si;
17127 store_unaligned = gen_avx512f_storedquv16si;
17128 break;
17129 case V16SFmode:
17130 load_unaligned = gen_avx512f_loadups512;
17131 store_unaligned = gen_avx512f_storeups512;
17132 break;
17133 case V8DFmode:
17134 load_unaligned = gen_avx512f_loadupd512;
17135 store_unaligned = gen_avx512f_storeupd512;
17136 break;
17137 }
17138
17139 if (MEM_P (op1))
17140 emit_insn (load_unaligned (op0, op1));
17141 else if (MEM_P (op0))
17142 emit_insn (store_unaligned (op0, op1));
17143 else
17144 gcc_unreachable ();
17145 if (orig_op0)
17146 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17147 break;
17148
17149 default:
17150 gcc_unreachable ();
17151 }
17152
17153 return;
17154 }
17155
17156 if (TARGET_AVX
17157 && GET_MODE_SIZE (mode) == 32)
17158 {
17159 switch (GET_MODE_CLASS (mode))
17160 {
17161 case MODE_VECTOR_INT:
17162 case MODE_INT:
17163 if (GET_MODE (op0) != V32QImode)
17164 {
17165 if (!MEM_P (op0))
17166 {
17167 orig_op0 = op0;
17168 op0 = gen_reg_rtx (V32QImode);
17169 }
17170 else
17171 op0 = gen_lowpart (V32QImode, op0);
17172 }
17173 op1 = gen_lowpart (V32QImode, op1);
17174 /* FALLTHRU */
17175
17176 case MODE_VECTOR_FLOAT:
17177 ix86_avx256_split_vector_move_misalign (op0, op1);
17178 if (orig_op0)
17179 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17180 break;
17181
17182 default:
17183 gcc_unreachable ();
17184 }
17185
17186 return;
17187 }
17188
17189 if (MEM_P (op1))
17190 {
17191 /* Normal *mov<mode>_internal pattern will handle
17192 unaligned loads just fine if misaligned_operand
17193 is true, and without the UNSPEC it can be combined
17194 with arithmetic instructions. */
17195 if (TARGET_AVX
17196 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17197 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17198 && misaligned_operand (op1, GET_MODE (op1)))
17199 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17200 /* ??? If we have typed data, then it would appear that using
17201 movdqu is the only way to get unaligned data loaded with
17202 integer type. */
17203 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17204 {
17205 if (GET_MODE (op0) != V16QImode)
17206 {
17207 orig_op0 = op0;
17208 op0 = gen_reg_rtx (V16QImode);
17209 }
17210 op1 = gen_lowpart (V16QImode, op1);
17211 /* We will eventually emit movups based on insn attributes. */
17212 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17213 if (orig_op0)
17214 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17215 }
17216 else if (TARGET_SSE2 && mode == V2DFmode)
17217 {
17218 rtx zero;
17219
17220 if (TARGET_AVX
17221 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17222 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17223 || optimize_insn_for_size_p ())
17224 {
17225 /* We will eventually emit movups based on insn attributes. */
17226 emit_insn (gen_sse2_loadupd (op0, op1));
17227 return;
17228 }
17229
17230 /* When SSE registers are split into halves, we can avoid
17231 writing to the top half twice. */
17232 if (TARGET_SSE_SPLIT_REGS)
17233 {
17234 emit_clobber (op0);
17235 zero = op0;
17236 }
17237 else
17238 {
17239 /* ??? Not sure about the best option for the Intel chips.
17240 The following would seem to satisfy; the register is
17241 entirely cleared, breaking the dependency chain. We
17242 then store to the upper half, with a dependency depth
17243 of one. A rumor has it that Intel recommends two movsd
17244 followed by an unpacklpd, but this is unconfirmed. And
17245 given that the dependency depth of the unpacklpd would
17246 still be one, I'm not sure why this would be better. */
17247 zero = CONST0_RTX (V2DFmode);
17248 }
17249
17250 m = adjust_address (op1, DFmode, 0);
17251 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17252 m = adjust_address (op1, DFmode, 8);
17253 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17254 }
17255 else
17256 {
17257 rtx t;
17258
17259 if (TARGET_AVX
17260 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17261 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17262 || optimize_insn_for_size_p ())
17263 {
17264 if (GET_MODE (op0) != V4SFmode)
17265 {
17266 orig_op0 = op0;
17267 op0 = gen_reg_rtx (V4SFmode);
17268 }
17269 op1 = gen_lowpart (V4SFmode, op1);
17270 emit_insn (gen_sse_loadups (op0, op1));
17271 if (orig_op0)
17272 emit_move_insn (orig_op0,
17273 gen_lowpart (GET_MODE (orig_op0), op0));
17274 return;
17275 }
17276
17277 if (mode != V4SFmode)
17278 t = gen_reg_rtx (V4SFmode);
17279 else
17280 t = op0;
17281
17282 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17283 emit_move_insn (t, CONST0_RTX (V4SFmode));
17284 else
17285 emit_clobber (t);
17286
17287 m = adjust_address (op1, V2SFmode, 0);
17288 emit_insn (gen_sse_loadlps (t, t, m));
17289 m = adjust_address (op1, V2SFmode, 8);
17290 emit_insn (gen_sse_loadhps (t, t, m));
17291 if (mode != V4SFmode)
17292 emit_move_insn (op0, gen_lowpart (mode, t));
17293 }
17294 }
17295 else if (MEM_P (op0))
17296 {
17297 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17298 {
17299 op0 = gen_lowpart (V16QImode, op0);
17300 op1 = gen_lowpart (V16QImode, op1);
17301 /* We will eventually emit movups based on insn attributes. */
17302 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17303 }
17304 else if (TARGET_SSE2 && mode == V2DFmode)
17305 {
17306 if (TARGET_AVX
17307 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17308 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17309 || optimize_insn_for_size_p ())
17310 /* We will eventually emit movups based on insn attributes. */
17311 emit_insn (gen_sse2_storeupd (op0, op1));
17312 else
17313 {
17314 m = adjust_address (op0, DFmode, 0);
17315 emit_insn (gen_sse2_storelpd (m, op1));
17316 m = adjust_address (op0, DFmode, 8);
17317 emit_insn (gen_sse2_storehpd (m, op1));
17318 }
17319 }
17320 else
17321 {
17322 if (mode != V4SFmode)
17323 op1 = gen_lowpart (V4SFmode, op1);
17324
17325 if (TARGET_AVX
17326 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17327 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17328 || optimize_insn_for_size_p ())
17329 {
17330 op0 = gen_lowpart (V4SFmode, op0);
17331 emit_insn (gen_sse_storeups (op0, op1));
17332 }
17333 else
17334 {
17335 m = adjust_address (op0, V2SFmode, 0);
17336 emit_insn (gen_sse_storelps (m, op1));
17337 m = adjust_address (op0, V2SFmode, 8);
17338 emit_insn (gen_sse_storehps (m, op1));
17339 }
17340 }
17341 }
17342 else
17343 gcc_unreachable ();
17344 }
17345
17346 /* Helper function of ix86_fixup_binary_operands to canonicalize
17347 operand order. Returns true if the operands should be swapped. */
17348
17349 static bool
17350 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17351 rtx operands[])
17352 {
17353 rtx dst = operands[0];
17354 rtx src1 = operands[1];
17355 rtx src2 = operands[2];
17356
17357 /* If the operation is not commutative, we can't do anything. */
17358 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17359 return false;
17360
17361 /* Highest priority is that src1 should match dst. */
17362 if (rtx_equal_p (dst, src1))
17363 return false;
17364 if (rtx_equal_p (dst, src2))
17365 return true;
17366
17367 /* Next highest priority is that immediate constants come second. */
17368 if (immediate_operand (src2, mode))
17369 return false;
17370 if (immediate_operand (src1, mode))
17371 return true;
17372
17373 /* Lowest priority is that memory references should come second. */
17374 if (MEM_P (src2))
17375 return false;
17376 if (MEM_P (src1))
17377 return true;
17378
17379 return false;
17380 }
17381
17382
17383 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17384 destination to use for the operation. If different from the true
17385 destination in operands[0], a copy operation will be required. */
17386
17387 rtx
17388 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17389 rtx operands[])
17390 {
17391 rtx dst = operands[0];
17392 rtx src1 = operands[1];
17393 rtx src2 = operands[2];
17394
17395 /* Canonicalize operand order. */
17396 if (ix86_swap_binary_operands_p (code, mode, operands))
17397 {
17398 rtx temp;
17399
17400 /* It is invalid to swap operands of different modes. */
17401 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17402
17403 temp = src1;
17404 src1 = src2;
17405 src2 = temp;
17406 }
17407
17408 /* Both source operands cannot be in memory. */
17409 if (MEM_P (src1) && MEM_P (src2))
17410 {
17411 /* Optimization: Only read from memory once. */
17412 if (rtx_equal_p (src1, src2))
17413 {
17414 src2 = force_reg (mode, src2);
17415 src1 = src2;
17416 }
17417 else if (rtx_equal_p (dst, src1))
17418 src2 = force_reg (mode, src2);
17419 else
17420 src1 = force_reg (mode, src1);
17421 }
17422
17423 /* If the destination is memory, and we do not have matching source
17424 operands, do things in registers. */
17425 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17426 dst = gen_reg_rtx (mode);
17427
17428 /* Source 1 cannot be a constant. */
17429 if (CONSTANT_P (src1))
17430 src1 = force_reg (mode, src1);
17431
17432 /* Source 1 cannot be a non-matching memory. */
17433 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17434 src1 = force_reg (mode, src1);
17435
17436 /* Improve address combine. */
17437 if (code == PLUS
17438 && GET_MODE_CLASS (mode) == MODE_INT
17439 && MEM_P (src2))
17440 src2 = force_reg (mode, src2);
17441
17442 operands[1] = src1;
17443 operands[2] = src2;
17444 return dst;
17445 }
17446
17447 /* Similarly, but assume that the destination has already been
17448 set up properly. */
17449
17450 void
17451 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17452 enum machine_mode mode, rtx operands[])
17453 {
17454 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17455 gcc_assert (dst == operands[0]);
17456 }
17457
17458 /* Attempt to expand a binary operator. Make the expansion closer to the
17459 actual machine than just general_operand, which would allow 3 separate
17460 memory references (one output, two input) in a single insn. */
17461
17462 void
17463 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17464 rtx operands[])
17465 {
17466 rtx src1, src2, dst, op, clob;
17467
17468 dst = ix86_fixup_binary_operands (code, mode, operands);
17469 src1 = operands[1];
17470 src2 = operands[2];
17471
17472 /* Emit the instruction. */
17473
17474 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17475 if (reload_in_progress)
17476 {
17477 /* Reload doesn't know about the flags register, and doesn't know that
17478 it doesn't want to clobber it. We can only do this with PLUS. */
17479 gcc_assert (code == PLUS);
17480 emit_insn (op);
17481 }
17482 else if (reload_completed
17483 && code == PLUS
17484 && !rtx_equal_p (dst, src1))
17485 {
17486 /* This is going to be an LEA; avoid splitting it later. */
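/* Note: emitting the PLUS without a FLAGS_REG clobber lets the insn match
   the lea pattern (e.g. "lea (%1,%2), %0" when the destination differs from
   both sources), instead of being split into a mov/add pair later.  */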
17487 emit_insn (op);
17488 }
17489 else
17490 {
17491 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17492 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17493 }
17494
17495 /* Fix up the destination if needed. */
17496 if (dst != operands[0])
17497 emit_move_insn (operands[0], dst);
17498 }
17499
17500 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17501 the given OPERANDS. */
17502
17503 void
17504 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17505 rtx operands[])
17506 {
17507 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17508 if (GET_CODE (operands[1]) == SUBREG)
17509 {
17510 op1 = operands[1];
17511 op2 = operands[2];
17512 }
17513 else if (GET_CODE (operands[2]) == SUBREG)
17514 {
17515 op1 = operands[2];
17516 op2 = operands[1];
17517 }
17518 /* Optimize (__m128i) d | (__m128i) e and similar code
17519 when d and e are float vectors into a float vector logical
17520 insn. In C/C++ without using intrinsics there is no other way
17521 to express a vector logical operation on float vectors than
17522 to cast them temporarily to integer vectors. */
17523 if (op1
17524 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17525 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17526 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17527 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17528 && SUBREG_BYTE (op1) == 0
17529 && (GET_CODE (op2) == CONST_VECTOR
17530 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17531 && SUBREG_BYTE (op2) == 0))
17532 && can_create_pseudo_p ())
17533 {
17534 rtx dst;
17535 switch (GET_MODE (SUBREG_REG (op1)))
17536 {
17537 case V4SFmode:
17538 case V8SFmode:
17539 case V2DFmode:
17540 case V4DFmode:
17541 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17542 if (GET_CODE (op2) == CONST_VECTOR)
17543 {
17544 op2 = gen_lowpart (GET_MODE (dst), op2);
17545 op2 = force_reg (GET_MODE (dst), op2);
17546 }
17547 else
17548 {
17549 op1 = operands[1];
17550 op2 = SUBREG_REG (operands[2]);
17551 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17552 op2 = force_reg (GET_MODE (dst), op2);
17553 }
17554 op1 = SUBREG_REG (op1);
17555 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17556 op1 = force_reg (GET_MODE (dst), op1);
17557 emit_insn (gen_rtx_SET (VOIDmode, dst,
17558 gen_rtx_fmt_ee (code, GET_MODE (dst),
17559 op1, op2)));
17560 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17561 return;
17562 default:
17563 break;
17564 }
17565 }
17566 if (!nonimmediate_operand (operands[1], mode))
17567 operands[1] = force_reg (mode, operands[1]);
17568 if (!nonimmediate_operand (operands[2], mode))
17569 operands[2] = force_reg (mode, operands[2]);
17570 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17571 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17572 gen_rtx_fmt_ee (code, mode, operands[1],
17573 operands[2])));
17574 }
17575
17576 /* Return TRUE or FALSE depending on whether the binary operator meets the
17577 appropriate constraints. */
17578
17579 bool
17580 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17581 rtx operands[3])
17582 {
17583 rtx dst = operands[0];
17584 rtx src1 = operands[1];
17585 rtx src2 = operands[2];
17586
17587 /* Both source operands cannot be in memory. */
17588 if (MEM_P (src1) && MEM_P (src2))
17589 return false;
17590
17591 /* Canonicalize operand order for commutative operators. */
17592 if (ix86_swap_binary_operands_p (code, mode, operands))
17593 {
17594 rtx temp = src1;
17595 src1 = src2;
17596 src2 = temp;
17597 }
17598
17599 /* If the destination is memory, we must have a matching source operand. */
17600 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17601 return false;
17602
17603 /* Source 1 cannot be a constant. */
17604 if (CONSTANT_P (src1))
17605 return false;
17606
17607 /* Source 1 cannot be a non-matching memory. */
17608 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17609 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17610 return (code == AND
17611 && (mode == HImode
17612 || mode == SImode
17613 || (TARGET_64BIT && mode == DImode))
17614 && satisfies_constraint_L (src2));
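/* Note (assumed from the i386 constraint definitions): constraint "L" accepts
   the 0xff, 0xffff and 0xffffffff masks, so the AND permitted here really is
   just a zero-extending load of the matching memory operand.  */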
17615
17616 return true;
17617 }
17618
17619 /* Attempt to expand a unary operator. Make the expansion closer to the
17620 actual machine than just general_operand, which would allow 2 separate
17621 memory references (one output, one input) in a single insn. */
17622
17623 void
17624 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17625 rtx operands[])
17626 {
17627 int matching_memory;
17628 rtx src, dst, op, clob;
17629
17630 dst = operands[0];
17631 src = operands[1];
17632
17633 /* If the destination is memory, and we do not have matching source
17634 operands, do things in registers. */
17635 matching_memory = 0;
17636 if (MEM_P (dst))
17637 {
17638 if (rtx_equal_p (dst, src))
17639 matching_memory = 1;
17640 else
17641 dst = gen_reg_rtx (mode);
17642 }
17643
17644 /* When source operand is memory, destination must match. */
17645 if (MEM_P (src) && !matching_memory)
17646 src = force_reg (mode, src);
17647
17648 /* Emit the instruction. */
17649
17650 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17651 if (reload_in_progress || code == NOT)
17652 {
17653 /* Reload doesn't know about the flags register, and doesn't know that
17654 it doesn't want to clobber it. */
17655 gcc_assert (code == NOT);
17656 emit_insn (op);
17657 }
17658 else
17659 {
17660 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17661 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17662 }
17663
17664 /* Fix up the destination if needed. */
17665 if (dst != operands[0])
17666 emit_move_insn (operands[0], dst);
17667 }
17668
17669 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17670 divisor are within the range [0-255]. */
17671
17672 void
17673 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17674 bool signed_p)
17675 {
17676 rtx_code_label *end_label, *qimode_label;
17677 rtx insn, div, mod;
17678 rtx scratch, tmp0, tmp1, tmp2;
17679 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17680 rtx (*gen_zero_extend) (rtx, rtx);
17681 rtx (*gen_test_ccno_1) (rtx, rtx);
17682
17683 switch (mode)
17684 {
17685 case SImode:
17686 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17687 gen_test_ccno_1 = gen_testsi_ccno_1;
17688 gen_zero_extend = gen_zero_extendqisi2;
17689 break;
17690 case DImode:
17691 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17692 gen_test_ccno_1 = gen_testdi_ccno_1;
17693 gen_zero_extend = gen_zero_extendqidi2;
17694 break;
17695 default:
17696 gcc_unreachable ();
17697 }
17698
17699 end_label = gen_label_rtx ();
17700 qimode_label = gen_label_rtx ();
17701
17702 scratch = gen_reg_rtx (mode);
17703
17704 /* Use 8bit unsigned divmod if dividend and divisor are within
17705 the range [0-255]. */
17706 emit_move_insn (scratch, operands[2]);
17707 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17708 scratch, 1, OPTAB_DIRECT);
17709 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17710 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17711 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17712 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17713 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17714 pc_rtx);
17715 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17716 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17717 JUMP_LABEL (insn) = qimode_label;
17718
17719 /* Generate original signed/unsigned divmod. */
17720 div = gen_divmod4_1 (operands[0], operands[1],
17721 operands[2], operands[3]);
17722 emit_insn (div);
17723
17724 /* Branch to the end. */
17725 emit_jump_insn (gen_jump (end_label));
17726 emit_barrier ();
17727
17728 /* Generate 8bit unsigned divide. */
17729 emit_label (qimode_label);
17730 /* Don't use operands[0] for result of 8bit divide since not all
17731 registers support QImode ZERO_EXTRACT. */
17732 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17733 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17734 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17735 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17736
17737 if (signed_p)
17738 {
17739 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17740 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17741 }
17742 else
17743 {
17744 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17745 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17746 }
17747
17748 /* Extract remainder from AH. */
17749 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17750 if (REG_P (operands[1]))
17751 insn = emit_move_insn (operands[1], tmp1);
17752 else
17753 {
17754 /* Need a new scratch register since the old one has result
17755 of 8bit divide. */
17756 scratch = gen_reg_rtx (mode);
17757 emit_move_insn (scratch, tmp1);
17758 insn = emit_move_insn (operands[1], scratch);
17759 }
17760 set_unique_reg_note (insn, REG_EQUAL, mod);
17761
17762 /* Zero extend quotient from AL. */
17763 tmp1 = gen_lowpart (QImode, tmp0);
17764 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17765 set_unique_reg_note (insn, REG_EQUAL, div);
17766
17767 emit_label (end_label);
17768 }
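/* A C-level sketch of the split above, for the signed SImode case
   (illustrative only; operands[2] is the dividend, operands[3] the
   divisor):

     if (((unsigned) dividend | (unsigned) divisor) < 0x100)
       {
         quot = (unsigned char) dividend / (unsigned char) divisor;
         rem  = (unsigned char) dividend % (unsigned char) divisor;
       }
     else
       {
         quot = dividend / divisor;
         rem  = dividend % divisor;
       }

   The 8-bit path is a single divb whose quotient lands in AL and whose
   remainder lands in AH, both zero extended back to the wider mode.  */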
17769
17770 /* Whether it is OK to emit CFI directives when emitting asm code. */
17771
17772 bool
17773 ix86_emit_cfi ()
17774 {
17775 return dwarf2out_do_cfi_asm ();
17776 }
17777
17778 #define LEA_MAX_STALL (3)
17779 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17780
17781 /* Increase given DISTANCE in half-cycles according to
17782 dependencies between PREV and NEXT instructions.
17783 Add 1 half-cycle if there is no dependency and
17784 go to the next cycle if there is some dependency. */
17785
17786 static unsigned int
17787 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
17788 {
17789 df_ref def, use;
17790
17791 if (!prev || !next)
17792 return distance + (distance & 1) + 2;
17793
17794 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17795 return distance + 1;
17796
17797 FOR_EACH_INSN_USE (use, next)
17798 FOR_EACH_INSN_DEF (def, prev)
17799 if (!DF_REF_IS_ARTIFICIAL (def)
17800 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
17801 return distance + (distance & 1) + 2;
17802
17803 return distance + 1;
17804 }
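/* A small worked example of the accounting above (illustrative): with
   DISTANCE == 3 half-cycles, an independent PREV/NEXT pair yields
   3 + 1 = 4, while a dependent pair (or a missing PREV or NEXT) yields
   3 + (3 & 1) + 2 = 6, i.e. the count is rounded up to a whole cycle and
   one further cycle is added.  */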
17805
17806 /* Return true if instruction INSN defines register number
17807 REGNO1 or REGNO2. */
17808
17809 static bool
17810 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17811 rtx insn)
17812 {
17813 df_ref def;
17814
17815 FOR_EACH_INSN_DEF (def, insn)
17816 if (DF_REF_REG_DEF_P (def)
17817 && !DF_REF_IS_ARTIFICIAL (def)
17818 && (regno1 == DF_REF_REGNO (def)
17819 || regno2 == DF_REF_REGNO (def)))
17820 return true;
17821
17822 return false;
17823 }
17824
17825 /* Return true if instruction INSN uses register number
17826 REGNO as part of an address expression. */
17827
17828 static bool
17829 insn_uses_reg_mem (unsigned int regno, rtx insn)
17830 {
17831 df_ref use;
17832
17833 FOR_EACH_INSN_USE (use, insn)
17834 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
17835 return true;
17836
17837 return false;
17838 }
17839
17840 /* Search backward for non-agu definition of register number REGNO1
17841 or register number REGNO2 in basic block starting from instruction
17842 START up to head of basic block or instruction INSN.
17843
17844 Set *FOUND to true if a definition was found and to false
17845 otherwise.
17846
17847 The distance in half-cycles between START and the found instruction
17848 or the head of the BB is added to DISTANCE and returned. */
17849
17850 static int
17851 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17852 rtx_insn *insn, int distance,
17853 rtx_insn *start, bool *found)
17854 {
17855 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17856 rtx_insn *prev = start;
17857 rtx_insn *next = NULL;
17858
17859 *found = false;
17860
17861 while (prev
17862 && prev != insn
17863 && distance < LEA_SEARCH_THRESHOLD)
17864 {
17865 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17866 {
17867 distance = increase_distance (prev, next, distance);
17868 if (insn_defines_reg (regno1, regno2, prev))
17869 {
17870 if (recog_memoized (prev) < 0
17871 || get_attr_type (prev) != TYPE_LEA)
17872 {
17873 *found = true;
17874 return distance;
17875 }
17876 }
17877
17878 next = prev;
17879 }
17880 if (prev == BB_HEAD (bb))
17881 break;
17882
17883 prev = PREV_INSN (prev);
17884 }
17885
17886 return distance;
17887 }
17888
17889 /* Search backward for non-agu definition of register number REGNO1
17890 or register number REGNO2 in INSN's basic block until
17891 1. we pass LEA_SEARCH_THRESHOLD instructions, or
17892 2. we reach a neighbouring BB's boundary, or
17893 3. we reach an AGU definition.
17894 Return the distance between the non-AGU definition point and INSN.
17895 If there is no definition point, return -1. */
17896
17897 static int
17898 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17899 rtx_insn *insn)
17900 {
17901 basic_block bb = BLOCK_FOR_INSN (insn);
17902 int distance = 0;
17903 bool found = false;
17904
17905 if (insn != BB_HEAD (bb))
17906 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17907 distance, PREV_INSN (insn),
17908 &found);
17909
17910 if (!found && distance < LEA_SEARCH_THRESHOLD)
17911 {
17912 edge e;
17913 edge_iterator ei;
17914 bool simple_loop = false;
17915
17916 FOR_EACH_EDGE (e, ei, bb->preds)
17917 if (e->src == bb)
17918 {
17919 simple_loop = true;
17920 break;
17921 }
17922
17923 if (simple_loop)
17924 distance = distance_non_agu_define_in_bb (regno1, regno2,
17925 insn, distance,
17926 BB_END (bb), &found);
17927 else
17928 {
17929 int shortest_dist = -1;
17930 bool found_in_bb = false;
17931
17932 FOR_EACH_EDGE (e, ei, bb->preds)
17933 {
17934 int bb_dist
17935 = distance_non_agu_define_in_bb (regno1, regno2,
17936 insn, distance,
17937 BB_END (e->src),
17938 &found_in_bb);
17939 if (found_in_bb)
17940 {
17941 if (shortest_dist < 0)
17942 shortest_dist = bb_dist;
17943 else if (bb_dist > 0)
17944 shortest_dist = MIN (bb_dist, shortest_dist);
17945
17946 found = true;
17947 }
17948 }
17949
17950 distance = shortest_dist;
17951 }
17952 }
17953
17954 /* get_attr_type may modify recog data. We want to make sure
17955 that recog data is valid for instruction INSN, on which
17956 distance_non_agu_define is called. INSN is unchanged here. */
17957 extract_insn_cached (insn);
17958
17959 if (!found)
17960 return -1;
17961
17962 return distance >> 1;
17963 }
17964
17965 /* Return the distance in half-cycles, added to DISTANCE, between INSN
17966 and the next insn that uses register number REGNO in a memory
17967 address. Return -1 if REGNO is redefined before such a use.
17968
17969 Set *FOUND to true if a register use was found and to false
17970 otherwise.
17971 Set *REDEFINED to true if a register redefinition was found and to
17972 false otherwise. */
17973
17974 static int
17975 distance_agu_use_in_bb (unsigned int regno,
17976 rtx_insn *insn, int distance, rtx_insn *start,
17977 bool *found, bool *redefined)
17978 {
17979 basic_block bb = NULL;
17980 rtx_insn *next = start;
17981 rtx_insn *prev = NULL;
17982
17983 *found = false;
17984 *redefined = false;
17985
17986 if (start != NULL_RTX)
17987 {
17988 bb = BLOCK_FOR_INSN (start);
17989 if (start != BB_HEAD (bb))
17990 /* If insn and start belong to the same bb, set prev to insn,
17991 so the call to increase_distance will increase the distance
17992 between insns by 1. */
17993 prev = insn;
17994 }
17995
17996 while (next
17997 && next != insn
17998 && distance < LEA_SEARCH_THRESHOLD)
17999 {
18000 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18001 {
18002 distance = increase_distance (prev, next, distance);
18003 if (insn_uses_reg_mem (regno, next))
18004 {
18005 /* Return DISTANCE if OP0 is used in memory
18006 address in NEXT. */
18007 *found = true;
18008 return distance;
18009 }
18010
18011 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18012 {
18013 /* Return -1 if OP0 is set in NEXT. */
18014 *redefined = true;
18015 return -1;
18016 }
18017
18018 prev = next;
18019 }
18020
18021 if (next == BB_END (bb))
18022 break;
18023
18024 next = NEXT_INSN (next);
18025 }
18026
18027 return distance;
18028 }
18029
18030 /* Return the distance between INSN and the next insn that uses
18031 register number REGNO0 in a memory address. Return -1 if no such
18032 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
18033
18034 static int
18035 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18036 {
18037 basic_block bb = BLOCK_FOR_INSN (insn);
18038 int distance = 0;
18039 bool found = false;
18040 bool redefined = false;
18041
18042 if (insn != BB_END (bb))
18043 distance = distance_agu_use_in_bb (regno0, insn, distance,
18044 NEXT_INSN (insn),
18045 &found, &redefined);
18046
18047 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18048 {
18049 edge e;
18050 edge_iterator ei;
18051 bool simple_loop = false;
18052
18053 FOR_EACH_EDGE (e, ei, bb->succs)
18054 if (e->dest == bb)
18055 {
18056 simple_loop = true;
18057 break;
18058 }
18059
18060 if (simple_loop)
18061 distance = distance_agu_use_in_bb (regno0, insn,
18062 distance, BB_HEAD (bb),
18063 &found, &redefined);
18064 else
18065 {
18066 int shortest_dist = -1;
18067 bool found_in_bb = false;
18068 bool redefined_in_bb = false;
18069
18070 FOR_EACH_EDGE (e, ei, bb->succs)
18071 {
18072 int bb_dist
18073 = distance_agu_use_in_bb (regno0, insn,
18074 distance, BB_HEAD (e->dest),
18075 &found_in_bb, &redefined_in_bb);
18076 if (found_in_bb)
18077 {
18078 if (shortest_dist < 0)
18079 shortest_dist = bb_dist;
18080 else if (bb_dist > 0)
18081 shortest_dist = MIN (bb_dist, shortest_dist);
18082
18083 found = true;
18084 }
18085 }
18086
18087 distance = shortest_dist;
18088 }
18089 }
18090
18091 if (!found || redefined)
18092 return -1;
18093
18094 return distance >> 1;
18095 }
18096
18097 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18098 there is a choice between LEA and ADD.
18099 Negative value: ADD is preferred over LEA.
18100 Zero: neutral.
18101 Positive value: LEA is preferred over ADD. */
18102 #define IX86_LEA_PRIORITY 0
18103
18104 /* Return true if using LEA in INSN has a performance advantage over
18105 the equivalent sequence of instructions. The instruction sequence
18106 has SPLIT_COST cycles higher latency than the LEA. */
18107
18108 static bool
18109 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18110 unsigned int regno2, int split_cost, bool has_scale)
18111 {
18112 int dist_define, dist_use;
18113
18114 /* For Silvermont, if we use a 2-source or 3-source LEA for a
18115 non-destructive destination, or because we want the ability
18116 to use the SCALE, the use of LEA is justified. */
18117 if (TARGET_SILVERMONT || TARGET_INTEL)
18118 {
18119 if (has_scale)
18120 return true;
18121 if (split_cost < 1)
18122 return false;
18123 if (regno0 == regno1 || regno0 == regno2)
18124 return false;
18125 return true;
18126 }
18127
18128 dist_define = distance_non_agu_define (regno1, regno2, insn);
18129 dist_use = distance_agu_use (regno0, insn);
18130
18131 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18132 {
18133 /* If there is no non-AGU operand definition, no AGU
18134 operand use, and the split cost is 0, then both the lea
18135 and non-lea variants have the same priority. Currently
18136 we prefer lea for 64-bit code and non-lea for 32-bit
18137 code. */
18138 if (dist_use < 0 && split_cost == 0)
18139 return TARGET_64BIT || IX86_LEA_PRIORITY;
18140 else
18141 return true;
18142 }
18143
18144 /* The longer the definition distance, the more preferable lea is.
18145 Here we adjust it to take into account the splitting cost and
18146 the lea priority. */
18147 dist_define += split_cost + IX86_LEA_PRIORITY;
18148
18149 /* If there is no use in a memory address, then we just check
18150 that the split cost exceeds the AGU stall. */
18151 if (dist_use < 0)
18152 return dist_define > LEA_MAX_STALL;
18153
18154 /* If this insn has both backward non-agu dependence and forward
18155 agu dependence, the one with short distance takes effect. */
18156 return dist_define >= dist_use;
18157 }
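/* A worked example of the heuristic above (illustrative numbers only):
   with DIST_DEFINE == 1, DIST_USE == 2 and SPLIT_COST == 1, the
   definition is closer than LEA_MAX_STALL, so an AGU stall is possible;
   after adding SPLIT_COST and IX86_LEA_PRIORITY the adjusted define
   distance is 2, which is >= DIST_USE, so the lea is still considered
   profitable and is kept.  */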
18158
18159 /* Return true if it is legal for INSN to clobber the flags register,
18160 and false otherwise. */
18161
18162 static bool
18163 ix86_ok_to_clobber_flags (rtx_insn *insn)
18164 {
18165 basic_block bb = BLOCK_FOR_INSN (insn);
18166 df_ref use;
18167 bitmap live;
18168
18169 while (insn)
18170 {
18171 if (NONDEBUG_INSN_P (insn))
18172 {
18173 FOR_EACH_INSN_USE (use, insn)
18174 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18175 return false;
18176
18177 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18178 return true;
18179 }
18180
18181 if (insn == BB_END (bb))
18182 break;
18183
18184 insn = NEXT_INSN (insn);
18185 }
18186
18187 live = df_get_live_out (bb);
18188 return !REGNO_REG_SET_P (live, FLAGS_REG);
18189 }
18190
18191 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18192 move and add to avoid AGU stalls. */
18193
18194 bool
18195 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18196 {
18197 unsigned int regno0, regno1, regno2;
18198
18199 /* Check if we need to optimize. */
18200 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18201 return false;
18202
18203 /* Check that it is correct to split here. */
18204 if (!ix86_ok_to_clobber_flags (insn))
18205 return false;
18206
18207 regno0 = true_regnum (operands[0]);
18208 regno1 = true_regnum (operands[1]);
18209 regno2 = true_regnum (operands[2]);
18210
18211 /* We only need to split adds with a non-destructive
18212 destination operand. */
18213 if (regno0 == regno1 || regno0 == regno2)
18214 return false;
18215 else
18216 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18217 }
18218
18219 /* Return true if we should emit an lea instruction instead of a mov
18220 instruction. */
18221
18222 bool
18223 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18224 {
18225 unsigned int regno0, regno1;
18226
18227 /* Check if we need to optimize. */
18228 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18229 return false;
18230
18231 /* Use lea for reg to reg moves only. */
18232 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18233 return false;
18234
18235 regno0 = true_regnum (operands[0]);
18236 regno1 = true_regnum (operands[1]);
18237
18238 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18239 }
18240
18241 /* Return true if we need to split lea into a sequence of
18242 instructions to avoid AGU stalls. */
18243
18244 bool
18245 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18246 {
18247 unsigned int regno0, regno1, regno2;
18248 int split_cost;
18249 struct ix86_address parts;
18250 int ok;
18251
18252 /* Check whether we need to optimize. */
18253 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18254 return false;
18255
18256 /* The "at least two components" test below might not catch simple
18257 move or zero extension insns if parts.base is non-NULL and parts.disp
18258 is const0_rtx as the only components in the address, e.g. if the
18259 register is %rbp or %r13. As this test is much cheaper and moves or
18260 zero extensions are the common case, do this check first. */
18261 if (REG_P (operands[1])
18262 || (SImode_address_operand (operands[1], VOIDmode)
18263 && REG_P (XEXP (operands[1], 0))))
18264 return false;
18265
18266 /* Check if it is OK to split here. */
18267 if (!ix86_ok_to_clobber_flags (insn))
18268 return false;
18269
18270 ok = ix86_decompose_address (operands[1], &parts);
18271 gcc_assert (ok);
18272
18273 /* There should be at least two components in the address. */
18274 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18275 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18276 return false;
18277
18278 /* We should not split into an add if a non-legitimate PIC
18279 operand is used as the displacement. */
18280 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18281 return false;
18282
18283 regno0 = true_regnum (operands[0]);
18284 regno1 = INVALID_REGNUM;
18285 regno2 = INVALID_REGNUM;
18286
18287 if (parts.base)
18288 regno1 = true_regnum (parts.base);
18289 if (parts.index)
18290 regno2 = true_regnum (parts.index);
18291
18292 split_cost = 0;
18293
18294 /* Compute how many cycles we will add to the execution time
18295 if we split the lea into a sequence of instructions. */
18296 if (parts.base || parts.index)
18297 {
18298 /* Have to use a mov instruction if the non-destructive
18299 destination form is used. */
18300 if (regno1 != regno0 && regno2 != regno0)
18301 split_cost += 1;
18302
18303 /* Have to add index to base if both exist. */
18304 if (parts.base && parts.index)
18305 split_cost += 1;
18306
18307 /* Have to use shift and adds if scale is 2 or greater. */
18308 if (parts.scale > 1)
18309 {
18310 if (regno0 != regno1)
18311 split_cost += 1;
18312 else if (regno2 == regno0)
18313 split_cost += 4;
18314 else
18315 split_cost += parts.scale;
18316 }
18317
18318 /* Have to use an add instruction with an immediate if
18319 disp is nonzero. */
18320 if (parts.disp && parts.disp != const0_rtx)
18321 split_cost += 1;
18322
18323 /* Subtract the price of lea. */
18324 split_cost -= 1;
18325 }
18326
18327 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18328 parts.scale > 1);
18329 }
18330
18331 /* Emit x86 binary operator CODE in mode MODE, where the first operand
18332 matches the destination. The RTX includes a clobber of FLAGS_REG. */
18333
18334 static void
18335 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18336 rtx dst, rtx src)
18337 {
18338 rtx op, clob;
18339
18340 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18341 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18342
18343 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18344 }
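/* For instance, ix86_emit_binop (PLUS, SImode, dst, src) emits RTL of
   roughly this shape (a sketch, not verbatim dump output):

     (parallel [(set (reg:SI dst) (plus:SI (reg:SI dst) (reg:SI src)))
                (clobber (reg:CC 17 flags))])

   matching the two-address arithmetic patterns that clobber the flags
   register.  */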
18345
18346 /* Return true if the definition of REGNO1 is nearest to INSN. */
18347
18348 static bool
18349 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18350 {
18351 rtx_insn *prev = insn;
18352 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18353
18354 if (insn == start)
18355 return false;
18356 while (prev && prev != start)
18357 {
18358 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18359 {
18360 prev = PREV_INSN (prev);
18361 continue;
18362 }
18363 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18364 return true;
18365 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18366 return false;
18367 prev = PREV_INSN (prev);
18368 }
18369
18370 /* None of the regs is defined in the bb. */
18371 return false;
18372 }
18373
18374 /* Split a lea instruction into a sequence of instructions
18375 which are executed on the ALU to avoid AGU stalls.
18376 It is assumed that the flags register may be clobbered
18377 at the lea's position. */
18378
18379 void
18380 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], enum machine_mode mode)
18381 {
18382 unsigned int regno0, regno1, regno2;
18383 struct ix86_address parts;
18384 rtx target, tmp;
18385 int ok, adds;
18386
18387 ok = ix86_decompose_address (operands[1], &parts);
18388 gcc_assert (ok);
18389
18390 target = gen_lowpart (mode, operands[0]);
18391
18392 regno0 = true_regnum (target);
18393 regno1 = INVALID_REGNUM;
18394 regno2 = INVALID_REGNUM;
18395
18396 if (parts.base)
18397 {
18398 parts.base = gen_lowpart (mode, parts.base);
18399 regno1 = true_regnum (parts.base);
18400 }
18401
18402 if (parts.index)
18403 {
18404 parts.index = gen_lowpart (mode, parts.index);
18405 regno2 = true_regnum (parts.index);
18406 }
18407
18408 if (parts.disp)
18409 parts.disp = gen_lowpart (mode, parts.disp);
18410
18411 if (parts.scale > 1)
18412 {
18413 /* Case r1 = r1 + ... */
18414 if (regno1 == regno0)
18415 {
18416 /* If we have the case r1 = r1 + C * r2 then we
18417 would have to use multiplication, which is very
18418 expensive. Assume the cost model is wrong if
18419 such a case reaches here. */
18420 gcc_assert (regno2 != regno0);
18421
18422 for (adds = parts.scale; adds > 0; adds--)
18423 ix86_emit_binop (PLUS, mode, target, parts.index);
18424 }
18425 else
18426 {
18427 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18428 if (regno0 != regno2)
18429 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18430
18431 /* Use shift for scaling. */
18432 ix86_emit_binop (ASHIFT, mode, target,
18433 GEN_INT (exact_log2 (parts.scale)));
18434
18435 if (parts.base)
18436 ix86_emit_binop (PLUS, mode, target, parts.base);
18437
18438 if (parts.disp && parts.disp != const0_rtx)
18439 ix86_emit_binop (PLUS, mode, target, parts.disp);
18440 }
18441 }
18442 else if (!parts.base && !parts.index)
18443 {
18444 gcc_assert (parts.disp);
18445 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18446 }
18447 else
18448 {
18449 if (!parts.base)
18450 {
18451 if (regno0 != regno2)
18452 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18453 }
18454 else if (!parts.index)
18455 {
18456 if (regno0 != regno1)
18457 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18458 }
18459 else
18460 {
18461 if (regno0 == regno1)
18462 tmp = parts.index;
18463 else if (regno0 == regno2)
18464 tmp = parts.base;
18465 else
18466 {
18467 rtx tmp1;
18468
18469 /* Find better operand for SET instruction, depending
18470 on which definition is farther from the insn. */
18471 if (find_nearest_reg_def (insn, regno1, regno2))
18472 tmp = parts.index, tmp1 = parts.base;
18473 else
18474 tmp = parts.base, tmp1 = parts.index;
18475
18476 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18477
18478 if (parts.disp && parts.disp != const0_rtx)
18479 ix86_emit_binop (PLUS, mode, target, parts.disp);
18480
18481 ix86_emit_binop (PLUS, mode, target, tmp1);
18482 return;
18483 }
18484
18485 ix86_emit_binop (PLUS, mode, target, tmp);
18486 }
18487
18488 if (parts.disp && parts.disp != const0_rtx)
18489 ix86_emit_binop (PLUS, mode, target, parts.disp);
18490 }
18491 }
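/* An illustrative example of the splitting above (sketch only): an lea
   such as "leaq 12(%rdi,%rsi,4), %rax", with base, index, scale and
   displacement all present and a non-destructive destination, may become
   a sequence along the lines of

     movq %rsi, %rax    # copy the index
     salq $2, %rax      # shift for the scale of 4
     addq %rdi, %rax    # add the base
     addq $12, %rax     # add the displacement

   where the shift and adds go through ix86_emit_binop so the flags
   clobber is represented.  */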
18492
18493 /* Return true if it is ok to optimize an ADD operation to an LEA
18494 operation to avoid flag register consumption. For most processors,
18495 ADD is faster than LEA. For processors like BONNELL, if the
18496 destination register of the LEA holds an actual address which will
18497 be used soon, LEA is better; otherwise ADD is better. */
18498
18499 bool
18500 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
18501 {
18502 unsigned int regno0 = true_regnum (operands[0]);
18503 unsigned int regno1 = true_regnum (operands[1]);
18504 unsigned int regno2 = true_regnum (operands[2]);
18505
18506 /* If a = b + c with a != b and a != c, we must use the lea form. */
18507 if (regno0 != regno1 && regno0 != regno2)
18508 return true;
18509
18510 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18511 return false;
18512
18513 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18514 }
18515
18516 /* Return true if destination reg of SET_BODY is shift count of
18517 USE_BODY. */
18518
18519 static bool
18520 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18521 {
18522 rtx set_dest;
18523 rtx shift_rtx;
18524 int i;
18525
18526 /* Retrieve destination of SET_BODY. */
18527 switch (GET_CODE (set_body))
18528 {
18529 case SET:
18530 set_dest = SET_DEST (set_body);
18531 if (!set_dest || !REG_P (set_dest))
18532 return false;
18533 break;
18534 case PARALLEL:
18535 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18536 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18537 use_body))
18538 return true;
18539 default:
18540 return false;
18541 break;
18542 }
18543
18544 /* Retrieve shift count of USE_BODY. */
18545 switch (GET_CODE (use_body))
18546 {
18547 case SET:
18548 shift_rtx = XEXP (use_body, 1);
18549 break;
18550 case PARALLEL:
18551 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18552 if (ix86_dep_by_shift_count_body (set_body,
18553 XVECEXP (use_body, 0, i)))
18554 return true;
18555 default:
18556 return false;
18557 break;
18558 }
18559
18560 if (shift_rtx
18561 && (GET_CODE (shift_rtx) == ASHIFT
18562 || GET_CODE (shift_rtx) == LSHIFTRT
18563 || GET_CODE (shift_rtx) == ASHIFTRT
18564 || GET_CODE (shift_rtx) == ROTATE
18565 || GET_CODE (shift_rtx) == ROTATERT))
18566 {
18567 rtx shift_count = XEXP (shift_rtx, 1);
18568
18569 /* Return true if shift count is dest of SET_BODY. */
18570 if (REG_P (shift_count))
18571 {
18572 /* Add this check since this function can be invoked before
18573 register allocation by the pre-reload scheduler. */
18574 if (reload_completed
18575 && true_regnum (set_dest) == true_regnum (shift_count))
18576 return true;
18577 else if (REGNO (set_dest) == REGNO (shift_count))
18578 return true;
18579 }
18580 }
18581
18582 return false;
18583 }
18584
18585 /* Return true if destination reg of SET_INSN is shift count of
18586 USE_INSN. */
18587
18588 bool
18589 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18590 {
18591 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18592 PATTERN (use_insn));
18593 }
18594
18595 /* Return TRUE or FALSE depending on whether the unary operator meets the
18596 appropriate constraints. */
18597
18598 bool
18599 ix86_unary_operator_ok (enum rtx_code,
18600 enum machine_mode,
18601 rtx operands[2])
18602 {
18603 /* If one of operands is memory, source and destination must match. */
18604 if ((MEM_P (operands[0])
18605 || MEM_P (operands[1]))
18606 && ! rtx_equal_p (operands[0], operands[1]))
18607 return false;
18608 return true;
18609 }
18610
18611 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18612 are ok, keeping in mind the possible movddup alternative. */
18613
18614 bool
18615 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18616 {
18617 if (MEM_P (operands[0]))
18618 return rtx_equal_p (operands[0], operands[1 + high]);
18619 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18620 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18621 return true;
18622 }
18623
18624 /* Post-reload splitter for converting an SF or DFmode value in an
18625 SSE register into an unsigned SImode value. */
18626
18627 void
18628 ix86_split_convert_uns_si_sse (rtx operands[])
18629 {
18630 enum machine_mode vecmode;
18631 rtx value, large, zero_or_two31, input, two31, x;
18632
18633 large = operands[1];
18634 zero_or_two31 = operands[2];
18635 input = operands[3];
18636 two31 = operands[4];
18637 vecmode = GET_MODE (large);
18638 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18639
18640 /* Load up the value into the low element. We must ensure that the other
18641 elements are valid floats -- zero is the easiest such value. */
18642 if (MEM_P (input))
18643 {
18644 if (vecmode == V4SFmode)
18645 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18646 else
18647 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18648 }
18649 else
18650 {
18651 input = gen_rtx_REG (vecmode, REGNO (input));
18652 emit_move_insn (value, CONST0_RTX (vecmode));
18653 if (vecmode == V4SFmode)
18654 emit_insn (gen_sse_movss (value, value, input));
18655 else
18656 emit_insn (gen_sse2_movsd (value, value, input));
18657 }
18658
18659 emit_move_insn (large, two31);
18660 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18661
18662 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18663 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18664
18665 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18666 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18667
18668 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18669 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18670
18671 large = gen_rtx_REG (V4SImode, REGNO (large));
18672 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18673
18674 x = gen_rtx_REG (V4SImode, REGNO (value));
18675 if (vecmode == V4SFmode)
18676 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18677 else
18678 emit_insn (gen_sse2_cvttpd2dq (x, value));
18679 value = x;
18680
18681 emit_insn (gen_xorv4si3 (value, value, large));
18682 }
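/* A scalar sketch of the algorithm implemented above (illustrative C,
   ignoring rounding-mode details):

     unsigned int cvt (float x)
     {
       float two31 = 2147483648.0f;
       if (x < two31)
         return (int) x;
       return (unsigned int) (int) (x - two31) ^ 0x80000000u;
     }

   Values below 2^31 use the ordinary signed conversion; larger values
   have 2^31 subtracted first and the sign bit xored back in.  */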
18683
18684 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18685 Expects the 64-bit DImode to be supplied in a pair of integral
18686 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18687 -mfpmath=sse, !optimize_size only. */
18688
18689 void
18690 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18691 {
18692 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18693 rtx int_xmm, fp_xmm;
18694 rtx biases, exponents;
18695 rtx x;
18696
18697 int_xmm = gen_reg_rtx (V4SImode);
18698 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18699 emit_insn (gen_movdi_to_sse (int_xmm, input));
18700 else if (TARGET_SSE_SPLIT_REGS)
18701 {
18702 emit_clobber (int_xmm);
18703 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18704 }
18705 else
18706 {
18707 x = gen_reg_rtx (V2DImode);
18708 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18709 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18710 }
18711
18712 x = gen_rtx_CONST_VECTOR (V4SImode,
18713 gen_rtvec (4, GEN_INT (0x43300000UL),
18714 GEN_INT (0x45300000UL),
18715 const0_rtx, const0_rtx));
18716 exponents = validize_mem (force_const_mem (V4SImode, x));
18717
18718 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18719 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18720
18721 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18722 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18723 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18724 (0x1.0p84 + double(fp_value_hi_xmm)).
18725 Note these exponents differ by 32. */
18726
18727 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18728
18729 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18730 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18731 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18732 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18733 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18734 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18735 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18736 biases = validize_mem (force_const_mem (V2DFmode, biases));
18737 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18738
18739 /* Add the upper and lower DFmode values together. */
18740 if (TARGET_SSE3)
18741 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18742 else
18743 {
18744 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18745 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18746 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18747 }
18748
18749 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18750 }
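/* A scalar sketch of the exponent trick used above (illustrative C; a
   union stands in for the vector interleave and lowpart moves):

     double cvt (unsigned long long x)
     {
       union { unsigned long long i; double d; } lo, hi;
       lo.i = (x & 0xffffffffULL) | 0x4330000000000000ULL;
       hi.i = (x >> 32) | 0x4530000000000000ULL;
       return (lo.d - 0x1.0p52) + (hi.d - 0x1.0p84);
     }

   lo.d equals 0x1.0p52 plus the low 32 bits and hi.d equals 0x1.0p84
   plus the high 32 bits times 2^32, so subtracting the biases and adding
   the halves reconstructs the unsigned value.  */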
18751
18752 /* Not used, but eases macroization of patterns. */
18753 void
18754 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
18755 {
18756 gcc_unreachable ();
18757 }
18758
18759 /* Convert an unsigned SImode value into a DFmode value. Currently only
18760 used for SSE, but applicable anywhere. */
18761
18762 void
18763 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18764 {
18765 REAL_VALUE_TYPE TWO31r;
18766 rtx x, fp;
18767
18768 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18769 NULL, 1, OPTAB_DIRECT);
18770
18771 fp = gen_reg_rtx (DFmode);
18772 emit_insn (gen_floatsidf2 (fp, x));
18773
18774 real_ldexp (&TWO31r, &dconst1, 31);
18775 x = const_double_from_real_value (TWO31r, DFmode);
18776
18777 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18778 if (x != target)
18779 emit_move_insn (target, x);
18780 }
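/* A scalar sketch of the expansion above (illustrative only):

     double cvt (unsigned int x)
     {
       return (double) (int) (x - 0x80000000u) + 0x1.0p31;
     }

   i.e. bias the input by -2^31 so it fits the signed converter, then add
   2^31 back as a double; the addition is exact because a double holds
   any 32-bit integer exactly.  */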
18781
18782 /* Convert a signed DImode value into a DFmode value. Only used for SSE in
18783 32-bit mode; otherwise we have a direct convert instruction. */
18784
18785 void
18786 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18787 {
18788 REAL_VALUE_TYPE TWO32r;
18789 rtx fp_lo, fp_hi, x;
18790
18791 fp_lo = gen_reg_rtx (DFmode);
18792 fp_hi = gen_reg_rtx (DFmode);
18793
18794 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18795
18796 real_ldexp (&TWO32r, &dconst1, 32);
18797 x = const_double_from_real_value (TWO32r, DFmode);
18798 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18799
18800 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18801
18802 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18803 0, OPTAB_DIRECT);
18804 if (x != target)
18805 emit_move_insn (target, x);
18806 }
18807
18808 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18809 For x86_32, -mfpmath=sse, !optimize_size only. */
18810 void
18811 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18812 {
18813 REAL_VALUE_TYPE ONE16r;
18814 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18815
18816 real_ldexp (&ONE16r, &dconst1, 16);
18817 x = const_double_from_real_value (ONE16r, SFmode);
18818 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
18819 NULL, 0, OPTAB_DIRECT);
18820 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
18821 NULL, 0, OPTAB_DIRECT);
18822 fp_hi = gen_reg_rtx (SFmode);
18823 fp_lo = gen_reg_rtx (SFmode);
18824 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18825 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18826 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18827 0, OPTAB_DIRECT);
18828 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18829 0, OPTAB_DIRECT);
18830 if (!rtx_equal_p (target, fp_hi))
18831 emit_move_insn (target, fp_hi);
18832 }
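/* A scalar sketch of the expansion above (illustrative only):

     float cvt (unsigned int x)
     {
       return (float) (int) (x >> 16) * 0x1.0p16f + (float) (int) (x & 0xffff);
     }

   Both 16-bit halves and the scaled high part convert exactly; only the
   final addition rounds.  */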
18833
18834 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18835 a vector of unsigned ints VAL to vector of floats TARGET. */
18836
18837 void
18838 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18839 {
18840 rtx tmp[8];
18841 REAL_VALUE_TYPE TWO16r;
18842 enum machine_mode intmode = GET_MODE (val);
18843 enum machine_mode fltmode = GET_MODE (target);
18844 rtx (*cvt) (rtx, rtx);
18845
18846 if (intmode == V4SImode)
18847 cvt = gen_floatv4siv4sf2;
18848 else
18849 cvt = gen_floatv8siv8sf2;
18850 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18851 tmp[0] = force_reg (intmode, tmp[0]);
18852 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18853 OPTAB_DIRECT);
18854 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18855 NULL_RTX, 1, OPTAB_DIRECT);
18856 tmp[3] = gen_reg_rtx (fltmode);
18857 emit_insn (cvt (tmp[3], tmp[1]));
18858 tmp[4] = gen_reg_rtx (fltmode);
18859 emit_insn (cvt (tmp[4], tmp[2]));
18860 real_ldexp (&TWO16r, &dconst1, 16);
18861 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18862 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18863 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18864 OPTAB_DIRECT);
18865 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18866 OPTAB_DIRECT);
18867 if (tmp[7] != target)
18868 emit_move_insn (target, tmp[7]);
18869 }
18870
18871 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18872 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18873 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18874 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18875
18876 rtx
18877 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18878 {
18879 REAL_VALUE_TYPE TWO31r;
18880 rtx two31r, tmp[4];
18881 enum machine_mode mode = GET_MODE (val);
18882 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18883 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18884 rtx (*cmp) (rtx, rtx, rtx, rtx);
18885 int i;
18886
18887 for (i = 0; i < 3; i++)
18888 tmp[i] = gen_reg_rtx (mode);
18889 real_ldexp (&TWO31r, &dconst1, 31);
18890 two31r = const_double_from_real_value (TWO31r, scalarmode);
18891 two31r = ix86_build_const_vector (mode, 1, two31r);
18892 two31r = force_reg (mode, two31r);
18893 switch (mode)
18894 {
18895 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18896 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18897 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18898 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18899 default: gcc_unreachable ();
18900 }
18901 tmp[3] = gen_rtx_LE (mode, two31r, val);
18902 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18903 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18904 0, OPTAB_DIRECT);
18905 if (intmode == V4SImode || TARGET_AVX2)
18906 *xorp = expand_simple_binop (intmode, ASHIFT,
18907 gen_lowpart (intmode, tmp[0]),
18908 GEN_INT (31), NULL_RTX, 0,
18909 OPTAB_DIRECT);
18910 else
18911 {
18912 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18913 two31 = ix86_build_const_vector (intmode, 1, two31);
18914 *xorp = expand_simple_binop (intmode, AND,
18915 gen_lowpart (intmode, tmp[0]),
18916 two31, NULL_RTX, 0,
18917 OPTAB_DIRECT);
18918 }
18919 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18920 0, OPTAB_DIRECT);
18921 }
18922
18923 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18924 then replicate the value for all elements of the vector
18925 register. */
18926
18927 rtx
18928 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18929 {
18930 int i, n_elt;
18931 rtvec v;
18932 enum machine_mode scalar_mode;
18933
18934 switch (mode)
18935 {
18936 case V64QImode:
18937 case V32QImode:
18938 case V16QImode:
18939 case V32HImode:
18940 case V16HImode:
18941 case V8HImode:
18942 case V16SImode:
18943 case V8SImode:
18944 case V4SImode:
18945 case V8DImode:
18946 case V4DImode:
18947 case V2DImode:
18948 gcc_assert (vect);
18949 case V16SFmode:
18950 case V8SFmode:
18951 case V4SFmode:
18952 case V8DFmode:
18953 case V4DFmode:
18954 case V2DFmode:
18955 n_elt = GET_MODE_NUNITS (mode);
18956 v = rtvec_alloc (n_elt);
18957 scalar_mode = GET_MODE_INNER (mode);
18958
18959 RTVEC_ELT (v, 0) = value;
18960
18961 for (i = 1; i < n_elt; ++i)
18962 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18963
18964 return gen_rtx_CONST_VECTOR (mode, v);
18965
18966 default:
18967 gcc_unreachable ();
18968 }
18969 }
18970
18971 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18972 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18973 for an SSE register. If VECT is true, then replicate the mask for
18974 all elements of the vector register. If INVERT is true, then create
18975 a mask excluding the sign bit. */
18976
18977 rtx
18978 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18979 {
18980 enum machine_mode vec_mode, imode;
18981 HOST_WIDE_INT hi, lo;
18982 int shift = 63;
18983 rtx v;
18984 rtx mask;
18985
18986 /* Find the sign bit, sign extended to 2*HWI. */
18987 switch (mode)
18988 {
18989 case V16SImode:
18990 case V16SFmode:
18991 case V8SImode:
18992 case V4SImode:
18993 case V8SFmode:
18994 case V4SFmode:
18995 vec_mode = mode;
18996 mode = GET_MODE_INNER (mode);
18997 imode = SImode;
18998 lo = 0x80000000, hi = lo < 0;
18999 break;
19000
19001 case V8DImode:
19002 case V4DImode:
19003 case V2DImode:
19004 case V8DFmode:
19005 case V4DFmode:
19006 case V2DFmode:
19007 vec_mode = mode;
19008 mode = GET_MODE_INNER (mode);
19009 imode = DImode;
19010 if (HOST_BITS_PER_WIDE_INT >= 64)
19011 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19012 else
19013 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19014 break;
19015
19016 case TImode:
19017 case TFmode:
19018 vec_mode = VOIDmode;
19019 if (HOST_BITS_PER_WIDE_INT >= 64)
19020 {
19021 imode = TImode;
19022 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19023 }
19024 else
19025 {
19026 rtvec vec;
19027
19028 imode = DImode;
19029 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19030
19031 if (invert)
19032 {
19033 lo = ~lo, hi = ~hi;
19034 v = constm1_rtx;
19035 }
19036 else
19037 v = const0_rtx;
19038
19039 mask = immed_double_const (lo, hi, imode);
19040
19041 vec = gen_rtvec (2, v, mask);
19042 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19043 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19044
19045 return v;
19046 }
19047 break;
19048
19049 default:
19050 gcc_unreachable ();
19051 }
19052
19053 if (invert)
19054 lo = ~lo, hi = ~hi;
19055
19056 /* Force this value into the low part of a fp vector constant. */
19057 mask = immed_double_const (lo, hi, imode);
19058 mask = gen_lowpart (mode, mask);
19059
19060 if (vec_mode == VOIDmode)
19061 return force_reg (mode, mask);
19062
19063 v = ix86_build_const_vector (vec_mode, vect, mask);
19064 return force_reg (vec_mode, v);
19065 }
19066
19067 /* Generate code for floating point ABS or NEG. */
19068
19069 void
19070 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19071 rtx operands[])
19072 {
19073 rtx mask, set, dst, src;
19074 bool use_sse = false;
19075 bool vector_mode = VECTOR_MODE_P (mode);
19076 enum machine_mode vmode = mode;
19077
19078 if (vector_mode)
19079 use_sse = true;
19080 else if (mode == TFmode)
19081 use_sse = true;
19082 else if (TARGET_SSE_MATH)
19083 {
19084 use_sse = SSE_FLOAT_MODE_P (mode);
19085 if (mode == SFmode)
19086 vmode = V4SFmode;
19087 else if (mode == DFmode)
19088 vmode = V2DFmode;
19089 }
19090
19091 /* NEG and ABS performed with SSE use bitwise mask operations.
19092 Create the appropriate mask now. */
19093 if (use_sse)
19094 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19095 else
19096 mask = NULL_RTX;
19097
19098 dst = operands[0];
19099 src = operands[1];
19100
19101 set = gen_rtx_fmt_e (code, mode, src);
19102 set = gen_rtx_SET (VOIDmode, dst, set);
19103
19104 if (mask)
19105 {
19106 rtx use, clob;
19107 rtvec par;
19108
19109 use = gen_rtx_USE (VOIDmode, mask);
19110 if (vector_mode)
19111 par = gen_rtvec (2, set, use);
19112 else
19113 {
19114 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19115 par = gen_rtvec (3, set, use, clob);
19116 }
19117 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19118 }
19119 else
19120 emit_insn (set);
19121 }
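/* In scalar terms the SSE path above amounts to (illustrative sketch for
   the DFmode case):

     neg: x ^ 0x8000000000000000    (flip the sign bit)
     abs: x & 0x7fffffffffffffff    (clear the sign bit)

   with the constant coming from ix86_build_signbit_mask (INVERT is true
   for ABS) and attached to the insn through a USE until the pattern is
   split.  */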
19122
19123 /* Expand a copysign operation. Special case operand 0 being a constant. */
19124
19125 void
19126 ix86_expand_copysign (rtx operands[])
19127 {
19128 enum machine_mode mode, vmode;
19129 rtx dest, op0, op1, mask, nmask;
19130
19131 dest = operands[0];
19132 op0 = operands[1];
19133 op1 = operands[2];
19134
19135 mode = GET_MODE (dest);
19136
19137 if (mode == SFmode)
19138 vmode = V4SFmode;
19139 else if (mode == DFmode)
19140 vmode = V2DFmode;
19141 else
19142 vmode = mode;
19143
19144 if (GET_CODE (op0) == CONST_DOUBLE)
19145 {
19146 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19147
19148 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19149 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19150
19151 if (mode == SFmode || mode == DFmode)
19152 {
19153 if (op0 == CONST0_RTX (mode))
19154 op0 = CONST0_RTX (vmode);
19155 else
19156 {
19157 rtx v = ix86_build_const_vector (vmode, false, op0);
19158
19159 op0 = force_reg (vmode, v);
19160 }
19161 }
19162 else if (op0 != CONST0_RTX (mode))
19163 op0 = force_reg (mode, op0);
19164
19165 mask = ix86_build_signbit_mask (vmode, 0, 0);
19166
19167 if (mode == SFmode)
19168 copysign_insn = gen_copysignsf3_const;
19169 else if (mode == DFmode)
19170 copysign_insn = gen_copysigndf3_const;
19171 else
19172 copysign_insn = gen_copysigntf3_const;
19173
19174 emit_insn (copysign_insn (dest, op0, op1, mask));
19175 }
19176 else
19177 {
19178 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19179
19180 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19181 mask = ix86_build_signbit_mask (vmode, 0, 0);
19182
19183 if (mode == SFmode)
19184 copysign_insn = gen_copysignsf3_var;
19185 else if (mode == DFmode)
19186 copysign_insn = gen_copysigndf3_var;
19187 else
19188 copysign_insn = gen_copysigntf3_var;
19189
19190 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19191 }
19192 }
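/* The underlying bit operation, in scalar form (an illustrative sketch):

     copysign (x, y) = (x & ~SIGNMASK) | (y & SIGNMASK)

   where SIGNMASK has only the sign bit set; NMASK above is the inverted
   mask ~SIGNMASK.  When X is a constant its magnitude is taken up front,
   so only the AND of the sign source with the mask and the final IOR
   remain.  */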
19193
19194 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19195 be a constant, and so has already been expanded into a vector constant. */
19196
19197 void
19198 ix86_split_copysign_const (rtx operands[])
19199 {
19200 enum machine_mode mode, vmode;
19201 rtx dest, op0, mask, x;
19202
19203 dest = operands[0];
19204 op0 = operands[1];
19205 mask = operands[3];
19206
19207 mode = GET_MODE (dest);
19208 vmode = GET_MODE (mask);
19209
19210 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19211 x = gen_rtx_AND (vmode, dest, mask);
19212 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19213
19214 if (op0 != CONST0_RTX (vmode))
19215 {
19216 x = gen_rtx_IOR (vmode, dest, op0);
19217 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19218 }
19219 }
19220
19221 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19222 so we have to do two masks. */
19223
19224 void
19225 ix86_split_copysign_var (rtx operands[])
19226 {
19227 enum machine_mode mode, vmode;
19228 rtx dest, scratch, op0, op1, mask, nmask, x;
19229
19230 dest = operands[0];
19231 scratch = operands[1];
19232 op0 = operands[2];
19233 op1 = operands[3];
19234 nmask = operands[4];
19235 mask = operands[5];
19236
19237 mode = GET_MODE (dest);
19238 vmode = GET_MODE (mask);
19239
19240 if (rtx_equal_p (op0, op1))
19241 {
19242 /* Shouldn't happen often (it's useless, obviously), but when it does
19243 we'd generate incorrect code if we continue below. */
19244 emit_move_insn (dest, op0);
19245 return;
19246 }
19247
19248 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19249 {
19250 gcc_assert (REGNO (op1) == REGNO (scratch));
19251
19252 x = gen_rtx_AND (vmode, scratch, mask);
19253 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19254
19255 dest = mask;
19256 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19257 x = gen_rtx_NOT (vmode, dest);
19258 x = gen_rtx_AND (vmode, x, op0);
19259 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19260 }
19261 else
19262 {
19263 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19264 {
19265 x = gen_rtx_AND (vmode, scratch, mask);
19266 }
19267 else /* alternative 2,4 */
19268 {
19269 gcc_assert (REGNO (mask) == REGNO (scratch));
19270 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19271 x = gen_rtx_AND (vmode, scratch, op1);
19272 }
19273 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19274
19275 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19276 {
19277 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19278 x = gen_rtx_AND (vmode, dest, nmask);
19279 }
19280 else /* alternative 3,4 */
19281 {
19282 gcc_assert (REGNO (nmask) == REGNO (dest));
19283 dest = nmask;
19284 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19285 x = gen_rtx_AND (vmode, dest, op0);
19286 }
19287 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19288 }
19289
19290 x = gen_rtx_IOR (vmode, dest, scratch);
19291 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19292 }
19293
19294 /* Return TRUE or FALSE depending on whether the first SET in INSN
19295 has source and destination with matching CC modes, and that the
19296 CC mode is at least as constrained as REQ_MODE. */
19297
19298 bool
19299 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19300 {
19301 rtx set;
19302 enum machine_mode set_mode;
19303
19304 set = PATTERN (insn);
19305 if (GET_CODE (set) == PARALLEL)
19306 set = XVECEXP (set, 0, 0);
19307 gcc_assert (GET_CODE (set) == SET);
19308 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19309
19310 set_mode = GET_MODE (SET_DEST (set));
19311 switch (set_mode)
19312 {
19313 case CCNOmode:
19314 if (req_mode != CCNOmode
19315 && (req_mode != CCmode
19316 || XEXP (SET_SRC (set), 1) != const0_rtx))
19317 return false;
19318 break;
19319 case CCmode:
19320 if (req_mode == CCGCmode)
19321 return false;
19322 /* FALLTHRU */
19323 case CCGCmode:
19324 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19325 return false;
19326 /* FALLTHRU */
19327 case CCGOCmode:
19328 if (req_mode == CCZmode)
19329 return false;
19330 /* FALLTHRU */
19331 case CCZmode:
19332 break;
19333
19334 case CCAmode:
19335 case CCCmode:
19336 case CCOmode:
19337 case CCSmode:
19338 if (set_mode != req_mode)
19339 return false;
19340 break;
19341
19342 default:
19343 gcc_unreachable ();
19344 }
19345
19346 return GET_MODE (SET_SRC (set)) == set_mode;
19347 }
19348
19349 /* Generate insn patterns to do an integer compare of OPERANDS. */
19350
19351 static rtx
19352 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19353 {
19354 enum machine_mode cmpmode;
19355 rtx tmp, flags;
19356
19357 cmpmode = SELECT_CC_MODE (code, op0, op1);
19358 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19359
19360 /* This is very simple, but making the interface the same as in the
19361 FP case makes the rest of the code easier. */
19362 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19363 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19364
19365 /* Return the test that should be put into the flags user, i.e.
19366 the bcc, scc, or cmov instruction. */
19367 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19368 }
19369
19370 /* Figure out whether to use ordered or unordered fp comparisons.
19371 Return the appropriate mode to use. */
19372
19373 enum machine_mode
19374 ix86_fp_compare_mode (enum rtx_code)
19375 {
19376 /* ??? In order to make all comparisons reversible, we do all comparisons
19377 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19378 all forms of trapping and nontrapping comparisons, we can make inequality
19379 comparisons trapping again, since it results in better code when using
19380 FCOM based compares. */
19381 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19382 }
19383
19384 enum machine_mode
19385 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19386 {
19387 enum machine_mode mode = GET_MODE (op0);
19388
19389 if (SCALAR_FLOAT_MODE_P (mode))
19390 {
19391 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19392 return ix86_fp_compare_mode (code);
19393 }
19394
19395 switch (code)
19396 {
19397 /* Only zero flag is needed. */
19398 case EQ: /* ZF=0 */
19399 case NE: /* ZF!=0 */
19400 return CCZmode;
19401 /* Codes needing carry flag. */
19402 case GEU: /* CF=0 */
19403 case LTU: /* CF=1 */
19404 /* Detect overflow checks. They need just the carry flag. */
19405 if (GET_CODE (op0) == PLUS
19406 && rtx_equal_p (op1, XEXP (op0, 0)))
19407 return CCCmode;
19408 else
19409 return CCmode;
19410 case GTU: /* CF=0 & ZF=0 */
19411 case LEU: /* CF=1 | ZF=1 */
19412 return CCmode;
19413 /* Codes possibly doable only with sign flag when
19414 comparing against zero. */
19415 case GE: /* SF=OF or SF=0 */
19416 case LT: /* SF<>OF or SF=1 */
19417 if (op1 == const0_rtx)
19418 return CCGOCmode;
19419 else
19420 /* For other cases Carry flag is not required. */
19421 return CCGCmode;
19422 /* Codes doable only with the sign flag when comparing
19423 against zero, but for which we lack a jump instruction,
19424 so we need to use relational tests against overflow,
19425 which thus needs to be zero. */
19426 case GT: /* ZF=0 & SF=OF */
19427 case LE: /* ZF=1 | SF<>OF */
19428 if (op1 == const0_rtx)
19429 return CCNOmode;
19430 else
19431 return CCGCmode;
19432 /* The strcmp pattern does (use flags), and combine may ask us for the
19433 proper mode. */
19434 case USE:
19435 return CCmode;
19436 default:
19437 gcc_unreachable ();
19438 }
19439 }
19440
19441 /* Return the fixed registers used for condition codes. */
19442
19443 static bool
19444 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19445 {
19446 *p1 = FLAGS_REG;
19447 *p2 = FPSR_REG;
19448 return true;
19449 }
19450
19451 /* If two condition code modes are compatible, return a condition code
19452 mode which is compatible with both. Otherwise, return
19453 VOIDmode. */
19454
19455 static enum machine_mode
19456 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19457 {
19458 if (m1 == m2)
19459 return m1;
19460
19461 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19462 return VOIDmode;
19463
19464 if ((m1 == CCGCmode && m2 == CCGOCmode)
19465 || (m1 == CCGOCmode && m2 == CCGCmode))
19466 return CCGCmode;
19467
19468 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19469 return m2;
19470 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19471 return m1;
19472
19473 switch (m1)
19474 {
19475 default:
19476 gcc_unreachable ();
19477
19478 case CCmode:
19479 case CCGCmode:
19480 case CCGOCmode:
19481 case CCNOmode:
19482 case CCAmode:
19483 case CCCmode:
19484 case CCOmode:
19485 case CCSmode:
19486 case CCZmode:
19487 switch (m2)
19488 {
19489 default:
19490 return VOIDmode;
19491
19492 case CCmode:
19493 case CCGCmode:
19494 case CCGOCmode:
19495 case CCNOmode:
19496 case CCAmode:
19497 case CCCmode:
19498 case CCOmode:
19499 case CCSmode:
19500 case CCZmode:
19501 return CCmode;
19502 }
19503
19504 case CCFPmode:
19505 case CCFPUmode:
19506 /* These are only compatible with themselves, which we already
19507 checked above. */
19508 return VOIDmode;
19509 }
19510 }
19511
19512
19513 /* Return a comparison we can do that is equivalent to
19514 swap_condition (code), apart possibly from orderedness.
19515 But never change orderedness if TARGET_IEEE_FP; return
19516 UNKNOWN in that case if necessary. */
19517
19518 static enum rtx_code
19519 ix86_fp_swap_condition (enum rtx_code code)
19520 {
19521 switch (code)
19522 {
19523 case GT: /* GTU - CF=0 & ZF=0 */
19524 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19525 case GE: /* GEU - CF=0 */
19526 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19527 case UNLT: /* LTU - CF=1 */
19528 return TARGET_IEEE_FP ? UNKNOWN : GT;
19529 case UNLE: /* LEU - CF=1 | ZF=1 */
19530 return TARGET_IEEE_FP ? UNKNOWN : GE;
19531 default:
19532 return swap_condition (code);
19533 }
19534 }
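/* Illustrative note (not in the original source): plain swapping would turn
   GT into LT, but only the UN* forms of LT/LE map directly onto the carry
   flag here, so the swapped code also flips orderedness (GT -> UNLT).  With
   a NaN operand GT is false while UNLT is true, so under TARGET_IEEE_FP the
   substitution would change the result and UNKNOWN is returned instead.  */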
19535
19536 /* Return the cost of comparison CODE using the best strategy for performance.
19537 All the following functions use the number of instructions as the cost metric.
19538 In the future this should be tweaked to compute bytes for optimize_size and
19539 take into account the performance of various instructions on various CPUs. */
19540
19541 static int
19542 ix86_fp_comparison_cost (enum rtx_code code)
19543 {
19544 int arith_cost;
19545
19546 /* The cost of code using bit-twiddling on %ah. */
19547 switch (code)
19548 {
19549 case UNLE:
19550 case UNLT:
19551 case LTGT:
19552 case GT:
19553 case GE:
19554 case UNORDERED:
19555 case ORDERED:
19556 case UNEQ:
19557 arith_cost = 4;
19558 break;
19559 case LT:
19560 case NE:
19561 case EQ:
19562 case UNGE:
19563 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19564 break;
19565 case LE:
19566 case UNGT:
19567 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19568 break;
19569 default:
19570 gcc_unreachable ();
19571 }
19572
19573 switch (ix86_fp_comparison_strategy (code))
19574 {
19575 case IX86_FPCMP_COMI:
19576 return arith_cost > 4 ? 3 : 2;
19577 case IX86_FPCMP_SAHF:
19578 return arith_cost > 4 ? 4 : 3;
19579 default:
19580 return arith_cost;
19581 }
19582 }
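/* Illustrative note (not in the original source): with TARGET_IEEE_FP an LE
   comparison has arith_cost 6, so this function reports 3 for the FCOMI
   strategy, 4 for SAHF and 6 for the %ah bit-twiddling fallback.  The caller
   in ix86_prepare_fp_compare_args compares these numbers for CODE and
   swap_condition (CODE) to decide whether swapping the operands yields a
   cheaper comparison.  */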
19583
19584 /* Return the strategy to use for floating-point comparisons. We assume fcomi
19585 is always preferable where available, since that also holds when looking at
19586 size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19587
19588 enum ix86_fpcmp_strategy
19589 ix86_fp_comparison_strategy (enum rtx_code)
19590 {
19591 /* Do fcomi/sahf based test when profitable. */
19592
19593 if (TARGET_CMOVE)
19594 return IX86_FPCMP_COMI;
19595
19596 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19597 return IX86_FPCMP_SAHF;
19598
19599 return IX86_FPCMP_ARITH;
19600 }
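/* Illustrative note (not in the original source): the three strategies
   roughly correspond to the following x87 sequences (a hedged sketch,
   register choice arbitrary):

     IX86_FPCMP_COMI:   fcomi %st(1), %st            ; flags set directly
     IX86_FPCMP_SAHF:   fcom %st(1); fnstsw %ax; sahf
     IX86_FPCMP_ARITH:  fcom %st(1); fnstsw %ax; test/and on %ah

   fcomi and cmov were introduced together (the CPUID CMOV feature), which is
   why TARGET_CMOVE is used as the availability check here.  */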
19601
19602 /* Swap, force into registers, or otherwise massage the two operands
19603 to a fp comparison. The operands are updated in place; the new
19604 comparison code is returned. */
19605
19606 static enum rtx_code
19607 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19608 {
19609 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19610 rtx op0 = *pop0, op1 = *pop1;
19611 enum machine_mode op_mode = GET_MODE (op0);
19612 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19613
19614 /* All of the unordered compare instructions only work on registers.
19615 The same is true of the fcomi compare instructions. The XFmode
19616 compare instructions require registers except when comparing
19617 against zero or when converting operand 1 from fixed point to
19618 floating point. */
19619
19620 if (!is_sse
19621 && (fpcmp_mode == CCFPUmode
19622 || (op_mode == XFmode
19623 && ! (standard_80387_constant_p (op0) == 1
19624 || standard_80387_constant_p (op1) == 1)
19625 && GET_CODE (op1) != FLOAT)
19626 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19627 {
19628 op0 = force_reg (op_mode, op0);
19629 op1 = force_reg (op_mode, op1);
19630 }
19631 else
19632 {
19633 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19634 things around if they appear profitable, otherwise force op0
19635 into a register. */
19636
19637 if (standard_80387_constant_p (op0) == 0
19638 || (MEM_P (op0)
19639 && ! (standard_80387_constant_p (op1) == 0
19640 || MEM_P (op1))))
19641 {
19642 enum rtx_code new_code = ix86_fp_swap_condition (code);
19643 if (new_code != UNKNOWN)
19644 {
19645 rtx tmp;
19646 tmp = op0, op0 = op1, op1 = tmp;
19647 code = new_code;
19648 }
19649 }
19650
19651 if (!REG_P (op0))
19652 op0 = force_reg (op_mode, op0);
19653
19654 if (CONSTANT_P (op1))
19655 {
19656 int tmp = standard_80387_constant_p (op1);
19657 if (tmp == 0)
19658 op1 = validize_mem (force_const_mem (op_mode, op1));
19659 else if (tmp == 1)
19660 {
19661 if (TARGET_CMOVE)
19662 op1 = force_reg (op_mode, op1);
19663 }
19664 else
19665 op1 = force_reg (op_mode, op1);
19666 }
19667 }
19668
19669 /* Try to rearrange the comparison to make it cheaper. */
19670 if (ix86_fp_comparison_cost (code)
19671 > ix86_fp_comparison_cost (swap_condition (code))
19672 && (REG_P (op1) || can_create_pseudo_p ()))
19673 {
19674 rtx tmp;
19675 tmp = op0, op0 = op1, op1 = tmp;
19676 code = swap_condition (code);
19677 if (!REG_P (op0))
19678 op0 = force_reg (op_mode, op0);
19679 }
19680
19681 *pop0 = op0;
19682 *pop1 = op1;
19683 return code;
19684 }
19685
19686 /* Convert comparison codes we use to represent FP comparison to integer
19687 code that will result in proper branch. Return UNKNOWN if no such code
19688 is available. */
19689
19690 enum rtx_code
19691 ix86_fp_compare_code_to_integer (enum rtx_code code)
19692 {
19693 switch (code)
19694 {
19695 case GT:
19696 return GTU;
19697 case GE:
19698 return GEU;
19699 case ORDERED:
19700 case UNORDERED:
19701 return code;
19702 break;
19703 case UNEQ:
19704 return EQ;
19705 break;
19706 case UNLT:
19707 return LTU;
19708 break;
19709 case UNLE:
19710 return LEU;
19711 break;
19712 case LTGT:
19713 return NE;
19714 break;
19715 default:
19716 return UNKNOWN;
19717 }
19718 }
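/* Illustrative note (not in the original source): the mapping above relies
   on fcomi/fucomi and comiss/comisd setting ZF, PF and CF the same way an
   unsigned integer compare does, i.e. "greater" looks like "above".  Hence
   FP GT becomes GTU (ja), GE becomes GEU (jae), UNLT becomes LTU (jb) and
   so on; codes with no single flag test map to UNKNOWN.  */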
19719
19720 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19721
19722 static rtx
19723 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19724 {
19725 enum machine_mode fpcmp_mode, intcmp_mode;
19726 rtx tmp, tmp2;
19727
19728 fpcmp_mode = ix86_fp_compare_mode (code);
19729 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19730
19731 /* Do fcomi/sahf based test when profitable. */
19732 switch (ix86_fp_comparison_strategy (code))
19733 {
19734 case IX86_FPCMP_COMI:
19735 intcmp_mode = fpcmp_mode;
19736 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19737 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19738 tmp);
19739 emit_insn (tmp);
19740 break;
19741
19742 case IX86_FPCMP_SAHF:
19743 intcmp_mode = fpcmp_mode;
19744 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19745 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19746 tmp);
19747
19748 if (!scratch)
19749 scratch = gen_reg_rtx (HImode);
19750 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19751 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19752 break;
19753
19754 case IX86_FPCMP_ARITH:
19755 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19756 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19757 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19758 if (!scratch)
19759 scratch = gen_reg_rtx (HImode);
19760 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19761
19762 /* In the unordered case, we have to check C2 for NaNs, which
19763 doesn't happen to work out to anything nice combination-wise.
19764 So do some bit twiddling on the value we've got in AH to come
19765 up with an appropriate set of condition codes. */
19766
19767 intcmp_mode = CCNOmode;
19768 switch (code)
19769 {
19770 case GT:
19771 case UNGT:
19772 if (code == GT || !TARGET_IEEE_FP)
19773 {
19774 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19775 code = EQ;
19776 }
19777 else
19778 {
19779 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19780 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19781 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19782 intcmp_mode = CCmode;
19783 code = GEU;
19784 }
19785 break;
19786 case LT:
19787 case UNLT:
19788 if (code == LT && TARGET_IEEE_FP)
19789 {
19790 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19791 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19792 intcmp_mode = CCmode;
19793 code = EQ;
19794 }
19795 else
19796 {
19797 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19798 code = NE;
19799 }
19800 break;
19801 case GE:
19802 case UNGE:
19803 if (code == GE || !TARGET_IEEE_FP)
19804 {
19805 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19806 code = EQ;
19807 }
19808 else
19809 {
19810 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19811 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19812 code = NE;
19813 }
19814 break;
19815 case LE:
19816 case UNLE:
19817 if (code == LE && TARGET_IEEE_FP)
19818 {
19819 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19820 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19821 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19822 intcmp_mode = CCmode;
19823 code = LTU;
19824 }
19825 else
19826 {
19827 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19828 code = NE;
19829 }
19830 break;
19831 case EQ:
19832 case UNEQ:
19833 if (code == EQ && TARGET_IEEE_FP)
19834 {
19835 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19836 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19837 intcmp_mode = CCmode;
19838 code = EQ;
19839 }
19840 else
19841 {
19842 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19843 code = NE;
19844 }
19845 break;
19846 case NE:
19847 case LTGT:
19848 if (code == NE && TARGET_IEEE_FP)
19849 {
19850 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19851 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19852 GEN_INT (0x40)));
19853 code = NE;
19854 }
19855 else
19856 {
19857 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19858 code = EQ;
19859 }
19860 break;
19861
19862 case UNORDERED:
19863 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19864 code = NE;
19865 break;
19866 case ORDERED:
19867 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19868 code = EQ;
19869 break;
19870
19871 default:
19872 gcc_unreachable ();
19873 }
19874 break;
19875
19876 default:
19877 gcc_unreachable();
19878 }
19879
19880 /* Return the test that should be put into the flags user, i.e.
19881 the bcc, scc, or cmov instruction. */
19882 return gen_rtx_fmt_ee (code, VOIDmode,
19883 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19884 const0_rtx);
19885 }
19886
19887 static rtx
19888 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19889 {
19890 rtx ret;
19891
19892 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19893 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19894
19895 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19896 {
19897 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19898 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19899 }
19900 else
19901 ret = ix86_expand_int_compare (code, op0, op1);
19902
19903 return ret;
19904 }
19905
19906 void
19907 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19908 {
19909 enum machine_mode mode = GET_MODE (op0);
19910 rtx tmp;
19911
19912 switch (mode)
19913 {
19914 case SFmode:
19915 case DFmode:
19916 case XFmode:
19917 case QImode:
19918 case HImode:
19919 case SImode:
19920 simple:
19921 tmp = ix86_expand_compare (code, op0, op1);
19922 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19923 gen_rtx_LABEL_REF (VOIDmode, label),
19924 pc_rtx);
19925 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19926 return;
19927
19928 case DImode:
19929 if (TARGET_64BIT)
19930 goto simple;
19931 case TImode:
19932 /* Expand a double-word (DImode or TImode) branch into multiple compare+branch. */
19933 {
19934 rtx lo[2], hi[2];
19935 rtx_code_label *label2;
19936 enum rtx_code code1, code2, code3;
19937 enum machine_mode submode;
19938
19939 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19940 {
19941 tmp = op0, op0 = op1, op1 = tmp;
19942 code = swap_condition (code);
19943 }
19944
19945 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19946 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19947
19948 submode = mode == DImode ? SImode : DImode;
19949
19950 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19951 avoid two branches. This costs one extra insn, so disable when
19952 optimizing for size. */
19953
19954 if ((code == EQ || code == NE)
19955 && (!optimize_insn_for_size_p ()
19956 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19957 {
19958 rtx xor0, xor1;
19959
19960 xor1 = hi[0];
19961 if (hi[1] != const0_rtx)
19962 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19963 NULL_RTX, 0, OPTAB_WIDEN);
19964
19965 xor0 = lo[0];
19966 if (lo[1] != const0_rtx)
19967 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19968 NULL_RTX, 0, OPTAB_WIDEN);
19969
19970 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19971 NULL_RTX, 0, OPTAB_WIDEN);
19972
19973 ix86_expand_branch (code, tmp, const0_rtx, label);
19974 return;
19975 }
19976
19977 /* Otherwise, if we are doing less-than or greater-or-equal-than,
19978 op1 is a constant and the low word is zero, then we can just
19979 examine the high word. Similarly for low word -1 and
19980 less-or-equal-than or greater-than. */
19981
19982 if (CONST_INT_P (hi[1]))
19983 switch (code)
19984 {
19985 case LT: case LTU: case GE: case GEU:
19986 if (lo[1] == const0_rtx)
19987 {
19988 ix86_expand_branch (code, hi[0], hi[1], label);
19989 return;
19990 }
19991 break;
19992 case LE: case LEU: case GT: case GTU:
19993 if (lo[1] == constm1_rtx)
19994 {
19995 ix86_expand_branch (code, hi[0], hi[1], label);
19996 return;
19997 }
19998 break;
19999 default:
20000 break;
20001 }
20002
20003 /* Otherwise, we need two or three jumps. */
20004
20005 label2 = gen_label_rtx ();
20006
20007 code1 = code;
20008 code2 = swap_condition (code);
20009 code3 = unsigned_condition (code);
20010
20011 switch (code)
20012 {
20013 case LT: case GT: case LTU: case GTU:
20014 break;
20015
20016 case LE: code1 = LT; code2 = GT; break;
20017 case GE: code1 = GT; code2 = LT; break;
20018 case LEU: code1 = LTU; code2 = GTU; break;
20019 case GEU: code1 = GTU; code2 = LTU; break;
20020
20021 case EQ: code1 = UNKNOWN; code2 = NE; break;
20022 case NE: code2 = UNKNOWN; break;
20023
20024 default:
20025 gcc_unreachable ();
20026 }
20027
20028 /*
20029 * a < b =>
20030 * if (hi(a) < hi(b)) goto true;
20031 * if (hi(a) > hi(b)) goto false;
20032 * if (lo(a) < lo(b)) goto true;
20033 * false:
20034 */
20035
20036 if (code1 != UNKNOWN)
20037 ix86_expand_branch (code1, hi[0], hi[1], label);
20038 if (code2 != UNKNOWN)
20039 ix86_expand_branch (code2, hi[0], hi[1], label2);
20040
20041 ix86_expand_branch (code3, lo[0], lo[1], label);
20042
20043 if (code2 != UNKNOWN)
20044 emit_label (label2);
20045 return;
20046 }
20047
20048 default:
20049 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20050 goto simple;
20051 }
20052 }
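/* Illustrative note (not in the original source): on a 32-bit target a
   signed DImode "a < b" branch expanded by the code above becomes roughly

     cmpl  hi(b), hi(a)
     jl    .Ltrue
     jg    .Lfalse
     cmpl  lo(b), lo(a)
     jb    .Ltrue            ; low words always compared unsigned
   .Lfalse:

   matching the code1/code2/code3 scheme: the high words use the original
   signedness, the low words use the unsigned variant.  */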
20053
20054 /* Split branch based on floating point condition. */
20055 void
20056 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20057 rtx target1, rtx target2, rtx tmp)
20058 {
20059 rtx condition;
20060 rtx i;
20061
20062 if (target2 != pc_rtx)
20063 {
20064 rtx tmp = target2;
20065 code = reverse_condition_maybe_unordered (code);
20066 target2 = target1;
20067 target1 = tmp;
20068 }
20069
20070 condition = ix86_expand_fp_compare (code, op1, op2,
20071 tmp);
20072
20073 i = emit_jump_insn (gen_rtx_SET
20074 (VOIDmode, pc_rtx,
20075 gen_rtx_IF_THEN_ELSE (VOIDmode,
20076 condition, target1, target2)));
20077 if (split_branch_probability >= 0)
20078 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20079 }
20080
20081 void
20082 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20083 {
20084 rtx ret;
20085
20086 gcc_assert (GET_MODE (dest) == QImode);
20087
20088 ret = ix86_expand_compare (code, op0, op1);
20089 PUT_MODE (ret, QImode);
20090 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20091 }
20092
20093 /* Expand a comparison that sets or clears the carry flag. Return true
20094 when successful and set *POP to the comparison operation. */
20095 static bool
20096 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20097 {
20098 enum machine_mode mode =
20099 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20100
20101 /* Do not handle double-word compares that go through the special path. */
20102 if (mode == (TARGET_64BIT ? TImode : DImode))
20103 return false;
20104
20105 if (SCALAR_FLOAT_MODE_P (mode))
20106 {
20107 rtx compare_op;
20108 rtx_insn *compare_seq;
20109
20110 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20111
20112 /* Shortcut: the following common codes never translate
20113 into carry flag compares. */
20114 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20115 || code == ORDERED || code == UNORDERED)
20116 return false;
20117
20118 /* These comparisons require the zero flag; swap operands so they don't. */
20119 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20120 && !TARGET_IEEE_FP)
20121 {
20122 rtx tmp = op0;
20123 op0 = op1;
20124 op1 = tmp;
20125 code = swap_condition (code);
20126 }
20127
20128 /* Try to expand the comparison and verify that we end up with
20129 a carry-flag-based comparison. This fails only when we decide
20130 to expand the comparison using arithmetic, which is not a
20131 common scenario. */
20132 start_sequence ();
20133 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20134 compare_seq = get_insns ();
20135 end_sequence ();
20136
20137 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20138 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20139 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20140 else
20141 code = GET_CODE (compare_op);
20142
20143 if (code != LTU && code != GEU)
20144 return false;
20145
20146 emit_insn (compare_seq);
20147 *pop = compare_op;
20148 return true;
20149 }
20150
20151 if (!INTEGRAL_MODE_P (mode))
20152 return false;
20153
20154 switch (code)
20155 {
20156 case LTU:
20157 case GEU:
20158 break;
20159
20160 /* Convert a==0 into (unsigned)a<1. */
20161 case EQ:
20162 case NE:
20163 if (op1 != const0_rtx)
20164 return false;
20165 op1 = const1_rtx;
20166 code = (code == EQ ? LTU : GEU);
20167 break;
20168
20169 /* Convert a>b into b<a or a>=b-1. */
20170 case GTU:
20171 case LEU:
20172 if (CONST_INT_P (op1))
20173 {
20174 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20175 /* Bail out on overflow. We could still swap the operands, but that
20176 would force loading of the constant into a register. */
20177 if (op1 == const0_rtx
20178 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20179 return false;
20180 code = (code == GTU ? GEU : LTU);
20181 }
20182 else
20183 {
20184 rtx tmp = op1;
20185 op1 = op0;
20186 op0 = tmp;
20187 code = (code == GTU ? LTU : GEU);
20188 }
20189 break;
20190
20191 /* Convert a>=0 into (unsigned)a<0x80000000. */
20192 case LT:
20193 case GE:
20194 if (mode == DImode || op1 != const0_rtx)
20195 return false;
20196 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20197 code = (code == LT ? GEU : LTU);
20198 break;
20199 case LE:
20200 case GT:
20201 if (mode == DImode || op1 != constm1_rtx)
20202 return false;
20203 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20204 code = (code == LE ? GEU : LTU);
20205 break;
20206
20207 default:
20208 return false;
20209 }
20210 /* Swapping operands may cause a constant to appear as the first operand. */
20211 if (!nonimmediate_operand (op0, VOIDmode))
20212 {
20213 if (!can_create_pseudo_p ())
20214 return false;
20215 op0 = force_reg (mode, op0);
20216 }
20217 *pop = ix86_expand_compare (code, op0, op1);
20218 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20219 return true;
20220 }
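/* Illustrative note (not in the original source): everything above is
   canonicalised to LTU/GEU because those are exactly the conditions that
   sbb can consume.  E.g. "x == 0" is rewritten as "(unsigned) x < 1", which
   a caller can then turn into

     cmpl  $1, %eax
     sbbl  %edx, %edx        ; %edx = (x == 0) ? -1 : 0

   without any conditional branch or setcc.  */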
20221
20222 bool
20223 ix86_expand_int_movcc (rtx operands[])
20224 {
20225 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20226 rtx_insn *compare_seq;
20227 rtx compare_op;
20228 enum machine_mode mode = GET_MODE (operands[0]);
20229 bool sign_bit_compare_p = false;
20230 rtx op0 = XEXP (operands[1], 0);
20231 rtx op1 = XEXP (operands[1], 1);
20232
20233 if (GET_MODE (op0) == TImode
20234 || (GET_MODE (op0) == DImode
20235 && !TARGET_64BIT))
20236 return false;
20237
20238 start_sequence ();
20239 compare_op = ix86_expand_compare (code, op0, op1);
20240 compare_seq = get_insns ();
20241 end_sequence ();
20242
20243 compare_code = GET_CODE (compare_op);
20244
20245 if ((op1 == const0_rtx && (code == GE || code == LT))
20246 || (op1 == constm1_rtx && (code == GT || code == LE)))
20247 sign_bit_compare_p = true;
20248
20249 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20250 HImode insns, we'd be swallowed in word prefix ops. */
20251
20252 if ((mode != HImode || TARGET_FAST_PREFIX)
20253 && (mode != (TARGET_64BIT ? TImode : DImode))
20254 && CONST_INT_P (operands[2])
20255 && CONST_INT_P (operands[3]))
20256 {
20257 rtx out = operands[0];
20258 HOST_WIDE_INT ct = INTVAL (operands[2]);
20259 HOST_WIDE_INT cf = INTVAL (operands[3]);
20260 HOST_WIDE_INT diff;
20261
20262 diff = ct - cf;
20263 /* Sign bit compares are better done using shifts than by using
20264 sbb. */
20265 if (sign_bit_compare_p
20266 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20267 {
20268 /* Detect overlap between destination and compare sources. */
20269 rtx tmp = out;
20270
20271 if (!sign_bit_compare_p)
20272 {
20273 rtx flags;
20274 bool fpcmp = false;
20275
20276 compare_code = GET_CODE (compare_op);
20277
20278 flags = XEXP (compare_op, 0);
20279
20280 if (GET_MODE (flags) == CCFPmode
20281 || GET_MODE (flags) == CCFPUmode)
20282 {
20283 fpcmp = true;
20284 compare_code
20285 = ix86_fp_compare_code_to_integer (compare_code);
20286 }
20287
20288 /* To simplify rest of code, restrict to the GEU case. */
20289 if (compare_code == LTU)
20290 {
20291 HOST_WIDE_INT tmp = ct;
20292 ct = cf;
20293 cf = tmp;
20294 compare_code = reverse_condition (compare_code);
20295 code = reverse_condition (code);
20296 }
20297 else
20298 {
20299 if (fpcmp)
20300 PUT_CODE (compare_op,
20301 reverse_condition_maybe_unordered
20302 (GET_CODE (compare_op)));
20303 else
20304 PUT_CODE (compare_op,
20305 reverse_condition (GET_CODE (compare_op)));
20306 }
20307 diff = ct - cf;
20308
20309 if (reg_overlap_mentioned_p (out, op0)
20310 || reg_overlap_mentioned_p (out, op1))
20311 tmp = gen_reg_rtx (mode);
20312
20313 if (mode == DImode)
20314 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20315 else
20316 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20317 flags, compare_op));
20318 }
20319 else
20320 {
20321 if (code == GT || code == GE)
20322 code = reverse_condition (code);
20323 else
20324 {
20325 HOST_WIDE_INT tmp = ct;
20326 ct = cf;
20327 cf = tmp;
20328 diff = ct - cf;
20329 }
20330 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20331 }
20332
20333 if (diff == 1)
20334 {
20335 /*
20336 * cmpl op0,op1
20337 * sbbl dest,dest
20338 * [addl dest, ct]
20339 *
20340 * Size 5 - 8.
20341 */
20342 if (ct)
20343 tmp = expand_simple_binop (mode, PLUS,
20344 tmp, GEN_INT (ct),
20345 copy_rtx (tmp), 1, OPTAB_DIRECT);
20346 }
20347 else if (cf == -1)
20348 {
20349 /*
20350 * cmpl op0,op1
20351 * sbbl dest,dest
20352 * orl $ct, dest
20353 *
20354 * Size 8.
20355 */
20356 tmp = expand_simple_binop (mode, IOR,
20357 tmp, GEN_INT (ct),
20358 copy_rtx (tmp), 1, OPTAB_DIRECT);
20359 }
20360 else if (diff == -1 && ct)
20361 {
20362 /*
20363 * cmpl op0,op1
20364 * sbbl dest,dest
20365 * notl dest
20366 * [addl dest, cf]
20367 *
20368 * Size 8 - 11.
20369 */
20370 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20371 if (cf)
20372 tmp = expand_simple_binop (mode, PLUS,
20373 copy_rtx (tmp), GEN_INT (cf),
20374 copy_rtx (tmp), 1, OPTAB_DIRECT);
20375 }
20376 else
20377 {
20378 /*
20379 * cmpl op0,op1
20380 * sbbl dest,dest
20381 * [notl dest]
20382 * andl cf - ct, dest
20383 * [addl dest, ct]
20384 *
20385 * Size 8 - 11.
20386 */
20387
20388 if (cf == 0)
20389 {
20390 cf = ct;
20391 ct = 0;
20392 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20393 }
20394
20395 tmp = expand_simple_binop (mode, AND,
20396 copy_rtx (tmp),
20397 gen_int_mode (cf - ct, mode),
20398 copy_rtx (tmp), 1, OPTAB_DIRECT);
20399 if (ct)
20400 tmp = expand_simple_binop (mode, PLUS,
20401 copy_rtx (tmp), GEN_INT (ct),
20402 copy_rtx (tmp), 1, OPTAB_DIRECT);
20403 }
20404
20405 if (!rtx_equal_p (tmp, out))
20406 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20407
20408 return true;
20409 }
20410
20411 if (diff < 0)
20412 {
20413 enum machine_mode cmp_mode = GET_MODE (op0);
20414
20415 HOST_WIDE_INT tmp;
20416 tmp = ct, ct = cf, cf = tmp;
20417 diff = -diff;
20418
20419 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20420 {
20421 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20422
20423 /* We may be reversing an unordered compare to a normal compare, which
20424 is not valid in general (we may convert a non-trapping condition
20425 into a trapping one); however, on i386 we currently emit all
20426 comparisons unordered. */
20427 compare_code = reverse_condition_maybe_unordered (compare_code);
20428 code = reverse_condition_maybe_unordered (code);
20429 }
20430 else
20431 {
20432 compare_code = reverse_condition (compare_code);
20433 code = reverse_condition (code);
20434 }
20435 }
20436
20437 compare_code = UNKNOWN;
20438 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20439 && CONST_INT_P (op1))
20440 {
20441 if (op1 == const0_rtx
20442 && (code == LT || code == GE))
20443 compare_code = code;
20444 else if (op1 == constm1_rtx)
20445 {
20446 if (code == LE)
20447 compare_code = LT;
20448 else if (code == GT)
20449 compare_code = GE;
20450 }
20451 }
20452
20453 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20454 if (compare_code != UNKNOWN
20455 && GET_MODE (op0) == GET_MODE (out)
20456 && (cf == -1 || ct == -1))
20457 {
20458 /* If the lea code below could be used, only optimize
20459 if it results in a 2-insn sequence. */
20460
20461 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20462 || diff == 3 || diff == 5 || diff == 9)
20463 || (compare_code == LT && ct == -1)
20464 || (compare_code == GE && cf == -1))
20465 {
20466 /*
20467 * notl op1 (if necessary)
20468 * sarl $31, op1
20469 * orl cf, op1
20470 */
20471 if (ct != -1)
20472 {
20473 cf = ct;
20474 ct = -1;
20475 code = reverse_condition (code);
20476 }
20477
20478 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20479
20480 out = expand_simple_binop (mode, IOR,
20481 out, GEN_INT (cf),
20482 out, 1, OPTAB_DIRECT);
20483 if (out != operands[0])
20484 emit_move_insn (operands[0], out);
20485
20486 return true;
20487 }
20488 }
20489
20490
20491 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20492 || diff == 3 || diff == 5 || diff == 9)
20493 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20494 && (mode != DImode
20495 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20496 {
20497 /*
20498 * xorl dest,dest
20499 * cmpl op1,op2
20500 * setcc dest
20501 * lea cf(dest*(ct-cf)),dest
20502 *
20503 * Size 14.
20504 *
20505 * This also catches the degenerate setcc-only case.
20506 */
20507
20508 rtx tmp;
20509 int nops;
20510
20511 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20512
20513 nops = 0;
20514 /* On x86_64 the lea instruction operates on Pmode, so we need
20515 to get the arithmetic done in the proper mode to match. */
20516 if (diff == 1)
20517 tmp = copy_rtx (out);
20518 else
20519 {
20520 rtx out1;
20521 out1 = copy_rtx (out);
20522 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20523 nops++;
20524 if (diff & 1)
20525 {
20526 tmp = gen_rtx_PLUS (mode, tmp, out1);
20527 nops++;
20528 }
20529 }
20530 if (cf != 0)
20531 {
20532 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20533 nops++;
20534 }
20535 if (!rtx_equal_p (tmp, out))
20536 {
20537 if (nops == 1)
20538 out = force_operand (tmp, copy_rtx (out));
20539 else
20540 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20541 }
20542 if (!rtx_equal_p (out, operands[0]))
20543 emit_move_insn (operands[0], copy_rtx (out));
20544
20545 return true;
20546 }
20547
20548 /*
20549 * General case: Jumpful:
20550 * xorl dest,dest cmpl op1, op2
20551 * cmpl op1, op2 movl ct, dest
20552 * setcc dest jcc 1f
20553 * decl dest movl cf, dest
20554 * andl (cf-ct),dest 1:
20555 * addl ct,dest
20556 *
20557 * Size 20. Size 14.
20558 *
20559 * This is reasonably steep, but branch mispredict costs are
20560 * high on modern cpus, so consider failing only if optimizing
20561 * for space.
20562 */
20563
20564 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20565 && BRANCH_COST (optimize_insn_for_speed_p (),
20566 false) >= 2)
20567 {
20568 if (cf == 0)
20569 {
20570 enum machine_mode cmp_mode = GET_MODE (op0);
20571
20572 cf = ct;
20573 ct = 0;
20574
20575 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20576 {
20577 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20578
20579 /* We may be reversing an unordered compare to a normal compare,
20580 which is not valid in general (we may convert a non-trapping
20581 condition into a trapping one); however, on i386 we currently
20582 emit all comparisons unordered. */
20583 code = reverse_condition_maybe_unordered (code);
20584 }
20585 else
20586 {
20587 code = reverse_condition (code);
20588 if (compare_code != UNKNOWN)
20589 compare_code = reverse_condition (compare_code);
20590 }
20591 }
20592
20593 if (compare_code != UNKNOWN)
20594 {
20595 /* notl op1 (if needed)
20596 sarl $31, op1
20597 andl (cf-ct), op1
20598 addl ct, op1
20599
20600 For x < 0 (resp. x <= -1) there will be no notl,
20601 so if possible swap the constants to get rid of the
20602 complement.
20603 True/false will be -1/0 while code below (store flag
20604 followed by decrement) is 0/-1, so the constants need
20605 to be exchanged once more. */
20606
20607 if (compare_code == GE || !cf)
20608 {
20609 code = reverse_condition (code);
20610 compare_code = LT;
20611 }
20612 else
20613 {
20614 HOST_WIDE_INT tmp = cf;
20615 cf = ct;
20616 ct = tmp;
20617 }
20618
20619 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20620 }
20621 else
20622 {
20623 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20624
20625 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20626 constm1_rtx,
20627 copy_rtx (out), 1, OPTAB_DIRECT);
20628 }
20629
20630 out = expand_simple_binop (mode, AND, copy_rtx (out),
20631 gen_int_mode (cf - ct, mode),
20632 copy_rtx (out), 1, OPTAB_DIRECT);
20633 if (ct)
20634 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20635 copy_rtx (out), 1, OPTAB_DIRECT);
20636 if (!rtx_equal_p (out, operands[0]))
20637 emit_move_insn (operands[0], copy_rtx (out));
20638
20639 return true;
20640 }
20641 }
20642
20643 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20644 {
20645 /* Try a few things more with specific constants and a variable. */
20646
20647 optab op;
20648 rtx var, orig_out, out, tmp;
20649
20650 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20651 return false;
20652
20653 /* If one of the two operands is an interesting constant, load a
20654 constant with the above and mask it in with a logical operation. */
20655
20656 if (CONST_INT_P (operands[2]))
20657 {
20658 var = operands[3];
20659 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20660 operands[3] = constm1_rtx, op = and_optab;
20661 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20662 operands[3] = const0_rtx, op = ior_optab;
20663 else
20664 return false;
20665 }
20666 else if (CONST_INT_P (operands[3]))
20667 {
20668 var = operands[2];
20669 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20670 operands[2] = constm1_rtx, op = and_optab;
20671 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
20672 operands[2] = const0_rtx, op = ior_optab;
20673 else
20674 return false;
20675 }
20676 else
20677 return false;
20678
20679 orig_out = operands[0];
20680 tmp = gen_reg_rtx (mode);
20681 operands[0] = tmp;
20682
20683 /* Recurse to get the constant loaded. */
20684 if (ix86_expand_int_movcc (operands) == 0)
20685 return false;
20686
20687 /* Mask in the interesting variable. */
20688 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20689 OPTAB_WIDEN);
20690 if (!rtx_equal_p (out, orig_out))
20691 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20692
20693 return true;
20694 }
20695
20696 /*
20697 * For comparison with above,
20698 *
20699 * movl cf,dest
20700 * movl ct,tmp
20701 * cmpl op1,op2
20702 * cmovcc tmp,dest
20703 *
20704 * Size 15.
20705 */
20706
20707 if (! nonimmediate_operand (operands[2], mode))
20708 operands[2] = force_reg (mode, operands[2]);
20709 if (! nonimmediate_operand (operands[3], mode))
20710 operands[3] = force_reg (mode, operands[3]);
20711
20712 if (! register_operand (operands[2], VOIDmode)
20713 && (mode == QImode
20714 || ! register_operand (operands[3], VOIDmode)))
20715 operands[2] = force_reg (mode, operands[2]);
20716
20717 if (mode == QImode
20718 && ! register_operand (operands[3], VOIDmode))
20719 operands[3] = force_reg (mode, operands[3]);
20720
20721 emit_insn (compare_seq);
20722 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20723 gen_rtx_IF_THEN_ELSE (mode,
20724 compare_op, operands[2],
20725 operands[3])));
20726 return true;
20727 }
20728
20729 /* Swap, force into registers, or otherwise massage the two operands
20730 to an sse comparison with a mask result. Thus we differ a bit from
20731 ix86_prepare_fp_compare_args which expects to produce a flags result.
20732
20733 The DEST operand exists to help determine whether to commute commutative
20734 operators. The POP0/POP1 operands are updated in place. The new
20735 comparison code is returned, or UNKNOWN if not implementable. */
20736
20737 static enum rtx_code
20738 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20739 rtx *pop0, rtx *pop1)
20740 {
20741 rtx tmp;
20742
20743 switch (code)
20744 {
20745 case LTGT:
20746 case UNEQ:
20747 /* AVX supports all the needed comparisons. */
20748 if (TARGET_AVX)
20749 break;
20750 /* We have no LTGT as an operator. We could implement it with
20751 NE & ORDERED, but this requires an extra temporary. It's
20752 not clear that it's worth it. */
20753 return UNKNOWN;
20754
20755 case LT:
20756 case LE:
20757 case UNGT:
20758 case UNGE:
20759 /* These are supported directly. */
20760 break;
20761
20762 case EQ:
20763 case NE:
20764 case UNORDERED:
20765 case ORDERED:
20766 /* AVX has 3 operand comparisons, no need to swap anything. */
20767 if (TARGET_AVX)
20768 break;
20769 /* For commutative operators, try to canonicalize the destination
20770 operand to be first in the comparison - this helps reload to
20771 avoid extra moves. */
20772 if (!dest || !rtx_equal_p (dest, *pop1))
20773 break;
20774 /* FALLTHRU */
20775
20776 case GE:
20777 case GT:
20778 case UNLE:
20779 case UNLT:
20780 /* These are not supported directly before AVX, and furthermore
20781 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20782 comparison operands to transform into something that is
20783 supported. */
20784 tmp = *pop0;
20785 *pop0 = *pop1;
20786 *pop1 = tmp;
20787 code = swap_condition (code);
20788 break;
20789
20790 default:
20791 gcc_unreachable ();
20792 }
20793
20794 return code;
20795 }
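/* Illustrative note (not in the original source): before AVX the cmpps/cmppd
   immediate only encodes EQ, LT, LE, UNORD, NEQ, NLT, NLE and ORD, so a GT
   or GE comparison has no direct encoding; the swap above (a > b ==> b < a)
   turns it into one that does, and also gives ix86_expand_sse_fp_minmax the
   LT/UNGE shape it knows how to optimize.  AVX's vcmpps widens the immediate
   to 32 predicates, which is why LTGT/UNEQ and the commutative codes can be
   left alone when TARGET_AVX.  */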
20796
20797 /* Detect conditional moves that exactly match min/max operational
20798 semantics. Note that this is IEEE safe, as long as we don't
20799 interchange the operands.
20800
20801 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20802 and TRUE if the operation is successful and instructions are emitted. */
20803
20804 static bool
20805 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20806 rtx cmp_op1, rtx if_true, rtx if_false)
20807 {
20808 enum machine_mode mode;
20809 bool is_min;
20810 rtx tmp;
20811
20812 if (code == LT)
20813 ;
20814 else if (code == UNGE)
20815 {
20816 tmp = if_true;
20817 if_true = if_false;
20818 if_false = tmp;
20819 }
20820 else
20821 return false;
20822
20823 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20824 is_min = true;
20825 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20826 is_min = false;
20827 else
20828 return false;
20829
20830 mode = GET_MODE (dest);
20831
20832 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20833 but MODE may be a vector mode and thus not appropriate. */
20834 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20835 {
20836 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20837 rtvec v;
20838
20839 if_true = force_reg (mode, if_true);
20840 v = gen_rtvec (2, if_true, if_false);
20841 tmp = gen_rtx_UNSPEC (mode, v, u);
20842 }
20843 else
20844 {
20845 code = is_min ? SMIN : SMAX;
20846 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20847 }
20848
20849 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20850 return true;
20851 }
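/* Illustrative note (not in the original source): minps/maxps are not
   commutative -- minps computes "a < b ? a : b" and returns the second
   operand when either input is a NaN or when comparing -0.0 with +0.0.
   That asymmetry is why the function above only accepts the LT/UNGE shapes
   and, unless -ffinite-math-only and -funsafe-math-optimizations are both
   in effect, wraps the operation in UNSPEC_IEEE_MIN/MAX rather than a bare
   SMIN/SMAX, so later passes cannot interchange the operands.  */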
20852
20853 /* Expand an sse vector comparison. Return the register with the result. */
20854
20855 static rtx
20856 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20857 rtx op_true, rtx op_false)
20858 {
20859 enum machine_mode mode = GET_MODE (dest);
20860 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20861
20862 /* In the general case the result of the comparison can have a different mode from the operands. */
20863 enum machine_mode cmp_mode;
20864
20865 /* In AVX512F the result of comparison is an integer mask. */
20866 bool maskcmp = false;
20867 rtx x;
20868
20869 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20870 {
20871 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20872 gcc_assert (cmp_mode != BLKmode);
20873
20874 maskcmp = true;
20875 }
20876 else
20877 cmp_mode = cmp_ops_mode;
20878
20879
20880 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20881 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20882 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20883
20884 if (optimize
20885 || reg_overlap_mentioned_p (dest, op_true)
20886 || reg_overlap_mentioned_p (dest, op_false))
20887 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20888
20889 /* Compare patterns for int modes are unspec in AVX512F only. */
20890 if (maskcmp && (code == GT || code == EQ))
20891 {
20892 rtx (*gen)(rtx, rtx, rtx);
20893
20894 switch (cmp_ops_mode)
20895 {
20896 case V16SImode:
20897 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20898 break;
20899 case V8DImode:
20900 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20901 break;
20902 default:
20903 gen = NULL;
20904 }
20905
20906 if (gen)
20907 {
20908 emit_insn (gen (dest, cmp_op0, cmp_op1));
20909 return dest;
20910 }
20911 }
20912 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20913
20914 if (cmp_mode != mode && !maskcmp)
20915 {
20916 x = force_reg (cmp_ops_mode, x);
20917 convert_move (dest, x, false);
20918 }
20919 else
20920 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20921
20922 return dest;
20923 }
20924
20925 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20926 operations. This is used for both scalar and vector conditional moves. */
20927
20928 static void
20929 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20930 {
20931 enum machine_mode mode = GET_MODE (dest);
20932 enum machine_mode cmpmode = GET_MODE (cmp);
20933
20934 /* In AVX512F the result of comparison is an integer mask. */
20935 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20936
20937 rtx t2, t3, x;
20938
20939 if (vector_all_ones_operand (op_true, mode)
20940 && rtx_equal_p (op_false, CONST0_RTX (mode))
20941 && !maskcmp)
20942 {
20943 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20944 }
20945 else if (op_false == CONST0_RTX (mode)
20946 && !maskcmp)
20947 {
20948 op_true = force_reg (mode, op_true);
20949 x = gen_rtx_AND (mode, cmp, op_true);
20950 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20951 }
20952 else if (op_true == CONST0_RTX (mode)
20953 && !maskcmp)
20954 {
20955 op_false = force_reg (mode, op_false);
20956 x = gen_rtx_NOT (mode, cmp);
20957 x = gen_rtx_AND (mode, x, op_false);
20958 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20959 }
20960 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20961 && !maskcmp)
20962 {
20963 op_false = force_reg (mode, op_false);
20964 x = gen_rtx_IOR (mode, cmp, op_false);
20965 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20966 }
20967 else if (TARGET_XOP
20968 && !maskcmp)
20969 {
20970 op_true = force_reg (mode, op_true);
20971
20972 if (!nonimmediate_operand (op_false, mode))
20973 op_false = force_reg (mode, op_false);
20974
20975 emit_insn (gen_rtx_SET (mode, dest,
20976 gen_rtx_IF_THEN_ELSE (mode, cmp,
20977 op_true,
20978 op_false)));
20979 }
20980 else
20981 {
20982 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20983 rtx d = dest;
20984
20985 if (!nonimmediate_operand (op_true, mode))
20986 op_true = force_reg (mode, op_true);
20987
20988 op_false = force_reg (mode, op_false);
20989
20990 switch (mode)
20991 {
20992 case V4SFmode:
20993 if (TARGET_SSE4_1)
20994 gen = gen_sse4_1_blendvps;
20995 break;
20996 case V2DFmode:
20997 if (TARGET_SSE4_1)
20998 gen = gen_sse4_1_blendvpd;
20999 break;
21000 case V16QImode:
21001 case V8HImode:
21002 case V4SImode:
21003 case V2DImode:
21004 if (TARGET_SSE4_1)
21005 {
21006 gen = gen_sse4_1_pblendvb;
21007 if (mode != V16QImode)
21008 d = gen_reg_rtx (V16QImode);
21009 op_false = gen_lowpart (V16QImode, op_false);
21010 op_true = gen_lowpart (V16QImode, op_true);
21011 cmp = gen_lowpart (V16QImode, cmp);
21012 }
21013 break;
21014 case V8SFmode:
21015 if (TARGET_AVX)
21016 gen = gen_avx_blendvps256;
21017 break;
21018 case V4DFmode:
21019 if (TARGET_AVX)
21020 gen = gen_avx_blendvpd256;
21021 break;
21022 case V32QImode:
21023 case V16HImode:
21024 case V8SImode:
21025 case V4DImode:
21026 if (TARGET_AVX2)
21027 {
21028 gen = gen_avx2_pblendvb;
21029 if (mode != V32QImode)
21030 d = gen_reg_rtx (V32QImode);
21031 op_false = gen_lowpart (V32QImode, op_false);
21032 op_true = gen_lowpart (V32QImode, op_true);
21033 cmp = gen_lowpart (V32QImode, cmp);
21034 }
21035 break;
21036
21037 case V16SImode:
21038 gen = gen_avx512f_blendmv16si;
21039 break;
21040 case V8DImode:
21041 gen = gen_avx512f_blendmv8di;
21042 break;
21043 case V8DFmode:
21044 gen = gen_avx512f_blendmv8df;
21045 break;
21046 case V16SFmode:
21047 gen = gen_avx512f_blendmv16sf;
21048 break;
21049
21050 default:
21051 break;
21052 }
21053
21054 if (gen != NULL)
21055 {
21056 emit_insn (gen (d, op_false, op_true, cmp));
21057 if (d != dest)
21058 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21059 }
21060 else
21061 {
21062 op_true = force_reg (mode, op_true);
21063
21064 t2 = gen_reg_rtx (mode);
21065 if (optimize)
21066 t3 = gen_reg_rtx (mode);
21067 else
21068 t3 = dest;
21069
21070 x = gen_rtx_AND (mode, op_true, cmp);
21071 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21072
21073 x = gen_rtx_NOT (mode, cmp);
21074 x = gen_rtx_AND (mode, x, op_false);
21075 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21076
21077 x = gen_rtx_IOR (mode, t3, t2);
21078 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21079 }
21080 }
21081 }
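/* Illustrative note (not in the original source): when no blend instruction
   is available, the final fallback above emits the classic mask merge

     t2   = cmp & op_true
     t3   = ~cmp & op_false
     dest = t2 | t3

   which relies on the comparison result being all-ones or all-zeros per
   element; the earlier special cases merely shortcut this when one arm is
   the constant 0 or -1.  */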
21082
21083 /* Expand a floating-point conditional move. Return true if successful. */
21084
21085 bool
21086 ix86_expand_fp_movcc (rtx operands[])
21087 {
21088 enum machine_mode mode = GET_MODE (operands[0]);
21089 enum rtx_code code = GET_CODE (operands[1]);
21090 rtx tmp, compare_op;
21091 rtx op0 = XEXP (operands[1], 0);
21092 rtx op1 = XEXP (operands[1], 1);
21093
21094 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21095 {
21096 enum machine_mode cmode;
21097
21098 /* Since we have no cmove for sse registers, don't force bad register
21099 allocation just to gain access to it. Deny movcc when the
21100 comparison mode doesn't match the move mode. */
21101 cmode = GET_MODE (op0);
21102 if (cmode == VOIDmode)
21103 cmode = GET_MODE (op1);
21104 if (cmode != mode)
21105 return false;
21106
21107 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21108 if (code == UNKNOWN)
21109 return false;
21110
21111 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21112 operands[2], operands[3]))
21113 return true;
21114
21115 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21116 operands[2], operands[3]);
21117 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21118 return true;
21119 }
21120
21121 if (GET_MODE (op0) == TImode
21122 || (GET_MODE (op0) == DImode
21123 && !TARGET_64BIT))
21124 return false;
21125
21126 /* The floating point conditional move instructions don't directly
21127 support conditions resulting from a signed integer comparison. */
21128
21129 compare_op = ix86_expand_compare (code, op0, op1);
21130 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21131 {
21132 tmp = gen_reg_rtx (QImode);
21133 ix86_expand_setcc (tmp, code, op0, op1);
21134
21135 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21136 }
21137
21138 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21139 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21140 operands[2], operands[3])));
21141
21142 return true;
21143 }
21144
21145 /* Expand a floating-point vector conditional move; a vcond operation
21146 rather than a movcc operation. */
21147
21148 bool
21149 ix86_expand_fp_vcond (rtx operands[])
21150 {
21151 enum rtx_code code = GET_CODE (operands[3]);
21152 rtx cmp;
21153
21154 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21155 &operands[4], &operands[5]);
21156 if (code == UNKNOWN)
21157 {
21158 rtx temp;
21159 switch (GET_CODE (operands[3]))
21160 {
21161 case LTGT:
21162 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21163 operands[5], operands[0], operands[0]);
21164 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21165 operands[5], operands[1], operands[2]);
21166 code = AND;
21167 break;
21168 case UNEQ:
21169 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21170 operands[5], operands[0], operands[0]);
21171 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21172 operands[5], operands[1], operands[2]);
21173 code = IOR;
21174 break;
21175 default:
21176 gcc_unreachable ();
21177 }
21178 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21179 OPTAB_DIRECT);
21180 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21181 return true;
21182 }
21183
21184 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21185 operands[5], operands[1], operands[2]))
21186 return true;
21187
21188 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21189 operands[1], operands[2]);
21190 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21191 return true;
21192 }
21193
21194 /* Expand a signed/unsigned integral vector conditional move. */
21195
21196 bool
21197 ix86_expand_int_vcond (rtx operands[])
21198 {
21199 enum machine_mode data_mode = GET_MODE (operands[0]);
21200 enum machine_mode mode = GET_MODE (operands[4]);
21201 enum rtx_code code = GET_CODE (operands[3]);
21202 bool negate = false;
21203 rtx x, cop0, cop1;
21204
21205 cop0 = operands[4];
21206 cop1 = operands[5];
21207
21208 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21209 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21210 if ((code == LT || code == GE)
21211 && data_mode == mode
21212 && cop1 == CONST0_RTX (mode)
21213 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21214 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21215 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21216 && (GET_MODE_SIZE (data_mode) == 16
21217 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21218 {
21219 rtx negop = operands[2 - (code == LT)];
21220 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21221 if (negop == CONST1_RTX (data_mode))
21222 {
21223 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21224 operands[0], 1, OPTAB_DIRECT);
21225 if (res != operands[0])
21226 emit_move_insn (operands[0], res);
21227 return true;
21228 }
21229 else if (GET_MODE_INNER (data_mode) != DImode
21230 && vector_all_ones_operand (negop, data_mode))
21231 {
21232 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21233 operands[0], 0, OPTAB_DIRECT);
21234 if (res != operands[0])
21235 emit_move_insn (operands[0], res);
21236 return true;
21237 }
21238 }
21239
21240 if (!nonimmediate_operand (cop1, mode))
21241 cop1 = force_reg (mode, cop1);
21242 if (!general_operand (operands[1], data_mode))
21243 operands[1] = force_reg (data_mode, operands[1]);
21244 if (!general_operand (operands[2], data_mode))
21245 operands[2] = force_reg (data_mode, operands[2]);
21246
21247 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21248 if (TARGET_XOP
21249 && (mode == V16QImode || mode == V8HImode
21250 || mode == V4SImode || mode == V2DImode))
21251 ;
21252 else
21253 {
21254 /* Canonicalize the comparison to EQ, GT, GTU. */
21255 switch (code)
21256 {
21257 case EQ:
21258 case GT:
21259 case GTU:
21260 break;
21261
21262 case NE:
21263 case LE:
21264 case LEU:
21265 code = reverse_condition (code);
21266 negate = true;
21267 break;
21268
21269 case GE:
21270 case GEU:
21271 code = reverse_condition (code);
21272 negate = true;
21273 /* FALLTHRU */
21274
21275 case LT:
21276 case LTU:
21277 code = swap_condition (code);
21278 x = cop0, cop0 = cop1, cop1 = x;
21279 break;
21280
21281 default:
21282 gcc_unreachable ();
21283 }
21284
21285 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21286 if (mode == V2DImode)
21287 {
21288 switch (code)
21289 {
21290 case EQ:
21291 /* SSE4.1 supports EQ. */
21292 if (!TARGET_SSE4_1)
21293 return false;
21294 break;
21295
21296 case GT:
21297 case GTU:
21298 /* SSE4.2 supports GT/GTU. */
21299 if (!TARGET_SSE4_2)
21300 return false;
21301 break;
21302
21303 default:
21304 gcc_unreachable ();
21305 }
21306 }
21307
21308 /* Unsigned parallel compare is not supported by the hardware.
21309 Play some tricks to turn this into a signed comparison
21310 against 0. */
21311 if (code == GTU)
21312 {
21313 cop0 = force_reg (mode, cop0);
21314
21315 switch (mode)
21316 {
21317 case V16SImode:
21318 case V8DImode:
21319 case V8SImode:
21320 case V4DImode:
21321 case V4SImode:
21322 case V2DImode:
21323 {
21324 rtx t1, t2, mask;
21325 rtx (*gen_sub3) (rtx, rtx, rtx);
21326
21327 switch (mode)
21328 {
21329 case V16SImode: gen_sub3 = gen_subv16si3; break;
21330 case V8DImode: gen_sub3 = gen_subv8di3; break;
21331 case V8SImode: gen_sub3 = gen_subv8si3; break;
21332 case V4DImode: gen_sub3 = gen_subv4di3; break;
21333 case V4SImode: gen_sub3 = gen_subv4si3; break;
21334 case V2DImode: gen_sub3 = gen_subv2di3; break;
21335 default:
21336 gcc_unreachable ();
21337 }
21338 /* Subtract (-(INT MAX) - 1) from both operands to make
21339 them signed. */
21340 mask = ix86_build_signbit_mask (mode, true, false);
21341 t1 = gen_reg_rtx (mode);
21342 emit_insn (gen_sub3 (t1, cop0, mask));
21343
21344 t2 = gen_reg_rtx (mode);
21345 emit_insn (gen_sub3 (t2, cop1, mask));
21346
21347 cop0 = t1;
21348 cop1 = t2;
21349 code = GT;
21350 }
21351 break;
21352
21353 case V32QImode:
21354 case V16HImode:
21355 case V16QImode:
21356 case V8HImode:
21357 /* Perform a parallel unsigned saturating subtraction. */
21358 x = gen_reg_rtx (mode);
21359 emit_insn (gen_rtx_SET (VOIDmode, x,
21360 gen_rtx_US_MINUS (mode, cop0, cop1)));
21361
21362 cop0 = x;
21363 cop1 = CONST0_RTX (mode);
21364 code = EQ;
21365 negate = !negate;
21366 break;
21367
21368 default:
21369 gcc_unreachable ();
21370 }
21371 }
21372 }
21373
21374 /* Allow the comparison to be done in one mode, but the movcc to
21375 happen in another mode. */
21376 if (data_mode == mode)
21377 {
21378 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21379 operands[1+negate], operands[2-negate]);
21380 }
21381 else
21382 {
21383 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21384 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21385 operands[1+negate], operands[2-negate]);
21386 if (GET_MODE (x) == mode)
21387 x = gen_lowpart (data_mode, x);
21388 }
21389
21390 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21391 operands[2-negate]);
21392 return true;
21393 }
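/* Illustrative note (not in the original source): the GTU handling above
   uses the identity "a >u b  <=>  (a ^ 0x80..0) >s (b ^ 0x80..0)",
   implemented by subtracting the sign-bit mask from both operands so the
   existing signed pcmpgt patterns can be used.  For the 8- and 16-bit
   element types psubus is used instead: "a >u b" holds exactly when the
   unsigned saturating difference a -us b is non-zero, hence the EQ against
   zero with the result negated.  */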
21394
21395 static bool
21396 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21397 {
21398 enum machine_mode mode = GET_MODE (op0);
21399 switch (mode)
21400 {
21401 case V16SImode:
21402 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21403 force_reg (V16SImode, mask),
21404 op1));
21405 return true;
21406 case V16SFmode:
21407 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21408 force_reg (V16SImode, mask),
21409 op1));
21410 return true;
21411 case V8DImode:
21412 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21413 force_reg (V8DImode, mask), op1));
21414 return true;
21415 case V8DFmode:
21416 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21417 force_reg (V8DImode, mask), op1));
21418 return true;
21419 default:
21420 return false;
21421 }
21422 }
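/* Illustrative note (not in the original source): the AVX-512 vpermi2*
   instructions index into the concatenation of two source vectors, so a
   single instruction implements the full two-operand variable permute that
   the pre-AVX-512 paths below have to assemble from several shuffles.  */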
21423
21424 /* Expand a variable vector permutation. */
21425
21426 void
21427 ix86_expand_vec_perm (rtx operands[])
21428 {
21429 rtx target = operands[0];
21430 rtx op0 = operands[1];
21431 rtx op1 = operands[2];
21432 rtx mask = operands[3];
21433 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21434 enum machine_mode mode = GET_MODE (op0);
21435 enum machine_mode maskmode = GET_MODE (mask);
21436 int w, e, i;
21437 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21438
21439 /* Number of elements in the vector. */
21440 w = GET_MODE_NUNITS (mode);
21441 e = GET_MODE_UNIT_SIZE (mode);
21442 gcc_assert (w <= 64);
21443
21444 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21445 return;
21446
21447 if (TARGET_AVX2)
21448 {
21449 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21450 {
21451 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21452 a constant shuffle operand. With a tiny bit of effort we can
21453 use VPERMD instead. A re-interpretation stall for V4DFmode is
21454 unfortunate but there's no avoiding it.
21455 Similarly for V16HImode we don't have instructions for variable
21456 shuffling, while for V32QImode, after preparing suitable masks,
21457 we can use vpshufb; vpshufb; vpermq; vpor. */
21458
21459 if (mode == V16HImode)
21460 {
21461 maskmode = mode = V32QImode;
21462 w = 32;
21463 e = 1;
21464 }
21465 else
21466 {
21467 maskmode = mode = V8SImode;
21468 w = 8;
21469 e = 4;
21470 }
21471 t1 = gen_reg_rtx (maskmode);
21472
21473 /* Replicate the low bits of the V4DImode mask into V8SImode:
21474 mask = { A B C D }
21475 t1 = { A A B B C C D D }. */
21476 for (i = 0; i < w / 2; ++i)
21477 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21478 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21479 vt = force_reg (maskmode, vt);
21480 mask = gen_lowpart (maskmode, mask);
21481 if (maskmode == V8SImode)
21482 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21483 else
21484 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21485
21486 /* Multiply the shuffle indices by two. */
21487 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21488 OPTAB_DIRECT);
21489
21490 /* Add one to the odd shuffle indices:
21491 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21492 for (i = 0; i < w / 2; ++i)
21493 {
21494 vec[i * 2] = const0_rtx;
21495 vec[i * 2 + 1] = const1_rtx;
21496 }
21497 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21498 vt = validize_mem (force_const_mem (maskmode, vt));
21499 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21500 OPTAB_DIRECT);
21501
21502 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21503 operands[3] = mask = t1;
21504 target = gen_reg_rtx (mode);
21505 op0 = gen_lowpart (mode, op0);
21506 op1 = gen_lowpart (mode, op1);
21507 }
21508
21509 switch (mode)
21510 {
21511 case V8SImode:
21512 /* The VPERMD and VPERMPS instructions already properly ignore
21513 the high bits of the shuffle elements. No need for us to
21514 perform an AND ourselves. */
21515 if (one_operand_shuffle)
21516 {
21517 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21518 if (target != operands[0])
21519 emit_move_insn (operands[0],
21520 gen_lowpart (GET_MODE (operands[0]), target));
21521 }
21522 else
21523 {
21524 t1 = gen_reg_rtx (V8SImode);
21525 t2 = gen_reg_rtx (V8SImode);
21526 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21527 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21528 goto merge_two;
21529 }
21530 return;
21531
21532 case V8SFmode:
21533 mask = gen_lowpart (V8SImode, mask);
21534 if (one_operand_shuffle)
21535 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21536 else
21537 {
21538 t1 = gen_reg_rtx (V8SFmode);
21539 t2 = gen_reg_rtx (V8SFmode);
21540 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21541 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21542 goto merge_two;
21543 }
21544 return;
21545
21546 case V4SImode:
21547 /* By combining the two 128-bit input vectors into one 256-bit
21548 input vector, we can use VPERMD and VPERMPS for the full
21549 two-operand shuffle. */
21550 t1 = gen_reg_rtx (V8SImode);
21551 t2 = gen_reg_rtx (V8SImode);
21552 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21553 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21554 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21555 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21556 return;
21557
21558 case V4SFmode:
21559 t1 = gen_reg_rtx (V8SFmode);
21560 t2 = gen_reg_rtx (V8SImode);
21561 mask = gen_lowpart (V4SImode, mask);
21562 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21563 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21564 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21565 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21566 return;
21567
21568 case V32QImode:
21569 t1 = gen_reg_rtx (V32QImode);
21570 t2 = gen_reg_rtx (V32QImode);
21571 t3 = gen_reg_rtx (V32QImode);
21572 vt2 = GEN_INT (-128);
21573 for (i = 0; i < 32; i++)
21574 vec[i] = vt2;
21575 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21576 vt = force_reg (V32QImode, vt);
21577 for (i = 0; i < 32; i++)
21578 vec[i] = i < 16 ? vt2 : const0_rtx;
21579 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21580 vt2 = force_reg (V32QImode, vt2);
21581 /* From mask create two adjusted masks, which contain the same
21582 bits as mask in the low 7 bits of each vector element.
21583 The first mask will have the most significant bit clear
21584 if it requests element from the same 128-bit lane
21585 and MSB set if it requests element from the other 128-bit lane.
21586 The second mask will have the opposite values of the MSB,
21587 and additionally will have its 128-bit lanes swapped.
21588 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21589 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21590 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21591 stands for the other 12 bytes. */
21592 /* The bit that tells whether an element comes from the same lane
21593 or the other lane is bit 4, so shift it up by 3 to the MSB position. */
21594 t5 = gen_reg_rtx (V4DImode);
21595 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21596 GEN_INT (3)));
21597 /* Clear MSB bits from the mask just in case it had them set. */
21598 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21599 /* After this t1 will have MSB set for elements from other lane. */
21600 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21601 /* Clear bits other than MSB. */
21602 emit_insn (gen_andv32qi3 (t1, t1, vt));
21603 /* Or in the lower bits from mask into t3. */
21604 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21605 /* And invert MSB bits in t1, so MSB is set for elements from the same
21606 lane. */
21607 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21608 /* Swap 128-bit lanes in t3. */
21609 t6 = gen_reg_rtx (V4DImode);
21610 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21611 const2_rtx, GEN_INT (3),
21612 const0_rtx, const1_rtx));
21613 /* And or in the lower bits from mask into t1. */
21614 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21615 if (one_operand_shuffle)
21616 {
21617 /* Each of these shuffles will put 0s in places where an
21618 element from the other 128-bit lane is needed; otherwise it
21619 will shuffle in the requested value. */
21620 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21621 gen_lowpart (V32QImode, t6)));
21622 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21623 /* For t3 the 128-bit lanes are swapped again. */
21624 t7 = gen_reg_rtx (V4DImode);
21625 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21626 const2_rtx, GEN_INT (3),
21627 const0_rtx, const1_rtx));
21628 /* And oring both together leads to the result. */
21629 emit_insn (gen_iorv32qi3 (target, t1,
21630 gen_lowpart (V32QImode, t7)));
21631 if (target != operands[0])
21632 emit_move_insn (operands[0],
21633 gen_lowpart (GET_MODE (operands[0]), target));
21634 return;
21635 }
21636
21637 t4 = gen_reg_rtx (V32QImode);
21638 /* Similarly to the above one_operand_shuffle code,
21639 just repeated twice, once for each operand. The merge_two
21640 code will merge the two results together. */
21641 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21642 gen_lowpart (V32QImode, t6)));
21643 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21644 gen_lowpart (V32QImode, t6)));
21645 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21646 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21647 t7 = gen_reg_rtx (V4DImode);
21648 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21649 const2_rtx, GEN_INT (3),
21650 const0_rtx, const1_rtx));
21651 t8 = gen_reg_rtx (V4DImode);
21652 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21653 const2_rtx, GEN_INT (3),
21654 const0_rtx, const1_rtx));
21655 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21656 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21657 t1 = t4;
21658 t2 = t3;
21659 goto merge_two;
21660
21661 default:
21662 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21663 break;
21664 }
21665 }
21666
21667 if (TARGET_XOP)
21668 {
21669 /* The XOP VPPERM insn supports three inputs. By ignoring the
21670 one_operand_shuffle special case, we avoid creating another
21671 set of constant vectors in memory. */
21672 one_operand_shuffle = false;
21673
21674 /* mask = mask & {2*w-1, ...} */
21675 vt = GEN_INT (2*w - 1);
21676 }
21677 else
21678 {
21679 /* mask = mask & {w-1, ...} */
21680 vt = GEN_INT (w - 1);
21681 }
21682
21683 for (i = 0; i < w; i++)
21684 vec[i] = vt;
21685 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21686 mask = expand_simple_binop (maskmode, AND, mask, vt,
21687 NULL_RTX, 0, OPTAB_DIRECT);
21688
21689 /* For non-QImode operations, convert the word permutation control
21690 into a byte permutation control. */
21691 if (mode != V16QImode)
21692 {
21693 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21694 GEN_INT (exact_log2 (e)),
21695 NULL_RTX, 0, OPTAB_DIRECT);
21696
21697 /* Convert mask to vector of chars. */
21698 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21699
21700 /* Replicate each of the input bytes into byte positions:
21701 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21702 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21703 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21704 for (i = 0; i < 16; ++i)
21705 vec[i] = GEN_INT (i/e * e);
21706 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21707 vt = validize_mem (force_const_mem (V16QImode, vt));
21708 if (TARGET_XOP)
21709 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21710 else
21711 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21712
21713 /* Convert it into the byte positions by doing
21714 mask = mask + {0,1,..,16/w-1, 0,1,..,16/w-1, ...} */
21715 for (i = 0; i < 16; ++i)
21716 vec[i] = GEN_INT (i % e);
21717 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21718 vt = validize_mem (force_const_mem (V16QImode, vt));
21719 emit_insn (gen_addv16qi3 (mask, mask, vt));
21720 }
21721
21722 /* The actual shuffle operations all operate on V16QImode. */
21723 op0 = gen_lowpart (V16QImode, op0);
21724 op1 = gen_lowpart (V16QImode, op1);
21725
21726 if (TARGET_XOP)
21727 {
21728 if (GET_MODE (target) != V16QImode)
21729 target = gen_reg_rtx (V16QImode);
21730 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21731 if (target != operands[0])
21732 emit_move_insn (operands[0],
21733 gen_lowpart (GET_MODE (operands[0]), target));
21734 }
21735 else if (one_operand_shuffle)
21736 {
21737 if (GET_MODE (target) != V16QImode)
21738 target = gen_reg_rtx (V16QImode);
21739 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21740 if (target != operands[0])
21741 emit_move_insn (operands[0],
21742 gen_lowpart (GET_MODE (operands[0]), target));
21743 }
21744 else
21745 {
21746 rtx xops[6];
21747 bool ok;
21748
21749 /* Shuffle the two input vectors independently. */
21750 t1 = gen_reg_rtx (V16QImode);
21751 t2 = gen_reg_rtx (V16QImode);
21752 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21753 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21754
21755 merge_two:
21756 /* Then merge them together. The key is whether any given control
21757 element contained a bit set that indicates the second word. */
21758 mask = operands[3];
21759 vt = GEN_INT (w);
21760 if (maskmode == V2DImode && !TARGET_SSE4_1)
21761 {
21762 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21763 more shuffle to convert the V2DI input mask into a V4SI
21764 input mask. At that point the masking that expand_int_vcond
21765 performs will work as desired. */
21766 rtx t3 = gen_reg_rtx (V4SImode);
21767 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21768 const0_rtx, const0_rtx,
21769 const2_rtx, const2_rtx));
21770 mask = t3;
21771 maskmode = V4SImode;
21772 e = w = 4;
21773 }
21774
21775 for (i = 0; i < w; i++)
21776 vec[i] = vt;
21777 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21778 vt = force_reg (maskmode, vt);
21779 mask = expand_simple_binop (maskmode, AND, mask, vt,
21780 NULL_RTX, 0, OPTAB_DIRECT);
21781
21782 if (GET_MODE (target) != mode)
21783 target = gen_reg_rtx (mode);
21784 xops[0] = target;
21785 xops[1] = gen_lowpart (mode, t2);
21786 xops[2] = gen_lowpart (mode, t1);
21787 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21788 xops[4] = mask;
21789 xops[5] = vt;
21790 ok = ix86_expand_int_vcond (xops);
21791 gcc_assert (ok);
21792 if (target != operands[0])
21793 emit_move_insn (operands[0],
21794 gen_lowpart (GET_MODE (operands[0]), target));
21795 }
21796 }
21797
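/* Editorial illustration (not part of the backend): a minimal sketch of the
   element-wise semantics that ix86_expand_vec_perm above implements, written
   for byte elements.  Selector bits above the low log2(2*W) bits are
   ignored, matching the masking done by the expander.  The helper name
   vec_perm_ref is hypothetical and exists only for this sketch.  */
#if 0
#include <stddef.h>

static void
vec_perm_ref (unsigned char *dst, const unsigned char *op0,
              const unsigned char *op1, const unsigned char *sel, size_t w)
{
  size_t i;
  for (i = 0; i < w; i++)
    {
      size_t e = sel[i] & (2 * w - 1);  /* mask = mask & {2*w-1, ...}  */
      dst[i] = e < w ? op0[e] : op1[e - w];
    }
}
#endif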
21798 /* Unpack SRC into DEST, which has the next wider integer vector type.
21799 UNSIGNED_P is true if we should do zero extension, else sign extension.
21800 HIGH_P is true if we want the N/2 high elements, else the low elements. */
21801
21802 void
21803 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21804 {
21805 enum machine_mode imode = GET_MODE (src);
21806 rtx tmp;
21807
21808 if (TARGET_SSE4_1)
21809 {
21810 rtx (*unpack)(rtx, rtx);
21811 rtx (*extract)(rtx, rtx) = NULL;
21812 enum machine_mode halfmode = BLKmode;
21813
21814 switch (imode)
21815 {
21816 case V32QImode:
21817 if (unsigned_p)
21818 unpack = gen_avx2_zero_extendv16qiv16hi2;
21819 else
21820 unpack = gen_avx2_sign_extendv16qiv16hi2;
21821 halfmode = V16QImode;
21822 extract
21823 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21824 break;
21825 case V32HImode:
21826 if (unsigned_p)
21827 unpack = gen_avx512f_zero_extendv16hiv16si2;
21828 else
21829 unpack = gen_avx512f_sign_extendv16hiv16si2;
21830 halfmode = V16HImode;
21831 extract
21832 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21833 break;
21834 case V16HImode:
21835 if (unsigned_p)
21836 unpack = gen_avx2_zero_extendv8hiv8si2;
21837 else
21838 unpack = gen_avx2_sign_extendv8hiv8si2;
21839 halfmode = V8HImode;
21840 extract
21841 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21842 break;
21843 case V16SImode:
21844 if (unsigned_p)
21845 unpack = gen_avx512f_zero_extendv8siv8di2;
21846 else
21847 unpack = gen_avx512f_sign_extendv8siv8di2;
21848 halfmode = V8SImode;
21849 extract
21850 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21851 break;
21852 case V8SImode:
21853 if (unsigned_p)
21854 unpack = gen_avx2_zero_extendv4siv4di2;
21855 else
21856 unpack = gen_avx2_sign_extendv4siv4di2;
21857 halfmode = V4SImode;
21858 extract
21859 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21860 break;
21861 case V16QImode:
21862 if (unsigned_p)
21863 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21864 else
21865 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21866 break;
21867 case V8HImode:
21868 if (unsigned_p)
21869 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21870 else
21871 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21872 break;
21873 case V4SImode:
21874 if (unsigned_p)
21875 unpack = gen_sse4_1_zero_extendv2siv2di2;
21876 else
21877 unpack = gen_sse4_1_sign_extendv2siv2di2;
21878 break;
21879 default:
21880 gcc_unreachable ();
21881 }
21882
21883 if (GET_MODE_SIZE (imode) >= 32)
21884 {
21885 tmp = gen_reg_rtx (halfmode);
21886 emit_insn (extract (tmp, src));
21887 }
21888 else if (high_p)
21889 {
21890 /* Shift higher 8 bytes to lower 8 bytes. */
21891 tmp = gen_reg_rtx (V1TImode);
21892 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21893 GEN_INT (64)));
21894 tmp = gen_lowpart (imode, tmp);
21895 }
21896 else
21897 tmp = src;
21898
21899 emit_insn (unpack (dest, tmp));
21900 }
21901 else
21902 {
21903 rtx (*unpack)(rtx, rtx, rtx);
21904
21905 switch (imode)
21906 {
21907 case V16QImode:
21908 if (high_p)
21909 unpack = gen_vec_interleave_highv16qi;
21910 else
21911 unpack = gen_vec_interleave_lowv16qi;
21912 break;
21913 case V8HImode:
21914 if (high_p)
21915 unpack = gen_vec_interleave_highv8hi;
21916 else
21917 unpack = gen_vec_interleave_lowv8hi;
21918 break;
21919 case V4SImode:
21920 if (high_p)
21921 unpack = gen_vec_interleave_highv4si;
21922 else
21923 unpack = gen_vec_interleave_lowv4si;
21924 break;
21925 default:
21926 gcc_unreachable ();
21927 }
21928
21929 if (unsigned_p)
21930 tmp = force_reg (imode, CONST0_RTX (imode));
21931 else
21932 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21933 src, pc_rtx, pc_rtx);
21934
21935 rtx tmp2 = gen_reg_rtx (imode);
21936 emit_insn (unpack (tmp2, src, tmp));
21937 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21938 }
21939 }
21940
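/* Editorial illustration (not part of the backend): what the unpack above
   computes for the V16QI -> V8HI case, using plain arrays.  HIGH_P selects
   the upper half of the source, UNSIGNED_P selects zero vs. sign extension.
   The helper name sse_unpack_ref is hypothetical.  */
#if 0
#include <stdint.h>
#include <stdbool.h>

static void
sse_unpack_ref (int16_t dst[8], const int8_t src[16],
                bool unsigned_p, bool high_p)
{
  int i, base = high_p ? 8 : 0;
  for (i = 0; i < 8; i++)
    dst[i] = unsigned_p ? (int16_t) (uint8_t) src[base + i]
                        : (int16_t) src[base + i];
}
#endif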
21941 /* Expand conditional increment or decrement using adc/sbb instructions.
21942 The default case using setcc followed by the conditional move can be
21943 done by generic code. */
21944 bool
21945 ix86_expand_int_addcc (rtx operands[])
21946 {
21947 enum rtx_code code = GET_CODE (operands[1]);
21948 rtx flags;
21949 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21950 rtx compare_op;
21951 rtx val = const0_rtx;
21952 bool fpcmp = false;
21953 enum machine_mode mode;
21954 rtx op0 = XEXP (operands[1], 0);
21955 rtx op1 = XEXP (operands[1], 1);
21956
21957 if (operands[3] != const1_rtx
21958 && operands[3] != constm1_rtx)
21959 return false;
21960 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21961 return false;
21962 code = GET_CODE (compare_op);
21963
21964 flags = XEXP (compare_op, 0);
21965
21966 if (GET_MODE (flags) == CCFPmode
21967 || GET_MODE (flags) == CCFPUmode)
21968 {
21969 fpcmp = true;
21970 code = ix86_fp_compare_code_to_integer (code);
21971 }
21972
21973 if (code != LTU)
21974 {
21975 val = constm1_rtx;
21976 if (fpcmp)
21977 PUT_CODE (compare_op,
21978 reverse_condition_maybe_unordered
21979 (GET_CODE (compare_op)));
21980 else
21981 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21982 }
21983
21984 mode = GET_MODE (operands[0]);
21985
21986 /* Construct either adc or sbb insn. */
21987 if ((code == LTU) == (operands[3] == constm1_rtx))
21988 {
21989 switch (mode)
21990 {
21991 case QImode:
21992 insn = gen_subqi3_carry;
21993 break;
21994 case HImode:
21995 insn = gen_subhi3_carry;
21996 break;
21997 case SImode:
21998 insn = gen_subsi3_carry;
21999 break;
22000 case DImode:
22001 insn = gen_subdi3_carry;
22002 break;
22003 default:
22004 gcc_unreachable ();
22005 }
22006 }
22007 else
22008 {
22009 switch (mode)
22010 {
22011 case QImode:
22012 insn = gen_addqi3_carry;
22013 break;
22014 case HImode:
22015 insn = gen_addhi3_carry;
22016 break;
22017 case SImode:
22018 insn = gen_addsi3_carry;
22019 break;
22020 case DImode:
22021 insn = gen_adddi3_carry;
22022 break;
22023 default:
22024 gcc_unreachable ();
22025 }
22026 }
22027 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22028
22029 return true;
22030 }
22031
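/* Editorial illustration (not part of the backend): the branchless pattern
   ix86_expand_int_addcc above targets.  A conditional +/-1 such as
   "r = a + (b < c)" can be emitted (in Intel syntax, roughly) as
   "cmp b, c ; adc a, 0", folding the carry flag produced by the compare
   into the addition instead of using setcc/cmov.  The reference below only
   states the semantics; the helper name cond_inc_ref is hypothetical.  */
#if 0
#include <stdint.h>

static uint32_t
cond_inc_ref (uint32_t a, uint32_t b, uint32_t c)
{
  /* The (b < c) term is the carry from the unsigned compare.  */
  return a + (b < c ? 1u : 0u);
}
#endif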
22032
22033 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22034 but works for floating point parameters and non-offsettable memories.
22035 For pushes, it returns just stack offsets; the values will be saved
22036 in the right order. At most four parts are generated. */
22037
22038 static int
22039 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
22040 {
22041 int size;
22042
22043 if (!TARGET_64BIT)
22044 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22045 else
22046 size = (GET_MODE_SIZE (mode) + 4) / 8;
22047
22048 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22049 gcc_assert (size >= 2 && size <= 4);
22050
22051 /* Optimize constant pool references to immediates. This is used by fp
22052 moves, which force all constants to memory to allow combining. */
22053 if (MEM_P (operand) && MEM_READONLY_P (operand))
22054 {
22055 rtx tmp = maybe_get_pool_constant (operand);
22056 if (tmp)
22057 operand = tmp;
22058 }
22059
22060 if (MEM_P (operand) && !offsettable_memref_p (operand))
22061 {
22062 /* The only non-offsettable memories we handle are pushes. */
22063 int ok = push_operand (operand, VOIDmode);
22064
22065 gcc_assert (ok);
22066
22067 operand = copy_rtx (operand);
22068 PUT_MODE (operand, word_mode);
22069 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22070 return size;
22071 }
22072
22073 if (GET_CODE (operand) == CONST_VECTOR)
22074 {
22075 enum machine_mode imode = int_mode_for_mode (mode);
22076 /* Caution: if we looked through a constant pool memory above,
22077 the operand may actually have a different mode now. That's
22078 ok, since we want to pun this all the way back to an integer. */
22079 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22080 gcc_assert (operand != NULL);
22081 mode = imode;
22082 }
22083
22084 if (!TARGET_64BIT)
22085 {
22086 if (mode == DImode)
22087 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22088 else
22089 {
22090 int i;
22091
22092 if (REG_P (operand))
22093 {
22094 gcc_assert (reload_completed);
22095 for (i = 0; i < size; i++)
22096 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22097 }
22098 else if (offsettable_memref_p (operand))
22099 {
22100 operand = adjust_address (operand, SImode, 0);
22101 parts[0] = operand;
22102 for (i = 1; i < size; i++)
22103 parts[i] = adjust_address (operand, SImode, 4 * i);
22104 }
22105 else if (GET_CODE (operand) == CONST_DOUBLE)
22106 {
22107 REAL_VALUE_TYPE r;
22108 long l[4];
22109
22110 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22111 switch (mode)
22112 {
22113 case TFmode:
22114 real_to_target (l, &r, mode);
22115 parts[3] = gen_int_mode (l[3], SImode);
22116 parts[2] = gen_int_mode (l[2], SImode);
22117 break;
22118 case XFmode:
22119 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22120 long double may not be 80-bit. */
22121 real_to_target (l, &r, mode);
22122 parts[2] = gen_int_mode (l[2], SImode);
22123 break;
22124 case DFmode:
22125 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22126 break;
22127 default:
22128 gcc_unreachable ();
22129 }
22130 parts[1] = gen_int_mode (l[1], SImode);
22131 parts[0] = gen_int_mode (l[0], SImode);
22132 }
22133 else
22134 gcc_unreachable ();
22135 }
22136 }
22137 else
22138 {
22139 if (mode == TImode)
22140 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22141 if (mode == XFmode || mode == TFmode)
22142 {
22143 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22144 if (REG_P (operand))
22145 {
22146 gcc_assert (reload_completed);
22147 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22148 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22149 }
22150 else if (offsettable_memref_p (operand))
22151 {
22152 operand = adjust_address (operand, DImode, 0);
22153 parts[0] = operand;
22154 parts[1] = adjust_address (operand, upper_mode, 8);
22155 }
22156 else if (GET_CODE (operand) == CONST_DOUBLE)
22157 {
22158 REAL_VALUE_TYPE r;
22159 long l[4];
22160
22161 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22162 real_to_target (l, &r, mode);
22163
22164 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22165 if (HOST_BITS_PER_WIDE_INT >= 64)
22166 parts[0]
22167 = gen_int_mode
22168 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22169 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22170 DImode);
22171 else
22172 parts[0] = immed_double_const (l[0], l[1], DImode);
22173
22174 if (upper_mode == SImode)
22175 parts[1] = gen_int_mode (l[2], SImode);
22176 else if (HOST_BITS_PER_WIDE_INT >= 64)
22177 parts[1]
22178 = gen_int_mode
22179 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22180 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22181 DImode);
22182 else
22183 parts[1] = immed_double_const (l[2], l[3], DImode);
22184 }
22185 else
22186 gcc_unreachable ();
22187 }
22188 }
22189
22190 return size;
22191 }
22192
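/* Editorial illustration (not part of the backend): the simplest case of the
   splitting done by ix86_split_to_parts above -- a 64-bit value on a 32-bit
   target decomposes into a low and a high SImode part, low part first.  The
   helper name split_di_ref is hypothetical.  */
#if 0
#include <stdint.h>

static void
split_di_ref (uint64_t x, uint32_t parts[2])
{
  parts[0] = (uint32_t) x;              /* low word  */
  parts[1] = (uint32_t) (x >> 32);      /* high word */
}
#endif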
22193 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22194 All required insns are emitted by this function. Operands 2-5
22195 receive the destination parts in the correct order; operands 6-9
22196 hold the corresponding source values. */
22197
22198 void
22199 ix86_split_long_move (rtx operands[])
22200 {
22201 rtx part[2][4];
22202 int nparts, i, j;
22203 int push = 0;
22204 int collisions = 0;
22205 enum machine_mode mode = GET_MODE (operands[0]);
22206 bool collisionparts[4];
22207
22208 /* The DFmode expanders may ask us to move a double.
22209 For a 64-bit target this is a single move. By hiding the fact
22210 here we simplify i386.md splitters. */
22211 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22212 {
22213 /* Optimize constant pool references to immediates. This is used by
22214 fp moves, which force all constants to memory to allow combining. */
22215
22216 if (MEM_P (operands[1])
22217 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22218 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22219 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22220 if (push_operand (operands[0], VOIDmode))
22221 {
22222 operands[0] = copy_rtx (operands[0]);
22223 PUT_MODE (operands[0], word_mode);
22224 }
22225 else
22226 operands[0] = gen_lowpart (DImode, operands[0]);
22227 operands[1] = gen_lowpart (DImode, operands[1]);
22228 emit_move_insn (operands[0], operands[1]);
22229 return;
22230 }
22231
22232 /* The only non-offsettable memory we handle is push. */
22233 if (push_operand (operands[0], VOIDmode))
22234 push = 1;
22235 else
22236 gcc_assert (!MEM_P (operands[0])
22237 || offsettable_memref_p (operands[0]));
22238
22239 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22240 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22241
22242 /* When emitting a push, take care of source operands on the stack. */
22243 if (push && MEM_P (operands[1])
22244 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22245 {
22246 rtx src_base = XEXP (part[1][nparts - 1], 0);
22247
22248 /* Compensate for the stack decrement by 4. */
22249 if (!TARGET_64BIT && nparts == 3
22250 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22251 src_base = plus_constant (Pmode, src_base, 4);
22252
22253 /* src_base refers to the stack pointer and is
22254 automatically decreased by emitted push. */
22255 for (i = 0; i < nparts; i++)
22256 part[1][i] = change_address (part[1][i],
22257 GET_MODE (part[1][i]), src_base);
22258 }
22259
22260 /* We need to do the copy in the right order in case an address register
22261 of the source overlaps the destination. */
22262 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22263 {
22264 rtx tmp;
22265
22266 for (i = 0; i < nparts; i++)
22267 {
22268 collisionparts[i]
22269 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22270 if (collisionparts[i])
22271 collisions++;
22272 }
22273
22274 /* Collision in the middle part can be handled by reordering. */
22275 if (collisions == 1 && nparts == 3 && collisionparts [1])
22276 {
22277 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22278 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22279 }
22280 else if (collisions == 1
22281 && nparts == 4
22282 && (collisionparts [1] || collisionparts [2]))
22283 {
22284 if (collisionparts [1])
22285 {
22286 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22287 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22288 }
22289 else
22290 {
22291 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22292 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22293 }
22294 }
22295
22296 /* If there are more collisions, we can't handle it by reordering.
22297 Do an lea to the last part and use only one colliding move. */
22298 else if (collisions > 1)
22299 {
22300 rtx base;
22301
22302 collisions = 1;
22303
22304 base = part[0][nparts - 1];
22305
22306 /* Handle the case when the last part isn't valid for lea.
22307 Happens in 64-bit mode storing the 12-byte XFmode. */
22308 if (GET_MODE (base) != Pmode)
22309 base = gen_rtx_REG (Pmode, REGNO (base));
22310
22311 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22312 part[1][0] = replace_equiv_address (part[1][0], base);
22313 for (i = 1; i < nparts; i++)
22314 {
22315 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22316 part[1][i] = replace_equiv_address (part[1][i], tmp);
22317 }
22318 }
22319 }
22320
22321 if (push)
22322 {
22323 if (!TARGET_64BIT)
22324 {
22325 if (nparts == 3)
22326 {
22327 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22328 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22329 stack_pointer_rtx, GEN_INT (-4)));
22330 emit_move_insn (part[0][2], part[1][2]);
22331 }
22332 else if (nparts == 4)
22333 {
22334 emit_move_insn (part[0][3], part[1][3]);
22335 emit_move_insn (part[0][2], part[1][2]);
22336 }
22337 }
22338 else
22339 {
22340 /* In 64-bit mode we don't have a 32-bit push available. In case this is
22341 a register, that is OK - we will just use the larger counterpart. We also
22342 retype memory - this comes from an attempt to avoid a REX prefix on
22343 the move of the second half of a TFmode value. */
22344 if (GET_MODE (part[1][1]) == SImode)
22345 {
22346 switch (GET_CODE (part[1][1]))
22347 {
22348 case MEM:
22349 part[1][1] = adjust_address (part[1][1], DImode, 0);
22350 break;
22351
22352 case REG:
22353 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22354 break;
22355
22356 default:
22357 gcc_unreachable ();
22358 }
22359
22360 if (GET_MODE (part[1][0]) == SImode)
22361 part[1][0] = part[1][1];
22362 }
22363 }
22364 emit_move_insn (part[0][1], part[1][1]);
22365 emit_move_insn (part[0][0], part[1][0]);
22366 return;
22367 }
22368
22369 /* Choose the correct order so as not to overwrite the source before it is copied. */
22370 if ((REG_P (part[0][0])
22371 && REG_P (part[1][1])
22372 && (REGNO (part[0][0]) == REGNO (part[1][1])
22373 || (nparts == 3
22374 && REGNO (part[0][0]) == REGNO (part[1][2]))
22375 || (nparts == 4
22376 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22377 || (collisions > 0
22378 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22379 {
22380 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22381 {
22382 operands[2 + i] = part[0][j];
22383 operands[6 + i] = part[1][j];
22384 }
22385 }
22386 else
22387 {
22388 for (i = 0; i < nparts; i++)
22389 {
22390 operands[2 + i] = part[0][i];
22391 operands[6 + i] = part[1][i];
22392 }
22393 }
22394
22395 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22396 if (optimize_insn_for_size_p ())
22397 {
22398 for (j = 0; j < nparts - 1; j++)
22399 if (CONST_INT_P (operands[6 + j])
22400 && operands[6 + j] != const0_rtx
22401 && REG_P (operands[2 + j]))
22402 for (i = j; i < nparts - 1; i++)
22403 if (CONST_INT_P (operands[7 + i])
22404 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22405 operands[7 + i] = operands[2 + j];
22406 }
22407
22408 for (i = 0; i < nparts; i++)
22409 emit_move_insn (operands[2 + i], operands[6 + i]);
22410
22411 return;
22412 }
22413
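/* Editorial illustration (not part of the backend): why ix86_split_long_move
   above chooses the copy order.  If the first destination register is also
   used to address the source memory, copying part 0 first would clobber the
   address before the remaining parts are read, so the parts are emitted in
   reverse order instead.  The helper name copy_parts_ref is hypothetical.  */
#if 0
#include <stddef.h>
#include <stdint.h>

static void
copy_parts_ref (uint32_t *dst, const uint32_t *src, size_t nparts,
                int dst0_overlaps_src_addr)
{
  size_t i;
  if (dst0_overlaps_src_addr)
    for (i = nparts; i-- > 0; )         /* highest part first  */
      dst[i] = src[i];
  else
    for (i = 0; i < nparts; i++)        /* natural order  */
      dst[i] = src[i];
}
#endif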
22414 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22415 left shift by a constant, either using a single shift or
22416 a sequence of add instructions. */
22417
22418 static void
22419 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22420 {
22421 rtx (*insn)(rtx, rtx, rtx);
22422
22423 if (count == 1
22424 || (count * ix86_cost->add <= ix86_cost->shift_const
22425 && !optimize_insn_for_size_p ()))
22426 {
22427 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22428 while (count-- > 0)
22429 emit_insn (insn (operand, operand, operand));
22430 }
22431 else
22432 {
22433 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22434 emit_insn (insn (operand, operand, GEN_INT (count)));
22435 }
22436 }
22437
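/* Editorial illustration (not part of the backend): the equivalence used by
   ix86_expand_ashl_const above.  Shifting left by COUNT is the same as
   doubling COUNT times, and for very small counts a sequence of adds can be
   cheaper than a shift by an immediate on some processors.  The helper name
   ashl_by_adds_ref is hypothetical.  */
#if 0
#include <stdint.h>

static uint32_t
ashl_by_adds_ref (uint32_t x, int count)
{
  while (count-- > 0)
    x = x + x;                          /* same effect as x <<= 1  */
  return x;
}
#endif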
22438 void
22439 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22440 {
22441 rtx (*gen_ashl3)(rtx, rtx, rtx);
22442 rtx (*gen_shld)(rtx, rtx, rtx);
22443 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22444
22445 rtx low[2], high[2];
22446 int count;
22447
22448 if (CONST_INT_P (operands[2]))
22449 {
22450 split_double_mode (mode, operands, 2, low, high);
22451 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22452
22453 if (count >= half_width)
22454 {
22455 emit_move_insn (high[0], low[1]);
22456 emit_move_insn (low[0], const0_rtx);
22457
22458 if (count > half_width)
22459 ix86_expand_ashl_const (high[0], count - half_width, mode);
22460 }
22461 else
22462 {
22463 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22464
22465 if (!rtx_equal_p (operands[0], operands[1]))
22466 emit_move_insn (operands[0], operands[1]);
22467
22468 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22469 ix86_expand_ashl_const (low[0], count, mode);
22470 }
22471 return;
22472 }
22473
22474 split_double_mode (mode, operands, 1, low, high);
22475
22476 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22477
22478 if (operands[1] == const1_rtx)
22479 {
22480 /* Assuming we've chosen QImode-capable registers, then 1 << N
22481 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22482 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22483 {
22484 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22485
22486 ix86_expand_clear (low[0]);
22487 ix86_expand_clear (high[0]);
22488 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22489
22490 d = gen_lowpart (QImode, low[0]);
22491 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22492 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22493 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22494
22495 d = gen_lowpart (QImode, high[0]);
22496 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22497 s = gen_rtx_NE (QImode, flags, const0_rtx);
22498 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22499 }
22500
22501 /* Otherwise, we can get the same results by manually performing
22502 a bit extract operation on bit 5/6, and then performing the two
22503 shifts. The two methods of getting 0/1 into low/high are exactly
22504 the same size. Avoiding the shift in the bit extract case helps
22505 pentium4 a bit; no one else seems to care much either way. */
22506 else
22507 {
22508 enum machine_mode half_mode;
22509 rtx (*gen_lshr3)(rtx, rtx, rtx);
22510 rtx (*gen_and3)(rtx, rtx, rtx);
22511 rtx (*gen_xor3)(rtx, rtx, rtx);
22512 HOST_WIDE_INT bits;
22513 rtx x;
22514
22515 if (mode == DImode)
22516 {
22517 half_mode = SImode;
22518 gen_lshr3 = gen_lshrsi3;
22519 gen_and3 = gen_andsi3;
22520 gen_xor3 = gen_xorsi3;
22521 bits = 5;
22522 }
22523 else
22524 {
22525 half_mode = DImode;
22526 gen_lshr3 = gen_lshrdi3;
22527 gen_and3 = gen_anddi3;
22528 gen_xor3 = gen_xordi3;
22529 bits = 6;
22530 }
22531
22532 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22533 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22534 else
22535 x = gen_lowpart (half_mode, operands[2]);
22536 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22537
22538 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22539 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22540 emit_move_insn (low[0], high[0]);
22541 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22542 }
22543
22544 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22545 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22546 return;
22547 }
22548
22549 if (operands[1] == constm1_rtx)
22550 {
22551 /* For -1 << N, we can avoid the shld instruction, because we
22552 know that we're shifting 0...31/63 ones into a -1. */
22553 emit_move_insn (low[0], constm1_rtx);
22554 if (optimize_insn_for_size_p ())
22555 emit_move_insn (high[0], low[0]);
22556 else
22557 emit_move_insn (high[0], constm1_rtx);
22558 }
22559 else
22560 {
22561 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22562
22563 if (!rtx_equal_p (operands[0], operands[1]))
22564 emit_move_insn (operands[0], operands[1]);
22565
22566 split_double_mode (mode, operands, 1, low, high);
22567 emit_insn (gen_shld (high[0], low[0], operands[2]));
22568 }
22569
22570 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22571
22572 if (TARGET_CMOVE && scratch)
22573 {
22574 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22575 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22576
22577 ix86_expand_clear (scratch);
22578 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22579 }
22580 else
22581 {
22582 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22583 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22584
22585 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22586 }
22587 }
22588
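/* Editorial illustration (not part of the backend): the double-word left
   shift that ix86_split_ashl above expands, shown for a 64-bit value held in
   two 32-bit halves with a constant count (the shld case); counts of 32 or
   more instead move LOW into HIGH and clear LOW.  The right-shift splitters
   below are the mirror image using shrd.  The helper name dw_ashl_ref is
   hypothetical.  */
#if 0
#include <stdint.h>

static void
dw_ashl_ref (uint32_t *low, uint32_t *high, unsigned count)
{
  count &= 63;                          /* the splitter masks the count  */
  if (count >= 32)
    {
      *high = *low << (count - 32);
      *low = 0;
    }
  else if (count > 0)
    {
      *high = (*high << count) | (*low >> (32 - count));  /* shld  */
      *low <<= count;
    }
}
#endif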
22589 void
22590 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22591 {
22592 rtx (*gen_ashr3)(rtx, rtx, rtx)
22593 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22594 rtx (*gen_shrd)(rtx, rtx, rtx);
22595 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22596
22597 rtx low[2], high[2];
22598 int count;
22599
22600 if (CONST_INT_P (operands[2]))
22601 {
22602 split_double_mode (mode, operands, 2, low, high);
22603 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22604
22605 if (count == GET_MODE_BITSIZE (mode) - 1)
22606 {
22607 emit_move_insn (high[0], high[1]);
22608 emit_insn (gen_ashr3 (high[0], high[0],
22609 GEN_INT (half_width - 1)));
22610 emit_move_insn (low[0], high[0]);
22611
22612 }
22613 else if (count >= half_width)
22614 {
22615 emit_move_insn (low[0], high[1]);
22616 emit_move_insn (high[0], low[0]);
22617 emit_insn (gen_ashr3 (high[0], high[0],
22618 GEN_INT (half_width - 1)));
22619
22620 if (count > half_width)
22621 emit_insn (gen_ashr3 (low[0], low[0],
22622 GEN_INT (count - half_width)));
22623 }
22624 else
22625 {
22626 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22627
22628 if (!rtx_equal_p (operands[0], operands[1]))
22629 emit_move_insn (operands[0], operands[1]);
22630
22631 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22632 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22633 }
22634 }
22635 else
22636 {
22637 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22638
22639 if (!rtx_equal_p (operands[0], operands[1]))
22640 emit_move_insn (operands[0], operands[1]);
22641
22642 split_double_mode (mode, operands, 1, low, high);
22643
22644 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22645 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22646
22647 if (TARGET_CMOVE && scratch)
22648 {
22649 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22650 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22651
22652 emit_move_insn (scratch, high[0]);
22653 emit_insn (gen_ashr3 (scratch, scratch,
22654 GEN_INT (half_width - 1)));
22655 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22656 scratch));
22657 }
22658 else
22659 {
22660 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22661 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22662
22663 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22664 }
22665 }
22666 }
22667
22668 void
22669 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22670 {
22671 rtx (*gen_lshr3)(rtx, rtx, rtx)
22672 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22673 rtx (*gen_shrd)(rtx, rtx, rtx);
22674 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22675
22676 rtx low[2], high[2];
22677 int count;
22678
22679 if (CONST_INT_P (operands[2]))
22680 {
22681 split_double_mode (mode, operands, 2, low, high);
22682 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22683
22684 if (count >= half_width)
22685 {
22686 emit_move_insn (low[0], high[1]);
22687 ix86_expand_clear (high[0]);
22688
22689 if (count > half_width)
22690 emit_insn (gen_lshr3 (low[0], low[0],
22691 GEN_INT (count - half_width)));
22692 }
22693 else
22694 {
22695 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22696
22697 if (!rtx_equal_p (operands[0], operands[1]))
22698 emit_move_insn (operands[0], operands[1]);
22699
22700 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22701 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22702 }
22703 }
22704 else
22705 {
22706 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22707
22708 if (!rtx_equal_p (operands[0], operands[1]))
22709 emit_move_insn (operands[0], operands[1]);
22710
22711 split_double_mode (mode, operands, 1, low, high);
22712
22713 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22714 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22715
22716 if (TARGET_CMOVE && scratch)
22717 {
22718 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22719 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22720
22721 ix86_expand_clear (scratch);
22722 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22723 scratch));
22724 }
22725 else
22726 {
22727 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22728 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22729
22730 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22731 }
22732 }
22733 }
22734
22735 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
22736 static void
22737 predict_jump (int prob)
22738 {
22739 rtx insn = get_last_insn ();
22740 gcc_assert (JUMP_P (insn));
22741 add_int_reg_note (insn, REG_BR_PROB, prob);
22742 }
22743
22744 /* Helper function for the string operations below. Test whether
22745 VARIABLE is aligned to VALUE bytes. If so, jump to the label. */
22746 static rtx_code_label *
22747 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22748 {
22749 rtx_code_label *label = gen_label_rtx ();
22750 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22751 if (GET_MODE (variable) == DImode)
22752 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22753 else
22754 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22755 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22756 1, label);
22757 if (epilogue)
22758 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22759 else
22760 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22761 return label;
22762 }
22763
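/* Editorial illustration (not part of the backend): the test emitted by
   ix86_expand_aligntest above.  The caller emits a copy of VALUE bytes
   between this test and the returned label, so that chunk is executed only
   when the tested bit of VARIABLE is set; the jump over it is taken when
   (VARIABLE & VALUE) == 0.  The helper name aligntest_ref is hypothetical.  */
#if 0
static int
aligntest_ref (unsigned long variable, unsigned long value)
{
  return (variable & value) == 0;       /* nonzero => branch over the chunk  */
}
#endif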
22764 /* Adjust COUNTREG by VALUE, i.e. decrement the counter by VALUE. */
22765 static void
22766 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22767 {
22768 rtx (*gen_add)(rtx, rtx, rtx)
22769 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22770
22771 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22772 }
22773
22774 /* Zero extend the possibly SImode EXP into a Pmode register. */
22775 rtx
22776 ix86_zero_extend_to_Pmode (rtx exp)
22777 {
22778 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22779 }
22780
22781 /* Divide COUNTREG by SCALE. */
22782 static rtx
22783 scale_counter (rtx countreg, int scale)
22784 {
22785 rtx sc;
22786
22787 if (scale == 1)
22788 return countreg;
22789 if (CONST_INT_P (countreg))
22790 return GEN_INT (INTVAL (countreg) / scale);
22791 gcc_assert (REG_P (countreg));
22792
22793 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22794 GEN_INT (exact_log2 (scale)),
22795 NULL, 1, OPTAB_DIRECT);
22796 return sc;
22797 }
22798
22799 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22800 DImode for constant loop counts. */
22801
22802 static enum machine_mode
22803 counter_mode (rtx count_exp)
22804 {
22805 if (GET_MODE (count_exp) != VOIDmode)
22806 return GET_MODE (count_exp);
22807 if (!CONST_INT_P (count_exp))
22808 return Pmode;
22809 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22810 return DImode;
22811 return SImode;
22812 }
22813
22814 /* Copy the address to a Pmode register. This is used for x32 to
22815 truncate a DImode TLS address to a SImode register. */
22816
22817 static rtx
22818 ix86_copy_addr_to_reg (rtx addr)
22819 {
22820 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
22821 return copy_addr_to_reg (addr);
22822 else
22823 {
22824 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22825 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22826 }
22827 }
22828
22829 /* When ISSETMEM is FALSE, output a simple loop to move memory from pointer
22830 SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the overall size
22831 is COUNT, specified in bytes. When ISSETMEM is TRUE, output the equivalent
22832 loop to set memory with VALUE (supposed to be in MODE).
22833
22834 The size is rounded down to whole number of chunk size moved at once.
22835 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
22836
22837
22838 static void
22839 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22840 rtx destptr, rtx srcptr, rtx value,
22841 rtx count, enum machine_mode mode, int unroll,
22842 int expected_size, bool issetmem)
22843 {
22844 rtx_code_label *out_label, *top_label;
22845 rtx iter, tmp;
22846 enum machine_mode iter_mode = counter_mode (count);
22847 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22848 rtx piece_size = GEN_INT (piece_size_n);
22849 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22850 rtx size;
22851 int i;
22852
22853 top_label = gen_label_rtx ();
22854 out_label = gen_label_rtx ();
22855 iter = gen_reg_rtx (iter_mode);
22856
22857 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22858 NULL, 1, OPTAB_DIRECT);
22859 /* Those two should combine. */
22860 if (piece_size == const1_rtx)
22861 {
22862 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22863 true, out_label);
22864 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22865 }
22866 emit_move_insn (iter, const0_rtx);
22867
22868 emit_label (top_label);
22869
22870 tmp = convert_modes (Pmode, iter_mode, iter, true);
22871
22872 /* This assert could be relaxed - in that case we'll need to compute
22873 the smallest power of two containing PIECE_SIZE_N and pass it to
22874 offset_address. */
22875 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22876 destmem = offset_address (destmem, tmp, piece_size_n);
22877 destmem = adjust_address (destmem, mode, 0);
22878
22879 if (!issetmem)
22880 {
22881 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22882 srcmem = adjust_address (srcmem, mode, 0);
22883
22884 /* When unrolling for chips that reorder memory reads and writes,
22885 we can save registers by using a single temporary.
22886 Also, using 4 temporaries is overkill in 32-bit mode. */
22887 if (!TARGET_64BIT && 0)
22888 {
22889 for (i = 0; i < unroll; i++)
22890 {
22891 if (i)
22892 {
22893 destmem =
22894 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22895 srcmem =
22896 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22897 }
22898 emit_move_insn (destmem, srcmem);
22899 }
22900 }
22901 else
22902 {
22903 rtx tmpreg[4];
22904 gcc_assert (unroll <= 4);
22905 for (i = 0; i < unroll; i++)
22906 {
22907 tmpreg[i] = gen_reg_rtx (mode);
22908 if (i)
22909 {
22910 srcmem =
22911 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22912 }
22913 emit_move_insn (tmpreg[i], srcmem);
22914 }
22915 for (i = 0; i < unroll; i++)
22916 {
22917 if (i)
22918 {
22919 destmem =
22920 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22921 }
22922 emit_move_insn (destmem, tmpreg[i]);
22923 }
22924 }
22925 }
22926 else
22927 for (i = 0; i < unroll; i++)
22928 {
22929 if (i)
22930 destmem =
22931 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22932 emit_move_insn (destmem, value);
22933 }
22934
22935 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22936 true, OPTAB_LIB_WIDEN);
22937 if (tmp != iter)
22938 emit_move_insn (iter, tmp);
22939
22940 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22941 true, top_label);
22942 if (expected_size != -1)
22943 {
22944 expected_size /= GET_MODE_SIZE (mode) * unroll;
22945 if (expected_size == 0)
22946 predict_jump (0);
22947 else if (expected_size > REG_BR_PROB_BASE)
22948 predict_jump (REG_BR_PROB_BASE - 1);
22949 else
22950 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22951 }
22952 else
22953 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22954 iter = ix86_zero_extend_to_Pmode (iter);
22955 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22956 true, OPTAB_LIB_WIDEN);
22957 if (tmp != destptr)
22958 emit_move_insn (destptr, tmp);
22959 if (!issetmem)
22960 {
22961 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22962 true, OPTAB_LIB_WIDEN);
22963 if (tmp != srcptr)
22964 emit_move_insn (srcptr, tmp);
22965 }
22966 emit_label (out_label);
22967 }
22968
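/* Editorial illustration (not part of the backend): a simplified C shape of
   the copy loop emitted above for the !ISSETMEM (memcpy) case, with chunk
   size CHUNK = GET_MODE_SIZE (MODE) * UNROLL.  CHUNK is a power of two, as
   the expander asserts; COUNT is rounded down to a multiple of CHUNK and the
   remainder is left for a separate epilogue.  The helper name
   movmem_loop_ref is hypothetical and the control flow is only an
   approximation of the emitted RTL.  */
#if 0
#include <stddef.h>
#include <string.h>

static void
movmem_loop_ref (char *dst, const char *src, size_t count, size_t chunk)
{
  size_t size = count & ~(chunk - 1);   /* count & piece_size_mask  */
  size_t iter;
  for (iter = 0; iter < size; iter += chunk)
    memcpy (dst + iter, src + iter, chunk);     /* the unrolled body  */
  /* DESTPTR and SRCPTR are then advanced by SIZE for the epilogue.  */
}
#endif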
22969 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
22970 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22971 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22972 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
22973 ORIG_VALUE is the original value passed to memset to fill the memory with.
22974 Other arguments have the same meaning as for the previous function. */
22975
22976 static void
22977 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
22978 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
22979 rtx count,
22980 enum machine_mode mode, bool issetmem)
22981 {
22982 rtx destexp;
22983 rtx srcexp;
22984 rtx countreg;
22985 HOST_WIDE_INT rounded_count;
22986
22987 /* If possible, it is shorter to use rep movs.
22988 TODO: Maybe it is better to move this logic to decide_alg. */
22989 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
22990 && (!issetmem || orig_value == const0_rtx))
22991 mode = SImode;
22992
22993 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
22994 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
22995
22996 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
22997 GET_MODE_SIZE (mode)));
22998 if (mode != QImode)
22999 {
23000 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23001 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23002 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23003 }
23004 else
23005 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23006 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23007 {
23008 rounded_count = (INTVAL (count)
23009 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23010 destmem = shallow_copy_rtx (destmem);
23011 set_mem_size (destmem, rounded_count);
23012 }
23013 else if (MEM_SIZE_KNOWN_P (destmem))
23014 clear_mem_size (destmem);
23015
23016 if (issetmem)
23017 {
23018 value = force_reg (mode, gen_lowpart (mode, value));
23019 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23020 }
23021 else
23022 {
23023 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23024 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23025 if (mode != QImode)
23026 {
23027 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23028 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23029 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23030 }
23031 else
23032 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23033 if (CONST_INT_P (count))
23034 {
23035 rounded_count = (INTVAL (count)
23036 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23037 srcmem = shallow_copy_rtx (srcmem);
23038 set_mem_size (srcmem, rounded_count);
23039 }
23040 else
23041 {
23042 if (MEM_SIZE_KNOWN_P (srcmem))
23043 clear_mem_size (srcmem);
23044 }
23045 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23046 destexp, srcexp));
23047 }
23048 }
23049
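/* Editorial illustration (not part of the backend): roughly what the
   "rep movs" expansion above does for the copy case.  The byte count is
   scaled down by the chunk size (the low bits are left for the epilogue) and
   a single string instruction then moves that many chunks, leaving both
   pointers past the copied region.  The helper name rep_movs_ref is
   hypothetical.  */
#if 0
#include <stddef.h>

static void
rep_movs_ref (char **destptr, const char **srcptr, size_t count,
              size_t chunk_size)
{
  size_t chunks = count / chunk_size;   /* scale_counter  */
  size_t bytes = chunks * chunk_size;
  size_t i;
  for (i = 0; i < bytes; i++)
    (*destptr)[i] = (*srcptr)[i];
  *destptr += bytes;                    /* rep movs advances both pointers  */
  *srcptr += bytes;
}
#endif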
23050 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23051 DESTMEM.
23052 SRCMEM is passed by pointer so it can be updated on return.
23053 The return value is the updated DESTMEM. */
23054 static rtx
23055 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23056 HOST_WIDE_INT size_to_move)
23057 {
23058 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23059 enum insn_code code;
23060 enum machine_mode move_mode;
23061 int piece_size, i;
23062
23063 /* Find the widest mode in which we could perform moves.
23064 Start with the biggest power of 2 not exceeding SIZE_TO_MOVE and halve
23065 it until a move of that size is supported. */
23066 piece_size = 1 << floor_log2 (size_to_move);
23067 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23068 code = optab_handler (mov_optab, move_mode);
23069 while (code == CODE_FOR_nothing && piece_size > 1)
23070 {
23071 piece_size >>= 1;
23072 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23073 code = optab_handler (mov_optab, move_mode);
23074 }
23075
23076 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23077 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23078 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23079 {
23080 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23081 move_mode = mode_for_vector (word_mode, nunits);
23082 code = optab_handler (mov_optab, move_mode);
23083 if (code == CODE_FOR_nothing)
23084 {
23085 move_mode = word_mode;
23086 piece_size = GET_MODE_SIZE (move_mode);
23087 code = optab_handler (mov_optab, move_mode);
23088 }
23089 }
23090 gcc_assert (code != CODE_FOR_nothing);
23091
23092 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23093 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23094
23095 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23096 gcc_assert (size_to_move % piece_size == 0);
23097 adjust = GEN_INT (piece_size);
23098 for (i = 0; i < size_to_move; i += piece_size)
23099 {
23100 /* We move from memory to memory, so we'll need to do it via
23101 a temporary register. */
23102 tempreg = gen_reg_rtx (move_mode);
23103 emit_insn (GEN_FCN (code) (tempreg, src));
23104 emit_insn (GEN_FCN (code) (dst, tempreg));
23105
23106 emit_move_insn (destptr,
23107 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23108 emit_move_insn (srcptr,
23109 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23110
23111 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23112 piece_size);
23113 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23114 piece_size);
23115 }
23116
23117 /* Update DST and SRC rtx. */
23118 *srcmem = src;
23119 return dst;
23120 }
23121
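/* Editorial illustration (not part of the backend): the piece selection done
   by emit_memmov above, reduced to byte arithmetic.  The largest
   power-of-two piece not exceeding SIZE_TO_MOVE is chosen (halved until a
   supported size is found; here every size up to MAX_PIECE is assumed
   supported), and SIZE_TO_MOVE is assumed to be a multiple of that piece,
   matching the gcc_assert above.  The helper name piecewise_copy_ref is
   hypothetical.  */
#if 0
#include <stddef.h>
#include <string.h>

static void
piecewise_copy_ref (char *dst, const char *src, size_t size_to_move,
                    size_t max_piece)
{
  size_t piece = 1, i;
  while (piece * 2 <= size_to_move && piece * 2 <= max_piece)
    piece *= 2;                         /* 1 << floor_log2 (size_to_move)  */
  for (i = 0; i < size_to_move; i += piece)
    memcpy (dst + i, src + i, piece);   /* one move per piece  */
}
#endif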
23122 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23123 static void
23124 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23125 rtx destptr, rtx srcptr, rtx count, int max_size)
23126 {
23127 rtx src, dest;
23128 if (CONST_INT_P (count))
23129 {
23130 HOST_WIDE_INT countval = INTVAL (count);
23131 HOST_WIDE_INT epilogue_size = countval % max_size;
23132 int i;
23133
23134 /* For now MAX_SIZE should be a power of 2. This assert could be
23135 relaxed, but it'll require a bit more complicated epilogue
23136 expanding. */
23137 gcc_assert ((max_size & (max_size - 1)) == 0);
23138 for (i = max_size; i >= 1; i >>= 1)
23139 {
23140 if (epilogue_size & i)
23141 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23142 }
23143 return;
23144 }
23145 if (max_size > 8)
23146 {
23147 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23148 count, 1, OPTAB_DIRECT);
23149 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23150 count, QImode, 1, 4, false);
23151 return;
23152 }
23153
23154 /* When there are stringops, we can cheaply increase dest and src pointers.
23155 Otherwise we save code size by maintaining an offset (zero is readily
23156 available from the preceding rep operation) and using x86 addressing modes.
23157 */
23158 if (TARGET_SINGLE_STRINGOP)
23159 {
23160 if (max_size > 4)
23161 {
23162 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23163 src = change_address (srcmem, SImode, srcptr);
23164 dest = change_address (destmem, SImode, destptr);
23165 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23166 emit_label (label);
23167 LABEL_NUSES (label) = 1;
23168 }
23169 if (max_size > 2)
23170 {
23171 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23172 src = change_address (srcmem, HImode, srcptr);
23173 dest = change_address (destmem, HImode, destptr);
23174 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23175 emit_label (label);
23176 LABEL_NUSES (label) = 1;
23177 }
23178 if (max_size > 1)
23179 {
23180 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23181 src = change_address (srcmem, QImode, srcptr);
23182 dest = change_address (destmem, QImode, destptr);
23183 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23184 emit_label (label);
23185 LABEL_NUSES (label) = 1;
23186 }
23187 }
23188 else
23189 {
23190 rtx offset = force_reg (Pmode, const0_rtx);
23191 rtx tmp;
23192
23193 if (max_size > 4)
23194 {
23195 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23196 src = change_address (srcmem, SImode, srcptr);
23197 dest = change_address (destmem, SImode, destptr);
23198 emit_move_insn (dest, src);
23199 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23200 true, OPTAB_LIB_WIDEN);
23201 if (tmp != offset)
23202 emit_move_insn (offset, tmp);
23203 emit_label (label);
23204 LABEL_NUSES (label) = 1;
23205 }
23206 if (max_size > 2)
23207 {
23208 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23209 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23210 src = change_address (srcmem, HImode, tmp);
23211 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23212 dest = change_address (destmem, HImode, tmp);
23213 emit_move_insn (dest, src);
23214 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23215 true, OPTAB_LIB_WIDEN);
23216 if (tmp != offset)
23217 emit_move_insn (offset, tmp);
23218 emit_label (label);
23219 LABEL_NUSES (label) = 1;
23220 }
23221 if (max_size > 1)
23222 {
23223 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23224 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23225 src = change_address (srcmem, QImode, tmp);
23226 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23227 dest = change_address (destmem, QImode, tmp);
23228 emit_move_insn (dest, src);
23229 emit_label (label);
23230 LABEL_NUSES (label) = 1;
23231 }
23232 }
23233 }
23234
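/* Editorial illustration (not part of the backend): the constant-count case
   of expand_movmem_epilogue above.  The residual count modulo MAX_SIZE is
   decomposed into its set bits and one move of each corresponding
   power-of-two size is emitted, largest first.  MAX_SIZE is a power of two,
   matching the gcc_assert above; the helper name movmem_epilogue_ref is
   hypothetical.  */
#if 0
#include <stddef.h>
#include <string.h>

static void
movmem_epilogue_ref (char *dst, const char *src, size_t countval,
                     size_t max_size)
{
  size_t epilogue = countval % max_size;
  size_t i;
  for (i = max_size; i >= 1; i >>= 1)
    if (epilogue & i)
      {
        memcpy (dst, src, i);
        dst += i;
        src += i;
      }
}
#endif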
23235 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23236 with value PROMOTED_VAL.
23237 DESTPTR is advanced past the bytes that are stored.
23238 The return value is the updated DESTMEM. */
23239 static rtx
23240 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23241 HOST_WIDE_INT size_to_move)
23242 {
23243 rtx dst = destmem, adjust;
23244 enum insn_code code;
23245 enum machine_mode move_mode;
23246 int piece_size, i;
23247
23248 /* Find the widest mode in which we could perform moves.
23249 Start with the biggest power of 2 not exceeding SIZE_TO_MOVE and halve
23250 it until a move of that size is supported. */
23251 move_mode = GET_MODE (promoted_val);
23252 if (move_mode == VOIDmode)
23253 move_mode = QImode;
23254 if (size_to_move < GET_MODE_SIZE (move_mode))
23255 {
23256 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23257 promoted_val = gen_lowpart (move_mode, promoted_val);
23258 }
23259 piece_size = GET_MODE_SIZE (move_mode);
23260 code = optab_handler (mov_optab, move_mode);
23261 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23262
23263 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23264
23265 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23266 gcc_assert (size_to_move % piece_size == 0);
23267 adjust = GEN_INT (piece_size);
23268 for (i = 0; i < size_to_move; i += piece_size)
23269 {
23270 if (piece_size <= GET_MODE_SIZE (word_mode))
23271 {
23272 emit_insn (gen_strset (destptr, dst, promoted_val));
23273 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23274 piece_size);
23275 continue;
23276 }
23277
23278 emit_insn (GEN_FCN (code) (dst, promoted_val));
23279
23280 emit_move_insn (destptr,
23281 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23282
23283 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23284 piece_size);
23285 }
23286
23287 /* Update DST rtx. */
23288 return dst;
23289 }
23290 /* Output code to set at most count & (max_size - 1) bytes starting at DESTMEM. */
23291 static void
23292 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23293 rtx count, int max_size)
23294 {
23295 count =
23296 expand_simple_binop (counter_mode (count), AND, count,
23297 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23298 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23299 gen_lowpart (QImode, value), count, QImode,
23300 1, max_size / 2, true);
23301 }
23302
23303 /* Output code to set at most count & (max_size - 1) bytes starting at DESTMEM. */
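/* Illustrative example (assumed values): for a non-constant COUNT and
   MAX_SIZE == 8 the code below emits a jump tree testing COUNT & 4,
   COUNT & 2 and COUNT & 1 in turn, storing the SImode, HImode and QImode
   low parts of VALUE respectively.  For a constant COUNT the needed
   stores are emitted straight from the set bits of COUNT % MAX_SIZE.  */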
23304 static void
23305 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23306 rtx count, int max_size)
23307 {
23308 rtx dest;
23309
23310 if (CONST_INT_P (count))
23311 {
23312 HOST_WIDE_INT countval = INTVAL (count);
23313 HOST_WIDE_INT epilogue_size = countval % max_size;
23314 int i;
23315
23316 /* For now MAX_SIZE should be a power of 2. This assert could be
23317 relaxed, but it would require a somewhat more complicated epilogue
23318 expansion. */
23319 gcc_assert ((max_size & (max_size - 1)) == 0);
23320 for (i = max_size; i >= 1; i >>= 1)
23321 {
23322 if (epilogue_size & i)
23323 {
23324 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23325 destmem = emit_memset (destmem, destptr, vec_value, i);
23326 else
23327 destmem = emit_memset (destmem, destptr, value, i);
23328 }
23329 }
23330 return;
23331 }
23332 if (max_size > 32)
23333 {
23334 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23335 return;
23336 }
23337 if (max_size > 16)
23338 {
23339 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23340 if (TARGET_64BIT)
23341 {
23342 dest = change_address (destmem, DImode, destptr);
23343 emit_insn (gen_strset (destptr, dest, value));
23344 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23345 emit_insn (gen_strset (destptr, dest, value));
23346 }
23347 else
23348 {
23349 dest = change_address (destmem, SImode, destptr);
23350 emit_insn (gen_strset (destptr, dest, value));
23351 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23352 emit_insn (gen_strset (destptr, dest, value));
23353 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23354 emit_insn (gen_strset (destptr, dest, value));
23355 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23356 emit_insn (gen_strset (destptr, dest, value));
23357 }
23358 emit_label (label);
23359 LABEL_NUSES (label) = 1;
23360 }
23361 if (max_size > 8)
23362 {
23363 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23364 if (TARGET_64BIT)
23365 {
23366 dest = change_address (destmem, DImode, destptr);
23367 emit_insn (gen_strset (destptr, dest, value));
23368 }
23369 else
23370 {
23371 dest = change_address (destmem, SImode, destptr);
23372 emit_insn (gen_strset (destptr, dest, value));
23373 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23374 emit_insn (gen_strset (destptr, dest, value));
23375 }
23376 emit_label (label);
23377 LABEL_NUSES (label) = 1;
23378 }
23379 if (max_size > 4)
23380 {
23381 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23382 dest = change_address (destmem, SImode, destptr);
23383 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23384 emit_label (label);
23385 LABEL_NUSES (label) = 1;
23386 }
23387 if (max_size > 2)
23388 {
23389 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23390 dest = change_address (destmem, HImode, destptr);
23391 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23392 emit_label (label);
23393 LABEL_NUSES (label) = 1;
23394 }
23395 if (max_size > 1)
23396 {
23397 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23398 dest = change_address (destmem, QImode, destptr);
23399 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23400 emit_label (label);
23401 LABEL_NUSES (label) = 1;
23402 }
23403 }
23404
23405 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
23406 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23407 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23408 ignored.
23409 Return value is updated DESTMEM. */
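/* Illustrative sketch of the emitted control flow (assuming ALIGN == 1 and
   DESIRED_ALIGNMENT == 8; this is a simplification, not the exact RTL):

       if (destptr & 1) { copy or set 1 byte;  count -= 1; }
       if (destptr & 2) { copy or set 2 bytes; count -= 2; }
       if (destptr & 4) { copy or set 4 bytes; count -= 4; }

   after which DESTMEM can be assumed to be 8-byte aligned.  */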
23410 static rtx
23411 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23412 rtx destptr, rtx srcptr, rtx value,
23413 rtx vec_value, rtx count, int align,
23414 int desired_alignment, bool issetmem)
23415 {
23416 int i;
23417 for (i = 1; i < desired_alignment; i <<= 1)
23418 {
23419 if (align <= i)
23420 {
23421 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
23422 if (issetmem)
23423 {
23424 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23425 destmem = emit_memset (destmem, destptr, vec_value, i);
23426 else
23427 destmem = emit_memset (destmem, destptr, value, i);
23428 }
23429 else
23430 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23431 ix86_adjust_counter (count, i);
23432 emit_label (label);
23433 LABEL_NUSES (label) = 1;
23434 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23435 }
23436 }
23437 return destmem;
23438 }
23439
23440 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
23441 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23442 and jump to DONE_LABEL. */
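/* The trick used below: once SIZE <= COUNT < 2 * SIZE, copying (or
   storing) SIZE bytes at DESTPTR plus SIZE bytes ending exactly at
   DESTPTR + COUNT covers the whole block; the two chunks may overlap.
   Illustrative example (assumed values): SIZE == 8 and COUNT == 13 cover
   bytes [0, 8) with the first group of moves and bytes [5, 13) with the
   second.  */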
23443 static void
23444 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23445 rtx destptr, rtx srcptr,
23446 rtx value, rtx vec_value,
23447 rtx count, int size,
23448 rtx done_label, bool issetmem)
23449 {
23450 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
23451 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23452 rtx modesize;
23453 int n;
23454
23455 /* If we do not have a vector value to copy, we must reduce the size. */
23456 if (issetmem)
23457 {
23458 if (!vec_value)
23459 {
23460 if (GET_MODE (value) == VOIDmode && size > 8)
23461 mode = Pmode;
23462 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23463 mode = GET_MODE (value);
23464 }
23465 else
23466 mode = GET_MODE (vec_value), value = vec_value;
23467 }
23468 else
23469 {
23470 /* Choose appropriate vector mode. */
23471 if (size >= 32)
23472 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23473 else if (size >= 16)
23474 mode = TARGET_SSE ? V16QImode : DImode;
23475 srcmem = change_address (srcmem, mode, srcptr);
23476 }
23477 destmem = change_address (destmem, mode, destptr);
23478 modesize = GEN_INT (GET_MODE_SIZE (mode));
23479 gcc_assert (GET_MODE_SIZE (mode) <= size);
23480 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23481 {
23482 if (issetmem)
23483 emit_move_insn (destmem, gen_lowpart (mode, value));
23484 else
23485 {
23486 emit_move_insn (destmem, srcmem);
23487 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23488 }
23489 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23490 }
23491
23492 destmem = offset_address (destmem, count, 1);
23493 destmem = offset_address (destmem, GEN_INT (-2 * size),
23494 GET_MODE_SIZE (mode));
23495 if (!issetmem)
23496 {
23497 srcmem = offset_address (srcmem, count, 1);
23498 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23499 GET_MODE_SIZE (mode));
23500 }
23501 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23502 {
23503 if (issetmem)
23504 emit_move_insn (destmem, gen_lowpart (mode, value));
23505 else
23506 {
23507 emit_move_insn (destmem, srcmem);
23508 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23509 }
23510 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23511 }
23512 emit_jump_insn (gen_jump (done_label));
23513 emit_barrier ();
23514
23515 emit_label (label);
23516 LABEL_NUSES (label) = 1;
23517 }
23518
23519 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
23520 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23521 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
23522 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23523 DONE_LABEL is a label after the whole copying sequence. The label is created
23524 on demand if *DONE_LABEL is NULL.
23525 MIN_SIZE is the minimal size of the copied block. This value gets adjusted
23526 for the new bounds after the initial copies.
23527
23528 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23529 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23530 we will dispatch to a library call for large blocks.
23531
23532 In pseudocode we do:
23533
23534 if (COUNT < SIZE)
23535 {
23536 Assume that SIZE is 4. Bigger sizes are handled analogously
23537 if (COUNT & 4)
23538 {
23539 copy 4 bytes from SRCPTR to DESTPTR
23540 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23541 goto done_label
23542 }
23543 if (!COUNT)
23544 goto done_label;
23545 copy 1 byte from SRCPTR to DESTPTR
23546 if (COUNT & 2)
23547 {
23548 copy 2 bytes from SRCPTR to DESTPTR
23549 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23550 }
23551 }
23552 else
23553 {
23554 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23555 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT - SIZE
23556
23557 OLD_DESTPTR = DESTPTR;
23558 Align DESTPTR up to DESIRED_ALIGN
23559 SRCPTR += DESTPTR - OLD_DESTPTR
23560 COUNT -= DESTPTR - OLD_DESTPTR
23561 if (DYNAMIC_CHECK)
23562 Round COUNT down to multiple of SIZE
23563 << optional caller supplied zero size guard is here >>
23564 << optional caller supplied dynamic check is here >>
23565 << caller supplied main copy loop is here >>
23566 }
23567 done_label:
23568 */
23569 static void
23570 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23571 rtx *destptr, rtx *srcptr,
23572 enum machine_mode mode,
23573 rtx value, rtx vec_value,
23574 rtx *count,
23575 rtx_code_label **done_label,
23576 int size,
23577 int desired_align,
23578 int align,
23579 unsigned HOST_WIDE_INT *min_size,
23580 bool dynamic_check,
23581 bool issetmem)
23582 {
23583 rtx_code_label *loop_label = NULL, *label;
23584 int n;
23585 rtx modesize;
23586 int prolog_size = 0;
23587 rtx mode_value;
23588
23589 /* Choose the proper value to copy. */
23590 if (issetmem && VECTOR_MODE_P (mode))
23591 mode_value = vec_value;
23592 else
23593 mode_value = value;
23594 gcc_assert (GET_MODE_SIZE (mode) <= size);
23595
23596 /* See if block is big or small, handle small blocks. */
23597 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23598 {
23599 int size2 = size;
23600 loop_label = gen_label_rtx ();
23601
23602 if (!*done_label)
23603 *done_label = gen_label_rtx ();
23604
23605 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23606 1, loop_label);
23607 size2 >>= 1;
23608
23609 /* Handle sizes > 3. */
23610 for (;size2 > 2; size2 >>= 1)
23611 expand_small_movmem_or_setmem (destmem, srcmem,
23612 *destptr, *srcptr,
23613 value, vec_value,
23614 *count,
23615 size2, *done_label, issetmem);
23616 /* Nothing to copy? Jump to DONE_LABEL if so. */
23617 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23618 1, *done_label);
23619
23620 /* Do a byte copy. */
23621 destmem = change_address (destmem, QImode, *destptr);
23622 if (issetmem)
23623 emit_move_insn (destmem, gen_lowpart (QImode, value));
23624 else
23625 {
23626 srcmem = change_address (srcmem, QImode, *srcptr);
23627 emit_move_insn (destmem, srcmem);
23628 }
23629
23630 /* Handle sizes 2 and 3. */
23631 label = ix86_expand_aligntest (*count, 2, false);
23632 destmem = change_address (destmem, HImode, *destptr);
23633 destmem = offset_address (destmem, *count, 1);
23634 destmem = offset_address (destmem, GEN_INT (-2), 2);
23635 if (issetmem)
23636 emit_move_insn (destmem, gen_lowpart (HImode, value));
23637 else
23638 {
23639 srcmem = change_address (srcmem, HImode, *srcptr);
23640 srcmem = offset_address (srcmem, *count, 1);
23641 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23642 emit_move_insn (destmem, srcmem);
23643 }
23644
23645 emit_label (label);
23646 LABEL_NUSES (label) = 1;
23647 emit_jump_insn (gen_jump (*done_label));
23648 emit_barrier ();
23649 }
23650 else
23651 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23652 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23653
23654 /* Start memcpy for COUNT >= SIZE. */
23655 if (loop_label)
23656 {
23657 emit_label (loop_label);
23658 LABEL_NUSES (loop_label) = 1;
23659 }
23660
23661 /* Copy enough to cover the first DESIRED_ALIGN - ALIGN bytes. */
23662 if (!issetmem)
23663 srcmem = change_address (srcmem, mode, *srcptr);
23664 destmem = change_address (destmem, mode, *destptr);
23665 modesize = GEN_INT (GET_MODE_SIZE (mode));
23666 for (n = 0; prolog_size < desired_align - align; n++)
23667 {
23668 if (issetmem)
23669 emit_move_insn (destmem, mode_value);
23670 else
23671 {
23672 emit_move_insn (destmem, srcmem);
23673 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23674 }
23675 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23676 prolog_size += GET_MODE_SIZE (mode);
23677 }
23678
23679
23680 /* Copy last SIZE bytes. */
23681 destmem = offset_address (destmem, *count, 1);
23682 destmem = offset_address (destmem,
23683 GEN_INT (-size - prolog_size),
23684 1);
23685 if (issetmem)
23686 emit_move_insn (destmem, mode_value);
23687 else
23688 {
23689 srcmem = offset_address (srcmem, *count, 1);
23690 srcmem = offset_address (srcmem,
23691 GEN_INT (-size - prolog_size),
23692 1);
23693 emit_move_insn (destmem, srcmem);
23694 }
23695 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23696 {
23697 destmem = offset_address (destmem, modesize, 1);
23698 if (issetmem)
23699 emit_move_insn (destmem, mode_value);
23700 else
23701 {
23702 srcmem = offset_address (srcmem, modesize, 1);
23703 emit_move_insn (destmem, srcmem);
23704 }
23705 }
23706
23707 /* Align destination. */
23708 if (desired_align > 1 && desired_align > align)
23709 {
23710 rtx saveddest = *destptr;
23711
23712 gcc_assert (desired_align <= size);
23713 /* Align destptr up, placing it in a new register. */
23714 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23715 GEN_INT (prolog_size),
23716 NULL_RTX, 1, OPTAB_DIRECT);
23717 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23718 GEN_INT (-desired_align),
23719 *destptr, 1, OPTAB_DIRECT);
23720 /* See how many bytes we skipped. */
23721 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23722 *destptr,
23723 saveddest, 1, OPTAB_DIRECT);
23724 /* Adjust srcptr and count. */
23725 if (!issetmem)
23726 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23727 *srcptr, 1, OPTAB_DIRECT);
23728 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23729 saveddest, *count, 1, OPTAB_DIRECT);
23730 /* We copied at most size + prolog_size. */
23731 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23732 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23733 else
23734 *min_size = 0;
23735
23736 /* Our loops always round down the block size, but for dispatch to a
23737 library call we need the precise value. */
23738 if (dynamic_check)
23739 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23740 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23741 }
23742 else
23743 {
23744 gcc_assert (prolog_size == 0);
23745 /* Decrease count, so we won't end up copying the last word twice. */
23746 if (!CONST_INT_P (*count))
23747 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23748 constm1_rtx, *count, 1, OPTAB_DIRECT);
23749 else
23750 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23751 if (*min_size)
23752 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23753 }
23754 }
23755
23756
23757 /* This function is like the previous one, except here we know how many bytes
23758 need to be copied. That allows us to update alignment not only of DST, which
23759 is returned, but also of SRC, which is passed as a pointer for that
23760 reason. */
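/* Illustrative example (assumed values): with DESIRED_ALIGN == 16 and
   ALIGN_BYTES == 11 the loop below emits pieces for the set bits of 11,
   i.e. a 1-byte, a 2-byte and an 8-byte copy or store (1 + 2 + 8 == 11),
   after which DST can be marked as 16-byte aligned.  */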
23761 static rtx
23762 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23763 rtx srcreg, rtx value, rtx vec_value,
23764 int desired_align, int align_bytes,
23765 bool issetmem)
23766 {
23767 rtx src = NULL;
23768 rtx orig_dst = dst;
23769 rtx orig_src = NULL;
23770 int piece_size = 1;
23771 int copied_bytes = 0;
23772
23773 if (!issetmem)
23774 {
23775 gcc_assert (srcp != NULL);
23776 src = *srcp;
23777 orig_src = src;
23778 }
23779
23780 for (piece_size = 1;
23781 piece_size <= desired_align && copied_bytes < align_bytes;
23782 piece_size <<= 1)
23783 {
23784 if (align_bytes & piece_size)
23785 {
23786 if (issetmem)
23787 {
23788 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23789 dst = emit_memset (dst, destreg, vec_value, piece_size);
23790 else
23791 dst = emit_memset (dst, destreg, value, piece_size);
23792 }
23793 else
23794 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23795 copied_bytes += piece_size;
23796 }
23797 }
23798 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23799 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23800 if (MEM_SIZE_KNOWN_P (orig_dst))
23801 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23802
23803 if (!issetmem)
23804 {
23805 int src_align_bytes = get_mem_align_offset (src, desired_align
23806 * BITS_PER_UNIT);
23807 if (src_align_bytes >= 0)
23808 src_align_bytes = desired_align - src_align_bytes;
23809 if (src_align_bytes >= 0)
23810 {
23811 unsigned int src_align;
23812 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23813 {
23814 if ((src_align_bytes & (src_align - 1))
23815 == (align_bytes & (src_align - 1)))
23816 break;
23817 }
23818 if (src_align > (unsigned int) desired_align)
23819 src_align = desired_align;
23820 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23821 set_mem_align (src, src_align * BITS_PER_UNIT);
23822 }
23823 if (MEM_SIZE_KNOWN_P (orig_src))
23824 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23825 *srcp = src;
23826 }
23827
23828 return dst;
23829 }
23830
23831 /* Return true if ALG can be used in current context.
23832 Assume we expand memset if MEMSET is true. */
23833 static bool
23834 alg_usable_p (enum stringop_alg alg, bool memset)
23835 {
23836 if (alg == no_stringop)
23837 return false;
23838 if (alg == vector_loop)
23839 return TARGET_SSE || TARGET_AVX;
23840 /* Algorithms using the rep prefix want at least edi and ecx;
23841 additionally, memset wants eax and memcpy wants esi. Don't
23842 consider such algorithms if the user has appropriated those
23843 registers for their own purposes. */
23844 if (alg == rep_prefix_1_byte
23845 || alg == rep_prefix_4_byte
23846 || alg == rep_prefix_8_byte)
23847 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23848 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23849 return true;
23850 }
23851
23852 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
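/* The decision below consults the per-processor stringop_algs tables (see
   the cost tables near the top of this file): each size[] entry gives a
   maximal block size and the algorithm preferred up to that size, while
   unknown_size is used when no bound on the block size is known.  A
   hypothetical table such as
       {libcall, {{256, unrolled_loop, false}, {-1, rep_prefix_4_byte, false}}}
   would pick the unrolled loop for expected sizes up to 256 bytes and
   rep-prefixed 4-byte moves beyond that.  */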
23853 static enum stringop_alg
23854 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23855 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23856 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23857 {
23858 const struct stringop_algs * algs;
23859 bool optimize_for_speed;
23860 int max = 0;
23861 const struct processor_costs *cost;
23862 int i;
23863 bool any_alg_usable_p = false;
23864
23865 *noalign = false;
23866 *dynamic_check = -1;
23867
23868 /* Even if the string operation call is cold, we still might spend a lot
23869 of time processing large blocks. */
23870 if (optimize_function_for_size_p (cfun)
23871 || (optimize_insn_for_size_p ()
23872 && (max_size < 256
23873 || (expected_size != -1 && expected_size < 256))))
23874 optimize_for_speed = false;
23875 else
23876 optimize_for_speed = true;
23877
23878 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23879 if (memset)
23880 algs = &cost->memset[TARGET_64BIT != 0];
23881 else
23882 algs = &cost->memcpy[TARGET_64BIT != 0];
23883
23884 /* Find the maximal size covered by a usable, non-libcall algorithm. */
23885 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23886 {
23887 enum stringop_alg candidate = algs->size[i].alg;
23888 bool usable = alg_usable_p (candidate, memset);
23889 any_alg_usable_p |= usable;
23890
23891 if (candidate != libcall && candidate && usable)
23892 max = algs->size[i].max;
23893 }
23894
23895 /* If the expected size is not known but the maximal size is small enough
23896 for the inline version to be a win, set the expected size into
23897 the range. */
23898 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
23899 && expected_size == -1)
23900 expected_size = min_size / 2 + max_size / 2;
23901
23902 /* If the user specified the algorithm, honor it if possible. */
23903 if (ix86_stringop_alg != no_stringop
23904 && alg_usable_p (ix86_stringop_alg, memset))
23905 return ix86_stringop_alg;
23906 /* rep; movq or rep; movl is the smallest variant. */
23907 else if (!optimize_for_speed)
23908 {
23909 *noalign = true;
23910 if (!count || (count & 3) || (memset && !zero_memset))
23911 return alg_usable_p (rep_prefix_1_byte, memset)
23912 ? rep_prefix_1_byte : loop_1_byte;
23913 else
23914 return alg_usable_p (rep_prefix_4_byte, memset)
23915 ? rep_prefix_4_byte : loop;
23916 }
23917 /* Very tiny blocks are best handled via the loop, since REP is expensive
23918 to set up. */
23919 else if (expected_size != -1 && expected_size < 4)
23920 return loop_1_byte;
23921 else if (expected_size != -1)
23922 {
23923 enum stringop_alg alg = libcall;
23924 bool alg_noalign = false;
23925 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23926 {
23927 /* We get here if the algorithms that were not libcall-based
23928 were rep-prefix based and we are unable to use rep prefixes
23929 based on global register usage. Break out of the loop and
23930 use the heuristic below. */
23931 if (algs->size[i].max == 0)
23932 break;
23933 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23934 {
23935 enum stringop_alg candidate = algs->size[i].alg;
23936
23937 if (candidate != libcall && alg_usable_p (candidate, memset))
23938 {
23939 alg = candidate;
23940 alg_noalign = algs->size[i].noalign;
23941 }
23942 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23943 last non-libcall inline algorithm. */
23944 if (TARGET_INLINE_ALL_STRINGOPS)
23945 {
23946 /* When the current size is best copied by a libcall, but we
23947 are still forced to inline, run the heuristic below that
23948 will pick code for medium-sized blocks. */
23949 if (alg != libcall)
23950 {
23951 *noalign = alg_noalign;
23952 return alg;
23953 }
23954 break;
23955 }
23956 else if (alg_usable_p (candidate, memset))
23957 {
23958 *noalign = algs->size[i].noalign;
23959 return candidate;
23960 }
23961 }
23962 }
23963 }
23964 /* When asked to inline the call anyway, try to pick a meaningful choice.
23965 We look for the maximal size of block that is faster to copy by hand and
23966 take blocks of at most that size, guessing that the average size will
23967 be roughly half of the maximum.
23968
23969 If this turns out to be bad, we might simply specify the preferred
23970 choice in ix86_costs. */
23971 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23972 && (algs->unknown_size == libcall
23973 || !alg_usable_p (algs->unknown_size, memset)))
23974 {
23975 enum stringop_alg alg;
23976
23977 /* If there aren't any usable algorithms, then recursing on
23978 smaller sizes isn't going to find anything. Just return the
23979 simple byte-at-a-time copy loop. */
23980 if (!any_alg_usable_p)
23981 {
23982 /* Pick something reasonable. */
23983 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23984 *dynamic_check = 128;
23985 return loop_1_byte;
23986 }
23987 if (max <= 0)
23988 max = 4096;
23989 alg = decide_alg (count, max / 2, min_size, max_size, memset,
23990 zero_memset, dynamic_check, noalign);
23991 gcc_assert (*dynamic_check == -1);
23992 gcc_assert (alg != libcall);
23993 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23994 *dynamic_check = max;
23995 return alg;
23996 }
23997 return (alg_usable_p (algs->unknown_size, memset)
23998 ? algs->unknown_size : libcall);
23999 }
24000
24001 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24002 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
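/* For example (illustrative): a vector_loop whose move mode is V16QImode
   asks for 16-byte alignment below, while optimize_size or a very small
   expected size keeps the request down at the already-known ALIGN.  */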
24003 static int
24004 decide_alignment (int align,
24005 enum stringop_alg alg,
24006 int expected_size,
24007 enum machine_mode move_mode)
24008 {
24009 int desired_align = 0;
24010
24011 gcc_assert (alg != no_stringop);
24012
24013 if (alg == libcall)
24014 return 0;
24015 if (move_mode == VOIDmode)
24016 return 0;
24017
24018 desired_align = GET_MODE_SIZE (move_mode);
24019 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
24020 copying a whole cache line at once. */
24021 if (TARGET_PENTIUMPRO
24022 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24023 desired_align = 8;
24024
24025 if (optimize_size)
24026 desired_align = 1;
24027 if (desired_align < align)
24028 desired_align = align;
24029 if (expected_size != -1 && expected_size < 4)
24030 desired_align = align;
24031
24032 return desired_align;
24033 }
24034
24035
24036 /* Helper function for memset. For a QImode value 0xXY produce
24037 0xXY...XY of the width specified by MODE. This is essentially
24038 a multiplication by 0x01010101 (0x0101010101010101 for DImode), but we
24039 can do slightly better than synth_mult by unwinding the sequence by
24040 hand on CPUs with a slow multiply. */
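/* Worked example (illustrative): broadcasting the QImode value 0xAB into
   SImode without a multiply uses the shift/or (or insv) sequence emitted
   below:

       reg  = 0x000000AB
       reg |= reg << 8      ->  0x0000ABAB
       reg |= reg << 16     ->  0xABABABAB

   with one more 32-bit shift/or step for DImode.  Constant values are
   simply replicated at compile time via gen_int_mode.  */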
24041 static rtx
24042 promote_duplicated_reg (enum machine_mode mode, rtx val)
24043 {
24044 enum machine_mode valmode = GET_MODE (val);
24045 rtx tmp;
24046 int nops = mode == DImode ? 3 : 2;
24047
24048 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24049 if (val == const0_rtx)
24050 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24051 if (CONST_INT_P (val))
24052 {
24053 HOST_WIDE_INT v = INTVAL (val) & 255;
24054
24055 v |= v << 8;
24056 v |= v << 16;
24057 if (mode == DImode)
24058 v |= (v << 16) << 16;
24059 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24060 }
24061
24062 if (valmode == VOIDmode)
24063 valmode = QImode;
24064 if (valmode != QImode)
24065 val = gen_lowpart (QImode, val);
24066 if (mode == QImode)
24067 return val;
24068 if (!TARGET_PARTIAL_REG_STALL)
24069 nops--;
24070 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24071 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24072 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24073 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24074 {
24075 rtx reg = convert_modes (mode, QImode, val, true);
24076 tmp = promote_duplicated_reg (mode, const1_rtx);
24077 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24078 OPTAB_DIRECT);
24079 }
24080 else
24081 {
24082 rtx reg = convert_modes (mode, QImode, val, true);
24083
24084 if (!TARGET_PARTIAL_REG_STALL)
24085 if (mode == SImode)
24086 emit_insn (gen_movsi_insv_1 (reg, reg));
24087 else
24088 emit_insn (gen_movdi_insv_1 (reg, reg));
24089 else
24090 {
24091 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24092 NULL, 1, OPTAB_DIRECT);
24093 reg =
24094 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24095 }
24096 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24097 NULL, 1, OPTAB_DIRECT);
24098 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24099 if (mode == SImode)
24100 return reg;
24101 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24102 NULL, 1, OPTAB_DIRECT);
24103 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24104 return reg;
24105 }
24106 }
24107
24108 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
24109 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
24110 alignment from ALIGN to DESIRED_ALIGN. */
24111 static rtx
24112 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24113 int align)
24114 {
24115 rtx promoted_val;
24116
24117 if (TARGET_64BIT
24118 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24119 promoted_val = promote_duplicated_reg (DImode, val);
24120 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24121 promoted_val = promote_duplicated_reg (SImode, val);
24122 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24123 promoted_val = promote_duplicated_reg (HImode, val);
24124 else
24125 promoted_val = val;
24126
24127 return promoted_val;
24128 }
24129
24130 /* Expand a string move (memcpy) or store (memset) operation. Use i386 string
24131 operations when profitable. The code depends upon architecture, block size
24132 and alignment, but always has one of the following overall structures:
24133
24134 Aligned move sequence:
24135
24136 1) Prologue guard: conditional that jumps ahead to the epilogue for small
24137 blocks that can be handled by the epilogue alone. This is faster
24138 but also needed for correctness, since the prologue assumes the block
24139 is larger than the desired alignment.
24140
24141 Optional dynamic check for size and libcall for large
24142 blocks is emitted here too, with -minline-stringops-dynamically.
24143
24144 2) Prologue: copy first few bytes in order to get destination
24145 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24146 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24147 copied. We emit either a jump tree on power of two sized
24148 blocks, or a byte loop.
24149
24150 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24151 with specified algorithm.
24152
24153 4) Epilogue: code copying tail of the block that is too small to be
24154 handled by main body (or up to size guarded by prologue guard).
24155
24156 Misaligned move sequence
24157
24158 1) Misaligned move prologue/epilogue containing:
24159 a) Prologue handling small memory blocks and jumping to done_label
24160 (skipped if blocks are known to be large enough)
24161 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment
24162 is needed, done by a single possibly misaligned move
24163 (skipped if alignment is not needed)
24164 c) Copy of the last SIZE_NEEDED bytes by possibly misaligned moves
24165
24166 2) Zero size guard dispatching to done_label, if needed
24167
24168 3) Dispatch to a library call, if needed,
24169
24170 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24171 with the specified algorithm. */
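/* Illustrative overall shape for the aligned sequence with a non-constant
   COUNT (a sketch; details depend on the algorithm chosen above):

       if (count < epilogue_size_needed) goto epilogue;
       ... prologue: copy/set a few bytes to align the destination ...
       ... main loop: handle size_needed bytes per iteration ...
     epilogue:
       ... handle the remaining count & (epilogue_size_needed - 1) bytes ...
*/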
24172 bool
24173 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24174 rtx align_exp, rtx expected_align_exp,
24175 rtx expected_size_exp, rtx min_size_exp,
24176 rtx max_size_exp, rtx probable_max_size_exp,
24177 bool issetmem)
24178 {
24179 rtx destreg;
24180 rtx srcreg = NULL;
24181 rtx_code_label *label = NULL;
24182 rtx tmp;
24183 rtx_code_label *jump_around_label = NULL;
24184 HOST_WIDE_INT align = 1;
24185 unsigned HOST_WIDE_INT count = 0;
24186 HOST_WIDE_INT expected_size = -1;
24187 int size_needed = 0, epilogue_size_needed;
24188 int desired_align = 0, align_bytes = 0;
24189 enum stringop_alg alg;
24190 rtx promoted_val = NULL;
24191 rtx vec_promoted_val = NULL;
24192 bool force_loopy_epilogue = false;
24193 int dynamic_check;
24194 bool need_zero_guard = false;
24195 bool noalign;
24196 enum machine_mode move_mode = VOIDmode;
24197 int unroll_factor = 1;
24198 /* TODO: Once value ranges are available, fill in proper data. */
24199 unsigned HOST_WIDE_INT min_size = 0;
24200 unsigned HOST_WIDE_INT max_size = -1;
24201 unsigned HOST_WIDE_INT probable_max_size = -1;
24202 bool misaligned_prologue_used = false;
24203
24204 if (CONST_INT_P (align_exp))
24205 align = INTVAL (align_exp);
24206 /* i386 can do misaligned access at a reasonably increased cost. */
24207 if (CONST_INT_P (expected_align_exp)
24208 && INTVAL (expected_align_exp) > align)
24209 align = INTVAL (expected_align_exp);
24210 /* ALIGN is the minimum of destination and source alignment, but we care here
24211 just about destination alignment. */
24212 else if (!issetmem
24213 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24214 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24215
24216 if (CONST_INT_P (count_exp))
24217 {
24218 min_size = max_size = probable_max_size = count = expected_size
24219 = INTVAL (count_exp);
24220 /* When COUNT is 0, there is nothing to do. */
24221 if (!count)
24222 return true;
24223 }
24224 else
24225 {
24226 if (min_size_exp)
24227 min_size = INTVAL (min_size_exp);
24228 if (max_size_exp)
24229 max_size = INTVAL (max_size_exp);
24230 if (probable_max_size_exp)
24231 probable_max_size = INTVAL (probable_max_size_exp);
24232 if (CONST_INT_P (expected_size_exp))
24233 expected_size = INTVAL (expected_size_exp);
24234 }
24235
24236 /* Make sure we don't need to care about overflow later on. */
24237 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24238 return false;
24239
24240 /* Step 0: Decide on preferred algorithm, desired alignment and
24241 size of chunks to be copied by main loop. */
24242 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24243 issetmem,
24244 issetmem && val_exp == const0_rtx,
24245 &dynamic_check, &noalign);
24246 if (alg == libcall)
24247 return false;
24248 gcc_assert (alg != no_stringop);
24249
24250 /* For now the vector version of memset is generated only for memory zeroing,
24251 as creating the promoted vector value is very cheap in this case. */
24252 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24253 alg = unrolled_loop;
24254
24255 if (!count)
24256 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24257 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24258 if (!issetmem)
24259 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24260
24261 unroll_factor = 1;
24262 move_mode = word_mode;
24263 switch (alg)
24264 {
24265 case libcall:
24266 case no_stringop:
24267 case last_alg:
24268 gcc_unreachable ();
24269 case loop_1_byte:
24270 need_zero_guard = true;
24271 move_mode = QImode;
24272 break;
24273 case loop:
24274 need_zero_guard = true;
24275 break;
24276 case unrolled_loop:
24277 need_zero_guard = true;
24278 unroll_factor = (TARGET_64BIT ? 4 : 2);
24279 break;
24280 case vector_loop:
24281 need_zero_guard = true;
24282 unroll_factor = 4;
24283 /* Find the widest supported mode. */
24284 move_mode = word_mode;
24285 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24286 != CODE_FOR_nothing)
24287 move_mode = GET_MODE_WIDER_MODE (move_mode);
24288
24289 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24290 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24291 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24292 {
24293 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24294 move_mode = mode_for_vector (word_mode, nunits);
24295 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24296 move_mode = word_mode;
24297 }
24298 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24299 break;
24300 case rep_prefix_8_byte:
24301 move_mode = DImode;
24302 break;
24303 case rep_prefix_4_byte:
24304 move_mode = SImode;
24305 break;
24306 case rep_prefix_1_byte:
24307 move_mode = QImode;
24308 break;
24309 }
24310 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24311 epilogue_size_needed = size_needed;
24312
24313 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24314 if (!TARGET_ALIGN_STRINGOPS || noalign)
24315 align = desired_align;
24316
24317 /* Step 1: Prologue guard. */
24318
24319 /* Alignment code needs count to be in register. */
24320 if (CONST_INT_P (count_exp) && desired_align > align)
24321 {
24322 if (INTVAL (count_exp) > desired_align
24323 && INTVAL (count_exp) > size_needed)
24324 {
24325 align_bytes
24326 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24327 if (align_bytes <= 0)
24328 align_bytes = 0;
24329 else
24330 align_bytes = desired_align - align_bytes;
24331 }
24332 if (align_bytes == 0)
24333 count_exp = force_reg (counter_mode (count_exp), count_exp);
24334 }
24335 gcc_assert (desired_align >= 1 && align >= 1);
24336
24337 /* Misaligned move sequences handle both prologue and epilogue at once.
24338 Default code generation results in smaller code for large alignments
24339 and also avoids redundant work when sizes are known precisely. */
24340 misaligned_prologue_used
24341 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24342 && MAX (desired_align, epilogue_size_needed) <= 32
24343 && desired_align <= epilogue_size_needed
24344 && ((desired_align > align && !align_bytes)
24345 || (!count && epilogue_size_needed > 1)));
24346
24347 /* Do the cheap promotion to allow better CSE across the
24348 main loop and epilogue (i.e. one load of the big constant in
24349 front of all the code).
24350 For now the misaligned move sequences do not have a fast path
24351 without broadcasting. */
24352 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24353 {
24354 if (alg == vector_loop)
24355 {
24356 gcc_assert (val_exp == const0_rtx);
24357 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24358 promoted_val = promote_duplicated_reg_to_size (val_exp,
24359 GET_MODE_SIZE (word_mode),
24360 desired_align, align);
24361 }
24362 else
24363 {
24364 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24365 desired_align, align);
24366 }
24367 }
24368 /* Misaligned move sequences handle both prologues and epilogues at once.
24369 Default code generation results in smaller code for large alignments and
24370 also avoids redundant work when sizes are known precisely. */
24371 if (misaligned_prologue_used)
24372 {
24373 /* The misaligned move prologue handles small blocks by itself. */
24374 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24375 (dst, src, &destreg, &srcreg,
24376 move_mode, promoted_val, vec_promoted_val,
24377 &count_exp,
24378 &jump_around_label,
24379 desired_align < align
24380 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24381 desired_align, align, &min_size, dynamic_check, issetmem);
24382 if (!issetmem)
24383 src = change_address (src, BLKmode, srcreg);
24384 dst = change_address (dst, BLKmode, destreg);
24385 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24386 epilogue_size_needed = 0;
24387 if (need_zero_guard && !min_size)
24388 {
24389 /* It is possible that we copied enough so the main loop will not
24390 execute. */
24391 gcc_assert (size_needed > 1);
24392 if (jump_around_label == NULL_RTX)
24393 jump_around_label = gen_label_rtx ();
24394 emit_cmp_and_jump_insns (count_exp,
24395 GEN_INT (size_needed),
24396 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24397 if (expected_size == -1
24398 || expected_size < (desired_align - align) / 2 + size_needed)
24399 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24400 else
24401 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24402 }
24403 }
24404 /* Ensure that alignment prologue won't copy past end of block. */
24405 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24406 {
24407 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24408 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
24409 Make sure it is a power of 2. */
24410 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24411
24412 /* To improve performance on small blocks, we jump around the VAL
24413 promoting code. This means that if the promoted VAL is not constant,
24414 we might not use it in the epilogue and have to use the byte
24415 loop variant. */
24416 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24417 force_loopy_epilogue = true;
24418 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24419 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24420 {
24421 /* If main algorithm works on QImode, no epilogue is needed.
24422 For small sizes just don't align anything. */
24423 if (size_needed == 1)
24424 desired_align = align;
24425 else
24426 goto epilogue;
24427 }
24428 else if (!count
24429 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24430 {
24431 label = gen_label_rtx ();
24432 emit_cmp_and_jump_insns (count_exp,
24433 GEN_INT (epilogue_size_needed),
24434 LTU, 0, counter_mode (count_exp), 1, label);
24435 if (expected_size == -1 || expected_size < epilogue_size_needed)
24436 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24437 else
24438 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24439 }
24440 }
24441
24442 /* Emit code to decide at runtime whether a library call or the inline
24443 expansion should be used. */
24444 if (dynamic_check != -1)
24445 {
24446 if (!issetmem && CONST_INT_P (count_exp))
24447 {
24448 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24449 {
24450 emit_block_move_via_libcall (dst, src, count_exp, false);
24451 count_exp = const0_rtx;
24452 goto epilogue;
24453 }
24454 }
24455 else
24456 {
24457 rtx_code_label *hot_label = gen_label_rtx ();
24458 if (jump_around_label == NULL_RTX)
24459 jump_around_label = gen_label_rtx ();
24460 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24461 LEU, 0, counter_mode (count_exp),
24462 1, hot_label);
24463 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24464 if (issetmem)
24465 set_storage_via_libcall (dst, count_exp, val_exp, false);
24466 else
24467 emit_block_move_via_libcall (dst, src, count_exp, false);
24468 emit_jump (jump_around_label);
24469 emit_label (hot_label);
24470 }
24471 }
24472
24473 /* Step 2: Alignment prologue. */
24474 /* Do the expensive promotion once we branched off the small blocks. */
24475 if (issetmem && !promoted_val)
24476 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24477 desired_align, align);
24478
24479 if (desired_align > align && !misaligned_prologue_used)
24480 {
24481 if (align_bytes == 0)
24482 {
24483 /* Except for the first move in the prologue, we no longer know
24484 the constant offset in the aliasing info. It does not seem worth
24485 the pain to maintain it for the first move, so throw away
24486 the info early. */
24487 dst = change_address (dst, BLKmode, destreg);
24488 if (!issetmem)
24489 src = change_address (src, BLKmode, srcreg);
24490 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24491 promoted_val, vec_promoted_val,
24492 count_exp, align, desired_align,
24493 issetmem);
24494 /* At most desired_align - align bytes are copied. */
24495 if (min_size < (unsigned)(desired_align - align))
24496 min_size = 0;
24497 else
24498 min_size -= desired_align - align;
24499 }
24500 else
24501 {
24502 /* If we know how many bytes need to be stored before dst is
24503 sufficiently aligned, maintain aliasing info accurately. */
24504 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24505 srcreg,
24506 promoted_val,
24507 vec_promoted_val,
24508 desired_align,
24509 align_bytes,
24510 issetmem);
24511
24512 count_exp = plus_constant (counter_mode (count_exp),
24513 count_exp, -align_bytes);
24514 count -= align_bytes;
24515 min_size -= align_bytes;
24516 max_size -= align_bytes;
24517 }
24518 if (need_zero_guard
24519 && !min_size
24520 && (count < (unsigned HOST_WIDE_INT) size_needed
24521 || (align_bytes == 0
24522 && count < ((unsigned HOST_WIDE_INT) size_needed
24523 + desired_align - align))))
24524 {
24525 /* It is possible that we copied enough so the main loop will not
24526 execute. */
24527 gcc_assert (size_needed > 1);
24528 if (label == NULL_RTX)
24529 label = gen_label_rtx ();
24530 emit_cmp_and_jump_insns (count_exp,
24531 GEN_INT (size_needed),
24532 LTU, 0, counter_mode (count_exp), 1, label);
24533 if (expected_size == -1
24534 || expected_size < (desired_align - align) / 2 + size_needed)
24535 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24536 else
24537 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24538 }
24539 }
24540 if (label && size_needed == 1)
24541 {
24542 emit_label (label);
24543 LABEL_NUSES (label) = 1;
24544 label = NULL;
24545 epilogue_size_needed = 1;
24546 if (issetmem)
24547 promoted_val = val_exp;
24548 }
24549 else if (label == NULL_RTX && !misaligned_prologue_used)
24550 epilogue_size_needed = size_needed;
24551
24552 /* Step 3: Main loop. */
24553
24554 switch (alg)
24555 {
24556 case libcall:
24557 case no_stringop:
24558 case last_alg:
24559 gcc_unreachable ();
24560 case loop_1_byte:
24561 case loop:
24562 case unrolled_loop:
24563 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24564 count_exp, move_mode, unroll_factor,
24565 expected_size, issetmem);
24566 break;
24567 case vector_loop:
24568 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24569 vec_promoted_val, count_exp, move_mode,
24570 unroll_factor, expected_size, issetmem);
24571 break;
24572 case rep_prefix_8_byte:
24573 case rep_prefix_4_byte:
24574 case rep_prefix_1_byte:
24575 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24576 val_exp, count_exp, move_mode, issetmem);
24577 break;
24578 }
24579 /* Properly adjust the offsets of the src and dest memory for aliasing. */
24580 if (CONST_INT_P (count_exp))
24581 {
24582 if (!issetmem)
24583 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24584 (count / size_needed) * size_needed);
24585 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24586 (count / size_needed) * size_needed);
24587 }
24588 else
24589 {
24590 if (!issetmem)
24591 src = change_address (src, BLKmode, srcreg);
24592 dst = change_address (dst, BLKmode, destreg);
24593 }
24594
24595 /* Step 4: Epilogue to copy the remaining bytes. */
24596 epilogue:
24597 if (label)
24598 {
24599 /* When the main loop is done, COUNT_EXP might hold the original count,
24600 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
24601 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
24602 bytes. Compensate if needed. */
24603
24604 if (size_needed < epilogue_size_needed)
24605 {
24606 tmp =
24607 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24608 GEN_INT (size_needed - 1), count_exp, 1,
24609 OPTAB_DIRECT);
24610 if (tmp != count_exp)
24611 emit_move_insn (count_exp, tmp);
24612 }
24613 emit_label (label);
24614 LABEL_NUSES (label) = 1;
24615 }
24616
24617 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24618 {
24619 if (force_loopy_epilogue)
24620 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24621 epilogue_size_needed);
24622 else
24623 {
24624 if (issetmem)
24625 expand_setmem_epilogue (dst, destreg, promoted_val,
24626 vec_promoted_val, count_exp,
24627 epilogue_size_needed);
24628 else
24629 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24630 epilogue_size_needed);
24631 }
24632 }
24633 if (jump_around_label)
24634 emit_label (jump_around_label);
24635 return true;
24636 }
24637
24638
24639 /* Expand the appropriate insns for doing strlen if not just doing
24640 repnz; scasb
24641
24642 out = result, initialized with the start address
24643 align_rtx = alignment of the address.
24644 scratch = scratch register, initialized with the start address when
24645 not aligned, otherwise undefined
24646
24647 This is just the body. It needs the initializations mentioned above and
24648 some address computing at the end. These things are done in i386.md. */
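/* Overall shape (an illustrative sketch, not the exact emitted code):

       while out is not 4-byte aligned:
         if (*out == 0) goto end_0_label;  out++;
       do
         word = *(unsigned int *) out;  out += 4;
       while no byte of word is zero;      (tested with the formula below)
       step OUT back by up to 3 bytes so it points at the terminating zero.
*/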
24649
24650 static void
24651 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24652 {
24653 int align;
24654 rtx tmp;
24655 rtx_code_label *align_2_label = NULL;
24656 rtx_code_label *align_3_label = NULL;
24657 rtx_code_label *align_4_label = gen_label_rtx ();
24658 rtx_code_label *end_0_label = gen_label_rtx ();
24659 rtx mem;
24660 rtx tmpreg = gen_reg_rtx (SImode);
24661 rtx scratch = gen_reg_rtx (SImode);
24662 rtx cmp;
24663
24664 align = 0;
24665 if (CONST_INT_P (align_rtx))
24666 align = INTVAL (align_rtx);
24667
24668 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24669
24670 /* Is there a known alignment and is it less than 4? */
24671 if (align < 4)
24672 {
24673 rtx scratch1 = gen_reg_rtx (Pmode);
24674 emit_move_insn (scratch1, out);
24675 /* Is there a known alignment and is it not 2? */
24676 if (align != 2)
24677 {
24678 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24679 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24680
24681 /* Leave just the 3 lower bits. */
24682 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24683 NULL_RTX, 0, OPTAB_WIDEN);
24684
24685 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24686 Pmode, 1, align_4_label);
24687 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24688 Pmode, 1, align_2_label);
24689 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24690 Pmode, 1, align_3_label);
24691 }
24692 else
24693 {
24694 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24695 check whether it is aligned to 4 bytes. */
24696
24697 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24698 NULL_RTX, 0, OPTAB_WIDEN);
24699
24700 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24701 Pmode, 1, align_4_label);
24702 }
24703
24704 mem = change_address (src, QImode, out);
24705
24706 /* Now compare the bytes. */
24707
24708 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
24709 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24710 QImode, 1, end_0_label);
24711
24712 /* Increment the address. */
24713 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24714
24715 /* Not needed with an alignment of 2. */
24716 if (align != 2)
24717 {
24718 emit_label (align_2_label);
24719
24720 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24721 end_0_label);
24722
24723 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24724
24725 emit_label (align_3_label);
24726 }
24727
24728 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24729 end_0_label);
24730
24731 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24732 }
24733
24734 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
24735 align this loop; doing so only makes the program larger and does not
24736 speed it up. */
24737 emit_label (align_4_label);
24738
24739 mem = change_address (src, SImode, out);
24740 emit_move_insn (scratch, mem);
24741 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24742
24743 /* This formula yields a nonzero result iff one of the bytes is zero.
24744 This saves three branches inside the loop and many cycles. */
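/* The value computed below is the classic byte-wise zero test
       (x - 0x01010101) & ~x & 0x80808080.
   Illustrative example (assumed input): for x = 0x41004242, i.e. the
   bytes 'B', 'B', '\0', 'A' in memory order, x - 0x01010101 is
   0x3FFF4141 and ~x is 0xBEFFBDBD; ANDing the three leaves 0x00800000
   set, flagging the zero byte, while a word with no zero byte yields 0.  */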
24745
24746 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24747 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24748 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24749 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24750 gen_int_mode (0x80808080, SImode)));
24751 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24752 align_4_label);
24753
24754 if (TARGET_CMOVE)
24755 {
24756 rtx reg = gen_reg_rtx (SImode);
24757 rtx reg2 = gen_reg_rtx (Pmode);
24758 emit_move_insn (reg, tmpreg);
24759 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24760
24761 /* If zero is not in the first two bytes, move two bytes forward. */
24762 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24763 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24764 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24765 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24766 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24767 reg,
24768 tmpreg)));
24769 /* Emit lea manually to avoid clobbering of flags. */
24770 emit_insn (gen_rtx_SET (SImode, reg2,
24771 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24772
24773 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24774 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24775 emit_insn (gen_rtx_SET (VOIDmode, out,
24776 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24777 reg2,
24778 out)));
24779 }
24780 else
24781 {
24782 rtx_code_label *end_2_label = gen_label_rtx ();
24783 /* Is zero in the first two bytes? */
24784
24785 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24786 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24787 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24788 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24789 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24790 pc_rtx);
24791 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24792 JUMP_LABEL (tmp) = end_2_label;
24793
24794 /* Not in the first two. Move two bytes forward. */
24795 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24796 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24797
24798 emit_label (end_2_label);
24799
24800 }
24801
24802 /* Avoid branch in fixing the byte. */
24803 tmpreg = gen_lowpart (QImode, tmpreg);
24804 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24805 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24806 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24807 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24808
24809 emit_label (end_0_label);
24810 }
24811
24812 /* Expand strlen. */
24813
24814 bool
24815 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24816 {
24817 rtx addr, scratch1, scratch2, scratch3, scratch4;
24818
24819 /* The generic case of the strlen expander is long. Avoid expanding it
24820 unless TARGET_INLINE_ALL_STRINGOPS. */
24821
24822 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24823 && !TARGET_INLINE_ALL_STRINGOPS
24824 && !optimize_insn_for_size_p ()
24825 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24826 return false;
24827
24828 addr = force_reg (Pmode, XEXP (src, 0));
24829 scratch1 = gen_reg_rtx (Pmode);
24830
24831 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24832 && !optimize_insn_for_size_p ())
24833 {
24834 /* It seems that some optimizer does not combine a call like
24835 foo(strlen(bar), strlen(bar));
24836 when the move and the subtraction are done here. It does calculate
24837 the length just once when these instructions are done inside of
24838 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
24839 often used and I use one fewer register for the lifetime of
24840 output_strlen_unroll(), this is better. */
24841
24842 emit_move_insn (out, addr);
24843
24844 ix86_expand_strlensi_unroll_1 (out, src, align);
24845
24846 /* strlensi_unroll_1 returns the address of the zero at the end of
24847 the string, like memchr(), so compute the length by subtracting
24848 the start address. */
24849 emit_insn (ix86_gen_sub3 (out, out, addr));
24850 }
24851 else
24852 {
24853 rtx unspec;
24854
24855 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24856 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24857 return false;
24858
24859 scratch2 = gen_reg_rtx (Pmode);
24860 scratch3 = gen_reg_rtx (Pmode);
24861 scratch4 = force_reg (Pmode, constm1_rtx);
24862
24863 emit_move_insn (scratch3, addr);
24864 eoschar = force_reg (QImode, eoschar);
24865
24866 src = replace_equiv_address_nv (src, scratch3);
24867
24868 /* If .md starts supporting :P, this can be done in .md. */
24869 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24870 scratch4), UNSPEC_SCAS);
24871 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24872 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24873 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
24874 }
24875 return true;
24876 }
24877
24878 /* For a given symbol (function), construct code to compute the address of
24879 its PLT entry in the large x86-64 PIC model. */
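/* In the large PIC model the PLT entry cannot be reached through a 32-bit
   relocation, so the code below materializes its address explicitly
   (conceptually: tmp = symbol@PLTOFF; tmp += PIC base held in
   pic_offset_table_rtx).  This describes the RTL emitted below, not the
   final assembly, which depends on the relocations chosen.  */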
24880 static rtx
24881 construct_plt_address (rtx symbol)
24882 {
24883 rtx tmp, unspec;
24884
24885 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24886 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24887 gcc_assert (Pmode == DImode);
24888
24889 tmp = gen_reg_rtx (Pmode);
24890 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
24891
24892 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24893 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24894 return tmp;
24895 }
24896
24897 rtx
24898 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24899 rtx callarg2,
24900 rtx pop, bool sibcall)
24901 {
24902 unsigned int const cregs_size
24903 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24904 rtx vec[3 + cregs_size];
24905 rtx use = NULL, call;
24906 unsigned int vec_len = 0;
24907
24908 if (pop == const0_rtx)
24909 pop = NULL;
24910 gcc_assert (!TARGET_64BIT || !pop);
24911
24912 if (TARGET_MACHO && !TARGET_64BIT)
24913 {
24914 #if TARGET_MACHO
24915 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24916 fnaddr = machopic_indirect_call_target (fnaddr);
24917 #endif
24918 }
24919 else
24920 {
24921 /* Static functions and indirect calls don't need the pic register. */
24922 if (flag_pic
24923 && (!TARGET_64BIT
24924 || (ix86_cmodel == CM_LARGE_PIC
24925 && DEFAULT_ABI != MS_ABI))
24926 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24927 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24928 use_reg (&use, pic_offset_table_rtx);
24929 }
24930
24931 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24932 {
24933 rtx al = gen_rtx_REG (QImode, AX_REG);
24934 emit_move_insn (al, callarg2);
24935 use_reg (&use, al);
24936 }
24937
24938 if (ix86_cmodel == CM_LARGE_PIC
24939 && !TARGET_PECOFF
24940 && MEM_P (fnaddr)
24941 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24942 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24943 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24944 else if (sibcall
24945 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24946 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24947 {
24948 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24949 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24950 }
24951
24952 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24953 if (retval)
24954 call = gen_rtx_SET (VOIDmode, retval, call);
24955 vec[vec_len++] = call;
24956
24957 if (pop)
24958 {
24959 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24960 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24961 vec[vec_len++] = pop;
24962 }
24963
24964 if (TARGET_64BIT_MS_ABI
24965 && (!callarg2 || INTVAL (callarg2) != -2))
24966 {
24967 unsigned i;
24968
24969 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
24970 UNSPEC_MS_TO_SYSV_CALL);
24971
24972 for (i = 0; i < cregs_size; i++)
24973 {
24974 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
24975 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
24976
24977 vec[vec_len++]
24978 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
24979 }
24980 }
24981
24982 if (vec_len > 1)
24983 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
24984 call = emit_call_insn (call);
24985 if (use)
24986 CALL_INSN_FUNCTION_USAGE (call) = use;
24987
24988 return call;
24989 }
24990
24991 /* Output the assembly for a call instruction. */
24992
24993 const char *
24994 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
24995 {
24996 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
24997 bool seh_nop_p = false;
24998 const char *xasm;
24999
25000 if (SIBLING_CALL_P (insn))
25001 {
25002 if (direct_p)
25003 xasm = "jmp\t%P0";
25004 /* SEH epilogue detection requires the indirect branch case
25005 to include REX.W. */
25006 else if (TARGET_SEH)
25007 xasm = "rex.W jmp %A0";
25008 else
25009 xasm = "jmp\t%A0";
25010
25011 output_asm_insn (xasm, &call_op);
25012 return "";
25013 }
25014
25015 /* SEH unwinding can require an extra nop to be emitted in several
25016 circumstances. Determine if we have one of those. */
25017 if (TARGET_SEH)
25018 {
25019 rtx_insn *i;
25020
25021 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25022 {
25023 /* If we get to another real insn, we don't need the nop. */
25024 if (INSN_P (i))
25025 break;
25026
25027 /* If we get to the epilogue note, prevent a catch region from
25028 being adjacent to the standard epilogue sequence. If non-call
25029 exceptions are enabled, we'll have done this during epilogue emission. */
25030 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25031 && !flag_non_call_exceptions
25032 && !can_throw_internal (insn))
25033 {
25034 seh_nop_p = true;
25035 break;
25036 }
25037 }
25038
25039 /* If we didn't find a real insn following the call, prevent the
25040 unwinder from looking into the next function. */
25041 if (i == NULL)
25042 seh_nop_p = true;
25043 }
25044
25045 if (direct_p)
25046 xasm = "call\t%P0";
25047 else
25048 xasm = "call\t%A0";
25049
25050 output_asm_insn (xasm, &call_op);
25051
25052 if (seh_nop_p)
25053 return "nop";
25054
25055 return "";
25056 }
25057 \f
25058 /* Clear stack slot assignments remembered from previous functions.
25059 This is called from INIT_EXPANDERS once before RTL is emitted for each
25060 function. */
25061
25062 static struct machine_function *
25063 ix86_init_machine_status (void)
25064 {
25065 struct machine_function *f;
25066
25067 f = ggc_cleared_alloc<machine_function> ();
25068 f->use_fast_prologue_epilogue_nregs = -1;
25069 f->call_abi = ix86_abi;
25070
25071 return f;
25072 }
25073
25074 /* Return a MEM corresponding to a stack slot with mode MODE.
25075 Allocate a new slot if necessary.
25076
25077 The RTL for a function can have several slots available: N is
25078 which slot to use. */
25079
25080 rtx
25081 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25082 {
25083 struct stack_local_entry *s;
25084
25085 gcc_assert (n < MAX_386_STACK_LOCALS);
25086
25087 for (s = ix86_stack_locals; s; s = s->next)
25088 if (s->mode == mode && s->n == n)
25089 return validize_mem (copy_rtx (s->rtl));
25090
25091 s = ggc_alloc<stack_local_entry> ();
25092 s->n = n;
25093 s->mode = mode;
25094 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25095
25096 s->next = ix86_stack_locals;
25097 ix86_stack_locals = s;
25098 return validize_mem (copy_rtx (s->rtl));
25099 }
25100
25101 static void
25102 ix86_instantiate_decls (void)
25103 {
25104 struct stack_local_entry *s;
25105
25106 for (s = ix86_stack_locals; s; s = s->next)
25107 if (s->rtl != NULL_RTX)
25108 instantiate_decl_rtl (s->rtl);
25109 }
25110 \f
25111 /* Check whether x86 address PARTS is a pc-relative address. */
25112
25113 static bool
25114 rip_relative_addr_p (struct ix86_address *parts)
25115 {
25116 rtx base, index, disp;
25117
25118 base = parts->base;
25119 index = parts->index;
25120 disp = parts->disp;
25121
25122 if (disp && !base && !index)
25123 {
25124 if (TARGET_64BIT)
25125 {
25126 rtx symbol = disp;
25127
25128 if (GET_CODE (disp) == CONST)
25129 symbol = XEXP (disp, 0);
25130 if (GET_CODE (symbol) == PLUS
25131 && CONST_INT_P (XEXP (symbol, 1)))
25132 symbol = XEXP (symbol, 0);
25133
25134 if (GET_CODE (symbol) == LABEL_REF
25135 || (GET_CODE (symbol) == SYMBOL_REF
25136 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25137 || (GET_CODE (symbol) == UNSPEC
25138 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25139 || XINT (symbol, 1) == UNSPEC_PCREL
25140 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25141 return true;
25142 }
25143 }
25144 return false;
25145 }
25146
25147 /* Calculate the length of the memory address in the instruction encoding.
25148 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25149 or other prefixes. We never generate addr32 prefix for LEA insn. */
25150
25151 int
25152 memory_address_length (rtx addr, bool lea)
25153 {
25154 struct ix86_address parts;
25155 rtx base, index, disp;
25156 int len;
25157 int ok;
25158
25159 if (GET_CODE (addr) == PRE_DEC
25160 || GET_CODE (addr) == POST_INC
25161 || GET_CODE (addr) == PRE_MODIFY
25162 || GET_CODE (addr) == POST_MODIFY)
25163 return 0;
25164
25165 ok = ix86_decompose_address (addr, &parts);
25166 gcc_assert (ok);
25167
25168 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25169
25170 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
25171 if (TARGET_64BIT && !lea
25172 && (SImode_address_operand (addr, VOIDmode)
25173 || (parts.base && GET_MODE (parts.base) == SImode)
25174 || (parts.index && GET_MODE (parts.index) == SImode)))
25175 len++;
25176
25177 base = parts.base;
25178 index = parts.index;
25179 disp = parts.disp;
25180
25181 if (base && GET_CODE (base) == SUBREG)
25182 base = SUBREG_REG (base);
25183 if (index && GET_CODE (index) == SUBREG)
25184 index = SUBREG_REG (index);
25185
25186 gcc_assert (base == NULL_RTX || REG_P (base));
25187 gcc_assert (index == NULL_RTX || REG_P (index));
25188
25189 /* Rule of thumb:
25190 - esp as the base always wants an index,
25191 - ebp as the base always wants a displacement,
25192 - r12 as the base always wants an index,
25193 - r13 as the base always wants a displacement. */
25194
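/* Some illustrative lengths under these rules (assuming no segment override
   and no addr32 prefix; the modrm and opcode bytes are not counted here):
     (%eax)       -> 0
     (%esp)       -> 1  (SIB byte)
     4(%ebp)      -> 1  (disp8)
     sym(,%eax,4) -> 5  (SIB byte + disp32)  */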
25195 /* Register Indirect. */
25196 if (base && !index && !disp)
25197 {
25198 /* esp (for its index) and ebp (for its displacement) need
25199 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25200 code. */
25201 if (base == arg_pointer_rtx
25202 || base == frame_pointer_rtx
25203 || REGNO (base) == SP_REG
25204 || REGNO (base) == BP_REG
25205 || REGNO (base) == R12_REG
25206 || REGNO (base) == R13_REG)
25207 len++;
25208 }
25209
25210 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25211 is not disp32, but disp32(%rip), so for disp32
25212 SIB byte is needed, unless print_operand_address
25213 optimizes it into disp32(%rip) or (%rip) is implied
25214 by UNSPEC. */
25215 else if (disp && !base && !index)
25216 {
25217 len += 4;
25218 if (rip_relative_addr_p (&parts))
25219 len++;
25220 }
25221 else
25222 {
25223 /* Find the length of the displacement constant. */
25224 if (disp)
25225 {
25226 if (base && satisfies_constraint_K (disp))
25227 len += 1;
25228 else
25229 len += 4;
25230 }
25231 /* ebp always wants a displacement. Similarly r13. */
25232 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25233 len++;
25234
25235 /* An index requires the two-byte modrm form.... */
25236 if (index
25237 /* ...like esp (or r12), which always wants an index. */
25238 || base == arg_pointer_rtx
25239 || base == frame_pointer_rtx
25240 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25241 len++;
25242 }
25243
25244 return len;
25245 }
25246
25247 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
25248 is set, expect that the insn has an 8-bit immediate alternative. */
25249 int
25250 ix86_attr_length_immediate_default (rtx insn, bool shortform)
25251 {
25252 int len = 0;
25253 int i;
25254 extract_insn_cached (insn);
25255 for (i = recog_data.n_operands - 1; i >= 0; --i)
25256 if (CONSTANT_P (recog_data.operand[i]))
25257 {
25258 enum attr_mode mode = get_attr_mode (insn);
25259
25260 gcc_assert (!len);
25261 if (shortform && CONST_INT_P (recog_data.operand[i]))
25262 {
25263 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25264 switch (mode)
25265 {
25266 case MODE_QI:
25267 len = 1;
25268 continue;
25269 case MODE_HI:
25270 ival = trunc_int_for_mode (ival, HImode);
25271 break;
25272 case MODE_SI:
25273 ival = trunc_int_for_mode (ival, SImode);
25274 break;
25275 default:
25276 break;
25277 }
25278 if (IN_RANGE (ival, -128, 127))
25279 {
25280 len = 1;
25281 continue;
25282 }
25283 }
25284 switch (mode)
25285 {
25286 case MODE_QI:
25287 len = 1;
25288 break;
25289 case MODE_HI:
25290 len = 2;
25291 break;
25292 case MODE_SI:
25293 len = 4;
25294 break;
25295 /* Immediates for DImode instructions are encoded
25296 as 32-bit sign-extended values. */
25297 case MODE_DI:
25298 len = 4;
25299 break;
25300 default:
25301 fatal_insn ("unknown insn mode", insn);
25302 }
25303 }
25304 return len;
25305 }
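
/* For example (illustrative only): with SHORTFORM set, "add $3, %eax" gets
   length_immediate 1 because the value fits in a sign-extended imm8, while
   "add $1000, %eax" gets length_immediate 4 (imm32). */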
25306
25307 /* Compute default value for "length_address" attribute. */
25308 int
25309 ix86_attr_length_address_default (rtx insn)
25310 {
25311 int i;
25312
25313 if (get_attr_type (insn) == TYPE_LEA)
25314 {
25315 rtx set = PATTERN (insn), addr;
25316
25317 if (GET_CODE (set) == PARALLEL)
25318 set = XVECEXP (set, 0, 0);
25319
25320 gcc_assert (GET_CODE (set) == SET);
25321
25322 addr = SET_SRC (set);
25323
25324 return memory_address_length (addr, true);
25325 }
25326
25327 extract_insn_cached (insn);
25328 for (i = recog_data.n_operands - 1; i >= 0; --i)
25329 if (MEM_P (recog_data.operand[i]))
25330 {
25331 constrain_operands_cached (reload_completed);
25332 if (which_alternative != -1)
25333 {
25334 const char *constraints = recog_data.constraints[i];
25335 int alt = which_alternative;
25336
25337 while (*constraints == '=' || *constraints == '+')
25338 constraints++;
25339 while (alt-- > 0)
25340 while (*constraints++ != ',')
25341 ;
25342 /* Skip ignored operands. */
25343 if (*constraints == 'X')
25344 continue;
25345 }
25346 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25347 }
25348 return 0;
25349 }
25350
25351 /* Compute the default value for the "length_vex" attribute. It includes
25352 the 2- or 3-byte VEX prefix and 1 opcode byte. */
25353
25354 int
25355 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
25356 {
25357 int i;
25358
25359 /* Only the 0f opcode map can use the 2-byte VEX prefix; the VEX.W bit
25360 requires the 3-byte VEX prefix. */
25361 if (!has_0f_opcode || has_vex_w)
25362 return 3 + 1;
25363
25364 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
25365 if (!TARGET_64BIT)
25366 return 2 + 1;
25367
25368 extract_insn_cached (insn);
25369
25370 for (i = recog_data.n_operands - 1; i >= 0; --i)
25371 if (REG_P (recog_data.operand[i]))
25372 {
25373 /* REX.W bit uses 3 byte VEX prefix. */
25374 if (GET_MODE (recog_data.operand[i]) == DImode
25375 && GENERAL_REG_P (recog_data.operand[i]))
25376 return 3 + 1;
25377 }
25378 else
25379 {
25380 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25381 if (MEM_P (recog_data.operand[i])
25382 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25383 return 3 + 1;
25384 }
25385
25386 return 2 + 1;
25387 }
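
/* For example (illustrative only): "vaddps %xmm2, %xmm1, %xmm0" fits the
   2-byte VEX form, giving 2 + 1; the same operation with %r8 used in a
   memory address needs the VEX.B bit and therefore the 3-byte VEX form,
   giving 3 + 1. */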
25388 \f
25389 /* Return the maximum number of instructions a cpu can issue. */
25390
25391 static int
25392 ix86_issue_rate (void)
25393 {
25394 switch (ix86_tune)
25395 {
25396 case PROCESSOR_PENTIUM:
25397 case PROCESSOR_BONNELL:
25398 case PROCESSOR_SILVERMONT:
25399 case PROCESSOR_INTEL:
25400 case PROCESSOR_K6:
25401 case PROCESSOR_BTVER2:
25402 case PROCESSOR_PENTIUM4:
25403 case PROCESSOR_NOCONA:
25404 return 2;
25405
25406 case PROCESSOR_PENTIUMPRO:
25407 case PROCESSOR_ATHLON:
25408 case PROCESSOR_K8:
25409 case PROCESSOR_AMDFAM10:
25410 case PROCESSOR_GENERIC:
25411 case PROCESSOR_BTVER1:
25412 return 3;
25413
25414 case PROCESSOR_BDVER1:
25415 case PROCESSOR_BDVER2:
25416 case PROCESSOR_BDVER3:
25417 case PROCESSOR_BDVER4:
25418 case PROCESSOR_CORE2:
25419 case PROCESSOR_NEHALEM:
25420 case PROCESSOR_SANDYBRIDGE:
25421 case PROCESSOR_HASWELL:
25422 return 4;
25423
25424 default:
25425 return 1;
25426 }
25427 }
25428
25429 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags
25430 set by DEP_INSN and nothing else set by DEP_INSN. */
25431
25432 static bool
25433 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
25434 {
25435 rtx set, set2;
25436
25437 /* Simplify the test for uninteresting insns. */
25438 if (insn_type != TYPE_SETCC
25439 && insn_type != TYPE_ICMOV
25440 && insn_type != TYPE_FCMOV
25441 && insn_type != TYPE_IBR)
25442 return false;
25443
25444 if ((set = single_set (dep_insn)) != 0)
25445 {
25446 set = SET_DEST (set);
25447 set2 = NULL_RTX;
25448 }
25449 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25450 && XVECLEN (PATTERN (dep_insn), 0) == 2
25451 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25452 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25453 {
25454 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25455 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25456 }
25457 else
25458 return false;
25459
25460 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25461 return false;
25462
25463 /* This test is true if the dependent insn reads the flags but
25464 not any other potentially set register. */
25465 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25466 return false;
25467
25468 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25469 return false;
25470
25471 return true;
25472 }
25473
25474 /* Return true iff USE_INSN has a memory address with operands set by
25475 SET_INSN. */
25476
25477 bool
25478 ix86_agi_dependent (rtx set_insn, rtx use_insn)
25479 {
25480 int i;
25481 extract_insn_cached (use_insn);
25482 for (i = recog_data.n_operands - 1; i >= 0; --i)
25483 if (MEM_P (recog_data.operand[i]))
25484 {
25485 rtx addr = XEXP (recog_data.operand[i], 0);
25486 return modified_in_p (addr, set_insn) != 0;
25487 }
25488 return false;
25489 }
25490
25491 /* Helper function for exact_store_load_dependency.
25492 Return true if ADDR is found in INSN. */
25493 static bool
25494 exact_dependency_1 (rtx addr, rtx insn)
25495 {
25496 enum rtx_code code;
25497 const char *format_ptr;
25498 int i, j;
25499
25500 code = GET_CODE (insn);
25501 switch (code)
25502 {
25503 case MEM:
25504 if (rtx_equal_p (addr, insn))
25505 return true;
25506 break;
25507 case REG:
25508 CASE_CONST_ANY:
25509 case SYMBOL_REF:
25510 case CODE_LABEL:
25511 case PC:
25512 case CC0:
25513 case EXPR_LIST:
25514 return false;
25515 default:
25516 break;
25517 }
25518
25519 format_ptr = GET_RTX_FORMAT (code);
25520 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25521 {
25522 switch (*format_ptr++)
25523 {
25524 case 'e':
25525 if (exact_dependency_1 (addr, XEXP (insn, i)))
25526 return true;
25527 break;
25528 case 'E':
25529 for (j = 0; j < XVECLEN (insn, i); j++)
25530 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25531 return true;
25532 break;
25533 }
25534 }
25535 return false;
25536 }
25537
25538 /* Return true if there is an exact dependency between the store and the load,
25539 i.e. the same memory address is used in both. */
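/* For example, "movw %ax, 6(%esp)" followed by "movw 6(%esp), %dx" is such a
   pair; this matters for the short-integer store-forwarding adjustment in
   ix86_adjust_cost below (illustrative example only). */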
25540 static bool
25541 exact_store_load_dependency (rtx store, rtx load)
25542 {
25543 rtx set1, set2;
25544
25545 set1 = single_set (store);
25546 if (!set1)
25547 return false;
25548 if (!MEM_P (SET_DEST (set1)))
25549 return false;
25550 set2 = single_set (load);
25551 if (!set2)
25552 return false;
25553 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25554 return true;
25555 return false;
25556 }
25557
25558 static int
25559 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
25560 {
25561 enum attr_type insn_type, dep_insn_type;
25562 enum attr_memory memory;
25563 rtx set, set2;
25564 int dep_insn_code_number;
25565
25566 /* Anti and output dependencies have zero cost on all CPUs. */
25567 if (REG_NOTE_KIND (link) != 0)
25568 return 0;
25569
25570 dep_insn_code_number = recog_memoized (dep_insn);
25571
25572 /* If we can't recognize the insns, we can't really do anything. */
25573 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25574 return cost;
25575
25576 insn_type = get_attr_type (insn);
25577 dep_insn_type = get_attr_type (dep_insn);
25578
25579 switch (ix86_tune)
25580 {
25581 case PROCESSOR_PENTIUM:
25582 /* Address Generation Interlock adds a cycle of latency. */
25583 if (insn_type == TYPE_LEA)
25584 {
25585 rtx addr = PATTERN (insn);
25586
25587 if (GET_CODE (addr) == PARALLEL)
25588 addr = XVECEXP (addr, 0, 0);
25589
25590 gcc_assert (GET_CODE (addr) == SET);
25591
25592 addr = SET_SRC (addr);
25593 if (modified_in_p (addr, dep_insn))
25594 cost += 1;
25595 }
25596 else if (ix86_agi_dependent (dep_insn, insn))
25597 cost += 1;
25598
25599 /* ??? Compares pair with jump/setcc. */
25600 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25601 cost = 0;
25602
25603 /* Floating point stores require value to be ready one cycle earlier. */
25604 if (insn_type == TYPE_FMOV
25605 && get_attr_memory (insn) == MEMORY_STORE
25606 && !ix86_agi_dependent (dep_insn, insn))
25607 cost += 1;
25608 break;
25609
25610 case PROCESSOR_PENTIUMPRO:
25611 /* INT->FP conversion is expensive. */
25612 if (get_attr_fp_int_src (dep_insn))
25613 cost += 5;
25614
25615 /* There is one cycle extra latency between an FP op and a store. */
25616 if (insn_type == TYPE_FMOV
25617 && (set = single_set (dep_insn)) != NULL_RTX
25618 && (set2 = single_set (insn)) != NULL_RTX
25619 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25620 && MEM_P (SET_DEST (set2)))
25621 cost += 1;
25622
25623 memory = get_attr_memory (insn);
25624
25625 /* Show the ability of the reorder buffer to hide the latency of a load by
25626 executing it in parallel with the previous instruction when the previous
25627 instruction is not needed to compute the address. */
25628 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25629 && !ix86_agi_dependent (dep_insn, insn))
25630 {
25631 /* Claim moves to take one cycle, as the core can issue one load
25632 at a time and the next load can start a cycle later. */
25633 if (dep_insn_type == TYPE_IMOV
25634 || dep_insn_type == TYPE_FMOV)
25635 cost = 1;
25636 else if (cost > 1)
25637 cost--;
25638 }
25639 break;
25640
25641 case PROCESSOR_K6:
25642 /* The esp dependency is resolved before
25643 the instruction is really finished. */
25644 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25645 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25646 return 1;
25647
25648 /* INT->FP conversion is expensive. */
25649 if (get_attr_fp_int_src (dep_insn))
25650 cost += 5;
25651
25652 memory = get_attr_memory (insn);
25653
25654 /* Show the ability of the reorder buffer to hide the latency of a load by
25655 executing it in parallel with the previous instruction when the previous
25656 instruction is not needed to compute the address. */
25657 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25658 && !ix86_agi_dependent (dep_insn, insn))
25659 {
25660 /* Claim moves to take one cycle, as the core can issue one load
25661 at a time and the next load can start a cycle later. */
25662 if (dep_insn_type == TYPE_IMOV
25663 || dep_insn_type == TYPE_FMOV)
25664 cost = 1;
25665 else if (cost > 2)
25666 cost -= 2;
25667 else
25668 cost = 1;
25669 }
25670 break;
25671
25672 case PROCESSOR_AMDFAM10:
25673 case PROCESSOR_BDVER1:
25674 case PROCESSOR_BDVER2:
25675 case PROCESSOR_BDVER3:
25676 case PROCESSOR_BDVER4:
25677 case PROCESSOR_BTVER1:
25678 case PROCESSOR_BTVER2:
25679 case PROCESSOR_GENERIC:
25680 /* The stack engine allows push and pop instructions to execute in parallel. */
25681 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25682 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25683 return 0;
25684 /* FALLTHRU */
25685
25686 case PROCESSOR_ATHLON:
25687 case PROCESSOR_K8:
25688 memory = get_attr_memory (insn);
25689
25690 /* Show the ability of the reorder buffer to hide the latency of a load by
25691 executing it in parallel with the previous instruction when the previous
25692 instruction is not needed to compute the address. */
25693 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25694 && !ix86_agi_dependent (dep_insn, insn))
25695 {
25696 enum attr_unit unit = get_attr_unit (insn);
25697 int loadcost = 3;
25698
25699 /* Because of the difference between the length of integer and
25700 floating unit pipeline preparation stages, the memory operands
25701 for floating point are cheaper.
25702
25703 ??? For Athlon the difference is most probably 2. */
25704 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25705 loadcost = 3;
25706 else
25707 loadcost = TARGET_ATHLON ? 2 : 0;
25708
25709 if (cost >= loadcost)
25710 cost -= loadcost;
25711 else
25712 cost = 0;
25713 }
25714 break;
25715
25716 case PROCESSOR_CORE2:
25717 case PROCESSOR_NEHALEM:
25718 case PROCESSOR_SANDYBRIDGE:
25719 case PROCESSOR_HASWELL:
25720 /* The stack engine allows push and pop instructions to execute in parallel. */
25721 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25722 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25723 return 0;
25724
25725 memory = get_attr_memory (insn);
25726
25727 /* Show the ability of the reorder buffer to hide the latency of a load by
25728 executing it in parallel with the previous instruction when the previous
25729 instruction is not needed to compute the address. */
25730 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25731 && !ix86_agi_dependent (dep_insn, insn))
25732 {
25733 if (cost >= 4)
25734 cost -= 4;
25735 else
25736 cost = 0;
25737 }
25738 break;
25739
25740 case PROCESSOR_SILVERMONT:
25741 case PROCESSOR_INTEL:
25742 if (!reload_completed)
25743 return cost;
25744
25745 /* Increase cost of integer loads. */
25746 memory = get_attr_memory (dep_insn);
25747 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25748 {
25749 enum attr_unit unit = get_attr_unit (dep_insn);
25750 if (unit == UNIT_INTEGER && cost == 1)
25751 {
25752 if (memory == MEMORY_LOAD)
25753 cost = 3;
25754 else
25755 {
25756 /* Increase the cost of ld/st for short integer types only,
25757 because of the store-forwarding issue. */
25758 rtx set = single_set (dep_insn);
25759 if (set && (GET_MODE (SET_DEST (set)) == QImode
25760 || GET_MODE (SET_DEST (set)) == HImode))
25761 {
25762 /* Increase the cost of the store/load insn if an exact
25763 dependence exists and it is a load insn. */
25764 enum attr_memory insn_memory = get_attr_memory (insn);
25765 if (insn_memory == MEMORY_LOAD
25766 && exact_store_load_dependency (dep_insn, insn))
25767 cost = 3;
25768 }
25769 }
25770 }
25771 }
25772
25773 default:
25774 break;
25775 }
25776
25777 return cost;
25778 }
25779
25780 /* How many alternative schedules to try. This should be as wide as the
25781 scheduling freedom in the DFA, but no wider. Making this value too
25782 large results in extra work for the scheduler. */
25783
25784 static int
25785 ia32_multipass_dfa_lookahead (void)
25786 {
25787 switch (ix86_tune)
25788 {
25789 case PROCESSOR_PENTIUM:
25790 return 2;
25791
25792 case PROCESSOR_PENTIUMPRO:
25793 case PROCESSOR_K6:
25794 return 1;
25795
25796 case PROCESSOR_BDVER1:
25797 case PROCESSOR_BDVER2:
25798 case PROCESSOR_BDVER3:
25799 case PROCESSOR_BDVER4:
25800 /* We use lookahead value 4 for BD both before and after reload
25801 schedules. The plan is to include the value 8 for -O3. */
25802 return 4;
25803
25804 case PROCESSOR_CORE2:
25805 case PROCESSOR_NEHALEM:
25806 case PROCESSOR_SANDYBRIDGE:
25807 case PROCESSOR_HASWELL:
25808 case PROCESSOR_BONNELL:
25809 case PROCESSOR_SILVERMONT:
25810 case PROCESSOR_INTEL:
25811 /* Generally, we want haifa-sched:max_issue() to look ahead as far
25812 as the number of instructions that can be executed in a cycle, i.e.,
25813 issue_rate. I wonder why tuning for many CPUs does not do this. */
25814 if (reload_completed)
25815 return ix86_issue_rate ();
25816 /* Don't use lookahead for pre-reload schedule to save compile time. */
25817 return 0;
25818
25819 default:
25820 return 0;
25821 }
25822 }
25823
25824 /* Return true if target platform supports macro-fusion. */
25825
25826 static bool
25827 ix86_macro_fusion_p ()
25828 {
25829 return TARGET_FUSE_CMP_AND_BRANCH;
25830 }
25831
25832 /* Check whether the current microarchitecture supports macro fusion
25833 for the insn pair "CONDGEN + CONDJMP". Refer to the
25834 "Intel Architectures Optimization Reference Manual". */
25835
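/* For instance (an illustration of the checks below), "cmp %rax, %rbx"
   followed by "jne label" is a fusible pair, whereas "cmpl $1, (%rsp)"
   followed by "jne label" is not, because cmp/test with MEM and IMM
   operands does not macro-fuse. */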
25836 static bool
25837 ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
25838 {
25839 rtx src, dest;
25840 rtx single_set = single_set (condgen);
25841 enum rtx_code ccode;
25842 rtx compare_set = NULL_RTX, test_if, cond;
25843 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25844
25845 if (!any_condjump_p (condjmp))
25846 return false;
25847
25848 if (get_attr_type (condgen) != TYPE_TEST
25849 && get_attr_type (condgen) != TYPE_ICMP
25850 && get_attr_type (condgen) != TYPE_INCDEC
25851 && get_attr_type (condgen) != TYPE_ALU)
25852 return false;
25853
25854 if (single_set == NULL_RTX
25855 && !TARGET_FUSE_ALU_AND_BRANCH)
25856 return false;
25857
25858 if (single_set != NULL_RTX)
25859 compare_set = single_set;
25860 else
25861 {
25862 int i;
25863 rtx pat = PATTERN (condgen);
25864 for (i = 0; i < XVECLEN (pat, 0); i++)
25865 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25866 {
25867 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25868 if (GET_CODE (set_src) == COMPARE)
25869 compare_set = XVECEXP (pat, 0, i);
25870 else
25871 alu_set = XVECEXP (pat, 0, i);
25872 }
25873 }
25874 if (compare_set == NULL_RTX)
25875 return false;
25876 src = SET_SRC (compare_set);
25877 if (GET_CODE (src) != COMPARE)
25878 return false;
25879
25880 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25881 supported. */
25882 if ((MEM_P (XEXP (src, 0))
25883 && CONST_INT_P (XEXP (src, 1)))
25884 || (MEM_P (XEXP (src, 1))
25885 && CONST_INT_P (XEXP (src, 0))))
25886 return false;
25887
25888 /* No fusion for RIP-relative address. */
25889 if (MEM_P (XEXP (src, 0)))
25890 addr = XEXP (XEXP (src, 0), 0);
25891 else if (MEM_P (XEXP (src, 1)))
25892 addr = XEXP (XEXP (src, 1), 0);
25893
25894 if (addr) {
25895 ix86_address parts;
25896 int ok = ix86_decompose_address (addr, &parts);
25897 gcc_assert (ok);
25898
25899 if (rip_relative_addr_p (&parts))
25900 return false;
25901 }
25902
25903 test_if = SET_SRC (pc_set (condjmp));
25904 cond = XEXP (test_if, 0);
25905 ccode = GET_CODE (cond);
25906 /* Check whether the conditional jump uses the Sign or Overflow flags. */
25907 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25908 && (ccode == GE
25909 || ccode == GT
25910 || ccode == LE
25911 || ccode == LT))
25912 return false;
25913
25914 /* Return true for TYPE_TEST and TYPE_ICMP. */
25915 if (get_attr_type (condgen) == TYPE_TEST
25916 || get_attr_type (condgen) == TYPE_ICMP)
25917 return true;
25918
25919 /* The following handles the case of macro-fusion for alu + jmp. */
25920 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25921 return false;
25922
25923 /* No fusion for alu op with memory destination operand. */
25924 dest = SET_DEST (alu_set);
25925 if (MEM_P (dest))
25926 return false;
25927
25928 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25929 supported. */
25930 if (get_attr_type (condgen) == TYPE_INCDEC
25931 && (ccode == GEU
25932 || ccode == GTU
25933 || ccode == LEU
25934 || ccode == LTU))
25935 return false;
25936
25937 return true;
25938 }
25939
25940 /* Try to reorder the ready list to take advantage of Atom's pipelined IMUL
25941 execution. It is applied if
25942 (1) an IMUL instruction is at the top of the list;
25943 (2) there is exactly one producer of an independent IMUL instruction in
25944 the ready list.
25945 Return the index of the IMUL producer if it was found, and -1 otherwise. */
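/* For example (an illustrative summary of the search below): if an IMUL
   heads the ready list and some other ready insn is the sole producer
   feeding a second, independent IMUL, that producer's index is returned so
   ix86_sched_reorder can move it to the top and let the two IMULs overlap
   in Atom's pipelined multiplier. */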
25946 static int
25947 do_reorder_for_imul (rtx_insn **ready, int n_ready)
25948 {
25949 rtx_insn *insn;
25950 rtx set, insn1, insn2;
25951 sd_iterator_def sd_it;
25952 dep_t dep;
25953 int index = -1;
25954 int i;
25955
25956 if (!TARGET_BONNELL)
25957 return index;
25958
25959 /* Check that an IMUL instruction is at the top of the ready list. */
25960 insn = ready[n_ready - 1];
25961 set = single_set (insn);
25962 if (!set)
25963 return index;
25964 if (!(GET_CODE (SET_SRC (set)) == MULT
25965 && GET_MODE (SET_SRC (set)) == SImode))
25966 return index;
25967
25968 /* Search for producer of independent IMUL instruction. */
25969 for (i = n_ready - 2; i >= 0; i--)
25970 {
25971 insn = ready[i];
25972 if (!NONDEBUG_INSN_P (insn))
25973 continue;
25974 /* Skip IMUL instruction. */
25975 insn2 = PATTERN (insn);
25976 if (GET_CODE (insn2) == PARALLEL)
25977 insn2 = XVECEXP (insn2, 0, 0);
25978 if (GET_CODE (insn2) == SET
25979 && GET_CODE (SET_SRC (insn2)) == MULT
25980 && GET_MODE (SET_SRC (insn2)) == SImode)
25981 continue;
25982
25983 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
25984 {
25985 rtx con;
25986 con = DEP_CON (dep);
25987 if (!NONDEBUG_INSN_P (con))
25988 continue;
25989 insn1 = PATTERN (con);
25990 if (GET_CODE (insn1) == PARALLEL)
25991 insn1 = XVECEXP (insn1, 0, 0);
25992
25993 if (GET_CODE (insn1) == SET
25994 && GET_CODE (SET_SRC (insn1)) == MULT
25995 && GET_MODE (SET_SRC (insn1)) == SImode)
25996 {
25997 sd_iterator_def sd_it1;
25998 dep_t dep1;
25999 /* Check that there is no other producer for this IMUL. */
26000 index = i;
26001 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26002 {
26003 rtx pro;
26004 pro = DEP_PRO (dep1);
26005 if (!NONDEBUG_INSN_P (pro))
26006 continue;
26007 if (pro != insn)
26008 index = -1;
26009 }
26010 if (index >= 0)
26011 break;
26012 }
26013 }
26014 if (index >= 0)
26015 break;
26016 }
26017 return index;
26018 }
26019
26020 /* Try to find the best candidate for the top of the ready list if two insns
26021 have the same priority - the candidate is best if its dependees were
26022 scheduled earlier. Applied for Silvermont only.
26023 Return true if the top 2 insns must be interchanged. */
26024 static bool
26025 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26026 {
26027 rtx_insn *top = ready[n_ready - 1];
26028 rtx_insn *next = ready[n_ready - 2];
26029 rtx set;
26030 sd_iterator_def sd_it;
26031 dep_t dep;
26032 int clock1 = -1;
26033 int clock2 = -1;
26034 #define INSN_TICK(INSN) (HID (INSN)->tick)
26035
26036 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26037 return false;
26038
26039 if (!NONDEBUG_INSN_P (top))
26040 return false;
26041 if (!NONJUMP_INSN_P (top))
26042 return false;
26043 if (!NONDEBUG_INSN_P (next))
26044 return false;
26045 if (!NONJUMP_INSN_P (next))
26046 return false;
26047 set = single_set (top);
26048 if (!set)
26049 return false;
26050 set = single_set (next);
26051 if (!set)
26052 return false;
26053
26054 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26055 {
26056 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26057 return false;
26058 /* Determine the winner more precisely. */
26059 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26060 {
26061 rtx pro;
26062 pro = DEP_PRO (dep);
26063 if (!NONDEBUG_INSN_P (pro))
26064 continue;
26065 if (INSN_TICK (pro) > clock1)
26066 clock1 = INSN_TICK (pro);
26067 }
26068 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26069 {
26070 rtx pro;
26071 pro = DEP_PRO (dep);
26072 if (!NONDEBUG_INSN_P (pro))
26073 continue;
26074 if (INSN_TICK (pro) > clock2)
26075 clock2 = INSN_TICK (pro);
26076 }
26077
26078 if (clock1 == clock2)
26079 {
26080 /* Determine winner - load must win. */
26081 enum attr_memory memory1, memory2;
26082 memory1 = get_attr_memory (top);
26083 memory2 = get_attr_memory (next);
26084 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26085 return true;
26086 }
26087 return (bool) (clock2 < clock1);
26088 }
26089 return false;
26090 #undef INSN_TICK
26091 }
26092
26093 /* Perform possible reordering of the ready list, for Atom/Silvermont only.
26094 Return the issue rate. */
26095 static int
26096 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26097 int *pn_ready, int clock_var)
26098 {
26099 int issue_rate = -1;
26100 int n_ready = *pn_ready;
26101 int i;
26102 rtx_insn *insn;
26103 int index = -1;
26104
26105 /* Set up issue rate. */
26106 issue_rate = ix86_issue_rate ();
26107
26108 /* Do reordering for BONNELL/SILVERMONT only. */
26109 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26110 return issue_rate;
26111
26112 /* Nothing to do if ready list contains only 1 instruction. */
26113 if (n_ready <= 1)
26114 return issue_rate;
26115
26116 /* Do reordering for the post-reload scheduler only. */
26117 if (!reload_completed)
26118 return issue_rate;
26119
26120 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26121 {
26122 if (sched_verbose > 1)
26123 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26124 INSN_UID (ready[index]));
26125
26126 /* Put IMUL producer (ready[index]) at the top of ready list. */
26127 insn = ready[index];
26128 for (i = index; i < n_ready - 1; i++)
26129 ready[i] = ready[i + 1];
26130 ready[n_ready - 1] = insn;
26131 return issue_rate;
26132 }
26133 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26134 {
26135 if (sched_verbose > 1)
26136 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26137 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26138 /* Swap 2 top elements of ready list. */
26139 insn = ready[n_ready - 1];
26140 ready[n_ready - 1] = ready[n_ready - 2];
26141 ready[n_ready - 2] = insn;
26142 }
26143 return issue_rate;
26144 }
26145
26146 static bool
26147 ix86_class_likely_spilled_p (reg_class_t);
26148
26149 /* Return true if the LHS of INSN is a HW function argument register, and set
26150 *is_spilled to true if it is a likely-spilled HW register. */
26151 static bool
26152 insn_is_function_arg (rtx insn, bool* is_spilled)
26153 {
26154 rtx dst;
26155
26156 if (!NONDEBUG_INSN_P (insn))
26157 return false;
26158 /* Call instructions are not movable; ignore them. */
26159 if (CALL_P (insn))
26160 return false;
26161 insn = PATTERN (insn);
26162 if (GET_CODE (insn) == PARALLEL)
26163 insn = XVECEXP (insn, 0, 0);
26164 if (GET_CODE (insn) != SET)
26165 return false;
26166 dst = SET_DEST (insn);
26167 if (REG_P (dst) && HARD_REGISTER_P (dst)
26168 && ix86_function_arg_regno_p (REGNO (dst)))
26169 {
26170 /* Is it likely spilled HW register? */
26171 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26172 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26173 *is_spilled = true;
26174 return true;
26175 }
26176 return false;
26177 }
26178
26179 /* Add output dependencies for a chain of adjacent function arguments if
26180 there is a move to a likely-spilled HW register. Return the first argument
26181 if at least one dependence was added, or NULL otherwise. */
26182 static rtx_insn *
26183 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26184 {
26185 rtx_insn *insn;
26186 rtx_insn *last = call;
26187 rtx_insn *first_arg = NULL;
26188 bool is_spilled = false;
26189
26190 head = PREV_INSN (head);
26191
26192 /* Find the argument-passing instruction nearest to the call. */
26193 while (true)
26194 {
26195 last = PREV_INSN (last);
26196 if (last == head)
26197 return NULL;
26198 if (!NONDEBUG_INSN_P (last))
26199 continue;
26200 if (insn_is_function_arg (last, &is_spilled))
26201 break;
26202 return NULL;
26203 }
26204
26205 first_arg = last;
26206 while (true)
26207 {
26208 insn = PREV_INSN (last);
26209 if (!INSN_P (insn))
26210 break;
26211 if (insn == head)
26212 break;
26213 if (!NONDEBUG_INSN_P (insn))
26214 {
26215 last = insn;
26216 continue;
26217 }
26218 if (insn_is_function_arg (insn, &is_spilled))
26219 {
26220 /* Add an output dependence between two function arguments if the chain
26221 of output arguments contains likely-spilled HW registers. */
26222 if (is_spilled)
26223 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26224 first_arg = last = insn;
26225 }
26226 else
26227 break;
26228 }
26229 if (!is_spilled)
26230 return NULL;
26231 return first_arg;
26232 }
26233
26234 /* Add output or anti dependency from insn to first_arg to restrict its code
26235 motion. */
26236 static void
26237 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26238 {
26239 rtx set;
26240 rtx tmp;
26241
26242 set = single_set (insn);
26243 if (!set)
26244 return;
26245 tmp = SET_DEST (set);
26246 if (REG_P (tmp))
26247 {
26248 /* Add output dependency to the first function argument. */
26249 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26250 return;
26251 }
26252 /* Add anti dependency. */
26253 add_dependence (first_arg, insn, REG_DEP_ANTI);
26254 }
26255
26256 /* Avoid cross-block motion of a function argument by adding a dependency
26257 from the first non-jump instruction in BB. */
26258 static void
26259 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26260 {
26261 rtx_insn *insn = BB_END (bb);
26262
26263 while (insn)
26264 {
26265 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26266 {
26267 rtx set = single_set (insn);
26268 if (set)
26269 {
26270 avoid_func_arg_motion (arg, insn);
26271 return;
26272 }
26273 }
26274 if (insn == BB_HEAD (bb))
26275 return;
26276 insn = PREV_INSN (insn);
26277 }
26278 }
26279
26280 /* Hook for the pre-reload scheduler - avoid motion of function arguments
26281 passed in likely-spilled HW registers. */
26282 static void
26283 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26284 {
26285 rtx_insn *insn;
26286 rtx_insn *first_arg = NULL;
26287 if (reload_completed)
26288 return;
26289 while (head != tail && DEBUG_INSN_P (head))
26290 head = NEXT_INSN (head);
26291 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26292 if (INSN_P (insn) && CALL_P (insn))
26293 {
26294 first_arg = add_parameter_dependencies (insn, head);
26295 if (first_arg)
26296 {
26297 /* Add a dependee for the first argument to predecessors if the
26298 region contains more than one block. */
26299 basic_block bb = BLOCK_FOR_INSN (insn);
26300 int rgn = CONTAINING_RGN (bb->index);
26301 int nr_blks = RGN_NR_BLOCKS (rgn);
26302 /* Skip trivial regions and region head blocks that can have
26303 predecessors outside of region. */
26304 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26305 {
26306 edge e;
26307 edge_iterator ei;
26308
26309 /* Regions are SCCs with the exception of selective
26310 scheduling with pipelining of outer blocks enabled.
26311 So also check that immediate predecessors of a non-head
26312 block are in the same region. */
26313 FOR_EACH_EDGE (e, ei, bb->preds)
26314 {
26315 /* Avoid creating loop-carried dependencies by
26316 using the topological ordering in the region. */
26317 if (rgn == CONTAINING_RGN (e->src->index)
26318 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26319 add_dependee_for_func_arg (first_arg, e->src);
26320 }
26321 }
26322 insn = first_arg;
26323 if (insn == head)
26324 break;
26325 }
26326 }
26327 else if (first_arg)
26328 avoid_func_arg_motion (first_arg, insn);
26329 }
26330
26331 /* Hook for the pre-reload scheduler - set the priority of moves from likely-spilled
26332 HW registers to the maximum, to schedule them as soon as possible. These are
26333 moves from function argument registers at the top of the function entry
26334 and moves from function return value registers after a call. */
26335 static int
26336 ix86_adjust_priority (rtx insn, int priority)
26337 {
26338 rtx set;
26339
26340 if (reload_completed)
26341 return priority;
26342
26343 if (!NONDEBUG_INSN_P (insn))
26344 return priority;
26345
26346 set = single_set (insn);
26347 if (set)
26348 {
26349 rtx tmp = SET_SRC (set);
26350 if (REG_P (tmp)
26351 && HARD_REGISTER_P (tmp)
26352 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26353 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26354 return current_sched_info->sched_max_insns_priority;
26355 }
26356
26357 return priority;
26358 }
26359
26360 /* Model the decoder of Core 2/i7.
26361 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
26362 track the instruction fetch block boundaries and make sure that long
26363 (9+ bytes) instructions are assigned to D0. */
26364
26365 /* Maximum length of an insn that can be handled by
26366 a secondary decoder unit. '8' for Core 2/i7. */
26367 static int core2i7_secondary_decoder_max_insn_size;
26368
26369 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26370 '16' for Core 2/i7. */
26371 static int core2i7_ifetch_block_size;
26372
26373 /* Maximum number of instructions decoder can handle per cycle.
26374 '6' for Core 2/i7. */
26375 static int core2i7_ifetch_block_max_insns;
26376
26377 typedef struct ix86_first_cycle_multipass_data_ *
26378 ix86_first_cycle_multipass_data_t;
26379 typedef const struct ix86_first_cycle_multipass_data_ *
26380 const_ix86_first_cycle_multipass_data_t;
26381
26382 /* A variable to store target state across calls to max_issue within
26383 one cycle. */
26384 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26385 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26386
26387 /* Initialize DATA. */
26388 static void
26389 core2i7_first_cycle_multipass_init (void *_data)
26390 {
26391 ix86_first_cycle_multipass_data_t data
26392 = (ix86_first_cycle_multipass_data_t) _data;
26393
26394 data->ifetch_block_len = 0;
26395 data->ifetch_block_n_insns = 0;
26396 data->ready_try_change = NULL;
26397 data->ready_try_change_size = 0;
26398 }
26399
26400 /* Advancing the cycle; reset ifetch block counts. */
26401 static void
26402 core2i7_dfa_post_advance_cycle (void)
26403 {
26404 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26405
26406 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26407
26408 data->ifetch_block_len = 0;
26409 data->ifetch_block_n_insns = 0;
26410 }
26411
26412 static int min_insn_size (rtx);
26413
26414 /* Filter out insns from ready_try that the core will not be able to issue
26415 on the current cycle due to decoder restrictions. */
26416 static void
26417 core2i7_first_cycle_multipass_filter_ready_try
26418 (const_ix86_first_cycle_multipass_data_t data,
26419 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26420 {
26421 while (n_ready--)
26422 {
26423 rtx insn;
26424 int insn_size;
26425
26426 if (ready_try[n_ready])
26427 continue;
26428
26429 insn = get_ready_element (n_ready);
26430 insn_size = min_insn_size (insn);
26431
26432 if (/* If this is too long an insn for a secondary decoder ... */
26433 (!first_cycle_insn_p
26434 && insn_size > core2i7_secondary_decoder_max_insn_size)
26435 /* ... or it would not fit into the ifetch block ... */
26436 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26437 /* ... or the decoder is full already ... */
26438 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26439 /* ... mask the insn out. */
26440 {
26441 ready_try[n_ready] = 1;
26442
26443 if (data->ready_try_change)
26444 bitmap_set_bit (data->ready_try_change, n_ready);
26445 }
26446 }
26447 }
26448
26449 /* Prepare for a new round of multipass lookahead scheduling. */
26450 static void
26451 core2i7_first_cycle_multipass_begin (void *_data,
26452 signed char *ready_try, int n_ready,
26453 bool first_cycle_insn_p)
26454 {
26455 ix86_first_cycle_multipass_data_t data
26456 = (ix86_first_cycle_multipass_data_t) _data;
26457 const_ix86_first_cycle_multipass_data_t prev_data
26458 = ix86_first_cycle_multipass_data;
26459
26460 /* Restore the state from the end of the previous round. */
26461 data->ifetch_block_len = prev_data->ifetch_block_len;
26462 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26463
26464 /* Filter instructions that cannot be issued on current cycle due to
26465 decoder restrictions. */
26466 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26467 first_cycle_insn_p);
26468 }
26469
26470 /* INSN is being issued in current solution. Account for its impact on
26471 the decoder model. */
26472 static void
26473 core2i7_first_cycle_multipass_issue (void *_data,
26474 signed char *ready_try, int n_ready,
26475 rtx insn, const void *_prev_data)
26476 {
26477 ix86_first_cycle_multipass_data_t data
26478 = (ix86_first_cycle_multipass_data_t) _data;
26479 const_ix86_first_cycle_multipass_data_t prev_data
26480 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26481
26482 int insn_size = min_insn_size (insn);
26483
26484 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26485 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26486 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26487 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26488
26489 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26490 if (!data->ready_try_change)
26491 {
26492 data->ready_try_change = sbitmap_alloc (n_ready);
26493 data->ready_try_change_size = n_ready;
26494 }
26495 else if (data->ready_try_change_size < n_ready)
26496 {
26497 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26498 n_ready, 0);
26499 data->ready_try_change_size = n_ready;
26500 }
26501 bitmap_clear (data->ready_try_change);
26502
26503 /* Filter out insns from ready_try that the core will not be able to issue
26504 on the current cycle due to decoder restrictions. */
26505 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26506 false);
26507 }
26508
26509 /* Revert the effect on ready_try. */
26510 static void
26511 core2i7_first_cycle_multipass_backtrack (const void *_data,
26512 signed char *ready_try,
26513 int n_ready ATTRIBUTE_UNUSED)
26514 {
26515 const_ix86_first_cycle_multipass_data_t data
26516 = (const_ix86_first_cycle_multipass_data_t) _data;
26517 unsigned int i = 0;
26518 sbitmap_iterator sbi;
26519
26520 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26521 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26522 {
26523 ready_try[i] = 0;
26524 }
26525 }
26526
26527 /* Save the result of multipass lookahead scheduling for the next round. */
26528 static void
26529 core2i7_first_cycle_multipass_end (const void *_data)
26530 {
26531 const_ix86_first_cycle_multipass_data_t data
26532 = (const_ix86_first_cycle_multipass_data_t) _data;
26533 ix86_first_cycle_multipass_data_t next_data
26534 = ix86_first_cycle_multipass_data;
26535
26536 if (data != NULL)
26537 {
26538 next_data->ifetch_block_len = data->ifetch_block_len;
26539 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26540 }
26541 }
26542
26543 /* Deallocate target data. */
26544 static void
26545 core2i7_first_cycle_multipass_fini (void *_data)
26546 {
26547 ix86_first_cycle_multipass_data_t data
26548 = (ix86_first_cycle_multipass_data_t) _data;
26549
26550 if (data->ready_try_change)
26551 {
26552 sbitmap_free (data->ready_try_change);
26553 data->ready_try_change = NULL;
26554 data->ready_try_change_size = 0;
26555 }
26556 }
26557
26558 /* Prepare for scheduling pass. */
26559 static void
26560 ix86_sched_init_global (FILE *, int, int)
26561 {
26562 /* Install scheduling hooks for current CPU. Some of these hooks are used
26563 in time-critical parts of the scheduler, so we only set them up when
26564 they are actually used. */
26565 switch (ix86_tune)
26566 {
26567 case PROCESSOR_CORE2:
26568 case PROCESSOR_NEHALEM:
26569 case PROCESSOR_SANDYBRIDGE:
26570 case PROCESSOR_HASWELL:
26571 /* Do not perform multipass scheduling for pre-reload schedule
26572 to save compile time. */
26573 if (reload_completed)
26574 {
26575 targetm.sched.dfa_post_advance_cycle
26576 = core2i7_dfa_post_advance_cycle;
26577 targetm.sched.first_cycle_multipass_init
26578 = core2i7_first_cycle_multipass_init;
26579 targetm.sched.first_cycle_multipass_begin
26580 = core2i7_first_cycle_multipass_begin;
26581 targetm.sched.first_cycle_multipass_issue
26582 = core2i7_first_cycle_multipass_issue;
26583 targetm.sched.first_cycle_multipass_backtrack
26584 = core2i7_first_cycle_multipass_backtrack;
26585 targetm.sched.first_cycle_multipass_end
26586 = core2i7_first_cycle_multipass_end;
26587 targetm.sched.first_cycle_multipass_fini
26588 = core2i7_first_cycle_multipass_fini;
26589
26590 /* Set decoder parameters. */
26591 core2i7_secondary_decoder_max_insn_size = 8;
26592 core2i7_ifetch_block_size = 16;
26593 core2i7_ifetch_block_max_insns = 6;
26594 break;
26595 }
26596 /* ... Fall through ... */
26597 default:
26598 targetm.sched.dfa_post_advance_cycle = NULL;
26599 targetm.sched.first_cycle_multipass_init = NULL;
26600 targetm.sched.first_cycle_multipass_begin = NULL;
26601 targetm.sched.first_cycle_multipass_issue = NULL;
26602 targetm.sched.first_cycle_multipass_backtrack = NULL;
26603 targetm.sched.first_cycle_multipass_end = NULL;
26604 targetm.sched.first_cycle_multipass_fini = NULL;
26605 break;
26606 }
26607 }
26608
26609 \f
26610 /* Compute the alignment given to a constant that is being placed in memory.
26611 EXP is the constant and ALIGN is the alignment that the object would
26612 ordinarily have.
26613 The value of this function is used instead of that alignment to align
26614 the object. */
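/* For instance (illustrative of the checks below), a DFmode (double)
   constant placed in memory is given 64-bit alignment, and a 128-bit
   vector constant 128-bit alignment, even when ALIGN is smaller. */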
26615
26616 int
26617 ix86_constant_alignment (tree exp, int align)
26618 {
26619 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26620 || TREE_CODE (exp) == INTEGER_CST)
26621 {
26622 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26623 return 64;
26624 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26625 return 128;
26626 }
26627 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26628 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26629 return BITS_PER_WORD;
26630
26631 return align;
26632 }
26633
26634 /* Compute the alignment for a static variable.
26635 TYPE is the data type, and ALIGN is the alignment that
26636 the object would ordinarily have. The value of this function is used
26637 instead of that alignment to align the object. */
26638
26639 int
26640 ix86_data_alignment (tree type, int align, bool opt)
26641 {
26642 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26643 for symbols from other compilation units or symbols that don't need
26644 to bind locally. In order to preserve some ABI compatibility with
26645 those compilers, ensure we don't decrease alignment from what we
26646 used to assume. */
26647
26648 int max_align_compat
26649 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26650
26651 /* A data structure equal to or greater than the size of a cache line
26652 (64 bytes in the Pentium 4 and other recent Intel processors, including
26653 processors based on the Intel Core microarchitecture) should be aligned
26654 so that its base address is a multiple of the cache line size. */
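/* For example (illustrative only), with a 64-byte prefetch block this
   raises the alignment of a static aggregate of 64 bytes or more to
   64 bytes (512 bits) when optimizing. */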
26655
26656 int max_align
26657 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26658
26659 if (max_align < BITS_PER_WORD)
26660 max_align = BITS_PER_WORD;
26661
26662 if (opt
26663 && AGGREGATE_TYPE_P (type)
26664 && TYPE_SIZE (type)
26665 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26666 {
26667 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
26668 && align < max_align_compat)
26669 align = max_align_compat;
26670 if (wi::geu_p (TYPE_SIZE (type), max_align)
26671 && align < max_align)
26672 align = max_align;
26673 }
26674
26675 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
26676 to a 16-byte boundary. */
26677 if (TARGET_64BIT)
26678 {
26679 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26680 && TYPE_SIZE (type)
26681 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26682 && wi::geu_p (TYPE_SIZE (type), 128)
26683 && align < 128)
26684 return 128;
26685 }
26686
26687 if (!opt)
26688 return align;
26689
26690 if (TREE_CODE (type) == ARRAY_TYPE)
26691 {
26692 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26693 return 64;
26694 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26695 return 128;
26696 }
26697 else if (TREE_CODE (type) == COMPLEX_TYPE)
26698 {
26699
26700 if (TYPE_MODE (type) == DCmode && align < 64)
26701 return 64;
26702 if ((TYPE_MODE (type) == XCmode
26703 || TYPE_MODE (type) == TCmode) && align < 128)
26704 return 128;
26705 }
26706 else if ((TREE_CODE (type) == RECORD_TYPE
26707 || TREE_CODE (type) == UNION_TYPE
26708 || TREE_CODE (type) == QUAL_UNION_TYPE)
26709 && TYPE_FIELDS (type))
26710 {
26711 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26712 return 64;
26713 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26714 return 128;
26715 }
26716 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26717 || TREE_CODE (type) == INTEGER_TYPE)
26718 {
26719 if (TYPE_MODE (type) == DFmode && align < 64)
26720 return 64;
26721 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26722 return 128;
26723 }
26724
26725 return align;
26726 }
26727
26728 /* Compute the alignment for a local variable or a stack slot. EXP is
26729 the data type or decl itself, MODE is the widest mode available and
26730 ALIGN is the alignment that the object would ordinarily have. The
26731 value of this macro is used instead of that alignment to align the
26732 object. */
26733
26734 unsigned int
26735 ix86_local_alignment (tree exp, enum machine_mode mode,
26736 unsigned int align)
26737 {
26738 tree type, decl;
26739
26740 if (exp && DECL_P (exp))
26741 {
26742 type = TREE_TYPE (exp);
26743 decl = exp;
26744 }
26745 else
26746 {
26747 type = exp;
26748 decl = NULL;
26749 }
26750
26751 /* Don't do dynamic stack realignment for long long objects with
26752 -mpreferred-stack-boundary=2. */
26753 if (!TARGET_64BIT
26754 && align == 64
26755 && ix86_preferred_stack_boundary < 64
26756 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26757 && (!type || !TYPE_USER_ALIGN (type))
26758 && (!decl || !DECL_USER_ALIGN (decl)))
26759 align = 32;
26760
26761 /* If TYPE is NULL, we are allocating a stack slot for caller-save
26762 register in MODE. We will return the largest alignment of XF
26763 and DF. */
26764 if (!type)
26765 {
26766 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26767 align = GET_MODE_ALIGNMENT (DFmode);
26768 return align;
26769 }
26770
26771 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
26772 to a 16-byte boundary. The exact wording is:
26773
26774 An array uses the same alignment as its elements, except that a local or
26775 global array variable of length at least 16 bytes or
26776 a C99 variable-length array variable always has alignment of at least 16 bytes.
26777
26778 This was added to allow use of aligned SSE instructions on arrays. This
26779 rule is meant for static storage (where the compiler cannot do the analysis
26780 by itself). We follow it for automatic variables only when convenient.
26781 We fully control everything in the function being compiled, and functions from
26782 other units cannot rely on the alignment.
26783
26784 Exclude the va_list type. It is the common case of a local array where
26785 we cannot benefit from the alignment.
26786
26787 TODO: Probably we should optimize for size only when the variable is not escaping. */
26788 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26789 && TARGET_SSE)
26790 {
26791 if (AGGREGATE_TYPE_P (type)
26792 && (va_list_type_node == NULL_TREE
26793 || (TYPE_MAIN_VARIANT (type)
26794 != TYPE_MAIN_VARIANT (va_list_type_node)))
26795 && TYPE_SIZE (type)
26796 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26797 && wi::geu_p (TYPE_SIZE (type), 16)
26798 && align < 128)
26799 return 128;
26800 }
26801 if (TREE_CODE (type) == ARRAY_TYPE)
26802 {
26803 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26804 return 64;
26805 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26806 return 128;
26807 }
26808 else if (TREE_CODE (type) == COMPLEX_TYPE)
26809 {
26810 if (TYPE_MODE (type) == DCmode && align < 64)
26811 return 64;
26812 if ((TYPE_MODE (type) == XCmode
26813 || TYPE_MODE (type) == TCmode) && align < 128)
26814 return 128;
26815 }
26816 else if ((TREE_CODE (type) == RECORD_TYPE
26817 || TREE_CODE (type) == UNION_TYPE
26818 || TREE_CODE (type) == QUAL_UNION_TYPE)
26819 && TYPE_FIELDS (type))
26820 {
26821 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26822 return 64;
26823 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26824 return 128;
26825 }
26826 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26827 || TREE_CODE (type) == INTEGER_TYPE)
26828 {
26830 if (TYPE_MODE (type) == DFmode && align < 64)
26831 return 64;
26832 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26833 return 128;
26834 }
26835 return align;
26836 }
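
/* Illustrative sketch, not part of the GCC sources: when optimizing for
   speed with SSE enabled on x86-64, the rule above also gives large
   automatic aggregates a 16-byte-aligned stack slot, which is what lets
   aligned vector accesses be used on them.  'consume' below is a
   hypothetical external function, only there to keep the buffer live.  */
#if 0
extern void consume (char *);

void
fill_local_buffer (void)
{
  char local_buf[64];   /* >= 16 bytes: expected to get a 16-byte slot */
  __builtin_memset (local_buf, 0, sizeof local_buf);
  consume (local_buf);
}
#endif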
26837
26838 /* Compute the minimum required alignment for dynamic stack realignment
26839    purposes for a local variable, parameter, or stack slot.  EXP is
26840    the data type or decl itself, MODE is its mode, and ALIGN is the
26841    alignment that the object would ordinarily have.  */
26842
26843 unsigned int
26844 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26845 unsigned int align)
26846 {
26847 tree type, decl;
26848
26849 if (exp && DECL_P (exp))
26850 {
26851 type = TREE_TYPE (exp);
26852 decl = exp;
26853 }
26854 else
26855 {
26856 type = exp;
26857 decl = NULL;
26858 }
26859
26860 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26861 return align;
26862
26863 /* Don't do dynamic stack realignment for long long objects with
26864 -mpreferred-stack-boundary=2. */
26865 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26866 && (!type || !TYPE_USER_ALIGN (type))
26867 && (!decl || !DECL_USER_ALIGN (decl)))
26868 return 32;
26869
26870 return align;
26871 }
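
/* Illustrative sketch, not part of the GCC sources: with -m32
   -mpreferred-stack-boundary=2 the incoming stack is only kept 4-byte
   aligned, so a plain 'long long' local is not allowed to force dynamic
   realignment of the whole frame; an explicit user alignment still is.
   'observe' below is a hypothetical external function.  */
#if 0
extern void observe (long long *, long long *);

void
long_long_locals (void)
{
  long long plain;                                 /* no realignment forced  */
  long long forced __attribute__ ((aligned (8)));  /* user alignment honored */
  observe (&plain, &forced);
}
#endif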
26872 \f
26873 /* Find a location for the static chain incoming to a nested function.
26874 This is a register, unless all free registers are used by arguments. */
26875
26876 static rtx
26877 ix86_static_chain (const_tree fndecl, bool incoming_p)
26878 {
26879 unsigned regno;
26880
26881 if (!DECL_STATIC_CHAIN (fndecl))
26882 return NULL;
26883
26884 if (TARGET_64BIT)
26885 {
26886 /* We always use R10 in 64-bit mode. */
26887 regno = R10_REG;
26888 }
26889 else
26890 {
26891 tree fntype;
26892 unsigned int ccvt;
26893
26894 /* By default in 32-bit mode we use ECX to pass the static chain. */
26895 regno = CX_REG;
26896
26897 fntype = TREE_TYPE (fndecl);
26898 ccvt = ix86_get_callcvt (fntype);
26899 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26900 {
26901           /* Fastcall functions use ecx/edx for arguments, which leaves
26902              us with EAX for the static chain.  */
26905 regno = AX_REG;
26906 }
26907 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26908 {
26909           /* Thiscall functions use ecx for arguments, which leaves
26910              us with EAX and EDX for the static chain.
26911              We use EAX for ABI compatibility.  */
26912 regno = AX_REG;
26913 }
26914 else if (ix86_function_regparm (fntype, fndecl) == 3)
26915 {
26916 /* For regparm 3, we have no free call-clobbered registers in
26917 which to store the static chain. In order to implement this,
26918 we have the trampoline push the static chain to the stack.
26919 However, we can't push a value below the return address when
26920 we call the nested function directly, so we have to use an
26921 alternate entry point. For this we use ESI, and have the
26922 alternate entry point push ESI, so that things appear the
26923 same once we're executing the nested function. */
26924 if (incoming_p)
26925 {
26926 if (fndecl == current_function_decl)
26927 ix86_static_chain_on_stack = true;
26928 return gen_frame_mem (SImode,
26929 plus_constant (Pmode,
26930 arg_pointer_rtx, -8));
26931 }
26932 regno = SI_REG;
26933 }
26934 }
26935
26936 return gen_rtx_REG (Pmode, regno);
26937 }
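
/* Illustrative sketch, not part of the GCC sources: GNU C nested
   functions are the typical consumer of the static chain chosen above.
   The inner function below reaches 'local' through the chain register
   (ECX by default on ia32, R10 on x86-64); only when regparm(3) uses up
   all free call-clobbered registers does the chain go through the stack.  */
#if 0
int
outer (int n)
{
  int local = n * 2;

  int inner (int x)          /* nested function, needs the static chain */
  {
    return x + local;
  }

  return inner (n);
}
#endif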
26938
26939 /* Emit RTL insns to initialize the variable parts of a trampoline.
26940 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26941 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26942 to be passed to the target function. */
26943
26944 static void
26945 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26946 {
26947 rtx mem, fnaddr;
26948 int opcode;
26949 int offset = 0;
26950
26951 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26952
26953 if (TARGET_64BIT)
26954 {
26955 int size;
26956
26957       /* Load the function address into r11.  Try to load the address
26958          using the shorter movl instead of movabs.  We may want to support
26959          movq for kernel mode, but the kernel does not use trampolines at
26960          the moment.  FNADDR is a 32-bit address and may not be in
26961          DImode when ptr_mode == SImode; always use movl in that
26962          case.  */
26963 if (ptr_mode == SImode
26964 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26965 {
26966 fnaddr = copy_addr_to_reg (fnaddr);
26967
26968 mem = adjust_address (m_tramp, HImode, offset);
26969 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26970
26971 mem = adjust_address (m_tramp, SImode, offset + 2);
26972 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
26973 offset += 6;
26974 }
26975 else
26976 {
26977 mem = adjust_address (m_tramp, HImode, offset);
26978 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
26979
26980 mem = adjust_address (m_tramp, DImode, offset + 2);
26981 emit_move_insn (mem, fnaddr);
26982 offset += 10;
26983 }
26984
26985       /* Load the static chain into r10 using movabs.  Use the shorter
26986          movl instead of movabs when ptr_mode == SImode.  */
26987 if (ptr_mode == SImode)
26988 {
26989 opcode = 0xba41;
26990 size = 6;
26991 }
26992 else
26993 {
26994 opcode = 0xba49;
26995 size = 10;
26996 }
26997
26998 mem = adjust_address (m_tramp, HImode, offset);
26999 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27000
27001 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27002 emit_move_insn (mem, chain_value);
27003 offset += size;
27004
27005 /* Jump to r11; the last (unused) byte is a nop, only there to
27006 pad the write out to a single 32-bit store. */
27007 mem = adjust_address (m_tramp, SImode, offset);
27008 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27009 offset += 4;
27010 }
27011 else
27012 {
27013 rtx disp, chain;
27014
27015 /* Depending on the static chain location, either load a register
27016 with a constant, or push the constant to the stack. All of the
27017 instructions are the same size. */
27018 chain = ix86_static_chain (fndecl, true);
27019 if (REG_P (chain))
27020 {
27021 switch (REGNO (chain))
27022 {
27023 case AX_REG:
27024 opcode = 0xb8; break;
27025 case CX_REG:
27026 opcode = 0xb9; break;
27027 default:
27028 gcc_unreachable ();
27029 }
27030 }
27031 else
27032 opcode = 0x68;
27033
27034 mem = adjust_address (m_tramp, QImode, offset);
27035 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27036
27037 mem = adjust_address (m_tramp, SImode, offset + 1);
27038 emit_move_insn (mem, chain_value);
27039 offset += 5;
27040
27041 mem = adjust_address (m_tramp, QImode, offset);
27042 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27043
27044 mem = adjust_address (m_tramp, SImode, offset + 1);
27045
27046       /* Compute the offset from the end of the jmp to the target function.
27047          When the trampoline stores the static chain on the stack, we need
27048          to skip the first insn, which pushes the (call-saved) static chain
27049          register; this push is 1 byte.  */
27050 offset += 5;
27051 disp = expand_binop (SImode, sub_optab, fnaddr,
27052 plus_constant (Pmode, XEXP (m_tramp, 0),
27053 offset - (MEM_P (chain) ? 1 : 0)),
27054 NULL_RTX, 1, OPTAB_DIRECT);
27055 emit_move_insn (mem, disp);
27056 }
27057
27058 gcc_assert (offset <= TRAMPOLINE_SIZE);
27059
27060 #ifdef HAVE_ENABLE_EXECUTE_STACK
27061 #ifdef CHECK_EXECUTE_STACK_ENABLED
27062 if (CHECK_EXECUTE_STACK_ENABLED)
27063 #endif
27064 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27065 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27066 #endif
27067 }
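
/* Illustrative decoding of the trampoline bytes emitted above (a sketch
   derived from the constants in ix86_trampoline_init, not a normative
   listing):

     64-bit (ptr_mode == DImode):
       49 bb <imm64>   movabs $fnaddr, %r11
       49 ba <imm64>   movabs $chain,  %r10
       49 ff e3 90     jmp *%r11; nop        (nop pads the 32-bit store)

     x32 (ptr_mode == SImode) uses the shorter 41 bb <imm32> / 41 ba <imm32>
     movl forms for the two loads instead.

     32-bit:
       b8|b9 <imm32>   mov $chain, %eax|%ecx   (or 68 <imm32>: push $chain)
       e9 <rel32>      jmp <target>  */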
27068 \f
27069 /* The following file contains several enumerations and data structures
27070 built from the definitions in i386-builtin-types.def. */
27071
27072 #include "i386-builtin-types.inc"
27073
27074 /* Table for the ix86 builtin non-function types. */
27075 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27076
27077 /* Retrieve an element from the above table, building some of
27078 the types lazily. */
27079
27080 static tree
27081 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27082 {
27083 unsigned int index;
27084 tree type, itype;
27085
27086 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27087
27088 type = ix86_builtin_type_tab[(int) tcode];
27089 if (type != NULL)
27090 return type;
27091
27092 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27093 if (tcode <= IX86_BT_LAST_VECT)
27094 {
27095 enum machine_mode mode;
27096
27097 index = tcode - IX86_BT_LAST_PRIM - 1;
27098 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27099 mode = ix86_builtin_type_vect_mode[index];
27100
27101 type = build_vector_type_for_mode (itype, mode);
27102 }
27103 else
27104 {
27105 int quals;
27106
27107 index = tcode - IX86_BT_LAST_VECT - 1;
27108 if (tcode <= IX86_BT_LAST_PTR)
27109 quals = TYPE_UNQUALIFIED;
27110 else
27111 quals = TYPE_QUAL_CONST;
27112
27113 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27114 if (quals != TYPE_UNQUALIFIED)
27115 itype = build_qualified_type (itype, quals);
27116
27117 type = build_pointer_type (itype);
27118 }
27119
27120 ix86_builtin_type_tab[(int) tcode] = type;
27121 return type;
27122 }
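
/* A minimal sketch of the lazy-build-and-cache pattern used above
   (illustrative only; 'make_type', 'N_CODES' and the table below are
   hypothetical stand-ins, not GCC interfaces):  */
#if 0
static tree type_cache[N_CODES];

static tree
get_type_cached (int code)
{
  if (type_cache[code] == NULL_TREE)
    type_cache[code] = make_type (code);   /* built on first use only */
  return type_cache[code];
}
#endif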
27123
27124 /* Table for the ix86 builtin function types. */
27125 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27126
27127 /* Retrieve an element from the above table, building some of
27128 the types lazily. */
27129
27130 static tree
27131 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27132 {
27133 tree type;
27134
27135 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27136
27137 type = ix86_builtin_func_type_tab[(int) tcode];
27138 if (type != NULL)
27139 return type;
27140
27141 if (tcode <= IX86_BT_LAST_FUNC)
27142 {
27143 unsigned start = ix86_builtin_func_start[(int) tcode];
27144 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27145 tree rtype, atype, args = void_list_node;
27146 unsigned i;
27147
27148 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27149 for (i = after - 1; i > start; --i)
27150 {
27151 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27152 args = tree_cons (NULL, atype, args);
27153 }
27154
27155 type = build_function_type (rtype, args);
27156 }
27157 else
27158 {
27159 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27160 enum ix86_builtin_func_type icode;
27161
27162 icode = ix86_builtin_func_alias_base[index];
27163 type = ix86_get_builtin_func_type (icode);
27164 }
27165
27166 ix86_builtin_func_type_tab[(int) tcode] = type;
27167 return type;
27168 }
27169
27170
27171 /* Codes for all the SSE/MMX builtins. */
27172 enum ix86_builtins
27173 {
27174 IX86_BUILTIN_ADDPS,
27175 IX86_BUILTIN_ADDSS,
27176 IX86_BUILTIN_DIVPS,
27177 IX86_BUILTIN_DIVSS,
27178 IX86_BUILTIN_MULPS,
27179 IX86_BUILTIN_MULSS,
27180 IX86_BUILTIN_SUBPS,
27181 IX86_BUILTIN_SUBSS,
27182
27183 IX86_BUILTIN_CMPEQPS,
27184 IX86_BUILTIN_CMPLTPS,
27185 IX86_BUILTIN_CMPLEPS,
27186 IX86_BUILTIN_CMPGTPS,
27187 IX86_BUILTIN_CMPGEPS,
27188 IX86_BUILTIN_CMPNEQPS,
27189 IX86_BUILTIN_CMPNLTPS,
27190 IX86_BUILTIN_CMPNLEPS,
27191 IX86_BUILTIN_CMPNGTPS,
27192 IX86_BUILTIN_CMPNGEPS,
27193 IX86_BUILTIN_CMPORDPS,
27194 IX86_BUILTIN_CMPUNORDPS,
27195 IX86_BUILTIN_CMPEQSS,
27196 IX86_BUILTIN_CMPLTSS,
27197 IX86_BUILTIN_CMPLESS,
27198 IX86_BUILTIN_CMPNEQSS,
27199 IX86_BUILTIN_CMPNLTSS,
27200 IX86_BUILTIN_CMPNLESS,
27201 IX86_BUILTIN_CMPORDSS,
27202 IX86_BUILTIN_CMPUNORDSS,
27203
27204 IX86_BUILTIN_COMIEQSS,
27205 IX86_BUILTIN_COMILTSS,
27206 IX86_BUILTIN_COMILESS,
27207 IX86_BUILTIN_COMIGTSS,
27208 IX86_BUILTIN_COMIGESS,
27209 IX86_BUILTIN_COMINEQSS,
27210 IX86_BUILTIN_UCOMIEQSS,
27211 IX86_BUILTIN_UCOMILTSS,
27212 IX86_BUILTIN_UCOMILESS,
27213 IX86_BUILTIN_UCOMIGTSS,
27214 IX86_BUILTIN_UCOMIGESS,
27215 IX86_BUILTIN_UCOMINEQSS,
27216
27217 IX86_BUILTIN_CVTPI2PS,
27218 IX86_BUILTIN_CVTPS2PI,
27219 IX86_BUILTIN_CVTSI2SS,
27220 IX86_BUILTIN_CVTSI642SS,
27221 IX86_BUILTIN_CVTSS2SI,
27222 IX86_BUILTIN_CVTSS2SI64,
27223 IX86_BUILTIN_CVTTPS2PI,
27224 IX86_BUILTIN_CVTTSS2SI,
27225 IX86_BUILTIN_CVTTSS2SI64,
27226
27227 IX86_BUILTIN_MAXPS,
27228 IX86_BUILTIN_MAXSS,
27229 IX86_BUILTIN_MINPS,
27230 IX86_BUILTIN_MINSS,
27231
27232 IX86_BUILTIN_LOADUPS,
27233 IX86_BUILTIN_STOREUPS,
27234 IX86_BUILTIN_MOVSS,
27235
27236 IX86_BUILTIN_MOVHLPS,
27237 IX86_BUILTIN_MOVLHPS,
27238 IX86_BUILTIN_LOADHPS,
27239 IX86_BUILTIN_LOADLPS,
27240 IX86_BUILTIN_STOREHPS,
27241 IX86_BUILTIN_STORELPS,
27242
27243 IX86_BUILTIN_MASKMOVQ,
27244 IX86_BUILTIN_MOVMSKPS,
27245 IX86_BUILTIN_PMOVMSKB,
27246
27247 IX86_BUILTIN_MOVNTPS,
27248 IX86_BUILTIN_MOVNTQ,
27249
27250 IX86_BUILTIN_LOADDQU,
27251 IX86_BUILTIN_STOREDQU,
27252
27253 IX86_BUILTIN_PACKSSWB,
27254 IX86_BUILTIN_PACKSSDW,
27255 IX86_BUILTIN_PACKUSWB,
27256
27257 IX86_BUILTIN_PADDB,
27258 IX86_BUILTIN_PADDW,
27259 IX86_BUILTIN_PADDD,
27260 IX86_BUILTIN_PADDQ,
27261 IX86_BUILTIN_PADDSB,
27262 IX86_BUILTIN_PADDSW,
27263 IX86_BUILTIN_PADDUSB,
27264 IX86_BUILTIN_PADDUSW,
27265 IX86_BUILTIN_PSUBB,
27266 IX86_BUILTIN_PSUBW,
27267 IX86_BUILTIN_PSUBD,
27268 IX86_BUILTIN_PSUBQ,
27269 IX86_BUILTIN_PSUBSB,
27270 IX86_BUILTIN_PSUBSW,
27271 IX86_BUILTIN_PSUBUSB,
27272 IX86_BUILTIN_PSUBUSW,
27273
27274 IX86_BUILTIN_PAND,
27275 IX86_BUILTIN_PANDN,
27276 IX86_BUILTIN_POR,
27277 IX86_BUILTIN_PXOR,
27278
27279 IX86_BUILTIN_PAVGB,
27280 IX86_BUILTIN_PAVGW,
27281
27282 IX86_BUILTIN_PCMPEQB,
27283 IX86_BUILTIN_PCMPEQW,
27284 IX86_BUILTIN_PCMPEQD,
27285 IX86_BUILTIN_PCMPGTB,
27286 IX86_BUILTIN_PCMPGTW,
27287 IX86_BUILTIN_PCMPGTD,
27288
27289 IX86_BUILTIN_PMADDWD,
27290
27291 IX86_BUILTIN_PMAXSW,
27292 IX86_BUILTIN_PMAXUB,
27293 IX86_BUILTIN_PMINSW,
27294 IX86_BUILTIN_PMINUB,
27295
27296 IX86_BUILTIN_PMULHUW,
27297 IX86_BUILTIN_PMULHW,
27298 IX86_BUILTIN_PMULLW,
27299
27300 IX86_BUILTIN_PSADBW,
27301 IX86_BUILTIN_PSHUFW,
27302
27303 IX86_BUILTIN_PSLLW,
27304 IX86_BUILTIN_PSLLD,
27305 IX86_BUILTIN_PSLLQ,
27306 IX86_BUILTIN_PSRAW,
27307 IX86_BUILTIN_PSRAD,
27308 IX86_BUILTIN_PSRLW,
27309 IX86_BUILTIN_PSRLD,
27310 IX86_BUILTIN_PSRLQ,
27311 IX86_BUILTIN_PSLLWI,
27312 IX86_BUILTIN_PSLLDI,
27313 IX86_BUILTIN_PSLLQI,
27314 IX86_BUILTIN_PSRAWI,
27315 IX86_BUILTIN_PSRADI,
27316 IX86_BUILTIN_PSRLWI,
27317 IX86_BUILTIN_PSRLDI,
27318 IX86_BUILTIN_PSRLQI,
27319
27320 IX86_BUILTIN_PUNPCKHBW,
27321 IX86_BUILTIN_PUNPCKHWD,
27322 IX86_BUILTIN_PUNPCKHDQ,
27323 IX86_BUILTIN_PUNPCKLBW,
27324 IX86_BUILTIN_PUNPCKLWD,
27325 IX86_BUILTIN_PUNPCKLDQ,
27326
27327 IX86_BUILTIN_SHUFPS,
27328
27329 IX86_BUILTIN_RCPPS,
27330 IX86_BUILTIN_RCPSS,
27331 IX86_BUILTIN_RSQRTPS,
27332 IX86_BUILTIN_RSQRTPS_NR,
27333 IX86_BUILTIN_RSQRTSS,
27334 IX86_BUILTIN_RSQRTF,
27335 IX86_BUILTIN_SQRTPS,
27336 IX86_BUILTIN_SQRTPS_NR,
27337 IX86_BUILTIN_SQRTSS,
27338
27339 IX86_BUILTIN_UNPCKHPS,
27340 IX86_BUILTIN_UNPCKLPS,
27341
27342 IX86_BUILTIN_ANDPS,
27343 IX86_BUILTIN_ANDNPS,
27344 IX86_BUILTIN_ORPS,
27345 IX86_BUILTIN_XORPS,
27346
27347 IX86_BUILTIN_EMMS,
27348 IX86_BUILTIN_LDMXCSR,
27349 IX86_BUILTIN_STMXCSR,
27350 IX86_BUILTIN_SFENCE,
27351
27352 IX86_BUILTIN_FXSAVE,
27353 IX86_BUILTIN_FXRSTOR,
27354 IX86_BUILTIN_FXSAVE64,
27355 IX86_BUILTIN_FXRSTOR64,
27356
27357 IX86_BUILTIN_XSAVE,
27358 IX86_BUILTIN_XRSTOR,
27359 IX86_BUILTIN_XSAVE64,
27360 IX86_BUILTIN_XRSTOR64,
27361
27362 IX86_BUILTIN_XSAVEOPT,
27363 IX86_BUILTIN_XSAVEOPT64,
27364
27365 IX86_BUILTIN_XSAVEC,
27366 IX86_BUILTIN_XSAVEC64,
27367
27368 IX86_BUILTIN_XSAVES,
27369 IX86_BUILTIN_XRSTORS,
27370 IX86_BUILTIN_XSAVES64,
27371 IX86_BUILTIN_XRSTORS64,
27372
27373 /* 3DNow! Original */
27374 IX86_BUILTIN_FEMMS,
27375 IX86_BUILTIN_PAVGUSB,
27376 IX86_BUILTIN_PF2ID,
27377 IX86_BUILTIN_PFACC,
27378 IX86_BUILTIN_PFADD,
27379 IX86_BUILTIN_PFCMPEQ,
27380 IX86_BUILTIN_PFCMPGE,
27381 IX86_BUILTIN_PFCMPGT,
27382 IX86_BUILTIN_PFMAX,
27383 IX86_BUILTIN_PFMIN,
27384 IX86_BUILTIN_PFMUL,
27385 IX86_BUILTIN_PFRCP,
27386 IX86_BUILTIN_PFRCPIT1,
27387 IX86_BUILTIN_PFRCPIT2,
27388 IX86_BUILTIN_PFRSQIT1,
27389 IX86_BUILTIN_PFRSQRT,
27390 IX86_BUILTIN_PFSUB,
27391 IX86_BUILTIN_PFSUBR,
27392 IX86_BUILTIN_PI2FD,
27393 IX86_BUILTIN_PMULHRW,
27394
27395 /* 3DNow! Athlon Extensions */
27396 IX86_BUILTIN_PF2IW,
27397 IX86_BUILTIN_PFNACC,
27398 IX86_BUILTIN_PFPNACC,
27399 IX86_BUILTIN_PI2FW,
27400 IX86_BUILTIN_PSWAPDSI,
27401 IX86_BUILTIN_PSWAPDSF,
27402
27403 /* SSE2 */
27404 IX86_BUILTIN_ADDPD,
27405 IX86_BUILTIN_ADDSD,
27406 IX86_BUILTIN_DIVPD,
27407 IX86_BUILTIN_DIVSD,
27408 IX86_BUILTIN_MULPD,
27409 IX86_BUILTIN_MULSD,
27410 IX86_BUILTIN_SUBPD,
27411 IX86_BUILTIN_SUBSD,
27412
27413 IX86_BUILTIN_CMPEQPD,
27414 IX86_BUILTIN_CMPLTPD,
27415 IX86_BUILTIN_CMPLEPD,
27416 IX86_BUILTIN_CMPGTPD,
27417 IX86_BUILTIN_CMPGEPD,
27418 IX86_BUILTIN_CMPNEQPD,
27419 IX86_BUILTIN_CMPNLTPD,
27420 IX86_BUILTIN_CMPNLEPD,
27421 IX86_BUILTIN_CMPNGTPD,
27422 IX86_BUILTIN_CMPNGEPD,
27423 IX86_BUILTIN_CMPORDPD,
27424 IX86_BUILTIN_CMPUNORDPD,
27425 IX86_BUILTIN_CMPEQSD,
27426 IX86_BUILTIN_CMPLTSD,
27427 IX86_BUILTIN_CMPLESD,
27428 IX86_BUILTIN_CMPNEQSD,
27429 IX86_BUILTIN_CMPNLTSD,
27430 IX86_BUILTIN_CMPNLESD,
27431 IX86_BUILTIN_CMPORDSD,
27432 IX86_BUILTIN_CMPUNORDSD,
27433
27434 IX86_BUILTIN_COMIEQSD,
27435 IX86_BUILTIN_COMILTSD,
27436 IX86_BUILTIN_COMILESD,
27437 IX86_BUILTIN_COMIGTSD,
27438 IX86_BUILTIN_COMIGESD,
27439 IX86_BUILTIN_COMINEQSD,
27440 IX86_BUILTIN_UCOMIEQSD,
27441 IX86_BUILTIN_UCOMILTSD,
27442 IX86_BUILTIN_UCOMILESD,
27443 IX86_BUILTIN_UCOMIGTSD,
27444 IX86_BUILTIN_UCOMIGESD,
27445 IX86_BUILTIN_UCOMINEQSD,
27446
27447 IX86_BUILTIN_MAXPD,
27448 IX86_BUILTIN_MAXSD,
27449 IX86_BUILTIN_MINPD,
27450 IX86_BUILTIN_MINSD,
27451
27452 IX86_BUILTIN_ANDPD,
27453 IX86_BUILTIN_ANDNPD,
27454 IX86_BUILTIN_ORPD,
27455 IX86_BUILTIN_XORPD,
27456
27457 IX86_BUILTIN_SQRTPD,
27458 IX86_BUILTIN_SQRTSD,
27459
27460 IX86_BUILTIN_UNPCKHPD,
27461 IX86_BUILTIN_UNPCKLPD,
27462
27463 IX86_BUILTIN_SHUFPD,
27464
27465 IX86_BUILTIN_LOADUPD,
27466 IX86_BUILTIN_STOREUPD,
27467 IX86_BUILTIN_MOVSD,
27468
27469 IX86_BUILTIN_LOADHPD,
27470 IX86_BUILTIN_LOADLPD,
27471
27472 IX86_BUILTIN_CVTDQ2PD,
27473 IX86_BUILTIN_CVTDQ2PS,
27474
27475 IX86_BUILTIN_CVTPD2DQ,
27476 IX86_BUILTIN_CVTPD2PI,
27477 IX86_BUILTIN_CVTPD2PS,
27478 IX86_BUILTIN_CVTTPD2DQ,
27479 IX86_BUILTIN_CVTTPD2PI,
27480
27481 IX86_BUILTIN_CVTPI2PD,
27482 IX86_BUILTIN_CVTSI2SD,
27483 IX86_BUILTIN_CVTSI642SD,
27484
27485 IX86_BUILTIN_CVTSD2SI,
27486 IX86_BUILTIN_CVTSD2SI64,
27487 IX86_BUILTIN_CVTSD2SS,
27488 IX86_BUILTIN_CVTSS2SD,
27489 IX86_BUILTIN_CVTTSD2SI,
27490 IX86_BUILTIN_CVTTSD2SI64,
27491
27492 IX86_BUILTIN_CVTPS2DQ,
27493 IX86_BUILTIN_CVTPS2PD,
27494 IX86_BUILTIN_CVTTPS2DQ,
27495
27496 IX86_BUILTIN_MOVNTI,
27497 IX86_BUILTIN_MOVNTI64,
27498 IX86_BUILTIN_MOVNTPD,
27499 IX86_BUILTIN_MOVNTDQ,
27500
27501 IX86_BUILTIN_MOVQ128,
27502
27503 /* SSE2 MMX */
27504 IX86_BUILTIN_MASKMOVDQU,
27505 IX86_BUILTIN_MOVMSKPD,
27506 IX86_BUILTIN_PMOVMSKB128,
27507
27508 IX86_BUILTIN_PACKSSWB128,
27509 IX86_BUILTIN_PACKSSDW128,
27510 IX86_BUILTIN_PACKUSWB128,
27511
27512 IX86_BUILTIN_PADDB128,
27513 IX86_BUILTIN_PADDW128,
27514 IX86_BUILTIN_PADDD128,
27515 IX86_BUILTIN_PADDQ128,
27516 IX86_BUILTIN_PADDSB128,
27517 IX86_BUILTIN_PADDSW128,
27518 IX86_BUILTIN_PADDUSB128,
27519 IX86_BUILTIN_PADDUSW128,
27520 IX86_BUILTIN_PSUBB128,
27521 IX86_BUILTIN_PSUBW128,
27522 IX86_BUILTIN_PSUBD128,
27523 IX86_BUILTIN_PSUBQ128,
27524 IX86_BUILTIN_PSUBSB128,
27525 IX86_BUILTIN_PSUBSW128,
27526 IX86_BUILTIN_PSUBUSB128,
27527 IX86_BUILTIN_PSUBUSW128,
27528
27529 IX86_BUILTIN_PAND128,
27530 IX86_BUILTIN_PANDN128,
27531 IX86_BUILTIN_POR128,
27532 IX86_BUILTIN_PXOR128,
27533
27534 IX86_BUILTIN_PAVGB128,
27535 IX86_BUILTIN_PAVGW128,
27536
27537 IX86_BUILTIN_PCMPEQB128,
27538 IX86_BUILTIN_PCMPEQW128,
27539 IX86_BUILTIN_PCMPEQD128,
27540 IX86_BUILTIN_PCMPGTB128,
27541 IX86_BUILTIN_PCMPGTW128,
27542 IX86_BUILTIN_PCMPGTD128,
27543
27544 IX86_BUILTIN_PMADDWD128,
27545
27546 IX86_BUILTIN_PMAXSW128,
27547 IX86_BUILTIN_PMAXUB128,
27548 IX86_BUILTIN_PMINSW128,
27549 IX86_BUILTIN_PMINUB128,
27550
27551 IX86_BUILTIN_PMULUDQ,
27552 IX86_BUILTIN_PMULUDQ128,
27553 IX86_BUILTIN_PMULHUW128,
27554 IX86_BUILTIN_PMULHW128,
27555 IX86_BUILTIN_PMULLW128,
27556
27557 IX86_BUILTIN_PSADBW128,
27558 IX86_BUILTIN_PSHUFHW,
27559 IX86_BUILTIN_PSHUFLW,
27560 IX86_BUILTIN_PSHUFD,
27561
27562 IX86_BUILTIN_PSLLDQI128,
27563 IX86_BUILTIN_PSLLWI128,
27564 IX86_BUILTIN_PSLLDI128,
27565 IX86_BUILTIN_PSLLQI128,
27566 IX86_BUILTIN_PSRAWI128,
27567 IX86_BUILTIN_PSRADI128,
27568 IX86_BUILTIN_PSRLDQI128,
27569 IX86_BUILTIN_PSRLWI128,
27570 IX86_BUILTIN_PSRLDI128,
27571 IX86_BUILTIN_PSRLQI128,
27572
27573 IX86_BUILTIN_PSLLDQ128,
27574 IX86_BUILTIN_PSLLW128,
27575 IX86_BUILTIN_PSLLD128,
27576 IX86_BUILTIN_PSLLQ128,
27577 IX86_BUILTIN_PSRAW128,
27578 IX86_BUILTIN_PSRAD128,
27579 IX86_BUILTIN_PSRLW128,
27580 IX86_BUILTIN_PSRLD128,
27581 IX86_BUILTIN_PSRLQ128,
27582
27583 IX86_BUILTIN_PUNPCKHBW128,
27584 IX86_BUILTIN_PUNPCKHWD128,
27585 IX86_BUILTIN_PUNPCKHDQ128,
27586 IX86_BUILTIN_PUNPCKHQDQ128,
27587 IX86_BUILTIN_PUNPCKLBW128,
27588 IX86_BUILTIN_PUNPCKLWD128,
27589 IX86_BUILTIN_PUNPCKLDQ128,
27590 IX86_BUILTIN_PUNPCKLQDQ128,
27591
27592 IX86_BUILTIN_CLFLUSH,
27593 IX86_BUILTIN_MFENCE,
27594 IX86_BUILTIN_LFENCE,
27595 IX86_BUILTIN_PAUSE,
27596
27597 IX86_BUILTIN_FNSTENV,
27598 IX86_BUILTIN_FLDENV,
27599 IX86_BUILTIN_FNSTSW,
27600 IX86_BUILTIN_FNCLEX,
27601
27602 IX86_BUILTIN_BSRSI,
27603 IX86_BUILTIN_BSRDI,
27604 IX86_BUILTIN_RDPMC,
27605 IX86_BUILTIN_RDTSC,
27606 IX86_BUILTIN_RDTSCP,
27607 IX86_BUILTIN_ROLQI,
27608 IX86_BUILTIN_ROLHI,
27609 IX86_BUILTIN_RORQI,
27610 IX86_BUILTIN_RORHI,
27611
27612 /* SSE3. */
27613 IX86_BUILTIN_ADDSUBPS,
27614 IX86_BUILTIN_HADDPS,
27615 IX86_BUILTIN_HSUBPS,
27616 IX86_BUILTIN_MOVSHDUP,
27617 IX86_BUILTIN_MOVSLDUP,
27618 IX86_BUILTIN_ADDSUBPD,
27619 IX86_BUILTIN_HADDPD,
27620 IX86_BUILTIN_HSUBPD,
27621 IX86_BUILTIN_LDDQU,
27622
27623 IX86_BUILTIN_MONITOR,
27624 IX86_BUILTIN_MWAIT,
27625
27626 /* SSSE3. */
27627 IX86_BUILTIN_PHADDW,
27628 IX86_BUILTIN_PHADDD,
27629 IX86_BUILTIN_PHADDSW,
27630 IX86_BUILTIN_PHSUBW,
27631 IX86_BUILTIN_PHSUBD,
27632 IX86_BUILTIN_PHSUBSW,
27633 IX86_BUILTIN_PMADDUBSW,
27634 IX86_BUILTIN_PMULHRSW,
27635 IX86_BUILTIN_PSHUFB,
27636 IX86_BUILTIN_PSIGNB,
27637 IX86_BUILTIN_PSIGNW,
27638 IX86_BUILTIN_PSIGND,
27639 IX86_BUILTIN_PALIGNR,
27640 IX86_BUILTIN_PABSB,
27641 IX86_BUILTIN_PABSW,
27642 IX86_BUILTIN_PABSD,
27643
27644 IX86_BUILTIN_PHADDW128,
27645 IX86_BUILTIN_PHADDD128,
27646 IX86_BUILTIN_PHADDSW128,
27647 IX86_BUILTIN_PHSUBW128,
27648 IX86_BUILTIN_PHSUBD128,
27649 IX86_BUILTIN_PHSUBSW128,
27650 IX86_BUILTIN_PMADDUBSW128,
27651 IX86_BUILTIN_PMULHRSW128,
27652 IX86_BUILTIN_PSHUFB128,
27653 IX86_BUILTIN_PSIGNB128,
27654 IX86_BUILTIN_PSIGNW128,
27655 IX86_BUILTIN_PSIGND128,
27656 IX86_BUILTIN_PALIGNR128,
27657 IX86_BUILTIN_PABSB128,
27658 IX86_BUILTIN_PABSW128,
27659 IX86_BUILTIN_PABSD128,
27660
27661 /* AMDFAM10 - SSE4A New Instructions. */
27662 IX86_BUILTIN_MOVNTSD,
27663 IX86_BUILTIN_MOVNTSS,
27664 IX86_BUILTIN_EXTRQI,
27665 IX86_BUILTIN_EXTRQ,
27666 IX86_BUILTIN_INSERTQI,
27667 IX86_BUILTIN_INSERTQ,
27668
27669 /* SSE4.1. */
27670 IX86_BUILTIN_BLENDPD,
27671 IX86_BUILTIN_BLENDPS,
27672 IX86_BUILTIN_BLENDVPD,
27673 IX86_BUILTIN_BLENDVPS,
27674 IX86_BUILTIN_PBLENDVB128,
27675 IX86_BUILTIN_PBLENDW128,
27676
27677 IX86_BUILTIN_DPPD,
27678 IX86_BUILTIN_DPPS,
27679
27680 IX86_BUILTIN_INSERTPS128,
27681
27682 IX86_BUILTIN_MOVNTDQA,
27683 IX86_BUILTIN_MPSADBW128,
27684 IX86_BUILTIN_PACKUSDW128,
27685 IX86_BUILTIN_PCMPEQQ,
27686 IX86_BUILTIN_PHMINPOSUW128,
27687
27688 IX86_BUILTIN_PMAXSB128,
27689 IX86_BUILTIN_PMAXSD128,
27690 IX86_BUILTIN_PMAXUD128,
27691 IX86_BUILTIN_PMAXUW128,
27692
27693 IX86_BUILTIN_PMINSB128,
27694 IX86_BUILTIN_PMINSD128,
27695 IX86_BUILTIN_PMINUD128,
27696 IX86_BUILTIN_PMINUW128,
27697
27698 IX86_BUILTIN_PMOVSXBW128,
27699 IX86_BUILTIN_PMOVSXBD128,
27700 IX86_BUILTIN_PMOVSXBQ128,
27701 IX86_BUILTIN_PMOVSXWD128,
27702 IX86_BUILTIN_PMOVSXWQ128,
27703 IX86_BUILTIN_PMOVSXDQ128,
27704
27705 IX86_BUILTIN_PMOVZXBW128,
27706 IX86_BUILTIN_PMOVZXBD128,
27707 IX86_BUILTIN_PMOVZXBQ128,
27708 IX86_BUILTIN_PMOVZXWD128,
27709 IX86_BUILTIN_PMOVZXWQ128,
27710 IX86_BUILTIN_PMOVZXDQ128,
27711
27712 IX86_BUILTIN_PMULDQ128,
27713 IX86_BUILTIN_PMULLD128,
27714
27715 IX86_BUILTIN_ROUNDSD,
27716 IX86_BUILTIN_ROUNDSS,
27717
27718 IX86_BUILTIN_ROUNDPD,
27719 IX86_BUILTIN_ROUNDPS,
27720
27721 IX86_BUILTIN_FLOORPD,
27722 IX86_BUILTIN_CEILPD,
27723 IX86_BUILTIN_TRUNCPD,
27724 IX86_BUILTIN_RINTPD,
27725 IX86_BUILTIN_ROUNDPD_AZ,
27726
27727 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27728 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27729 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27730
27731 IX86_BUILTIN_FLOORPS,
27732 IX86_BUILTIN_CEILPS,
27733 IX86_BUILTIN_TRUNCPS,
27734 IX86_BUILTIN_RINTPS,
27735 IX86_BUILTIN_ROUNDPS_AZ,
27736
27737 IX86_BUILTIN_FLOORPS_SFIX,
27738 IX86_BUILTIN_CEILPS_SFIX,
27739 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27740
27741 IX86_BUILTIN_PTESTZ,
27742 IX86_BUILTIN_PTESTC,
27743 IX86_BUILTIN_PTESTNZC,
27744
27745 IX86_BUILTIN_VEC_INIT_V2SI,
27746 IX86_BUILTIN_VEC_INIT_V4HI,
27747 IX86_BUILTIN_VEC_INIT_V8QI,
27748 IX86_BUILTIN_VEC_EXT_V2DF,
27749 IX86_BUILTIN_VEC_EXT_V2DI,
27750 IX86_BUILTIN_VEC_EXT_V4SF,
27751 IX86_BUILTIN_VEC_EXT_V4SI,
27752 IX86_BUILTIN_VEC_EXT_V8HI,
27753 IX86_BUILTIN_VEC_EXT_V2SI,
27754 IX86_BUILTIN_VEC_EXT_V4HI,
27755 IX86_BUILTIN_VEC_EXT_V16QI,
27756 IX86_BUILTIN_VEC_SET_V2DI,
27757 IX86_BUILTIN_VEC_SET_V4SF,
27758 IX86_BUILTIN_VEC_SET_V4SI,
27759 IX86_BUILTIN_VEC_SET_V8HI,
27760 IX86_BUILTIN_VEC_SET_V4HI,
27761 IX86_BUILTIN_VEC_SET_V16QI,
27762
27763 IX86_BUILTIN_VEC_PACK_SFIX,
27764 IX86_BUILTIN_VEC_PACK_SFIX256,
27765
27766 /* SSE4.2. */
27767 IX86_BUILTIN_CRC32QI,
27768 IX86_BUILTIN_CRC32HI,
27769 IX86_BUILTIN_CRC32SI,
27770 IX86_BUILTIN_CRC32DI,
27771
27772 IX86_BUILTIN_PCMPESTRI128,
27773 IX86_BUILTIN_PCMPESTRM128,
27774 IX86_BUILTIN_PCMPESTRA128,
27775 IX86_BUILTIN_PCMPESTRC128,
27776 IX86_BUILTIN_PCMPESTRO128,
27777 IX86_BUILTIN_PCMPESTRS128,
27778 IX86_BUILTIN_PCMPESTRZ128,
27779 IX86_BUILTIN_PCMPISTRI128,
27780 IX86_BUILTIN_PCMPISTRM128,
27781 IX86_BUILTIN_PCMPISTRA128,
27782 IX86_BUILTIN_PCMPISTRC128,
27783 IX86_BUILTIN_PCMPISTRO128,
27784 IX86_BUILTIN_PCMPISTRS128,
27785 IX86_BUILTIN_PCMPISTRZ128,
27786
27787 IX86_BUILTIN_PCMPGTQ,
27788
27789 /* AES instructions */
27790 IX86_BUILTIN_AESENC128,
27791 IX86_BUILTIN_AESENCLAST128,
27792 IX86_BUILTIN_AESDEC128,
27793 IX86_BUILTIN_AESDECLAST128,
27794 IX86_BUILTIN_AESIMC128,
27795 IX86_BUILTIN_AESKEYGENASSIST128,
27796
27797 /* PCLMUL instruction */
27798 IX86_BUILTIN_PCLMULQDQ128,
27799
27800 /* AVX */
27801 IX86_BUILTIN_ADDPD256,
27802 IX86_BUILTIN_ADDPS256,
27803 IX86_BUILTIN_ADDSUBPD256,
27804 IX86_BUILTIN_ADDSUBPS256,
27805 IX86_BUILTIN_ANDPD256,
27806 IX86_BUILTIN_ANDPS256,
27807 IX86_BUILTIN_ANDNPD256,
27808 IX86_BUILTIN_ANDNPS256,
27809 IX86_BUILTIN_BLENDPD256,
27810 IX86_BUILTIN_BLENDPS256,
27811 IX86_BUILTIN_BLENDVPD256,
27812 IX86_BUILTIN_BLENDVPS256,
27813 IX86_BUILTIN_DIVPD256,
27814 IX86_BUILTIN_DIVPS256,
27815 IX86_BUILTIN_DPPS256,
27816 IX86_BUILTIN_HADDPD256,
27817 IX86_BUILTIN_HADDPS256,
27818 IX86_BUILTIN_HSUBPD256,
27819 IX86_BUILTIN_HSUBPS256,
27820 IX86_BUILTIN_MAXPD256,
27821 IX86_BUILTIN_MAXPS256,
27822 IX86_BUILTIN_MINPD256,
27823 IX86_BUILTIN_MINPS256,
27824 IX86_BUILTIN_MULPD256,
27825 IX86_BUILTIN_MULPS256,
27826 IX86_BUILTIN_ORPD256,
27827 IX86_BUILTIN_ORPS256,
27828 IX86_BUILTIN_SHUFPD256,
27829 IX86_BUILTIN_SHUFPS256,
27830 IX86_BUILTIN_SUBPD256,
27831 IX86_BUILTIN_SUBPS256,
27832 IX86_BUILTIN_XORPD256,
27833 IX86_BUILTIN_XORPS256,
27834 IX86_BUILTIN_CMPSD,
27835 IX86_BUILTIN_CMPSS,
27836 IX86_BUILTIN_CMPPD,
27837 IX86_BUILTIN_CMPPS,
27838 IX86_BUILTIN_CMPPD256,
27839 IX86_BUILTIN_CMPPS256,
27840 IX86_BUILTIN_CVTDQ2PD256,
27841 IX86_BUILTIN_CVTDQ2PS256,
27842 IX86_BUILTIN_CVTPD2PS256,
27843 IX86_BUILTIN_CVTPS2DQ256,
27844 IX86_BUILTIN_CVTPS2PD256,
27845 IX86_BUILTIN_CVTTPD2DQ256,
27846 IX86_BUILTIN_CVTPD2DQ256,
27847 IX86_BUILTIN_CVTTPS2DQ256,
27848 IX86_BUILTIN_EXTRACTF128PD256,
27849 IX86_BUILTIN_EXTRACTF128PS256,
27850 IX86_BUILTIN_EXTRACTF128SI256,
27851 IX86_BUILTIN_VZEROALL,
27852 IX86_BUILTIN_VZEROUPPER,
27853 IX86_BUILTIN_VPERMILVARPD,
27854 IX86_BUILTIN_VPERMILVARPS,
27855 IX86_BUILTIN_VPERMILVARPD256,
27856 IX86_BUILTIN_VPERMILVARPS256,
27857 IX86_BUILTIN_VPERMILPD,
27858 IX86_BUILTIN_VPERMILPS,
27859 IX86_BUILTIN_VPERMILPD256,
27860 IX86_BUILTIN_VPERMILPS256,
27861 IX86_BUILTIN_VPERMIL2PD,
27862 IX86_BUILTIN_VPERMIL2PS,
27863 IX86_BUILTIN_VPERMIL2PD256,
27864 IX86_BUILTIN_VPERMIL2PS256,
27865 IX86_BUILTIN_VPERM2F128PD256,
27866 IX86_BUILTIN_VPERM2F128PS256,
27867 IX86_BUILTIN_VPERM2F128SI256,
27868 IX86_BUILTIN_VBROADCASTSS,
27869 IX86_BUILTIN_VBROADCASTSD256,
27870 IX86_BUILTIN_VBROADCASTSS256,
27871 IX86_BUILTIN_VBROADCASTPD256,
27872 IX86_BUILTIN_VBROADCASTPS256,
27873 IX86_BUILTIN_VINSERTF128PD256,
27874 IX86_BUILTIN_VINSERTF128PS256,
27875 IX86_BUILTIN_VINSERTF128SI256,
27876 IX86_BUILTIN_LOADUPD256,
27877 IX86_BUILTIN_LOADUPS256,
27878 IX86_BUILTIN_STOREUPD256,
27879 IX86_BUILTIN_STOREUPS256,
27880 IX86_BUILTIN_LDDQU256,
27881 IX86_BUILTIN_MOVNTDQ256,
27882 IX86_BUILTIN_MOVNTPD256,
27883 IX86_BUILTIN_MOVNTPS256,
27884 IX86_BUILTIN_LOADDQU256,
27885 IX86_BUILTIN_STOREDQU256,
27886 IX86_BUILTIN_MASKLOADPD,
27887 IX86_BUILTIN_MASKLOADPS,
27888 IX86_BUILTIN_MASKSTOREPD,
27889 IX86_BUILTIN_MASKSTOREPS,
27890 IX86_BUILTIN_MASKLOADPD256,
27891 IX86_BUILTIN_MASKLOADPS256,
27892 IX86_BUILTIN_MASKSTOREPD256,
27893 IX86_BUILTIN_MASKSTOREPS256,
27894 IX86_BUILTIN_MOVSHDUP256,
27895 IX86_BUILTIN_MOVSLDUP256,
27896 IX86_BUILTIN_MOVDDUP256,
27897
27898 IX86_BUILTIN_SQRTPD256,
27899 IX86_BUILTIN_SQRTPS256,
27900 IX86_BUILTIN_SQRTPS_NR256,
27901 IX86_BUILTIN_RSQRTPS256,
27902 IX86_BUILTIN_RSQRTPS_NR256,
27903
27904 IX86_BUILTIN_RCPPS256,
27905
27906 IX86_BUILTIN_ROUNDPD256,
27907 IX86_BUILTIN_ROUNDPS256,
27908
27909 IX86_BUILTIN_FLOORPD256,
27910 IX86_BUILTIN_CEILPD256,
27911 IX86_BUILTIN_TRUNCPD256,
27912 IX86_BUILTIN_RINTPD256,
27913 IX86_BUILTIN_ROUNDPD_AZ256,
27914
27915 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27916 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27917 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27918
27919 IX86_BUILTIN_FLOORPS256,
27920 IX86_BUILTIN_CEILPS256,
27921 IX86_BUILTIN_TRUNCPS256,
27922 IX86_BUILTIN_RINTPS256,
27923 IX86_BUILTIN_ROUNDPS_AZ256,
27924
27925 IX86_BUILTIN_FLOORPS_SFIX256,
27926 IX86_BUILTIN_CEILPS_SFIX256,
27927 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27928
27929 IX86_BUILTIN_UNPCKHPD256,
27930 IX86_BUILTIN_UNPCKLPD256,
27931 IX86_BUILTIN_UNPCKHPS256,
27932 IX86_BUILTIN_UNPCKLPS256,
27933
27934 IX86_BUILTIN_SI256_SI,
27935 IX86_BUILTIN_PS256_PS,
27936 IX86_BUILTIN_PD256_PD,
27937 IX86_BUILTIN_SI_SI256,
27938 IX86_BUILTIN_PS_PS256,
27939 IX86_BUILTIN_PD_PD256,
27940
27941 IX86_BUILTIN_VTESTZPD,
27942 IX86_BUILTIN_VTESTCPD,
27943 IX86_BUILTIN_VTESTNZCPD,
27944 IX86_BUILTIN_VTESTZPS,
27945 IX86_BUILTIN_VTESTCPS,
27946 IX86_BUILTIN_VTESTNZCPS,
27947 IX86_BUILTIN_VTESTZPD256,
27948 IX86_BUILTIN_VTESTCPD256,
27949 IX86_BUILTIN_VTESTNZCPD256,
27950 IX86_BUILTIN_VTESTZPS256,
27951 IX86_BUILTIN_VTESTCPS256,
27952 IX86_BUILTIN_VTESTNZCPS256,
27953 IX86_BUILTIN_PTESTZ256,
27954 IX86_BUILTIN_PTESTC256,
27955 IX86_BUILTIN_PTESTNZC256,
27956
27957 IX86_BUILTIN_MOVMSKPD256,
27958 IX86_BUILTIN_MOVMSKPS256,
27959
27960 /* AVX2 */
27961 IX86_BUILTIN_MPSADBW256,
27962 IX86_BUILTIN_PABSB256,
27963 IX86_BUILTIN_PABSW256,
27964 IX86_BUILTIN_PABSD256,
27965 IX86_BUILTIN_PACKSSDW256,
27966 IX86_BUILTIN_PACKSSWB256,
27967 IX86_BUILTIN_PACKUSDW256,
27968 IX86_BUILTIN_PACKUSWB256,
27969 IX86_BUILTIN_PADDB256,
27970 IX86_BUILTIN_PADDW256,
27971 IX86_BUILTIN_PADDD256,
27972 IX86_BUILTIN_PADDQ256,
27973 IX86_BUILTIN_PADDSB256,
27974 IX86_BUILTIN_PADDSW256,
27975 IX86_BUILTIN_PADDUSB256,
27976 IX86_BUILTIN_PADDUSW256,
27977 IX86_BUILTIN_PALIGNR256,
27978 IX86_BUILTIN_AND256I,
27979 IX86_BUILTIN_ANDNOT256I,
27980 IX86_BUILTIN_PAVGB256,
27981 IX86_BUILTIN_PAVGW256,
27982 IX86_BUILTIN_PBLENDVB256,
27983 IX86_BUILTIN_PBLENDVW256,
27984 IX86_BUILTIN_PCMPEQB256,
27985 IX86_BUILTIN_PCMPEQW256,
27986 IX86_BUILTIN_PCMPEQD256,
27987 IX86_BUILTIN_PCMPEQQ256,
27988 IX86_BUILTIN_PCMPGTB256,
27989 IX86_BUILTIN_PCMPGTW256,
27990 IX86_BUILTIN_PCMPGTD256,
27991 IX86_BUILTIN_PCMPGTQ256,
27992 IX86_BUILTIN_PHADDW256,
27993 IX86_BUILTIN_PHADDD256,
27994 IX86_BUILTIN_PHADDSW256,
27995 IX86_BUILTIN_PHSUBW256,
27996 IX86_BUILTIN_PHSUBD256,
27997 IX86_BUILTIN_PHSUBSW256,
27998 IX86_BUILTIN_PMADDUBSW256,
27999 IX86_BUILTIN_PMADDWD256,
28000 IX86_BUILTIN_PMAXSB256,
28001 IX86_BUILTIN_PMAXSW256,
28002 IX86_BUILTIN_PMAXSD256,
28003 IX86_BUILTIN_PMAXUB256,
28004 IX86_BUILTIN_PMAXUW256,
28005 IX86_BUILTIN_PMAXUD256,
28006 IX86_BUILTIN_PMINSB256,
28007 IX86_BUILTIN_PMINSW256,
28008 IX86_BUILTIN_PMINSD256,
28009 IX86_BUILTIN_PMINUB256,
28010 IX86_BUILTIN_PMINUW256,
28011 IX86_BUILTIN_PMINUD256,
28012 IX86_BUILTIN_PMOVMSKB256,
28013 IX86_BUILTIN_PMOVSXBW256,
28014 IX86_BUILTIN_PMOVSXBD256,
28015 IX86_BUILTIN_PMOVSXBQ256,
28016 IX86_BUILTIN_PMOVSXWD256,
28017 IX86_BUILTIN_PMOVSXWQ256,
28018 IX86_BUILTIN_PMOVSXDQ256,
28019 IX86_BUILTIN_PMOVZXBW256,
28020 IX86_BUILTIN_PMOVZXBD256,
28021 IX86_BUILTIN_PMOVZXBQ256,
28022 IX86_BUILTIN_PMOVZXWD256,
28023 IX86_BUILTIN_PMOVZXWQ256,
28024 IX86_BUILTIN_PMOVZXDQ256,
28025 IX86_BUILTIN_PMULDQ256,
28026 IX86_BUILTIN_PMULHRSW256,
28027 IX86_BUILTIN_PMULHUW256,
28028 IX86_BUILTIN_PMULHW256,
28029 IX86_BUILTIN_PMULLW256,
28030 IX86_BUILTIN_PMULLD256,
28031 IX86_BUILTIN_PMULUDQ256,
28032 IX86_BUILTIN_POR256,
28033 IX86_BUILTIN_PSADBW256,
28034 IX86_BUILTIN_PSHUFB256,
28035 IX86_BUILTIN_PSHUFD256,
28036 IX86_BUILTIN_PSHUFHW256,
28037 IX86_BUILTIN_PSHUFLW256,
28038 IX86_BUILTIN_PSIGNB256,
28039 IX86_BUILTIN_PSIGNW256,
28040 IX86_BUILTIN_PSIGND256,
28041 IX86_BUILTIN_PSLLDQI256,
28042 IX86_BUILTIN_PSLLWI256,
28043 IX86_BUILTIN_PSLLW256,
28044 IX86_BUILTIN_PSLLDI256,
28045 IX86_BUILTIN_PSLLD256,
28046 IX86_BUILTIN_PSLLQI256,
28047 IX86_BUILTIN_PSLLQ256,
28048 IX86_BUILTIN_PSRAWI256,
28049 IX86_BUILTIN_PSRAW256,
28050 IX86_BUILTIN_PSRADI256,
28051 IX86_BUILTIN_PSRAD256,
28052 IX86_BUILTIN_PSRLDQI256,
28053 IX86_BUILTIN_PSRLWI256,
28054 IX86_BUILTIN_PSRLW256,
28055 IX86_BUILTIN_PSRLDI256,
28056 IX86_BUILTIN_PSRLD256,
28057 IX86_BUILTIN_PSRLQI256,
28058 IX86_BUILTIN_PSRLQ256,
28059 IX86_BUILTIN_PSUBB256,
28060 IX86_BUILTIN_PSUBW256,
28061 IX86_BUILTIN_PSUBD256,
28062 IX86_BUILTIN_PSUBQ256,
28063 IX86_BUILTIN_PSUBSB256,
28064 IX86_BUILTIN_PSUBSW256,
28065 IX86_BUILTIN_PSUBUSB256,
28066 IX86_BUILTIN_PSUBUSW256,
28067 IX86_BUILTIN_PUNPCKHBW256,
28068 IX86_BUILTIN_PUNPCKHWD256,
28069 IX86_BUILTIN_PUNPCKHDQ256,
28070 IX86_BUILTIN_PUNPCKHQDQ256,
28071 IX86_BUILTIN_PUNPCKLBW256,
28072 IX86_BUILTIN_PUNPCKLWD256,
28073 IX86_BUILTIN_PUNPCKLDQ256,
28074 IX86_BUILTIN_PUNPCKLQDQ256,
28075 IX86_BUILTIN_PXOR256,
28076 IX86_BUILTIN_MOVNTDQA256,
28077 IX86_BUILTIN_VBROADCASTSS_PS,
28078 IX86_BUILTIN_VBROADCASTSS_PS256,
28079 IX86_BUILTIN_VBROADCASTSD_PD256,
28080 IX86_BUILTIN_VBROADCASTSI256,
28081 IX86_BUILTIN_PBLENDD256,
28082 IX86_BUILTIN_PBLENDD128,
28083 IX86_BUILTIN_PBROADCASTB256,
28084 IX86_BUILTIN_PBROADCASTW256,
28085 IX86_BUILTIN_PBROADCASTD256,
28086 IX86_BUILTIN_PBROADCASTQ256,
28087 IX86_BUILTIN_PBROADCASTB128,
28088 IX86_BUILTIN_PBROADCASTW128,
28089 IX86_BUILTIN_PBROADCASTD128,
28090 IX86_BUILTIN_PBROADCASTQ128,
28091 IX86_BUILTIN_VPERMVARSI256,
28092 IX86_BUILTIN_VPERMDF256,
28093 IX86_BUILTIN_VPERMVARSF256,
28094 IX86_BUILTIN_VPERMDI256,
28095 IX86_BUILTIN_VPERMTI256,
28096 IX86_BUILTIN_VEXTRACT128I256,
28097 IX86_BUILTIN_VINSERT128I256,
28098 IX86_BUILTIN_MASKLOADD,
28099 IX86_BUILTIN_MASKLOADQ,
28100 IX86_BUILTIN_MASKLOADD256,
28101 IX86_BUILTIN_MASKLOADQ256,
28102 IX86_BUILTIN_MASKSTORED,
28103 IX86_BUILTIN_MASKSTOREQ,
28104 IX86_BUILTIN_MASKSTORED256,
28105 IX86_BUILTIN_MASKSTOREQ256,
28106 IX86_BUILTIN_PSLLVV4DI,
28107 IX86_BUILTIN_PSLLVV2DI,
28108 IX86_BUILTIN_PSLLVV8SI,
28109 IX86_BUILTIN_PSLLVV4SI,
28110 IX86_BUILTIN_PSRAVV8SI,
28111 IX86_BUILTIN_PSRAVV4SI,
28112 IX86_BUILTIN_PSRLVV4DI,
28113 IX86_BUILTIN_PSRLVV2DI,
28114 IX86_BUILTIN_PSRLVV8SI,
28115 IX86_BUILTIN_PSRLVV4SI,
28116
28117 IX86_BUILTIN_GATHERSIV2DF,
28118 IX86_BUILTIN_GATHERSIV4DF,
28119 IX86_BUILTIN_GATHERDIV2DF,
28120 IX86_BUILTIN_GATHERDIV4DF,
28121 IX86_BUILTIN_GATHERSIV4SF,
28122 IX86_BUILTIN_GATHERSIV8SF,
28123 IX86_BUILTIN_GATHERDIV4SF,
28124 IX86_BUILTIN_GATHERDIV8SF,
28125 IX86_BUILTIN_GATHERSIV2DI,
28126 IX86_BUILTIN_GATHERSIV4DI,
28127 IX86_BUILTIN_GATHERDIV2DI,
28128 IX86_BUILTIN_GATHERDIV4DI,
28129 IX86_BUILTIN_GATHERSIV4SI,
28130 IX86_BUILTIN_GATHERSIV8SI,
28131 IX86_BUILTIN_GATHERDIV4SI,
28132 IX86_BUILTIN_GATHERDIV8SI,
28133
28134 /* AVX512F */
28135 IX86_BUILTIN_SI512_SI256,
28136 IX86_BUILTIN_PD512_PD256,
28137 IX86_BUILTIN_PS512_PS256,
28138 IX86_BUILTIN_SI512_SI,
28139 IX86_BUILTIN_PD512_PD,
28140 IX86_BUILTIN_PS512_PS,
28141 IX86_BUILTIN_ADDPD512,
28142 IX86_BUILTIN_ADDPS512,
28143 IX86_BUILTIN_ADDSD_ROUND,
28144 IX86_BUILTIN_ADDSS_ROUND,
28145 IX86_BUILTIN_ALIGND512,
28146 IX86_BUILTIN_ALIGNQ512,
28147 IX86_BUILTIN_BLENDMD512,
28148 IX86_BUILTIN_BLENDMPD512,
28149 IX86_BUILTIN_BLENDMPS512,
28150 IX86_BUILTIN_BLENDMQ512,
28151 IX86_BUILTIN_BROADCASTF32X4_512,
28152 IX86_BUILTIN_BROADCASTF64X4_512,
28153 IX86_BUILTIN_BROADCASTI32X4_512,
28154 IX86_BUILTIN_BROADCASTI64X4_512,
28155 IX86_BUILTIN_BROADCASTSD512,
28156 IX86_BUILTIN_BROADCASTSS512,
28157 IX86_BUILTIN_CMPD512,
28158 IX86_BUILTIN_CMPPD512,
28159 IX86_BUILTIN_CMPPS512,
28160 IX86_BUILTIN_CMPQ512,
28161 IX86_BUILTIN_CMPSD_MASK,
28162 IX86_BUILTIN_CMPSS_MASK,
28163 IX86_BUILTIN_COMIDF,
28164 IX86_BUILTIN_COMISF,
28165 IX86_BUILTIN_COMPRESSPD512,
28166 IX86_BUILTIN_COMPRESSPDSTORE512,
28167 IX86_BUILTIN_COMPRESSPS512,
28168 IX86_BUILTIN_COMPRESSPSSTORE512,
28169 IX86_BUILTIN_CVTDQ2PD512,
28170 IX86_BUILTIN_CVTDQ2PS512,
28171 IX86_BUILTIN_CVTPD2DQ512,
28172 IX86_BUILTIN_CVTPD2PS512,
28173 IX86_BUILTIN_CVTPD2UDQ512,
28174 IX86_BUILTIN_CVTPH2PS512,
28175 IX86_BUILTIN_CVTPS2DQ512,
28176 IX86_BUILTIN_CVTPS2PD512,
28177 IX86_BUILTIN_CVTPS2PH512,
28178 IX86_BUILTIN_CVTPS2UDQ512,
28179 IX86_BUILTIN_CVTSD2SS_ROUND,
28180 IX86_BUILTIN_CVTSI2SD64,
28181 IX86_BUILTIN_CVTSI2SS32,
28182 IX86_BUILTIN_CVTSI2SS64,
28183 IX86_BUILTIN_CVTSS2SD_ROUND,
28184 IX86_BUILTIN_CVTTPD2DQ512,
28185 IX86_BUILTIN_CVTTPD2UDQ512,
28186 IX86_BUILTIN_CVTTPS2DQ512,
28187 IX86_BUILTIN_CVTTPS2UDQ512,
28188 IX86_BUILTIN_CVTUDQ2PD512,
28189 IX86_BUILTIN_CVTUDQ2PS512,
28190 IX86_BUILTIN_CVTUSI2SD32,
28191 IX86_BUILTIN_CVTUSI2SD64,
28192 IX86_BUILTIN_CVTUSI2SS32,
28193 IX86_BUILTIN_CVTUSI2SS64,
28194 IX86_BUILTIN_DIVPD512,
28195 IX86_BUILTIN_DIVPS512,
28196 IX86_BUILTIN_DIVSD_ROUND,
28197 IX86_BUILTIN_DIVSS_ROUND,
28198 IX86_BUILTIN_EXPANDPD512,
28199 IX86_BUILTIN_EXPANDPD512Z,
28200 IX86_BUILTIN_EXPANDPDLOAD512,
28201 IX86_BUILTIN_EXPANDPDLOAD512Z,
28202 IX86_BUILTIN_EXPANDPS512,
28203 IX86_BUILTIN_EXPANDPS512Z,
28204 IX86_BUILTIN_EXPANDPSLOAD512,
28205 IX86_BUILTIN_EXPANDPSLOAD512Z,
28206 IX86_BUILTIN_EXTRACTF32X4,
28207 IX86_BUILTIN_EXTRACTF64X4,
28208 IX86_BUILTIN_EXTRACTI32X4,
28209 IX86_BUILTIN_EXTRACTI64X4,
28210 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28211 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28212 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28213 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28214 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28215 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28216 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28217 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28218 IX86_BUILTIN_GETEXPPD512,
28219 IX86_BUILTIN_GETEXPPS512,
28220 IX86_BUILTIN_GETEXPSD128,
28221 IX86_BUILTIN_GETEXPSS128,
28222 IX86_BUILTIN_GETMANTPD512,
28223 IX86_BUILTIN_GETMANTPS512,
28224 IX86_BUILTIN_GETMANTSD128,
28225 IX86_BUILTIN_GETMANTSS128,
28226 IX86_BUILTIN_INSERTF32X4,
28227 IX86_BUILTIN_INSERTF64X4,
28228 IX86_BUILTIN_INSERTI32X4,
28229 IX86_BUILTIN_INSERTI64X4,
28230 IX86_BUILTIN_LOADAPD512,
28231 IX86_BUILTIN_LOADAPS512,
28232 IX86_BUILTIN_LOADDQUDI512,
28233 IX86_BUILTIN_LOADDQUSI512,
28234 IX86_BUILTIN_LOADUPD512,
28235 IX86_BUILTIN_LOADUPS512,
28236 IX86_BUILTIN_MAXPD512,
28237 IX86_BUILTIN_MAXPS512,
28238 IX86_BUILTIN_MAXSD_ROUND,
28239 IX86_BUILTIN_MAXSS_ROUND,
28240 IX86_BUILTIN_MINPD512,
28241 IX86_BUILTIN_MINPS512,
28242 IX86_BUILTIN_MINSD_ROUND,
28243 IX86_BUILTIN_MINSS_ROUND,
28244 IX86_BUILTIN_MOVAPD512,
28245 IX86_BUILTIN_MOVAPS512,
28246 IX86_BUILTIN_MOVDDUP512,
28247 IX86_BUILTIN_MOVDQA32LOAD512,
28248 IX86_BUILTIN_MOVDQA32STORE512,
28249 IX86_BUILTIN_MOVDQA32_512,
28250 IX86_BUILTIN_MOVDQA64LOAD512,
28251 IX86_BUILTIN_MOVDQA64STORE512,
28252 IX86_BUILTIN_MOVDQA64_512,
28253 IX86_BUILTIN_MOVNTDQ512,
28254 IX86_BUILTIN_MOVNTDQA512,
28255 IX86_BUILTIN_MOVNTPD512,
28256 IX86_BUILTIN_MOVNTPS512,
28257 IX86_BUILTIN_MOVSHDUP512,
28258 IX86_BUILTIN_MOVSLDUP512,
28259 IX86_BUILTIN_MULPD512,
28260 IX86_BUILTIN_MULPS512,
28261 IX86_BUILTIN_MULSD_ROUND,
28262 IX86_BUILTIN_MULSS_ROUND,
28263 IX86_BUILTIN_PABSD512,
28264 IX86_BUILTIN_PABSQ512,
28265 IX86_BUILTIN_PADDD512,
28266 IX86_BUILTIN_PADDQ512,
28267 IX86_BUILTIN_PANDD512,
28268 IX86_BUILTIN_PANDND512,
28269 IX86_BUILTIN_PANDNQ512,
28270 IX86_BUILTIN_PANDQ512,
28271 IX86_BUILTIN_PBROADCASTD512,
28272 IX86_BUILTIN_PBROADCASTD512_GPR,
28273 IX86_BUILTIN_PBROADCASTMB512,
28274 IX86_BUILTIN_PBROADCASTMW512,
28275 IX86_BUILTIN_PBROADCASTQ512,
28276 IX86_BUILTIN_PBROADCASTQ512_GPR,
28277 IX86_BUILTIN_PBROADCASTQ512_MEM,
28278 IX86_BUILTIN_PCMPEQD512_MASK,
28279 IX86_BUILTIN_PCMPEQQ512_MASK,
28280 IX86_BUILTIN_PCMPGTD512_MASK,
28281 IX86_BUILTIN_PCMPGTQ512_MASK,
28282 IX86_BUILTIN_PCOMPRESSD512,
28283 IX86_BUILTIN_PCOMPRESSDSTORE512,
28284 IX86_BUILTIN_PCOMPRESSQ512,
28285 IX86_BUILTIN_PCOMPRESSQSTORE512,
28286 IX86_BUILTIN_PEXPANDD512,
28287 IX86_BUILTIN_PEXPANDD512Z,
28288 IX86_BUILTIN_PEXPANDDLOAD512,
28289 IX86_BUILTIN_PEXPANDDLOAD512Z,
28290 IX86_BUILTIN_PEXPANDQ512,
28291 IX86_BUILTIN_PEXPANDQ512Z,
28292 IX86_BUILTIN_PEXPANDQLOAD512,
28293 IX86_BUILTIN_PEXPANDQLOAD512Z,
28294 IX86_BUILTIN_PMAXSD512,
28295 IX86_BUILTIN_PMAXSQ512,
28296 IX86_BUILTIN_PMAXUD512,
28297 IX86_BUILTIN_PMAXUQ512,
28298 IX86_BUILTIN_PMINSD512,
28299 IX86_BUILTIN_PMINSQ512,
28300 IX86_BUILTIN_PMINUD512,
28301 IX86_BUILTIN_PMINUQ512,
28302 IX86_BUILTIN_PMOVDB512,
28303 IX86_BUILTIN_PMOVDB512_MEM,
28304 IX86_BUILTIN_PMOVDW512,
28305 IX86_BUILTIN_PMOVDW512_MEM,
28306 IX86_BUILTIN_PMOVQB512,
28307 IX86_BUILTIN_PMOVQB512_MEM,
28308 IX86_BUILTIN_PMOVQD512,
28309 IX86_BUILTIN_PMOVQD512_MEM,
28310 IX86_BUILTIN_PMOVQW512,
28311 IX86_BUILTIN_PMOVQW512_MEM,
28312 IX86_BUILTIN_PMOVSDB512,
28313 IX86_BUILTIN_PMOVSDB512_MEM,
28314 IX86_BUILTIN_PMOVSDW512,
28315 IX86_BUILTIN_PMOVSDW512_MEM,
28316 IX86_BUILTIN_PMOVSQB512,
28317 IX86_BUILTIN_PMOVSQB512_MEM,
28318 IX86_BUILTIN_PMOVSQD512,
28319 IX86_BUILTIN_PMOVSQD512_MEM,
28320 IX86_BUILTIN_PMOVSQW512,
28321 IX86_BUILTIN_PMOVSQW512_MEM,
28322 IX86_BUILTIN_PMOVSXBD512,
28323 IX86_BUILTIN_PMOVSXBQ512,
28324 IX86_BUILTIN_PMOVSXDQ512,
28325 IX86_BUILTIN_PMOVSXWD512,
28326 IX86_BUILTIN_PMOVSXWQ512,
28327 IX86_BUILTIN_PMOVUSDB512,
28328 IX86_BUILTIN_PMOVUSDB512_MEM,
28329 IX86_BUILTIN_PMOVUSDW512,
28330 IX86_BUILTIN_PMOVUSDW512_MEM,
28331 IX86_BUILTIN_PMOVUSQB512,
28332 IX86_BUILTIN_PMOVUSQB512_MEM,
28333 IX86_BUILTIN_PMOVUSQD512,
28334 IX86_BUILTIN_PMOVUSQD512_MEM,
28335 IX86_BUILTIN_PMOVUSQW512,
28336 IX86_BUILTIN_PMOVUSQW512_MEM,
28337 IX86_BUILTIN_PMOVZXBD512,
28338 IX86_BUILTIN_PMOVZXBQ512,
28339 IX86_BUILTIN_PMOVZXDQ512,
28340 IX86_BUILTIN_PMOVZXWD512,
28341 IX86_BUILTIN_PMOVZXWQ512,
28342 IX86_BUILTIN_PMULDQ512,
28343 IX86_BUILTIN_PMULLD512,
28344 IX86_BUILTIN_PMULUDQ512,
28345 IX86_BUILTIN_PORD512,
28346 IX86_BUILTIN_PORQ512,
28347 IX86_BUILTIN_PROLD512,
28348 IX86_BUILTIN_PROLQ512,
28349 IX86_BUILTIN_PROLVD512,
28350 IX86_BUILTIN_PROLVQ512,
28351 IX86_BUILTIN_PRORD512,
28352 IX86_BUILTIN_PRORQ512,
28353 IX86_BUILTIN_PRORVD512,
28354 IX86_BUILTIN_PRORVQ512,
28355 IX86_BUILTIN_PSHUFD512,
28356 IX86_BUILTIN_PSLLD512,
28357 IX86_BUILTIN_PSLLDI512,
28358 IX86_BUILTIN_PSLLQ512,
28359 IX86_BUILTIN_PSLLQI512,
28360 IX86_BUILTIN_PSLLVV16SI,
28361 IX86_BUILTIN_PSLLVV8DI,
28362 IX86_BUILTIN_PSRAD512,
28363 IX86_BUILTIN_PSRADI512,
28364 IX86_BUILTIN_PSRAQ512,
28365 IX86_BUILTIN_PSRAQI512,
28366 IX86_BUILTIN_PSRAVV16SI,
28367 IX86_BUILTIN_PSRAVV8DI,
28368 IX86_BUILTIN_PSRLD512,
28369 IX86_BUILTIN_PSRLDI512,
28370 IX86_BUILTIN_PSRLQ512,
28371 IX86_BUILTIN_PSRLQI512,
28372 IX86_BUILTIN_PSRLVV16SI,
28373 IX86_BUILTIN_PSRLVV8DI,
28374 IX86_BUILTIN_PSUBD512,
28375 IX86_BUILTIN_PSUBQ512,
28376 IX86_BUILTIN_PTESTMD512,
28377 IX86_BUILTIN_PTESTMQ512,
28378 IX86_BUILTIN_PTESTNMD512,
28379 IX86_BUILTIN_PTESTNMQ512,
28380 IX86_BUILTIN_PUNPCKHDQ512,
28381 IX86_BUILTIN_PUNPCKHQDQ512,
28382 IX86_BUILTIN_PUNPCKLDQ512,
28383 IX86_BUILTIN_PUNPCKLQDQ512,
28384 IX86_BUILTIN_PXORD512,
28385 IX86_BUILTIN_PXORQ512,
28386 IX86_BUILTIN_RCP14PD512,
28387 IX86_BUILTIN_RCP14PS512,
28388 IX86_BUILTIN_RCP14SD,
28389 IX86_BUILTIN_RCP14SS,
28390 IX86_BUILTIN_RNDSCALEPD,
28391 IX86_BUILTIN_RNDSCALEPS,
28392 IX86_BUILTIN_RNDSCALESD,
28393 IX86_BUILTIN_RNDSCALESS,
28394 IX86_BUILTIN_RSQRT14PD512,
28395 IX86_BUILTIN_RSQRT14PS512,
28396 IX86_BUILTIN_RSQRT14SD,
28397 IX86_BUILTIN_RSQRT14SS,
28398 IX86_BUILTIN_SCALEFPD512,
28399 IX86_BUILTIN_SCALEFPS512,
28400 IX86_BUILTIN_SCALEFSD,
28401 IX86_BUILTIN_SCALEFSS,
28402 IX86_BUILTIN_SHUFPD512,
28403 IX86_BUILTIN_SHUFPS512,
28404 IX86_BUILTIN_SHUF_F32x4,
28405 IX86_BUILTIN_SHUF_F64x2,
28406 IX86_BUILTIN_SHUF_I32x4,
28407 IX86_BUILTIN_SHUF_I64x2,
28408 IX86_BUILTIN_SQRTPD512,
28409 IX86_BUILTIN_SQRTPD512_MASK,
28410 IX86_BUILTIN_SQRTPS512_MASK,
28411 IX86_BUILTIN_SQRTPS_NR512,
28412 IX86_BUILTIN_SQRTSD_ROUND,
28413 IX86_BUILTIN_SQRTSS_ROUND,
28414 IX86_BUILTIN_STOREAPD512,
28415 IX86_BUILTIN_STOREAPS512,
28416 IX86_BUILTIN_STOREDQUDI512,
28417 IX86_BUILTIN_STOREDQUSI512,
28418 IX86_BUILTIN_STOREUPD512,
28419 IX86_BUILTIN_STOREUPS512,
28420 IX86_BUILTIN_SUBPD512,
28421 IX86_BUILTIN_SUBPS512,
28422 IX86_BUILTIN_SUBSD_ROUND,
28423 IX86_BUILTIN_SUBSS_ROUND,
28424 IX86_BUILTIN_UCMPD512,
28425 IX86_BUILTIN_UCMPQ512,
28426 IX86_BUILTIN_UNPCKHPD512,
28427 IX86_BUILTIN_UNPCKHPS512,
28428 IX86_BUILTIN_UNPCKLPD512,
28429 IX86_BUILTIN_UNPCKLPS512,
28430 IX86_BUILTIN_VCVTSD2SI32,
28431 IX86_BUILTIN_VCVTSD2SI64,
28432 IX86_BUILTIN_VCVTSD2USI32,
28433 IX86_BUILTIN_VCVTSD2USI64,
28434 IX86_BUILTIN_VCVTSS2SI32,
28435 IX86_BUILTIN_VCVTSS2SI64,
28436 IX86_BUILTIN_VCVTSS2USI32,
28437 IX86_BUILTIN_VCVTSS2USI64,
28438 IX86_BUILTIN_VCVTTSD2SI32,
28439 IX86_BUILTIN_VCVTTSD2SI64,
28440 IX86_BUILTIN_VCVTTSD2USI32,
28441 IX86_BUILTIN_VCVTTSD2USI64,
28442 IX86_BUILTIN_VCVTTSS2SI32,
28443 IX86_BUILTIN_VCVTTSS2SI64,
28444 IX86_BUILTIN_VCVTTSS2USI32,
28445 IX86_BUILTIN_VCVTTSS2USI64,
28446 IX86_BUILTIN_VFMADDPD512_MASK,
28447 IX86_BUILTIN_VFMADDPD512_MASK3,
28448 IX86_BUILTIN_VFMADDPD512_MASKZ,
28449 IX86_BUILTIN_VFMADDPS512_MASK,
28450 IX86_BUILTIN_VFMADDPS512_MASK3,
28451 IX86_BUILTIN_VFMADDPS512_MASKZ,
28452 IX86_BUILTIN_VFMADDSD3_ROUND,
28453 IX86_BUILTIN_VFMADDSS3_ROUND,
28454 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28455 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28456 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28457 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28458 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28459 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28460 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28461 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28462 IX86_BUILTIN_VFMSUBPD512_MASK3,
28463 IX86_BUILTIN_VFMSUBPS512_MASK3,
28464 IX86_BUILTIN_VFMSUBSD3_MASK3,
28465 IX86_BUILTIN_VFMSUBSS3_MASK3,
28466 IX86_BUILTIN_VFNMADDPD512_MASK,
28467 IX86_BUILTIN_VFNMADDPS512_MASK,
28468 IX86_BUILTIN_VFNMSUBPD512_MASK,
28469 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28470 IX86_BUILTIN_VFNMSUBPS512_MASK,
28471 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28472 IX86_BUILTIN_VPCLZCNTD512,
28473 IX86_BUILTIN_VPCLZCNTQ512,
28474 IX86_BUILTIN_VPCONFLICTD512,
28475 IX86_BUILTIN_VPCONFLICTQ512,
28476 IX86_BUILTIN_VPERMDF512,
28477 IX86_BUILTIN_VPERMDI512,
28478 IX86_BUILTIN_VPERMI2VARD512,
28479 IX86_BUILTIN_VPERMI2VARPD512,
28480 IX86_BUILTIN_VPERMI2VARPS512,
28481 IX86_BUILTIN_VPERMI2VARQ512,
28482 IX86_BUILTIN_VPERMILPD512,
28483 IX86_BUILTIN_VPERMILPS512,
28484 IX86_BUILTIN_VPERMILVARPD512,
28485 IX86_BUILTIN_VPERMILVARPS512,
28486 IX86_BUILTIN_VPERMT2VARD512,
28487 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28488 IX86_BUILTIN_VPERMT2VARPD512,
28489 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28490 IX86_BUILTIN_VPERMT2VARPS512,
28491 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28492 IX86_BUILTIN_VPERMT2VARQ512,
28493 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28494 IX86_BUILTIN_VPERMVARDF512,
28495 IX86_BUILTIN_VPERMVARDI512,
28496 IX86_BUILTIN_VPERMVARSF512,
28497 IX86_BUILTIN_VPERMVARSI512,
28498 IX86_BUILTIN_VTERNLOGD512_MASK,
28499 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28500 IX86_BUILTIN_VTERNLOGQ512_MASK,
28501 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28502
28503 /* Mask arithmetic operations */
28504 IX86_BUILTIN_KAND16,
28505 IX86_BUILTIN_KANDN16,
28506 IX86_BUILTIN_KNOT16,
28507 IX86_BUILTIN_KOR16,
28508 IX86_BUILTIN_KORTESTC16,
28509 IX86_BUILTIN_KORTESTZ16,
28510 IX86_BUILTIN_KUNPCKBW,
28511 IX86_BUILTIN_KXNOR16,
28512 IX86_BUILTIN_KXOR16,
28513 IX86_BUILTIN_KMOV16,
28514
28515   /* Alternate 4- and 8-element gather/scatter builtins for the vectorizer,
28516      where all operands are 32 or 64 bytes wide, respectively.  */
28517 IX86_BUILTIN_GATHERALTSIV4DF,
28518 IX86_BUILTIN_GATHERALTDIV8SF,
28519 IX86_BUILTIN_GATHERALTSIV4DI,
28520 IX86_BUILTIN_GATHERALTDIV8SI,
28521 IX86_BUILTIN_GATHER3ALTDIV16SF,
28522 IX86_BUILTIN_GATHER3ALTDIV16SI,
28523 IX86_BUILTIN_GATHER3ALTSIV8DF,
28524 IX86_BUILTIN_GATHER3ALTSIV8DI,
28525 IX86_BUILTIN_GATHER3DIV16SF,
28526 IX86_BUILTIN_GATHER3DIV16SI,
28527 IX86_BUILTIN_GATHER3DIV8DF,
28528 IX86_BUILTIN_GATHER3DIV8DI,
28529 IX86_BUILTIN_GATHER3SIV16SF,
28530 IX86_BUILTIN_GATHER3SIV16SI,
28531 IX86_BUILTIN_GATHER3SIV8DF,
28532 IX86_BUILTIN_GATHER3SIV8DI,
28533 IX86_BUILTIN_SCATTERDIV16SF,
28534 IX86_BUILTIN_SCATTERDIV16SI,
28535 IX86_BUILTIN_SCATTERDIV8DF,
28536 IX86_BUILTIN_SCATTERDIV8DI,
28537 IX86_BUILTIN_SCATTERSIV16SF,
28538 IX86_BUILTIN_SCATTERSIV16SI,
28539 IX86_BUILTIN_SCATTERSIV8DF,
28540 IX86_BUILTIN_SCATTERSIV8DI,
28541
28542 /* AVX512PF */
28543 IX86_BUILTIN_GATHERPFQPD,
28544 IX86_BUILTIN_GATHERPFDPS,
28545 IX86_BUILTIN_GATHERPFDPD,
28546 IX86_BUILTIN_GATHERPFQPS,
28547 IX86_BUILTIN_SCATTERPFDPD,
28548 IX86_BUILTIN_SCATTERPFDPS,
28549 IX86_BUILTIN_SCATTERPFQPD,
28550 IX86_BUILTIN_SCATTERPFQPS,
28551
28552 /* AVX-512ER */
28553 IX86_BUILTIN_EXP2PD_MASK,
28554 IX86_BUILTIN_EXP2PS_MASK,
28555 IX86_BUILTIN_EXP2PS,
28556 IX86_BUILTIN_RCP28PD,
28557 IX86_BUILTIN_RCP28PS,
28558 IX86_BUILTIN_RCP28SD,
28559 IX86_BUILTIN_RCP28SS,
28560 IX86_BUILTIN_RSQRT28PD,
28561 IX86_BUILTIN_RSQRT28PS,
28562 IX86_BUILTIN_RSQRT28SD,
28563 IX86_BUILTIN_RSQRT28SS,
28564
28565 /* SHA builtins. */
28566 IX86_BUILTIN_SHA1MSG1,
28567 IX86_BUILTIN_SHA1MSG2,
28568 IX86_BUILTIN_SHA1NEXTE,
28569 IX86_BUILTIN_SHA1RNDS4,
28570 IX86_BUILTIN_SHA256MSG1,
28571 IX86_BUILTIN_SHA256MSG2,
28572 IX86_BUILTIN_SHA256RNDS2,
28573
28574 /* CLFLUSHOPT instructions. */
28575 IX86_BUILTIN_CLFLUSHOPT,
28576
28577 /* TFmode support builtins. */
28578 IX86_BUILTIN_INFQ,
28579 IX86_BUILTIN_HUGE_VALQ,
28580 IX86_BUILTIN_FABSQ,
28581 IX86_BUILTIN_COPYSIGNQ,
28582
28583 /* Vectorizer support builtins. */
28584 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28585 IX86_BUILTIN_CPYSGNPS,
28586 IX86_BUILTIN_CPYSGNPD,
28587 IX86_BUILTIN_CPYSGNPS256,
28588 IX86_BUILTIN_CPYSGNPS512,
28589 IX86_BUILTIN_CPYSGNPD256,
28590 IX86_BUILTIN_CPYSGNPD512,
28591 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28592 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28593
28594
28595 /* FMA4 instructions. */
28596 IX86_BUILTIN_VFMADDSS,
28597 IX86_BUILTIN_VFMADDSD,
28598 IX86_BUILTIN_VFMADDPS,
28599 IX86_BUILTIN_VFMADDPD,
28600 IX86_BUILTIN_VFMADDPS256,
28601 IX86_BUILTIN_VFMADDPD256,
28602 IX86_BUILTIN_VFMADDSUBPS,
28603 IX86_BUILTIN_VFMADDSUBPD,
28604 IX86_BUILTIN_VFMADDSUBPS256,
28605 IX86_BUILTIN_VFMADDSUBPD256,
28606
28607 /* FMA3 instructions. */
28608 IX86_BUILTIN_VFMADDSS3,
28609 IX86_BUILTIN_VFMADDSD3,
28610
28611 /* XOP instructions. */
28612 IX86_BUILTIN_VPCMOV,
28613 IX86_BUILTIN_VPCMOV_V2DI,
28614 IX86_BUILTIN_VPCMOV_V4SI,
28615 IX86_BUILTIN_VPCMOV_V8HI,
28616 IX86_BUILTIN_VPCMOV_V16QI,
28617 IX86_BUILTIN_VPCMOV_V4SF,
28618 IX86_BUILTIN_VPCMOV_V2DF,
28619 IX86_BUILTIN_VPCMOV256,
28620 IX86_BUILTIN_VPCMOV_V4DI256,
28621 IX86_BUILTIN_VPCMOV_V8SI256,
28622 IX86_BUILTIN_VPCMOV_V16HI256,
28623 IX86_BUILTIN_VPCMOV_V32QI256,
28624 IX86_BUILTIN_VPCMOV_V8SF256,
28625 IX86_BUILTIN_VPCMOV_V4DF256,
28626
28627 IX86_BUILTIN_VPPERM,
28628
28629 IX86_BUILTIN_VPMACSSWW,
28630 IX86_BUILTIN_VPMACSWW,
28631 IX86_BUILTIN_VPMACSSWD,
28632 IX86_BUILTIN_VPMACSWD,
28633 IX86_BUILTIN_VPMACSSDD,
28634 IX86_BUILTIN_VPMACSDD,
28635 IX86_BUILTIN_VPMACSSDQL,
28636 IX86_BUILTIN_VPMACSSDQH,
28637 IX86_BUILTIN_VPMACSDQL,
28638 IX86_BUILTIN_VPMACSDQH,
28639 IX86_BUILTIN_VPMADCSSWD,
28640 IX86_BUILTIN_VPMADCSWD,
28641
28642 IX86_BUILTIN_VPHADDBW,
28643 IX86_BUILTIN_VPHADDBD,
28644 IX86_BUILTIN_VPHADDBQ,
28645 IX86_BUILTIN_VPHADDWD,
28646 IX86_BUILTIN_VPHADDWQ,
28647 IX86_BUILTIN_VPHADDDQ,
28648 IX86_BUILTIN_VPHADDUBW,
28649 IX86_BUILTIN_VPHADDUBD,
28650 IX86_BUILTIN_VPHADDUBQ,
28651 IX86_BUILTIN_VPHADDUWD,
28652 IX86_BUILTIN_VPHADDUWQ,
28653 IX86_BUILTIN_VPHADDUDQ,
28654 IX86_BUILTIN_VPHSUBBW,
28655 IX86_BUILTIN_VPHSUBWD,
28656 IX86_BUILTIN_VPHSUBDQ,
28657
28658 IX86_BUILTIN_VPROTB,
28659 IX86_BUILTIN_VPROTW,
28660 IX86_BUILTIN_VPROTD,
28661 IX86_BUILTIN_VPROTQ,
28662 IX86_BUILTIN_VPROTB_IMM,
28663 IX86_BUILTIN_VPROTW_IMM,
28664 IX86_BUILTIN_VPROTD_IMM,
28665 IX86_BUILTIN_VPROTQ_IMM,
28666
28667 IX86_BUILTIN_VPSHLB,
28668 IX86_BUILTIN_VPSHLW,
28669 IX86_BUILTIN_VPSHLD,
28670 IX86_BUILTIN_VPSHLQ,
28671 IX86_BUILTIN_VPSHAB,
28672 IX86_BUILTIN_VPSHAW,
28673 IX86_BUILTIN_VPSHAD,
28674 IX86_BUILTIN_VPSHAQ,
28675
28676 IX86_BUILTIN_VFRCZSS,
28677 IX86_BUILTIN_VFRCZSD,
28678 IX86_BUILTIN_VFRCZPS,
28679 IX86_BUILTIN_VFRCZPD,
28680 IX86_BUILTIN_VFRCZPS256,
28681 IX86_BUILTIN_VFRCZPD256,
28682
28683 IX86_BUILTIN_VPCOMEQUB,
28684 IX86_BUILTIN_VPCOMNEUB,
28685 IX86_BUILTIN_VPCOMLTUB,
28686 IX86_BUILTIN_VPCOMLEUB,
28687 IX86_BUILTIN_VPCOMGTUB,
28688 IX86_BUILTIN_VPCOMGEUB,
28689 IX86_BUILTIN_VPCOMFALSEUB,
28690 IX86_BUILTIN_VPCOMTRUEUB,
28691
28692 IX86_BUILTIN_VPCOMEQUW,
28693 IX86_BUILTIN_VPCOMNEUW,
28694 IX86_BUILTIN_VPCOMLTUW,
28695 IX86_BUILTIN_VPCOMLEUW,
28696 IX86_BUILTIN_VPCOMGTUW,
28697 IX86_BUILTIN_VPCOMGEUW,
28698 IX86_BUILTIN_VPCOMFALSEUW,
28699 IX86_BUILTIN_VPCOMTRUEUW,
28700
28701 IX86_BUILTIN_VPCOMEQUD,
28702 IX86_BUILTIN_VPCOMNEUD,
28703 IX86_BUILTIN_VPCOMLTUD,
28704 IX86_BUILTIN_VPCOMLEUD,
28705 IX86_BUILTIN_VPCOMGTUD,
28706 IX86_BUILTIN_VPCOMGEUD,
28707 IX86_BUILTIN_VPCOMFALSEUD,
28708 IX86_BUILTIN_VPCOMTRUEUD,
28709
28710 IX86_BUILTIN_VPCOMEQUQ,
28711 IX86_BUILTIN_VPCOMNEUQ,
28712 IX86_BUILTIN_VPCOMLTUQ,
28713 IX86_BUILTIN_VPCOMLEUQ,
28714 IX86_BUILTIN_VPCOMGTUQ,
28715 IX86_BUILTIN_VPCOMGEUQ,
28716 IX86_BUILTIN_VPCOMFALSEUQ,
28717 IX86_BUILTIN_VPCOMTRUEUQ,
28718
28719 IX86_BUILTIN_VPCOMEQB,
28720 IX86_BUILTIN_VPCOMNEB,
28721 IX86_BUILTIN_VPCOMLTB,
28722 IX86_BUILTIN_VPCOMLEB,
28723 IX86_BUILTIN_VPCOMGTB,
28724 IX86_BUILTIN_VPCOMGEB,
28725 IX86_BUILTIN_VPCOMFALSEB,
28726 IX86_BUILTIN_VPCOMTRUEB,
28727
28728 IX86_BUILTIN_VPCOMEQW,
28729 IX86_BUILTIN_VPCOMNEW,
28730 IX86_BUILTIN_VPCOMLTW,
28731 IX86_BUILTIN_VPCOMLEW,
28732 IX86_BUILTIN_VPCOMGTW,
28733 IX86_BUILTIN_VPCOMGEW,
28734 IX86_BUILTIN_VPCOMFALSEW,
28735 IX86_BUILTIN_VPCOMTRUEW,
28736
28737 IX86_BUILTIN_VPCOMEQD,
28738 IX86_BUILTIN_VPCOMNED,
28739 IX86_BUILTIN_VPCOMLTD,
28740 IX86_BUILTIN_VPCOMLED,
28741 IX86_BUILTIN_VPCOMGTD,
28742 IX86_BUILTIN_VPCOMGED,
28743 IX86_BUILTIN_VPCOMFALSED,
28744 IX86_BUILTIN_VPCOMTRUED,
28745
28746 IX86_BUILTIN_VPCOMEQQ,
28747 IX86_BUILTIN_VPCOMNEQ,
28748 IX86_BUILTIN_VPCOMLTQ,
28749 IX86_BUILTIN_VPCOMLEQ,
28750 IX86_BUILTIN_VPCOMGTQ,
28751 IX86_BUILTIN_VPCOMGEQ,
28752 IX86_BUILTIN_VPCOMFALSEQ,
28753 IX86_BUILTIN_VPCOMTRUEQ,
28754
28755 /* LWP instructions. */
28756 IX86_BUILTIN_LLWPCB,
28757 IX86_BUILTIN_SLWPCB,
28758 IX86_BUILTIN_LWPVAL32,
28759 IX86_BUILTIN_LWPVAL64,
28760 IX86_BUILTIN_LWPINS32,
28761 IX86_BUILTIN_LWPINS64,
28762
28763 IX86_BUILTIN_CLZS,
28764
28765 /* RTM */
28766 IX86_BUILTIN_XBEGIN,
28767 IX86_BUILTIN_XEND,
28768 IX86_BUILTIN_XABORT,
28769 IX86_BUILTIN_XTEST,
28770
28771 /* BMI instructions. */
28772 IX86_BUILTIN_BEXTR32,
28773 IX86_BUILTIN_BEXTR64,
28774 IX86_BUILTIN_CTZS,
28775
28776 /* TBM instructions. */
28777 IX86_BUILTIN_BEXTRI32,
28778 IX86_BUILTIN_BEXTRI64,
28779
28780 /* BMI2 instructions. */
28781 IX86_BUILTIN_BZHI32,
28782 IX86_BUILTIN_BZHI64,
28783 IX86_BUILTIN_PDEP32,
28784 IX86_BUILTIN_PDEP64,
28785 IX86_BUILTIN_PEXT32,
28786 IX86_BUILTIN_PEXT64,
28787
28788 /* ADX instructions. */
28789 IX86_BUILTIN_ADDCARRYX32,
28790 IX86_BUILTIN_ADDCARRYX64,
28791
28792 /* FSGSBASE instructions. */
28793 IX86_BUILTIN_RDFSBASE32,
28794 IX86_BUILTIN_RDFSBASE64,
28795 IX86_BUILTIN_RDGSBASE32,
28796 IX86_BUILTIN_RDGSBASE64,
28797 IX86_BUILTIN_WRFSBASE32,
28798 IX86_BUILTIN_WRFSBASE64,
28799 IX86_BUILTIN_WRGSBASE32,
28800 IX86_BUILTIN_WRGSBASE64,
28801
28802 /* RDRND instructions. */
28803 IX86_BUILTIN_RDRAND16_STEP,
28804 IX86_BUILTIN_RDRAND32_STEP,
28805 IX86_BUILTIN_RDRAND64_STEP,
28806
28807 /* RDSEED instructions. */
28808 IX86_BUILTIN_RDSEED16_STEP,
28809 IX86_BUILTIN_RDSEED32_STEP,
28810 IX86_BUILTIN_RDSEED64_STEP,
28811
28812 /* F16C instructions. */
28813 IX86_BUILTIN_CVTPH2PS,
28814 IX86_BUILTIN_CVTPH2PS256,
28815 IX86_BUILTIN_CVTPS2PH,
28816 IX86_BUILTIN_CVTPS2PH256,
28817
28818 /* CFString built-in for darwin */
28819 IX86_BUILTIN_CFSTRING,
28820
28821 /* Builtins to get CPU type and supported features. */
28822 IX86_BUILTIN_CPU_INIT,
28823 IX86_BUILTIN_CPU_IS,
28824 IX86_BUILTIN_CPU_SUPPORTS,
28825
28826 /* Read/write FLAGS register built-ins. */
28827 IX86_BUILTIN_READ_FLAGS,
28828 IX86_BUILTIN_WRITE_FLAGS,
28829
28830 IX86_BUILTIN_MAX
28831 };
28832
28833 /* Table for the ix86 builtin decls. */
28834 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28835
28836 /* Table of all of the builtin functions that are possible with different ISAs,
28837    but that are waiting to be built until a function is declared to use that
28838    ISA. */
28839 struct builtin_isa {
28840 const char *name; /* function name */
28841 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28842 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28843 bool const_p; /* true if the declaration is constant */
28844 bool set_and_not_built_p; /* true if the builtin was deferred: recorded here but its decl not yet built */
28845 };
28846
28847 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
28848
28849
28850 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
28851 of which isa_flags to use in the ix86_builtins_isa array. Stores the
28852 function decl in the ix86_builtins array. Returns the function decl, or
28853 NULL_TREE if the builtin was not added.
28854
28855 If the front end has a special hook for builtin functions, delay adding
28856 builtin functions that aren't in the current ISA until the ISA is changed
28857 with function-specific optimization. Doing so can save about 300K for the
28858 default compiler. When the builtin is expanded, check at that time whether
28859 it is valid.
28860
28861 If the front end doesn't have a special hook, record all builtins, even those
28862 that aren't in the current ISA, in case the user uses function-specific
28863 options for a different ISA; that way we don't get scope errors if a builtin
28864 is added in the middle of a function scope. */
28865
28866 static inline tree
28867 def_builtin (HOST_WIDE_INT mask, const char *name,
28868 enum ix86_builtin_func_type tcode,
28869 enum ix86_builtins code)
28870 {
28871 tree decl = NULL_TREE;
28872
28873 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28874 {
28875 ix86_builtins_isa[(int) code].isa = mask;
28876
28877 mask &= ~OPTION_MASK_ISA_64BIT;
28878 if (mask == 0
28879 || (mask & ix86_isa_flags) != 0
28880 || (lang_hooks.builtin_function
28881 == lang_hooks.builtin_function_ext_scope))
28882
28883 {
28884 tree type = ix86_get_builtin_func_type (tcode);
28885 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28886 NULL, NULL_TREE);
28887 ix86_builtins[(int) code] = decl;
28888 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28889 }
28890 else
28891 {
28892 ix86_builtins[(int) code] = NULL_TREE;
28893 ix86_builtins_isa[(int) code].tcode = tcode;
28894 ix86_builtins_isa[(int) code].name = name;
28895 ix86_builtins_isa[(int) code].const_p = false;
28896 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28897 }
28898 }
28899
28900 return decl;
28901 }
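
/* Illustrative sketch (editorial addition, not part of the original
   source): a typical registration made by the ix86_init_*_builtins
   routines looks like

     def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_example",
                  VOID_FTYPE_UNSIGNED, IX86_BUILTIN_EXAMPLE);

   where "__builtin_ia32_example" and IX86_BUILTIN_EXAMPLE are
   hypothetical names; only the calling pattern matters.  If the
   required ISA is already enabled (or deferral isn't possible for
   this front end), the decl is built immediately; otherwise it is
   only recorded in ix86_builtins_isa and materialized later by
   ix86_add_new_builtins.  */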
28902
28903 /* Like def_builtin, but also marks the function decl "const". */
28904
28905 static inline tree
28906 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28907 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28908 {
28909 tree decl = def_builtin (mask, name, tcode, code);
28910 if (decl)
28911 TREE_READONLY (decl) = 1;
28912 else
28913 ix86_builtins_isa[(int) code].const_p = true;
28914
28915 return decl;
28916 }
28917
28918 /* Add any new builtin functions for a given ISA that may not have been
28919    declared. This saves a bit of space compared to adding all of the
28920    declarations to the tree up front, whether or not they are used. */
28921
28922 static void
28923 ix86_add_new_builtins (HOST_WIDE_INT isa)
28924 {
28925 int i;
28926
28927 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
28928 {
28929 if ((ix86_builtins_isa[i].isa & isa) != 0
28930 && ix86_builtins_isa[i].set_and_not_built_p)
28931 {
28932 tree decl, type;
28933
28934 /* Don't define the builtin again. */
28935 ix86_builtins_isa[i].set_and_not_built_p = false;
28936
28937 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28938 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28939 type, i, BUILT_IN_MD, NULL,
28940 NULL_TREE);
28941
28942 ix86_builtins[i] = decl;
28943 if (ix86_builtins_isa[i].const_p)
28944 TREE_READONLY (decl) = 1;
28945 }
28946 }
28947 }
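
/* Editorial note (not in the original source): this is the deferred
   half of def_builtin above.  It is expected to be invoked when the
   set of enabled ISAs grows -- for instance while handling a
   target("...") attribute or pragma -- so that builtins recorded
   with set_and_not_built_p finally receive real decls.  The exact
   call site lies outside this excerpt.  */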
28948
28949 /* Bits for builtin_description.flag. */
28950
28951 /* Set when we don't support the comparison natively, and should
28952    swap the comparison operands in order to support it. */
28953 #define BUILTIN_DESC_SWAP_OPERANDS 1
28954
28955 struct builtin_description
28956 {
28957 const HOST_WIDE_INT mask;
28958 const enum insn_code icode;
28959 const char *const name;
28960 const enum ix86_builtins code;
28961 const enum rtx_code comparison;
28962 const int flag;
28963 };
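
/* Editorial note (not in the original source): the FLAG field is
   overloaded by the tables below.  bdesc_comi uses it for
   BUILTIN_DESC_SWAP_OPERANDS (0 in all entries here),
   bdesc_pcmpestr/bdesc_pcmpistr store a flags-register mode such as
   CCAmode, and bdesc_special_args/bdesc_args store the prototype as
   an ix86_builtin_func_type cast to int.  */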
28964
28965 static const struct builtin_description bdesc_comi[] =
28966 {
28967 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28968 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28969 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28970 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28971 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28972 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28973 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
28974 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
28975 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
28976 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
28977 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
28978 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
28979 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
28980 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
28981 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
28982 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
28983 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
28984 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
28985 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
28986 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
28987 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
28988 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
28989 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
28990 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
28991 };
28992
28993 static const struct builtin_description bdesc_pcmpestr[] =
28994 {
28995 /* SSE4.2 */
28996 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
28997 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
28998 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
28999 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
29000 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
29001 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
29002 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
29003 };
29004
29005 static const struct builtin_description bdesc_pcmpistr[] =
29006 {
29007 /* SSE4.2 */
29008 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
29009 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
29010 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
29011 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
29012 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
29013 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
29014 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
29015 };
29016
29017 /* Special builtins with variable number of arguments. */
29018 static const struct builtin_description bdesc_special_args[] =
29019 {
29020 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
29021 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
29022 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
29023
29024 /* 80387 (for use internally for atomic compound assignment). */
29025 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
29026 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
29027 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
29028 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
29029
29030 /* MMX */
29031 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29032
29033 /* 3DNow! */
29034 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29035
29036 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
29037 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
29038 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
29039 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29040 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29041 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29042 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29043 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29044 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29045
29046 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29047 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29048 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29049 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29050 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29051 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29052 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29053 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29054
29055 /* SSE */
29056 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29057 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29058 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29059
29060 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29061 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29062 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29063 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29064
29065 /* SSE or 3DNow!A */
29066 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29067 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
29068
29069 /* SSE2 */
29070 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29071 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29072 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29073 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
29074 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29075 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
29076 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
29077 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
29078 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
29079 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29080
29081 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29082 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29083
29084 /* SSE3 */
29085 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29086
29087 /* SSE4.1 */
29088 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
29089
29090 /* SSE4A */
29091 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29092 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29093
29094 /* AVX */
29095 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
29096 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
29097
29098 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29099 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29100 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29101 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29102 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29103
29104 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29105 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29106 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29107 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29108 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29109 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29110 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29111
29112 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29113 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29114 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29115
29116 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29117 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29118 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29119 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29120 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29121 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29122 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29123 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29124
29125 /* AVX2 */
29126 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29127 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29128 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29129 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29130 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29131 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29132 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29133 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29134 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29135
29136 /* AVX512F */
29137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29184
29185 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29186 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29187 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29188 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29189 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29190 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29191
29192 /* FSGSBASE */
29193 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29194 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29195 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29196 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29197 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29198 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29199 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29200 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29201
29202 /* RTM */
29203 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29204 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29205 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29206 };
29207
29208 /* Builtins with variable number of arguments. */
29209 static const struct builtin_description bdesc_args[] =
29210 {
29211 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29212 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29213 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29214 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29215 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29216 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29217 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29218
29219 /* MMX */
29220 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29221 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29222 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29223 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29224 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29225 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29226
29227 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29228 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29229 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29230 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29231 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29233 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29234 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29235
29236 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29237 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29238
29239 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29240 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29241 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29242 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29243
29244 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29245 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29246 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29247 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29248 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29249 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29250
29251 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29252 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29253 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29254 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29255 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
29256 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
29257
29258 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29259 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29260 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29261
29262 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29263
29264 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29265 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29266 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29267 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29268 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29269 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29270
29271 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29272 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29273 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29274 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29275 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29276 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29277
29278 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29279 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29280 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29281 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29282
29283 /* 3DNow! */
29284 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29285 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29286 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29287 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29288
29289 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29290 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29291 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29292 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29293 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29294 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29295 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29296 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29297 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29298 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29299 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29300 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29301 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29302 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29303 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29304
29305 /* 3DNow!A */
29306 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29307 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29308 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29309 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29310 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29311 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29312
29313 /* SSE */
29314 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29315 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29316 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29317 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29318 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29319 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29320 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29321 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29322 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29323 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29324 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29325 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29326
29327 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29328
29329 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29330 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29331 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29332 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29333 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29334 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29335 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29336 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29337
29338 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29339 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29340 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29341 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29342 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29343 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29344 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29345 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29346 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29347 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29348 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
29349 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29350 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29351 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29352 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29353 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29354 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29355 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29356 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29357 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29358
29359 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29360 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29361 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29362 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29363
29364 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29365 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29366 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29367 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29368
29369 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29370
29371 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29372 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29373 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29374 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29375 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29376
29377 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29378 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29379 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29380
29381 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29382
29383 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29384 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29385 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29386
29387 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29388 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29389
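/* A note on the table format (a hedged sketch): each row is a struct
   builtin_description -- an ISA mask, the insn code used for expansion,
   the user-visible builtin name (0 when the builtin is declared
   elsewhere), its IX86_BUILTIN_* enumerator, an optional comparison rtx
   code, and an ix86_builtin_func_type (cast to int) telling the expander
   how many operands to fetch and in which modes.  For the compare rows
   the rtx code is what the maskcmp pattern receives, and the *_SWAP
   function types make the expander exchange the two vector operands
   first; e.g. the CMPNGEPS row uses UNGT with swapped operands because
   !(a >= b) is UNGT (b, a).  Roughly how the xmmintrin.h wrappers reach
   this table (illustrative only, not quoted from the headers):

     __m128 a, b;
     __m128 r = (__m128) __builtin_ia32_cmpngeps ((__v4sf) a, (__v4sf) b);
     // approximately what _mm_cmpnge_ps (a, b) expands to  */
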
29390 /* SSE MMX or 3DNow!A */
29391 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29392 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29393 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29394
29395 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29396 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29397 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29398 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29399
29400 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29401 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29402
29403 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29404
29405 /* SSE2 */
29406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29407
29408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29412 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29413
29414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29419
29420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29421
29422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29424 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29425 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29426
29427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29429 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29430
29431 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29432 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29433 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29434 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29437 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29438 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29439
29440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29441 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29442 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29443 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29444 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
29445 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29446 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29447 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29448 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29449 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29450 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29451 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29452 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29453 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29454 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29455 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29456 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29457 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29458 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29459 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29460
29461 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29462 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29463 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29464 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29465
29466 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29467 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29468 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29469 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29470
29471 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29472
29473 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29474 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29475 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29476
29477 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29478
29479 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29480 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29481 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29482 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29483 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29484 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29485 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29486 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29487
29488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29489 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29490 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29491 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29492 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29496
29497 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29498 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29499
29500 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29501 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29502 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29503 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29504
29505 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29507
29508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29509 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29510 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29512 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29514
29515 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29516 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29517 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29519
29520 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29521 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29522 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29523 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29524 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29525 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29526 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29527 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29528
29529 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29530 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29531 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29532
29533 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29535
29536 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29537 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29538
29539 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29540
29541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29542 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29543 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29544 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29545
29546 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29547 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29548 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29549 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29550 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29551 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29552 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29553
29554 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29555 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29556 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29557 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29558 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29559 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29560 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29561
29562 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29563 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29564 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29565 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29566
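/* A note on the shift rows above (a hedged sketch): the *_SI_COUNT
   function types are the immediate/scalar-count forms (psllwi, pslldi,
   ...), the *_V*_COUNT types take the count from another vector
   register (psllw, pslld, ...), and the *_INT_CONVERT types mark rows
   whose insn pattern runs in a different mode than the builtin's
   signature: pslldqi128/psrldqi128 expand to a V1TImode shift whose
   count is in bits, with the V2DI arguments converted first.  That is
   presumably why emmintrin.h scales the byte count, roughly:

     // _mm_slli_si128 (x, N) is approximately
     (__m128i) __builtin_ia32_pslldqi128 ((__v2di) x, (N) * 8);  */
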
29567 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29568 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29570
29571 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29572
29573 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29574
29575 /* SSE2 MMX */
29576 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29577 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29578
29579 /* SSE3 */
29580 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29581 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29582
29583 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29584 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29585 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29586 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29587 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29588 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29589
29590 /* SSSE3 */
29591 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29592 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29593 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29594 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29595 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29596 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29597
29598 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29599 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29600 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29601 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29602 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29603 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29604 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29605 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29606 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29607 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29608 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29609 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29610 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29611 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29612 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29613 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29614 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29615 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29616 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29617 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29618 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29619 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29620 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29621 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29622
29623 /* SSSE3. */
29624 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29625 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29626
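/* The palignr rows use the *_INT_CONVERT types for the same reason as
   the byte shifts earlier: CODE_FOR_ssse3_palignrti operates on the
   whole 128-bit value and takes its offset in bits, while the builtin
   is declared on V2DI.  The tmmintrin.h wrapper therefore (as far as
   those headers go) scales the byte offset, roughly:

     // _mm_alignr_epi8 (x, y, N) is approximately
     (__m128i) __builtin_ia32_palignr128 ((__v2di) x, (__v2di) y, (N) * 8);  */
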
29627 /* SSE4.1 */
29628 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29629 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29630 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29631 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29632 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29633 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29634 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29635 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29636 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29637 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29638
29639 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29640 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29641 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29642 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29643 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29644 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29645 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29646 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29647 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29648 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29649 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29650 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29651 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29652
29653 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29654 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29655 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29656 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29657 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29658 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29659 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29660 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29661 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29662 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29663 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29664 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29665
29666 /* SSE4.1 rounding and ptest (OPTION_MASK_ISA_ROUND) */
29667 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29668 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29669 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29670 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29671
29672 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29673 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29674 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29675 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
29676
29677 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29678 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29679
29680 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29681 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29682
29683 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29684 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29685 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29686 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29687
29688 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29689 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29690
29691 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29692 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29693
29694 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29695 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29696 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29697
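/* The three ptest rows above share one insn; the rtx code picks which
   flag the expander turns into the integer result: EQ reads ZF (every
   bit of a & b is zero), LTU reads CF (every bit of ~a & b is zero),
   and GTU asks for "above", i.e. both flags clear.  A hedged usage
   sketch in the style of the smmintrin.h wrappers (illustrative only):

     __m128i a, b;
     int z = __builtin_ia32_ptestz128 ((__v2di) a, (__v2di) b);  // ~ _mm_testz_si128
     int c = __builtin_ia32_ptestc128 ((__v2di) a, (__v2di) b);  // ~ _mm_testc_si128  */
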
29698 /* SSE4.2 */
29699 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29700 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29701 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29702 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29703 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29704
29705 /* SSE4A */
29706 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29707 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29708 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29709 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29710
29711 /* AES */
29712 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29713 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29714
29715 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29716 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29717 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29718 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29719
29720 /* PCLMUL */
29721 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
29722
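/* The AES and PCLMUL rows carry a null name: the expander still reaches
   them through this table, but the user-visible builtins
   (__builtin_ia32_aesenc128, __builtin_ia32_pclmulqdq128, ...) appear
   to be declared separately under their own ISA masks, so the SSE2 mask
   here only gates expansion.  A rough usage sketch in the style of the
   wmmintrin.h wrappers (illustrative only, hypothetical variable names):

     __m128i state, rk;
     state = (__m128i) __builtin_ia32_aesenc128 ((__v2di) state, (__v2di) rk);
     // approximately what _mm_aesenc_si128 (state, rk) does  */
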
29723 /* AVX */
29724 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29725 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29728 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29729 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29732 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29738 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29739 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29740 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29741 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29742 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29743 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29744 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29745 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29746 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29747 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29748 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29749 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29750
29751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29754 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29755
29756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29758 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29759 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29760 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29761 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29762 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29763 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29764 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29765 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29766 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29767 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29768 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29769 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29770 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29771 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29772 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29773 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29774 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29775 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29776 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29777 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29779 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29780 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29781 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29782 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29784 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29785 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29786 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29787 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29788 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29789 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29790
29791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29792 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29794
29795 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29797 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29798 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29799 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29800
29801 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29802
29803 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29804 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29805
29806 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29807 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29808 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29809 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29810
29811 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29812 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29813
29814 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29815 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29816
29817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29818 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29819 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29820 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29821
29822 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29823 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29824
29825 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29826 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29827
29828 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29829 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29830 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29831 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29832
29833 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29834 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29835 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29836 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29837 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29838 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
29839
29840 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29841 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29842 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29843 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29844 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29846 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29848 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29849 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29850 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29851 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29852 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29853 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29854 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29855
29856 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29857 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
29858
29859 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29860 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29861
29862 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29863
29864 /* AVX2 */
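/* Each entry in this table follows the builtin_description layout used
   throughout this file: ISA option mask, insn code, C-level builtin name,
   IX86_BUILTIN_* enumerator, an rtx comparison code or other per-builtin
   constant (UNKNOWN when unused, ROUND_FLOOR and friends for the rounding
   rows above), and the prototype enumerator cast to int.  Prototype
   suffixes such as _COUNT (last operand is a shift count) and _CONVERT
   (the insn operates in a different mode than the prototype, e.g. V2TI
   for palignr/pslldq) request matching special handling when the builtin
   is expanded.  Descriptive summary only, inferred from the rows.  */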
29865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
29866 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
29867 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
29868 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
29869 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29870 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29871 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29872 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29873 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29874 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29875 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29876 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29877 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29878 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29879 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29880 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29881 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
29882 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
29887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
29888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29892 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29894 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29895 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29896 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29897 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29898 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29899 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29900 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29901 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29902 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29903 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
29904 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29905 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29906 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29907 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29908 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29909 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29910 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29911 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29912 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29913 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29914 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29915 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29916 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
29917 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29918 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29919 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29920 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29921 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29922 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29923 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29924 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29925 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29926 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29927 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29928 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29929 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29930 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29931 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29932 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29933 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29934 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29935 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29936 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29937 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29938 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29939 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
29940 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29941 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29942 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29943 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29944 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29945 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29946 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29947 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29948 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29949 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29950 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29951 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29952 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29953 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29954 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29955 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29956 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29957 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29958 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29959 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29960 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29961 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29962 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29963 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29964 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29965 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29966 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29967 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29968 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29969 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29970 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29971 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29972 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29973 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29974 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29975 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29976 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29977 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29978 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29979 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29980 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29981 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29982 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29983 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29984 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
29985 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29986 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
29987 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
29988 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29989 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29990 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29991 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29992 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29993 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29994 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29995 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29996 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29997 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
29998 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
29999 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
30000 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
30001 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30002 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30003 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30004 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30005 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30006 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30007 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30008 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30009 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30010 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30011
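/* LZCNT */
/* __builtin_clzs is the 16-bit leading-zero count; the clzhi2_lzcnt
   pattern uses the LZCNT encoding, which, unlike a BSR-based count, is
   well defined for a zero input (it yields the operand width, 16 here).  */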
30012 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
30013
30014 /* BMI */
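/* The BEXTR builtins take the source and a packed control word, bit-field
   start in bits 7:0 and length in bits 15:8, which is why both arguments
   are plain unsigned integers; __builtin_ctzs is the 16-bit trailing-zero
   count.  Usage sketch only, using the builtin name from the row below:

     unsigned field = __builtin_ia32_bextr_u32 (x, start | (len << 8));  */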
30015 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30016 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30017 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
30018
30019 /* TBM */
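/* BEXTRI is the immediate form of BEXTR: the same packed start/length
   control, but taken as a constant, so the second argument of these
   builtins is expected to be a compile-time constant.  */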
30020 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30021 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30022
30023 /* F16C */
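/* Half-precision conversions.  vcvtph2ps widens packed 16-bit floats;
   vcvtps2ph narrows and therefore takes a rounding-control immediate,
   hence the _INT prototypes.  Usage sketch only, via the f16cintrin.h
   intrinsic:

     __m128i halves = _mm256_cvtps_ph (src, _MM_FROUND_TO_NEAREST_INT);  */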
30024 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
30025 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
30026 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
30027 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
30028
30029 /* BMI2 */
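/* BZHI zeroes the bits of the first operand from the given index upward;
   PDEP scatters the low-order bits of the source into the positions set
   in the mask, and PEXT gathers them back.  Usage sketch only, with the
   builtin names from the rows below:

     unsigned spread = __builtin_ia32_pdep_si (x, 0x0f0f0f0f);
     unsigned packed = __builtin_ia32_pext_si (y, 0x0f0f0f0f);  */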
30030 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30031 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30032 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30033 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30034 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30035 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30036
30037 /* AVX512F */
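/* In the _mask rows that follow, the builtin appends a merge source and a
   write mask to the plain operation; the _QI/_HI prototype suffix mirrors
   the lane count (8 or 16), and _maskz variants zero rather than merge the
   inactive lanes.  User code normally reaches these through the
   avx512fintrin.h wrappers; usage sketch only:

     __m512i r = _mm512_mask_add_epi32 (w, m, a, b);
     maps to __builtin_ia32_paddd512_mask (a, b, w, m).  */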
30038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
30039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
30040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
30041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
30042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
30043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
30044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
30052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
30054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
30055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
30062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
30064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
30069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
30070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
30071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
30072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
30073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
30074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
30075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
30076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
30093 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
30094 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
30095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
30096 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30097 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30205 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30206 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30207 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30208 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30235
30236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30240 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30244
30245 /* Mask arithmetic operations */
30246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
30256
30257 /* SHA */
30258 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30259 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30260 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30261 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30262 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30263 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30264 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30265 };
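
/* Illustrative sketch, not part of GCC itself: how one of the mask
   arithmetic builtins registered above might be called from user code
   compiled with -mavx512f.  The builtin name and its HI_FTYPE_HI_HI
   signature come from the table; the wrapper function name is made up
   for this example.  */
#if 0
#include <immintrin.h>

static unsigned short
example_mask_and (unsigned short a, unsigned short b)
{
  /* __builtin_ia32_kandhi takes two 16-bit mask values and returns
     their bitwise AND (the KANDW operation).  */
  return (unsigned short) __builtin_ia32_kandhi (a, b);
}
#endif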
30266
30267 /* Builtins with rounding support. */
30268 static const struct builtin_description bdesc_round_args[] =
30269 {
30270 /* AVX512F */
30271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30290 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30292 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30299 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30301 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30327 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30347 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30348 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30349 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30350 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30351 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30352 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30353 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30354 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30355 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30356 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30357 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30358 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30359 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30360 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30361 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30362 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30363 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30364 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30365 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30366 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30367 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30368 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30369 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30370 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30371 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30372 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30373 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30374 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30375 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30376 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30377 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30378 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30379 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30380 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30381 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30382 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30383 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30384 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30385 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30386 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30387 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30388 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30389 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30390
30391 /* AVX512ER */
30392 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30393 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30394 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30395 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30396 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30397 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30398 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30399 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30400 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30401 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30402 };
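
/* Illustrative sketch, not part of GCC itself: the trailing INT in the
   signatures above carries the embedded-rounding / SAE immediate, which is
   passed as the last builtin argument.  The builtin name and the
   V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT signature come from the table; the
   wrapper name and the use of the _MM_FROUND_* macros are assumptions made
   for this example (compile with -mavx512f).  */
#if 0
#include <immintrin.h>

static __m512d
example_add_round_pd (__m512d a, __m512d b, __m512d src, __mmask8 m)
{
  /* Operands, merge source and write mask first, then the
     per-instruction rounding-mode immediate.  */
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) a, (__v8df) b,
                                                 (__v8df) src, m,
                                                 _MM_FROUND_TO_NEAREST_INT
                                                 | _MM_FROUND_NO_EXC);
}
#endif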
30403
30404 /* FMA4 and XOP. */
30405 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30406 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30407 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30408 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30409 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30410 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30411 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30412 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30413 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30414 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30415 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30416 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30417 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30418 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30419 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30420 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30421 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30422 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30423 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30424 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30425 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30426 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30427 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30428 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30429 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30430 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30431 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30432 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30433 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30434 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30435 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30436 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30437 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30438 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30439 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30440 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30441 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30442 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30443 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30444 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30445 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30446 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30447 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30448 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30449 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30450 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30451 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30452 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30453 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30454 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30455 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30456 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
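
/* Naming convention used by the MULTI_ARG_* aliases above: the digit after
   MULTI_ARG_ is the operand count, the element tag (SF, DF, SI, HI, QI, DI)
   names the element mode, and a trailing '2' (e.g. MULTI_ARG_3_SF2) selects
   the 256-bit variant of the corresponding 128-bit type.  The _IMM forms
   take an immediate operand, while the _CMP and _TF forms get their
   comparison code from the rtx_code field of the table entry.  */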
30457
30458 static const struct builtin_description bdesc_multi_arg[] =
30459 {
30460 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30461 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30462 UNKNOWN, (int)MULTI_ARG_3_SF },
30463 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30464 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30465 UNKNOWN, (int)MULTI_ARG_3_DF },
30466
30467 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30468 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30469 UNKNOWN, (int)MULTI_ARG_3_SF },
30470 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30471 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30472 UNKNOWN, (int)MULTI_ARG_3_DF },
30473
30474 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30475 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30476 UNKNOWN, (int)MULTI_ARG_3_SF },
30477 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30478 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30479 UNKNOWN, (int)MULTI_ARG_3_DF },
30480 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30481 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30482 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30483 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30484 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30485 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30486
30487 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30488 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30489 UNKNOWN, (int)MULTI_ARG_3_SF },
30490 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30491 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30492 UNKNOWN, (int)MULTI_ARG_3_DF },
30493 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30494 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30495 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30496 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30497 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30498 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30499
30500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
30505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30507
30508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30515
30516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30517
30518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30530
30531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30547
30548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30554
30555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30570
30571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30578
30579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30586
30587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30594
30595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30602
30603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30610
30611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30615 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30616 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30617 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30618
30619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30620 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30621 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30622 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30623 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30624 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30625 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30626
30627 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30628 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30629 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30630 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30631 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30632 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30633 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30634
30635 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30636 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30637 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30638 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30639 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30640 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30641 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30642 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30643
30644 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30645 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30646 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30647 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30648 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30649 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30650 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30651 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30652
30653 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30654 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30655 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30656 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30657
30658 };
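
/* Illustrative sketch, not part of GCC itself: the XOP comparison entries
   above share one insn pattern per mode and differ only in the rtx_code
   field, so from user code the predicate is selected by the builtin name.
   The builtin and its MULTI_ARG_2_QI_CMP signature come from the table; the
   wrapper name is made up for this example (compile with -mxop).  */
#if 0
#include <x86intrin.h>

static __m128i
example_vpcomltb (__m128i a, __m128i b)
{
  /* Signed byte compare-less-than; the result is a byte mask.  */
  return (__m128i) __builtin_ia32_vpcomltb ((__v16qi) a, (__v16qi) b);
}
#endif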
30659 \f
30660 /* TM vector builtins. */
30661
30662 /* Reuse the existing x86-specific `struct builtin_description' rather than
30663    defining a new structure; casts are added so the TM builtin codes fit. */
30664 static const struct builtin_description bdesc_tm[] =
30665 {
30666 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30667 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30668 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30669 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30670 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30671 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30672 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30673
30674 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30675 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30676 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30677 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30678 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30679 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30680 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30681
30682 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30683 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30684 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30685 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30686 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30687 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30688 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30689
30690 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30691 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30692 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30693 };
30694
30695 /* TM callbacks. */
30696
30697 /* Return the builtin decl needed to load a vector of TYPE. */
30698
30699 static tree
30700 ix86_builtin_tm_load (tree type)
30701 {
30702 if (TREE_CODE (type) == VECTOR_TYPE)
30703 {
30704 switch (tree_to_uhwi (TYPE_SIZE (type)))
30705 {
30706 case 64:
30707 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30708 case 128:
30709 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30710 case 256:
30711 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30712 }
30713 }
30714 return NULL_TREE;
30715 }
30716
30717 /* Return the builtin decl needed to store a vector of TYPE. */
30718
30719 static tree
30720 ix86_builtin_tm_store (tree type)
30721 {
30722 if (TREE_CODE (type) == VECTOR_TYPE)
30723 {
30724 switch (tree_to_uhwi (TYPE_SIZE (type)))
30725 {
30726 case 64:
30727 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30728 case 128:
30729 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30730 case 256:
30731 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30732 }
30733 }
30734 return NULL_TREE;
30735 }
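/* Illustration of how these hooks are used (hypothetical variables g and x):
   with -fgnu-tm, a 128-bit vector access inside a transaction, e.g.

     typedef float v4sf __attribute__ ((vector_size (16)));
     v4sf g;
     void f (v4sf x) { __transaction_atomic { g = x; } }

   may be instrumented with the M128 entry points registered in bdesc_tm
   above (roughly _ITM_WM128 (&g, x)); ix86_builtin_tm_load and
   ix86_builtin_tm_store map the 64/128/256-bit vector sizes to the
   corresponding _ITM_{R,W}M64/M128/M256 builtin decls. */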
30736 \f
30737 /* Initialize the transactional memory vector load/store builtins. */
30738
30739 static void
30740 ix86_init_tm_builtins (void)
30741 {
30742 enum ix86_builtin_func_type ftype;
30743 const struct builtin_description *d;
30744 size_t i;
30745 tree decl;
30746 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30747 tree attrs_log, attrs_type_log;
30748
30749 if (!flag_tm)
30750 return;
30751
30752 /* If there are no builtins defined, we must be compiling in a
30753 language without trans-mem support. */
30754 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30755 return;
30756
30757 /* Use whatever attributes a normal TM load has. */
30758 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30759 attrs_load = DECL_ATTRIBUTES (decl);
30760 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30761 /* Use whatever attributes a normal TM store has. */
30762 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30763 attrs_store = DECL_ATTRIBUTES (decl);
30764 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30765 /* Use whatever attributes a normal TM log has. */
30766 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30767 attrs_log = DECL_ATTRIBUTES (decl);
30768 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30769
30770 for (i = 0, d = bdesc_tm;
30771 i < ARRAY_SIZE (bdesc_tm);
30772 i++, d++)
30773 {
30774 if ((d->mask & ix86_isa_flags) != 0
30775 || (lang_hooks.builtin_function
30776 == lang_hooks.builtin_function_ext_scope))
30777 {
30778 tree type, attrs, attrs_type;
30779 enum built_in_function code = (enum built_in_function) d->code;
30780
30781 ftype = (enum ix86_builtin_func_type) d->flag;
30782 type = ix86_get_builtin_func_type (ftype);
30783
30784 if (BUILTIN_TM_LOAD_P (code))
30785 {
30786 attrs = attrs_load;
30787 attrs_type = attrs_type_load;
30788 }
30789 else if (BUILTIN_TM_STORE_P (code))
30790 {
30791 attrs = attrs_store;
30792 attrs_type = attrs_type_store;
30793 }
30794 else
30795 {
30796 attrs = attrs_log;
30797 attrs_type = attrs_type_log;
30798 }
30799 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30800 /* The builtin without the prefix for
30801 calling it directly. */
30802 d->name + strlen ("__builtin_"),
30803 attrs);
30804 /* add_builtin_function () will set the DECL_ATTRIBUTES; now
30805 set the TYPE_ATTRIBUTES. */
30806 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30807
30808 set_builtin_decl (code, decl, false);
30809 }
30810 }
30811 }
30812
30813 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
30814 in the current target ISA, to allow the user to compile particular modules
30815 with target-specific options that differ from the command-line
30816 options. */
30817 static void
30818 ix86_init_mmx_sse_builtins (void)
30819 {
30820 const struct builtin_description * d;
30821 enum ix86_builtin_func_type ftype;
30822 size_t i;
30823
30824 /* Add all special builtins with variable number of operands. */
30825 for (i = 0, d = bdesc_special_args;
30826 i < ARRAY_SIZE (bdesc_special_args);
30827 i++, d++)
30828 {
30829 if (d->name == 0)
30830 continue;
30831
30832 ftype = (enum ix86_builtin_func_type) d->flag;
30833 def_builtin (d->mask, d->name, ftype, d->code);
30834 }
30835
30836 /* Add all builtins with variable number of operands. */
30837 for (i = 0, d = bdesc_args;
30838 i < ARRAY_SIZE (bdesc_args);
30839 i++, d++)
30840 {
30841 if (d->name == 0)
30842 continue;
30843
30844 ftype = (enum ix86_builtin_func_type) d->flag;
30845 def_builtin_const (d->mask, d->name, ftype, d->code);
30846 }
30847
30848 /* Add all builtins with rounding. */
30849 for (i = 0, d = bdesc_round_args;
30850 i < ARRAY_SIZE (bdesc_round_args);
30851 i++, d++)
30852 {
30853 if (d->name == 0)
30854 continue;
30855
30856 ftype = (enum ix86_builtin_func_type) d->flag;
30857 def_builtin_const (d->mask, d->name, ftype, d->code);
30858 }
30859
30860 /* pcmpestr[im] insns. */
30861 for (i = 0, d = bdesc_pcmpestr;
30862 i < ARRAY_SIZE (bdesc_pcmpestr);
30863 i++, d++)
30864 {
30865 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30866 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30867 else
30868 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30869 def_builtin_const (d->mask, d->name, ftype, d->code);
30870 }
30871
30872 /* pcmpistr[im] insns. */
30873 for (i = 0, d = bdesc_pcmpistr;
30874 i < ARRAY_SIZE (bdesc_pcmpistr);
30875 i++, d++)
30876 {
30877 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30878 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30879 else
30880 ftype = INT_FTYPE_V16QI_V16QI_INT;
30881 def_builtin_const (d->mask, d->name, ftype, d->code);
30882 }
30883
30884 /* comi/ucomi insns. */
30885 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30886 {
30887 if (d->mask == OPTION_MASK_ISA_SSE2)
30888 ftype = INT_FTYPE_V2DF_V2DF;
30889 else
30890 ftype = INT_FTYPE_V4SF_V4SF;
30891 def_builtin_const (d->mask, d->name, ftype, d->code);
30892 }
30893
30894 /* SSE */
30895 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30896 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30897 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30898 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30899
30900 /* SSE or 3DNow!A */
30901 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30902 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30903 IX86_BUILTIN_MASKMOVQ);
30904
30905 /* SSE2 */
30906 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30907 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30908
30909 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30910 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30911 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30912 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30913
30914 /* SSE3. */
30915 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30916 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30917 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30918 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30919
30920 /* AES */
30921 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30922 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30923 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30924 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30925 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30926 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30927 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30928 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30929 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30930 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30931 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30932 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30933
30934 /* PCLMUL */
30935 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30936 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30937
30938 /* RDRND */
30939 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30940 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30941 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30942 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30943 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30944 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30945 IX86_BUILTIN_RDRAND64_STEP);
30946
30947 /* AVX2 */
30948 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30949 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30950 IX86_BUILTIN_GATHERSIV2DF);
30951
30952 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30953 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30954 IX86_BUILTIN_GATHERSIV4DF);
30955
30956 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30957 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30958 IX86_BUILTIN_GATHERDIV2DF);
30959
30960 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30961 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30962 IX86_BUILTIN_GATHERDIV4DF);
30963
30964 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30965 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30966 IX86_BUILTIN_GATHERSIV4SF);
30967
30968 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30969 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30970 IX86_BUILTIN_GATHERSIV8SF);
30971
30972 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30973 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30974 IX86_BUILTIN_GATHERDIV4SF);
30975
30976 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30977 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30978 IX86_BUILTIN_GATHERDIV8SF);
30979
30980 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30981 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30982 IX86_BUILTIN_GATHERSIV2DI);
30983
30984 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30985 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30986 IX86_BUILTIN_GATHERSIV4DI);
30987
30988 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30989 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30990 IX86_BUILTIN_GATHERDIV2DI);
30991
30992 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30993 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30994 IX86_BUILTIN_GATHERDIV4DI);
30995
30996 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30997 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30998 IX86_BUILTIN_GATHERSIV4SI);
30999
31000 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
31001 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
31002 IX86_BUILTIN_GATHERSIV8SI);
31003
31004 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
31005 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
31006 IX86_BUILTIN_GATHERDIV4SI);
31007
31008 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
31009 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
31010 IX86_BUILTIN_GATHERDIV8SI);
31011
31012 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df",
31013 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
31014 IX86_BUILTIN_GATHERALTSIV4DF);
31015
31016 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256",
31017 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
31018 IX86_BUILTIN_GATHERALTDIV8SF);
31019
31020 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di",
31021 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
31022 IX86_BUILTIN_GATHERALTSIV4DI);
31023
31024 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256",
31025 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
31026 IX86_BUILTIN_GATHERALTDIV8SI);
31027
31028 /* AVX512F */
31029 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
31030 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
31031 IX86_BUILTIN_GATHER3SIV16SF);
31032
31033 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
31034 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
31035 IX86_BUILTIN_GATHER3SIV8DF);
31036
31037 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
31038 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
31039 IX86_BUILTIN_GATHER3DIV16SF);
31040
31041 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
31042 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
31043 IX86_BUILTIN_GATHER3DIV8DF);
31044
31045 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31046 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
31047 IX86_BUILTIN_GATHER3SIV16SI);
31048
31049 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31050 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
31051 IX86_BUILTIN_GATHER3SIV8DI);
31052
31053 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31054 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
31055 IX86_BUILTIN_GATHER3DIV16SI);
31056
31057 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31058 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
31059 IX86_BUILTIN_GATHER3DIV8DI);
31060
31061 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df",
31062 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31063 IX86_BUILTIN_GATHER3ALTSIV8DF);
31064
31065 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf",
31066 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31067 IX86_BUILTIN_GATHER3ALTDIV16SF);
31068
31069 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di",
31070 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31071 IX86_BUILTIN_GATHER3ALTSIV8DI);
31072
31073 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si",
31074 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31075 IX86_BUILTIN_GATHER3ALTDIV16SI);
31076
31077 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31078 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
31079 IX86_BUILTIN_SCATTERSIV16SF);
31080
31081 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31082 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
31083 IX86_BUILTIN_SCATTERSIV8DF);
31084
31085 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31086 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
31087 IX86_BUILTIN_SCATTERDIV16SF);
31088
31089 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31090 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
31091 IX86_BUILTIN_SCATTERDIV8DF);
31092
31093 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31094 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
31095 IX86_BUILTIN_SCATTERSIV16SI);
31096
31097 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31098 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
31099 IX86_BUILTIN_SCATTERSIV8DI);
31100
31101 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31102 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
31103 IX86_BUILTIN_SCATTERDIV16SI);
31104
31105 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31106 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
31107 IX86_BUILTIN_SCATTERDIV8DI);
31108
31109 /* AVX512PF */
31110 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31111 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31112 IX86_BUILTIN_GATHERPFDPD);
31113 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31114 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31115 IX86_BUILTIN_GATHERPFDPS);
31116 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31117 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31118 IX86_BUILTIN_GATHERPFQPD);
31119 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31120 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31121 IX86_BUILTIN_GATHERPFQPS);
31122 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31123 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31124 IX86_BUILTIN_SCATTERPFDPD);
31125 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31126 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31127 IX86_BUILTIN_SCATTERPFDPS);
31128 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31129 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31130 IX86_BUILTIN_SCATTERPFQPD);
31131 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31132 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31133 IX86_BUILTIN_SCATTERPFQPS);
31134
31135 /* SHA */
31136 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31137 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31138 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31139 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31140 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31141 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31142 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31143 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31144 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31145 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31146 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31147 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31148 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31149 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31150
31151 /* RTM. */
31152 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31153 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31154
31155 /* MMX access to the vec_init patterns. */
31156 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31157 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31158
31159 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31160 V4HI_FTYPE_HI_HI_HI_HI,
31161 IX86_BUILTIN_VEC_INIT_V4HI);
31162
31163 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31164 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31165 IX86_BUILTIN_VEC_INIT_V8QI);
31166
31167 /* Access to the vec_extract patterns. */
31168 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31169 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31170 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31171 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31172 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31173 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31174 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31175 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31176 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31177 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31178
31179 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31180 "__builtin_ia32_vec_ext_v4hi",
31181 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31182
31183 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31184 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31185
31186 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31187 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31188
31189 /* Access to the vec_set patterns. */
31190 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31191 "__builtin_ia32_vec_set_v2di",
31192 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31193
31194 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31195 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31196
31197 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31198 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31199
31200 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31201 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31202
31203 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31204 "__builtin_ia32_vec_set_v4hi",
31205 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31206
31207 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31208 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31209
31210 /* RDSEED */
31211 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31212 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31213 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31214 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31215 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31216 "__builtin_ia32_rdseed_di_step",
31217 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31218
31219 /* ADCX */
31220 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31221 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31222 def_builtin (OPTION_MASK_ISA_64BIT,
31223 "__builtin_ia32_addcarryx_u64",
31224 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31225 IX86_BUILTIN_ADDCARRYX64);
31226
31227 /* Read/write FLAGS. */
31228 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31229 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31230 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31231 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31232 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31233 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31234 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31235 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
31236
31237 /* CLFLUSHOPT. */
31238 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
31239 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
31240
31241 /* Add FMA4 multi-arg argument instructions */
31242 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31243 {
31244 if (d->name == 0)
31245 continue;
31246
31247 ftype = (enum ix86_builtin_func_type) d->flag;
31248 def_builtin_const (d->mask, d->name, ftype, d->code);
31249 }
31250 }
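/* Usage sketch for one of the registrations above (hypothetical variable
   val): INT_FTYPE_PUNSIGNED gives __builtin_ia32_rdrand32_step the C
   signature

     int __builtin_ia32_rdrand32_step (unsigned int *);

   so it can be used roughly as

     unsigned int val;
     if (__builtin_ia32_rdrand32_step (&val))
       ...   // carry flag was set; val holds a random number
   */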
31251
31252 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31253 to return a pointer to VERSION_DECL if the outcome of the expression
31254 formed by PREDICATE_CHAIN is true. This function will be called during
31255 version dispatch to decide which function version to execute. It returns
31256 the basic block at the end, to which more conditions can be added. */
31257
31258 static basic_block
31259 add_condition_to_bb (tree function_decl, tree version_decl,
31260 tree predicate_chain, basic_block new_bb)
31261 {
31262 gimple return_stmt;
31263 tree convert_expr, result_var;
31264 gimple convert_stmt;
31265 gimple call_cond_stmt;
31266 gimple if_else_stmt;
31267
31268 basic_block bb1, bb2, bb3;
31269 edge e12, e23;
31270
31271 tree cond_var, and_expr_var = NULL_TREE;
31272 gimple_seq gseq;
31273
31274 tree predicate_decl, predicate_arg;
31275
31276 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31277
31278 gcc_assert (new_bb != NULL);
31279 gseq = bb_seq (new_bb);
31280
31281
31282 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31283 build_fold_addr_expr (version_decl));
31284 result_var = create_tmp_var (ptr_type_node, NULL);
31285 convert_stmt = gimple_build_assign (result_var, convert_expr);
31286 return_stmt = gimple_build_return (result_var);
31287
31288 if (predicate_chain == NULL_TREE)
31289 {
31290 gimple_seq_add_stmt (&gseq, convert_stmt);
31291 gimple_seq_add_stmt (&gseq, return_stmt);
31292 set_bb_seq (new_bb, gseq);
31293 gimple_set_bb (convert_stmt, new_bb);
31294 gimple_set_bb (return_stmt, new_bb);
31295 pop_cfun ();
31296 return new_bb;
31297 }
31298
31299 while (predicate_chain != NULL)
31300 {
31301 cond_var = create_tmp_var (integer_type_node, NULL);
31302 predicate_decl = TREE_PURPOSE (predicate_chain);
31303 predicate_arg = TREE_VALUE (predicate_chain);
31304 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31305 gimple_call_set_lhs (call_cond_stmt, cond_var);
31306
31307 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31308 gimple_set_bb (call_cond_stmt, new_bb);
31309 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31310
31311 predicate_chain = TREE_CHAIN (predicate_chain);
31312
31313 if (and_expr_var == NULL)
31314 and_expr_var = cond_var;
31315 else
31316 {
31317 gimple assign_stmt;
31318 /* Use MIN_EXPR to check whether any integer is zero:
31319 and_expr_var = min_expr <cond_var, and_expr_var>. */
31320 assign_stmt = gimple_build_assign (and_expr_var,
31321 build2 (MIN_EXPR, integer_type_node,
31322 cond_var, and_expr_var));
31323
31324 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31325 gimple_set_bb (assign_stmt, new_bb);
31326 gimple_seq_add_stmt (&gseq, assign_stmt);
31327 }
31328 }
31329
31330 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31331 integer_zero_node,
31332 NULL_TREE, NULL_TREE);
31333 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31334 gimple_set_bb (if_else_stmt, new_bb);
31335 gimple_seq_add_stmt (&gseq, if_else_stmt);
31336
31337 gimple_seq_add_stmt (&gseq, convert_stmt);
31338 gimple_seq_add_stmt (&gseq, return_stmt);
31339 set_bb_seq (new_bb, gseq);
31340
31341 bb1 = new_bb;
31342 e12 = split_block (bb1, if_else_stmt);
31343 bb2 = e12->dest;
31344 e12->flags &= ~EDGE_FALLTHRU;
31345 e12->flags |= EDGE_TRUE_VALUE;
31346
31347 e23 = split_block (bb2, return_stmt);
31348
31349 gimple_set_bb (convert_stmt, bb2);
31350 gimple_set_bb (return_stmt, bb2);
31351
31352 bb3 = e23->dest;
31353 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31354
31355 remove_edge (e23);
31356 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31357
31358 pop_cfun ();
31359
31360 return bb3;
31361 }
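/* Shape of the code built above for one version (hypothetical function foo
   versioned for arch=core2), as a rough sketch:

     cond = __builtin_cpu_is ("core2");
     if (cond > 0)
       return (void *) &foo.arch_core2;   // bb2, then edge to EXIT
     ...                                  // bb3: next condition or default

   When a version has several predicates they are combined with MIN_EXPR, so
   the guard is positive only if every predicate returned a positive value. */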
31362
31363 /* This parses the attribute arguments to target in DECL and determines
31364 the right builtin to use to match the platform specification.
31365 It returns the priority value for this version decl. If PREDICATE_LIST
31366 is not NULL, it stores the list of cpu features that need to be checked
31367 before dispatching this function. */
31368
31369 static unsigned int
31370 get_builtin_code_for_version (tree decl, tree *predicate_list)
31371 {
31372 tree attrs;
31373 struct cl_target_option cur_target;
31374 tree target_node;
31375 struct cl_target_option *new_target;
31376 const char *arg_str = NULL;
31377 const char *attrs_str = NULL;
31378 char *tok_str = NULL;
31379 char *token;
31380
31381 /* Priority of i386 features, greater value is higher priority. This is
31382 used to decide the order in which function dispatch must happen. For
31383 instance, a version specialized for SSE4.2 should be checked for dispatch
31384 before a version for SSE3, as SSE4.2 implies SSE3. */
31385 enum feature_priority
31386 {
31387 P_ZERO = 0,
31388 P_MMX,
31389 P_SSE,
31390 P_SSE2,
31391 P_SSE3,
31392 P_SSSE3,
31393 P_PROC_SSSE3,
31394 P_SSE4_A,
31395 P_PROC_SSE4_A,
31396 P_SSE4_1,
31397 P_SSE4_2,
31398 P_PROC_SSE4_2,
31399 P_POPCNT,
31400 P_AVX,
31401 P_PROC_AVX,
31402 P_FMA4,
31403 P_XOP,
31404 P_PROC_XOP,
31405 P_FMA,
31406 P_PROC_FMA,
31407 P_AVX2,
31408 P_PROC_AVX2
31409 };
31410
31411 enum feature_priority priority = P_ZERO;
31412
31413 /* These are the target attribute strings for which a dispatcher is
31414 available, from fold_builtin_cpu. */
31415
31416 static struct _feature_list
31417 {
31418 const char *const name;
31419 const enum feature_priority priority;
31420 }
31421 const feature_list[] =
31422 {
31423 {"mmx", P_MMX},
31424 {"sse", P_SSE},
31425 {"sse2", P_SSE2},
31426 {"sse3", P_SSE3},
31427 {"sse4a", P_SSE4_A},
31428 {"ssse3", P_SSSE3},
31429 {"sse4.1", P_SSE4_1},
31430 {"sse4.2", P_SSE4_2},
31431 {"popcnt", P_POPCNT},
31432 {"avx", P_AVX},
31433 {"fma4", P_FMA4},
31434 {"xop", P_XOP},
31435 {"fma", P_FMA},
31436 {"avx2", P_AVX2}
31437 };
31438
31439
31440 static unsigned int NUM_FEATURES
31441 = sizeof (feature_list) / sizeof (struct _feature_list);
31442
31443 unsigned int i;
31444
31445 tree predicate_chain = NULL_TREE;
31446 tree predicate_decl, predicate_arg;
31447
31448 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31449 gcc_assert (attrs != NULL);
31450
31451 attrs = TREE_VALUE (TREE_VALUE (attrs));
31452
31453 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31454 attrs_str = TREE_STRING_POINTER (attrs);
31455
31456 /* Return priority zero for default function. */
31457 if (strcmp (attrs_str, "default") == 0)
31458 return 0;
31459
31460 /* Handle arch= if specified. For priority, set it to be 1 more than
31461 the best instruction set the processor can handle. For instance, if
31462 there is a version for atom and a version for ssse3 (the highest ISA
31463 priority for atom), the atom version must be checked for dispatch
31464 before the ssse3 version. */
31465 if (strstr (attrs_str, "arch=") != NULL)
31466 {
31467 cl_target_option_save (&cur_target, &global_options);
31468 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31469 &global_options_set);
31470
31471 gcc_assert (target_node);
31472 new_target = TREE_TARGET_OPTION (target_node);
31473 gcc_assert (new_target);
31474
31475 if (new_target->arch_specified && new_target->arch > 0)
31476 {
31477 switch (new_target->arch)
31478 {
31479 case PROCESSOR_CORE2:
31480 arg_str = "core2";
31481 priority = P_PROC_SSSE3;
31482 break;
31483 case PROCESSOR_NEHALEM:
31484 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31485 arg_str = "westmere";
31486 else
31487 /* We translate "arch=corei7" and "arch=nehalem" to
31488 "corei7" so that it will be mapped to M_INTEL_COREI7
31489 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31490 arg_str = "corei7";
31491 priority = P_PROC_SSE4_2;
31492 break;
31493 case PROCESSOR_SANDYBRIDGE:
31494 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31495 arg_str = "ivybridge";
31496 else
31497 arg_str = "sandybridge";
31498 priority = P_PROC_AVX;
31499 break;
31500 case PROCESSOR_HASWELL:
31501 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31502 arg_str = "broadwell";
31503 else
31504 arg_str = "haswell";
31505 priority = P_PROC_AVX2;
31506 break;
31507 case PROCESSOR_BONNELL:
31508 arg_str = "bonnell";
31509 priority = P_PROC_SSSE3;
31510 break;
31511 case PROCESSOR_SILVERMONT:
31512 arg_str = "silvermont";
31513 priority = P_PROC_SSE4_2;
31514 break;
31515 case PROCESSOR_AMDFAM10:
31516 arg_str = "amdfam10h";
31517 priority = P_PROC_SSE4_A;
31518 break;
31519 case PROCESSOR_BTVER1:
31520 arg_str = "btver1";
31521 priority = P_PROC_SSE4_A;
31522 break;
31523 case PROCESSOR_BTVER2:
31524 arg_str = "btver2";
31525 priority = P_PROC_AVX;
31526 break;
31527 case PROCESSOR_BDVER1:
31528 arg_str = "bdver1";
31529 priority = P_PROC_XOP;
31530 break;
31531 case PROCESSOR_BDVER2:
31532 arg_str = "bdver2";
31533 priority = P_PROC_FMA;
31534 break;
31535 case PROCESSOR_BDVER3:
31536 arg_str = "bdver3";
31537 priority = P_PROC_FMA;
31538 break;
31539 case PROCESSOR_BDVER4:
31540 arg_str = "bdver4";
31541 priority = P_PROC_AVX2;
31542 break;
31543 }
31544 }
31545
31546 cl_target_option_restore (&global_options, &cur_target);
31547
31548 if (predicate_list && arg_str == NULL)
31549 {
31550 error_at (DECL_SOURCE_LOCATION (decl),
31551 "No dispatcher found for the versioning attributes");
31552 return 0;
31553 }
31554
31555 if (predicate_list)
31556 {
31557 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31558 /* For a C string literal the length includes the trailing NULL. */
31559 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31560 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31561 predicate_chain);
31562 }
31563 }
31564
31565 /* Process feature name. */
31566 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31567 strcpy (tok_str, attrs_str);
31568 token = strtok (tok_str, ",");
31569 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31570
31571 while (token != NULL)
31572 {
31573 /* Do not process "arch=" */
31574 if (strncmp (token, "arch=", 5) == 0)
31575 {
31576 token = strtok (NULL, ",");
31577 continue;
31578 }
31579 for (i = 0; i < NUM_FEATURES; ++i)
31580 {
31581 if (strcmp (token, feature_list[i].name) == 0)
31582 {
31583 if (predicate_list)
31584 {
31585 predicate_arg = build_string_literal (
31586 strlen (feature_list[i].name) + 1,
31587 feature_list[i].name);
31588 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31589 predicate_chain);
31590 }
31591 /* Find the maximum priority feature. */
31592 if (feature_list[i].priority > priority)
31593 priority = feature_list[i].priority;
31594
31595 break;
31596 }
31597 }
31598 if (predicate_list && i == NUM_FEATURES)
31599 {
31600 error_at (DECL_SOURCE_LOCATION (decl),
31601 "No dispatcher found for %s", token);
31602 return 0;
31603 }
31604 token = strtok (NULL, ",");
31605 }
31606 free (tok_str);
31607
31608 if (predicate_list && predicate_chain == NULL_TREE)
31609 {
31610 error_at (DECL_SOURCE_LOCATION (decl),
31611 "No dispatcher found for the versioning attributes : %s",
31612 attrs_str);
31613 return 0;
31614 }
31615 else if (predicate_list)
31616 {
31617 predicate_chain = nreverse (predicate_chain);
31618 *predicate_list = predicate_chain;
31619 }
31620
31621 return priority;
31622 }
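/* Example of the parsing above (hypothetical declaration):

     __attribute__ ((target ("arch=core2,popcnt"))) int foo (void);

   "arch=core2" contributes a __builtin_cpu_is ("core2") predicate with
   priority P_PROC_SSSE3, "popcnt" contributes a __builtin_cpu_supports
   ("popcnt") predicate, and the returned priority is the larger of the two
   (P_POPCNT here). */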
31623
31624 /* This compares the priority of target features in function DECL1
31625 and DECL2. It returns positive value if DECL1 is higher priority,
31626 negative value if DECL2 is higher priority and 0 if they are the
31627 same. */
31628
31629 static int
31630 ix86_compare_version_priority (tree decl1, tree decl2)
31631 {
31632 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31633 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31634
31635 return (int)priority1 - (int)priority2;
31636 }
31637
31638 /* V1 and V2 point to function versions with different priorities
31639 based on the target ISA. This function compares their priorities. */
31640
31641 static int
31642 feature_compare (const void *v1, const void *v2)
31643 {
31644 typedef struct _function_version_info
31645 {
31646 tree version_decl;
31647 tree predicate_chain;
31648 unsigned int dispatch_priority;
31649 } function_version_info;
31650
31651 const function_version_info c1 = *(const function_version_info *)v1;
31652 const function_version_info c2 = *(const function_version_info *)v2;
31653 return (c2.dispatch_priority - c1.dispatch_priority);
31654 }
31655
31656 /* This function generates the dispatch function for
31657 multi-versioned functions. DISPATCH_DECL is the function which will
31658 contain the dispatch logic. FNDECLS are the function choices for
31659 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
31660 in DISPATCH_DECL in which the dispatch code is generated. */
31661
31662 static int
31663 dispatch_function_versions (tree dispatch_decl,
31664 void *fndecls_p,
31665 basic_block *empty_bb)
31666 {
31667 tree default_decl;
31668 gimple ifunc_cpu_init_stmt;
31669 gimple_seq gseq;
31670 int ix;
31671 tree ele;
31672 vec<tree> *fndecls;
31673 unsigned int num_versions = 0;
31674 unsigned int actual_versions = 0;
31675 unsigned int i;
31676
31677 struct _function_version_info
31678 {
31679 tree version_decl;
31680 tree predicate_chain;
31681 unsigned int dispatch_priority;
31682 }*function_version_info;
31683
31684 gcc_assert (dispatch_decl != NULL
31685 && fndecls_p != NULL
31686 && empty_bb != NULL);
31687
31688 /* fndecls_p is actually a vector. */
31689 fndecls = static_cast<vec<tree> *> (fndecls_p);
31690
31691 /* At least one more version other than the default. */
31692 num_versions = fndecls->length ();
31693 gcc_assert (num_versions >= 2);
31694
31695 function_version_info = (struct _function_version_info *)
31696 XNEWVEC (struct _function_version_info, (num_versions - 1));
31697
31698 /* The first version in the vector is the default decl. */
31699 default_decl = (*fndecls)[0];
31700
31701 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31702
31703 gseq = bb_seq (*empty_bb);
31704 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31705 constructors, so explicitly call __builtin_cpu_init here. */
31706 ifunc_cpu_init_stmt = gimple_build_call_vec (
31707 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31708 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31709 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31710 set_bb_seq (*empty_bb, gseq);
31711
31712 pop_cfun ();
31713
31714
31715 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31716 {
31717 tree version_decl = ele;
31718 tree predicate_chain = NULL_TREE;
31719 unsigned int priority;
31720 /* Get attribute string, parse it and find the right predicate decl.
31721 The predicate function could be a lengthy combination of many
31722 features, like arch-type and various isa-variants. */
31723 priority = get_builtin_code_for_version (version_decl,
31724 &predicate_chain);
31725
31726 if (predicate_chain == NULL_TREE)
31727 continue;
31728
31729 function_version_info [actual_versions].version_decl = version_decl;
31730 function_version_info [actual_versions].predicate_chain
31731 = predicate_chain;
31732 function_version_info [actual_versions].dispatch_priority = priority;
31733 actual_versions++;
31734 }
31735
31736 /* Sort the versions according to descending order of dispatch priority. The
31737 priority is based on the ISA. This is not a perfect solution. There
31738 could still be ambiguity. If more than one function version is suitable
31739 to execute, which one should be dispatched? In future, allow the user
31740 to specify a dispatch priority next to the version. */
31741 qsort (function_version_info, actual_versions,
31742 sizeof (struct _function_version_info), feature_compare);
31743
31744 for (i = 0; i < actual_versions; ++i)
31745 *empty_bb = add_condition_to_bb (dispatch_decl,
31746 function_version_info[i].version_decl,
31747 function_version_info[i].predicate_chain,
31748 *empty_bb);
31749
31750 /* Dispatch the default version at the end. */
31751 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31752 NULL, *empty_bb);
31753
31754 free (function_version_info);
31755 return 0;
31756 }
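/* Rough sketch of the resolver body produced above for versions "avx2",
   "sse4.2" and "default" of a hypothetical function foo:

     foo.resolver ()
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_supports ("avx2") > 0)
         return &foo.avx2;
       if (__builtin_cpu_supports ("sse4.2") > 0)
         return &foo.sse4.2;
       return &foo;   // default version, dispatched last
     }

   Versions are tested in descending dispatch priority, so the AVX2 version
   is checked before the SSE4.2 one. */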
31757
31758 /* Comparator function used by qsort to sort the attribute
31759 specification strings of the "target" attribute. */
31760
31761 static int
31762 attr_strcmp (const void *v1, const void *v2)
31763 {
31764 const char *c1 = *(char *const*)v1;
31765 const char *c2 = *(char *const*)v2;
31766 return strcmp (c1, c2);
31767 }
31768
31769 /* ARGLIST is the argument to target attribute. This function tokenizes
31770 the comma separated arguments, sorts them and returns a string which
31771 is a unique identifier for the comma separated arguments. It also
31772 replaces non-identifier characters "=,-" with "_". */
31773
31774 static char *
31775 sorted_attr_string (tree arglist)
31776 {
31777 tree arg;
31778 size_t str_len_sum = 0;
31779 char **args = NULL;
31780 char *attr_str, *ret_str;
31781 char *attr = NULL;
31782 unsigned int argnum = 1;
31783 unsigned int i;
31784
31785 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31786 {
31787 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31788 size_t len = strlen (str);
31789 str_len_sum += len + 1;
31790 if (arg != arglist)
31791 argnum++;
31792 for (i = 0; i < strlen (str); i++)
31793 if (str[i] == ',')
31794 argnum++;
31795 }
31796
31797 attr_str = XNEWVEC (char, str_len_sum);
31798 str_len_sum = 0;
31799 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31800 {
31801 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31802 size_t len = strlen (str);
31803 memcpy (attr_str + str_len_sum, str, len);
31804 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31805 str_len_sum += len + 1;
31806 }
31807
31808 /* Replace "=,-" with "_". */
31809 for (i = 0; i < strlen (attr_str); i++)
31810 if (attr_str[i] == '=' || attr_str[i]== '-')
31811 attr_str[i] = '_';
31812
31813 if (argnum == 1)
31814 return attr_str;
31815
31816 args = XNEWVEC (char *, argnum);
31817
31818 i = 0;
31819 attr = strtok (attr_str, ",");
31820 while (attr != NULL)
31821 {
31822 args[i] = attr;
31823 i++;
31824 attr = strtok (NULL, ",");
31825 }
31826
31827 qsort (args, argnum, sizeof (char *), attr_strcmp);
31828
31829 ret_str = XNEWVEC (char, str_len_sum);
31830 str_len_sum = 0;
31831 for (i = 0; i < argnum; i++)
31832 {
31833 size_t len = strlen (args[i]);
31834 memcpy (ret_str + str_len_sum, args[i], len);
31835 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31836 str_len_sum += len + 1;
31837 }
31838
31839 XDELETEVEC (args);
31840 XDELETEVEC (attr_str);
31841 return ret_str;
31842 }
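/* Example: for __attribute__ ((target ("sse4.2,arch=core2"))) the string is
   first rewritten to "sse4.2,arch_core2", the comma-separated tokens are
   sorted, and the result is "arch_core2_sse4.2". */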
31843
31844 /* This function changes the assembler name for functions that are
31845 versions. If DECL is a function version and has a "target"
31846 attribute, it appends the attribute string to its assembler name. */
31847
31848 static tree
31849 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31850 {
31851 tree version_attr;
31852 const char *orig_name, *version_string;
31853 char *attr_str, *assembler_name;
31854
31855 if (DECL_DECLARED_INLINE_P (decl)
31856 && lookup_attribute ("gnu_inline",
31857 DECL_ATTRIBUTES (decl)))
31858 error_at (DECL_SOURCE_LOCATION (decl),
31859 "Function versions cannot be marked as gnu_inline,"
31860 " bodies have to be generated");
31861
31862 if (DECL_VIRTUAL_P (decl)
31863 || DECL_VINDEX (decl))
31864 sorry ("Virtual function multiversioning not supported");
31865
31866 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31867
31868 /* target attribute string cannot be NULL. */
31869 gcc_assert (version_attr != NULL_TREE);
31870
31871 orig_name = IDENTIFIER_POINTER (id);
31872 version_string
31873 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31874
31875 if (strcmp (version_string, "default") == 0)
31876 return id;
31877
31878 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31879 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31880
31881 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31882
31883 /* Allow assembler name to be modified if already set. */
31884 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31885 SET_DECL_RTL (decl, NULL);
31886
31887 tree ret = get_identifier (assembler_name);
31888 XDELETEVEC (attr_str);
31889 XDELETEVEC (assembler_name);
31890 return ret;
31891 }
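/* Example (hypothetical function foo): with
   __attribute__ ((target ("avx"))) the assembler name "foo" (or its C++
   mangling) becomes "foo.avx"; the version declared with target ("default")
   keeps its original assembler name. */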
31892
31893 /* This function returns true if FN1 and FN2 are versions of the same function,
31894 that is, the target strings of the function decls are different. This assumes
31895 that FN1 and FN2 have the same signature. */
31896
31897 static bool
31898 ix86_function_versions (tree fn1, tree fn2)
31899 {
31900 tree attr1, attr2;
31901 char *target1, *target2;
31902 bool result;
31903
31904 if (TREE_CODE (fn1) != FUNCTION_DECL
31905 || TREE_CODE (fn2) != FUNCTION_DECL)
31906 return false;
31907
31908 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31909 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31910
31911 /* At least one function decl should have the target attribute specified. */
31912 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31913 return false;
31914
31915 /* Diagnose missing target attribute if one of the decls is already
31916 multi-versioned. */
31917 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31918 {
31919 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31920 {
31921 if (attr2 != NULL_TREE)
31922 {
31923 tree tem = fn1;
31924 fn1 = fn2;
31925 fn2 = tem;
31926 attr1 = attr2;
31927 }
31928 error_at (DECL_SOURCE_LOCATION (fn2),
31929 "missing %<target%> attribute for multi-versioned %D",
31930 fn2);
31931 inform (DECL_SOURCE_LOCATION (fn1),
31932 "previous declaration of %D", fn1);
31933 /* Prevent diagnosing of the same error multiple times. */
31934 DECL_ATTRIBUTES (fn2)
31935 = tree_cons (get_identifier ("target"),
31936 copy_node (TREE_VALUE (attr1)),
31937 DECL_ATTRIBUTES (fn2));
31938 }
31939 return false;
31940 }
31941
31942 target1 = sorted_attr_string (TREE_VALUE (attr1));
31943 target2 = sorted_attr_string (TREE_VALUE (attr2));
31944
31945 /* The sorted target strings must be different for fn1 and fn2
31946 to be versions. */
31947 if (strcmp (target1, target2) == 0)
31948 result = false;
31949 else
31950 result = true;
31951
31952 XDELETEVEC (target1);
31953 XDELETEVEC (target2);
31954
31955 return result;
31956 }
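/* Example (hypothetical function foo): these declarations are versions of
   the same function because their sorted target strings differ:

     __attribute__ ((target ("sse4.2"))) int foo (void);
     __attribute__ ((target ("avx"))) int foo (void);

   Two declarations whose sorted target strings are equal are not versions;
   they refer to the same function. */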
31957
31958 static tree
31959 ix86_mangle_decl_assembler_name (tree decl, tree id)
31960 {
31961 /* For function version, add the target suffix to the assembler name. */
31962 if (TREE_CODE (decl) == FUNCTION_DECL
31963 && DECL_FUNCTION_VERSIONED (decl))
31964 id = ix86_mangle_function_version_assembler_name (decl, id);
31965 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31966 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31967 #endif
31968
31969 return id;
31970 }
31971
31972 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
31973 is true, append the full path name of the source file. */
31974
31975 static char *
31976 make_name (tree decl, const char *suffix, bool make_unique)
31977 {
31978 char *global_var_name;
31979 int name_len;
31980 const char *name;
31981 const char *unique_name = NULL;
31982
31983 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31984
31985 /* Get a unique name that can be used globally without any chances
31986 of collision at link time. */
31987 if (make_unique)
31988 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
31989
31990 name_len = strlen (name) + strlen (suffix) + 2;
31991
31992 if (make_unique)
31993 name_len += strlen (unique_name) + 1;
31994 global_var_name = XNEWVEC (char, name_len);
31995
31996 /* Use '.' to concatenate names as it is demangler friendly. */
31997 if (make_unique)
31998 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
31999 suffix);
32000 else
32001 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
32002
32003 return global_var_name;
32004 }
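/* Example (hypothetical function foo): make_name (foo, "resolver", false)
   yields "foo.resolver"; with MAKE_UNIQUE true a file-unique component is
   inserted between the name and the suffix, so a resolver for a static
   function cannot clash with one from another translation unit. */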
32005
32006 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32007
32008 /* Make a dispatcher declaration for the multi-versioned function DECL.
32009 Calls to DECL function will be replaced with calls to the dispatcher
32010 by the front-end. Return the decl created. */
32011
32012 static tree
32013 make_dispatcher_decl (const tree decl)
32014 {
32015 tree func_decl;
32016 char *func_name;
32017 tree fn_type, func_type;
32018 bool is_uniq = false;
32019
32020 if (TREE_PUBLIC (decl) == 0)
32021 is_uniq = true;
32022
32023 func_name = make_name (decl, "ifunc", is_uniq);
32024
32025 fn_type = TREE_TYPE (decl);
32026 func_type = build_function_type (TREE_TYPE (fn_type),
32027 TYPE_ARG_TYPES (fn_type));
32028
32029 func_decl = build_fn_decl (func_name, func_type);
32030 XDELETEVEC (func_name);
32031 TREE_USED (func_decl) = 1;
32032 DECL_CONTEXT (func_decl) = NULL_TREE;
32033 DECL_INITIAL (func_decl) = error_mark_node;
32034 DECL_ARTIFICIAL (func_decl) = 1;
32035 /* Mark this function as external; the resolver will flip it again if
32036 it gets generated. */
32037 DECL_EXTERNAL (func_decl) = 1;
32038 /* This will be an IFUNC; IFUNCs have to be externally visible. */
32039 TREE_PUBLIC (func_decl) = 1;
32040
32041 return func_decl;
32042 }
32043
32044 #endif
32045
32046 /* Return true if DECL is multi-versioned and is the default function,
32047 that is, it is not tagged with a target-specific optimization attribute. */
32048
32049 static bool
32050 is_function_default_version (const tree decl)
32051 {
32052 if (TREE_CODE (decl) != FUNCTION_DECL
32053 || !DECL_FUNCTION_VERSIONED (decl))
32054 return false;
32055 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32056 gcc_assert (attr);
32057 attr = TREE_VALUE (TREE_VALUE (attr));
32058 return (TREE_CODE (attr) == STRING_CST
32059 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
32060 }
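/* Example (hypothetical function foo): the version declared as

     __attribute__ ((target ("default"))) int foo (void);

   is the default version; it needs no runtime predicate and is dispatched
   last by dispatch_function_versions. */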
32061
32062 /* Make a dispatcher declaration for the multi-versioned function DECL.
32063 Calls to DECL function will be replaced with calls to the dispatcher
32064 by the front-end. Returns the decl of the dispatcher function. */
32065
32066 static tree
32067 ix86_get_function_versions_dispatcher (void *decl)
32068 {
32069 tree fn = (tree) decl;
32070 struct cgraph_node *node = NULL;
32071 struct cgraph_node *default_node = NULL;
32072 struct cgraph_function_version_info *node_v = NULL;
32073 struct cgraph_function_version_info *first_v = NULL;
32074
32075 tree dispatch_decl = NULL;
32076
32077 struct cgraph_function_version_info *default_version_info = NULL;
32078
32079 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32080
32081 node = cgraph_node::get (fn);
32082 gcc_assert (node != NULL);
32083
32084 node_v = node->function_version ();
32085 gcc_assert (node_v != NULL);
32086
32087 if (node_v->dispatcher_resolver != NULL)
32088 return node_v->dispatcher_resolver;
32089
32090 /* Find the default version and make it the first node. */
32091 first_v = node_v;
32092 /* Go to the beginning of the chain. */
32093 while (first_v->prev != NULL)
32094 first_v = first_v->prev;
32095 default_version_info = first_v;
32096 while (default_version_info != NULL)
32097 {
32098 if (is_function_default_version
32099 (default_version_info->this_node->decl))
32100 break;
32101 default_version_info = default_version_info->next;
32102 }
32103
32104 /* If there is no default node, just return NULL. */
32105 if (default_version_info == NULL)
32106 return NULL;
32107
32108 /* Make default info the first node. */
32109 if (first_v != default_version_info)
32110 {
32111 default_version_info->prev->next = default_version_info->next;
32112 if (default_version_info->next)
32113 default_version_info->next->prev = default_version_info->prev;
32114 first_v->prev = default_version_info;
32115 default_version_info->next = first_v;
32116 default_version_info->prev = NULL;
32117 }
32118
32119 default_node = default_version_info->this_node;
32120
32121 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32122 if (targetm.has_ifunc_p ())
32123 {
32124 struct cgraph_function_version_info *it_v = NULL;
32125 struct cgraph_node *dispatcher_node = NULL;
32126 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32127
32128 /* Right now, the dispatching is done via ifunc. */
32129 dispatch_decl = make_dispatcher_decl (default_node->decl);
32130
32131 dispatcher_node = cgraph_node::get_create (dispatch_decl);
32132 gcc_assert (dispatcher_node != NULL);
32133 dispatcher_node->dispatcher_function = 1;
32134 dispatcher_version_info
32135 = dispatcher_node->insert_new_function_version ();
32136 dispatcher_version_info->next = default_version_info;
32137 dispatcher_node->definition = 1;
32138
32139 /* Set the dispatcher for all the versions. */
32140 it_v = default_version_info;
32141 while (it_v != NULL)
32142 {
32143 it_v->dispatcher_resolver = dispatch_decl;
32144 it_v = it_v->next;
32145 }
32146 }
32147 else
32148 #endif
32149 {
32150 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32151 "multiversioning needs ifunc which is not supported "
32152 "on this target");
32153 }
32154
32155 return dispatch_decl;
32156 }
32157
32158 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32159 it to CHAIN. */
32160
32161 static tree
32162 make_attribute (const char *name, const char *arg_name, tree chain)
32163 {
32164 tree attr_name;
32165 tree attr_arg_name;
32166 tree attr_args;
32167 tree attr;
32168
32169 attr_name = get_identifier (name);
32170 attr_arg_name = build_string (strlen (arg_name), arg_name);
32171 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32172 attr = tree_cons (attr_name, attr_args, chain);
32173 return attr;
32174 }
32175
32176 /* Make the resolver function decl to dispatch the versions of
32177 a multi-versioned function, DEFAULT_DECL. Create an
32178 empty basic block in the resolver and store the pointer in
32179 EMPTY_BB. Return the decl of the resolver function. */
32180
32181 static tree
32182 make_resolver_func (const tree default_decl,
32183 const tree dispatch_decl,
32184 basic_block *empty_bb)
32185 {
32186 char *resolver_name;
32187 tree decl, type, decl_name, t;
32188 bool is_uniq = false;
32189
32190 /* IFUNCs have to be globally visible. So, if the default_decl is
32191 not, then the name of the IFUNC should be made unique. */
32192 if (TREE_PUBLIC (default_decl) == 0)
32193 is_uniq = true;
32194
32195 /* Append the filename to the resolver function if the versions are
32196 not externally visible. This is because the resolver function has
32197 to be externally visible for the loader to find it. So, appending
32198 the filename will prevent conflicts with a resolver function from
32199 another module which is based on the same version name. */
32200 resolver_name = make_name (default_decl, "resolver", is_uniq);
32201
32202 /* The resolver function should return a (void *). */
32203 type = build_function_type_list (ptr_type_node, NULL_TREE);
32204
32205 decl = build_fn_decl (resolver_name, type);
32206 decl_name = get_identifier (resolver_name);
32207 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32208
32209 DECL_NAME (decl) = decl_name;
32210 TREE_USED (decl) = 1;
32211 DECL_ARTIFICIAL (decl) = 1;
32212 DECL_IGNORED_P (decl) = 0;
32213 /* IFUNC resolvers have to be externally visible. */
32214 TREE_PUBLIC (decl) = 1;
32215 DECL_UNINLINABLE (decl) = 1;
32216
32217 /* Resolver is not external, body is generated. */
32218 DECL_EXTERNAL (decl) = 0;
32219 DECL_EXTERNAL (dispatch_decl) = 0;
32220
32221 DECL_CONTEXT (decl) = NULL_TREE;
32222 DECL_INITIAL (decl) = make_node (BLOCK);
32223 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32224
32225 if (DECL_COMDAT_GROUP (default_decl)
32226 || TREE_PUBLIC (default_decl))
32227 {
32228 /* In this case, each translation unit with a call to this
32229 versioned function will put out a resolver. Ensure it
32230 is comdat to keep just one copy. */
32231 DECL_COMDAT (decl) = 1;
32232 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32233 }
32234 /* Build result decl and add to function_decl. */
32235 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32236 DECL_ARTIFICIAL (t) = 1;
32237 DECL_IGNORED_P (t) = 1;
32238 DECL_RESULT (decl) = t;
32239
32240 gimplify_function_tree (decl);
32241 push_cfun (DECL_STRUCT_FUNCTION (decl));
32242 *empty_bb = init_lowered_empty_function (decl, false);
32243
32244 cgraph_node::add_new_function (decl, true);
32245 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
32246
32247 pop_cfun ();
32248
32249 gcc_assert (dispatch_decl != NULL);
32250 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32251 DECL_ATTRIBUTES (dispatch_decl)
32252 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32253
32254 /* Create the alias for dispatch to resolver here. */
32255 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32256 cgraph_node::create_same_body_alias (dispatch_decl, decl);
32257 XDELETEVEC (resolver_name);
32258 return decl;
32259 }
32260
32261 /* Generate the dispatching code body to dispatch multi-versioned function
32262 DECL. The target hook is called to process the "target" attributes and
32263 provide the code to dispatch the right function at run-time. NODE points
32264 to the dispatcher decl whose body will be created. */
32265
32266 static tree
32267 ix86_generate_version_dispatcher_body (void *node_p)
32268 {
32269 tree resolver_decl;
32270 basic_block empty_bb;
32271 tree default_ver_decl;
32272 struct cgraph_node *versn;
32273 struct cgraph_node *node;
32274
32275 struct cgraph_function_version_info *node_version_info = NULL;
32276 struct cgraph_function_version_info *versn_info = NULL;
32277
32278 node = (cgraph_node *)node_p;
32279
32280 node_version_info = node->function_version ();
32281 gcc_assert (node->dispatcher_function
32282 && node_version_info != NULL);
32283
32284 if (node_version_info->dispatcher_resolver)
32285 return node_version_info->dispatcher_resolver;
32286
32287 /* The first version in the chain corresponds to the default version. */
32288 default_ver_decl = node_version_info->next->this_node->decl;
32289
32290 /* node is going to be an alias, so remove the finalized bit. */
32291 node->definition = false;
32292
32293 resolver_decl = make_resolver_func (default_ver_decl,
32294 node->decl, &empty_bb);
32295
32296 node_version_info->dispatcher_resolver = resolver_decl;
32297
32298 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32299
32300 auto_vec<tree, 2> fn_ver_vec;
32301
32302 for (versn_info = node_version_info->next; versn_info;
32303 versn_info = versn_info->next)
32304 {
32305 versn = versn_info->this_node;
32306 /* Check for virtual functions here again, as by this time it should
32307 have been determined if this function needs a vtable index or
32308 not. This happens for methods in derived classes that override
32309 virtual methods in base classes but are not explicitly marked as
32310 virtual. */
32311 if (DECL_VINDEX (versn->decl))
32312 sorry ("Virtual function multiversioning not supported");
32313
32314 fn_ver_vec.safe_push (versn->decl);
32315 }
32316
32317 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32318 cgraph_edge::rebuild_edges ();
32319 pop_cfun ();
32320 return resolver_decl;
32321 }
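
/* Illustrative sketch (not part of the implementation): the dispatcher
   machinery above is what backs C++ function multiversioning.  Given,
   for example,

     __attribute__ ((target ("default"))) int foo (void) { return 0; }
     __attribute__ ((target ("avx2")))    int foo (void) { return 2; }

   the front end records the versions, and this hook builds an IFUNC
   resolver whose body (filled in by dispatch_function_versions) checks
   __cpu_model and returns the address of the most suitable foo.  */
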
32322 /* This builds the processor_model struct type defined in
32323 libgcc/config/i386/cpuinfo.c */
32324
32325 static tree
32326 build_processor_model_struct (void)
32327 {
32328 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32329 "__cpu_features"};
32330 tree field = NULL_TREE, field_chain = NULL_TREE;
32331 int i;
32332 tree type = make_node (RECORD_TYPE);
32333
32334 /* The first 3 fields are unsigned int. */
32335 for (i = 0; i < 3; ++i)
32336 {
32337 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32338 get_identifier (field_name[i]), unsigned_type_node);
32339 if (field_chain != NULL_TREE)
32340 DECL_CHAIN (field) = field_chain;
32341 field_chain = field;
32342 }
32343
32344 /* The last field is an array of unsigned integers of size one. */
32345 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32346 get_identifier (field_name[3]),
32347 build_array_type (unsigned_type_node,
32348 build_index_type (size_one_node)));
32349 if (field_chain != NULL_TREE)
32350 DECL_CHAIN (field) = field_chain;
32351 field_chain = field;
32352
32353 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32354 return type;
32355 }
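
/* For reference, the record built above is meant to match the layout of
   the __cpu_model object that libgcc defines; a sketch of that definition
   (see libgcc/config/i386/cpuinfo.c for the authoritative one):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     } __cpu_model;  */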
32356
32357 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32358
32359 static tree
32360 make_var_decl (tree type, const char *name)
32361 {
32362 tree new_decl;
32363
32364 new_decl = build_decl (UNKNOWN_LOCATION,
32365 VAR_DECL,
32366 get_identifier(name),
32367 type);
32368
32369 DECL_EXTERNAL (new_decl) = 1;
32370 TREE_STATIC (new_decl) = 1;
32371 TREE_PUBLIC (new_decl) = 1;
32372 DECL_INITIAL (new_decl) = 0;
32373 DECL_ARTIFICIAL (new_decl) = 0;
32374 DECL_PRESERVE_P (new_decl) = 1;
32375
32376 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32377 assemble_variable (new_decl, 0, 0, 0);
32378
32379 return new_decl;
32380 }
32381
32382 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32383 into a test of the integer data defined in libgcc/config/i386/cpuinfo.c. */
32384
32385 static tree
32386 fold_builtin_cpu (tree fndecl, tree *args)
32387 {
32388 unsigned int i;
32389 enum ix86_builtins fn_code = (enum ix86_builtins)
32390 DECL_FUNCTION_CODE (fndecl);
32391 tree param_string_cst = NULL;
32392
32393 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32394 enum processor_features
32395 {
32396 F_CMOV = 0,
32397 F_MMX,
32398 F_POPCNT,
32399 F_SSE,
32400 F_SSE2,
32401 F_SSE3,
32402 F_SSSE3,
32403 F_SSE4_1,
32404 F_SSE4_2,
32405 F_AVX,
32406 F_AVX2,
32407 F_SSE4_A,
32408 F_FMA4,
32409 F_XOP,
32410 F_FMA,
32411 F_MAX
32412 };
32413
32414 /* These are the values for vendor types, cpu types and subtypes
32415 in cpuinfo.c. Cpu types and subtypes should have the corresponding
32416 start value subtracted from them. */
32417 enum processor_model
32418 {
32419 M_INTEL = 1,
32420 M_AMD,
32421 M_CPU_TYPE_START,
32422 M_INTEL_BONNELL,
32423 M_INTEL_CORE2,
32424 M_INTEL_COREI7,
32425 M_AMDFAM10H,
32426 M_AMDFAM15H,
32427 M_INTEL_SILVERMONT,
32428 M_AMD_BTVER1,
32429 M_AMD_BTVER2,
32430 M_CPU_SUBTYPE_START,
32431 M_INTEL_COREI7_NEHALEM,
32432 M_INTEL_COREI7_WESTMERE,
32433 M_INTEL_COREI7_SANDYBRIDGE,
32434 M_AMDFAM10H_BARCELONA,
32435 M_AMDFAM10H_SHANGHAI,
32436 M_AMDFAM10H_ISTANBUL,
32437 M_AMDFAM15H_BDVER1,
32438 M_AMDFAM15H_BDVER2,
32439 M_AMDFAM15H_BDVER3,
32440 M_AMDFAM15H_BDVER4,
32441 M_INTEL_COREI7_IVYBRIDGE,
32442 M_INTEL_COREI7_HASWELL
32443 };
32444
32445 static struct _arch_names_table
32446 {
32447 const char *const name;
32448 const enum processor_model model;
32449 }
32450 const arch_names_table[] =
32451 {
32452 {"amd", M_AMD},
32453 {"intel", M_INTEL},
32454 {"atom", M_INTEL_BONNELL},
32455 {"slm", M_INTEL_SILVERMONT},
32456 {"core2", M_INTEL_CORE2},
32457 {"corei7", M_INTEL_COREI7},
32458 {"nehalem", M_INTEL_COREI7_NEHALEM},
32459 {"westmere", M_INTEL_COREI7_WESTMERE},
32460 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32461 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32462 {"haswell", M_INTEL_COREI7_HASWELL},
32463 {"bonnell", M_INTEL_BONNELL},
32464 {"silvermont", M_INTEL_SILVERMONT},
32465 {"amdfam10h", M_AMDFAM10H},
32466 {"barcelona", M_AMDFAM10H_BARCELONA},
32467 {"shanghai", M_AMDFAM10H_SHANGHAI},
32468 {"istanbul", M_AMDFAM10H_ISTANBUL},
32469 {"btver1", M_AMD_BTVER1},
32470 {"amdfam15h", M_AMDFAM15H},
32471 {"bdver1", M_AMDFAM15H_BDVER1},
32472 {"bdver2", M_AMDFAM15H_BDVER2},
32473 {"bdver3", M_AMDFAM15H_BDVER3},
32474 {"bdver4", M_AMDFAM15H_BDVER4},
32475 {"btver2", M_AMD_BTVER2},
32476 };
32477
32478 static struct _isa_names_table
32479 {
32480 const char *const name;
32481 const enum processor_features feature;
32482 }
32483 const isa_names_table[] =
32484 {
32485 {"cmov", F_CMOV},
32486 {"mmx", F_MMX},
32487 {"popcnt", F_POPCNT},
32488 {"sse", F_SSE},
32489 {"sse2", F_SSE2},
32490 {"sse3", F_SSE3},
32491 {"ssse3", F_SSSE3},
32492 {"sse4a", F_SSE4_A},
32493 {"sse4.1", F_SSE4_1},
32494 {"sse4.2", F_SSE4_2},
32495 {"avx", F_AVX},
32496 {"fma4", F_FMA4},
32497 {"xop", F_XOP},
32498 {"fma", F_FMA},
32499 {"avx2", F_AVX2}
32500 };
32501
32502 tree __processor_model_type = build_processor_model_struct ();
32503 tree __cpu_model_var = make_var_decl (__processor_model_type,
32504 "__cpu_model");
32505
32506
32507 varpool_node::add (__cpu_model_var);
32508
32509 gcc_assert ((args != NULL) && (*args != NULL));
32510
32511 param_string_cst = *args;
32512 while (param_string_cst
32513 && TREE_CODE (param_string_cst) != STRING_CST)
32514 {
32515 /* *args must be an expr that can contain other EXPRs leading to a
32516 STRING_CST. */
32517 if (!EXPR_P (param_string_cst))
32518 {
32519 error ("Parameter to builtin must be a string constant or literal");
32520 return integer_zero_node;
32521 }
32522 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32523 }
32524
32525 gcc_assert (param_string_cst);
32526
32527 if (fn_code == IX86_BUILTIN_CPU_IS)
32528 {
32529 tree ref;
32530 tree field;
32531 tree final;
32532
32533 unsigned int field_val = 0;
32534 unsigned int NUM_ARCH_NAMES
32535 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32536
32537 for (i = 0; i < NUM_ARCH_NAMES; i++)
32538 if (strcmp (arch_names_table[i].name,
32539 TREE_STRING_POINTER (param_string_cst)) == 0)
32540 break;
32541
32542 if (i == NUM_ARCH_NAMES)
32543 {
32544 error ("Parameter to builtin not valid: %s",
32545 TREE_STRING_POINTER (param_string_cst));
32546 return integer_zero_node;
32547 }
32548
32549 field = TYPE_FIELDS (__processor_model_type);
32550 field_val = arch_names_table[i].model;
32551
32552 /* CPU types are stored in the next field. */
32553 if (field_val > M_CPU_TYPE_START
32554 && field_val < M_CPU_SUBTYPE_START)
32555 {
32556 field = DECL_CHAIN (field);
32557 field_val -= M_CPU_TYPE_START;
32558 }
32559
32560 /* CPU subtypes are stored in the next field. */
32561 if (field_val > M_CPU_SUBTYPE_START)
32562 {
32563 field = DECL_CHAIN (DECL_CHAIN (field));
32564 field_val -= M_CPU_SUBTYPE_START;
32565 }
32566
32567 /* Get the appropriate field in __cpu_model. */
32568 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32569 field, NULL_TREE);
32570
32571 /* Check the value. */
32572 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32573 build_int_cstu (unsigned_type_node, field_val));
32574 return build1 (CONVERT_EXPR, integer_type_node, final);
32575 }
32576 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32577 {
32578 tree ref;
32579 tree array_elt;
32580 tree field;
32581 tree final;
32582
32583 unsigned int field_val = 0;
32584 unsigned int NUM_ISA_NAMES
32585 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32586
32587 for (i = 0; i < NUM_ISA_NAMES; i++)
32588 if (strcmp (isa_names_table[i].name,
32589 TREE_STRING_POINTER (param_string_cst)) == 0)
32590 break;
32591
32592 if (i == NUM_ISA_NAMES)
32593 {
32594 error ("Parameter to builtin not valid: %s",
32595 TREE_STRING_POINTER (param_string_cst));
32596 return integer_zero_node;
32597 }
32598
32599 field = TYPE_FIELDS (__processor_model_type);
32600 /* Get the last field, which is __cpu_features. */
32601 while (DECL_CHAIN (field))
32602 field = DECL_CHAIN (field);
32603
32604 /* Get the appropriate field: __cpu_model.__cpu_features */
32605 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32606 field, NULL_TREE);
32607
32608 /* Access the 0th element of __cpu_features array. */
32609 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32610 integer_zero_node, NULL_TREE, NULL_TREE);
32611
32612 field_val = (1 << isa_names_table[i].feature);
32613 /* Return __cpu_model.__cpu_features[0] & field_val */
32614 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32615 build_int_cstu (unsigned_type_node, field_val));
32616 return build1 (CONVERT_EXPR, integer_type_node, final);
32617 }
32618 gcc_unreachable ();
32619 }
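
/* Illustrative sketch of the folds produced above, using the enum values
   defined in fold_builtin_cpu (an informal rendering, not literal GENERIC):

     __builtin_cpu_is ("amd")
       -> (int) (__cpu_model.__cpu_vendor == M_AMD)

     __builtin_cpu_is ("nehalem")
       -> (int) (__cpu_model.__cpu_subtype
                 == M_INTEL_COREI7_NEHALEM - M_CPU_SUBTYPE_START)

     __builtin_cpu_supports ("sse4.2")
       -> (int) (__cpu_model.__cpu_features[0] & (1 << F_SSE4_2))  */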
32620
32621 static tree
32622 ix86_fold_builtin (tree fndecl, int n_args,
32623 tree *args, bool ignore ATTRIBUTE_UNUSED)
32624 {
32625 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32626 {
32627 enum ix86_builtins fn_code = (enum ix86_builtins)
32628 DECL_FUNCTION_CODE (fndecl);
32629 if (fn_code == IX86_BUILTIN_CPU_IS
32630 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32631 {
32632 gcc_assert (n_args == 1);
32633 return fold_builtin_cpu (fndecl, args);
32634 }
32635 }
32636
32637 #ifdef SUBTARGET_FOLD_BUILTIN
32638 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32639 #endif
32640
32641 return NULL_TREE;
32642 }
32643
32644 /* Make builtins to detect cpu type and features supported. NAME is
32645 the builtin name, CODE is the builtin code, and FTYPE is the function
32646 type of the builtin. */
32647
32648 static void
32649 make_cpu_type_builtin (const char* name, int code,
32650 enum ix86_builtin_func_type ftype, bool is_const)
32651 {
32652 tree decl;
32653 tree type;
32654
32655 type = ix86_get_builtin_func_type (ftype);
32656 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32657 NULL, NULL_TREE);
32658 gcc_assert (decl != NULL_TREE);
32659 ix86_builtins[(int) code] = decl;
32660 TREE_READONLY (decl) = is_const;
32661 }
32662
32663 /* Make builtins to get CPU type and features supported. The created
32664 builtins are:
32665
32666 __builtin_cpu_init (), to detect cpu type and features,
32667 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32668 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32669 */
32670
32671 static void
32672 ix86_init_platform_type_builtins (void)
32673 {
32674 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32675 INT_FTYPE_VOID, false);
32676 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32677 INT_FTYPE_PCCHAR, true);
32678 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32679 INT_FTYPE_PCCHAR, true);
32680 }
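
/* Illustrative sketch of how the builtins registered above are used in
   user code (use_avx2_path etc. are hypothetical user functions):

     __builtin_cpu_init ();
     if (__builtin_cpu_supports ("avx2"))
       use_avx2_path ();
     else if (__builtin_cpu_is ("bdver2"))
       use_bdver2_path ();
     else
       use_generic_path ();

   The explicit __builtin_cpu_init call is only needed when the checks can
   run before the constructors that initialize __cpu_model.  */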
32681
32682 /* Internal method for ix86_init_builtins. */
32683
32684 static void
32685 ix86_init_builtins_va_builtins_abi (void)
32686 {
32687 tree ms_va_ref, sysv_va_ref;
32688 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32689 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32690 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32691 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32692
32693 if (!TARGET_64BIT)
32694 return;
32695 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32696 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32697 ms_va_ref = build_reference_type (ms_va_list_type_node);
32698 sysv_va_ref =
32699 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32700
32701 fnvoid_va_end_ms =
32702 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32703 fnvoid_va_start_ms =
32704 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32705 fnvoid_va_end_sysv =
32706 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32707 fnvoid_va_start_sysv =
32708 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32709 NULL_TREE);
32710 fnvoid_va_copy_ms =
32711 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32712 NULL_TREE);
32713 fnvoid_va_copy_sysv =
32714 build_function_type_list (void_type_node, sysv_va_ref,
32715 sysv_va_ref, NULL_TREE);
32716
32717 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32718 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32719 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32720 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32721 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32722 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32723 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32724 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32725 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32726 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32727 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32728 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32729 }
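
/* Illustrative sketch (an assumption-laden example, not taken from the
   sources): on x86-64 a varargs function compiled for the other ABI can use
   the builtins registered above directly, e.g.

     __attribute__ ((ms_abi)) int
     sum_ints (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;
       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }  */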
32730
32731 static void
32732 ix86_init_builtin_types (void)
32733 {
32734 tree float128_type_node, float80_type_node;
32735
32736 /* The __float80 type. */
32737 float80_type_node = long_double_type_node;
32738 if (TYPE_MODE (float80_type_node) != XFmode)
32739 {
32740 /* The __float80 type. */
32741 float80_type_node = make_node (REAL_TYPE);
32742
32743 TYPE_PRECISION (float80_type_node) = 80;
32744 layout_type (float80_type_node);
32745 }
32746 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32747
32748 /* The __float128 type. */
32749 float128_type_node = make_node (REAL_TYPE);
32750 TYPE_PRECISION (float128_type_node) = 128;
32751 layout_type (float128_type_node);
32752 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32753
32754 /* This macro is built by i386-builtin-types.awk. */
32755 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32756 }
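
/* Illustrative sketch: the types registered above can be used directly in
   user code regardless of the mode of long double, with the documented
   w/W and q/Q constant suffixes:

     __float80  e = 1.0w;
     __float128 q = 1.0q;  */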
32757
32758 static void
32759 ix86_init_builtins (void)
32760 {
32761 tree t;
32762
32763 ix86_init_builtin_types ();
32764
32765 /* Builtins to get CPU type and features. */
32766 ix86_init_platform_type_builtins ();
32767
32768 /* TFmode support builtins. */
32769 def_builtin_const (0, "__builtin_infq",
32770 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32771 def_builtin_const (0, "__builtin_huge_valq",
32772 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32773
32774 /* We will expand them to a normal call if SSE isn't available since
32775 they are used by libgcc. */
32776 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32777 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32778 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32779 TREE_READONLY (t) = 1;
32780 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32781
32782 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32783 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32784 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32785 TREE_READONLY (t) = 1;
32786 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32787
32788 ix86_init_tm_builtins ();
32789 ix86_init_mmx_sse_builtins ();
32790
32791 if (TARGET_LP64)
32792 ix86_init_builtins_va_builtins_abi ();
32793
32794 #ifdef SUBTARGET_INIT_BUILTINS
32795 SUBTARGET_INIT_BUILTINS;
32796 #endif
32797 }
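
/* Illustrative sketch of the TFmode builtins set up above (x here is a
   hypothetical __float128 variable):

     __float128 inf = __builtin_infq ();
     __float128 mag = __builtin_fabsq (x);
     __float128 neg = __builtin_copysignq (mag, -1.0q);  */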
32798
32799 /* Return the ix86 builtin for CODE. */
32800
32801 static tree
32802 ix86_builtin_decl (unsigned code, bool)
32803 {
32804 if (code >= IX86_BUILTIN_MAX)
32805 return error_mark_node;
32806
32807 return ix86_builtins[code];
32808 }
32809
32810 /* Errors in the source file can cause expand_expr to return const0_rtx
32811 where we expect a vector. To avoid crashing, use one of the vector
32812 clear instructions. */
32813 static rtx
32814 safe_vector_operand (rtx x, enum machine_mode mode)
32815 {
32816 if (x == const0_rtx)
32817 x = CONST0_RTX (mode);
32818 return x;
32819 }
32820
32821 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32822
32823 static rtx
32824 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32825 {
32826 rtx pat;
32827 tree arg0 = CALL_EXPR_ARG (exp, 0);
32828 tree arg1 = CALL_EXPR_ARG (exp, 1);
32829 rtx op0 = expand_normal (arg0);
32830 rtx op1 = expand_normal (arg1);
32831 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32832 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32833 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32834
32835 if (VECTOR_MODE_P (mode0))
32836 op0 = safe_vector_operand (op0, mode0);
32837 if (VECTOR_MODE_P (mode1))
32838 op1 = safe_vector_operand (op1, mode1);
32839
32840 if (optimize || !target
32841 || GET_MODE (target) != tmode
32842 || !insn_data[icode].operand[0].predicate (target, tmode))
32843 target = gen_reg_rtx (tmode);
32844
32845 if (GET_MODE (op1) == SImode && mode1 == TImode)
32846 {
32847 rtx x = gen_reg_rtx (V4SImode);
32848 emit_insn (gen_sse2_loadd (x, op1));
32849 op1 = gen_lowpart (TImode, x);
32850 }
32851
32852 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32853 op0 = copy_to_mode_reg (mode0, op0);
32854 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32855 op1 = copy_to_mode_reg (mode1, op1);
32856
32857 pat = GEN_FCN (icode) (target, op0, op1);
32858 if (! pat)
32859 return 0;
32860
32861 emit_insn (pat);
32862
32863 return target;
32864 }
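
/* Illustrative sketch (assumed mapping): ordinary two-operand vector
   builtins such as __builtin_ia32_paddw128 (V8HI_FTYPE_V8HI_V8HI, no
   comparison code) are routed here from ix86_expand_args_builtin; the
   operand predicates then decide whether each input may stay in memory
   or must first be forced into a register.  */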
32865
32866 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32867
32868 static rtx
32869 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32870 enum ix86_builtin_func_type m_type,
32871 enum rtx_code sub_code)
32872 {
32873 rtx pat;
32874 int i;
32875 int nargs;
32876 bool comparison_p = false;
32877 bool tf_p = false;
32878 bool last_arg_constant = false;
32879 int num_memory = 0;
32880 struct {
32881 rtx op;
32882 enum machine_mode mode;
32883 } args[4];
32884
32885 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32886
32887 switch (m_type)
32888 {
32889 case MULTI_ARG_4_DF2_DI_I:
32890 case MULTI_ARG_4_DF2_DI_I1:
32891 case MULTI_ARG_4_SF2_SI_I:
32892 case MULTI_ARG_4_SF2_SI_I1:
32893 nargs = 4;
32894 last_arg_constant = true;
32895 break;
32896
32897 case MULTI_ARG_3_SF:
32898 case MULTI_ARG_3_DF:
32899 case MULTI_ARG_3_SF2:
32900 case MULTI_ARG_3_DF2:
32901 case MULTI_ARG_3_DI:
32902 case MULTI_ARG_3_SI:
32903 case MULTI_ARG_3_SI_DI:
32904 case MULTI_ARG_3_HI:
32905 case MULTI_ARG_3_HI_SI:
32906 case MULTI_ARG_3_QI:
32907 case MULTI_ARG_3_DI2:
32908 case MULTI_ARG_3_SI2:
32909 case MULTI_ARG_3_HI2:
32910 case MULTI_ARG_3_QI2:
32911 nargs = 3;
32912 break;
32913
32914 case MULTI_ARG_2_SF:
32915 case MULTI_ARG_2_DF:
32916 case MULTI_ARG_2_DI:
32917 case MULTI_ARG_2_SI:
32918 case MULTI_ARG_2_HI:
32919 case MULTI_ARG_2_QI:
32920 nargs = 2;
32921 break;
32922
32923 case MULTI_ARG_2_DI_IMM:
32924 case MULTI_ARG_2_SI_IMM:
32925 case MULTI_ARG_2_HI_IMM:
32926 case MULTI_ARG_2_QI_IMM:
32927 nargs = 2;
32928 last_arg_constant = true;
32929 break;
32930
32931 case MULTI_ARG_1_SF:
32932 case MULTI_ARG_1_DF:
32933 case MULTI_ARG_1_SF2:
32934 case MULTI_ARG_1_DF2:
32935 case MULTI_ARG_1_DI:
32936 case MULTI_ARG_1_SI:
32937 case MULTI_ARG_1_HI:
32938 case MULTI_ARG_1_QI:
32939 case MULTI_ARG_1_SI_DI:
32940 case MULTI_ARG_1_HI_DI:
32941 case MULTI_ARG_1_HI_SI:
32942 case MULTI_ARG_1_QI_DI:
32943 case MULTI_ARG_1_QI_SI:
32944 case MULTI_ARG_1_QI_HI:
32945 nargs = 1;
32946 break;
32947
32948 case MULTI_ARG_2_DI_CMP:
32949 case MULTI_ARG_2_SI_CMP:
32950 case MULTI_ARG_2_HI_CMP:
32951 case MULTI_ARG_2_QI_CMP:
32952 nargs = 2;
32953 comparison_p = true;
32954 break;
32955
32956 case MULTI_ARG_2_SF_TF:
32957 case MULTI_ARG_2_DF_TF:
32958 case MULTI_ARG_2_DI_TF:
32959 case MULTI_ARG_2_SI_TF:
32960 case MULTI_ARG_2_HI_TF:
32961 case MULTI_ARG_2_QI_TF:
32962 nargs = 2;
32963 tf_p = true;
32964 break;
32965
32966 default:
32967 gcc_unreachable ();
32968 }
32969
32970 if (optimize || !target
32971 || GET_MODE (target) != tmode
32972 || !insn_data[icode].operand[0].predicate (target, tmode))
32973 target = gen_reg_rtx (tmode);
32974
32975 gcc_assert (nargs <= 4);
32976
32977 for (i = 0; i < nargs; i++)
32978 {
32979 tree arg = CALL_EXPR_ARG (exp, i);
32980 rtx op = expand_normal (arg);
32981 int adjust = (comparison_p) ? 1 : 0;
32982 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32983
32984 if (last_arg_constant && i == nargs - 1)
32985 {
32986 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32987 {
32988 enum insn_code new_icode = icode;
32989 switch (icode)
32990 {
32991 case CODE_FOR_xop_vpermil2v2df3:
32992 case CODE_FOR_xop_vpermil2v4sf3:
32993 case CODE_FOR_xop_vpermil2v4df3:
32994 case CODE_FOR_xop_vpermil2v8sf3:
32995 error ("the last argument must be a 2-bit immediate");
32996 return gen_reg_rtx (tmode);
32997 case CODE_FOR_xop_rotlv2di3:
32998 new_icode = CODE_FOR_rotlv2di3;
32999 goto xop_rotl;
33000 case CODE_FOR_xop_rotlv4si3:
33001 new_icode = CODE_FOR_rotlv4si3;
33002 goto xop_rotl;
33003 case CODE_FOR_xop_rotlv8hi3:
33004 new_icode = CODE_FOR_rotlv8hi3;
33005 goto xop_rotl;
33006 case CODE_FOR_xop_rotlv16qi3:
33007 new_icode = CODE_FOR_rotlv16qi3;
33008 xop_rotl:
33009 if (CONST_INT_P (op))
33010 {
33011 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
33012 op = GEN_INT (INTVAL (op) & mask);
33013 gcc_checking_assert
33014 (insn_data[icode].operand[i + 1].predicate (op, mode));
33015 }
33016 else
33017 {
33018 gcc_checking_assert
33019 (nargs == 2
33020 && insn_data[new_icode].operand[0].mode == tmode
33021 && insn_data[new_icode].operand[1].mode == tmode
33022 && insn_data[new_icode].operand[2].mode == mode
33023 && insn_data[new_icode].operand[0].predicate
33024 == insn_data[icode].operand[0].predicate
33025 && insn_data[new_icode].operand[1].predicate
33026 == insn_data[icode].operand[1].predicate);
33027 icode = new_icode;
33028 goto non_constant;
33029 }
33030 break;
33031 default:
33032 gcc_unreachable ();
33033 }
33034 }
33035 }
33036 else
33037 {
33038 non_constant:
33039 if (VECTOR_MODE_P (mode))
33040 op = safe_vector_operand (op, mode);
33041
33042 /* If we aren't optimizing, only allow one memory operand to be
33043 generated. */
33044 if (memory_operand (op, mode))
33045 num_memory++;
33046
33047 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
33048
33049 if (optimize
33050 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
33051 || num_memory > 1)
33052 op = force_reg (mode, op);
33053 }
33054
33055 args[i].op = op;
33056 args[i].mode = mode;
33057 }
33058
33059 switch (nargs)
33060 {
33061 case 1:
33062 pat = GEN_FCN (icode) (target, args[0].op);
33063 break;
33064
33065 case 2:
33066 if (tf_p)
33067 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
33068 GEN_INT ((int)sub_code));
33069 else if (! comparison_p)
33070 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
33071 else
33072 {
33073 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
33074 args[0].op,
33075 args[1].op);
33076
33077 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
33078 }
33079 break;
33080
33081 case 3:
33082 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
33083 break;
33084
33085 case 4:
33086 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
33087 break;
33088
33089 default:
33090 gcc_unreachable ();
33091 }
33092
33093 if (! pat)
33094 return 0;
33095
33096 emit_insn (pat);
33097 return target;
33098 }
33099
33100 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
33101 insns with vec_merge. */
33102
33103 static rtx
33104 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
33105 rtx target)
33106 {
33107 rtx pat;
33108 tree arg0 = CALL_EXPR_ARG (exp, 0);
33109 rtx op1, op0 = expand_normal (arg0);
33110 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33111 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33112
33113 if (optimize || !target
33114 || GET_MODE (target) != tmode
33115 || !insn_data[icode].operand[0].predicate (target, tmode))
33116 target = gen_reg_rtx (tmode);
33117
33118 if (VECTOR_MODE_P (mode0))
33119 op0 = safe_vector_operand (op0, mode0);
33120
33121 if ((optimize && !register_operand (op0, mode0))
33122 || !insn_data[icode].operand[1].predicate (op0, mode0))
33123 op0 = copy_to_mode_reg (mode0, op0);
33124
33125 op1 = op0;
33126 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33127 op1 = copy_to_mode_reg (mode0, op1);
33128
33129 pat = GEN_FCN (icode) (target, op0, op1);
33130 if (! pat)
33131 return 0;
33132 emit_insn (pat);
33133 return target;
33134 }
33135
33136 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33137
33138 static rtx
33139 ix86_expand_sse_compare (const struct builtin_description *d,
33140 tree exp, rtx target, bool swap)
33141 {
33142 rtx pat;
33143 tree arg0 = CALL_EXPR_ARG (exp, 0);
33144 tree arg1 = CALL_EXPR_ARG (exp, 1);
33145 rtx op0 = expand_normal (arg0);
33146 rtx op1 = expand_normal (arg1);
33147 rtx op2;
33148 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33149 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33150 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33151 enum rtx_code comparison = d->comparison;
33152
33153 if (VECTOR_MODE_P (mode0))
33154 op0 = safe_vector_operand (op0, mode0);
33155 if (VECTOR_MODE_P (mode1))
33156 op1 = safe_vector_operand (op1, mode1);
33157
33158 /* Swap operands if we have a comparison that isn't available in
33159 hardware. */
33160 if (swap)
33161 {
33162 rtx tmp = gen_reg_rtx (mode1);
33163 emit_move_insn (tmp, op1);
33164 op1 = op0;
33165 op0 = tmp;
33166 }
33167
33168 if (optimize || !target
33169 || GET_MODE (target) != tmode
33170 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33171 target = gen_reg_rtx (tmode);
33172
33173 if ((optimize && !register_operand (op0, mode0))
33174 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33175 op0 = copy_to_mode_reg (mode0, op0);
33176 if ((optimize && !register_operand (op1, mode1))
33177 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33178 op1 = copy_to_mode_reg (mode1, op1);
33179
33180 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33181 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33182 if (! pat)
33183 return 0;
33184 emit_insn (pat);
33185 return target;
33186 }
33187
33188 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33189
33190 static rtx
33191 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33192 rtx target)
33193 {
33194 rtx pat;
33195 tree arg0 = CALL_EXPR_ARG (exp, 0);
33196 tree arg1 = CALL_EXPR_ARG (exp, 1);
33197 rtx op0 = expand_normal (arg0);
33198 rtx op1 = expand_normal (arg1);
33199 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33200 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33201 enum rtx_code comparison = d->comparison;
33202
33203 if (VECTOR_MODE_P (mode0))
33204 op0 = safe_vector_operand (op0, mode0);
33205 if (VECTOR_MODE_P (mode1))
33206 op1 = safe_vector_operand (op1, mode1);
33207
33208 /* Swap operands if we have a comparison that isn't available in
33209 hardware. */
33210 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33211 {
33212 rtx tmp = op1;
33213 op1 = op0;
33214 op0 = tmp;
33215 }
33216
33217 target = gen_reg_rtx (SImode);
33218 emit_move_insn (target, const0_rtx);
33219 target = gen_rtx_SUBREG (QImode, target, 0);
33220
33221 if ((optimize && !register_operand (op0, mode0))
33222 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33223 op0 = copy_to_mode_reg (mode0, op0);
33224 if ((optimize && !register_operand (op1, mode1))
33225 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33226 op1 = copy_to_mode_reg (mode1, op1);
33227
33228 pat = GEN_FCN (d->icode) (op0, op1);
33229 if (! pat)
33230 return 0;
33231 emit_insn (pat);
33232 emit_insn (gen_rtx_SET (VOIDmode,
33233 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33234 gen_rtx_fmt_ee (comparison, QImode,
33235 SET_DEST (pat),
33236 const0_rtx)));
33237
33238 return SUBREG_REG (target);
33239 }
33240
33241 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33242
33243 static rtx
33244 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33245 rtx target)
33246 {
33247 rtx pat;
33248 tree arg0 = CALL_EXPR_ARG (exp, 0);
33249 rtx op1, op0 = expand_normal (arg0);
33250 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33251 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33252
33253 if (optimize || target == 0
33254 || GET_MODE (target) != tmode
33255 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33256 target = gen_reg_rtx (tmode);
33257
33258 if (VECTOR_MODE_P (mode0))
33259 op0 = safe_vector_operand (op0, mode0);
33260
33261 if ((optimize && !register_operand (op0, mode0))
33262 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33263 op0 = copy_to_mode_reg (mode0, op0);
33264
33265 op1 = GEN_INT (d->comparison);
33266
33267 pat = GEN_FCN (d->icode) (target, op0, op1);
33268 if (! pat)
33269 return 0;
33270 emit_insn (pat);
33271 return target;
33272 }
33273
33274 static rtx
33275 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33276 tree exp, rtx target)
33277 {
33278 rtx pat;
33279 tree arg0 = CALL_EXPR_ARG (exp, 0);
33280 tree arg1 = CALL_EXPR_ARG (exp, 1);
33281 rtx op0 = expand_normal (arg0);
33282 rtx op1 = expand_normal (arg1);
33283 rtx op2;
33284 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33285 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33286 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33287
33288 if (optimize || target == 0
33289 || GET_MODE (target) != tmode
33290 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33291 target = gen_reg_rtx (tmode);
33292
33293 op0 = safe_vector_operand (op0, mode0);
33294 op1 = safe_vector_operand (op1, mode1);
33295
33296 if ((optimize && !register_operand (op0, mode0))
33297 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33298 op0 = copy_to_mode_reg (mode0, op0);
33299 if ((optimize && !register_operand (op1, mode1))
33300 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33301 op1 = copy_to_mode_reg (mode1, op1);
33302
33303 op2 = GEN_INT (d->comparison);
33304
33305 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33306 if (! pat)
33307 return 0;
33308 emit_insn (pat);
33309 return target;
33310 }
33311
33312 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33313
33314 static rtx
33315 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33316 rtx target)
33317 {
33318 rtx pat;
33319 tree arg0 = CALL_EXPR_ARG (exp, 0);
33320 tree arg1 = CALL_EXPR_ARG (exp, 1);
33321 rtx op0 = expand_normal (arg0);
33322 rtx op1 = expand_normal (arg1);
33323 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33324 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33325 enum rtx_code comparison = d->comparison;
33326
33327 if (VECTOR_MODE_P (mode0))
33328 op0 = safe_vector_operand (op0, mode0);
33329 if (VECTOR_MODE_P (mode1))
33330 op1 = safe_vector_operand (op1, mode1);
33331
33332 target = gen_reg_rtx (SImode);
33333 emit_move_insn (target, const0_rtx);
33334 target = gen_rtx_SUBREG (QImode, target, 0);
33335
33336 if ((optimize && !register_operand (op0, mode0))
33337 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33338 op0 = copy_to_mode_reg (mode0, op0);
33339 if ((optimize && !register_operand (op1, mode1))
33340 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33341 op1 = copy_to_mode_reg (mode1, op1);
33342
33343 pat = GEN_FCN (d->icode) (op0, op1);
33344 if (! pat)
33345 return 0;
33346 emit_insn (pat);
33347 emit_insn (gen_rtx_SET (VOIDmode,
33348 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33349 gen_rtx_fmt_ee (comparison, QImode,
33350 SET_DEST (pat),
33351 const0_rtx)));
33352
33353 return SUBREG_REG (target);
33354 }
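
/* Illustrative sketch (assumed mapping): the ptest path above backs the
   SSE4.1/AVX test intrinsics, for example from <smmintrin.h>:

     int z = _mm_testz_si128 (a, b);    via __builtin_ia32_ptestz128

   The builtin_description's comparison code (EQ for the "testz" variant)
   selects which flag of the PTEST result is turned into the int return
   value by the SET emitted above.  */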
33355
33356 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33357
33358 static rtx
33359 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33360 tree exp, rtx target)
33361 {
33362 rtx pat;
33363 tree arg0 = CALL_EXPR_ARG (exp, 0);
33364 tree arg1 = CALL_EXPR_ARG (exp, 1);
33365 tree arg2 = CALL_EXPR_ARG (exp, 2);
33366 tree arg3 = CALL_EXPR_ARG (exp, 3);
33367 tree arg4 = CALL_EXPR_ARG (exp, 4);
33368 rtx scratch0, scratch1;
33369 rtx op0 = expand_normal (arg0);
33370 rtx op1 = expand_normal (arg1);
33371 rtx op2 = expand_normal (arg2);
33372 rtx op3 = expand_normal (arg3);
33373 rtx op4 = expand_normal (arg4);
33374 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33375
33376 tmode0 = insn_data[d->icode].operand[0].mode;
33377 tmode1 = insn_data[d->icode].operand[1].mode;
33378 modev2 = insn_data[d->icode].operand[2].mode;
33379 modei3 = insn_data[d->icode].operand[3].mode;
33380 modev4 = insn_data[d->icode].operand[4].mode;
33381 modei5 = insn_data[d->icode].operand[5].mode;
33382 modeimm = insn_data[d->icode].operand[6].mode;
33383
33384 if (VECTOR_MODE_P (modev2))
33385 op0 = safe_vector_operand (op0, modev2);
33386 if (VECTOR_MODE_P (modev4))
33387 op2 = safe_vector_operand (op2, modev4);
33388
33389 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33390 op0 = copy_to_mode_reg (modev2, op0);
33391 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33392 op1 = copy_to_mode_reg (modei3, op1);
33393 if ((optimize && !register_operand (op2, modev4))
33394 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33395 op2 = copy_to_mode_reg (modev4, op2);
33396 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33397 op3 = copy_to_mode_reg (modei5, op3);
33398
33399 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33400 {
33401 error ("the fifth argument must be an 8-bit immediate");
33402 return const0_rtx;
33403 }
33404
33405 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33406 {
33407 if (optimize || !target
33408 || GET_MODE (target) != tmode0
33409 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33410 target = gen_reg_rtx (tmode0);
33411
33412 scratch1 = gen_reg_rtx (tmode1);
33413
33414 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33415 }
33416 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33417 {
33418 if (optimize || !target
33419 || GET_MODE (target) != tmode1
33420 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33421 target = gen_reg_rtx (tmode1);
33422
33423 scratch0 = gen_reg_rtx (tmode0);
33424
33425 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33426 }
33427 else
33428 {
33429 gcc_assert (d->flag);
33430
33431 scratch0 = gen_reg_rtx (tmode0);
33432 scratch1 = gen_reg_rtx (tmode1);
33433
33434 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33435 }
33436
33437 if (! pat)
33438 return 0;
33439
33440 emit_insn (pat);
33441
33442 if (d->flag)
33443 {
33444 target = gen_reg_rtx (SImode);
33445 emit_move_insn (target, const0_rtx);
33446 target = gen_rtx_SUBREG (QImode, target, 0);
33447
33448 emit_insn
33449 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33450 gen_rtx_fmt_ee (EQ, QImode,
33451 gen_rtx_REG ((enum machine_mode) d->flag,
33452 FLAGS_REG),
33453 const0_rtx)));
33454 return SUBREG_REG (target);
33455 }
33456 else
33457 return target;
33458 }
33459
33460
33461 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33462
33463 static rtx
33464 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33465 tree exp, rtx target)
33466 {
33467 rtx pat;
33468 tree arg0 = CALL_EXPR_ARG (exp, 0);
33469 tree arg1 = CALL_EXPR_ARG (exp, 1);
33470 tree arg2 = CALL_EXPR_ARG (exp, 2);
33471 rtx scratch0, scratch1;
33472 rtx op0 = expand_normal (arg0);
33473 rtx op1 = expand_normal (arg1);
33474 rtx op2 = expand_normal (arg2);
33475 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33476
33477 tmode0 = insn_data[d->icode].operand[0].mode;
33478 tmode1 = insn_data[d->icode].operand[1].mode;
33479 modev2 = insn_data[d->icode].operand[2].mode;
33480 modev3 = insn_data[d->icode].operand[3].mode;
33481 modeimm = insn_data[d->icode].operand[4].mode;
33482
33483 if (VECTOR_MODE_P (modev2))
33484 op0 = safe_vector_operand (op0, modev2);
33485 if (VECTOR_MODE_P (modev3))
33486 op1 = safe_vector_operand (op1, modev3);
33487
33488 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33489 op0 = copy_to_mode_reg (modev2, op0);
33490 if ((optimize && !register_operand (op1, modev3))
33491 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33492 op1 = copy_to_mode_reg (modev3, op1);
33493
33494 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33495 {
33496 error ("the third argument must be an 8-bit immediate");
33497 return const0_rtx;
33498 }
33499
33500 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33501 {
33502 if (optimize || !target
33503 || GET_MODE (target) != tmode0
33504 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33505 target = gen_reg_rtx (tmode0);
33506
33507 scratch1 = gen_reg_rtx (tmode1);
33508
33509 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33510 }
33511 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33512 {
33513 if (optimize || !target
33514 || GET_MODE (target) != tmode1
33515 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33516 target = gen_reg_rtx (tmode1);
33517
33518 scratch0 = gen_reg_rtx (tmode0);
33519
33520 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33521 }
33522 else
33523 {
33524 gcc_assert (d->flag);
33525
33526 scratch0 = gen_reg_rtx (tmode0);
33527 scratch1 = gen_reg_rtx (tmode1);
33528
33529 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33530 }
33531
33532 if (! pat)
33533 return 0;
33534
33535 emit_insn (pat);
33536
33537 if (d->flag)
33538 {
33539 target = gen_reg_rtx (SImode);
33540 emit_move_insn (target, const0_rtx);
33541 target = gen_rtx_SUBREG (QImode, target, 0);
33542
33543 emit_insn
33544 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33545 gen_rtx_fmt_ee (EQ, QImode,
33546 gen_rtx_REG ((enum machine_mode) d->flag,
33547 FLAGS_REG),
33548 const0_rtx)));
33549 return SUBREG_REG (target);
33550 }
33551 else
33552 return target;
33553 }
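
/* Illustrative sketch (assumed mapping): the pcmpistr/pcmpestr expanders
   above back the SSE4.2 string intrinsics, for example:

     int     idx = _mm_cmpistri (a, b, _SIDD_CMP_EQUAL_EACH);
     __m128i msk = _mm_cmpistrm (a, b, _SIDD_CMP_EQUAL_EACH);

   The control argument must be a compile-time 8-bit immediate, which is
   why a non-constant operand is rejected with an error above.  */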
33554
33555 /* Subroutine of ix86_expand_builtin to take care of insns with
33556 variable number of operands. */
33557
33558 static rtx
33559 ix86_expand_args_builtin (const struct builtin_description *d,
33560 tree exp, rtx target)
33561 {
33562 rtx pat, real_target;
33563 unsigned int i, nargs;
33564 unsigned int nargs_constant = 0;
33565 unsigned int mask_pos = 0;
33566 int num_memory = 0;
33567 struct
33568 {
33569 rtx op;
33570 enum machine_mode mode;
33571 } args[6];
33572 bool last_arg_count = false;
33573 enum insn_code icode = d->icode;
33574 const struct insn_data_d *insn_p = &insn_data[icode];
33575 enum machine_mode tmode = insn_p->operand[0].mode;
33576 enum machine_mode rmode = VOIDmode;
33577 bool swap = false;
33578 enum rtx_code comparison = d->comparison;
33579
33580 switch ((enum ix86_builtin_func_type) d->flag)
33581 {
33582 case V2DF_FTYPE_V2DF_ROUND:
33583 case V4DF_FTYPE_V4DF_ROUND:
33584 case V4SF_FTYPE_V4SF_ROUND:
33585 case V8SF_FTYPE_V8SF_ROUND:
33586 case V4SI_FTYPE_V4SF_ROUND:
33587 case V8SI_FTYPE_V8SF_ROUND:
33588 return ix86_expand_sse_round (d, exp, target);
33589 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33590 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33591 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33592 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33593 case INT_FTYPE_V8SF_V8SF_PTEST:
33594 case INT_FTYPE_V4DI_V4DI_PTEST:
33595 case INT_FTYPE_V4DF_V4DF_PTEST:
33596 case INT_FTYPE_V4SF_V4SF_PTEST:
33597 case INT_FTYPE_V2DI_V2DI_PTEST:
33598 case INT_FTYPE_V2DF_V2DF_PTEST:
33599 return ix86_expand_sse_ptest (d, exp, target);
33600 case FLOAT128_FTYPE_FLOAT128:
33601 case FLOAT_FTYPE_FLOAT:
33602 case INT_FTYPE_INT:
33603 case UINT64_FTYPE_INT:
33604 case UINT16_FTYPE_UINT16:
33605 case INT64_FTYPE_INT64:
33606 case INT64_FTYPE_V4SF:
33607 case INT64_FTYPE_V2DF:
33608 case INT_FTYPE_V16QI:
33609 case INT_FTYPE_V8QI:
33610 case INT_FTYPE_V8SF:
33611 case INT_FTYPE_V4DF:
33612 case INT_FTYPE_V4SF:
33613 case INT_FTYPE_V2DF:
33614 case INT_FTYPE_V32QI:
33615 case V16QI_FTYPE_V16QI:
33616 case V8SI_FTYPE_V8SF:
33617 case V8SI_FTYPE_V4SI:
33618 case V8HI_FTYPE_V8HI:
33619 case V8HI_FTYPE_V16QI:
33620 case V8QI_FTYPE_V8QI:
33621 case V8SF_FTYPE_V8SF:
33622 case V8SF_FTYPE_V8SI:
33623 case V8SF_FTYPE_V4SF:
33624 case V8SF_FTYPE_V8HI:
33625 case V4SI_FTYPE_V4SI:
33626 case V4SI_FTYPE_V16QI:
33627 case V4SI_FTYPE_V4SF:
33628 case V4SI_FTYPE_V8SI:
33629 case V4SI_FTYPE_V8HI:
33630 case V4SI_FTYPE_V4DF:
33631 case V4SI_FTYPE_V2DF:
33632 case V4HI_FTYPE_V4HI:
33633 case V4DF_FTYPE_V4DF:
33634 case V4DF_FTYPE_V4SI:
33635 case V4DF_FTYPE_V4SF:
33636 case V4DF_FTYPE_V2DF:
33637 case V4SF_FTYPE_V4SF:
33638 case V4SF_FTYPE_V4SI:
33639 case V4SF_FTYPE_V8SF:
33640 case V4SF_FTYPE_V4DF:
33641 case V4SF_FTYPE_V8HI:
33642 case V4SF_FTYPE_V2DF:
33643 case V2DI_FTYPE_V2DI:
33644 case V2DI_FTYPE_V16QI:
33645 case V2DI_FTYPE_V8HI:
33646 case V2DI_FTYPE_V4SI:
33647 case V2DF_FTYPE_V2DF:
33648 case V2DF_FTYPE_V4SI:
33649 case V2DF_FTYPE_V4DF:
33650 case V2DF_FTYPE_V4SF:
33651 case V2DF_FTYPE_V2SI:
33652 case V2SI_FTYPE_V2SI:
33653 case V2SI_FTYPE_V4SF:
33654 case V2SI_FTYPE_V2SF:
33655 case V2SI_FTYPE_V2DF:
33656 case V2SF_FTYPE_V2SF:
33657 case V2SF_FTYPE_V2SI:
33658 case V32QI_FTYPE_V32QI:
33659 case V32QI_FTYPE_V16QI:
33660 case V16HI_FTYPE_V16HI:
33661 case V16HI_FTYPE_V8HI:
33662 case V8SI_FTYPE_V8SI:
33663 case V16HI_FTYPE_V16QI:
33664 case V8SI_FTYPE_V16QI:
33665 case V4DI_FTYPE_V16QI:
33666 case V8SI_FTYPE_V8HI:
33667 case V4DI_FTYPE_V8HI:
33668 case V4DI_FTYPE_V4SI:
33669 case V4DI_FTYPE_V2DI:
33670 case HI_FTYPE_HI:
33671 case UINT_FTYPE_V2DF:
33672 case UINT_FTYPE_V4SF:
33673 case UINT64_FTYPE_V2DF:
33674 case UINT64_FTYPE_V4SF:
33675 case V16QI_FTYPE_V8DI:
33676 case V16HI_FTYPE_V16SI:
33677 case V16SI_FTYPE_HI:
33678 case V16SI_FTYPE_V16SI:
33679 case V16SI_FTYPE_INT:
33680 case V16SF_FTYPE_FLOAT:
33681 case V16SF_FTYPE_V8SF:
33682 case V16SI_FTYPE_V8SI:
33683 case V16SF_FTYPE_V4SF:
33684 case V16SI_FTYPE_V4SI:
33685 case V16SF_FTYPE_V16SF:
33686 case V8HI_FTYPE_V8DI:
33687 case V8UHI_FTYPE_V8UHI:
33688 case V8SI_FTYPE_V8DI:
33689 case V8USI_FTYPE_V8USI:
33690 case V8SF_FTYPE_V8DF:
33691 case V8DI_FTYPE_QI:
33692 case V8DI_FTYPE_INT64:
33693 case V8DI_FTYPE_V4DI:
33694 case V8DI_FTYPE_V8DI:
33695 case V8DF_FTYPE_DOUBLE:
33696 case V8DF_FTYPE_V4DF:
33697 case V8DF_FTYPE_V2DF:
33698 case V8DF_FTYPE_V8DF:
33699 case V8DF_FTYPE_V8SI:
33700 nargs = 1;
33701 break;
33702 case V4SF_FTYPE_V4SF_VEC_MERGE:
33703 case V2DF_FTYPE_V2DF_VEC_MERGE:
33704 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33705 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33706 case V16QI_FTYPE_V16QI_V16QI:
33707 case V16QI_FTYPE_V8HI_V8HI:
33708 case V16SI_FTYPE_V16SI_V16SI:
33709 case V16SF_FTYPE_V16SF_V16SF:
33710 case V16SF_FTYPE_V16SF_V16SI:
33711 case V8QI_FTYPE_V8QI_V8QI:
33712 case V8QI_FTYPE_V4HI_V4HI:
33713 case V8HI_FTYPE_V8HI_V8HI:
33714 case V8HI_FTYPE_V16QI_V16QI:
33715 case V8HI_FTYPE_V4SI_V4SI:
33716 case V8SF_FTYPE_V8SF_V8SF:
33717 case V8SF_FTYPE_V8SF_V8SI:
33718 case V8DI_FTYPE_V8DI_V8DI:
33719 case V8DF_FTYPE_V8DF_V8DF:
33720 case V8DF_FTYPE_V8DF_V8DI:
33721 case V4SI_FTYPE_V4SI_V4SI:
33722 case V4SI_FTYPE_V8HI_V8HI:
33723 case V4SI_FTYPE_V4SF_V4SF:
33724 case V4SI_FTYPE_V2DF_V2DF:
33725 case V4HI_FTYPE_V4HI_V4HI:
33726 case V4HI_FTYPE_V8QI_V8QI:
33727 case V4HI_FTYPE_V2SI_V2SI:
33728 case V4DF_FTYPE_V4DF_V4DF:
33729 case V4DF_FTYPE_V4DF_V4DI:
33730 case V4SF_FTYPE_V4SF_V4SF:
33731 case V4SF_FTYPE_V4SF_V4SI:
33732 case V4SF_FTYPE_V4SF_V2SI:
33733 case V4SF_FTYPE_V4SF_V2DF:
33734 case V4SF_FTYPE_V4SF_UINT:
33735 case V4SF_FTYPE_V4SF_UINT64:
33736 case V4SF_FTYPE_V4SF_DI:
33737 case V4SF_FTYPE_V4SF_SI:
33738 case V2DI_FTYPE_V2DI_V2DI:
33739 case V2DI_FTYPE_V16QI_V16QI:
33740 case V2DI_FTYPE_V4SI_V4SI:
33741 case V2UDI_FTYPE_V4USI_V4USI:
33742 case V2DI_FTYPE_V2DI_V16QI:
33743 case V2DI_FTYPE_V2DF_V2DF:
33744 case V2SI_FTYPE_V2SI_V2SI:
33745 case V2SI_FTYPE_V4HI_V4HI:
33746 case V2SI_FTYPE_V2SF_V2SF:
33747 case V2DF_FTYPE_V2DF_V2DF:
33748 case V2DF_FTYPE_V2DF_V4SF:
33749 case V2DF_FTYPE_V2DF_V2DI:
33750 case V2DF_FTYPE_V2DF_DI:
33751 case V2DF_FTYPE_V2DF_SI:
33752 case V2DF_FTYPE_V2DF_UINT:
33753 case V2DF_FTYPE_V2DF_UINT64:
33754 case V2SF_FTYPE_V2SF_V2SF:
33755 case V1DI_FTYPE_V1DI_V1DI:
33756 case V1DI_FTYPE_V8QI_V8QI:
33757 case V1DI_FTYPE_V2SI_V2SI:
33758 case V32QI_FTYPE_V16HI_V16HI:
33759 case V16HI_FTYPE_V8SI_V8SI:
33760 case V32QI_FTYPE_V32QI_V32QI:
33761 case V16HI_FTYPE_V32QI_V32QI:
33762 case V16HI_FTYPE_V16HI_V16HI:
33763 case V8SI_FTYPE_V4DF_V4DF:
33764 case V8SI_FTYPE_V8SI_V8SI:
33765 case V8SI_FTYPE_V16HI_V16HI:
33766 case V4DI_FTYPE_V4DI_V4DI:
33767 case V4DI_FTYPE_V8SI_V8SI:
33768 case V4UDI_FTYPE_V8USI_V8USI:
33769 case QI_FTYPE_V8DI_V8DI:
33770 case HI_FTYPE_V16SI_V16SI:
33771 if (comparison == UNKNOWN)
33772 return ix86_expand_binop_builtin (icode, exp, target);
33773 nargs = 2;
33774 break;
33775 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33776 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33777 gcc_assert (comparison != UNKNOWN);
33778 nargs = 2;
33779 swap = true;
33780 break;
33781 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33782 case V16HI_FTYPE_V16HI_SI_COUNT:
33783 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33784 case V8SI_FTYPE_V8SI_SI_COUNT:
33785 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33786 case V4DI_FTYPE_V4DI_INT_COUNT:
33787 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33788 case V8HI_FTYPE_V8HI_SI_COUNT:
33789 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33790 case V4SI_FTYPE_V4SI_SI_COUNT:
33791 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33792 case V4HI_FTYPE_V4HI_SI_COUNT:
33793 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33794 case V2DI_FTYPE_V2DI_SI_COUNT:
33795 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33796 case V2SI_FTYPE_V2SI_SI_COUNT:
33797 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33798 case V1DI_FTYPE_V1DI_SI_COUNT:
33799 nargs = 2;
33800 last_arg_count = true;
33801 break;
33802 case UINT64_FTYPE_UINT64_UINT64:
33803 case UINT_FTYPE_UINT_UINT:
33804 case UINT_FTYPE_UINT_USHORT:
33805 case UINT_FTYPE_UINT_UCHAR:
33806 case UINT16_FTYPE_UINT16_INT:
33807 case UINT8_FTYPE_UINT8_INT:
33808 case HI_FTYPE_HI_HI:
33809 case V16SI_FTYPE_V8DF_V8DF:
33810 nargs = 2;
33811 break;
33812 case V2DI_FTYPE_V2DI_INT_CONVERT:
33813 nargs = 2;
33814 rmode = V1TImode;
33815 nargs_constant = 1;
33816 break;
33817 case V4DI_FTYPE_V4DI_INT_CONVERT:
33818 nargs = 2;
33819 rmode = V2TImode;
33820 nargs_constant = 1;
33821 break;
33822 case V8HI_FTYPE_V8HI_INT:
33823 case V8HI_FTYPE_V8SF_INT:
33824 case V16HI_FTYPE_V16SF_INT:
33825 case V8HI_FTYPE_V4SF_INT:
33826 case V8SF_FTYPE_V8SF_INT:
33827 case V4SF_FTYPE_V16SF_INT:
33828 case V16SF_FTYPE_V16SF_INT:
33829 case V4SI_FTYPE_V4SI_INT:
33830 case V4SI_FTYPE_V8SI_INT:
33831 case V4HI_FTYPE_V4HI_INT:
33832 case V4DF_FTYPE_V4DF_INT:
33833 case V4DF_FTYPE_V8DF_INT:
33834 case V4SF_FTYPE_V4SF_INT:
33835 case V4SF_FTYPE_V8SF_INT:
33836 case V2DI_FTYPE_V2DI_INT:
33837 case V2DF_FTYPE_V2DF_INT:
33838 case V2DF_FTYPE_V4DF_INT:
33839 case V16HI_FTYPE_V16HI_INT:
33840 case V8SI_FTYPE_V8SI_INT:
33841 case V16SI_FTYPE_V16SI_INT:
33842 case V4SI_FTYPE_V16SI_INT:
33843 case V4DI_FTYPE_V4DI_INT:
33844 case V2DI_FTYPE_V4DI_INT:
33845 case V4DI_FTYPE_V8DI_INT:
33846 case HI_FTYPE_HI_INT:
33847 nargs = 2;
33848 nargs_constant = 1;
33849 break;
33850 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33851 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33852 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33853 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33854 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33855 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33856 case HI_FTYPE_V16SI_V16SI_HI:
33857 case QI_FTYPE_V8DI_V8DI_QI:
33858 case V16HI_FTYPE_V16SI_V16HI_HI:
33859 case V16QI_FTYPE_V16SI_V16QI_HI:
33860 case V16QI_FTYPE_V8DI_V16QI_QI:
33861 case V16SF_FTYPE_V16SF_V16SF_HI:
33862 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33863 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33864 case V16SF_FTYPE_V16SI_V16SF_HI:
33865 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33866 case V16SF_FTYPE_V4SF_V16SF_HI:
33867 case V16SI_FTYPE_SI_V16SI_HI:
33868 case V16SI_FTYPE_V16HI_V16SI_HI:
33869 case V16SI_FTYPE_V16QI_V16SI_HI:
33870 case V16SI_FTYPE_V16SF_V16SI_HI:
33871 case V16SI_FTYPE_V16SI_V16SI_HI:
33872 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33873 case V16SI_FTYPE_V4SI_V16SI_HI:
33874 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33875 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33876 case V8DF_FTYPE_V2DF_V8DF_QI:
33877 case V8DF_FTYPE_V4DF_V8DF_QI:
33878 case V8DF_FTYPE_V8DF_V8DF_QI:
33879 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33880 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33881 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33882 case V8DF_FTYPE_V8SF_V8DF_QI:
33883 case V8DF_FTYPE_V8SI_V8DF_QI:
33884 case V8DI_FTYPE_DI_V8DI_QI:
33885 case V8DI_FTYPE_V16QI_V8DI_QI:
33886 case V8DI_FTYPE_V2DI_V8DI_QI:
33887 case V8DI_FTYPE_V4DI_V8DI_QI:
33888 case V8DI_FTYPE_V8DI_V8DI_QI:
33889 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33890 case V8DI_FTYPE_V8HI_V8DI_QI:
33891 case V8DI_FTYPE_V8SI_V8DI_QI:
33892 case V8HI_FTYPE_V8DI_V8HI_QI:
33893 case V8SF_FTYPE_V8DF_V8SF_QI:
33894 case V8SI_FTYPE_V8DF_V8SI_QI:
33895 case V8SI_FTYPE_V8DI_V8SI_QI:
33896 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33897 nargs = 3;
33898 break;
33899 case V32QI_FTYPE_V32QI_V32QI_INT:
33900 case V16HI_FTYPE_V16HI_V16HI_INT:
33901 case V16QI_FTYPE_V16QI_V16QI_INT:
33902 case V4DI_FTYPE_V4DI_V4DI_INT:
33903 case V8HI_FTYPE_V8HI_V8HI_INT:
33904 case V8SI_FTYPE_V8SI_V8SI_INT:
33905 case V8SI_FTYPE_V8SI_V4SI_INT:
33906 case V8SF_FTYPE_V8SF_V8SF_INT:
33907 case V8SF_FTYPE_V8SF_V4SF_INT:
33908 case V4SI_FTYPE_V4SI_V4SI_INT:
33909 case V4DF_FTYPE_V4DF_V4DF_INT:
33910 case V16SF_FTYPE_V16SF_V16SF_INT:
33911 case V16SF_FTYPE_V16SF_V4SF_INT:
33912 case V16SI_FTYPE_V16SI_V4SI_INT:
33913 case V4DF_FTYPE_V4DF_V2DF_INT:
33914 case V4SF_FTYPE_V4SF_V4SF_INT:
33915 case V2DI_FTYPE_V2DI_V2DI_INT:
33916 case V4DI_FTYPE_V4DI_V2DI_INT:
33917 case V2DF_FTYPE_V2DF_V2DF_INT:
33918 case QI_FTYPE_V8DI_V8DI_INT:
33919 case QI_FTYPE_V8DF_V8DF_INT:
33920 case QI_FTYPE_V2DF_V2DF_INT:
33921 case QI_FTYPE_V4SF_V4SF_INT:
33922 case HI_FTYPE_V16SI_V16SI_INT:
33923 case HI_FTYPE_V16SF_V16SF_INT:
33924 nargs = 3;
33925 nargs_constant = 1;
33926 break;
33927 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33928 nargs = 3;
33929 rmode = V4DImode;
33930 nargs_constant = 1;
33931 break;
33932 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33933 nargs = 3;
33934 rmode = V2DImode;
33935 nargs_constant = 1;
33936 break;
33937 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33938 nargs = 3;
33939 rmode = DImode;
33940 nargs_constant = 1;
33941 break;
33942 case V2DI_FTYPE_V2DI_UINT_UINT:
33943 nargs = 3;
33944 nargs_constant = 2;
33945 break;
33946 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33947 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33948 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33949 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33950 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33951 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33952 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33953 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33954 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33955 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33956 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33957 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33958 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33959 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33960 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33961 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33962 nargs = 4;
33963 break;
33964 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33965 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33966 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33967 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33968 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33969 nargs = 4;
33970 nargs_constant = 1;
33971 break;
33972 case QI_FTYPE_V2DF_V2DF_INT_QI:
33973 case QI_FTYPE_V4SF_V4SF_INT_QI:
33974 nargs = 4;
33975 mask_pos = 1;
33976 nargs_constant = 1;
33977 break;
33978 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33979 nargs = 4;
33980 nargs_constant = 2;
33981 break;
33982 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33983 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33984 nargs = 4;
33985 break;
33986 case QI_FTYPE_V8DI_V8DI_INT_QI:
33987 case HI_FTYPE_V16SI_V16SI_INT_HI:
33988 case QI_FTYPE_V8DF_V8DF_INT_QI:
33989 case HI_FTYPE_V16SF_V16SF_INT_HI:
33990 mask_pos = 1;
33991 nargs = 4;
33992 nargs_constant = 1;
33993 break;
33994 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
33995 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
33996 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
33997 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
33998 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
33999 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
34000 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
34001 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
34002 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
34003 nargs = 4;
34004 mask_pos = 2;
34005 nargs_constant = 1;
34006 break;
34007 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
34008 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
34009 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
34010 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
34011 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
34012 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
34013 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
34014 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
34015 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
34016 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
34017 nargs = 5;
34018 mask_pos = 2;
34019 nargs_constant = 1;
34020 break;
34021 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
34022 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
34023 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
34024 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
34025 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
34026 nargs = 5;
34027 mask_pos = 1;
34028 nargs_constant = 1;
34029 break;
34030
34031 default:
34032 gcc_unreachable ();
34033 }
34034
34035 gcc_assert (nargs <= ARRAY_SIZE (args));
34036
34037 if (comparison != UNKNOWN)
34038 {
34039 gcc_assert (nargs == 2);
34040 return ix86_expand_sse_compare (d, exp, target, swap);
34041 }
34042
34043 if (rmode == VOIDmode || rmode == tmode)
34044 {
34045 if (optimize
34046 || target == 0
34047 || GET_MODE (target) != tmode
34048 || !insn_p->operand[0].predicate (target, tmode))
34049 target = gen_reg_rtx (tmode);
34050 real_target = target;
34051 }
34052 else
34053 {
34054 real_target = gen_reg_rtx (tmode);
34055 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
34056 }
34057
34058 for (i = 0; i < nargs; i++)
34059 {
34060 tree arg = CALL_EXPR_ARG (exp, i);
34061 rtx op = expand_normal (arg);
34062 enum machine_mode mode = insn_p->operand[i + 1].mode;
34063 bool match = insn_p->operand[i + 1].predicate (op, mode);
34064
34065 if (last_arg_count && (i + 1) == nargs)
34066 {
34067 /* SIMD shift insns take either an 8-bit immediate or
34068 register as count. But builtin functions take int as
34069 count. If count doesn't match, we put it in register. */
34070 if (!match)
34071 {
34072 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
34073 if (!insn_p->operand[i + 1].predicate (op, mode))
34074 op = copy_to_reg (op);
34075 }
34076 }
34077 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
34078 || (!mask_pos && (nargs - i) <= nargs_constant))
34079 {
34080 if (!match)
34081 switch (icode)
34082 {
34083 case CODE_FOR_avx2_inserti128:
34084 case CODE_FOR_avx2_extracti128:
34085 error ("the last argument must be an 1-bit immediate");
34086 return const0_rtx;
34087
34088 case CODE_FOR_avx512f_cmpv8di3_mask:
34089 case CODE_FOR_avx512f_cmpv16si3_mask:
34090 case CODE_FOR_avx512f_ucmpv8di3_mask:
34091 case CODE_FOR_avx512f_ucmpv16si3_mask:
34092 error ("the last argument must be a 3-bit immediate");
34093 return const0_rtx;
34094
34095 case CODE_FOR_sse4_1_roundsd:
34096 case CODE_FOR_sse4_1_roundss:
34097
34098 case CODE_FOR_sse4_1_roundpd:
34099 case CODE_FOR_sse4_1_roundps:
34100 case CODE_FOR_avx_roundpd256:
34101 case CODE_FOR_avx_roundps256:
34102
34103 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34104 case CODE_FOR_sse4_1_roundps_sfix:
34105 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34106 case CODE_FOR_avx_roundps_sfix256:
34107
34108 case CODE_FOR_sse4_1_blendps:
34109 case CODE_FOR_avx_blendpd256:
34110 case CODE_FOR_avx_vpermilv4df:
34111 case CODE_FOR_avx512f_getmantv8df_mask:
34112 case CODE_FOR_avx512f_getmantv16sf_mask:
34113 error ("the last argument must be a 4-bit immediate");
34114 return const0_rtx;
34115
34116 case CODE_FOR_sha1rnds4:
34117 case CODE_FOR_sse4_1_blendpd:
34118 case CODE_FOR_avx_vpermilv2df:
34119 case CODE_FOR_xop_vpermil2v2df3:
34120 case CODE_FOR_xop_vpermil2v4sf3:
34121 case CODE_FOR_xop_vpermil2v4df3:
34122 case CODE_FOR_xop_vpermil2v8sf3:
34123 case CODE_FOR_avx512f_vinsertf32x4_mask:
34124 case CODE_FOR_avx512f_vinserti32x4_mask:
34125 case CODE_FOR_avx512f_vextractf32x4_mask:
34126 case CODE_FOR_avx512f_vextracti32x4_mask:
34127 error ("the last argument must be a 2-bit immediate");
34128 return const0_rtx;
34129
34130 case CODE_FOR_avx_vextractf128v4df:
34131 case CODE_FOR_avx_vextractf128v8sf:
34132 case CODE_FOR_avx_vextractf128v8si:
34133 case CODE_FOR_avx_vinsertf128v4df:
34134 case CODE_FOR_avx_vinsertf128v8sf:
34135 case CODE_FOR_avx_vinsertf128v8si:
34136 case CODE_FOR_avx512f_vinsertf64x4_mask:
34137 case CODE_FOR_avx512f_vinserti64x4_mask:
34138 case CODE_FOR_avx512f_vextractf64x4_mask:
34139 case CODE_FOR_avx512f_vextracti64x4_mask:
34140 error ("the last argument must be a 1-bit immediate");
34141 return const0_rtx;
34142
34143 case CODE_FOR_avx_vmcmpv2df3:
34144 case CODE_FOR_avx_vmcmpv4sf3:
34145 case CODE_FOR_avx_cmpv2df3:
34146 case CODE_FOR_avx_cmpv4sf3:
34147 case CODE_FOR_avx_cmpv4df3:
34148 case CODE_FOR_avx_cmpv8sf3:
34149 case CODE_FOR_avx512f_cmpv8df3_mask:
34150 case CODE_FOR_avx512f_cmpv16sf3_mask:
34151 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34152 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34153 error ("the last argument must be a 5-bit immediate");
34154 return const0_rtx;
34155
34156 default:
34157 switch (nargs_constant)
34158 {
34159 case 2:
34160 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
34161 || (!mask_pos && (nargs - i) == nargs_constant))
34162 {
34163 error ("the next to last argument must be an 8-bit immediate");
34164 break;
34165 }
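/* FALLTHRU - otherwise the offending argument is the last constant,
so the generic error below applies. */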
34166 case 1:
34167 error ("the last argument must be an 8-bit immediate");
34168 break;
34169 default:
34170 gcc_unreachable ();
34171 }
34172 return const0_rtx;
34173 }
34174 }
34175 else
34176 {
34177 if (VECTOR_MODE_P (mode))
34178 op = safe_vector_operand (op, mode);
34179
34180 /* If we aren't optimizing, only allow one memory operand to
34181 be generated. */
34182 if (memory_operand (op, mode))
34183 num_memory++;
34184
34185 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34186 {
34187 if (optimize || !match || num_memory > 1)
34188 op = copy_to_mode_reg (mode, op);
34189 }
34190 else
34191 {
34192 op = copy_to_reg (op);
34193 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34194 }
34195 }
34196
34197 args[i].op = op;
34198 args[i].mode = mode;
34199 }
34200
34201 switch (nargs)
34202 {
34203 case 1:
34204 pat = GEN_FCN (icode) (real_target, args[0].op);
34205 break;
34206 case 2:
34207 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34208 break;
34209 case 3:
34210 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34211 args[2].op);
34212 break;
34213 case 4:
34214 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34215 args[2].op, args[3].op);
34216 break;
34217 case 5:
34218 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34219 args[2].op, args[3].op, args[4].op);
break;
34220 case 6:
34221 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34222 args[2].op, args[3].op, args[4].op,
34223 args[5].op);
34224 break;
34225 default:
34226 gcc_unreachable ();
34227 }
34228
34229 if (! pat)
34230 return 0;
34231
34232 emit_insn (pat);
34233 return target;
34234 }
34235
34236 /* Transform a pattern of the following layout:
34237 (parallel [
34238 (set (A B))
34239 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34240 ])
34241 into:
34242 (set (A B))
34243
34244 Or:
34245 (parallel [ A B
34246 ...
34247 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34248 ...
34249 ])
34250 into:
34251 (parallel [ A B ... ]) */
34252
34253 static rtx
34254 ix86_erase_embedded_rounding (rtx pat)
34255 {
34256 if (GET_CODE (pat) == INSN)
34257 pat = PATTERN (pat);
34258
34259 gcc_assert (GET_CODE (pat) == PARALLEL);
34260
34261 if (XVECLEN (pat, 0) == 2)
34262 {
34263 rtx p0 = XVECEXP (pat, 0, 0);
34264 rtx p1 = XVECEXP (pat, 0, 1);
34265
34266 gcc_assert (GET_CODE (p0) == SET
34267 && GET_CODE (p1) == UNSPEC
34268 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34269
34270 return p0;
34271 }
34272 else
34273 {
34274 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34275 int i = 0;
34276 int j = 0;
34277
34278 for (; i < XVECLEN (pat, 0); ++i)
34279 {
34280 rtx elem = XVECEXP (pat, 0, i);
34281 if (GET_CODE (elem) != UNSPEC
34282 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34283 res [j++] = elem;
34284 }
34285
34286 /* No more than 1 occurrence was removed. */
34287 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34288
34289 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34290 }
34291 }
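/* Illustrative sketch (hand-written, not taken from the machine description):
a two-element PARALLEL such as
(parallel [(set (reg:V2DF 100) (...))
(unspec [(const_int 4)] UNSPEC_EMBEDDED_ROUNDING)])
collapses to the inner SET alone, while a longer PARALLEL is rebuilt with
just the UNSPEC_EMBEDDED_ROUNDING element removed. */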
34292
34293 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34294 with rounding. */
34295 static rtx
34296 ix86_expand_sse_comi_round (const struct builtin_description *d,
34297 tree exp, rtx target)
34298 {
34299 rtx pat, set_dst;
34300 tree arg0 = CALL_EXPR_ARG (exp, 0);
34301 tree arg1 = CALL_EXPR_ARG (exp, 1);
34302 tree arg2 = CALL_EXPR_ARG (exp, 2);
34303 tree arg3 = CALL_EXPR_ARG (exp, 3);
34304 rtx op0 = expand_normal (arg0);
34305 rtx op1 = expand_normal (arg1);
34306 rtx op2 = expand_normal (arg2);
34307 rtx op3 = expand_normal (arg3);
34308 enum insn_code icode = d->icode;
34309 const struct insn_data_d *insn_p = &insn_data[icode];
34310 enum machine_mode mode0 = insn_p->operand[0].mode;
34311 enum machine_mode mode1 = insn_p->operand[1].mode;
34312 enum rtx_code comparison = UNEQ;
34313 bool need_ucomi = false;
34314
34315 /* See avxintrin.h for values. */
34316 enum rtx_code comi_comparisons[32] =
34317 {
34318 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34319 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34320 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34321 };
34322 bool need_ucomi_values[32] =
34323 {
34324 true, false, false, true, true, false, false, true,
34325 true, false, false, true, true, false, false, true,
34326 false, true, true, false, false, true, true, false,
34327 false, true, true, false, false, true, true, false
34328 };
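/* Both tables are indexed by the _CMP_* predicate value in the immediate:
comi_comparisons[i] gives the rtx comparison applied to the flags result,
and need_ucomi_values[i] selects the quiet (non-signalling) UCOMI form
instead of COMI. */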
34329
34330 if (!CONST_INT_P (op2))
34331 {
34332 error ("the third argument must be comparison constant");
34333 return const0_rtx;
34334 }
34335 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34336 {
34337 error ("incorect comparison mode");
34338 return const0_rtx;
34339 }
34340
34341 if (!insn_p->operand[2].predicate (op3, SImode))
34342 {
34343 error ("incorrect rounding operand");
34344 return const0_rtx;
34345 }
34346
34347 comparison = comi_comparisons[INTVAL (op2)];
34348 need_ucomi = need_ucomi_values[INTVAL (op2)];
34349
34350 if (VECTOR_MODE_P (mode0))
34351 op0 = safe_vector_operand (op0, mode0);
34352 if (VECTOR_MODE_P (mode1))
34353 op1 = safe_vector_operand (op1, mode1);
34354
34355 target = gen_reg_rtx (SImode);
34356 emit_move_insn (target, const0_rtx);
34357 target = gen_rtx_SUBREG (QImode, target, 0);
34358
34359 if ((optimize && !register_operand (op0, mode0))
34360 || !insn_p->operand[0].predicate (op0, mode0))
34361 op0 = copy_to_mode_reg (mode0, op0);
34362 if ((optimize && !register_operand (op1, mode1))
34363 || !insn_p->operand[1].predicate (op1, mode1))
34364 op1 = copy_to_mode_reg (mode1, op1);
34365
34366 if (need_ucomi)
34367 icode = icode == CODE_FOR_sse_comi_round
34368 ? CODE_FOR_sse_ucomi_round
34369 : CODE_FOR_sse2_ucomi_round;
34370
34371 pat = GEN_FCN (icode) (op0, op1, op3);
34372 if (! pat)
34373 return 0;
34374
34375 /* The rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34376 if (INTVAL (op3) == NO_ROUND)
34377 {
34378 pat = ix86_erase_embedded_rounding (pat);
34379 if (! pat)
34380 return 0;
34381
34382 set_dst = SET_DEST (pat);
34383 }
34384 else
34385 {
34386 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34387 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34388 }
34389
34390 emit_insn (pat);
34391 emit_insn (gen_rtx_SET (VOIDmode,
34392 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34393 gen_rtx_fmt_ee (comparison, QImode,
34394 set_dst,
34395 const0_rtx)));
34396
34397 return SUBREG_REG (target);
34398 }
34399
34400 static rtx
34401 ix86_expand_round_builtin (const struct builtin_description *d,
34402 tree exp, rtx target)
34403 {
34404 rtx pat;
34405 unsigned int i, nargs;
34406 struct
34407 {
34408 rtx op;
34409 enum machine_mode mode;
34410 } args[6];
34411 enum insn_code icode = d->icode;
34412 const struct insn_data_d *insn_p = &insn_data[icode];
34413 enum machine_mode tmode = insn_p->operand[0].mode;
34414 unsigned int nargs_constant = 0;
34415 unsigned int redundant_embed_rnd = 0;
34416
34417 switch ((enum ix86_builtin_func_type) d->flag)
34418 {
34419 case UINT64_FTYPE_V2DF_INT:
34420 case UINT64_FTYPE_V4SF_INT:
34421 case UINT_FTYPE_V2DF_INT:
34422 case UINT_FTYPE_V4SF_INT:
34423 case INT64_FTYPE_V2DF_INT:
34424 case INT64_FTYPE_V4SF_INT:
34425 case INT_FTYPE_V2DF_INT:
34426 case INT_FTYPE_V4SF_INT:
34427 nargs = 2;
34428 break;
34429 case V4SF_FTYPE_V4SF_UINT_INT:
34430 case V4SF_FTYPE_V4SF_UINT64_INT:
34431 case V2DF_FTYPE_V2DF_UINT64_INT:
34432 case V4SF_FTYPE_V4SF_INT_INT:
34433 case V4SF_FTYPE_V4SF_INT64_INT:
34434 case V2DF_FTYPE_V2DF_INT64_INT:
34435 case V4SF_FTYPE_V4SF_V4SF_INT:
34436 case V2DF_FTYPE_V2DF_V2DF_INT:
34437 case V4SF_FTYPE_V4SF_V2DF_INT:
34438 case V2DF_FTYPE_V2DF_V4SF_INT:
34439 nargs = 3;
34440 break;
34441 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34442 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34443 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34444 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34445 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34446 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34447 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34448 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34449 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34450 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34451 nargs = 4;
34452 break;
34453 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34454 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34455 nargs_constant = 2;
34456 nargs = 4;
34457 break;
34458 case INT_FTYPE_V4SF_V4SF_INT_INT:
34459 case INT_FTYPE_V2DF_V2DF_INT_INT:
34460 return ix86_expand_sse_comi_round (d, exp, target);
34461 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34462 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34463 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34464 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34465 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34466 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34467 nargs = 5;
34468 break;
34469 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34470 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34471 nargs_constant = 4;
34472 nargs = 5;
34473 break;
34474 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34475 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34476 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34477 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34478 nargs_constant = 3;
34479 nargs = 5;
34480 break;
34481 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34482 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34483 nargs = 6;
34484 nargs_constant = 4;
34485 break;
34486 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34487 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34488 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34489 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34490 nargs = 6;
34491 nargs_constant = 3;
34492 break;
34493 default:
34494 gcc_unreachable ();
34495 }
34496 gcc_assert (nargs <= ARRAY_SIZE (args));
34497
34498 if (optimize
34499 || target == 0
34500 || GET_MODE (target) != tmode
34501 || !insn_p->operand[0].predicate (target, tmode))
34502 target = gen_reg_rtx (tmode);
34503
34504 for (i = 0; i < nargs; i++)
34505 {
34506 tree arg = CALL_EXPR_ARG (exp, i);
34507 rtx op = expand_normal (arg);
34508 enum machine_mode mode = insn_p->operand[i + 1].mode;
34509 bool match = insn_p->operand[i + 1].predicate (op, mode);
34510
34511 if (i == nargs - nargs_constant)
34512 {
34513 if (!match)
34514 {
34515 switch (icode)
34516 {
34517 case CODE_FOR_avx512f_getmantv8df_mask_round:
34518 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34519 case CODE_FOR_avx512f_getmantv2df_round:
34520 case CODE_FOR_avx512f_getmantv4sf_round:
34521 error ("the immediate argument must be a 4-bit immediate");
34522 return const0_rtx;
34523 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34524 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34525 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34526 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34527 error ("the immediate argument must be a 5-bit immediate");
34528 return const0_rtx;
34529 default:
34530 error ("the immediate argument must be an 8-bit immediate");
34531 return const0_rtx;
34532 }
34533 }
34534 }
34535 else if (i == nargs - 1)
34536 {
34537 if (!insn_p->operand[nargs].predicate (op, SImode))
34538 {
34539 error ("incorrect rounding operand");
34540 return const0_rtx;
34541 }
34542
34543 /* If there is no rounding, use the normal version of the pattern. */
34544 if (INTVAL (op) == NO_ROUND)
34545 redundant_embed_rnd = 1;
34546 }
34547 else
34548 {
34549 if (VECTOR_MODE_P (mode))
34550 op = safe_vector_operand (op, mode);
34551
34552 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34553 {
34554 if (optimize || !match)
34555 op = copy_to_mode_reg (mode, op);
34556 }
34557 else
34558 {
34559 op = copy_to_reg (op);
34560 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34561 }
34562 }
34563
34564 args[i].op = op;
34565 args[i].mode = mode;
34566 }
34567
34568 switch (nargs)
34569 {
34570 case 1:
34571 pat = GEN_FCN (icode) (target, args[0].op);
34572 break;
34573 case 2:
34574 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34575 break;
34576 case 3:
34577 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34578 args[2].op);
34579 break;
34580 case 4:
34581 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34582 args[2].op, args[3].op);
34583 break;
34584 case 5:
34585 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34586 args[2].op, args[3].op, args[4].op);
break;
34587 case 6:
34588 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34589 args[2].op, args[3].op, args[4].op,
34590 args[5].op);
34591 break;
34592 default:
34593 gcc_unreachable ();
34594 }
34595
34596 if (!pat)
34597 return 0;
34598
34599 if (redundant_embed_rnd)
34600 pat = ix86_erase_embedded_rounding (pat);
34601
34602 emit_insn (pat);
34603 return target;
34604 }
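/* For illustration only (the intrinsic name comes from avx512fintrin.h, not
from this file): a call such as
__m512d r = _mm512_add_round_pd (x, y, _MM_FROUND_CUR_DIRECTION);
reaches this routine with the rounding immediate as the trailing argument;
that immediate corresponds to NO_ROUND, so the embedded-rounding UNSPEC is
stripped again by ix86_erase_embedded_rounding before the insn is emitted. */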
34605
34606 /* Subroutine of ix86_expand_builtin to take care of special insns
34607 with variable number of operands. */
34608
34609 static rtx
34610 ix86_expand_special_args_builtin (const struct builtin_description *d,
34611 tree exp, rtx target)
34612 {
34613 tree arg;
34614 rtx pat, op;
34615 unsigned int i, nargs, arg_adjust, memory;
34616 bool aligned_mem = false;
34617 struct
34618 {
34619 rtx op;
34620 enum machine_mode mode;
34621 } args[3];
34622 enum insn_code icode = d->icode;
34623 bool last_arg_constant = false;
34624 const struct insn_data_d *insn_p = &insn_data[icode];
34625 enum machine_mode tmode = insn_p->operand[0].mode;
34626 enum { load, store } klass;
34627
34628 switch ((enum ix86_builtin_func_type) d->flag)
34629 {
34630 case VOID_FTYPE_VOID:
34631 emit_insn (GEN_FCN (icode) (target));
34632 return 0;
34633 case VOID_FTYPE_UINT64:
34634 case VOID_FTYPE_UNSIGNED:
34635 nargs = 0;
34636 klass = store;
34637 memory = 0;
34638 break;
34639
34640 case INT_FTYPE_VOID:
34641 case USHORT_FTYPE_VOID:
34642 case UINT64_FTYPE_VOID:
34643 case UNSIGNED_FTYPE_VOID:
34644 nargs = 0;
34645 klass = load;
34646 memory = 0;
34647 break;
34648 case UINT64_FTYPE_PUNSIGNED:
34649 case V2DI_FTYPE_PV2DI:
34650 case V4DI_FTYPE_PV4DI:
34651 case V32QI_FTYPE_PCCHAR:
34652 case V16QI_FTYPE_PCCHAR:
34653 case V8SF_FTYPE_PCV4SF:
34654 case V8SF_FTYPE_PCFLOAT:
34655 case V4SF_FTYPE_PCFLOAT:
34656 case V4DF_FTYPE_PCV2DF:
34657 case V4DF_FTYPE_PCDOUBLE:
34658 case V2DF_FTYPE_PCDOUBLE:
34659 case VOID_FTYPE_PVOID:
34660 case V16SI_FTYPE_PV4SI:
34661 case V16SF_FTYPE_PV4SF:
34662 case V8DI_FTYPE_PV4DI:
34663 case V8DI_FTYPE_PV8DI:
34664 case V8DF_FTYPE_PV4DF:
34665 nargs = 1;
34666 klass = load;
34667 memory = 0;
34668 switch (icode)
34669 {
34670 case CODE_FOR_sse4_1_movntdqa:
34671 case CODE_FOR_avx2_movntdqa:
34672 case CODE_FOR_avx512f_movntdqa:
34673 aligned_mem = true;
34674 break;
34675 default:
34676 break;
34677 }
34678 break;
34679 case VOID_FTYPE_PV2SF_V4SF:
34680 case VOID_FTYPE_PV8DI_V8DI:
34681 case VOID_FTYPE_PV4DI_V4DI:
34682 case VOID_FTYPE_PV2DI_V2DI:
34683 case VOID_FTYPE_PCHAR_V32QI:
34684 case VOID_FTYPE_PCHAR_V16QI:
34685 case VOID_FTYPE_PFLOAT_V16SF:
34686 case VOID_FTYPE_PFLOAT_V8SF:
34687 case VOID_FTYPE_PFLOAT_V4SF:
34688 case VOID_FTYPE_PDOUBLE_V8DF:
34689 case VOID_FTYPE_PDOUBLE_V4DF:
34690 case VOID_FTYPE_PDOUBLE_V2DF:
34691 case VOID_FTYPE_PLONGLONG_LONGLONG:
34692 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34693 case VOID_FTYPE_PINT_INT:
34694 nargs = 1;
34695 klass = store;
34696 /* Reserve memory operand for target. */
34697 memory = ARRAY_SIZE (args);
34698 switch (icode)
34699 {
34700 /* These builtins and instructions require the memory
34701 to be properly aligned. */
34702 case CODE_FOR_avx_movntv4di:
34703 case CODE_FOR_sse2_movntv2di:
34704 case CODE_FOR_avx_movntv8sf:
34705 case CODE_FOR_sse_movntv4sf:
34706 case CODE_FOR_sse4a_vmmovntv4sf:
34707 case CODE_FOR_avx_movntv4df:
34708 case CODE_FOR_sse2_movntv2df:
34709 case CODE_FOR_sse4a_vmmovntv2df:
34710 case CODE_FOR_sse2_movntidi:
34711 case CODE_FOR_sse_movntq:
34712 case CODE_FOR_sse2_movntisi:
34713 case CODE_FOR_avx512f_movntv16sf:
34714 case CODE_FOR_avx512f_movntv8df:
34715 case CODE_FOR_avx512f_movntv8di:
34716 aligned_mem = true;
34717 break;
34718 default:
34719 break;
34720 }
34721 break;
34722 case V4SF_FTYPE_V4SF_PCV2SF:
34723 case V2DF_FTYPE_V2DF_PCDOUBLE:
34724 nargs = 2;
34725 klass = load;
34726 memory = 1;
34727 break;
34728 case V8SF_FTYPE_PCV8SF_V8SI:
34729 case V4DF_FTYPE_PCV4DF_V4DI:
34730 case V4SF_FTYPE_PCV4SF_V4SI:
34731 case V2DF_FTYPE_PCV2DF_V2DI:
34732 case V8SI_FTYPE_PCV8SI_V8SI:
34733 case V4DI_FTYPE_PCV4DI_V4DI:
34734 case V4SI_FTYPE_PCV4SI_V4SI:
34735 case V2DI_FTYPE_PCV2DI_V2DI:
34736 nargs = 2;
34737 klass = load;
34738 memory = 0;
34739 break;
34740 case VOID_FTYPE_PV8DF_V8DF_QI:
34741 case VOID_FTYPE_PV16SF_V16SF_HI:
34742 case VOID_FTYPE_PV8DI_V8DI_QI:
34743 case VOID_FTYPE_PV16SI_V16SI_HI:
34744 switch (icode)
34745 {
34746 /* These builtins and instructions require the memory
34747 to be properly aligned. */
34748 case CODE_FOR_avx512f_storev16sf_mask:
34749 case CODE_FOR_avx512f_storev16si_mask:
34750 case CODE_FOR_avx512f_storev8df_mask:
34751 case CODE_FOR_avx512f_storev8di_mask:
34752 case CODE_FOR_avx512vl_storev8sf_mask:
34753 case CODE_FOR_avx512vl_storev8si_mask:
34754 case CODE_FOR_avx512vl_storev4df_mask:
34755 case CODE_FOR_avx512vl_storev4di_mask:
34756 case CODE_FOR_avx512vl_storev4sf_mask:
34757 case CODE_FOR_avx512vl_storev4si_mask:
34758 case CODE_FOR_avx512vl_storev2df_mask:
34759 case CODE_FOR_avx512vl_storev2di_mask:
34760 aligned_mem = true;
34761 break;
34762 default:
34763 break;
34764 }
34765 /* FALLTHRU */
34766 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34767 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34768 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34769 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34770 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34771 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34772 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34773 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34774 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34775 case VOID_FTYPE_PFLOAT_V4SF_QI:
34776 case VOID_FTYPE_PV8SI_V8DI_QI:
34777 case VOID_FTYPE_PV8HI_V8DI_QI:
34778 case VOID_FTYPE_PV16HI_V16SI_HI:
34779 case VOID_FTYPE_PV16QI_V8DI_QI:
34780 case VOID_FTYPE_PV16QI_V16SI_HI:
34781 nargs = 2;
34782 klass = store;
34783 /* Reserve memory operand for target. */
34784 memory = ARRAY_SIZE (args);
34785 break;
34786 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34787 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34788 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34789 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34790 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34791 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34792 nargs = 3;
34793 klass = load;
34794 memory = 0;
34795 switch (icode)
34796 {
34797 /* These builtins and instructions require the memory
34798 to be properly aligned. */
34799 case CODE_FOR_avx512f_loadv16sf_mask:
34800 case CODE_FOR_avx512f_loadv16si_mask:
34801 case CODE_FOR_avx512f_loadv8df_mask:
34802 case CODE_FOR_avx512f_loadv8di_mask:
34803 case CODE_FOR_avx512vl_loadv8sf_mask:
34804 case CODE_FOR_avx512vl_loadv8si_mask:
34805 case CODE_FOR_avx512vl_loadv4df_mask:
34806 case CODE_FOR_avx512vl_loadv4di_mask:
34807 case CODE_FOR_avx512vl_loadv4sf_mask:
34808 case CODE_FOR_avx512vl_loadv4si_mask:
34809 case CODE_FOR_avx512vl_loadv2df_mask:
34810 case CODE_FOR_avx512vl_loadv2di_mask:
34811 case CODE_FOR_avx512bw_loadv64qi_mask:
34812 case CODE_FOR_avx512vl_loadv32qi_mask:
34813 case CODE_FOR_avx512vl_loadv16qi_mask:
34814 case CODE_FOR_avx512bw_loadv32hi_mask:
34815 case CODE_FOR_avx512vl_loadv16hi_mask:
34816 case CODE_FOR_avx512vl_loadv8hi_mask:
34817 aligned_mem = true;
34818 break;
34819 default:
34820 break;
34821 }
34822 break;
34823 case VOID_FTYPE_UINT_UINT_UINT:
34824 case VOID_FTYPE_UINT64_UINT_UINT:
34825 case UCHAR_FTYPE_UINT_UINT_UINT:
34826 case UCHAR_FTYPE_UINT64_UINT_UINT:
34827 nargs = 3;
34828 klass = load;
34829 memory = ARRAY_SIZE (args);
34830 last_arg_constant = true;
34831 break;
34832 default:
34833 gcc_unreachable ();
34834 }
34835
34836 gcc_assert (nargs <= ARRAY_SIZE (args));
34837
34838 if (klass == store)
34839 {
34840 arg = CALL_EXPR_ARG (exp, 0);
34841 op = expand_normal (arg);
34842 gcc_assert (target == 0);
34843 if (memory)
34844 {
34845 op = ix86_zero_extend_to_Pmode (op);
34846 target = gen_rtx_MEM (tmode, op);
34847 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34848 on it. Try to improve it using get_pointer_alignment,
34849 and if the special builtin is one that requires strict
34850 mode alignment, also from its GET_MODE_ALIGNMENT.
34851 Failure to do so could lead to ix86_legitimate_combined_insn
34852 rejecting all changes to such insns. */
34853 unsigned int align = get_pointer_alignment (arg);
34854 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34855 align = GET_MODE_ALIGNMENT (tmode);
34856 if (MEM_ALIGN (target) < align)
34857 set_mem_align (target, align);
34858 }
34859 else
34860 target = force_reg (tmode, op);
34861 arg_adjust = 1;
34862 }
34863 else
34864 {
34865 arg_adjust = 0;
34866 if (optimize
34867 || target == 0
34868 || !register_operand (target, tmode)
34869 || GET_MODE (target) != tmode)
34870 target = gen_reg_rtx (tmode);
34871 }
34872
34873 for (i = 0; i < nargs; i++)
34874 {
34875 enum machine_mode mode = insn_p->operand[i + 1].mode;
34876 bool match;
34877
34878 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34879 op = expand_normal (arg);
34880 match = insn_p->operand[i + 1].predicate (op, mode);
34881
34882 if (last_arg_constant && (i + 1) == nargs)
34883 {
34884 if (!match)
34885 {
34886 if (icode == CODE_FOR_lwp_lwpvalsi3
34887 || icode == CODE_FOR_lwp_lwpinssi3
34888 || icode == CODE_FOR_lwp_lwpvaldi3
34889 || icode == CODE_FOR_lwp_lwpinsdi3)
34890 error ("the last argument must be a 32-bit immediate");
34891 else
34892 error ("the last argument must be an 8-bit immediate");
34893 return const0_rtx;
34894 }
34895 }
34896 else
34897 {
34898 if (i == memory)
34899 {
34900 /* This must be the memory operand. */
34901 op = ix86_zero_extend_to_Pmode (op);
34902 op = gen_rtx_MEM (mode, op);
34903 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34904 on it. Try to improve it using get_pointer_alignment,
34905 and if the special builtin is one that requires strict
34906 mode alignment, also from its GET_MODE_ALIGNMENT.
34907 Failure to do so could lead to ix86_legitimate_combined_insn
34908 rejecting all changes to such insns. */
34909 unsigned int align = get_pointer_alignment (arg);
34910 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34911 align = GET_MODE_ALIGNMENT (mode);
34912 if (MEM_ALIGN (op) < align)
34913 set_mem_align (op, align);
34914 }
34915 else
34916 {
34917 /* This must be a register. */
34918 if (VECTOR_MODE_P (mode))
34919 op = safe_vector_operand (op, mode);
34920
34921 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34922 op = copy_to_mode_reg (mode, op);
34923 else
34924 {
34925 op = copy_to_reg (op);
34926 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34927 }
34928 }
34929 }
34930
34931 args[i].op = op;
34932 args[i].mode = mode;
34933 }
34934
34935 switch (nargs)
34936 {
34937 case 0:
34938 pat = GEN_FCN (icode) (target);
34939 break;
34940 case 1:
34941 pat = GEN_FCN (icode) (target, args[0].op);
34942 break;
34943 case 2:
34944 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34945 break;
34946 case 3:
34947 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34948 break;
34949 default:
34950 gcc_unreachable ();
34951 }
34952
34953 if (! pat)
34954 return 0;
34955 emit_insn (pat);
34956 return klass == store ? 0 : target;
34957 }
34958
34959 /* Return the integer constant in ARG. Constrain it to be in the range
34960 of the subparts of VEC_TYPE; issue an error if not. */
34961
34962 static int
34963 get_element_number (tree vec_type, tree arg)
34964 {
34965 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34966
34967 if (!tree_fits_uhwi_p (arg)
34968 || (elt = tree_to_uhwi (arg), elt > max))
34969 {
34970 error ("selector must be an integer constant in the range 0..%wi", max);
34971 return 0;
34972 }
34973
34974 return elt;
34975 }
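/* Hypothetical example for illustration: for a V4HI vector argument the
selector must be in 0..3, so a call like
__builtin_ia32_vec_ext_v4hi (v, 5) is rejected with the error above. */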
34976
34977 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34978 ix86_expand_vector_init. We DO have language-level syntax for this, in
34979 the form of (type){ init-list }. Except that since we can't place emms
34980 instructions from inside the compiler, we can't allow the use of MMX
34981 registers unless the user explicitly asks for it. So we do *not* define
34982 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34983 we have builtins invoked by mmintrin.h that gives us license to emit
34984 these sorts of instructions. */
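/* For example (assuming the usual mmintrin.h mapping, which is defined
elsewhere): _mm_set_pi16 expands to __builtin_ia32_vec_init_v4hi, which
reaches ix86_expand_vec_init_builtin below via IX86_BUILTIN_VEC_INIT_V4HI. */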
34985
34986 static rtx
34987 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34988 {
34989 enum machine_mode tmode = TYPE_MODE (type);
34990 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
34991 int i, n_elt = GET_MODE_NUNITS (tmode);
34992 rtvec v = rtvec_alloc (n_elt);
34993
34994 gcc_assert (VECTOR_MODE_P (tmode));
34995 gcc_assert (call_expr_nargs (exp) == n_elt);
34996
34997 for (i = 0; i < n_elt; ++i)
34998 {
34999 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
35000 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
35001 }
35002
35003 if (!target || !register_operand (target, tmode))
35004 target = gen_reg_rtx (tmode);
35005
35006 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
35007 return target;
35008 }
35009
35010 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35011 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
35012 had a language-level syntax for referencing vector elements. */
35013
35014 static rtx
35015 ix86_expand_vec_ext_builtin (tree exp, rtx target)
35016 {
35017 enum machine_mode tmode, mode0;
35018 tree arg0, arg1;
35019 int elt;
35020 rtx op0;
35021
35022 arg0 = CALL_EXPR_ARG (exp, 0);
35023 arg1 = CALL_EXPR_ARG (exp, 1);
35024
35025 op0 = expand_normal (arg0);
35026 elt = get_element_number (TREE_TYPE (arg0), arg1);
35027
35028 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35029 mode0 = TYPE_MODE (TREE_TYPE (arg0));
35030 gcc_assert (VECTOR_MODE_P (mode0));
35031
35032 op0 = force_reg (mode0, op0);
35033
35034 if (optimize || !target || !register_operand (target, tmode))
35035 target = gen_reg_rtx (tmode);
35036
35037 ix86_expand_vector_extract (true, target, op0, elt);
35038
35039 return target;
35040 }
35041
35042 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35043 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
35044 a language-level syntax for referencing vector elements. */
35045
35046 static rtx
35047 ix86_expand_vec_set_builtin (tree exp)
35048 {
35049 enum machine_mode tmode, mode1;
35050 tree arg0, arg1, arg2;
35051 int elt;
35052 rtx op0, op1, target;
35053
35054 arg0 = CALL_EXPR_ARG (exp, 0);
35055 arg1 = CALL_EXPR_ARG (exp, 1);
35056 arg2 = CALL_EXPR_ARG (exp, 2);
35057
35058 tmode = TYPE_MODE (TREE_TYPE (arg0));
35059 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35060 gcc_assert (VECTOR_MODE_P (tmode));
35061
35062 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
35063 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
35064 elt = get_element_number (TREE_TYPE (arg0), arg2);
35065
35066 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
35067 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
35068
35069 op0 = force_reg (tmode, op0);
35070 op1 = force_reg (mode1, op1);
35071
35072 /* OP0 is the source of these builtin functions and shouldn't be
35073 modified. Create a copy, use it and return it as target. */
35074 target = gen_reg_rtx (tmode);
35075 emit_move_insn (target, op0);
35076 ix86_expand_vector_set (true, target, op1, elt);
35077
35078 return target;
35079 }
35080
35081 /* Expand an expression EXP that calls a built-in function,
35082 with result going to TARGET if that's convenient
35083 (and in mode MODE if that's convenient).
35084 SUBTARGET may be used as the target for computing one of EXP's operands.
35085 IGNORE is nonzero if the value is to be ignored. */
35086
35087 static rtx
35088 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
35089 enum machine_mode mode, int ignore)
35090 {
35091 const struct builtin_description *d;
35092 size_t i;
35093 enum insn_code icode;
35094 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
35095 tree arg0, arg1, arg2, arg3, arg4;
35096 rtx op0, op1, op2, op3, op4, pat, insn;
35097 enum machine_mode mode0, mode1, mode2, mode3, mode4;
35098 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
35099
35100 /* For CPU builtins that can be folded, fold first and expand the fold. */
35101 switch (fcode)
35102 {
35103 case IX86_BUILTIN_CPU_INIT:
35104 {
35105 /* Make it call __cpu_indicator_init in libgcc. */
35106 tree call_expr, fndecl, type;
35107 type = build_function_type_list (integer_type_node, NULL_TREE);
35108 fndecl = build_fn_decl ("__cpu_indicator_init", type);
35109 call_expr = build_call_expr (fndecl, 0);
35110 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
35111 }
35112 case IX86_BUILTIN_CPU_IS:
35113 case IX86_BUILTIN_CPU_SUPPORTS:
35114 {
35115 tree arg0 = CALL_EXPR_ARG (exp, 0);
35116 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35117 gcc_assert (fold_expr != NULL_TREE);
35118 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35119 }
35120 }
35121
35122 /* Determine whether the builtin function is available under the current ISA.
35123 Originally the builtin was not created if it wasn't applicable to the
35124 current ISA based on the command line switches. With function specific
35125 options, we need to check in the context of the function making the call
35126 whether it is supported. */
35127 if (ix86_builtins_isa[fcode].isa
35128 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
35129 {
35130 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
35131 NULL, (enum fpmath_unit) 0, false);
35132
35133 if (!opts)
35134 error ("%qE needs unknown isa option", fndecl);
35135 else
35136 {
35137 gcc_assert (opts != NULL);
35138 error ("%qE needs isa option %s", fndecl, opts);
35139 free (opts);
35140 }
35141 return const0_rtx;
35142 }
35143
35144 switch (fcode)
35145 {
35146 case IX86_BUILTIN_MASKMOVQ:
35147 case IX86_BUILTIN_MASKMOVDQU:
35148 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35149 ? CODE_FOR_mmx_maskmovq
35150 : CODE_FOR_sse2_maskmovdqu);
35151 /* Note the arg order is different from the operand order. */
35152 arg1 = CALL_EXPR_ARG (exp, 0);
35153 arg2 = CALL_EXPR_ARG (exp, 1);
35154 arg0 = CALL_EXPR_ARG (exp, 2);
35155 op0 = expand_normal (arg0);
35156 op1 = expand_normal (arg1);
35157 op2 = expand_normal (arg2);
35158 mode0 = insn_data[icode].operand[0].mode;
35159 mode1 = insn_data[icode].operand[1].mode;
35160 mode2 = insn_data[icode].operand[2].mode;
35161
35162 op0 = ix86_zero_extend_to_Pmode (op0);
35163 op0 = gen_rtx_MEM (mode1, op0);
35164
35165 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35166 op0 = copy_to_mode_reg (mode0, op0);
35167 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35168 op1 = copy_to_mode_reg (mode1, op1);
35169 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35170 op2 = copy_to_mode_reg (mode2, op2);
35171 pat = GEN_FCN (icode) (op0, op1, op2);
35172 if (! pat)
35173 return 0;
35174 emit_insn (pat);
35175 return 0;
35176
35177 case IX86_BUILTIN_LDMXCSR:
35178 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35179 target = assign_386_stack_local (SImode, SLOT_TEMP);
35180 emit_move_insn (target, op0);
35181 emit_insn (gen_sse_ldmxcsr (target));
35182 return 0;
35183
35184 case IX86_BUILTIN_STMXCSR:
35185 target = assign_386_stack_local (SImode, SLOT_TEMP);
35186 emit_insn (gen_sse_stmxcsr (target));
35187 return copy_to_mode_reg (SImode, target);
35188
35189 case IX86_BUILTIN_CLFLUSH:
35190 arg0 = CALL_EXPR_ARG (exp, 0);
35191 op0 = expand_normal (arg0);
35192 icode = CODE_FOR_sse2_clflush;
35193 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35194 op0 = ix86_zero_extend_to_Pmode (op0);
35195
35196 emit_insn (gen_sse2_clflush (op0));
35197 return 0;
35198
35199 case IX86_BUILTIN_CLFLUSHOPT:
35200 arg0 = CALL_EXPR_ARG (exp, 0);
35201 op0 = expand_normal (arg0);
35202 icode = CODE_FOR_clflushopt;
35203 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35204 op0 = ix86_zero_extend_to_Pmode (op0);
35205
35206 emit_insn (gen_clflushopt (op0));
35207 return 0;
35208
35209 case IX86_BUILTIN_MONITOR:
35210 arg0 = CALL_EXPR_ARG (exp, 0);
35211 arg1 = CALL_EXPR_ARG (exp, 1);
35212 arg2 = CALL_EXPR_ARG (exp, 2);
35213 op0 = expand_normal (arg0);
35214 op1 = expand_normal (arg1);
35215 op2 = expand_normal (arg2);
35216 if (!REG_P (op0))
35217 op0 = ix86_zero_extend_to_Pmode (op0);
35218 if (!REG_P (op1))
35219 op1 = copy_to_mode_reg (SImode, op1);
35220 if (!REG_P (op2))
35221 op2 = copy_to_mode_reg (SImode, op2);
35222 emit_insn (ix86_gen_monitor (op0, op1, op2));
35223 return 0;
35224
35225 case IX86_BUILTIN_MWAIT:
35226 arg0 = CALL_EXPR_ARG (exp, 0);
35227 arg1 = CALL_EXPR_ARG (exp, 1);
35228 op0 = expand_normal (arg0);
35229 op1 = expand_normal (arg1);
35230 if (!REG_P (op0))
35231 op0 = copy_to_mode_reg (SImode, op0);
35232 if (!REG_P (op1))
35233 op1 = copy_to_mode_reg (SImode, op1);
35234 emit_insn (gen_sse3_mwait (op0, op1));
35235 return 0;
35236
35237 case IX86_BUILTIN_VEC_INIT_V2SI:
35238 case IX86_BUILTIN_VEC_INIT_V4HI:
35239 case IX86_BUILTIN_VEC_INIT_V8QI:
35240 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35241
35242 case IX86_BUILTIN_VEC_EXT_V2DF:
35243 case IX86_BUILTIN_VEC_EXT_V2DI:
35244 case IX86_BUILTIN_VEC_EXT_V4SF:
35245 case IX86_BUILTIN_VEC_EXT_V4SI:
35246 case IX86_BUILTIN_VEC_EXT_V8HI:
35247 case IX86_BUILTIN_VEC_EXT_V2SI:
35248 case IX86_BUILTIN_VEC_EXT_V4HI:
35249 case IX86_BUILTIN_VEC_EXT_V16QI:
35250 return ix86_expand_vec_ext_builtin (exp, target);
35251
35252 case IX86_BUILTIN_VEC_SET_V2DI:
35253 case IX86_BUILTIN_VEC_SET_V4SF:
35254 case IX86_BUILTIN_VEC_SET_V4SI:
35255 case IX86_BUILTIN_VEC_SET_V8HI:
35256 case IX86_BUILTIN_VEC_SET_V4HI:
35257 case IX86_BUILTIN_VEC_SET_V16QI:
35258 return ix86_expand_vec_set_builtin (exp);
35259
35260 case IX86_BUILTIN_INFQ:
35261 case IX86_BUILTIN_HUGE_VALQ:
35262 {
35263 REAL_VALUE_TYPE inf;
35264 rtx tmp;
35265
35266 real_inf (&inf);
35267 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35268
35269 tmp = validize_mem (force_const_mem (mode, tmp));
35270
35271 if (target == 0)
35272 target = gen_reg_rtx (mode);
35273
35274 emit_move_insn (target, tmp);
35275 return target;
35276 }
35277
35278 case IX86_BUILTIN_RDPMC:
35279 case IX86_BUILTIN_RDTSC:
35280 case IX86_BUILTIN_RDTSCP:
35281
35282 op0 = gen_reg_rtx (DImode);
35283 op1 = gen_reg_rtx (DImode);
35284
35285 if (fcode == IX86_BUILTIN_RDPMC)
35286 {
35287 arg0 = CALL_EXPR_ARG (exp, 0);
35288 op2 = expand_normal (arg0);
35289 if (!register_operand (op2, SImode))
35290 op2 = copy_to_mode_reg (SImode, op2);
35291
35292 insn = (TARGET_64BIT
35293 ? gen_rdpmc_rex64 (op0, op1, op2)
35294 : gen_rdpmc (op0, op2));
35295 emit_insn (insn);
35296 }
35297 else if (fcode == IX86_BUILTIN_RDTSC)
35298 {
35299 insn = (TARGET_64BIT
35300 ? gen_rdtsc_rex64 (op0, op1)
35301 : gen_rdtsc (op0));
35302 emit_insn (insn);
35303 }
35304 else
35305 {
35306 op2 = gen_reg_rtx (SImode);
35307
35308 insn = (TARGET_64BIT
35309 ? gen_rdtscp_rex64 (op0, op1, op2)
35310 : gen_rdtscp (op0, op2));
35311 emit_insn (insn);
35312
35313 arg0 = CALL_EXPR_ARG (exp, 0);
35314 op4 = expand_normal (arg0);
35315 if (!address_operand (op4, VOIDmode))
35316 {
35317 op4 = convert_memory_address (Pmode, op4);
35318 op4 = copy_addr_to_reg (op4);
35319 }
35320 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35321 }
35322
35323 if (target == 0)
35324 {
35325 /* mode is VOIDmode if __builtin_rd* has been called
35326 without an lhs. */
35327 if (mode == VOIDmode)
35328 return target;
35329 target = gen_reg_rtx (mode);
35330 }
35331
35332 if (TARGET_64BIT)
35333 {
35334 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35335 op1, 1, OPTAB_DIRECT);
35336 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35337 op0, 1, OPTAB_DIRECT);
35338 }
35339
35340 emit_move_insn (target, op0);
35341 return target;
35342
35343 case IX86_BUILTIN_FXSAVE:
35344 case IX86_BUILTIN_FXRSTOR:
35345 case IX86_BUILTIN_FXSAVE64:
35346 case IX86_BUILTIN_FXRSTOR64:
35347 case IX86_BUILTIN_FNSTENV:
35348 case IX86_BUILTIN_FLDENV:
35349 mode0 = BLKmode;
35350 switch (fcode)
35351 {
35352 case IX86_BUILTIN_FXSAVE:
35353 icode = CODE_FOR_fxsave;
35354 break;
35355 case IX86_BUILTIN_FXRSTOR:
35356 icode = CODE_FOR_fxrstor;
35357 break;
35358 case IX86_BUILTIN_FXSAVE64:
35359 icode = CODE_FOR_fxsave64;
35360 break;
35361 case IX86_BUILTIN_FXRSTOR64:
35362 icode = CODE_FOR_fxrstor64;
35363 break;
35364 case IX86_BUILTIN_FNSTENV:
35365 icode = CODE_FOR_fnstenv;
35366 break;
35367 case IX86_BUILTIN_FLDENV:
35368 icode = CODE_FOR_fldenv;
35369 break;
35370 default:
35371 gcc_unreachable ();
35372 }
35373
35374 arg0 = CALL_EXPR_ARG (exp, 0);
35375 op0 = expand_normal (arg0);
35376
35377 if (!address_operand (op0, VOIDmode))
35378 {
35379 op0 = convert_memory_address (Pmode, op0);
35380 op0 = copy_addr_to_reg (op0);
35381 }
35382 op0 = gen_rtx_MEM (mode0, op0);
35383
35384 pat = GEN_FCN (icode) (op0);
35385 if (pat)
35386 emit_insn (pat);
35387 return 0;
35388
35389 case IX86_BUILTIN_XSAVE:
35390 case IX86_BUILTIN_XRSTOR:
35391 case IX86_BUILTIN_XSAVE64:
35392 case IX86_BUILTIN_XRSTOR64:
35393 case IX86_BUILTIN_XSAVEOPT:
35394 case IX86_BUILTIN_XSAVEOPT64:
35395 case IX86_BUILTIN_XSAVES:
35396 case IX86_BUILTIN_XRSTORS:
35397 case IX86_BUILTIN_XSAVES64:
35398 case IX86_BUILTIN_XRSTORS64:
35399 case IX86_BUILTIN_XSAVEC:
35400 case IX86_BUILTIN_XSAVEC64:
35401 arg0 = CALL_EXPR_ARG (exp, 0);
35402 arg1 = CALL_EXPR_ARG (exp, 1);
35403 op0 = expand_normal (arg0);
35404 op1 = expand_normal (arg1);
35405
35406 if (!address_operand (op0, VOIDmode))
35407 {
35408 op0 = convert_memory_address (Pmode, op0);
35409 op0 = copy_addr_to_reg (op0);
35410 }
35411 op0 = gen_rtx_MEM (BLKmode, op0);
35412
35413 op1 = force_reg (DImode, op1);
35414
35415 if (TARGET_64BIT)
35416 {
35417 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35418 NULL, 1, OPTAB_DIRECT);
35419 switch (fcode)
35420 {
35421 case IX86_BUILTIN_XSAVE:
35422 icode = CODE_FOR_xsave_rex64;
35423 break;
35424 case IX86_BUILTIN_XRSTOR:
35425 icode = CODE_FOR_xrstor_rex64;
35426 break;
35427 case IX86_BUILTIN_XSAVE64:
35428 icode = CODE_FOR_xsave64;
35429 break;
35430 case IX86_BUILTIN_XRSTOR64:
35431 icode = CODE_FOR_xrstor64;
35432 break;
35433 case IX86_BUILTIN_XSAVEOPT:
35434 icode = CODE_FOR_xsaveopt_rex64;
35435 break;
35436 case IX86_BUILTIN_XSAVEOPT64:
35437 icode = CODE_FOR_xsaveopt64;
35438 break;
35439 case IX86_BUILTIN_XSAVES:
35440 icode = CODE_FOR_xsaves_rex64;
35441 break;
35442 case IX86_BUILTIN_XRSTORS:
35443 icode = CODE_FOR_xrstors_rex64;
35444 break;
35445 case IX86_BUILTIN_XSAVES64:
35446 icode = CODE_FOR_xsaves64;
35447 break;
35448 case IX86_BUILTIN_XRSTORS64:
35449 icode = CODE_FOR_xrstors64;
35450 break;
35451 case IX86_BUILTIN_XSAVEC:
35452 icode = CODE_FOR_xsavec_rex64;
35453 break;
35454 case IX86_BUILTIN_XSAVEC64:
35455 icode = CODE_FOR_xsavec64;
35456 break;
35457 default:
35458 gcc_unreachable ();
35459 }
35460
35461 op2 = gen_lowpart (SImode, op2);
35462 op1 = gen_lowpart (SImode, op1);
35463 pat = GEN_FCN (icode) (op0, op1, op2);
35464 }
35465 else
35466 {
35467 switch (fcode)
35468 {
35469 case IX86_BUILTIN_XSAVE:
35470 icode = CODE_FOR_xsave;
35471 break;
35472 case IX86_BUILTIN_XRSTOR:
35473 icode = CODE_FOR_xrstor;
35474 break;
35475 case IX86_BUILTIN_XSAVEOPT:
35476 icode = CODE_FOR_xsaveopt;
35477 break;
35478 case IX86_BUILTIN_XSAVES:
35479 icode = CODE_FOR_xsaves;
35480 break;
35481 case IX86_BUILTIN_XRSTORS:
35482 icode = CODE_FOR_xrstors;
35483 break;
35484 case IX86_BUILTIN_XSAVEC:
35485 icode = CODE_FOR_xsavec;
35486 break;
35487 default:
35488 gcc_unreachable ();
35489 }
35490 pat = GEN_FCN (icode) (op0, op1);
35491 }
35492
35493 if (pat)
35494 emit_insn (pat);
35495 return 0;
35496
35497 case IX86_BUILTIN_LLWPCB:
35498 arg0 = CALL_EXPR_ARG (exp, 0);
35499 op0 = expand_normal (arg0);
35500 icode = CODE_FOR_lwp_llwpcb;
35501 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35502 op0 = ix86_zero_extend_to_Pmode (op0);
35503 emit_insn (gen_lwp_llwpcb (op0));
35504 return 0;
35505
35506 case IX86_BUILTIN_SLWPCB:
35507 icode = CODE_FOR_lwp_slwpcb;
35508 if (!target
35509 || !insn_data[icode].operand[0].predicate (target, Pmode))
35510 target = gen_reg_rtx (Pmode);
35511 emit_insn (gen_lwp_slwpcb (target));
35512 return target;
35513
35514 case IX86_BUILTIN_BEXTRI32:
35515 case IX86_BUILTIN_BEXTRI64:
35516 arg0 = CALL_EXPR_ARG (exp, 0);
35517 arg1 = CALL_EXPR_ARG (exp, 1);
35518 op0 = expand_normal (arg0);
35519 op1 = expand_normal (arg1);
35520 icode = (fcode == IX86_BUILTIN_BEXTRI32
35521 ? CODE_FOR_tbm_bextri_si
35522 : CODE_FOR_tbm_bextri_di);
35523 if (!CONST_INT_P (op1))
35524 {
35525 error ("last argument must be an immediate");
35526 return const0_rtx;
35527 }
35528 else
35529 {
35530 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35531 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35532 op1 = GEN_INT (length);
35533 op2 = GEN_INT (lsb_index);
35534 pat = GEN_FCN (icode) (target, op0, op1, op2);
35535 if (pat)
35536 emit_insn (pat);
35537 return target;
35538 }
35539
35540 case IX86_BUILTIN_RDRAND16_STEP:
35541 icode = CODE_FOR_rdrandhi_1;
35542 mode0 = HImode;
35543 goto rdrand_step;
35544
35545 case IX86_BUILTIN_RDRAND32_STEP:
35546 icode = CODE_FOR_rdrandsi_1;
35547 mode0 = SImode;
35548 goto rdrand_step;
35549
35550 case IX86_BUILTIN_RDRAND64_STEP:
35551 icode = CODE_FOR_rdranddi_1;
35552 mode0 = DImode;
35553
35554 rdrand_step:
35555 op0 = gen_reg_rtx (mode0);
35556 emit_insn (GEN_FCN (icode) (op0));
35557
35558 arg0 = CALL_EXPR_ARG (exp, 0);
35559 op1 = expand_normal (arg0);
35560 if (!address_operand (op1, VOIDmode))
35561 {
35562 op1 = convert_memory_address (Pmode, op1);
35563 op1 = copy_addr_to_reg (op1);
35564 }
35565 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35566
35567 op1 = gen_reg_rtx (SImode);
35568 emit_move_insn (op1, CONST1_RTX (SImode));
35569
35570 /* Emit SImode conditional move. */
35571 if (mode0 == HImode)
35572 {
35573 op2 = gen_reg_rtx (SImode);
35574 emit_insn (gen_zero_extendhisi2 (op2, op0));
35575 }
35576 else if (mode0 == SImode)
35577 op2 = op0;
35578 else
35579 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35580
35581 if (target == 0
35582 || !register_operand (target, SImode))
35583 target = gen_reg_rtx (SImode);
35584
35585 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35586 const0_rtx);
35587 emit_insn (gen_rtx_SET (VOIDmode, target,
35588 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35589 return target;
35590
35591 case IX86_BUILTIN_RDSEED16_STEP:
35592 icode = CODE_FOR_rdseedhi_1;
35593 mode0 = HImode;
35594 goto rdseed_step;
35595
35596 case IX86_BUILTIN_RDSEED32_STEP:
35597 icode = CODE_FOR_rdseedsi_1;
35598 mode0 = SImode;
35599 goto rdseed_step;
35600
35601 case IX86_BUILTIN_RDSEED64_STEP:
35602 icode = CODE_FOR_rdseeddi_1;
35603 mode0 = DImode;
35604
35605 rdseed_step:
35606 op0 = gen_reg_rtx (mode0);
35607 emit_insn (GEN_FCN (icode) (op0));
35608
35609 arg0 = CALL_EXPR_ARG (exp, 0);
35610 op1 = expand_normal (arg0);
35611 if (!address_operand (op1, VOIDmode))
35612 {
35613 op1 = convert_memory_address (Pmode, op1);
35614 op1 = copy_addr_to_reg (op1);
35615 }
35616 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35617
35618 op2 = gen_reg_rtx (QImode);
35619
35620 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35621 const0_rtx);
35622 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35623
35624 if (target == 0
35625 || !register_operand (target, SImode))
35626 target = gen_reg_rtx (SImode);
35627
35628 emit_insn (gen_zero_extendqisi2 (target, op2));
35629 return target;
35630
35631 case IX86_BUILTIN_ADDCARRYX32:
35632 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35633 mode0 = SImode;
35634 goto addcarryx;
35635
35636 case IX86_BUILTIN_ADDCARRYX64:
35637 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35638 mode0 = DImode;
35639
35640 addcarryx:
35641 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35642 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35643 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35644 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35645
35646 op0 = gen_reg_rtx (QImode);
35647
35648 /* Generate CF from input operand. */
35649 op1 = expand_normal (arg0);
35650 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35651 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
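/* Adding 0xff to c_in in QImode sets the carry flag exactly when c_in is
nonzero, so CF now holds the incoming carry bit. */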
35652
35653 /* Gen ADCX instruction to compute X+Y+CF. */
35654 op2 = expand_normal (arg1);
35655 op3 = expand_normal (arg2);
35656
35657 if (!REG_P (op2))
35658 op2 = copy_to_mode_reg (mode0, op2);
35659 if (!REG_P (op3))
35660 op3 = copy_to_mode_reg (mode0, op3);
35661
35662 op0 = gen_reg_rtx (mode0);
35663
35664 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35665 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35666 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35667
35668 /* Store the result. */
35669 op4 = expand_normal (arg3);
35670 if (!address_operand (op4, VOIDmode))
35671 {
35672 op4 = convert_memory_address (Pmode, op4);
35673 op4 = copy_addr_to_reg (op4);
35674 }
35675 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35676
35677 /* Return current CF value. */
35678 if (target == 0)
35679 target = gen_reg_rtx (QImode);
35680
35681 PUT_MODE (pat, QImode);
35682 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35683 return target;
35684
35685 case IX86_BUILTIN_READ_FLAGS:
35686 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35687
35688 if (optimize
35689 || target == NULL_RTX
35690 || !nonimmediate_operand (target, word_mode)
35691 || GET_MODE (target) != word_mode)
35692 target = gen_reg_rtx (word_mode);
35693
35694 emit_insn (gen_pop (target));
35695 return target;
35696
35697 case IX86_BUILTIN_WRITE_FLAGS:
35698
35699 arg0 = CALL_EXPR_ARG (exp, 0);
35700 op0 = expand_normal (arg0);
35701 if (!general_no_elim_operand (op0, word_mode))
35702 op0 = copy_to_mode_reg (word_mode, op0);
35703
35704 emit_insn (gen_push (op0));
35705 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35706 return 0;
35707
35708 case IX86_BUILTIN_KORTESTC16:
35709 icode = CODE_FOR_kortestchi;
35710 mode0 = HImode;
35711 mode1 = CCCmode;
35712 goto kortest;
35713
35714 case IX86_BUILTIN_KORTESTZ16:
35715 icode = CODE_FOR_kortestzhi;
35716 mode0 = HImode;
35717 mode1 = CCZmode;
35718
35719 kortest:
35720 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35721 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35722 op0 = expand_normal (arg0);
35723 op1 = expand_normal (arg1);
35724
35725 op0 = copy_to_reg (op0);
35726 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35727 op1 = copy_to_reg (op1);
35728 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35729
35730 target = gen_reg_rtx (QImode);
35731 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
35732
35733 /* Emit kortest. */
35734 emit_insn (GEN_FCN (icode) (op0, op1));
35735 /* And use setcc to return result from flags. */
35736 ix86_expand_setcc (target, EQ,
35737 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35738 return target;
35739
35740 case IX86_BUILTIN_GATHERSIV2DF:
35741 icode = CODE_FOR_avx2_gathersiv2df;
35742 goto gather_gen;
35743 case IX86_BUILTIN_GATHERSIV4DF:
35744 icode = CODE_FOR_avx2_gathersiv4df;
35745 goto gather_gen;
35746 case IX86_BUILTIN_GATHERDIV2DF:
35747 icode = CODE_FOR_avx2_gatherdiv2df;
35748 goto gather_gen;
35749 case IX86_BUILTIN_GATHERDIV4DF:
35750 icode = CODE_FOR_avx2_gatherdiv4df;
35751 goto gather_gen;
35752 case IX86_BUILTIN_GATHERSIV4SF:
35753 icode = CODE_FOR_avx2_gathersiv4sf;
35754 goto gather_gen;
35755 case IX86_BUILTIN_GATHERSIV8SF:
35756 icode = CODE_FOR_avx2_gathersiv8sf;
35757 goto gather_gen;
35758 case IX86_BUILTIN_GATHERDIV4SF:
35759 icode = CODE_FOR_avx2_gatherdiv4sf;
35760 goto gather_gen;
35761 case IX86_BUILTIN_GATHERDIV8SF:
35762 icode = CODE_FOR_avx2_gatherdiv8sf;
35763 goto gather_gen;
35764 case IX86_BUILTIN_GATHERSIV2DI:
35765 icode = CODE_FOR_avx2_gathersiv2di;
35766 goto gather_gen;
35767 case IX86_BUILTIN_GATHERSIV4DI:
35768 icode = CODE_FOR_avx2_gathersiv4di;
35769 goto gather_gen;
35770 case IX86_BUILTIN_GATHERDIV2DI:
35771 icode = CODE_FOR_avx2_gatherdiv2di;
35772 goto gather_gen;
35773 case IX86_BUILTIN_GATHERDIV4DI:
35774 icode = CODE_FOR_avx2_gatherdiv4di;
35775 goto gather_gen;
35776 case IX86_BUILTIN_GATHERSIV4SI:
35777 icode = CODE_FOR_avx2_gathersiv4si;
35778 goto gather_gen;
35779 case IX86_BUILTIN_GATHERSIV8SI:
35780 icode = CODE_FOR_avx2_gathersiv8si;
35781 goto gather_gen;
35782 case IX86_BUILTIN_GATHERDIV4SI:
35783 icode = CODE_FOR_avx2_gatherdiv4si;
35784 goto gather_gen;
35785 case IX86_BUILTIN_GATHERDIV8SI:
35786 icode = CODE_FOR_avx2_gatherdiv8si;
35787 goto gather_gen;
35788 case IX86_BUILTIN_GATHERALTSIV4DF:
35789 icode = CODE_FOR_avx2_gathersiv4df;
35790 goto gather_gen;
35791 case IX86_BUILTIN_GATHERALTDIV8SF:
35792 icode = CODE_FOR_avx2_gatherdiv8sf;
35793 goto gather_gen;
35794 case IX86_BUILTIN_GATHERALTSIV4DI:
35795 icode = CODE_FOR_avx2_gathersiv4di;
35796 goto gather_gen;
35797 case IX86_BUILTIN_GATHERALTDIV8SI:
35798 icode = CODE_FOR_avx2_gatherdiv8si;
35799 goto gather_gen;
35800 case IX86_BUILTIN_GATHER3SIV16SF:
35801 icode = CODE_FOR_avx512f_gathersiv16sf;
35802 goto gather_gen;
35803 case IX86_BUILTIN_GATHER3SIV8DF:
35804 icode = CODE_FOR_avx512f_gathersiv8df;
35805 goto gather_gen;
35806 case IX86_BUILTIN_GATHER3DIV16SF:
35807 icode = CODE_FOR_avx512f_gatherdiv16sf;
35808 goto gather_gen;
35809 case IX86_BUILTIN_GATHER3DIV8DF:
35810 icode = CODE_FOR_avx512f_gatherdiv8df;
35811 goto gather_gen;
35812 case IX86_BUILTIN_GATHER3SIV16SI:
35813 icode = CODE_FOR_avx512f_gathersiv16si;
35814 goto gather_gen;
35815 case IX86_BUILTIN_GATHER3SIV8DI:
35816 icode = CODE_FOR_avx512f_gathersiv8di;
35817 goto gather_gen;
35818 case IX86_BUILTIN_GATHER3DIV16SI:
35819 icode = CODE_FOR_avx512f_gatherdiv16si;
35820 goto gather_gen;
35821 case IX86_BUILTIN_GATHER3DIV8DI:
35822 icode = CODE_FOR_avx512f_gatherdiv8di;
35823 goto gather_gen;
35824 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35825 icode = CODE_FOR_avx512f_gathersiv8df;
35826 goto gather_gen;
35827 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35828 icode = CODE_FOR_avx512f_gatherdiv16sf;
35829 goto gather_gen;
35830 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35831 icode = CODE_FOR_avx512f_gathersiv8di;
35832 goto gather_gen;
35833 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35834 icode = CODE_FOR_avx512f_gatherdiv16si;
35835 goto gather_gen;
35836 case IX86_BUILTIN_SCATTERSIV16SF:
35837 icode = CODE_FOR_avx512f_scattersiv16sf;
35838 goto scatter_gen;
35839 case IX86_BUILTIN_SCATTERSIV8DF:
35840 icode = CODE_FOR_avx512f_scattersiv8df;
35841 goto scatter_gen;
35842 case IX86_BUILTIN_SCATTERDIV16SF:
35843 icode = CODE_FOR_avx512f_scatterdiv16sf;
35844 goto scatter_gen;
35845 case IX86_BUILTIN_SCATTERDIV8DF:
35846 icode = CODE_FOR_avx512f_scatterdiv8df;
35847 goto scatter_gen;
35848 case IX86_BUILTIN_SCATTERSIV16SI:
35849 icode = CODE_FOR_avx512f_scattersiv16si;
35850 goto scatter_gen;
35851 case IX86_BUILTIN_SCATTERSIV8DI:
35852 icode = CODE_FOR_avx512f_scattersiv8di;
35853 goto scatter_gen;
35854 case IX86_BUILTIN_SCATTERDIV16SI:
35855 icode = CODE_FOR_avx512f_scatterdiv16si;
35856 goto scatter_gen;
35857 case IX86_BUILTIN_SCATTERDIV8DI:
35858 icode = CODE_FOR_avx512f_scatterdiv8di;
35859 goto scatter_gen;
35860
35861 case IX86_BUILTIN_GATHERPFDPD:
35862 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
35863 goto vec_prefetch_gen;
35864 case IX86_BUILTIN_GATHERPFDPS:
35865 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
35866 goto vec_prefetch_gen;
35867 case IX86_BUILTIN_GATHERPFQPD:
35868 icode = CODE_FOR_avx512pf_gatherpfv8didf;
35869 goto vec_prefetch_gen;
35870 case IX86_BUILTIN_GATHERPFQPS:
35871 icode = CODE_FOR_avx512pf_gatherpfv8disf;
35872 goto vec_prefetch_gen;
35873 case IX86_BUILTIN_SCATTERPFDPD:
35874 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
35875 goto vec_prefetch_gen;
35876 case IX86_BUILTIN_SCATTERPFDPS:
35877 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
35878 goto vec_prefetch_gen;
35879 case IX86_BUILTIN_SCATTERPFQPD:
35880 icode = CODE_FOR_avx512pf_scatterpfv8didf;
35881 goto vec_prefetch_gen;
35882 case IX86_BUILTIN_SCATTERPFQPS:
35883 icode = CODE_FOR_avx512pf_scatterpfv8disf;
35884 goto vec_prefetch_gen;
35885
35886 gather_gen:
35887 rtx half;
35888 rtx (*gen) (rtx, rtx);
35889
35890 arg0 = CALL_EXPR_ARG (exp, 0);
35891 arg1 = CALL_EXPR_ARG (exp, 1);
35892 arg2 = CALL_EXPR_ARG (exp, 2);
35893 arg3 = CALL_EXPR_ARG (exp, 3);
35894 arg4 = CALL_EXPR_ARG (exp, 4);
35895 op0 = expand_normal (arg0);
35896 op1 = expand_normal (arg1);
35897 op2 = expand_normal (arg2);
35898 op3 = expand_normal (arg3);
35899 op4 = expand_normal (arg4);
35900 /* Note the arg order is different from the operand order. */
35901 mode0 = insn_data[icode].operand[1].mode;
35902 mode2 = insn_data[icode].operand[3].mode;
35903 mode3 = insn_data[icode].operand[4].mode;
35904 mode4 = insn_data[icode].operand[5].mode;
35905
35906 if (target == NULL_RTX
35907 || GET_MODE (target) != insn_data[icode].operand[0].mode
35908 || !insn_data[icode].operand[0].predicate (target,
35909 GET_MODE (target)))
35910 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35911 else
35912 subtarget = target;
35913
35914 switch (fcode)
35915 {
35916 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35917 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35918 half = gen_reg_rtx (V8SImode);
35919 if (!nonimmediate_operand (op2, V16SImode))
35920 op2 = copy_to_mode_reg (V16SImode, op2);
35921 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35922 op2 = half;
35923 break;
35924 case IX86_BUILTIN_GATHERALTSIV4DF:
35925 case IX86_BUILTIN_GATHERALTSIV4DI:
35926 half = gen_reg_rtx (V4SImode);
35927 if (!nonimmediate_operand (op2, V8SImode))
35928 op2 = copy_to_mode_reg (V8SImode, op2);
35929 emit_insn (gen_vec_extract_lo_v8si (half, op2));
35930 op2 = half;
35931 break;
35932 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35933 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35934 half = gen_reg_rtx (mode0);
35935 if (mode0 == V8SFmode)
35936 gen = gen_vec_extract_lo_v16sf;
35937 else
35938 gen = gen_vec_extract_lo_v16si;
35939 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35940 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35941 emit_insn (gen (half, op0));
35942 op0 = half;
35943 if (GET_MODE (op3) != VOIDmode)
35944 {
35945 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35946 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35947 emit_insn (gen (half, op3));
35948 op3 = half;
35949 }
35950 break;
35951 case IX86_BUILTIN_GATHERALTDIV8SF:
35952 case IX86_BUILTIN_GATHERALTDIV8SI:
35953 half = gen_reg_rtx (mode0);
35954 if (mode0 == V4SFmode)
35955 gen = gen_vec_extract_lo_v8sf;
35956 else
35957 gen = gen_vec_extract_lo_v8si;
35958 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35959 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35960 emit_insn (gen (half, op0));
35961 op0 = half;
35962 if (GET_MODE (op3) != VOIDmode)
35963 {
35964 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35965 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35966 emit_insn (gen (half, op3));
35967 op3 = half;
35968 }
35969 break;
35970 default:
35971 break;
35972 }
35973
35974 /* Force memory operand only with base register here. But we
35975 don't want to do it on memory operand for other builtin
35976 functions. */
35977 op1 = ix86_zero_extend_to_Pmode (op1);
35978
35979 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35980 op0 = copy_to_mode_reg (mode0, op0);
35981 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
35982 op1 = copy_to_mode_reg (Pmode, op1);
35983 if (!insn_data[icode].operand[3].predicate (op2, mode2))
35984 op2 = copy_to_mode_reg (mode2, op2);
35985 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
35986 {
35987 if (!insn_data[icode].operand[4].predicate (op3, mode3))
35988 op3 = copy_to_mode_reg (mode3, op3);
35989 }
35990 else
35991 {
35992 op3 = copy_to_reg (op3);
35993 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
35994 }
35995 if (!insn_data[icode].operand[5].predicate (op4, mode4))
35996 {
35997 error ("the last argument must be scale 1, 2, 4, 8");
35998 return const0_rtx;
35999 }
36000
36001 /* Optimize. If mask is known to have all high bits set,
36002 replace op0 with pc_rtx to signal that the instruction
36003 overwrites the whole destination and doesn't use its
36004 previous contents. */
36005 if (optimize)
36006 {
36007 if (TREE_CODE (arg3) == INTEGER_CST)
36008 {
36009 if (integer_all_onesp (arg3))
36010 op0 = pc_rtx;
36011 }
36012 else if (TREE_CODE (arg3) == VECTOR_CST)
36013 {
36014 unsigned int negative = 0;
36015 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
36016 {
36017 tree cst = VECTOR_CST_ELT (arg3, i);
36018 if (TREE_CODE (cst) == INTEGER_CST
36019 && tree_int_cst_sign_bit (cst))
36020 negative++;
36021 else if (TREE_CODE (cst) == REAL_CST
36022 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
36023 negative++;
36024 }
36025 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
36026 op0 = pc_rtx;
36027 }
36028 else if (TREE_CODE (arg3) == SSA_NAME
36029 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
36030 {
36031 /* Recognize also when mask is like:
36032 __v2df src = _mm_setzero_pd ();
36033 __v2df mask = _mm_cmpeq_pd (src, src);
36034 or
36035 __v8sf src = _mm256_setzero_ps ();
36036 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
36037 as that is a cheaper way to load all ones into
36038 a register than having to load a constant from
36039 memory. */
36040 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
36041 if (is_gimple_call (def_stmt))
36042 {
36043 tree fndecl = gimple_call_fndecl (def_stmt);
36044 if (fndecl
36045 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
36046 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
36047 {
36048 case IX86_BUILTIN_CMPPD:
36049 case IX86_BUILTIN_CMPPS:
36050 case IX86_BUILTIN_CMPPD256:
36051 case IX86_BUILTIN_CMPPS256:
36052 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
36053 break;
36054 /* FALLTHRU */
36055 case IX86_BUILTIN_CMPEQPD:
36056 case IX86_BUILTIN_CMPEQPS:
36057 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
36058 && initializer_zerop (gimple_call_arg (def_stmt,
36059 1)))
36060 op0 = pc_rtx;
36061 break;
36062 default:
36063 break;
36064 }
36065 }
36066 }
36067 }
36068
36069 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
36070 if (! pat)
36071 return const0_rtx;
36072 emit_insn (pat);
36073
36074 switch (fcode)
36075 {
36076 case IX86_BUILTIN_GATHER3DIV16SF:
36077 if (target == NULL_RTX)
36078 target = gen_reg_rtx (V8SFmode);
36079 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
36080 break;
36081 case IX86_BUILTIN_GATHER3DIV16SI:
36082 if (target == NULL_RTX)
36083 target = gen_reg_rtx (V8SImode);
36084 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
36085 break;
36086 case IX86_BUILTIN_GATHERDIV8SF:
36087 if (target == NULL_RTX)
36088 target = gen_reg_rtx (V4SFmode);
36089 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
36090 break;
36091 case IX86_BUILTIN_GATHERDIV8SI:
36092 if (target == NULL_RTX)
36093 target = gen_reg_rtx (V4SImode);
36094 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
36095 break;
36096 default:
36097 target = subtarget;
36098 break;
36099 }
36100 return target;
36101
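/* Note (illustration only, not from the original sources): for the
   *DIV16SF/*DIV16SI gathers handled just above, the insn pattern's
   destination is a full 512-bit vector (V16SF/V16SI) while the builtin's
   result type only has eight elements, which is why the low half of
   SUBTARGET is extracted into TARGET before returning.  */
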
36102 scatter_gen:
36103 arg0 = CALL_EXPR_ARG (exp, 0);
36104 arg1 = CALL_EXPR_ARG (exp, 1);
36105 arg2 = CALL_EXPR_ARG (exp, 2);
36106 arg3 = CALL_EXPR_ARG (exp, 3);
36107 arg4 = CALL_EXPR_ARG (exp, 4);
36108 op0 = expand_normal (arg0);
36109 op1 = expand_normal (arg1);
36110 op2 = expand_normal (arg2);
36111 op3 = expand_normal (arg3);
36112 op4 = expand_normal (arg4);
36113 mode1 = insn_data[icode].operand[1].mode;
36114 mode2 = insn_data[icode].operand[2].mode;
36115 mode3 = insn_data[icode].operand[3].mode;
36116 mode4 = insn_data[icode].operand[4].mode;
36117
36118 /* Force memory operand only with base register here. But we
36119 don't want to do it on memory operand for other builtin
36120 functions. */
36121 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
36122
36123 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36124 op0 = copy_to_mode_reg (Pmode, op0);
36125
36126 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
36127 {
36128 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36129 op1 = copy_to_mode_reg (mode1, op1);
36130 }
36131 else
36132 {
36133 op1 = copy_to_reg (op1);
36134 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
36135 }
36136
36137 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36138 op2 = copy_to_mode_reg (mode2, op2);
36139
36140 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36141 op3 = copy_to_mode_reg (mode3, op3);
36142
36143 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36144 {
36145 error ("the last argument must be scale 1, 2, 4, 8");
36146 return const0_rtx;
36147 }
36148
36149 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36150 if (! pat)
36151 return const0_rtx;
36152
36153 emit_insn (pat);
36154 return 0;
36155
36156 vec_prefetch_gen:
36157 arg0 = CALL_EXPR_ARG (exp, 0);
36158 arg1 = CALL_EXPR_ARG (exp, 1);
36159 arg2 = CALL_EXPR_ARG (exp, 2);
36160 arg3 = CALL_EXPR_ARG (exp, 3);
36161 arg4 = CALL_EXPR_ARG (exp, 4);
36162 op0 = expand_normal (arg0);
36163 op1 = expand_normal (arg1);
36164 op2 = expand_normal (arg2);
36165 op3 = expand_normal (arg3);
36166 op4 = expand_normal (arg4);
36167 mode0 = insn_data[icode].operand[0].mode;
36168 mode1 = insn_data[icode].operand[1].mode;
36169 mode3 = insn_data[icode].operand[3].mode;
36170 mode4 = insn_data[icode].operand[4].mode;
36171
36172 if (GET_MODE (op0) == mode0
36173 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
36174 {
36175 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36176 op0 = copy_to_mode_reg (mode0, op0);
36177 }
36178 else if (op0 != constm1_rtx)
36179 {
36180 op0 = copy_to_reg (op0);
36181 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36182 }
36183
36184 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36185 op1 = copy_to_mode_reg (mode1, op1);
36186
36187 /* Force memory operand only with base register here. But we
36188 don't want to do it on memory operand for other builtin
36189 functions. */
36190 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36191
36192 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36193 op2 = copy_to_mode_reg (Pmode, op2);
36194
36195 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36196 {
36197 error ("the fourth argument must be scale 1, 2, 4, 8");
36198 return const0_rtx;
36199 }
36200
36201 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36202 {
36203 error ("incorrect hint operand");
36204 return const0_rtx;
36205 }
36206
36207 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36208 if (! pat)
36209 return const0_rtx;
36210
36211 emit_insn (pat);
36212
36213 return 0;
36214
36215 case IX86_BUILTIN_XABORT:
36216 icode = CODE_FOR_xabort;
36217 arg0 = CALL_EXPR_ARG (exp, 0);
36218 op0 = expand_normal (arg0);
36219 mode0 = insn_data[icode].operand[0].mode;
36220 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36221 {
36222 error ("the xabort's argument must be an 8-bit immediate");
36223 return const0_rtx;
36224 }
36225 emit_insn (gen_xabort (op0));
36226 return 0;
36227
36228 default:
36229 break;
36230 }
36231
36232 for (i = 0, d = bdesc_special_args;
36233 i < ARRAY_SIZE (bdesc_special_args);
36234 i++, d++)
36235 if (d->code == fcode)
36236 return ix86_expand_special_args_builtin (d, exp, target);
36237
36238 for (i = 0, d = bdesc_args;
36239 i < ARRAY_SIZE (bdesc_args);
36240 i++, d++)
36241 if (d->code == fcode)
36242 switch (fcode)
36243 {
36244 case IX86_BUILTIN_FABSQ:
36245 case IX86_BUILTIN_COPYSIGNQ:
36246 if (!TARGET_SSE)
36247 /* Emit a normal call if SSE isn't available. */
36248 return expand_call (exp, target, ignore);
36249 default:
36250 return ix86_expand_args_builtin (d, exp, target);
36251 }
36252
36253 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36254 if (d->code == fcode)
36255 return ix86_expand_sse_comi (d, exp, target);
36256
36257 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36258 if (d->code == fcode)
36259 return ix86_expand_round_builtin (d, exp, target);
36260
36261 for (i = 0, d = bdesc_pcmpestr;
36262 i < ARRAY_SIZE (bdesc_pcmpestr);
36263 i++, d++)
36264 if (d->code == fcode)
36265 return ix86_expand_sse_pcmpestr (d, exp, target);
36266
36267 for (i = 0, d = bdesc_pcmpistr;
36268 i < ARRAY_SIZE (bdesc_pcmpistr);
36269 i++, d++)
36270 if (d->code == fcode)
36271 return ix86_expand_sse_pcmpistr (d, exp, target);
36272
36273 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36274 if (d->code == fcode)
36275 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36276 (enum ix86_builtin_func_type)
36277 d->flag, d->comparison);
36278
36279 gcc_unreachable ();
36280 }
36281
36282 /* This returns the target-specific builtin with code CODE if
36283 current_function_decl has visibility on this builtin, which is checked
36284 using isa flags. Returns NULL_TREE otherwise. */
36285
36286 static tree ix86_get_builtin (enum ix86_builtins code)
36287 {
36288 struct cl_target_option *opts;
36289 tree target_tree = NULL_TREE;
36290
36291 /* Determine the isa flags of current_function_decl. */
36292
36293 if (current_function_decl)
36294 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36295
36296 if (target_tree == NULL)
36297 target_tree = target_option_default_node;
36298
36299 opts = TREE_TARGET_OPTION (target_tree);
36300
36301 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36302 return ix86_builtin_decl (code, true);
36303 else
36304 return NULL_TREE;
36305 }
36306
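/* Illustrative sketch (an assumption, not part of the original sources):
   the visibility check above keys off the ISA flags of the current
   function.  For example, in a translation unit compiled with plain
   -msse2:

     __attribute__((target ("avx512f")))
     void f (void) { ... }   // ix86_get_builtin (IX86_BUILTIN_SQRTPD512)
                             // returns the builtin decl here
     void g (void) { ... }   // ...but returns NULL_TREE here, since the
                             // AVX-512F ISA flag is not enabled

   The functions f and g are hypothetical and only for illustration.  */
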
36307 /* Returns a function decl for a vectorized version of the builtin function
36308 with builtin function code FN and the result vector type TYPE, or NULL_TREE
36309 if it is not available. */
36310
36311 static tree
36312 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36313 tree type_in)
36314 {
36315 enum machine_mode in_mode, out_mode;
36316 int in_n, out_n;
36317 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36318
36319 if (TREE_CODE (type_out) != VECTOR_TYPE
36320 || TREE_CODE (type_in) != VECTOR_TYPE
36321 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36322 return NULL_TREE;
36323
36324 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36325 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36326 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36327 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36328
36329 switch (fn)
36330 {
36331 case BUILT_IN_SQRT:
36332 if (out_mode == DFmode && in_mode == DFmode)
36333 {
36334 if (out_n == 2 && in_n == 2)
36335 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36336 else if (out_n == 4 && in_n == 4)
36337 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36338 else if (out_n == 8 && in_n == 8)
36339 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36340 }
36341 break;
36342
36343 case BUILT_IN_EXP2F:
36344 if (out_mode == SFmode && in_mode == SFmode)
36345 {
36346 if (out_n == 16 && in_n == 16)
36347 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36348 }
36349 break;
36350
36351 case BUILT_IN_SQRTF:
36352 if (out_mode == SFmode && in_mode == SFmode)
36353 {
36354 if (out_n == 4 && in_n == 4)
36355 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36356 else if (out_n == 8 && in_n == 8)
36357 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36358 else if (out_n == 16 && in_n == 16)
36359 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36360 }
36361 break;
36362
36363 case BUILT_IN_IFLOOR:
36364 case BUILT_IN_LFLOOR:
36365 case BUILT_IN_LLFLOOR:
36366 /* The round insn does not trap on denormals. */
36367 if (flag_trapping_math || !TARGET_ROUND)
36368 break;
36369
36370 if (out_mode == SImode && in_mode == DFmode)
36371 {
36372 if (out_n == 4 && in_n == 2)
36373 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36374 else if (out_n == 8 && in_n == 4)
36375 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36376 else if (out_n == 16 && in_n == 8)
36377 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36378 }
36379 break;
36380
36381 case BUILT_IN_IFLOORF:
36382 case BUILT_IN_LFLOORF:
36383 case BUILT_IN_LLFLOORF:
36384 /* The round insn does not trap on denormals. */
36385 if (flag_trapping_math || !TARGET_ROUND)
36386 break;
36387
36388 if (out_mode == SImode && in_mode == SFmode)
36389 {
36390 if (out_n == 4 && in_n == 4)
36391 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36392 else if (out_n == 8 && in_n == 8)
36393 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36394 }
36395 break;
36396
36397 case BUILT_IN_ICEIL:
36398 case BUILT_IN_LCEIL:
36399 case BUILT_IN_LLCEIL:
36400 /* The round insn does not trap on denormals. */
36401 if (flag_trapping_math || !TARGET_ROUND)
36402 break;
36403
36404 if (out_mode == SImode && in_mode == DFmode)
36405 {
36406 if (out_n == 4 && in_n == 2)
36407 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36408 else if (out_n == 8 && in_n == 4)
36409 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36410 else if (out_n == 16 && in_n == 8)
36411 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36412 }
36413 break;
36414
36415 case BUILT_IN_ICEILF:
36416 case BUILT_IN_LCEILF:
36417 case BUILT_IN_LLCEILF:
36418 /* The round insn does not trap on denormals. */
36419 if (flag_trapping_math || !TARGET_ROUND)
36420 break;
36421
36422 if (out_mode == SImode && in_mode == SFmode)
36423 {
36424 if (out_n == 4 && in_n == 4)
36425 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36426 else if (out_n == 8 && in_n == 8)
36427 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36428 }
36429 break;
36430
36431 case BUILT_IN_IRINT:
36432 case BUILT_IN_LRINT:
36433 case BUILT_IN_LLRINT:
36434 if (out_mode == SImode && in_mode == DFmode)
36435 {
36436 if (out_n == 4 && in_n == 2)
36437 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36438 else if (out_n == 8 && in_n == 4)
36439 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36440 }
36441 break;
36442
36443 case BUILT_IN_IRINTF:
36444 case BUILT_IN_LRINTF:
36445 case BUILT_IN_LLRINTF:
36446 if (out_mode == SImode && in_mode == SFmode)
36447 {
36448 if (out_n == 4 && in_n == 4)
36449 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36450 else if (out_n == 8 && in_n == 8)
36451 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36452 }
36453 break;
36454
36455 case BUILT_IN_IROUND:
36456 case BUILT_IN_LROUND:
36457 case BUILT_IN_LLROUND:
36458 /* The round insn does not trap on denormals. */
36459 if (flag_trapping_math || !TARGET_ROUND)
36460 break;
36461
36462 if (out_mode == SImode && in_mode == DFmode)
36463 {
36464 if (out_n == 4 && in_n == 2)
36465 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36466 else if (out_n == 8 && in_n == 4)
36467 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36468 else if (out_n == 16 && in_n == 8)
36469 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36470 }
36471 break;
36472
36473 case BUILT_IN_IROUNDF:
36474 case BUILT_IN_LROUNDF:
36475 case BUILT_IN_LLROUNDF:
36476 /* The round insn does not trap on denormals. */
36477 if (flag_trapping_math || !TARGET_ROUND)
36478 break;
36479
36480 if (out_mode == SImode && in_mode == SFmode)
36481 {
36482 if (out_n == 4 && in_n == 4)
36483 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36484 else if (out_n == 8 && in_n == 8)
36485 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36486 }
36487 break;
36488
36489 case BUILT_IN_COPYSIGN:
36490 if (out_mode == DFmode && in_mode == DFmode)
36491 {
36492 if (out_n == 2 && in_n == 2)
36493 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36494 else if (out_n == 4 && in_n == 4)
36495 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36496 else if (out_n == 8 && in_n == 8)
36497 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36498 }
36499 break;
36500
36501 case BUILT_IN_COPYSIGNF:
36502 if (out_mode == SFmode && in_mode == SFmode)
36503 {
36504 if (out_n == 4 && in_n == 4)
36505 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36506 else if (out_n == 8 && in_n == 8)
36507 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36508 else if (out_n == 16 && in_n == 16)
36509 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36510 }
36511 break;
36512
36513 case BUILT_IN_FLOOR:
36514 /* The round insn does not trap on denormals. */
36515 if (flag_trapping_math || !TARGET_ROUND)
36516 break;
36517
36518 if (out_mode == DFmode && in_mode == DFmode)
36519 {
36520 if (out_n == 2 && in_n == 2)
36521 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36522 else if (out_n == 4 && in_n == 4)
36523 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36524 }
36525 break;
36526
36527 case BUILT_IN_FLOORF:
36528 /* The round insn does not trap on denormals. */
36529 if (flag_trapping_math || !TARGET_ROUND)
36530 break;
36531
36532 if (out_mode == SFmode && in_mode == SFmode)
36533 {
36534 if (out_n == 4 && in_n == 4)
36535 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36536 else if (out_n == 8 && in_n == 8)
36537 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36538 }
36539 break;
36540
36541 case BUILT_IN_CEIL:
36542 /* The round insn does not trap on denormals. */
36543 if (flag_trapping_math || !TARGET_ROUND)
36544 break;
36545
36546 if (out_mode == DFmode && in_mode == DFmode)
36547 {
36548 if (out_n == 2 && in_n == 2)
36549 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36550 else if (out_n == 4 && in_n == 4)
36551 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36552 }
36553 break;
36554
36555 case BUILT_IN_CEILF:
36556 /* The round insn does not trap on denormals. */
36557 if (flag_trapping_math || !TARGET_ROUND)
36558 break;
36559
36560 if (out_mode == SFmode && in_mode == SFmode)
36561 {
36562 if (out_n == 4 && in_n == 4)
36563 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36564 else if (out_n == 8 && in_n == 8)
36565 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36566 }
36567 break;
36568
36569 case BUILT_IN_TRUNC:
36570 /* The round insn does not trap on denormals. */
36571 if (flag_trapping_math || !TARGET_ROUND)
36572 break;
36573
36574 if (out_mode == DFmode && in_mode == DFmode)
36575 {
36576 if (out_n == 2 && in_n == 2)
36577 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36578 else if (out_n == 4 && in_n == 4)
36579 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36580 }
36581 break;
36582
36583 case BUILT_IN_TRUNCF:
36584 /* The round insn does not trap on denormals. */
36585 if (flag_trapping_math || !TARGET_ROUND)
36586 break;
36587
36588 if (out_mode == SFmode && in_mode == SFmode)
36589 {
36590 if (out_n == 4 && in_n == 4)
36591 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36592 else if (out_n == 8 && in_n == 8)
36593 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36594 }
36595 break;
36596
36597 case BUILT_IN_RINT:
36598 /* The round insn does not trap on denormals. */
36599 if (flag_trapping_math || !TARGET_ROUND)
36600 break;
36601
36602 if (out_mode == DFmode && in_mode == DFmode)
36603 {
36604 if (out_n == 2 && in_n == 2)
36605 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36606 else if (out_n == 4 && in_n == 4)
36607 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36608 }
36609 break;
36610
36611 case BUILT_IN_RINTF:
36612 /* The round insn does not trap on denormals. */
36613 if (flag_trapping_math || !TARGET_ROUND)
36614 break;
36615
36616 if (out_mode == SFmode && in_mode == SFmode)
36617 {
36618 if (out_n == 4 && in_n == 4)
36619 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36620 else if (out_n == 8 && in_n == 8)
36621 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36622 }
36623 break;
36624
36625 case BUILT_IN_ROUND:
36626 /* The round insn does not trap on denormals. */
36627 if (flag_trapping_math || !TARGET_ROUND)
36628 break;
36629
36630 if (out_mode == DFmode && in_mode == DFmode)
36631 {
36632 if (out_n == 2 && in_n == 2)
36633 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36634 else if (out_n == 4 && in_n == 4)
36635 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36636 }
36637 break;
36638
36639 case BUILT_IN_ROUNDF:
36640 /* The round insn does not trap on denormals. */
36641 if (flag_trapping_math || !TARGET_ROUND)
36642 break;
36643
36644 if (out_mode == SFmode && in_mode == SFmode)
36645 {
36646 if (out_n == 4 && in_n == 4)
36647 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36648 else if (out_n == 8 && in_n == 8)
36649 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36650 }
36651 break;
36652
36653 case BUILT_IN_FMA:
36654 if (out_mode == DFmode && in_mode == DFmode)
36655 {
36656 if (out_n == 2 && in_n == 2)
36657 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36658 if (out_n == 4 && in_n == 4)
36659 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36660 }
36661 break;
36662
36663 case BUILT_IN_FMAF:
36664 if (out_mode == SFmode && in_mode == SFmode)
36665 {
36666 if (out_n == 4 && in_n == 4)
36667 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36668 if (out_n == 8 && in_n == 8)
36669 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36670 }
36671 break;
36672
36673 default:
36674 break;
36675 }
36676
36677 /* Dispatch to a handler for a vectorization library. */
36678 if (ix86_veclib_handler)
36679 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36680 type_in);
36681
36682 return NULL_TREE;
36683 }
36684
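/* Illustrative sketch (an assumption, not from the original sources):
   for a loop such as

     for (i = 0; i < n; i++)
       b[i] = sqrt (a[i]);

   compiled with e.g. -mavx -fno-math-errno, the vectorizer queries
   BUILT_IN_SQRT with V4DF in and out (out_mode == DFmode, out_n == 4),
   and the switch above hands back the decl for IX86_BUILTIN_SQRTPD256,
   i.e. the vsqrtpd-based builtin.  */
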
36685 /* Handler for an SVML-style interface to
36686 a library with vectorized intrinsics. */
36687
36688 static tree
36689 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36690 {
36691 char name[20];
36692 tree fntype, new_fndecl, args;
36693 unsigned arity;
36694 const char *bname;
36695 enum machine_mode el_mode, in_mode;
36696 int n, in_n;
36697
36698 /* The SVML is suitable for unsafe math only. */
36699 if (!flag_unsafe_math_optimizations)
36700 return NULL_TREE;
36701
36702 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36703 n = TYPE_VECTOR_SUBPARTS (type_out);
36704 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36705 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36706 if (el_mode != in_mode
36707 || n != in_n)
36708 return NULL_TREE;
36709
36710 switch (fn)
36711 {
36712 case BUILT_IN_EXP:
36713 case BUILT_IN_LOG:
36714 case BUILT_IN_LOG10:
36715 case BUILT_IN_POW:
36716 case BUILT_IN_TANH:
36717 case BUILT_IN_TAN:
36718 case BUILT_IN_ATAN:
36719 case BUILT_IN_ATAN2:
36720 case BUILT_IN_ATANH:
36721 case BUILT_IN_CBRT:
36722 case BUILT_IN_SINH:
36723 case BUILT_IN_SIN:
36724 case BUILT_IN_ASINH:
36725 case BUILT_IN_ASIN:
36726 case BUILT_IN_COSH:
36727 case BUILT_IN_COS:
36728 case BUILT_IN_ACOSH:
36729 case BUILT_IN_ACOS:
36730 if (el_mode != DFmode || n != 2)
36731 return NULL_TREE;
36732 break;
36733
36734 case BUILT_IN_EXPF:
36735 case BUILT_IN_LOGF:
36736 case BUILT_IN_LOG10F:
36737 case BUILT_IN_POWF:
36738 case BUILT_IN_TANHF:
36739 case BUILT_IN_TANF:
36740 case BUILT_IN_ATANF:
36741 case BUILT_IN_ATAN2F:
36742 case BUILT_IN_ATANHF:
36743 case BUILT_IN_CBRTF:
36744 case BUILT_IN_SINHF:
36745 case BUILT_IN_SINF:
36746 case BUILT_IN_ASINHF:
36747 case BUILT_IN_ASINF:
36748 case BUILT_IN_COSHF:
36749 case BUILT_IN_COSF:
36750 case BUILT_IN_ACOSHF:
36751 case BUILT_IN_ACOSF:
36752 if (el_mode != SFmode || n != 4)
36753 return NULL_TREE;
36754 break;
36755
36756 default:
36757 return NULL_TREE;
36758 }
36759
36760 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36761
36762 if (fn == BUILT_IN_LOGF)
36763 strcpy (name, "vmlsLn4");
36764 else if (fn == BUILT_IN_LOG)
36765 strcpy (name, "vmldLn2");
36766 else if (n == 4)
36767 {
36768 sprintf (name, "vmls%s", bname+10);
36769 name[strlen (name)-1] = '4';
36770 }
36771 else
36772 sprintf (name, "vmld%s2", bname+10);
36773
36774 /* Convert to uppercase. */
36775 name[4] &= ~0x20;
36776
36777 arity = 0;
36778 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36779 args;
36780 args = TREE_CHAIN (args))
36781 arity++;
36782
36783 if (arity == 1)
36784 fntype = build_function_type_list (type_out, type_in, NULL);
36785 else
36786 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36787
36788 /* Build a function declaration for the vectorized function. */
36789 new_fndecl = build_decl (BUILTINS_LOCATION,
36790 FUNCTION_DECL, get_identifier (name), fntype);
36791 TREE_PUBLIC (new_fndecl) = 1;
36792 DECL_EXTERNAL (new_fndecl) = 1;
36793 DECL_IS_NOVOPS (new_fndecl) = 1;
36794 TREE_READONLY (new_fndecl) = 1;
36795
36796 return new_fndecl;
36797 }
36798
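/* Worked examples of the SVML name mangling above (derived from the code,
   shown only for illustration) when -mveclibabi=svml is in effect:

     sin  on 2 doubles -> "vmldSin2"
     sinf on 4 floats  -> "vmlsSin4"
     log  on 2 doubles -> "vmldLn2"   (special-cased)
     logf on 4 floats  -> "vmlsLn4"   (special-cased)

   i.e. a "vmld"/"vmls" prefix, the builtin name without its "__builtin_"
   prefix with its first letter upper-cased, and the vector width as the
   trailing digit.  */
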
36799 /* Handler for an ACML-style interface to
36800 a library with vectorized intrinsics. */
36801
36802 static tree
36803 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36804 {
36805 char name[20] = "__vr.._";
36806 tree fntype, new_fndecl, args;
36807 unsigned arity;
36808 const char *bname;
36809 enum machine_mode el_mode, in_mode;
36810 int n, in_n;
36811
36812 /* The ACML is 64-bit only and suitable for unsafe math only, as
36813 it does not correctly support parts of IEEE with the required
36814 precision such as denormals. */
36815 if (!TARGET_64BIT
36816 || !flag_unsafe_math_optimizations)
36817 return NULL_TREE;
36818
36819 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36820 n = TYPE_VECTOR_SUBPARTS (type_out);
36821 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36822 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36823 if (el_mode != in_mode
36824 || n != in_n)
36825 return NULL_TREE;
36826
36827 switch (fn)
36828 {
36829 case BUILT_IN_SIN:
36830 case BUILT_IN_COS:
36831 case BUILT_IN_EXP:
36832 case BUILT_IN_LOG:
36833 case BUILT_IN_LOG2:
36834 case BUILT_IN_LOG10:
36835 name[4] = 'd';
36836 name[5] = '2';
36837 if (el_mode != DFmode
36838 || n != 2)
36839 return NULL_TREE;
36840 break;
36841
36842 case BUILT_IN_SINF:
36843 case BUILT_IN_COSF:
36844 case BUILT_IN_EXPF:
36845 case BUILT_IN_POWF:
36846 case BUILT_IN_LOGF:
36847 case BUILT_IN_LOG2F:
36848 case BUILT_IN_LOG10F:
36849 name[4] = 's';
36850 name[5] = '4';
36851 if (el_mode != SFmode
36852 || n != 4)
36853 return NULL_TREE;
36854 break;
36855
36856 default:
36857 return NULL_TREE;
36858 }
36859
36860 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36861 sprintf (name + 7, "%s", bname+10);
36862
36863 arity = 0;
36864 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36865 args;
36866 args = TREE_CHAIN (args))
36867 arity++;
36868
36869 if (arity == 1)
36870 fntype = build_function_type_list (type_out, type_in, NULL);
36871 else
36872 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36873
36874 /* Build a function declaration for the vectorized function. */
36875 new_fndecl = build_decl (BUILTINS_LOCATION,
36876 FUNCTION_DECL, get_identifier (name), fntype);
36877 TREE_PUBLIC (new_fndecl) = 1;
36878 DECL_EXTERNAL (new_fndecl) = 1;
36879 DECL_IS_NOVOPS (new_fndecl) = 1;
36880 TREE_READONLY (new_fndecl) = 1;
36881
36882 return new_fndecl;
36883 }
36884
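/* Worked examples of the ACML name template above (derived from the code,
   shown only for illustration): the "__vr.._" skeleton is filled in with
   the element-type letter and the vector width, and the builtin name minus
   its "__builtin_" prefix is appended, e.g.

     sin  on 2 doubles -> "__vrd2_sin"
     cosf on 4 floats  -> "__vrs4_cosf"

   matching the ACML_MV vector math entry points.  */
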
36885 /* Returns a decl of a function that implements gather load with
36886 memory type MEM_VECTYPE and index type INDEX_TYPE and SCALE.
36887 Return NULL_TREE if it is not available. */
36888
36889 static tree
36890 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36891 const_tree index_type, int scale)
36892 {
36893 bool si;
36894 enum ix86_builtins code;
36895
36896 if (! TARGET_AVX2)
36897 return NULL_TREE;
36898
36899 if ((TREE_CODE (index_type) != INTEGER_TYPE
36900 && !POINTER_TYPE_P (index_type))
36901 || (TYPE_MODE (index_type) != SImode
36902 && TYPE_MODE (index_type) != DImode))
36903 return NULL_TREE;
36904
36905 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36906 return NULL_TREE;
36907
36908 /* The v*gather* insn sign-extends the index to pointer mode. */
36909 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36910 && TYPE_UNSIGNED (index_type))
36911 return NULL_TREE;
36912
36913 if (scale <= 0
36914 || scale > 8
36915 || (scale & (scale - 1)) != 0)
36916 return NULL_TREE;
36917
36918 si = TYPE_MODE (index_type) == SImode;
36919 switch (TYPE_MODE (mem_vectype))
36920 {
36921 case V2DFmode:
36922 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36923 break;
36924 case V4DFmode:
36925 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
36926 break;
36927 case V2DImode:
36928 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
36929 break;
36930 case V4DImode:
36931 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
36932 break;
36933 case V4SFmode:
36934 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
36935 break;
36936 case V8SFmode:
36937 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
36938 break;
36939 case V4SImode:
36940 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
36941 break;
36942 case V8SImode:
36943 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
36944 break;
36945 case V8DFmode:
36946 if (TARGET_AVX512F)
36947 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
36948 else
36949 return NULL_TREE;
36950 break;
36951 case V8DImode:
36952 if (TARGET_AVX512F)
36953 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
36954 else
36955 return NULL_TREE;
36956 break;
36957 case V16SFmode:
36958 if (TARGET_AVX512F)
36959 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
36960 else
36961 return NULL_TREE;
36962 break;
36963 case V16SImode:
36964 if (TARGET_AVX512F)
36965 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
36966 else
36967 return NULL_TREE;
36968 break;
36969 default:
36970 return NULL_TREE;
36971 }
36972
36973 return ix86_get_builtin (code);
36974 }
36975
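/* Illustrative note (not from the original sources): for a V8SFmode load
   indexed by a 32-bit signed index with scale 4, the switch above picks
   IX86_BUILTIN_GATHERSIV8SF (the vgatherdps form with a ymm destination).
   A 32-bit *unsigned* index is rejected on 64-bit targets by the check
   above, because the gather instructions sign-extend the index to pointer
   mode.  */
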
36976 /* Returns a decl of a target-specific builtin that implements the
36977 reciprocal of the function, or NULL_TREE if not available. */
36978
36979 static tree
36980 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
36981 {
36982 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
36983 && flag_finite_math_only && !flag_trapping_math
36984 && flag_unsafe_math_optimizations))
36985 return NULL_TREE;
36986
36987 if (md_fn)
36988 /* Machine dependent builtins. */
36989 switch (fn)
36990 {
36991 /* Vectorized version of sqrt to rsqrt conversion. */
36992 case IX86_BUILTIN_SQRTPS_NR:
36993 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
36994
36995 case IX86_BUILTIN_SQRTPS_NR256:
36996 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
36997
36998 default:
36999 return NULL_TREE;
37000 }
37001 else
37002 /* Normal builtins. */
37003 switch (fn)
37004 {
37005 /* Sqrt to rsqrt conversion. */
37006 case BUILT_IN_SQRTF:
37007 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
37008
37009 default:
37010 return NULL_TREE;
37011 }
37012 }
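
/* Illustrative note (not from the original sources): under -ffast-math,
   which implies the finite-math/no-trapping/unsafe-math conditions tested
   above, a vectorized 1.0f / sqrtf (x) computed via
   IX86_BUILTIN_SQRTPS_NR is rewritten to use IX86_BUILTIN_RSQRTPS_NR,
   i.e. the rsqrtps approximation followed by a Newton-Raphson refinement
   step, instead of a full-precision square root and divide.  */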
37013 \f
37014 /* Helper for avx_vpermilps256_operand et al. This is also used by
37015 the expansion functions to turn the parallel back into a mask.
37016 The return value is 0 for no match and the imm8+1 for a match. */
37017
37018 int
37019 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
37020 {
37021 unsigned i, nelt = GET_MODE_NUNITS (mode);
37022 unsigned mask = 0;
37023 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
37024
37025 if (XVECLEN (par, 0) != (int) nelt)
37026 return 0;
37027
37028 /* Validate that all of the elements are constants, and not totally
37029 out of range. Copy the data into an integral array to make the
37030 subsequent checks easier. */
37031 for (i = 0; i < nelt; ++i)
37032 {
37033 rtx er = XVECEXP (par, 0, i);
37034 unsigned HOST_WIDE_INT ei;
37035
37036 if (!CONST_INT_P (er))
37037 return 0;
37038 ei = INTVAL (er);
37039 if (ei >= nelt)
37040 return 0;
37041 ipar[i] = ei;
37042 }
37043
37044 switch (mode)
37045 {
37046 case V8DFmode:
37047 /* In the 512-bit DFmode case, we can only move elements within
37048 a 128-bit lane. First fill the second part of the mask,
37049 then fallthru. */
37050 for (i = 4; i < 6; ++i)
37051 {
37052 if (ipar[i] < 4 || ipar[i] >= 6)
37053 return 0;
37054 mask |= (ipar[i] - 4) << i;
37055 }
37056 for (i = 6; i < 8; ++i)
37057 {
37058 if (ipar[i] < 6)
37059 return 0;
37060 mask |= (ipar[i] - 6) << i;
37061 }
37062 /* FALLTHRU */
37063
37064 case V4DFmode:
37065 /* In the 256-bit DFmode case, we can only move elements within
37066 a 128-bit lane. */
37067 for (i = 0; i < 2; ++i)
37068 {
37069 if (ipar[i] >= 2)
37070 return 0;
37071 mask |= ipar[i] << i;
37072 }
37073 for (i = 2; i < 4; ++i)
37074 {
37075 if (ipar[i] < 2)
37076 return 0;
37077 mask |= (ipar[i] - 2) << i;
37078 }
37079 break;
37080
37081 case V16SFmode:
37082 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
37083 must mirror the permutation in the lower 256 bits. */
37084 for (i = 0; i < 8; ++i)
37085 if (ipar[i] + 8 != ipar[i + 8])
37086 return 0;
37087 /* FALLTHRU */
37088
37089 case V8SFmode:
37090 /* In the 256-bit SFmode case, we have full freedom of
37091 movement within the low 128-bit lane, but the high 128-bit
37092 lane must mirror the exact same pattern. */
37093 for (i = 0; i < 4; ++i)
37094 if (ipar[i] + 4 != ipar[i + 4])
37095 return 0;
37096 nelt = 4;
37097 /* FALLTHRU */
37098
37099 case V2DFmode:
37100 case V4SFmode:
37101 /* In the 128-bit case, we've full freedom in the placement of
37102 the elements from the source operand. */
37103 for (i = 0; i < nelt; ++i)
37104 mask |= ipar[i] << (i * (nelt / 2));
37105 break;
37106
37107 default:
37108 gcc_unreachable ();
37109 }
37110
37111 /* Make sure success has a non-zero value by adding one. */
37112 return mask + 1;
37113 }
37114
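/* Worked example (derived from the code above, for illustration only):
   for V4SFmode, the parallel (0 3 2 1) packs two bits per element,

     mask = 0<<0 | 3<<2 | 2<<4 | 1<<6 = 0x6c

   so avx_vpermilp_parallel returns 0x6c + 1, and the expander recovers
   the vpermilps immediate 0x6c by subtracting one.  */
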
37115 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
37116 the expansion functions to turn the parallel back into a mask.
37117 The return value is 0 for no match and the imm8+1 for a match. */
37118
37119 int
37120 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
37121 {
37122 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
37123 unsigned mask = 0;
37124 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
37125
37126 if (XVECLEN (par, 0) != (int) nelt)
37127 return 0;
37128
37129 /* Validate that all of the elements are constants, and not totally
37130 out of range. Copy the data into an integral array to make the
37131 subsequent checks easier. */
37132 for (i = 0; i < nelt; ++i)
37133 {
37134 rtx er = XVECEXP (par, 0, i);
37135 unsigned HOST_WIDE_INT ei;
37136
37137 if (!CONST_INT_P (er))
37138 return 0;
37139 ei = INTVAL (er);
37140 if (ei >= 2 * nelt)
37141 return 0;
37142 ipar[i] = ei;
37143 }
37144
37145 /* Validate that the halves of the permute are halves. */
37146 for (i = 0; i < nelt2 - 1; ++i)
37147 if (ipar[i] + 1 != ipar[i + 1])
37148 return 0;
37149 for (i = nelt2; i < nelt - 1; ++i)
37150 if (ipar[i] + 1 != ipar[i + 1])
37151 return 0;
37152
37153 /* Reconstruct the mask. */
37154 for (i = 0; i < 2; ++i)
37155 {
37156 unsigned e = ipar[i * nelt2];
37157 if (e % nelt2)
37158 return 0;
37159 e /= nelt2;
37160 mask |= e << (i * 4);
37161 }
37162
37163 /* Make sure success has a non-zero value by adding one. */
37164 return mask + 1;
37165 }
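
/* Worked example (derived from the code above, for illustration only):
   for V8SFmode (nelt = 8, nelt2 = 4) the parallel (8 9 10 11 0 1 2 3)
   selects the low 128-bit half of the second source for the low lane and
   the low half of the first source for the high lane:

     ipar[0] / 4 = 2  ->  mask |= 2 << 0
     ipar[4] / 4 = 0  ->  mask |= 0 << 4

   giving the vperm2f128 immediate 0x02 (returned as 0x02 + 1).  */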
37166 \f
37167 /* Return a register priority for hard reg REGNO. */
37168 static int
37169 ix86_register_priority (int hard_regno)
37170 {
37171 /* ebp and r13 as the base always want a displacement, r12 as the
37172 base always wants an index. So discourage their use in an
37173 address. */
37174 if (hard_regno == R12_REG || hard_regno == R13_REG)
37175 return 0;
37176 if (hard_regno == BP_REG)
37177 return 1;
37178 /* New x86-64 int registers result in bigger code size. Discourage
37179 them. */
37180 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37181 return 2;
37182 /* New x86-64 SSE registers result in bigger code size. Discourage
37183 them. */
37184 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37185 return 2;
37186 /* Usage of AX register results in smaller code. Prefer it. */
37187 if (hard_regno == 0)
37188 return 4;
37189 return 3;
37190 }
37191
37192 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37193
37194 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37195 QImode must go into class Q_REGS.
37196 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37197 movdf to do mem-to-mem moves through integer regs. */
37198
37199 static reg_class_t
37200 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37201 {
37202 enum machine_mode mode = GET_MODE (x);
37203
37204 /* We're only allowed to return a subclass of CLASS. Many of the
37205 following checks fail for NO_REGS, so eliminate that early. */
37206 if (regclass == NO_REGS)
37207 return NO_REGS;
37208
37209 /* All classes can load zeros. */
37210 if (x == CONST0_RTX (mode))
37211 return regclass;
37212
37213 /* Force constants into memory if we are loading a (nonzero) constant into
37214 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37215 instructions to load from a constant. */
37216 if (CONSTANT_P (x)
37217 && (MAYBE_MMX_CLASS_P (regclass)
37218 || MAYBE_SSE_CLASS_P (regclass)
37219 || MAYBE_MASK_CLASS_P (regclass)))
37220 return NO_REGS;
37221
37222 /* Prefer SSE regs only, if we can use them for math. */
37223 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37224 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37225
37226 /* Floating-point constants need more complex checks. */
37227 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37228 {
37229 /* General regs can load everything. */
37230 if (reg_class_subset_p (regclass, GENERAL_REGS))
37231 return regclass;
37232
37233 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37234 zero above. We only want to wind up preferring 80387 registers if
37235 we plan on doing computation with them. */
37236 if (TARGET_80387
37237 && standard_80387_constant_p (x) > 0)
37238 {
37239 /* Limit class to non-sse. */
37240 if (regclass == FLOAT_SSE_REGS)
37241 return FLOAT_REGS;
37242 if (regclass == FP_TOP_SSE_REGS)
37243 return FP_TOP_REG;
37244 if (regclass == FP_SECOND_SSE_REGS)
37245 return FP_SECOND_REG;
37246 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37247 return regclass;
37248 }
37249
37250 return NO_REGS;
37251 }
37252
37253 /* Generally when we see PLUS here, it's the function invariant
37254 (plus soft-fp const_int). Which can only be computed into general
37255 regs. */
37256 if (GET_CODE (x) == PLUS)
37257 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37258
37259 /* QImode constants are easy to load, but non-constant QImode data
37260 must go into Q_REGS. */
37261 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37262 {
37263 if (reg_class_subset_p (regclass, Q_REGS))
37264 return regclass;
37265 if (reg_class_subset_p (Q_REGS, regclass))
37266 return Q_REGS;
37267 return NO_REGS;
37268 }
37269
37270 return regclass;
37271 }
37272
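/* Illustrative note (not from the original sources): reload asking to put
   a nonzero vector constant into an SSE class gets NO_REGS back from the
   CONSTANT_P check above, which forces the constant into the constant
   pool; SSE has no instruction that loads an arbitrary immediate, so the
   value must come from memory.  Zero is the exception handled earlier,
   since it can be materialized with pxor/xorps.  */
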
37273 /* Discourage putting floating-point values in SSE registers unless
37274 SSE math is being used, and likewise for the 387 registers. */
37275 static reg_class_t
37276 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37277 {
37278 enum machine_mode mode = GET_MODE (x);
37279
37280 /* Restrict the output reload class to the register bank that we are doing
37281 math on. If we would like not to return a subset of CLASS, reject this
37282 alternative: if reload cannot do this, it will still use its choice. */
37283 mode = GET_MODE (x);
37284 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37285 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37286
37287 if (X87_FLOAT_MODE_P (mode))
37288 {
37289 if (regclass == FP_TOP_SSE_REGS)
37290 return FP_TOP_REG;
37291 else if (regclass == FP_SECOND_SSE_REGS)
37292 return FP_SECOND_REG;
37293 else
37294 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37295 }
37296
37297 return regclass;
37298 }
37299
37300 static reg_class_t
37301 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37302 enum machine_mode mode, secondary_reload_info *sri)
37303 {
37304 /* Double-word spills from general registers to non-offsettable memory
37305 references (zero-extended addresses) require special handling. */
37306 if (TARGET_64BIT
37307 && MEM_P (x)
37308 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37309 && INTEGER_CLASS_P (rclass)
37310 && !offsettable_memref_p (x))
37311 {
37312 sri->icode = (in_p
37313 ? CODE_FOR_reload_noff_load
37314 : CODE_FOR_reload_noff_store);
37315 /* Add the cost of moving address to a temporary. */
37316 sri->extra_cost = 1;
37317
37318 return NO_REGS;
37319 }
37320
37321 /* QImode spills from non-QI registers require
37322 an intermediate register on 32-bit targets. */
37323 if (mode == QImode
37324 && (MAYBE_MASK_CLASS_P (rclass)
37325 || (!TARGET_64BIT && !in_p
37326 && INTEGER_CLASS_P (rclass)
37327 && MAYBE_NON_Q_CLASS_P (rclass))))
37328 {
37329 int regno;
37330
37331 if (REG_P (x))
37332 regno = REGNO (x);
37333 else
37334 regno = -1;
37335
37336 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37337 regno = true_regnum (x);
37338
37339 /* Return Q_REGS if the operand is in memory. */
37340 if (regno == -1)
37341 return Q_REGS;
37342 }
37343
37344 /* This condition handles corner case where an expression involving
37345 pointers gets vectorized. We're trying to use the address of a
37346 stack slot as a vector initializer.
37347
37348 (set (reg:V2DI 74 [ vect_cst_.2 ])
37349 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37350
37351 Eventually frame gets turned into sp+offset like this:
37352
37353 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37354 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37355 (const_int 392 [0x188]))))
37356
37357 That later gets turned into:
37358
37359 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37360 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37361 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37362
37363 We'll have the following reload recorded:
37364
37365 Reload 0: reload_in (DI) =
37366 (plus:DI (reg/f:DI 7 sp)
37367 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37368 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37369 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37370 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37371 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37372 reload_reg_rtx: (reg:V2DI 22 xmm1)
37373
37374 Which isn't going to work since SSE instructions can't handle scalar
37375 additions. Returning GENERAL_REGS forces the addition into integer
37376 register and reload can handle subsequent reloads without problems. */
37377
37378 if (in_p && GET_CODE (x) == PLUS
37379 && SSE_CLASS_P (rclass)
37380 && SCALAR_INT_MODE_P (mode))
37381 return GENERAL_REGS;
37382
37383 return NO_REGS;
37384 }
37385
37386 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37387
37388 static bool
37389 ix86_class_likely_spilled_p (reg_class_t rclass)
37390 {
37391 switch (rclass)
37392 {
37393 case AREG:
37394 case DREG:
37395 case CREG:
37396 case BREG:
37397 case AD_REGS:
37398 case SIREG:
37399 case DIREG:
37400 case SSE_FIRST_REG:
37401 case FP_TOP_REG:
37402 case FP_SECOND_REG:
37403 return true;
37404
37405 default:
37406 break;
37407 }
37408
37409 return false;
37410 }
37411
37412 /* If we are copying between general and FP registers, we need a memory
37413 location. The same is true for SSE and MMX registers.
37414
37415 To optimize register_move_cost performance, allow inline variant.
37416
37417 The macro can't work reliably when one of the CLASSES is a class containing
37418 registers from multiple units (SSE, MMX, integer). We avoid this by never
37419 combining those units in single alternative in the machine description.
37420 Ensure that this constraint holds to avoid unexpected surprises.
37421
37422 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37423 enforce these sanity checks. */
37424
37425 static inline bool
37426 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37427 enum machine_mode mode, int strict)
37428 {
37429 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37430 return false;
37431 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37432 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37433 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37434 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37435 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37436 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37437 {
37438 gcc_assert (!strict || lra_in_progress);
37439 return true;
37440 }
37441
37442 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37443 return true;
37444
37445 /* Between mask and general, we have moves no larger than word size. */
37446 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
37447 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
37448 return true;
37449
37450 /* ??? This is a lie. We do have moves between mmx/general, and for
37451 mmx/sse2. But by saying we need secondary memory we discourage the
37452 register allocator from using the mmx registers unless needed. */
37453 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37454 return true;
37455
37456 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37457 {
37458 /* SSE1 doesn't have any direct moves from other classes. */
37459 if (!TARGET_SSE2)
37460 return true;
37461
37462 /* If the target says that inter-unit moves are more expensive
37463 than moving through memory, then don't generate them. */
37464 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37465 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37466 return true;
37467
37468 /* Between SSE and general, we have moves no larger than word size. */
37469 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37470 return true;
37471 }
37472
37473 return false;
37474 }
37475
37476 bool
37477 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37478 enum machine_mode mode, int strict)
37479 {
37480 return inline_secondary_memory_needed (class1, class2, mode, strict);
37481 }
37482
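/* Illustrative note (not from the original sources): on a 32-bit target a
   DImode copy between SSE_REGS and GENERAL_REGS needs secondary memory,
   because the "no larger than word size" check above fires
   (GET_MODE_SIZE (DImode) = 8 > UNITS_PER_WORD = 4); the value is spilled
   to the stack and reloaded instead of being moved directly.  */
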
37483 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37484
37485 On the 80386, this is the size of MODE in words,
37486 except in the FP regs, where a single reg is always enough. */
37487
37488 static unsigned char
37489 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37490 {
37491 if (MAYBE_INTEGER_CLASS_P (rclass))
37492 {
37493 if (mode == XFmode)
37494 return (TARGET_64BIT ? 2 : 3);
37495 else if (mode == XCmode)
37496 return (TARGET_64BIT ? 4 : 6);
37497 else
37498 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37499 }
37500 else
37501 {
37502 if (COMPLEX_MODE_P (mode))
37503 return 2;
37504 else
37505 return 1;
37506 }
37507 }
37508
37509 /* Return true if the registers in CLASS cannot represent the change from
37510 modes FROM to TO. */
37511
37512 bool
37513 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37514 enum reg_class regclass)
37515 {
37516 if (from == to)
37517 return false;
37518
37519 /* x87 registers can't do subreg at all, as all values are reformatted
37520 to extended precision. */
37521 if (MAYBE_FLOAT_CLASS_P (regclass))
37522 return true;
37523
37524 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37525 {
37526 /* Vector registers do not support QI or HImode loads. If we don't
37527 disallow a change to these modes, reload will assume it's ok to
37528 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37529 the vec_dupv4hi pattern. */
37530 if (GET_MODE_SIZE (from) < 4)
37531 return true;
37532
37533 /* Vector registers do not support subreg with nonzero offsets, which
37534 are otherwise valid for integer registers. Since we can't see
37535 whether we have a nonzero offset from here, prohibit all
37536 nonparadoxical subregs changing size. */
37537 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
37538 return true;
37539 }
37540
37541 return false;
37542 }
37543
37544 /* Return the cost of moving data of mode M between a
37545 register and memory. A value of 2 is the default; this cost is
37546 relative to those in `REGISTER_MOVE_COST'.
37547
37548 This function is used extensively by register_move_cost that is used to
37549 build tables at startup. Make it inline in this case.
37550 When IN is 2, return maximum of in and out move cost.
37551
37552 If moving between registers and memory is more expensive than
37553 between two registers, you should define this macro to express the
37554 relative cost.
37555
37556 Also model the increased moving costs of QImode registers in
37557 non-Q_REGS classes.
37558 */
37559 static inline int
37560 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37561 int in)
37562 {
37563 int cost;
37564 if (FLOAT_CLASS_P (regclass))
37565 {
37566 int index;
37567 switch (mode)
37568 {
37569 case SFmode:
37570 index = 0;
37571 break;
37572 case DFmode:
37573 index = 1;
37574 break;
37575 case XFmode:
37576 index = 2;
37577 break;
37578 default:
37579 return 100;
37580 }
37581 if (in == 2)
37582 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37583 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37584 }
37585 if (SSE_CLASS_P (regclass))
37586 {
37587 int index;
37588 switch (GET_MODE_SIZE (mode))
37589 {
37590 case 4:
37591 index = 0;
37592 break;
37593 case 8:
37594 index = 1;
37595 break;
37596 case 16:
37597 index = 2;
37598 break;
37599 default:
37600 return 100;
37601 }
37602 if (in == 2)
37603 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37604 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37605 }
37606 if (MMX_CLASS_P (regclass))
37607 {
37608 int index;
37609 switch (GET_MODE_SIZE (mode))
37610 {
37611 case 4:
37612 index = 0;
37613 break;
37614 case 8:
37615 index = 1;
37616 break;
37617 default:
37618 return 100;
37619 }
37620 if (in == 2)
37621 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37622 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37623 }
37624 switch (GET_MODE_SIZE (mode))
37625 {
37626 case 1:
37627 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37628 {
37629 if (!in)
37630 return ix86_cost->int_store[0];
37631 if (TARGET_PARTIAL_REG_DEPENDENCY
37632 && optimize_function_for_speed_p (cfun))
37633 cost = ix86_cost->movzbl_load;
37634 else
37635 cost = ix86_cost->int_load[0];
37636 if (in == 2)
37637 return MAX (cost, ix86_cost->int_store[0]);
37638 return cost;
37639 }
37640 else
37641 {
37642 if (in == 2)
37643 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37644 if (in)
37645 return ix86_cost->movzbl_load;
37646 else
37647 return ix86_cost->int_store[0] + 4;
37648 }
37649 break;
37650 case 2:
37651 if (in == 2)
37652 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37653 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37654 default:
37655 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
37656 if (mode == TFmode)
37657 mode = XFmode;
37658 if (in == 2)
37659 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37660 else if (in)
37661 cost = ix86_cost->int_load[2];
37662 else
37663 cost = ix86_cost->int_store[2];
37664 return (cost * (((int) GET_MODE_SIZE (mode)
37665 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37666 }
37667 }
37668
37669 static int
37670 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37671 bool in)
37672 {
37673 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37674 }
37675
37676
37677 /* Return the cost of moving data from a register in class CLASS1 to
37678 one in class CLASS2.
37679
37680 It is not required that the cost always equal 2 when FROM is the same as TO;
37681 on some machines it is expensive to move between registers if they are not
37682 general registers. */
37683
37684 static int
37685 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37686 reg_class_t class2_i)
37687 {
37688 enum reg_class class1 = (enum reg_class) class1_i;
37689 enum reg_class class2 = (enum reg_class) class2_i;
37690
37691 /* In case we require secondary memory, compute cost of the store followed
37692 by load. In order to avoid bad register allocation choices, we need
37693 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37694
37695 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37696 {
37697 int cost = 1;
37698
37699 cost += inline_memory_move_cost (mode, class1, 2);
37700 cost += inline_memory_move_cost (mode, class2, 2);
37701
37702 /* In case of copying from a general purpose register we may emit multiple
37703 stores followed by a single load, causing a memory size mismatch stall.
37704 Count this as an arbitrarily high cost of 20. */
37705 if (targetm.class_max_nregs (class1, mode)
37706 > targetm.class_max_nregs (class2, mode))
37707 cost += 20;
37708
37709 /* In the case of FP/MMX moves, the registers actually overlap, and we
37710 have to switch modes in order to treat them differently. */
37711 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37712 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37713 cost += 20;
37714
37715 return cost;
37716 }
37717
37718 /* Moves between SSE/MMX and integer unit are expensive. */
37719 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37720 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37721
37722 /* ??? By keeping the returned value relatively high, we limit the number
37723 of moves between integer and MMX/SSE registers for all targets.
37724 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
37725 where integer modes in MMX/SSE registers are not tieable
37726 because of missing QImode and HImode moves to, from or between
37727 MMX/SSE registers. */
37728 return MAX (8, ix86_cost->mmxsse_to_integer);
37729
37730 if (MAYBE_FLOAT_CLASS_P (class1))
37731 return ix86_cost->fp_move;
37732 if (MAYBE_SSE_CLASS_P (class1))
37733 return ix86_cost->sse_move;
37734 if (MAYBE_MMX_CLASS_P (class1))
37735 return ix86_cost->mmx_move;
37736 return 2;
37737 }
37738
37739 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37740 MODE. */
37741
37742 bool
37743 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37744 {
37745 /* Only the flags register can hold CCmode values, and it holds nothing else. */
37746 if (CC_REGNO_P (regno))
37747 return GET_MODE_CLASS (mode) == MODE_CC;
37748 if (GET_MODE_CLASS (mode) == MODE_CC
37749 || GET_MODE_CLASS (mode) == MODE_RANDOM
37750 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37751 return false;
37752 if (STACK_REGNO_P (regno))
37753 return VALID_FP_MODE_P (mode);
37754 if (MASK_REGNO_P (regno))
37755 return (VALID_MASK_REG_MODE (mode)
37756 || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
37757 if (SSE_REGNO_P (regno))
37758 {
37759 /* We implement the move patterns for all vector modes into and
37760 out of SSE registers, even when no operation instructions
37761 are available. */
37762
37763 /* For AVX-512 we allow, regardless of regno:
37764 - XI mode
37765 - any 512-bit wide vector mode
37766 - any scalar mode. */
37767 if (TARGET_AVX512F
37768 && (mode == XImode
37769 || VALID_AVX512F_REG_MODE (mode)
37770 || VALID_AVX512F_SCALAR_MODE (mode)))
37771 return true;
37772
37773 /* TODO check for QI/HI scalars. */
37774 /* AVX512VL allows SSE regs 16+ for 128/256-bit modes. */
37775 if (TARGET_AVX512VL
37776 && (mode == OImode
37777 || mode == TImode
37778 || VALID_AVX256_REG_MODE (mode)
37779 || VALID_AVX512VL_128_REG_MODE (mode)))
37780 return true;
37781
37782 /* xmm16-xmm31 are only available for AVX-512. */
37783 if (EXT_REX_SSE_REGNO_P (regno))
37784 return false;
37785
37786 /* OImode and AVX modes are available only when AVX is enabled. */
37787 return ((TARGET_AVX
37788 && VALID_AVX256_REG_OR_OI_MODE (mode))
37789 || VALID_SSE_REG_MODE (mode)
37790 || VALID_SSE2_REG_MODE (mode)
37791 || VALID_MMX_REG_MODE (mode)
37792 || VALID_MMX_REG_MODE_3DNOW (mode));
37793 }
37794 if (MMX_REGNO_P (regno))
37795 {
37796 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37797 so if the register is available at all, then we can move data of
37798 the given mode into or out of it. */
37799 return (VALID_MMX_REG_MODE (mode)
37800 || VALID_MMX_REG_MODE_3DNOW (mode));
37801 }
37802
37803 if (mode == QImode)
37804 {
37805 /* Take care for QImode values - they can be in non-QI regs,
37806 but then they do cause partial register stalls. */
37807 if (ANY_QI_REGNO_P (regno))
37808 return true;
37809 if (!TARGET_PARTIAL_REG_STALL)
37810 return true;
37811 /* LRA checks if the hard register is OK for the given mode.
37812 QImode values can live in non-QI regs, so we allow all
37813 registers here. */
37814 if (lra_in_progress)
37815 return true;
37816 return !can_create_pseudo_p ();
37817 }
37818 /* We handle both integer and floats in the general purpose registers. */
37819 else if (VALID_INT_MODE_P (mode))
37820 return true;
37821 else if (VALID_FP_MODE_P (mode))
37822 return true;
37823 else if (VALID_DFP_MODE_P (mode))
37824 return true;
37825 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
37826 on to use that value in smaller contexts, this can easily force a
37827 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37828 supporting DImode, allow it. */
37829 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37830 return true;
37831
37832 return false;
37833 }
37834
37835 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37836 tieable integer mode. */
37837
37838 static bool
37839 ix86_tieable_integer_mode_p (enum machine_mode mode)
37840 {
37841 switch (mode)
37842 {
37843 case HImode:
37844 case SImode:
37845 return true;
37846
37847 case QImode:
37848 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37849
37850 case DImode:
37851 return TARGET_64BIT;
37852
37853 default:
37854 return false;
37855 }
37856 }
37857
37858 /* Return true if MODE1 is accessible in a register that can hold MODE2
37859 without copying. That is, all register classes that can hold MODE2
37860 can also hold MODE1. */
37861
37862 bool
37863 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37864 {
37865 if (mode1 == mode2)
37866 return true;
37867
37868 if (ix86_tieable_integer_mode_p (mode1)
37869 && ix86_tieable_integer_mode_p (mode2))
37870 return true;
37871
37872 /* MODE2 being XFmode implies fp stack or general regs, which means we
37873 can tie any smaller floating point modes to it. Note that we do not
37874 tie this with TFmode. */
37875 if (mode2 == XFmode)
37876 return mode1 == SFmode || mode1 == DFmode;
37877
37878 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37879 that we can tie it with SFmode. */
37880 if (mode2 == DFmode)
37881 return mode1 == SFmode;
37882
37883 /* If MODE2 is only appropriate for an SSE register, then tie with
37884 any other mode acceptable to SSE registers. */
37885 if (GET_MODE_SIZE (mode2) == 32
37886 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37887 return (GET_MODE_SIZE (mode1) == 32
37888 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37889 if (GET_MODE_SIZE (mode2) == 16
37890 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37891 return (GET_MODE_SIZE (mode1) == 16
37892 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37893
37894 /* If MODE2 is appropriate for an MMX register, then tie
37895 with any other mode acceptable to MMX registers. */
37896 if (GET_MODE_SIZE (mode2) == 8
37897 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37898 return (GET_MODE_SIZE (mode1) == 8
37899 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37900
37901 return false;
37902 }
37903
37904 /* Return the cost of moving between two registers of mode MODE. */
37905
37906 static int
37907 ix86_set_reg_reg_cost (enum machine_mode mode)
37908 {
37909 unsigned int units = UNITS_PER_WORD;
37910
37911 switch (GET_MODE_CLASS (mode))
37912 {
37913 default:
37914 break;
37915
37916 case MODE_CC:
37917 units = GET_MODE_SIZE (CCmode);
37918 break;
37919
37920 case MODE_FLOAT:
37921 if ((TARGET_SSE && mode == TFmode)
37922 || (TARGET_80387 && mode == XFmode)
37923 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37924 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37925 units = GET_MODE_SIZE (mode);
37926 break;
37927
37928 case MODE_COMPLEX_FLOAT:
37929 if ((TARGET_SSE && mode == TCmode)
37930 || (TARGET_80387 && mode == XCmode)
37931 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37932 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
37933 units = GET_MODE_SIZE (mode);
37934 break;
37935
37936 case MODE_VECTOR_INT:
37937 case MODE_VECTOR_FLOAT:
37938 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
37939 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37940 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37941 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37942 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
37943 units = GET_MODE_SIZE (mode);
37944 }
37945
37946 /* Return the cost of moving between two registers of mode MODE,
37947 assuming that the move will be in pieces of at most UNITS bytes. */
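/* For example, with UNITS == 4 a DImode (8-byte) register-to-register copy
is counted as two word-sized moves, i.e. COSTS_N_INSNS (2). */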
37948 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
37949 }
37950
37951 /* Compute a (partial) cost for rtx X. Return true if the complete
37952 cost has been computed, and false if subexpressions should be
37953 scanned. In either case, *TOTAL contains the cost result. */
37954
37955 static bool
37956 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
37957 bool speed)
37958 {
37959 rtx mask;
37960 enum rtx_code code = (enum rtx_code) code_i;
37961 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
37962 enum machine_mode mode = GET_MODE (x);
37963 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
37964
37965 switch (code)
37966 {
37967 case SET:
37968 if (register_operand (SET_DEST (x), VOIDmode)
37969 && reg_or_0_operand (SET_SRC (x), VOIDmode))
37970 {
37971 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
37972 return true;
37973 }
37974 return false;
37975
37976 case CONST_INT:
37977 case CONST:
37978 case LABEL_REF:
37979 case SYMBOL_REF:
37980 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
37981 *total = 3;
37982 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
37983 *total = 2;
37984 else if (flag_pic && SYMBOLIC_CONST (x)
37985 && !(TARGET_64BIT
37986 && (GET_CODE (x) == LABEL_REF
37987 || (GET_CODE (x) == SYMBOL_REF
37988 && SYMBOL_REF_LOCAL_P (x)))))
37989 *total = 1;
37990 else
37991 *total = 0;
37992 return true;
37993
37994 case CONST_DOUBLE:
37995 if (mode == VOIDmode)
37996 {
37997 *total = 0;
37998 return true;
37999 }
38000 switch (standard_80387_constant_p (x))
38001 {
38002 case 1: /* 0.0 */
38003 *total = 1;
38004 return true;
38005 default: /* Other constants */
38006 *total = 2;
38007 return true;
38008 case 0:
38009 case -1:
38010 break;
38011 }
38012 if (SSE_FLOAT_MODE_P (mode))
38013 {
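/* Note that CONST_VECTOR enters this block via the case label below, so
vector constants share the standard_sse_constant_p costing used for SSE
float CONST_DOUBLEs. */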
38014 case CONST_VECTOR:
38015 switch (standard_sse_constant_p (x))
38016 {
38017 case 0:
38018 break;
38019 case 1: /* 0: xor eliminates false dependency */
38020 *total = 0;
38021 return true;
38022 default: /* -1: cmp contains false dependency */
38023 *total = 1;
38024 return true;
38025 }
38026 }
38027 /* Fall back to (MEM (SYMBOL_REF)), since that's where
38028 it'll probably end up. Add a penalty for size. */
38029 *total = (COSTS_N_INSNS (1)
38030 + (flag_pic != 0 && !TARGET_64BIT)
38031 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
38032 return true;
38033
38034 case ZERO_EXTEND:
38035 /* The zero extension is often completely free on x86_64, so make
38036 it as cheap as possible. */
38037 if (TARGET_64BIT && mode == DImode
38038 && GET_MODE (XEXP (x, 0)) == SImode)
38039 *total = 1;
38040 else if (TARGET_ZERO_EXTEND_WITH_AND)
38041 *total = cost->add;
38042 else
38043 *total = cost->movzx;
38044 return false;
38045
38046 case SIGN_EXTEND:
38047 *total = cost->movsx;
38048 return false;
38049
38050 case ASHIFT:
38051 if (SCALAR_INT_MODE_P (mode)
38052 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
38053 && CONST_INT_P (XEXP (x, 1)))
38054 {
38055 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38056 if (value == 1)
38057 {
38058 *total = cost->add;
38059 return false;
38060 }
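/* A shift left by 2 or 3 can be done with an lea using a scaled index
(*4 or *8), so prefer lea when it is no more expensive than a constant
shift. */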
38061 if ((value == 2 || value == 3)
38062 && cost->lea <= cost->shift_const)
38063 {
38064 *total = cost->lea;
38065 return false;
38066 }
38067 }
38068 /* FALLTHRU */
38069
38070 case ROTATE:
38071 case ASHIFTRT:
38072 case LSHIFTRT:
38073 case ROTATERT:
38074 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38075 {
38076 /* ??? Should be SSE vector operation cost. */
38077 /* At least for published AMD latencies, this really is the same
38078 as the latency for a simple fpu operation like fabs. */
38079 /* V*QImode is emulated with 1-11 insns. */
38080 if (mode == V16QImode || mode == V32QImode)
38081 {
38082 int count = 11;
38083 if (TARGET_XOP && mode == V16QImode)
38084 {
38085 /* For XOP we use vpshab, which requires a broadcast of the
38086 value to the variable shift insn. For constants this
38087 means a V16QImode constant in memory; even when we can perform the
38088 shift with one insn, set the cost to prefer paddb. */
38089 if (CONSTANT_P (XEXP (x, 1)))
38090 {
38091 *total = (cost->fabs
38092 + rtx_cost (XEXP (x, 0), code, 0, speed)
38093 + (speed ? 2 : COSTS_N_BYTES (16)));
38094 return true;
38095 }
38096 count = 3;
38097 }
38098 else if (TARGET_SSSE3)
38099 count = 7;
38100 *total = cost->fabs * count;
38101 }
38102 else
38103 *total = cost->fabs;
38104 }
38105 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38106 {
38107 if (CONST_INT_P (XEXP (x, 1)))
38108 {
38109 if (INTVAL (XEXP (x, 1)) > 32)
38110 *total = cost->shift_const + COSTS_N_INSNS (2);
38111 else
38112 *total = cost->shift_const * 2;
38113 }
38114 else
38115 {
38116 if (GET_CODE (XEXP (x, 1)) == AND)
38117 *total = cost->shift_var * 2;
38118 else
38119 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38120 }
38121 }
38122 else
38123 {
38124 if (CONST_INT_P (XEXP (x, 1)))
38125 *total = cost->shift_const;
38126 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38127 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38128 {
38129 /* Return the cost after shift-and truncation. */
38130 *total = cost->shift_var;
38131 return true;
38132 }
38133 else
38134 *total = cost->shift_var;
38135 }
38136 return false;
38137
38138 case FMA:
38139 {
38140 rtx sub;
38141
38142 gcc_assert (FLOAT_MODE_P (mode));
38143 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38144
38145 /* ??? SSE scalar/vector cost should be used here. */
38146 /* ??? Bald assumption that fma has the same cost as fmul. */
38147 *total = cost->fmul;
38148 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38149
38150 /* A negate in op0 or op2 is free: FMS, FNMA, FNMS. */
38151 sub = XEXP (x, 0);
38152 if (GET_CODE (sub) == NEG)
38153 sub = XEXP (sub, 0);
38154 *total += rtx_cost (sub, FMA, 0, speed);
38155
38156 sub = XEXP (x, 2);
38157 if (GET_CODE (sub) == NEG)
38158 sub = XEXP (sub, 0);
38159 *total += rtx_cost (sub, FMA, 2, speed);
38160 return true;
38161 }
38162
38163 case MULT:
38164 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38165 {
38166 /* ??? SSE scalar cost should be used here. */
38167 *total = cost->fmul;
38168 return false;
38169 }
38170 else if (X87_FLOAT_MODE_P (mode))
38171 {
38172 *total = cost->fmul;
38173 return false;
38174 }
38175 else if (FLOAT_MODE_P (mode))
38176 {
38177 /* ??? SSE vector cost should be used here. */
38178 *total = cost->fmul;
38179 return false;
38180 }
38181 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38182 {
38183 /* V*QImode is emulated with 7-13 insns. */
38184 if (mode == V16QImode || mode == V32QImode)
38185 {
38186 int extra = 11;
38187 if (TARGET_XOP && mode == V16QImode)
38188 extra = 5;
38189 else if (TARGET_SSSE3)
38190 extra = 6;
38191 *total = cost->fmul * 2 + cost->fabs * extra;
38192 }
38193 /* V*DImode is emulated with 5-8 insns. */
38194 else if (mode == V2DImode || mode == V4DImode)
38195 {
38196 if (TARGET_XOP && mode == V2DImode)
38197 *total = cost->fmul * 2 + cost->fabs * 3;
38198 else
38199 *total = cost->fmul * 3 + cost->fabs * 5;
38200 }
38201 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38202 insns, including two PMULUDQ. */
38203 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38204 *total = cost->fmul * 2 + cost->fabs * 5;
38205 else
38206 *total = cost->fmul;
38207 return false;
38208 }
38209 else
38210 {
38211 rtx op0 = XEXP (x, 0);
38212 rtx op1 = XEXP (x, 1);
38213 int nbits;
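/* For a constant multiplier, count its set bits (value &= value - 1 clears
the lowest set bit on each iteration); the cost model charges mult_bit
per set bit of the multiplier. */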
38214 if (CONST_INT_P (XEXP (x, 1)))
38215 {
38216 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38217 for (nbits = 0; value != 0; value &= value - 1)
38218 nbits++;
38219 }
38220 else
38221 /* This is arbitrary. */
38222 nbits = 7;
38223
38224 /* Compute costs correctly for widening multiplication. */
38225 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38226 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38227 == GET_MODE_SIZE (mode))
38228 {
38229 int is_mulwiden = 0;
38230 enum machine_mode inner_mode = GET_MODE (op0);
38231
38232 if (GET_CODE (op0) == GET_CODE (op1))
38233 is_mulwiden = 1, op1 = XEXP (op1, 0);
38234 else if (CONST_INT_P (op1))
38235 {
38236 if (GET_CODE (op0) == SIGN_EXTEND)
38237 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38238 == INTVAL (op1);
38239 else
38240 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38241 }
38242
38243 if (is_mulwiden)
38244 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38245 }
38246
38247 *total = (cost->mult_init[MODE_INDEX (mode)]
38248 + nbits * cost->mult_bit
38249 + rtx_cost (op0, outer_code, opno, speed)
38250 + rtx_cost (op1, outer_code, opno, speed));
38251
38252 return true;
38253 }
38254
38255 case DIV:
38256 case UDIV:
38257 case MOD:
38258 case UMOD:
38259 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38260 /* ??? SSE cost should be used here. */
38261 *total = cost->fdiv;
38262 else if (X87_FLOAT_MODE_P (mode))
38263 *total = cost->fdiv;
38264 else if (FLOAT_MODE_P (mode))
38265 /* ??? SSE vector cost should be used here. */
38266 *total = cost->fdiv;
38267 else
38268 *total = cost->divide[MODE_INDEX (mode)];
38269 return false;
38270
38271 case PLUS:
38272 if (GET_MODE_CLASS (mode) == MODE_INT
38273 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38274 {
38275 if (GET_CODE (XEXP (x, 0)) == PLUS
38276 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38277 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38278 && CONSTANT_P (XEXP (x, 1)))
38279 {
38280 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38281 if (val == 2 || val == 4 || val == 8)
38282 {
38283 *total = cost->lea;
38284 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38285 outer_code, opno, speed);
38286 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38287 outer_code, opno, speed);
38288 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38289 return true;
38290 }
38291 }
38292 else if (GET_CODE (XEXP (x, 0)) == MULT
38293 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38294 {
38295 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38296 if (val == 2 || val == 4 || val == 8)
38297 {
38298 *total = cost->lea;
38299 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38300 outer_code, opno, speed);
38301 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38302 return true;
38303 }
38304 }
38305 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38306 {
38307 *total = cost->lea;
38308 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38309 outer_code, opno, speed);
38310 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38311 outer_code, opno, speed);
38312 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38313 return true;
38314 }
38315 }
38316 /* FALLTHRU */
38317
38318 case MINUS:
38319 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38320 {
38321 /* ??? SSE cost should be used here. */
38322 *total = cost->fadd;
38323 return false;
38324 }
38325 else if (X87_FLOAT_MODE_P (mode))
38326 {
38327 *total = cost->fadd;
38328 return false;
38329 }
38330 else if (FLOAT_MODE_P (mode))
38331 {
38332 /* ??? SSE vector cost should be used here. */
38333 *total = cost->fadd;
38334 return false;
38335 }
38336 /* FALLTHRU */
38337
38338 case AND:
38339 case IOR:
38340 case XOR:
38341 if (GET_MODE_CLASS (mode) == MODE_INT
38342 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38343 {
38344 *total = (cost->add * 2
38345 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38346 << (GET_MODE (XEXP (x, 0)) != DImode))
38347 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38348 << (GET_MODE (XEXP (x, 1)) != DImode)));
38349 return true;
38350 }
38351 /* FALLTHRU */
38352
38353 case NEG:
38354 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38355 {
38356 /* ??? SSE cost should be used here. */
38357 *total = cost->fchs;
38358 return false;
38359 }
38360 else if (X87_FLOAT_MODE_P (mode))
38361 {
38362 *total = cost->fchs;
38363 return false;
38364 }
38365 else if (FLOAT_MODE_P (mode))
38366 {
38367 /* ??? SSE vector cost should be used here. */
38368 *total = cost->fchs;
38369 return false;
38370 }
38371 /* FALLTHRU */
38372
38373 case NOT:
38374 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38375 {
38376 /* ??? Should be SSE vector operation cost. */
38377 /* At least for published AMD latencies, this really is the same
38378 as the latency for a simple fpu operation like fabs. */
38379 *total = cost->fabs;
38380 }
38381 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38382 *total = cost->add * 2;
38383 else
38384 *total = cost->add;
38385 return false;
38386
38387 case COMPARE:
38388 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38389 && XEXP (XEXP (x, 0), 1) == const1_rtx
38390 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38391 && XEXP (x, 1) == const0_rtx)
38392 {
38393 /* This kind of construct is implemented using test[bwl].
38394 Treat it as if we had an AND. */
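/* The matched RTL is a single-bit test of the form
(compare (zero_extract X (const_int 1) (const_int N)) (const_int 0)). */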
38395 *total = (cost->add
38396 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38397 + rtx_cost (const1_rtx, outer_code, opno, speed));
38398 return true;
38399 }
38400 return false;
38401
38402 case FLOAT_EXTEND:
38403 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38404 *total = 0;
38405 return false;
38406
38407 case ABS:
38408 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38409 /* ??? SSE cost should be used here. */
38410 *total = cost->fabs;
38411 else if (X87_FLOAT_MODE_P (mode))
38412 *total = cost->fabs;
38413 else if (FLOAT_MODE_P (mode))
38414 /* ??? SSE vector cost should be used here. */
38415 *total = cost->fabs;
38416 return false;
38417
38418 case SQRT:
38419 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38420 /* ??? SSE cost should be used here. */
38421 *total = cost->fsqrt;
38422 else if (X87_FLOAT_MODE_P (mode))
38423 *total = cost->fsqrt;
38424 else if (FLOAT_MODE_P (mode))
38425 /* ??? SSE vector cost should be used here. */
38426 *total = cost->fsqrt;
38427 return false;
38428
38429 case UNSPEC:
38430 if (XINT (x, 1) == UNSPEC_TP)
38431 *total = 0;
38432 return false;
38433
38434 case VEC_SELECT:
38435 case VEC_CONCAT:
38436 case VEC_DUPLICATE:
38437 /* ??? Assume all of these vector manipulation patterns are
38438 recognizable, in which case they all pretty much have the
38439 same cost. */
38440 *total = cost->fabs;
38441 return true;
38442 case VEC_MERGE:
38443 mask = XEXP (x, 2);
38444 /* This is a masked instruction; assume the same cost
38445 as the nonmasked variant. */
38446 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38447 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38448 else
38449 *total = cost->fabs;
38450 return true;
38451
38452 default:
38453 return false;
38454 }
38455 }
38456
38457 #if TARGET_MACHO
38458
38459 static int current_machopic_label_num;
38460
38461 /* Given a symbol name and its associated stub, write out the
38462 definition of the stub. */
38463
38464 void
38465 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38466 {
38467 unsigned int length;
38468 char *binder_name, *symbol_name, lazy_ptr_name[32];
38469 int label = ++current_machopic_label_num;
38470
38471 /* For 64-bit we shouldn't get here. */
38472 gcc_assert (!TARGET_64BIT);
38473
38474 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38475 symb = targetm.strip_name_encoding (symb);
38476
38477 length = strlen (stub);
38478 binder_name = XALLOCAVEC (char, length + 32);
38479 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38480
38481 length = strlen (symb);
38482 symbol_name = XALLOCAVEC (char, length + 32);
38483 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38484
38485 sprintf (lazy_ptr_name, "L%d$lz", label);
38486
38487 if (MACHOPIC_ATT_STUB)
38488 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38489 else if (MACHOPIC_PURE)
38490 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38491 else
38492 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38493
38494 fprintf (file, "%s:\n", stub);
38495 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38496
38497 if (MACHOPIC_ATT_STUB)
38498 {
38499 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38500 }
38501 else if (MACHOPIC_PURE)
38502 {
38503 /* PIC stub. */
38504 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38505 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38506 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38507 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38508 label, lazy_ptr_name, label);
38509 fprintf (file, "\tjmp\t*%%ecx\n");
38510 }
38511 else
38512 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38513
38514 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38515 it needs no stub-binding-helper. */
38516 if (MACHOPIC_ATT_STUB)
38517 return;
38518
38519 fprintf (file, "%s:\n", binder_name);
38520
38521 if (MACHOPIC_PURE)
38522 {
38523 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38524 fprintf (file, "\tpushl\t%%ecx\n");
38525 }
38526 else
38527 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38528
38529 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38530
38531 /* N.B. Keep the correspondence of these
38532 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38533 old-pic/new-pic/non-pic stubs; altering this will break
38534 compatibility with existing dylibs. */
38535 if (MACHOPIC_PURE)
38536 {
38537 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38538 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38539 }
38540 else
38541 /* 16-byte -mdynamic-no-pic stub. */
38542 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
38543
38544 fprintf (file, "%s:\n", lazy_ptr_name);
38545 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38546 fprintf (file, ASM_LONG "%s\n", binder_name);
38547 }
38548 #endif /* TARGET_MACHO */
38549
38550 /* Order the registers for register allocator. */
38551
38552 void
38553 x86_order_regs_for_local_alloc (void)
38554 {
38555 int pos = 0;
38556 int i;
38557
38558 /* First allocate the local general purpose registers. */
38559 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38560 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38561 reg_alloc_order [pos++] = i;
38562
38563 /* Global general purpose registers. */
38564 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38565 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38566 reg_alloc_order [pos++] = i;
38567
38568 /* x87 registers come first in case we are doing FP math
38569 using them. */
38570 if (!TARGET_SSE_MATH)
38571 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38572 reg_alloc_order [pos++] = i;
38573
38574 /* SSE registers. */
38575 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38576 reg_alloc_order [pos++] = i;
38577 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38578 reg_alloc_order [pos++] = i;
38579
38580 /* Extended REX SSE registers. */
38581 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38582 reg_alloc_order [pos++] = i;
38583
38584 /* Mask register. */
38585 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38586 reg_alloc_order [pos++] = i;
38587
38588 /* x87 registers. */
38589 if (TARGET_SSE_MATH)
38590 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38591 reg_alloc_order [pos++] = i;
38592
38593 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38594 reg_alloc_order [pos++] = i;
38595
38596 /* Initialize the rest of the array, as we do not allocate some
38597 registers at all. */
38598 while (pos < FIRST_PSEUDO_REGISTER)
38599 reg_alloc_order [pos++] = 0;
38600 }
38601
38602 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38603 in struct attribute_spec.handler. */
38604 static tree
38605 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38606 tree args,
38607 int,
38608 bool *no_add_attrs)
38609 {
38610 if (TREE_CODE (*node) != FUNCTION_TYPE
38611 && TREE_CODE (*node) != METHOD_TYPE
38612 && TREE_CODE (*node) != FIELD_DECL
38613 && TREE_CODE (*node) != TYPE_DECL)
38614 {
38615 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38616 name);
38617 *no_add_attrs = true;
38618 return NULL_TREE;
38619 }
38620 if (TARGET_64BIT)
38621 {
38622 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38623 name);
38624 *no_add_attrs = true;
38625 return NULL_TREE;
38626 }
38627 if (is_attribute_p ("callee_pop_aggregate_return", name))
38628 {
38629 tree cst;
38630
38631 cst = TREE_VALUE (args);
38632 if (TREE_CODE (cst) != INTEGER_CST)
38633 {
38634 warning (OPT_Wattributes,
38635 "%qE attribute requires an integer constant argument",
38636 name);
38637 *no_add_attrs = true;
38638 }
38639 else if (compare_tree_int (cst, 0) != 0
38640 && compare_tree_int (cst, 1) != 0)
38641 {
38642 warning (OPT_Wattributes,
38643 "argument to %qE attribute is neither zero, nor one",
38644 name);
38645 *no_add_attrs = true;
38646 }
38647
38648 return NULL_TREE;
38649 }
38650
38651 return NULL_TREE;
38652 }
38653
38654 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
38655 struct attribute_spec.handler. */
38656 static tree
38657 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
38658 bool *no_add_attrs)
38659 {
38660 if (TREE_CODE (*node) != FUNCTION_TYPE
38661 && TREE_CODE (*node) != METHOD_TYPE
38662 && TREE_CODE (*node) != FIELD_DECL
38663 && TREE_CODE (*node) != TYPE_DECL)
38664 {
38665 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38666 name);
38667 *no_add_attrs = true;
38668 return NULL_TREE;
38669 }
38670
38671 /* Can combine regparm with all attributes but fastcall. */
38672 if (is_attribute_p ("ms_abi", name))
38673 {
38674 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38675 {
38676 error ("ms_abi and sysv_abi attributes are not compatible");
38677 }
38678
38679 return NULL_TREE;
38680 }
38681 else if (is_attribute_p ("sysv_abi", name))
38682 {
38683 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38684 {
38685 error ("ms_abi and sysv_abi attributes are not compatible");
38686 }
38687
38688 return NULL_TREE;
38689 }
38690
38691 return NULL_TREE;
38692 }
38693
38694 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38695 struct attribute_spec.handler. */
38696 static tree
38697 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
38698 bool *no_add_attrs)
38699 {
38700 tree *type = NULL;
38701 if (DECL_P (*node))
38702 {
38703 if (TREE_CODE (*node) == TYPE_DECL)
38704 type = &TREE_TYPE (*node);
38705 }
38706 else
38707 type = node;
38708
38709 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38710 {
38711 warning (OPT_Wattributes, "%qE attribute ignored",
38712 name);
38713 *no_add_attrs = true;
38714 }
38715
38716 else if ((is_attribute_p ("ms_struct", name)
38717 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38718 || ((is_attribute_p ("gcc_struct", name)
38719 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38720 {
38721 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38722 name);
38723 *no_add_attrs = true;
38724 }
38725
38726 return NULL_TREE;
38727 }
38728
38729 static tree
38730 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
38731 bool *no_add_attrs)
38732 {
38733 if (TREE_CODE (*node) != FUNCTION_DECL)
38734 {
38735 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38736 name);
38737 *no_add_attrs = true;
38738 }
38739 return NULL_TREE;
38740 }
38741
38742 static bool
38743 ix86_ms_bitfield_layout_p (const_tree record_type)
38744 {
38745 return ((TARGET_MS_BITFIELD_LAYOUT
38746 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38747 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38748 }
38749
38750 /* Returns an expression indicating where the this parameter is
38751 located on entry to the FUNCTION. */
38752
38753 static rtx
38754 x86_this_parameter (tree function)
38755 {
38756 tree type = TREE_TYPE (function);
38757 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38758 int nregs;
38759
38760 if (TARGET_64BIT)
38761 {
38762 const int *parm_regs;
38763
38764 if (ix86_function_type_abi (type) == MS_ABI)
38765 parm_regs = x86_64_ms_abi_int_parameter_registers;
38766 else
38767 parm_regs = x86_64_int_parameter_registers;
38768 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38769 }
38770
38771 nregs = ix86_function_regparm (type, function);
38772
38773 if (nregs > 0 && !stdarg_p (type))
38774 {
38775 int regno;
38776 unsigned int ccvt = ix86_get_callcvt (type);
38777
38778 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38779 regno = aggr ? DX_REG : CX_REG;
38780 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38781 {
38782 regno = CX_REG;
38783 if (aggr)
38784 return gen_rtx_MEM (SImode,
38785 plus_constant (Pmode, stack_pointer_rtx, 4));
38786 }
38787 else
38788 {
38789 regno = AX_REG;
38790 if (aggr)
38791 {
38792 regno = DX_REG;
38793 if (nregs == 1)
38794 return gen_rtx_MEM (SImode,
38795 plus_constant (Pmode,
38796 stack_pointer_rtx, 4));
38797 }
38798 }
38799 return gen_rtx_REG (SImode, regno);
38800 }
38801
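/* Otherwise `this' is passed on the stack: at entry 0(%esp) holds the
return address, so the first argument slot is 4(%esp), or 8(%esp) when a
hidden aggregate-return pointer occupies the first slot. */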
38802 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38803 aggr ? 8 : 4));
38804 }
38805
38806 /* Determine whether x86_output_mi_thunk can succeed. */
38807
38808 static bool
38809 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
38810 const_tree function)
38811 {
38812 /* 64-bit can handle anything. */
38813 if (TARGET_64BIT)
38814 return true;
38815
38816 /* For 32-bit, everything's fine if we have one free register. */
38817 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38818 return true;
38819
38820 /* Need a free register for vcall_offset. */
38821 if (vcall_offset)
38822 return false;
38823
38824 /* Need a free register for GOT references. */
38825 if (flag_pic && !targetm.binds_local_p (function))
38826 return false;
38827
38828 /* Otherwise ok. */
38829 return true;
38830 }
38831
38832 /* Output the assembler code for a thunk function. THUNK_DECL is the
38833 declaration for the thunk function itself, FUNCTION is the decl for
38834 the target function. DELTA is an immediate constant offset to be
38835 added to THIS. If VCALL_OFFSET is nonzero, the word at
38836 *(*this + vcall_offset) should be added to THIS. */
38837
38838 static void
38839 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
38840 HOST_WIDE_INT vcall_offset, tree function)
38841 {
38842 rtx this_param = x86_this_parameter (function);
38843 rtx this_reg, tmp, fnaddr;
38844 unsigned int tmp_regno;
38845 rtx_insn *insn;
38846
38847 if (TARGET_64BIT)
38848 tmp_regno = R10_REG;
38849 else
38850 {
38851 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38852 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38853 tmp_regno = AX_REG;
38854 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38855 tmp_regno = DX_REG;
38856 else
38857 tmp_regno = CX_REG;
38858 }
38859
38860 emit_note (NOTE_INSN_PROLOGUE_END);
38861
38862 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38863 pull it in now and let DELTA benefit. */
38864 if (REG_P (this_param))
38865 this_reg = this_param;
38866 else if (vcall_offset)
38867 {
38868 /* Put the this parameter into %eax. */
38869 this_reg = gen_rtx_REG (Pmode, AX_REG);
38870 emit_move_insn (this_reg, this_param);
38871 }
38872 else
38873 this_reg = NULL_RTX;
38874
38875 /* Adjust the this parameter by a fixed constant. */
38876 if (delta)
38877 {
38878 rtx delta_rtx = GEN_INT (delta);
38879 rtx delta_dst = this_reg ? this_reg : this_param;
38880
38881 if (TARGET_64BIT)
38882 {
38883 if (!x86_64_general_operand (delta_rtx, Pmode))
38884 {
38885 tmp = gen_rtx_REG (Pmode, tmp_regno);
38886 emit_move_insn (tmp, delta_rtx);
38887 delta_rtx = tmp;
38888 }
38889 }
38890
38891 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38892 }
38893
38894 /* Adjust the this parameter by a value stored in the vtable. */
38895 if (vcall_offset)
38896 {
38897 rtx vcall_addr, vcall_mem, this_mem;
38898
38899 tmp = gen_rtx_REG (Pmode, tmp_regno);
38900
38901 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38902 if (Pmode != ptr_mode)
38903 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38904 emit_move_insn (tmp, this_mem);
38905
38906 /* Adjust the this parameter. */
38907 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38908 if (TARGET_64BIT
38909 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38910 {
38911 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38912 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38913 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38914 }
38915
38916 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38917 if (Pmode != ptr_mode)
38918 emit_insn (gen_addsi_1_zext (this_reg,
38919 gen_rtx_REG (ptr_mode,
38920 REGNO (this_reg)),
38921 vcall_mem));
38922 else
38923 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38924 }
38925
38926 /* If necessary, drop THIS back to its stack slot. */
38927 if (this_reg && this_reg != this_param)
38928 emit_move_insn (this_param, this_reg);
38929
38930 fnaddr = XEXP (DECL_RTL (function), 0);
38931 if (TARGET_64BIT)
38932 {
38933 if (!flag_pic || targetm.binds_local_p (function)
38934 || TARGET_PECOFF)
38935 ;
38936 else
38937 {
38938 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
38939 tmp = gen_rtx_CONST (Pmode, tmp);
38940 fnaddr = gen_const_mem (Pmode, tmp);
38941 }
38942 }
38943 else
38944 {
38945 if (!flag_pic || targetm.binds_local_p (function))
38946 ;
38947 #if TARGET_MACHO
38948 else if (TARGET_MACHO)
38949 {
38950 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
38951 fnaddr = XEXP (fnaddr, 0);
38952 }
38953 #endif /* TARGET_MACHO */
38954 else
38955 {
38956 tmp = gen_rtx_REG (Pmode, CX_REG);
38957 output_set_got (tmp, NULL_RTX);
38958
38959 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
38960 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
38961 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
38962 fnaddr = gen_const_mem (Pmode, fnaddr);
38963 }
38964 }
38965
38966 /* Our sibling call patterns do not allow memories, because we have no
38967 predicate that can distinguish between frame and non-frame memory.
38968 For our purposes here, we can get away with (ab)using a jump pattern,
38969 because we're going to do no optimization. */
38970 if (MEM_P (fnaddr))
38971 {
38972 if (sibcall_insn_operand (fnaddr, word_mode))
38973 {
38974 tmp = gen_rtx_CALL (VOIDmode, fnaddr, const0_rtx);
38975 tmp = emit_call_insn (tmp);
38976 SIBLING_CALL_P (tmp) = 1;
38977 }
38978 else
38979 emit_jump_insn (gen_indirect_jump (fnaddr));
38980 }
38981 else
38982 {
38983 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
38984 fnaddr = legitimize_pic_address (fnaddr,
38985 gen_rtx_REG (Pmode, tmp_regno));
38986
38987 if (!sibcall_insn_operand (fnaddr, word_mode))
38988 {
38989 tmp = gen_rtx_REG (word_mode, tmp_regno);
38990 if (GET_MODE (fnaddr) != word_mode)
38991 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
38992 emit_move_insn (tmp, fnaddr);
38993 fnaddr = tmp;
38994 }
38995
38996 tmp = gen_rtx_MEM (QImode, fnaddr);
38997 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
38998 tmp = emit_call_insn (tmp);
38999 SIBLING_CALL_P (tmp) = 1;
39000 }
39001 emit_barrier ();
39002
39003 /* Emit just enough of rest_of_compilation to get the insns emitted.
39004 Note that use_thunk calls assemble_start_function et al. */
39005 insn = get_insns ();
39006 shorten_branches (insn);
39007 final_start_function (insn, file, 1);
39008 final (insn, file, 1);
39009 final_end_function ();
39010 }
39011
39012 static void
39013 x86_file_start (void)
39014 {
39015 default_file_start ();
39016 if (TARGET_16BIT)
39017 fputs ("\t.code16gcc\n", asm_out_file);
39018 #if TARGET_MACHO
39019 darwin_file_start ();
39020 #endif
39021 if (X86_FILE_START_VERSION_DIRECTIVE)
39022 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
39023 if (X86_FILE_START_FLTUSED)
39024 fputs ("\t.global\t__fltused\n", asm_out_file);
39025 if (ix86_asm_dialect == ASM_INTEL)
39026 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
39027 }
39028
39029 int
39030 x86_field_alignment (tree field, int computed)
39031 {
39032 enum machine_mode mode;
39033 tree type = TREE_TYPE (field);
39034
39035 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
39036 return computed;
39037 mode = TYPE_MODE (strip_array_types (type));
39038 if (mode == DFmode || mode == DCmode
39039 || GET_MODE_CLASS (mode) == MODE_INT
39040 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
39041 return MIN (32, computed);
39042 return computed;
39043 }
39044
39045 /* Output assembler code to FILE to increment profiler label # LABELNO
39046 for profiling a function entry. */
39047 void
39048 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
39049 {
39050 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
39051 : MCOUNT_NAME);
39052
39053 if (TARGET_64BIT)
39054 {
39055 #ifndef NO_PROFILE_COUNTERS
39056 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
39057 #endif
39058
39059 if (!TARGET_PECOFF && flag_pic)
39060 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
39061 else
39062 fprintf (file, "\tcall\t%s\n", mcount_name);
39063 }
39064 else if (flag_pic)
39065 {
39066 #ifndef NO_PROFILE_COUNTERS
39067 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
39068 LPREFIX, labelno);
39069 #endif
39070 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
39071 }
39072 else
39073 {
39074 #ifndef NO_PROFILE_COUNTERS
39075 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
39076 LPREFIX, labelno);
39077 #endif
39078 fprintf (file, "\tcall\t%s\n", mcount_name);
39079 }
39080 }
39081
39082 /* We don't have exact information about the insn sizes, but we may assume
39083 quite safely that we are informed about all 1 byte insns and memory
39084 address sizes. This is enough to eliminate unnecessary padding in
39085 99% of cases. */
39086
39087 static int
39088 min_insn_size (rtx insn)
39089 {
39090 int l = 0, len;
39091
39092 if (!INSN_P (insn) || !active_insn_p (insn))
39093 return 0;
39094
39095 /* Discard alignments we've emitted, and jump instructions. */
39096 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
39097 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
39098 return 0;
39099
39100 /* Important case - calls are always 5 bytes.
39101 It is common to have many calls in a row. */
39102 if (CALL_P (insn)
39103 && symbolic_reference_mentioned_p (PATTERN (insn))
39104 && !SIBLING_CALL_P (insn))
39105 return 5;
39106 len = get_attr_length (insn);
39107 if (len <= 1)
39108 return 1;
39109
39110 /* For normal instructions we rely on get_attr_length being exact,
39111 with a few exceptions. */
39112 if (!JUMP_P (insn))
39113 {
39114 enum attr_type type = get_attr_type (insn);
39115
39116 switch (type)
39117 {
39118 case TYPE_MULTI:
39119 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39120 || asm_noperands (PATTERN (insn)) >= 0)
39121 return 0;
39122 break;
39123 case TYPE_OTHER:
39124 case TYPE_FCMP:
39125 break;
39126 default:
39127 /* Otherwise trust get_attr_length. */
39128 return len;
39129 }
39130
39131 l = get_attr_length_address (insn);
39132 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39133 l = 4;
39134 }
39135 if (l)
39136 return 1+l;
39137 else
39138 return 2;
39139 }
39140
39141 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39142
39143 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
39144 window. */
39145
39146 static void
39147 ix86_avoid_jump_mispredicts (void)
39148 {
39149 rtx_insn *insn, *start = get_insns ();
39150 int nbytes = 0, njumps = 0;
39151 int isjump = 0;
39152
39153 /* Look for all minimal intervals of instructions containing 4 jumps.
39154 The intervals are bounded by START and INSN. NBYTES is the total
39155 size of instructions in the interval including INSN and not including
39156 START. When NBYTES is smaller than 16 bytes, it is possible
39157 that the ends of START and INSN fall in the same 16-byte page.
39158
39159 The smallest offset in the page at which INSN can start is the case where
39160 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
39161 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
39162
39163 Don't consider an asm goto as a jump; while it can contain a jump, it doesn't
39164 have to, since control transfer to its label(s) can be performed through other
39165 means, and we also estimate the minimum length of all asm stmts as 0. */
39166 for (insn = start; insn; insn = NEXT_INSN (insn))
39167 {
39168 int min_size;
39169
39170 if (LABEL_P (insn))
39171 {
39172 int align = label_to_alignment (insn);
39173 int max_skip = label_to_max_skip (insn);
39174
39175 if (max_skip > 15)
39176 max_skip = 15;
39177 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39178 already in the current 16 byte page, because otherwise
39179 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39180 bytes to reach 16 byte boundary. */
39181 if (align <= 0
39182 || (align <= 3 && max_skip != (1 << align) - 1))
39183 max_skip = 0;
39184 if (dump_file)
39185 fprintf (dump_file, "Label %i with max_skip %i\n",
39186 INSN_UID (insn), max_skip);
39187 if (max_skip)
39188 {
39189 while (nbytes + max_skip >= 16)
39190 {
39191 start = NEXT_INSN (start);
39192 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39193 || CALL_P (start))
39194 njumps--, isjump = 1;
39195 else
39196 isjump = 0;
39197 nbytes -= min_insn_size (start);
39198 }
39199 }
39200 continue;
39201 }
39202
39203 min_size = min_insn_size (insn);
39204 nbytes += min_size;
39205 if (dump_file)
39206 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39207 INSN_UID (insn), min_size);
39208 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39209 || CALL_P (insn))
39210 njumps++;
39211 else
39212 continue;
39213
39214 while (njumps > 3)
39215 {
39216 start = NEXT_INSN (start);
39217 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39218 || CALL_P (start))
39219 njumps--, isjump = 1;
39220 else
39221 isjump = 0;
39222 nbytes -= min_insn_size (start);
39223 }
39224 gcc_assert (njumps >= 0);
39225 if (dump_file)
39226 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39227 INSN_UID (start), INSN_UID (insn), nbytes);
39228
39229 if (njumps == 3 && isjump && nbytes < 16)
39230 {
39231 int padsize = 15 - nbytes + min_insn_size (insn);
39232
39233 if (dump_file)
39234 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39235 INSN_UID (insn), padsize);
39236 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39237 }
39238 }
39239 }
39240 #endif
39241
39242 /* AMD Athlon works faster
39243 when RET is not the destination of a conditional jump or directly preceded
39244 by another jump instruction. We avoid the penalty by inserting a NOP just
39245 before the RET instruction in such cases. */
39246 static void
39247 ix86_pad_returns (void)
39248 {
39249 edge e;
39250 edge_iterator ei;
39251
39252 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39253 {
39254 basic_block bb = e->src;
39255 rtx_insn *ret = BB_END (bb);
39256 rtx_insn *prev;
39257 bool replace = false;
39258
39259 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39260 || optimize_bb_for_size_p (bb))
39261 continue;
39262 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39263 if (active_insn_p (prev) || LABEL_P (prev))
39264 break;
39265 if (prev && LABEL_P (prev))
39266 {
39267 edge e;
39268 edge_iterator ei;
39269
39270 FOR_EACH_EDGE (e, ei, bb->preds)
39271 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39272 && !(e->flags & EDGE_FALLTHRU))
39273 {
39274 replace = true;
39275 break;
39276 }
39277 }
39278 if (!replace)
39279 {
39280 prev = prev_active_insn (ret);
39281 if (prev
39282 && ((JUMP_P (prev) && any_condjump_p (prev))
39283 || CALL_P (prev)))
39284 replace = true;
39285 /* Empty functions get a branch mispredict even when
39286 the jump destination is not visible to us. */
39287 if (!prev && !optimize_function_for_size_p (cfun))
39288 replace = true;
39289 }
39290 if (replace)
39291 {
39292 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39293 delete_insn (ret);
39294 }
39295 }
39296 }
39297
39298 /* Count the minimum number of instructions in BB. Return 4 if the
39299 number of instructions >= 4. */
39300
39301 static int
39302 ix86_count_insn_bb (basic_block bb)
39303 {
39304 rtx_insn *insn;
39305 int insn_count = 0;
39306
39307 /* Count number of instructions in this block. Return 4 if the number
39308 of instructions >= 4. */
39309 FOR_BB_INSNS (bb, insn)
39310 {
39311 /* This only happens in exit blocks. */
39312 if (JUMP_P (insn)
39313 && ANY_RETURN_P (PATTERN (insn)))
39314 break;
39315
39316 if (NONDEBUG_INSN_P (insn)
39317 && GET_CODE (PATTERN (insn)) != USE
39318 && GET_CODE (PATTERN (insn)) != CLOBBER)
39319 {
39320 insn_count++;
39321 if (insn_count >= 4)
39322 return insn_count;
39323 }
39324 }
39325
39326 return insn_count;
39327 }
39328
39329
39330 /* Count the minimum number of instructions in code path in BB.
39331 Return 4 if the number of instructions >= 4. */
39332
39333 static int
39334 ix86_count_insn (basic_block bb)
39335 {
39336 edge e;
39337 edge_iterator ei;
39338 int min_prev_count;
39339
39340 /* Only bother counting instructions along paths with no
39341 more than 2 basic blocks between entry and exit. Given
39342 that BB has an edge to exit, determine if a predecessor
39343 of BB has an edge from entry. If so, compute the number
39344 of instructions in the predecessor block. If there
39345 happen to be multiple such blocks, compute the minimum. */
39346 min_prev_count = 4;
39347 FOR_EACH_EDGE (e, ei, bb->preds)
39348 {
39349 edge prev_e;
39350 edge_iterator prev_ei;
39351
39352 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39353 {
39354 min_prev_count = 0;
39355 break;
39356 }
39357 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39358 {
39359 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39360 {
39361 int count = ix86_count_insn_bb (e->src);
39362 if (count < min_prev_count)
39363 min_prev_count = count;
39364 break;
39365 }
39366 }
39367 }
39368
39369 if (min_prev_count < 4)
39370 min_prev_count += ix86_count_insn_bb (bb);
39371
39372 return min_prev_count;
39373 }
39374
39375 /* Pad short function to 4 instructions. */
39376
39377 static void
39378 ix86_pad_short_function (void)
39379 {
39380 edge e;
39381 edge_iterator ei;
39382
39383 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39384 {
39385 rtx_insn *ret = BB_END (e->src);
39386 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39387 {
39388 int insn_count = ix86_count_insn (e->src);
39389
39390 /* Pad short function. */
39391 if (insn_count < 4)
39392 {
39393 rtx_insn *insn = ret;
39394
39395 /* Find epilogue. */
39396 while (insn
39397 && (!NOTE_P (insn)
39398 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39399 insn = PREV_INSN (insn);
39400
39401 if (!insn)
39402 insn = ret;
39403
39404 /* Two NOPs count as one instruction. */
39405 insn_count = 2 * (4 - insn_count);
39406 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39407 }
39408 }
39409 }
39410 }
39411
39412 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39413 the epilogue, the Windows system unwinder will apply epilogue logic and
39414 produce incorrect offsets. This can be avoided by adding a nop between
39415 the last insn that can throw and the first insn of the epilogue. */
39416
39417 static void
39418 ix86_seh_fixup_eh_fallthru (void)
39419 {
39420 edge e;
39421 edge_iterator ei;
39422
39423 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39424 {
39425 rtx_insn *insn, *next;
39426
39427 /* Find the beginning of the epilogue. */
39428 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39429 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39430 break;
39431 if (insn == NULL)
39432 continue;
39433
39434 /* We only care about preceding insns that can throw. */
39435 insn = prev_active_insn (insn);
39436 if (insn == NULL || !can_throw_internal (insn))
39437 continue;
39438
39439 /* Do not separate calls from their debug information. */
39440 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39441 if (NOTE_P (next)
39442 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39443 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39444 insn = next;
39445 else
39446 break;
39447
39448 emit_insn_after (gen_nops (const1_rtx), insn);
39449 }
39450 }
39451
39452 /* Implement machine-specific optimizations. We implement padding of returns
39453 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
39454 static void
39455 ix86_reorg (void)
39456 {
39457 /* We are freeing block_for_insn in the toplev to keep compatibility
39458 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39459 compute_bb_for_insn ();
39460
39461 if (TARGET_SEH && current_function_has_exception_handlers ())
39462 ix86_seh_fixup_eh_fallthru ();
39463
39464 if (optimize && optimize_function_for_speed_p (cfun))
39465 {
39466 if (TARGET_PAD_SHORT_FUNCTION)
39467 ix86_pad_short_function ();
39468 else if (TARGET_PAD_RETURNS)
39469 ix86_pad_returns ();
39470 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39471 if (TARGET_FOUR_JUMP_LIMIT)
39472 ix86_avoid_jump_mispredicts ();
39473 #endif
39474 }
39475 }
39476
39477 /* Return nonzero when a QImode register that must be represented via a REX
39478 prefix is used. */
39479 bool
39480 x86_extended_QIreg_mentioned_p (rtx insn)
39481 {
39482 int i;
39483 extract_insn_cached (insn);
39484 for (i = 0; i < recog_data.n_operands; i++)
39485 if (GENERAL_REG_P (recog_data.operand[i])
39486 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39487 return true;
39488 return false;
39489 }
39490
39491 /* Return nonzero when P points to a register encoded via a REX prefix.
39492 Called via for_each_rtx. */
39493 static int
39494 extended_reg_mentioned_1 (rtx *p, void *)
39495 {
39496 unsigned int regno;
39497 if (!REG_P (*p))
39498 return 0;
39499 regno = REGNO (*p);
39500 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39501 }
39502
39503 /* Return true when INSN mentions register that must be encoded using REX
39504 prefix. */
39505 bool
39506 x86_extended_reg_mentioned_p (rtx insn)
39507 {
39508 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39509 extended_reg_mentioned_1, NULL);
39510 }
39511
39512 /* If profitable, negate (without causing overflow) integer constant
39513 of mode MODE at location LOC. Return true in this case. */
39514 bool
39515 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39516 {
39517 HOST_WIDE_INT val;
39518
39519 if (!CONST_INT_P (*loc))
39520 return false;
39521
39522 switch (mode)
39523 {
39524 case DImode:
39525 /* DImode x86_64 constants must fit in 32 bits. */
39526 gcc_assert (x86_64_immediate_operand (*loc, mode));
39527
39528 mode = SImode;
39529 break;
39530
39531 case SImode:
39532 case HImode:
39533 case QImode:
39534 break;
39535
39536 default:
39537 gcc_unreachable ();
39538 }
39539
39540 /* Avoid overflows. */
39541 if (mode_signbit_p (mode, *loc))
39542 return false;
39543
39544 val = INTVAL (*loc);
39545
39546 /* Make things pretty: emit `subl $4,%eax' rather than `addl $-4,%eax'.
39547 Exception: -128 encodes smaller than 128, so swap sign and op. */
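/* For example, 128 is negated to -128 so that the caller can use the
opposite operation (e.g. sub $-128), keeping the immediate within the
sign-extended 8-bit range. */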
39548 if ((val < 0 && val != -128)
39549 || val == 128)
39550 {
39551 *loc = GEN_INT (-val);
39552 return true;
39553 }
39554
39555 return false;
39556 }
39557
39558 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39559 optabs would emit if we didn't have TFmode patterns. */
39560
39561 void
39562 x86_emit_floatuns (rtx operands[2])
39563 {
39564 rtx_code_label *neglab, *donelab;
39565 rtx i0, i1, f0, in, out;
39566 enum machine_mode mode, inmode;
39567
39568 inmode = GET_MODE (operands[1]);
39569 gcc_assert (inmode == SImode || inmode == DImode);
39570
39571 out = operands[0];
39572 in = force_reg (inmode, operands[1]);
39573 mode = GET_MODE (out);
39574 neglab = gen_label_rtx ();
39575 donelab = gen_label_rtx ();
39576 f0 = gen_reg_rtx (mode);
39577
39578 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39579
39580 expand_float (out, in, 0);
39581
39582 emit_jump_insn (gen_jump (donelab));
39583 emit_barrier ();
39584
39585 emit_label (neglab);
39586
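/* The value has the sign bit set, so a signed conversion would be wrong.
Halve it first, folding the discarded low bit back in so that the final
rounding is still correct, convert, and then double the result. */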
39587 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39588 1, OPTAB_DIRECT);
39589 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39590 1, OPTAB_DIRECT);
39591 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39592
39593 expand_float (f0, i0, 0);
39594
39595 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39596
39597 emit_label (donelab);
39598 }
39599 \f
39600 /* AVX512F does support 64-byte integer vector operations,
39601 thus the longest vector we are faced with is V64QImode. */
39602 #define MAX_VECT_LEN 64
39603
39604 struct expand_vec_perm_d
39605 {
39606 rtx target, op0, op1;
39607 unsigned char perm[MAX_VECT_LEN];
39608 enum machine_mode vmode;
39609 unsigned char nelt;
39610 bool one_operand_p;
39611 bool testing_p;
39612 };
39613
39614 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39615 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39616 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39617
39618 /* Get a vector mode of the same size as the original but with elements
39619 twice as wide. This is only guaranteed to apply to integral vectors. */
39620
39621 static inline enum machine_mode
39622 get_mode_wider_vector (enum machine_mode o)
39623 {
39624 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39625 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39626 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39627 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39628 return n;
39629 }
39630
39631 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39632 fill target with val via vec_duplicate. */
39633
39634 static bool
39635 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39636 {
39637 bool ok;
39638 rtx_insn *insn;
39639 rtx dup;
39640
39641 /* First attempt to recognize VAL as-is. */
39642 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39643 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39644 if (recog_memoized (insn) < 0)
39645 {
39646 rtx_insn *seq;
39647 /* If that fails, force VAL into a register. */
39648
39649 start_sequence ();
39650 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39651 seq = get_insns ();
39652 end_sequence ();
39653 if (seq)
39654 emit_insn_before (seq, insn);
39655
39656 ok = recog_memoized (insn) >= 0;
39657 gcc_assert (ok);
39658 }
39659 return true;
39660 }
39661
39662 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39663 with all elements equal to VAR. Return true if successful. */
39664
39665 static bool
39666 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39667 rtx target, rtx val)
39668 {
39669 bool ok;
39670
39671 switch (mode)
39672 {
39673 case V2SImode:
39674 case V2SFmode:
39675 if (!mmx_ok)
39676 return false;
39677 /* FALLTHRU */
39678
39679 case V4DFmode:
39680 case V4DImode:
39681 case V8SFmode:
39682 case V8SImode:
39683 case V2DFmode:
39684 case V2DImode:
39685 case V4SFmode:
39686 case V4SImode:
39687 case V16SImode:
39688 case V8DImode:
39689 case V16SFmode:
39690 case V8DFmode:
39691 return ix86_vector_duplicate_value (mode, target, val);
39692
39693 case V4HImode:
39694 if (!mmx_ok)
39695 return false;
39696 if (TARGET_SSE || TARGET_3DNOW_A)
39697 {
39698 rtx x;
39699
39700 val = gen_lowpart (SImode, val);
39701 x = gen_rtx_TRUNCATE (HImode, val);
39702 x = gen_rtx_VEC_DUPLICATE (mode, x);
39703 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39704 return true;
39705 }
39706 goto widen;
39707
39708 case V8QImode:
39709 if (!mmx_ok)
39710 return false;
39711 goto widen;
39712
39713 case V8HImode:
39714 if (TARGET_SSE2)
39715 {
39716 struct expand_vec_perm_d dperm;
39717 rtx tmp1, tmp2;
39718
39719 permute:
39720 memset (&dperm, 0, sizeof (dperm));
39721 dperm.target = target;
39722 dperm.vmode = mode;
39723 dperm.nelt = GET_MODE_NUNITS (mode);
39724 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39725 dperm.one_operand_p = true;
39726
39727 /* Extend to SImode using a paradoxical SUBREG. */
39728 tmp1 = gen_reg_rtx (SImode);
39729 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39730
39731 /* Insert the SImode value as low element of a V4SImode vector. */
39732 tmp2 = gen_reg_rtx (V4SImode);
39733 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39734 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39735
39736 ok = (expand_vec_perm_1 (&dperm)
39737 || expand_vec_perm_broadcast_1 (&dperm));
39738 gcc_assert (ok);
39739 return ok;
39740 }
39741 goto widen;
39742
39743 case V16QImode:
39744 if (TARGET_SSE2)
39745 goto permute;
39746 goto widen;
39747
39748 widen:
39749 /* Replicate the value once into the next wider mode and recurse. */
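/* E.g. a QImode value 0xab becomes the HImode value 0xabab; broadcasting
   that in the twice-as-wide vector mode and reinterpreting the result in
   the original mode yields the desired splat.  */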
39750 {
39751 enum machine_mode smode, wsmode, wvmode;
39752 rtx x;
39753
39754 smode = GET_MODE_INNER (mode);
39755 wvmode = get_mode_wider_vector (mode);
39756 wsmode = GET_MODE_INNER (wvmode);
39757
39758 val = convert_modes (wsmode, smode, val, true);
39759 x = expand_simple_binop (wsmode, ASHIFT, val,
39760 GEN_INT (GET_MODE_BITSIZE (smode)),
39761 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39762 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39763
39764 x = gen_reg_rtx (wvmode);
39765 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39766 gcc_assert (ok);
39767 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39768 return ok;
39769 }
39770
39771 case V16HImode:
39772 case V32QImode:
39773 {
39774 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39775 rtx x = gen_reg_rtx (hvmode);
39776
39777 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39778 gcc_assert (ok);
39779
39780 x = gen_rtx_VEC_CONCAT (mode, x, x);
39781 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39782 }
39783 return true;
39784
39785 default:
39786 return false;
39787 }
39788 }
39789
39790 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39791 whose ONE_VAR element is VAR, and other elements are zero. Return true
39792 if successful. */
39793
39794 static bool
39795 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39796 rtx target, rtx var, int one_var)
39797 {
39798 enum machine_mode vsimode;
39799 rtx new_target;
39800 rtx x, tmp;
39801 bool use_vector_set = false;
39802
39803 switch (mode)
39804 {
39805 case V2DImode:
39806 /* For SSE4.1, we normally use vector set. But if the second
39807 element is zero and inter-unit moves are OK, we use movq
39808 instead. */
39809 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39810 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39811 && one_var == 0));
39812 break;
39813 case V16QImode:
39814 case V4SImode:
39815 case V4SFmode:
39816 use_vector_set = TARGET_SSE4_1;
39817 break;
39818 case V8HImode:
39819 use_vector_set = TARGET_SSE2;
39820 break;
39821 case V4HImode:
39822 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39823 break;
39824 case V32QImode:
39825 case V16HImode:
39826 case V8SImode:
39827 case V8SFmode:
39828 case V4DFmode:
39829 use_vector_set = TARGET_AVX;
39830 break;
39831 case V4DImode:
39832 /* Use ix86_expand_vector_set in 64bit mode only. */
39833 use_vector_set = TARGET_AVX && TARGET_64BIT;
39834 break;
39835 default:
39836 break;
39837 }
39838
39839 if (use_vector_set)
39840 {
39841 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39842 var = force_reg (GET_MODE_INNER (mode), var);
39843 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39844 return true;
39845 }
39846
39847 switch (mode)
39848 {
39849 case V2SFmode:
39850 case V2SImode:
39851 if (!mmx_ok)
39852 return false;
39853 /* FALLTHRU */
39854
39855 case V2DFmode:
39856 case V2DImode:
39857 if (one_var != 0)
39858 return false;
39859 var = force_reg (GET_MODE_INNER (mode), var);
39860 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39861 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39862 return true;
39863
39864 case V4SFmode:
39865 case V4SImode:
39866 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39867 new_target = gen_reg_rtx (mode);
39868 else
39869 new_target = target;
39870 var = force_reg (GET_MODE_INNER (mode), var);
39871 x = gen_rtx_VEC_DUPLICATE (mode, var);
39872 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39873 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39874 if (one_var != 0)
39875 {
39876 /* We need to shuffle the value to the correct position, so
39877 create a new pseudo to store the intermediate result. */
39878
39879 /* With SSE2, we can use the integer shuffle insns. */
39880 if (mode != V4SFmode && TARGET_SSE2)
39881 {
39882 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39883 const1_rtx,
39884 GEN_INT (one_var == 1 ? 0 : 1),
39885 GEN_INT (one_var == 2 ? 0 : 1),
39886 GEN_INT (one_var == 3 ? 0 : 1)));
39887 if (target != new_target)
39888 emit_move_insn (target, new_target);
39889 return true;
39890 }
39891
39892 /* Otherwise convert the intermediate result to V4SFmode and
39893 use the SSE1 shuffle instructions. */
39894 if (mode != V4SFmode)
39895 {
39896 tmp = gen_reg_rtx (V4SFmode);
39897 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39898 }
39899 else
39900 tmp = new_target;
39901
39902 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39903 const1_rtx,
39904 GEN_INT (one_var == 1 ? 0 : 1),
39905 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39906 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39907
39908 if (mode != V4SFmode)
39909 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39910 else if (tmp != target)
39911 emit_move_insn (target, tmp);
39912 }
39913 else if (target != new_target)
39914 emit_move_insn (target, new_target);
39915 return true;
39916
39917 case V8HImode:
39918 case V16QImode:
39919 vsimode = V4SImode;
39920 goto widen;
39921 case V4HImode:
39922 case V8QImode:
39923 if (!mmx_ok)
39924 return false;
39925 vsimode = V2SImode;
39926 goto widen;
39927 widen:
39928 if (one_var != 0)
39929 return false;
39930
39931 /* Zero extend the variable element to SImode and recurse. */
39932 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
39933
39934 x = gen_reg_rtx (vsimode);
39935 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
39936 var, one_var))
39937 gcc_unreachable ();
39938
39939 emit_move_insn (target, gen_lowpart (mode, x));
39940 return true;
39941
39942 default:
39943 return false;
39944 }
39945 }
39946
39947 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39948 consisting of the values in VALS. It is known that all elements
39949 except ONE_VAR are constants. Return true if successful. */
39950
39951 static bool
39952 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
39953 rtx target, rtx vals, int one_var)
39954 {
39955 rtx var = XVECEXP (vals, 0, one_var);
39956 enum machine_mode wmode;
39957 rtx const_vec, x;
39958
39959 const_vec = copy_rtx (vals);
39960 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
39961 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
39962
39963 switch (mode)
39964 {
39965 case V2DFmode:
39966 case V2DImode:
39967 case V2SFmode:
39968 case V2SImode:
39969 /* For the two element vectors, it's just as easy to use
39970 the general case. */
39971 return false;
39972
39973 case V4DImode:
39974 /* Use ix86_expand_vector_set in 64bit mode only. */
39975 if (!TARGET_64BIT)
39976 return false;
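/* FALLTHRU */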
39977 case V4DFmode:
39978 case V8SFmode:
39979 case V8SImode:
39980 case V16HImode:
39981 case V32QImode:
39982 case V4SFmode:
39983 case V4SImode:
39984 case V8HImode:
39985 case V4HImode:
39986 break;
39987
39988 case V16QImode:
39989 if (TARGET_SSE4_1)
39990 break;
39991 wmode = V8HImode;
39992 goto widen;
39993 case V8QImode:
39994 wmode = V4HImode;
39995 goto widen;
39996 widen:
39997 /* There's no way to set one QImode entry easily. Combine
39998 the variable value with its adjacent constant value, and
39999 promote to an HImode set. */
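/* E.g. with the variable element at an odd index I, the HImode word is
   (var << 8) | (vals[I ^ 1] & 0xff) and is inserted at HImode position
   I >> 1; for an even index the constant byte goes into the high half
   instead.  */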
40000 x = XVECEXP (vals, 0, one_var ^ 1);
40001 if (one_var & 1)
40002 {
40003 var = convert_modes (HImode, QImode, var, true);
40004 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
40005 NULL_RTX, 1, OPTAB_LIB_WIDEN);
40006 x = GEN_INT (INTVAL (x) & 0xff);
40007 }
40008 else
40009 {
40010 var = convert_modes (HImode, QImode, var, true);
40011 x = gen_int_mode (INTVAL (x) << 8, HImode);
40012 }
40013 if (x != const0_rtx)
40014 var = expand_simple_binop (HImode, IOR, var, x, var,
40015 1, OPTAB_LIB_WIDEN);
40016
40017 x = gen_reg_rtx (wmode);
40018 emit_move_insn (x, gen_lowpart (wmode, const_vec));
40019 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
40020
40021 emit_move_insn (target, gen_lowpart (mode, x));
40022 return true;
40023
40024 default:
40025 return false;
40026 }
40027
40028 emit_move_insn (target, const_vec);
40029 ix86_expand_vector_set (mmx_ok, target, var, one_var);
40030 return true;
40031 }
40032
40033 /* A subroutine of ix86_expand_vector_init_general. Use vector
40034 concatenate to handle the most general case: all values variable,
40035 and none identical. */
40036
40037 static void
40038 ix86_expand_vector_init_concat (enum machine_mode mode,
40039 rtx target, rtx *ops, int n)
40040 {
40041 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
40042 rtx first[16], second[8], third[4];
40043 rtvec v;
40044 int i, j;
40045
40046 switch (n)
40047 {
40048 case 2:
40049 switch (mode)
40050 {
40051 case V16SImode:
40052 cmode = V8SImode;
40053 break;
40054 case V16SFmode:
40055 cmode = V8SFmode;
40056 break;
40057 case V8DImode:
40058 cmode = V4DImode;
40059 break;
40060 case V8DFmode:
40061 cmode = V4DFmode;
40062 break;
40063 case V8SImode:
40064 cmode = V4SImode;
40065 break;
40066 case V8SFmode:
40067 cmode = V4SFmode;
40068 break;
40069 case V4DImode:
40070 cmode = V2DImode;
40071 break;
40072 case V4DFmode:
40073 cmode = V2DFmode;
40074 break;
40075 case V4SImode:
40076 cmode = V2SImode;
40077 break;
40078 case V4SFmode:
40079 cmode = V2SFmode;
40080 break;
40081 case V2DImode:
40082 cmode = DImode;
40083 break;
40084 case V2SImode:
40085 cmode = SImode;
40086 break;
40087 case V2DFmode:
40088 cmode = DFmode;
40089 break;
40090 case V2SFmode:
40091 cmode = SFmode;
40092 break;
40093 default:
40094 gcc_unreachable ();
40095 }
40096
40097 if (!register_operand (ops[1], cmode))
40098 ops[1] = force_reg (cmode, ops[1]);
40099 if (!register_operand (ops[0], cmode))
40100 ops[0] = force_reg (cmode, ops[0]);
40101 emit_insn (gen_rtx_SET (VOIDmode, target,
40102 gen_rtx_VEC_CONCAT (mode, ops[0],
40103 ops[1])));
40104 break;
40105
40106 case 4:
40107 switch (mode)
40108 {
40109 case V4DImode:
40110 cmode = V2DImode;
40111 break;
40112 case V4DFmode:
40113 cmode = V2DFmode;
40114 break;
40115 case V4SImode:
40116 cmode = V2SImode;
40117 break;
40118 case V4SFmode:
40119 cmode = V2SFmode;
40120 break;
40121 default:
40122 gcc_unreachable ();
40123 }
40124 goto half;
40125
40126 case 8:
40127 switch (mode)
40128 {
40129 case V8DImode:
40130 cmode = V2DImode;
40131 hmode = V4DImode;
40132 break;
40133 case V8DFmode:
40134 cmode = V2DFmode;
40135 hmode = V4DFmode;
40136 break;
40137 case V8SImode:
40138 cmode = V2SImode;
40139 hmode = V4SImode;
40140 break;
40141 case V8SFmode:
40142 cmode = V2SFmode;
40143 hmode = V4SFmode;
40144 break;
40145 default:
40146 gcc_unreachable ();
40147 }
40148 goto half;
40149
40150 case 16:
40151 switch (mode)
40152 {
40153 case V16SImode:
40154 cmode = V2SImode;
40155 hmode = V4SImode;
40156 gmode = V8SImode;
40157 break;
40158 case V16SFmode:
40159 cmode = V2SFmode;
40160 hmode = V4SFmode;
40161 gmode = V8SFmode;
40162 break;
40163 default:
40164 gcc_unreachable ();
40165 }
40166 goto half;
40167
40168 half:
40169 /* FIXME: We process inputs backward to help RA. PR 36222. */
40170 i = n - 1;
40171 j = (n >> 1) - 1;
40172 for (; i > 0; i -= 2, j--)
40173 {
40174 first[j] = gen_reg_rtx (cmode);
40175 v = gen_rtvec (2, ops[i - 1], ops[i]);
40176 ix86_expand_vector_init (false, first[j],
40177 gen_rtx_PARALLEL (cmode, v));
40178 }
40179
40180 n >>= 1;
40181 if (n > 4)
40182 {
40183 gcc_assert (hmode != VOIDmode);
40184 gcc_assert (gmode != VOIDmode);
40185 for (i = j = 0; i < n; i += 2, j++)
40186 {
40187 second[j] = gen_reg_rtx (hmode);
40188 ix86_expand_vector_init_concat (hmode, second [j],
40189 &first [i], 2);
40190 }
40191 n >>= 1;
40192 for (i = j = 0; i < n; i += 2, j++)
40193 {
40194 third[j] = gen_reg_rtx (gmode);
40195 ix86_expand_vector_init_concat (gmode, third[j],
40196 &second[i], 2);
40197 }
40198 n >>= 1;
40199 ix86_expand_vector_init_concat (mode, target, third, n);
40200 }
40201 else if (n > 2)
40202 {
40203 gcc_assert (hmode != VOIDmode);
40204 for (i = j = 0; i < n; i += 2, j++)
40205 {
40206 second[j] = gen_reg_rtx (hmode);
40207 ix86_expand_vector_init_concat (hmode, second [j],
40208 &first [i], 2);
40209 }
40210 n >>= 1;
40211 ix86_expand_vector_init_concat (mode, target, second, n);
40212 }
40213 else
40214 ix86_expand_vector_init_concat (mode, target, first, n);
40215 break;
40216
40217 default:
40218 gcc_unreachable ();
40219 }
40220 }
40221
40222 /* A subroutine of ix86_expand_vector_init_general. Use vector
40223 interleave to handle the most general case: all values variable,
40224 and none identical. */
40225
40226 static void
40227 ix86_expand_vector_init_interleave (enum machine_mode mode,
40228 rtx target, rtx *ops, int n)
40229 {
40230 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40231 int i, j;
40232 rtx op0, op1;
40233 rtx (*gen_load_even) (rtx, rtx, rtx);
40234 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40235 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40236
40237 switch (mode)
40238 {
40239 case V8HImode:
40240 gen_load_even = gen_vec_setv8hi;
40241 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40242 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40243 inner_mode = HImode;
40244 first_imode = V4SImode;
40245 second_imode = V2DImode;
40246 third_imode = VOIDmode;
40247 break;
40248 case V16QImode:
40249 gen_load_even = gen_vec_setv16qi;
40250 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40251 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40252 inner_mode = QImode;
40253 first_imode = V8HImode;
40254 second_imode = V4SImode;
40255 third_imode = V2DImode;
40256 break;
40257 default:
40258 gcc_unreachable ();
40259 }
40260
40261 for (i = 0; i < n; i++)
40262 {
40263 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40264 op0 = gen_reg_rtx (SImode);
40265 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40266
40267 /* Insert the SImode value as low element of V4SImode vector. */
40268 op1 = gen_reg_rtx (V4SImode);
40269 op0 = gen_rtx_VEC_MERGE (V4SImode,
40270 gen_rtx_VEC_DUPLICATE (V4SImode,
40271 op0),
40272 CONST0_RTX (V4SImode),
40273 const1_rtx);
40274 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40275
40276 /* Cast the V4SImode vector back to a vector in the original mode. */
40277 op0 = gen_reg_rtx (mode);
40278 emit_move_insn (op0, gen_lowpart (mode, op1));
40279
40280 /* Load even elements into the second position. */
40281 emit_insn (gen_load_even (op0,
40282 force_reg (inner_mode,
40283 ops [i + i + 1]),
40284 const1_rtx));
40285
40286 /* Cast vector to FIRST_IMODE vector. */
40287 ops[i] = gen_reg_rtx (first_imode);
40288 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40289 }
40290
40291 /* Interleave low FIRST_IMODE vectors. */
40292 for (i = j = 0; i < n; i += 2, j++)
40293 {
40294 op0 = gen_reg_rtx (first_imode);
40295 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40296
40297 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40298 ops[j] = gen_reg_rtx (second_imode);
40299 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40300 }
40301
40302 /* Interleave low SECOND_IMODE vectors. */
40303 switch (second_imode)
40304 {
40305 case V4SImode:
40306 for (i = j = 0; i < n / 2; i += 2, j++)
40307 {
40308 op0 = gen_reg_rtx (second_imode);
40309 emit_insn (gen_interleave_second_low (op0, ops[i],
40310 ops[i + 1]));
40311
40312 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40313 vector. */
40314 ops[j] = gen_reg_rtx (third_imode);
40315 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40316 }
40317 second_imode = V2DImode;
40318 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40319 /* FALLTHRU */
40320
40321 case V2DImode:
40322 op0 = gen_reg_rtx (second_imode);
40323 emit_insn (gen_interleave_second_low (op0, ops[0],
40324 ops[1]));
40325
40326 /* Cast the SECOND_IMODE vector back to a vector in the original
40327 mode. */
40328 emit_insn (gen_rtx_SET (VOIDmode, target,
40329 gen_lowpart (mode, op0)));
40330 break;
40331
40332 default:
40333 gcc_unreachable ();
40334 }
40335 }
40336
40337 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40338 all values variable, and none identical. */
40339
40340 static void
40341 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40342 rtx target, rtx vals)
40343 {
40344 rtx ops[64], op0, op1;
40345 enum machine_mode half_mode = VOIDmode;
40346 int n, i;
40347
40348 switch (mode)
40349 {
40350 case V2SFmode:
40351 case V2SImode:
40352 if (!mmx_ok && !TARGET_SSE)
40353 break;
40354 /* FALLTHRU */
40355
40356 case V16SImode:
40357 case V16SFmode:
40358 case V8DFmode:
40359 case V8DImode:
40360 case V8SFmode:
40361 case V8SImode:
40362 case V4DFmode:
40363 case V4DImode:
40364 case V4SFmode:
40365 case V4SImode:
40366 case V2DFmode:
40367 case V2DImode:
40368 n = GET_MODE_NUNITS (mode);
40369 for (i = 0; i < n; i++)
40370 ops[i] = XVECEXP (vals, 0, i);
40371 ix86_expand_vector_init_concat (mode, target, ops, n);
40372 return;
40373
40374 case V32QImode:
40375 half_mode = V16QImode;
40376 goto half;
40377
40378 case V16HImode:
40379 half_mode = V8HImode;
40380 goto half;
40381
40382 half:
40383 n = GET_MODE_NUNITS (mode);
40384 for (i = 0; i < n; i++)
40385 ops[i] = XVECEXP (vals, 0, i);
40386 op0 = gen_reg_rtx (half_mode);
40387 op1 = gen_reg_rtx (half_mode);
40388 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40389 n >> 2);
40390 ix86_expand_vector_init_interleave (half_mode, op1,
40391 &ops [n >> 1], n >> 2);
40392 emit_insn (gen_rtx_SET (VOIDmode, target,
40393 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40394 return;
40395
40396 case V16QImode:
40397 if (!TARGET_SSE4_1)
40398 break;
40399 /* FALLTHRU */
40400
40401 case V8HImode:
40402 if (!TARGET_SSE2)
40403 break;
40404
40405 /* Don't use ix86_expand_vector_init_interleave if we can't
40406 move from GPR to SSE register directly. */
40407 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40408 break;
40409
40410 n = GET_MODE_NUNITS (mode);
40411 for (i = 0; i < n; i++)
40412 ops[i] = XVECEXP (vals, 0, i);
40413 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40414 return;
40415
40416 case V4HImode:
40417 case V8QImode:
40418 break;
40419
40420 default:
40421 gcc_unreachable ();
40422 }
40423
40424 {
40425 int i, j, n_elts, n_words, n_elt_per_word;
40426 enum machine_mode inner_mode;
40427 rtx words[4], shift;
40428
40429 inner_mode = GET_MODE_INNER (mode);
40430 n_elts = GET_MODE_NUNITS (mode);
40431 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40432 n_elt_per_word = n_elts / n_words;
40433 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40434
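/* Pack the elements of each word-sized chunk from highest index to
   lowest, so element 0 ends up in the least significant bits; on
   little-endian x86 that matches the vector's in-memory element order.  */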
40435 for (i = 0; i < n_words; ++i)
40436 {
40437 rtx word = NULL_RTX;
40438
40439 for (j = 0; j < n_elt_per_word; ++j)
40440 {
40441 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40442 elt = convert_modes (word_mode, inner_mode, elt, true);
40443
40444 if (j == 0)
40445 word = elt;
40446 else
40447 {
40448 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40449 word, 1, OPTAB_LIB_WIDEN);
40450 word = expand_simple_binop (word_mode, IOR, word, elt,
40451 word, 1, OPTAB_LIB_WIDEN);
40452 }
40453 }
40454
40455 words[i] = word;
40456 }
40457
40458 if (n_words == 1)
40459 emit_move_insn (target, gen_lowpart (mode, words[0]));
40460 else if (n_words == 2)
40461 {
40462 rtx tmp = gen_reg_rtx (mode);
40463 emit_clobber (tmp);
40464 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40465 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40466 emit_move_insn (target, tmp);
40467 }
40468 else if (n_words == 4)
40469 {
40470 rtx tmp = gen_reg_rtx (V4SImode);
40471 gcc_assert (word_mode == SImode);
40472 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40473 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40474 emit_move_insn (target, gen_lowpart (mode, tmp));
40475 }
40476 else
40477 gcc_unreachable ();
40478 }
40479 }
40480
40481 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40482 instructions unless MMX_OK is true. */
40483
40484 void
40485 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40486 {
40487 enum machine_mode mode = GET_MODE (target);
40488 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40489 int n_elts = GET_MODE_NUNITS (mode);
40490 int n_var = 0, one_var = -1;
40491 bool all_same = true, all_const_zero = true;
40492 int i;
40493 rtx x;
40494
40495 for (i = 0; i < n_elts; ++i)
40496 {
40497 x = XVECEXP (vals, 0, i);
40498 if (!(CONST_INT_P (x)
40499 || GET_CODE (x) == CONST_DOUBLE
40500 || GET_CODE (x) == CONST_FIXED))
40501 n_var++, one_var = i;
40502 else if (x != CONST0_RTX (inner_mode))
40503 all_const_zero = false;
40504 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40505 all_same = false;
40506 }
40507
40508 /* Constants are best loaded from the constant pool. */
40509 if (n_var == 0)
40510 {
40511 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40512 return;
40513 }
40514
40515 /* If all values are identical, broadcast the value. */
40516 if (all_same
40517 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40518 XVECEXP (vals, 0, 0)))
40519 return;
40520
40521 /* Values where only one field is non-constant are best loaded from
40522 the pool and overwritten via move later. */
40523 if (n_var == 1)
40524 {
40525 if (all_const_zero
40526 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40527 XVECEXP (vals, 0, one_var),
40528 one_var))
40529 return;
40530
40531 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40532 return;
40533 }
40534
40535 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40536 }
40537
40538 void
40539 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40540 {
40541 enum machine_mode mode = GET_MODE (target);
40542 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40543 enum machine_mode half_mode;
40544 bool use_vec_merge = false;
40545 rtx tmp;
40546 static rtx (*gen_extract[6][2]) (rtx, rtx)
40547 = {
40548 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40549 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40550 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40551 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40552 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40553 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40554 };
40555 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40556 = {
40557 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40558 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40559 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40560 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40561 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40562 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40563 };
40564 int i, j, n;
40565
40566 switch (mode)
40567 {
40568 case V2SFmode:
40569 case V2SImode:
40570 if (mmx_ok)
40571 {
40572 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40573 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40574 if (elt == 0)
40575 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40576 else
40577 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40578 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40579 return;
40580 }
40581 break;
40582
40583 case V2DImode:
40584 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40585 if (use_vec_merge)
40586 break;
40587
40588 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40589 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40590 if (elt == 0)
40591 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40592 else
40593 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40594 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40595 return;
40596
40597 case V2DFmode:
40598 {
40599 rtx op0, op1;
40600
40601 /* For the two element vectors, we implement a VEC_CONCAT with
40602 the extraction of the other element. */
40603
40604 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40605 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40606
40607 if (elt == 0)
40608 op0 = val, op1 = tmp;
40609 else
40610 op0 = tmp, op1 = val;
40611
40612 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40613 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40614 }
40615 return;
40616
40617 case V4SFmode:
40618 use_vec_merge = TARGET_SSE4_1;
40619 if (use_vec_merge)
40620 break;
40621
40622 switch (elt)
40623 {
40624 case 0:
40625 use_vec_merge = true;
40626 break;
40627
40628 case 1:
40629 /* tmp = target = A B C D */
40630 tmp = copy_to_reg (target);
40631 /* target = A A B B */
40632 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40633 /* target = X A B B */
40634 ix86_expand_vector_set (false, target, val, 0);
40635 /* target = A X C D */
40636 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40637 const1_rtx, const0_rtx,
40638 GEN_INT (2+4), GEN_INT (3+4)));
40639 return;
40640
40641 case 2:
40642 /* tmp = target = A B C D */
40643 tmp = copy_to_reg (target);
40644 /* tmp = X B C D */
40645 ix86_expand_vector_set (false, tmp, val, 0);
40646 /* target = A B X D */
40647 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40648 const0_rtx, const1_rtx,
40649 GEN_INT (0+4), GEN_INT (3+4)));
40650 return;
40651
40652 case 3:
40653 /* tmp = target = A B C D */
40654 tmp = copy_to_reg (target);
40655 /* tmp = X B C D */
40656 ix86_expand_vector_set (false, tmp, val, 0);
40657 /* target = A B C X */
40658 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40659 const0_rtx, const1_rtx,
40660 GEN_INT (2+4), GEN_INT (0+4)));
40661 return;
40662
40663 default:
40664 gcc_unreachable ();
40665 }
40666 break;
40667
40668 case V4SImode:
40669 use_vec_merge = TARGET_SSE4_1;
40670 if (use_vec_merge)
40671 break;
40672
40673 /* Element 0 handled by vec_merge below. */
40674 if (elt == 0)
40675 {
40676 use_vec_merge = true;
40677 break;
40678 }
40679
40680 if (TARGET_SSE2)
40681 {
40682 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40683 store into element 0, then shuffle them back. */
40684
40685 rtx order[4];
40686
40687 order[0] = GEN_INT (elt);
40688 order[1] = const1_rtx;
40689 order[2] = const2_rtx;
40690 order[3] = GEN_INT (3);
40691 order[elt] = const0_rtx;
40692
40693 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40694 order[1], order[2], order[3]));
40695
40696 ix86_expand_vector_set (false, target, val, 0);
40697
40698 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40699 order[1], order[2], order[3]));
40700 }
40701 else
40702 {
40703 /* For SSE1, we have to reuse the V4SF code. */
40704 rtx t = gen_reg_rtx (V4SFmode);
40705 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40706 emit_move_insn (target, gen_lowpart (mode, t));
40707 }
40708 return;
40709
40710 case V8HImode:
40711 use_vec_merge = TARGET_SSE2;
40712 break;
40713 case V4HImode:
40714 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40715 break;
40716
40717 case V16QImode:
40718 use_vec_merge = TARGET_SSE4_1;
40719 break;
40720
40721 case V8QImode:
40722 break;
40723
40724 case V32QImode:
40725 half_mode = V16QImode;
40726 j = 0;
40727 n = 16;
40728 goto half;
40729
40730 case V16HImode:
40731 half_mode = V8HImode;
40732 j = 1;
40733 n = 8;
40734 goto half;
40735
40736 case V8SImode:
40737 half_mode = V4SImode;
40738 j = 2;
40739 n = 4;
40740 goto half;
40741
40742 case V4DImode:
40743 half_mode = V2DImode;
40744 j = 3;
40745 n = 2;
40746 goto half;
40747
40748 case V8SFmode:
40749 half_mode = V4SFmode;
40750 j = 4;
40751 n = 4;
40752 goto half;
40753
40754 case V4DFmode:
40755 half_mode = V2DFmode;
40756 j = 5;
40757 n = 2;
40758 goto half;
40759
40760 half:
40761 /* Compute offset. */
40762 i = elt / n;
40763 elt %= n;
40764
40765 gcc_assert (i <= 1);
40766
40767 /* Extract the half. */
40768 tmp = gen_reg_rtx (half_mode);
40769 emit_insn (gen_extract[j][i] (tmp, target));
40770
40771 /* Put val in tmp at elt. */
40772 ix86_expand_vector_set (false, tmp, val, elt);
40773
40774 /* Put it back. */
40775 emit_insn (gen_insert[j][i] (target, target, tmp));
40776 return;
40777
40778 default:
40779 break;
40780 }
40781
40782 if (use_vec_merge)
40783 {
40784 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40785 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40786 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40787 }
40788 else
40789 {
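/* No single-insn insertion is available: spill the vector to a stack
   temporary, store the scalar into the selected element's slot, and
   reload the whole vector.  */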
40790 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40791
40792 emit_move_insn (mem, target);
40793
40794 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40795 emit_move_insn (tmp, val);
40796
40797 emit_move_insn (target, mem);
40798 }
40799 }
40800
40801 void
40802 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40803 {
40804 enum machine_mode mode = GET_MODE (vec);
40805 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40806 bool use_vec_extr = false;
40807 rtx tmp;
40808
40809 switch (mode)
40810 {
40811 case V2SImode:
40812 case V2SFmode:
40813 if (!mmx_ok)
40814 break;
40815 /* FALLTHRU */
40816
40817 case V2DFmode:
40818 case V2DImode:
40819 use_vec_extr = true;
40820 break;
40821
40822 case V4SFmode:
40823 use_vec_extr = TARGET_SSE4_1;
40824 if (use_vec_extr)
40825 break;
40826
40827 switch (elt)
40828 {
40829 case 0:
40830 tmp = vec;
40831 break;
40832
40833 case 1:
40834 case 3:
40835 tmp = gen_reg_rtx (mode);
40836 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40837 GEN_INT (elt), GEN_INT (elt),
40838 GEN_INT (elt+4), GEN_INT (elt+4)));
40839 break;
40840
40841 case 2:
40842 tmp = gen_reg_rtx (mode);
40843 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40844 break;
40845
40846 default:
40847 gcc_unreachable ();
40848 }
40849 vec = tmp;
40850 use_vec_extr = true;
40851 elt = 0;
40852 break;
40853
40854 case V4SImode:
40855 use_vec_extr = TARGET_SSE4_1;
40856 if (use_vec_extr)
40857 break;
40858
40859 if (TARGET_SSE2)
40860 {
40861 switch (elt)
40862 {
40863 case 0:
40864 tmp = vec;
40865 break;
40866
40867 case 1:
40868 case 3:
40869 tmp = gen_reg_rtx (mode);
40870 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40871 GEN_INT (elt), GEN_INT (elt),
40872 GEN_INT (elt), GEN_INT (elt)));
40873 break;
40874
40875 case 2:
40876 tmp = gen_reg_rtx (mode);
40877 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40878 break;
40879
40880 default:
40881 gcc_unreachable ();
40882 }
40883 vec = tmp;
40884 use_vec_extr = true;
40885 elt = 0;
40886 }
40887 else
40888 {
40889 /* For SSE1, we have to reuse the V4SF code. */
40890 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40891 gen_lowpart (V4SFmode, vec), elt);
40892 return;
40893 }
40894 break;
40895
40896 case V8HImode:
40897 use_vec_extr = TARGET_SSE2;
40898 break;
40899 case V4HImode:
40900 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40901 break;
40902
40903 case V16QImode:
40904 use_vec_extr = TARGET_SSE4_1;
40905 break;
40906
40907 case V8SFmode:
40908 if (TARGET_AVX)
40909 {
40910 tmp = gen_reg_rtx (V4SFmode);
40911 if (elt < 4)
40912 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
40913 else
40914 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
40915 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40916 return;
40917 }
40918 break;
40919
40920 case V4DFmode:
40921 if (TARGET_AVX)
40922 {
40923 tmp = gen_reg_rtx (V2DFmode);
40924 if (elt < 2)
40925 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
40926 else
40927 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
40928 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40929 return;
40930 }
40931 break;
40932
40933 case V32QImode:
40934 if (TARGET_AVX)
40935 {
40936 tmp = gen_reg_rtx (V16QImode);
40937 if (elt < 16)
40938 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
40939 else
40940 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
40941 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40942 return;
40943 }
40944 break;
40945
40946 case V16HImode:
40947 if (TARGET_AVX)
40948 {
40949 tmp = gen_reg_rtx (V8HImode);
40950 if (elt < 8)
40951 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
40952 else
40953 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
40954 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40955 return;
40956 }
40957 break;
40958
40959 case V8SImode:
40960 if (TARGET_AVX)
40961 {
40962 tmp = gen_reg_rtx (V4SImode);
40963 if (elt < 4)
40964 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
40965 else
40966 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
40967 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40968 return;
40969 }
40970 break;
40971
40972 case V4DImode:
40973 if (TARGET_AVX)
40974 {
40975 tmp = gen_reg_rtx (V2DImode);
40976 if (elt < 2)
40977 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
40978 else
40979 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
40980 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40981 return;
40982 }
40983 break;
40984
40985 case V16SFmode:
40986 tmp = gen_reg_rtx (V8SFmode);
40987 if (elt < 8)
40988 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
40989 else
40990 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
40991 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40992 return;
40993
40994 case V8DFmode:
40995 tmp = gen_reg_rtx (V4DFmode);
40996 if (elt < 4)
40997 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
40998 else
40999 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
41000 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41001 return;
41002
41003 case V16SImode:
41004 tmp = gen_reg_rtx (V8SImode);
41005 if (elt < 8)
41006 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
41007 else
41008 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
41009 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41010 return;
41011
41012 case V8DImode:
41013 tmp = gen_reg_rtx (V4DImode);
41014 if (elt < 4)
41015 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
41016 else
41017 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
41018 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41019 return;
41020
41021 case V8QImode:
41022 /* ??? Could extract the appropriate HImode element and shift. */
41023 default:
41024 break;
41025 }
41026
41027 if (use_vec_extr)
41028 {
41029 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
41030 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
41031
41032 /* Let the rtl optimizers know about the zero extension performed. */
41033 if (inner_mode == QImode || inner_mode == HImode)
41034 {
41035 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
41036 target = gen_lowpart (SImode, target);
41037 }
41038
41039 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
41040 }
41041 else
41042 {
41043 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
41044
41045 emit_move_insn (mem, vec);
41046
41047 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
41048 emit_move_insn (target, tmp);
41049 }
41050 }
41051
41052 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
41053 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
41054 The upper bits of DEST are undefined, though they shouldn't cause
41055 exceptions (some bits from src or all zeros are ok). */
41056
41057 static void
41058 emit_reduc_half (rtx dest, rtx src, int i)
41059 {
41060 rtx tem, d = dest;
41061 switch (GET_MODE (src))
41062 {
41063 case V4SFmode:
41064 if (i == 128)
41065 tem = gen_sse_movhlps (dest, src, src);
41066 else
41067 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
41068 GEN_INT (1 + 4), GEN_INT (1 + 4));
41069 break;
41070 case V2DFmode:
41071 tem = gen_vec_interleave_highv2df (dest, src, src);
41072 break;
41073 case V16QImode:
41074 case V8HImode:
41075 case V4SImode:
41076 case V2DImode:
41077 d = gen_reg_rtx (V1TImode);
41078 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
41079 GEN_INT (i / 2));
41080 break;
41081 case V8SFmode:
41082 if (i == 256)
41083 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
41084 else
41085 tem = gen_avx_shufps256 (dest, src, src,
41086 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
41087 break;
41088 case V4DFmode:
41089 if (i == 256)
41090 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
41091 else
41092 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
41093 break;
41094 case V32QImode:
41095 case V16HImode:
41096 case V8SImode:
41097 case V4DImode:
41098 if (i == 256)
41099 {
41100 if (GET_MODE (dest) != V4DImode)
41101 d = gen_reg_rtx (V4DImode);
41102 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
41103 gen_lowpart (V4DImode, src),
41104 const1_rtx);
41105 }
41106 else
41107 {
41108 d = gen_reg_rtx (V2TImode);
41109 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41110 GEN_INT (i / 2));
41111 }
41112 break;
41113 case V16SImode:
41114 case V16SFmode:
41115 case V8DImode:
41116 case V8DFmode:
41117 if (i > 128)
41118 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41119 gen_lowpart (V16SImode, src),
41120 gen_lowpart (V16SImode, src),
41121 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41122 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41123 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41124 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41125 GEN_INT (0xC), GEN_INT (0xD),
41126 GEN_INT (0xE), GEN_INT (0xF),
41127 GEN_INT (0x10), GEN_INT (0x11),
41128 GEN_INT (0x12), GEN_INT (0x13),
41129 GEN_INT (0x14), GEN_INT (0x15),
41130 GEN_INT (0x16), GEN_INT (0x17));
41131 else
41132 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41133 gen_lowpart (V16SImode, src),
41134 GEN_INT (i == 128 ? 0x2 : 0x1),
41135 GEN_INT (0x3),
41136 GEN_INT (0x3),
41137 GEN_INT (0x3),
41138 GEN_INT (i == 128 ? 0x6 : 0x5),
41139 GEN_INT (0x7),
41140 GEN_INT (0x7),
41141 GEN_INT (0x7),
41142 GEN_INT (i == 128 ? 0xA : 0x9),
41143 GEN_INT (0xB),
41144 GEN_INT (0xB),
41145 GEN_INT (0xB),
41146 GEN_INT (i == 128 ? 0xE : 0xD),
41147 GEN_INT (0xF),
41148 GEN_INT (0xF),
41149 GEN_INT (0xF));
41150 break;
41151 default:
41152 gcc_unreachable ();
41153 }
41154 emit_insn (tem);
41155 if (d != dest)
41156 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41157 }
41158
41159 /* Expand a vector reduction. FN is the binary pattern to reduce;
41160 DEST is the destination; IN is the input vector. */
41161
41162 void
41163 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41164 {
41165 rtx half, dst, vec = in;
41166 enum machine_mode mode = GET_MODE (in);
41167 int i;
41168
41169 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
41170 if (TARGET_SSE4_1
41171 && mode == V8HImode
41172 && fn == gen_uminv8hi3)
41173 {
41174 emit_insn (gen_sse4_1_phminposuw (dest, in));
41175 return;
41176 }
41177
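/* Repeatedly fold the upper half of the live elements onto the lower
   half with FN, halving the width each step; e.g. a V4SFmode sum goes
   [a b c d] -> [a+c b+d . .] -> [a+b+c+d . . .], leaving the result in
   element 0.  */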
41178 for (i = GET_MODE_BITSIZE (mode);
41179 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41180 i >>= 1)
41181 {
41182 half = gen_reg_rtx (mode);
41183 emit_reduc_half (half, vec, i);
41184 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41185 dst = dest;
41186 else
41187 dst = gen_reg_rtx (mode);
41188 emit_insn (fn (dst, half, vec));
41189 vec = dst;
41190 }
41191 }
41192 \f
41193 /* Target hook for scalar_mode_supported_p. */
41194 static bool
41195 ix86_scalar_mode_supported_p (enum machine_mode mode)
41196 {
41197 if (DECIMAL_FLOAT_MODE_P (mode))
41198 return default_decimal_float_supported_p ();
41199 else if (mode == TFmode)
41200 return true;
41201 else
41202 return default_scalar_mode_supported_p (mode);
41203 }
41204
41205 /* Implements target hook vector_mode_supported_p. */
41206 static bool
41207 ix86_vector_mode_supported_p (enum machine_mode mode)
41208 {
41209 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41210 return true;
41211 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41212 return true;
41213 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41214 return true;
41215 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41216 return true;
41217 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41218 return true;
41219 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41220 return true;
41221 return false;
41222 }
41223
41224 /* Target hook for c_mode_for_suffix. */
41225 static enum machine_mode
41226 ix86_c_mode_for_suffix (char suffix)
41227 {
41228 if (suffix == 'q')
41229 return TFmode;
41230 if (suffix == 'w')
41231 return XFmode;
41232
41233 return VOIDmode;
41234 }
41235
41236 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41237
41238 We do this in the new i386 backend to maintain source compatibility
41239 with the old cc0-based compiler. */
41240
41241 static tree
41242 ix86_md_asm_clobbers (tree, tree, tree clobbers)
41243 {
41244 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41245 clobbers);
41246 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41247 clobbers);
41248 return clobbers;
41249 }
41250
41251 /* Implements target vector targetm.asm.encode_section_info. */
41252
41253 static void ATTRIBUTE_UNUSED
41254 ix86_encode_section_info (tree decl, rtx rtl, int first)
41255 {
41256 default_encode_section_info (decl, rtl, first);
41257
41258 if (TREE_CODE (decl) == VAR_DECL
41259 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41260 && ix86_in_large_data_p (decl))
41261 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41262 }
41263
41264 /* Worker function for REVERSE_CONDITION. */
41265
41266 enum rtx_code
41267 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41268 {
41269 return (mode != CCFPmode && mode != CCFPUmode
41270 ? reverse_condition (code)
41271 : reverse_condition_maybe_unordered (code));
41272 }
41273
41274 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41275 to OPERANDS[0]. */
41276
41277 const char *
41278 output_387_reg_move (rtx insn, rtx *operands)
41279 {
41280 if (REG_P (operands[0]))
41281 {
41282 if (REG_P (operands[1])
41283 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41284 {
41285 if (REGNO (operands[0]) == FIRST_STACK_REG)
41286 return output_387_ffreep (operands, 0);
41287 return "fstp\t%y0";
41288 }
41289 if (STACK_TOP_P (operands[0]))
41290 return "fld%Z1\t%y1";
41291 return "fst\t%y0";
41292 }
41293 else if (MEM_P (operands[0]))
41294 {
41295 gcc_assert (REG_P (operands[1]));
41296 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41297 return "fstp%Z0\t%y0";
41298 else
41299 {
41300 /* There is no non-popping store to memory for XFmode.
41301 So if we need one, follow the store with a load. */
41302 if (GET_MODE (operands[0]) == XFmode)
41303 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41304 else
41305 return "fst%Z0\t%y0";
41306 }
41307 }
41308 else
41309 gcc_unreachable();
41310 }
41311
41312 /* Output code to perform a conditional jump to LABEL, if C2 flag in
41313 FP status register is set. */
41314
41315 void
41316 ix86_emit_fp_unordered_jump (rtx label)
41317 {
41318 rtx reg = gen_reg_rtx (HImode);
41319 rtx temp;
41320
41321 emit_insn (gen_x86_fnstsw_1 (reg));
41322
41323 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41324 {
41325 emit_insn (gen_x86_sahf_1 (reg));
41326
41327 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41328 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41329 }
41330 else
41331 {
41332 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41333
41334 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41335 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41336 }
41337
41338 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41339 gen_rtx_LABEL_REF (VOIDmode, label),
41340 pc_rtx);
41341 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41342
41343 emit_jump_insn (temp);
41344 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41345 }
41346
41347 /* Output code to perform a log1p XFmode calculation. */
41348
41349 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41350 {
41351 rtx_code_label *label1 = gen_label_rtx ();
41352 rtx_code_label *label2 = gen_label_rtx ();
41353
41354 rtx tmp = gen_reg_rtx (XFmode);
41355 rtx tmp2 = gen_reg_rtx (XFmode);
41356 rtx test;
41357
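/* fyl2xp1 is only specified for arguments with |x| < 1 - sqrt(2)/2
   (~0.2928932); for larger |op1| branch to label1 below and fall back
   to fyl2x on 1.0 + op1 instead.  */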
41358 emit_insn (gen_absxf2 (tmp, op1));
41359 test = gen_rtx_GE (VOIDmode, tmp,
41360 CONST_DOUBLE_FROM_REAL_VALUE (
41361 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41362 XFmode));
41363 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41364
41365 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41366 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41367 emit_jump (label2);
41368
41369 emit_label (label1);
41370 emit_move_insn (tmp, CONST1_RTX (XFmode));
41371 emit_insn (gen_addxf3 (tmp, op1, tmp));
41372 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41373 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41374
41375 emit_label (label2);
41376 }
41377
41378 /* Emit code for round calculation. */
41379 void ix86_emit_i387_round (rtx op0, rtx op1)
41380 {
41381 enum machine_mode inmode = GET_MODE (op1);
41382 enum machine_mode outmode = GET_MODE (op0);
41383 rtx e1, e2, res, tmp, tmp1, half;
41384 rtx scratch = gen_reg_rtx (HImode);
41385 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41386 rtx_code_label *jump_label = gen_label_rtx ();
41387 rtx insn;
41388 rtx (*gen_abs) (rtx, rtx);
41389 rtx (*gen_neg) (rtx, rtx);
41390
41391 switch (inmode)
41392 {
41393 case SFmode:
41394 gen_abs = gen_abssf2;
41395 break;
41396 case DFmode:
41397 gen_abs = gen_absdf2;
41398 break;
41399 case XFmode:
41400 gen_abs = gen_absxf2;
41401 break;
41402 default:
41403 gcc_unreachable ();
41404 }
41405
41406 switch (outmode)
41407 {
41408 case SFmode:
41409 gen_neg = gen_negsf2;
41410 break;
41411 case DFmode:
41412 gen_neg = gen_negdf2;
41413 break;
41414 case XFmode:
41415 gen_neg = gen_negxf2;
41416 break;
41417 case HImode:
41418 gen_neg = gen_neghi2;
41419 break;
41420 case SImode:
41421 gen_neg = gen_negsi2;
41422 break;
41423 case DImode:
41424 gen_neg = gen_negdi2;
41425 break;
41426 default:
41427 gcc_unreachable ();
41428 }
41429
41430 e1 = gen_reg_rtx (inmode);
41431 e2 = gen_reg_rtx (inmode);
41432 res = gen_reg_rtx (outmode);
41433
41434 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41435
41436 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
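/* Halfway cases are rounded away from zero, i.e. the semantics of the
   C round() functions rather than the x87 default round-to-nearest-even.  */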
41437
41438 /* scratch = fxam(op1) */
41439 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41440 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41441 UNSPEC_FXAM)));
41442 /* e1 = fabs(op1) */
41443 emit_insn (gen_abs (e1, op1));
41444
41445 /* e2 = e1 + 0.5 */
41446 half = force_reg (inmode, half);
41447 emit_insn (gen_rtx_SET (VOIDmode, e2,
41448 gen_rtx_PLUS (inmode, e1, half)));
41449
41450 /* res = floor(e2) */
41451 if (inmode != XFmode)
41452 {
41453 tmp1 = gen_reg_rtx (XFmode);
41454
41455 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41456 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41457 }
41458 else
41459 tmp1 = e2;
41460
41461 switch (outmode)
41462 {
41463 case SFmode:
41464 case DFmode:
41465 {
41466 rtx tmp0 = gen_reg_rtx (XFmode);
41467
41468 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41469
41470 emit_insn (gen_rtx_SET (VOIDmode, res,
41471 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41472 UNSPEC_TRUNC_NOOP)));
41473 }
41474 break;
41475 case XFmode:
41476 emit_insn (gen_frndintxf2_floor (res, tmp1));
41477 break;
41478 case HImode:
41479 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41480 break;
41481 case SImode:
41482 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41483 break;
41484 case DImode:
41485 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41486 break;
41487 default:
41488 gcc_unreachable ();
41489 }
41490
41491 /* flags = signbit(a) */
41492 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41493
41494 /* if (flags) then res = -res */
41495 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41496 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41497 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41498 pc_rtx);
41499 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41500 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41501 JUMP_LABEL (insn) = jump_label;
41502
41503 emit_insn (gen_neg (res, res));
41504
41505 emit_label (jump_label);
41506 LABEL_NUSES (jump_label) = 1;
41507
41508 emit_move_insn (op0, res);
41509 }
41510
41511 /* Output code to perform a Newton-Raphson approximation of a single precision
41512 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41513
41514 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41515 {
41516 rtx x0, x1, e0, e1;
41517
41518 x0 = gen_reg_rtx (mode);
41519 e0 = gen_reg_rtx (mode);
41520 e1 = gen_reg_rtx (mode);
41521 x1 = gen_reg_rtx (mode);
41522
41523 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
41524
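/* This is one Newton-Raphson refinement of the reciprocal estimate:
   with x0 = rcp(b), x1 = 2*x0 - b*x0*x0 = x0*(2 - b*x0), roughly
   doubling the number of correct bits in the estimate.  */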
41525 b = force_reg (mode, b);
41526
41527 /* x0 = rcp(b) estimate */
41528 if (mode == V16SFmode || mode == V8DFmode)
41529 emit_insn (gen_rtx_SET (VOIDmode, x0,
41530 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41531 UNSPEC_RCP14)));
41532 else
41533 emit_insn (gen_rtx_SET (VOIDmode, x0,
41534 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41535 UNSPEC_RCP)));
41536
41537 /* e0 = x0 * b */
41538 emit_insn (gen_rtx_SET (VOIDmode, e0,
41539 gen_rtx_MULT (mode, x0, b)));
41540
41541 /* e0 = x0 * e0 */
41542 emit_insn (gen_rtx_SET (VOIDmode, e0,
41543 gen_rtx_MULT (mode, x0, e0)));
41544
41545 /* e1 = x0 + x0 */
41546 emit_insn (gen_rtx_SET (VOIDmode, e1,
41547 gen_rtx_PLUS (mode, x0, x0)));
41548
41549 /* x1 = e1 - e0 */
41550 emit_insn (gen_rtx_SET (VOIDmode, x1,
41551 gen_rtx_MINUS (mode, e1, e0)));
41552
41553 /* res = a * x1 */
41554 emit_insn (gen_rtx_SET (VOIDmode, res,
41555 gen_rtx_MULT (mode, a, x1)));
41556 }
41557
41558 /* Output code to perform a Newton-Raphson approximation of a
41559 single precision floating point [reciprocal] square root. */
41560
41561 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41562 bool recip)
41563 {
41564 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41565 REAL_VALUE_TYPE r;
41566 int unspec;
41567
41568 x0 = gen_reg_rtx (mode);
41569 e0 = gen_reg_rtx (mode);
41570 e1 = gen_reg_rtx (mode);
41571 e2 = gen_reg_rtx (mode);
41572 e3 = gen_reg_rtx (mode);
41573
41574 real_from_integer (&r, VOIDmode, -3, SIGNED);
41575 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41576
41577 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41578 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41579 unspec = UNSPEC_RSQRT;
41580
41581 if (VECTOR_MODE_P (mode))
41582 {
41583 mthree = ix86_build_const_vector (mode, true, mthree);
41584 mhalf = ix86_build_const_vector (mode, true, mhalf);
41585 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41586 if (GET_MODE_SIZE (mode) == 64)
41587 unspec = UNSPEC_RSQRT14;
41588 }
41589
41590 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41591 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
41592
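/* Both are one Newton-Raphson step for 1/sqrt(a): x1 = x0 * (1.5 -
   0.5*a*x0*x0), rewritten as -0.5 * x0 * (a*x0*x0 - 3.0) to match the
   code below (and multiplied by a for the sqrt case).  */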
41593 a = force_reg (mode, a);
41594
41595 /* x0 = rsqrt(a) estimate */
41596 emit_insn (gen_rtx_SET (VOIDmode, x0,
41597 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41598 unspec)));
41599
41600 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent a NaN for sqrt(0.0). */
41601 if (!recip)
41602 {
41603 rtx zero, mask;
41604
41605 zero = gen_reg_rtx (mode);
41606 mask = gen_reg_rtx (mode);
41607
41608 zero = force_reg (mode, CONST0_RTX(mode));
41609
41610 /* Handle masked compare. */
41611 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41612 {
41613 mask = gen_reg_rtx (HImode);
41614 /* Imm value 0x4 corresponds to not-equal comparison. */
41615 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41616 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41617 }
41618 else
41619 {
41620 emit_insn (gen_rtx_SET (VOIDmode, mask,
41621 gen_rtx_NE (mode, zero, a)));
41622
41623 emit_insn (gen_rtx_SET (VOIDmode, x0,
41624 gen_rtx_AND (mode, x0, mask)));
41625 }
41626 }
41627
41628 /* e0 = x0 * a */
41629 emit_insn (gen_rtx_SET (VOIDmode, e0,
41630 gen_rtx_MULT (mode, x0, a)));
41631 /* e1 = e0 * x0 */
41632 emit_insn (gen_rtx_SET (VOIDmode, e1,
41633 gen_rtx_MULT (mode, e0, x0)));
41634
41635 /* e2 = e1 - 3. */
41636 mthree = force_reg (mode, mthree);
41637 emit_insn (gen_rtx_SET (VOIDmode, e2,
41638 gen_rtx_PLUS (mode, e1, mthree)));
41639
41640 mhalf = force_reg (mode, mhalf);
41641 if (recip)
41642 /* e3 = -.5 * x0 */
41643 emit_insn (gen_rtx_SET (VOIDmode, e3,
41644 gen_rtx_MULT (mode, x0, mhalf)));
41645 else
41646 /* e3 = -.5 * e0 */
41647 emit_insn (gen_rtx_SET (VOIDmode, e3,
41648 gen_rtx_MULT (mode, e0, mhalf)));
41649 /* ret = e2 * e3 */
41650 emit_insn (gen_rtx_SET (VOIDmode, res,
41651 gen_rtx_MULT (mode, e2, e3)));
41652 }
41653
41654 #ifdef TARGET_SOLARIS
41655 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41656
41657 static void
41658 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41659 tree decl)
41660 {
41661 /* With Binutils 2.15, the "@unwind" marker must be specified on
41662 every occurrence of the ".eh_frame" section, not just the first
41663 one. */
41664 if (TARGET_64BIT
41665 && strcmp (name, ".eh_frame") == 0)
41666 {
41667 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41668 flags & SECTION_WRITE ? "aw" : "a");
41669 return;
41670 }
41671
41672 #ifndef USE_GAS
41673 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41674 {
41675 solaris_elf_asm_comdat_section (name, flags, decl);
41676 return;
41677 }
41678 #endif
41679
41680 default_elf_asm_named_section (name, flags, decl);
41681 }
41682 #endif /* TARGET_SOLARIS */
41683
41684 /* Return the mangling of TYPE if it is an extended fundamental type. */
41685
41686 static const char *
41687 ix86_mangle_type (const_tree type)
41688 {
41689 type = TYPE_MAIN_VARIANT (type);
41690
41691 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41692 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41693 return NULL;
41694
41695 switch (TYPE_MODE (type))
41696 {
41697 case TFmode:
41698 /* __float128 is "g". */
41699 return "g";
41700 case XFmode:
41701 /* "long double" or __float80 is "e". */
41702 return "e";
41703 default:
41704 return NULL;
41705 }
41706 }
41707
41708 /* For 32-bit code we can save the PIC register setup by using the
41709 __stack_chk_fail_local hidden function instead of calling
41710 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
41711 register, so it is better to call __stack_chk_fail directly. */
41712
41713 static tree ATTRIBUTE_UNUSED
41714 ix86_stack_protect_fail (void)
41715 {
41716 return TARGET_64BIT
41717 ? default_external_stack_protect_fail ()
41718 : default_hidden_stack_protect_fail ();
41719 }
41720
41721 /* Select a format to encode pointers in exception handling data. CODE
41722 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41723 true if the symbol may be affected by dynamic relocations.
41724
41725 ??? All x86 object file formats are capable of representing this.
41726 After all, the relocation needed is the same as for the call insn.
41727 Whether or not a particular assembler allows us to enter such, I
41728 guess we'll have to see. */
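/* For example, for -fPIC code using the small code models this returns
   DW_EH_PE_pcrel | DW_EH_PE_sdata4, with DW_EH_PE_indirect added for
   symbols that may be affected by dynamic relocations. */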
41729 int
41730 asm_preferred_eh_data_format (int code, int global)
41731 {
41732 if (flag_pic)
41733 {
41734 int type = DW_EH_PE_sdata8;
41735 if (!TARGET_64BIT
41736 || ix86_cmodel == CM_SMALL_PIC
41737 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41738 type = DW_EH_PE_sdata4;
41739 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41740 }
41741 if (ix86_cmodel == CM_SMALL
41742 || (ix86_cmodel == CM_MEDIUM && code))
41743 return DW_EH_PE_udata4;
41744 return DW_EH_PE_absptr;
41745 }
41746 \f
41747 /* Expand copysign: combine the sign of SIGN with the positive value
41748 ABS_VALUE, storing the result in RESULT. If MASK is non-null, it is
41749 a mask used to mask out the sign bit. */
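/* In effect the expansion below computes, in scalar terms,
	sgn    = sign & SIGN_BIT_MASK
	result = abs_value | sgn
   where the sign-bit mask is either built here (MASK == NULL) or obtained
   by inverting the caller-supplied MASK. */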
41750 static void
41751 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41752 {
41753 enum machine_mode mode = GET_MODE (sign);
41754 rtx sgn = gen_reg_rtx (mode);
41755 if (mask == NULL_RTX)
41756 {
41757 enum machine_mode vmode;
41758
41759 if (mode == SFmode)
41760 vmode = V4SFmode;
41761 else if (mode == DFmode)
41762 vmode = V2DFmode;
41763 else
41764 vmode = mode;
41765
41766 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41767 if (!VECTOR_MODE_P (mode))
41768 {
41769 /* We need to generate a scalar mode mask in this case. */
41770 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41771 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41772 mask = gen_reg_rtx (mode);
41773 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41774 }
41775 }
41776 else
41777 mask = gen_rtx_NOT (mode, mask);
41778 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41779 gen_rtx_AND (mode, mask, sign)));
41780 emit_insn (gen_rtx_SET (VOIDmode, result,
41781 gen_rtx_IOR (mode, abs_value, sgn)));
41782 }
41783
41784 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41785 mask for masking out the sign-bit is stored in *SMASK, if that is
41786 non-null. */
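/* In effect: xa = op0 & ~SIGN_BIT, i.e. the absolute value is obtained by
   clearing the sign bit with a bitwise AND against a mask that has every
   bit set except the sign bit. */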
41787 static rtx
41788 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41789 {
41790 enum machine_mode vmode, mode = GET_MODE (op0);
41791 rtx xa, mask;
41792
41793 xa = gen_reg_rtx (mode);
41794 if (mode == SFmode)
41795 vmode = V4SFmode;
41796 else if (mode == DFmode)
41797 vmode = V2DFmode;
41798 else
41799 vmode = mode;
41800 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41801 if (!VECTOR_MODE_P (mode))
41802 {
41803 /* We need to generate a scalar mode mask in this case. */
41804 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41805 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41806 mask = gen_reg_rtx (mode);
41807 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41808 }
41809 emit_insn (gen_rtx_SET (VOIDmode, xa,
41810 gen_rtx_AND (mode, op0, mask)));
41811
41812 if (smask)
41813 *smask = mask;
41814
41815 return xa;
41816 }
41817
41818 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41819 swapping the operands if SWAP_OPERANDS is true. The expanded
41820 code is a forward jump to a newly created label in case the
41821 comparison is true. The generated label rtx is returned. */
41822 static rtx_code_label *
41823 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41824 bool swap_operands)
41825 {
41826 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41827 rtx_code_label *label;
41828 rtx tmp;
41829
41830 if (swap_operands)
41831 {
41832 tmp = op0;
41833 op0 = op1;
41834 op1 = tmp;
41835 }
41836
41837 label = gen_label_rtx ();
41838 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41839 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41840 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41841 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41842 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41843 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41844 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41845 JUMP_LABEL (tmp) = label;
41846
41847 return label;
41848 }
41849
41850 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41851 using comparison code CODE. Operands are swapped for the comparison if
41852 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41853 static rtx
41854 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41855 bool swap_operands)
41856 {
41857 rtx (*insn)(rtx, rtx, rtx, rtx);
41858 enum machine_mode mode = GET_MODE (op0);
41859 rtx mask = gen_reg_rtx (mode);
41860
41861 if (swap_operands)
41862 {
41863 rtx tmp = op0;
41864 op0 = op1;
41865 op1 = tmp;
41866 }
41867
41868 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
41869
41870 emit_insn (insn (mask, op0, op1,
41871 gen_rtx_fmt_ee (code, mode, op0, op1)));
41872 return mask;
41873 }
41874
41875 /* Generate and return a rtx of mode MODE for 2**n where n is the number
41876 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
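/* 2**52 for DFmode, 2**23 for SFmode. For values of smaller magnitude,
   adding this constant and subtracting it again leaves only the integral
   part: once the magnitude reaches 2**mantissa-bits the ULP is 1.0, so
   the fraction is rounded away by the addition. */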
41877 static rtx
41878 ix86_gen_TWO52 (enum machine_mode mode)
41879 {
41880 REAL_VALUE_TYPE TWO52r;
41881 rtx TWO52;
41882
41883 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
41884 TWO52 = const_double_from_real_value (TWO52r, mode);
41885 TWO52 = force_reg (mode, TWO52);
41886
41887 return TWO52;
41888 }
41889
41890 /* Expand SSE sequence for computing lround from OP1 storing
41891 into OP0. */
41892 void
41893 ix86_expand_lround (rtx op0, rtx op1)
41894 {
41895 /* C code for the stuff we're doing below:
41896 tmp = op1 + copysign (nextafter (0.5, 0.0), op1);
41897 return (long)tmp;
41898 */
41899 enum machine_mode mode = GET_MODE (op1);
41900 const struct real_format *fmt;
41901 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41902 rtx adj;
41903
41904 /* load nextafter (0.5, 0.0) */
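/* Using the largest representable value below 0.5 rather than 0.5 itself
   avoids the off-by-one case where op1 is just below 0.5 and op1 + 0.5
   rounds up to 1.0; with the smaller addend the sum stays below 1.0 and
   truncates to 0. */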
41905 fmt = REAL_MODE_FORMAT (mode);
41906 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41907 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
41908
41909 /* adj = copysign (0.5, op1) */
41910 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
41911 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
41912
41913 /* adj = op1 + adj */
41914 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
41915
41916 /* op0 = (imode)adj */
41917 expand_fix (op0, adj, 0);
41918 }
41919
41920 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1,
41921 storing into OPERAND0. */
41922 void
41923 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
41924 {
41925 /* C code for the stuff we're doing below (for do_floor):
41926 xi = (long)op1;
41927 xi -= (double)xi > op1 ? 1 : 0;
41928 return xi;
41929 */
41930 enum machine_mode fmode = GET_MODE (op1);
41931 enum machine_mode imode = GET_MODE (op0);
41932 rtx ireg, freg, tmp;
41933 rtx_code_label *label;
41934
41935 /* reg = (long)op1 */
41936 ireg = gen_reg_rtx (imode);
41937 expand_fix (ireg, op1, 0);
41938
41939 /* freg = (double)reg */
41940 freg = gen_reg_rtx (fmode);
41941 expand_float (freg, ireg, 0);
41942
41943 /* ireg = (freg > op1) ? ireg - 1 : ireg */
41944 label = ix86_expand_sse_compare_and_jump (UNLE,
41945 freg, op1, !do_floor);
41946 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
41947 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
41948 emit_move_insn (ireg, tmp);
41949
41950 emit_label (label);
41951 LABEL_NUSES (label) = 1;
41952
41953 emit_move_insn (op0, ireg);
41954 }
41955
41956 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
41957 result in OPERAND0. */
41958 void
41959 ix86_expand_rint (rtx operand0, rtx operand1)
41960 {
41961 /* C code for the stuff we're doing below:
41962 xa = fabs (operand1);
41963 if (!isless (xa, 2**52))
41964 return operand1;
41965 xa = xa + 2**52 - 2**52;
41966 return copysign (xa, operand1);
41967 */
41968 enum machine_mode mode = GET_MODE (operand0);
41969 rtx res, xa, TWO52, mask;
41970 rtx_code_label *label;
41971
41972 res = gen_reg_rtx (mode);
41973 emit_move_insn (res, operand1);
41974
41975 /* xa = abs (operand1) */
41976 xa = ix86_expand_sse_fabs (res, &mask);
41977
41978 /* if (!isless (xa, TWO52)) goto label; */
41979 TWO52 = ix86_gen_TWO52 (mode);
41980 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41981
41982 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41983 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41984
41985 ix86_sse_copysign_to_positive (res, xa, res, mask);
41986
41987 emit_label (label);
41988 LABEL_NUSES (label) = 1;
41989
41990 emit_move_insn (operand0, res);
41991 }
41992
41993 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41994 into OPERAND0. */
41995 void
41996 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
41997 {
41998 /* C code for the stuff we expand below.
41999 double xa = fabs (x), x2;
42000 if (!isless (xa, TWO52))
42001 return x;
42002 xa = xa + TWO52 - TWO52;
42003 x2 = copysign (xa, x);
42004 Compensate. Floor:
42005 if (x2 > x)
42006 x2 -= 1;
42007 Compensate. Ceil:
42008 if (x2 < x)
42009 x2 -= -1;
42010 return x2;
42011 */
42012 enum machine_mode mode = GET_MODE (operand0);
42013 rtx xa, TWO52, tmp, one, res, mask;
42014 rtx_code_label *label;
42015
42016 TWO52 = ix86_gen_TWO52 (mode);
42017
42018 /* Temporary for holding the result, initialized to the input
42019 operand to ease control flow. */
42020 res = gen_reg_rtx (mode);
42021 emit_move_insn (res, operand1);
42022
42023 /* xa = abs (operand1) */
42024 xa = ix86_expand_sse_fabs (res, &mask);
42025
42026 /* if (!isless (xa, TWO52)) goto label; */
42027 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42028
42029 /* xa = xa + TWO52 - TWO52; */
42030 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42031 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
42032
42033 /* xa = copysign (xa, operand1) */
42034 ix86_sse_copysign_to_positive (xa, xa, res, mask);
42035
42036 /* generate 1.0 or -1.0 */
42037 one = force_reg (mode,
42038 const_double_from_real_value (do_floor
42039 ? dconst1 : dconstm1, mode));
42040
42041 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42042 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42043 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42044 gen_rtx_AND (mode, one, tmp)));
42045 /* We always need to subtract here to preserve signed zero. */
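/* (-0.0) - (+0.0) is -0.0, whereas (-0.0) + (+0.0) would be +0.0 under the
   default rounding mode, so the subtraction keeps the sign of zero intact. */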
42046 tmp = expand_simple_binop (mode, MINUS,
42047 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42048 emit_move_insn (res, tmp);
42049
42050 emit_label (label);
42051 LABEL_NUSES (label) = 1;
42052
42053 emit_move_insn (operand0, res);
42054 }
42055
42056 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
42057 into OPERAND0. */
42058 void
42059 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
42060 {
42061 /* C code for the stuff we expand below.
42062 double xa = fabs (x), x2;
42063 if (!isless (xa, TWO52))
42064 return x;
42065 x2 = (double)(long)x;
42066 Compensate. Floor:
42067 if (x2 > x)
42068 x2 -= 1;
42069 Compensate. Ceil:
42070 if (x2 < x)
42071 x2 += 1;
42072 if (HONOR_SIGNED_ZEROS (mode))
42073 return copysign (x2, x);
42074 return x2;
42075 */
42076 enum machine_mode mode = GET_MODE (operand0);
42077 rtx xa, xi, TWO52, tmp, one, res, mask;
42078 rtx_code_label *label;
42079
42080 TWO52 = ix86_gen_TWO52 (mode);
42081
42082 /* Temporary for holding the result, initialized to the input
42083 operand to ease control flow. */
42084 res = gen_reg_rtx (mode);
42085 emit_move_insn (res, operand1);
42086
42087 /* xa = abs (operand1) */
42088 xa = ix86_expand_sse_fabs (res, &mask);
42089
42090 /* if (!isless (xa, TWO52)) goto label; */
42091 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42092
42093 /* xa = (double)(long)x */
42094 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42095 expand_fix (xi, res, 0);
42096 expand_float (xa, xi, 0);
42097
42098 /* generate 1.0 */
42099 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42100
42101 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42102 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42103 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42104 gen_rtx_AND (mode, one, tmp)));
42105 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
42106 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42107 emit_move_insn (res, tmp);
42108
42109 if (HONOR_SIGNED_ZEROS (mode))
42110 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42111
42112 emit_label (label);
42113 LABEL_NUSES (label) = 1;
42114
42115 emit_move_insn (operand0, res);
42116 }
42117
42118 /* Expand SSE sequence for computing round from OPERAND1 storing
42119 into OPERAND0, using a sequence that works without relying on DImode
42120 truncation via cvttsd2siq, which is only available on 64-bit targets. */
42121 void
42122 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42123 {
42124 /* C code for the stuff we expand below.
42125 double xa = fabs (x), xa2, x2;
42126 if (!isless (xa, TWO52))
42127 return x;
42128 Using the absolute value and copying back sign makes
42129 -0.0 -> -0.0 correct.
42130 xa2 = xa + TWO52 - TWO52;
42131 Compensate.
42132 dxa = xa2 - xa;
42133 if (dxa <= -0.5)
42134 xa2 += 1;
42135 else if (dxa > 0.5)
42136 xa2 -= 1;
42137 x2 = copysign (xa2, x);
42138 return x2;
42139 */
42140 enum machine_mode mode = GET_MODE (operand0);
42141 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
42142 rtx_code_label *label;
42143
42144 TWO52 = ix86_gen_TWO52 (mode);
42145
42146 /* Temporary for holding the result, initialized to the input
42147 operand to ease control flow. */
42148 res = gen_reg_rtx (mode);
42149 emit_move_insn (res, operand1);
42150
42151 /* xa = abs (operand1) */
42152 xa = ix86_expand_sse_fabs (res, &mask);
42153
42154 /* if (!isless (xa, TWO52)) goto label; */
42155 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42156
42157 /* xa2 = xa + TWO52 - TWO52; */
42158 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42159 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42160
42161 /* dxa = xa2 - xa; */
42162 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42163
42164 /* generate 0.5, 1.0 and -0.5 */
42165 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42166 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42167 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42168 0, OPTAB_DIRECT);
42169
42170 /* Compensate. */
42172 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42173 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42174 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42175 gen_rtx_AND (mode, one, tmp)));
42176 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42177 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42178 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42179 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42180 gen_rtx_AND (mode, one, tmp)));
42181 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42182
42183 /* res = copysign (xa2, operand1) */
42184 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42185
42186 emit_label (label);
42187 LABEL_NUSES (label) = 1;
42188
42189 emit_move_insn (operand0, res);
42190 }
42191
42192 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42193 into OPERAND0. */
42194 void
42195 ix86_expand_trunc (rtx operand0, rtx operand1)
42196 {
42197 /* C code for SSE variant we expand below.
42198 double xa = fabs (x), x2;
42199 if (!isless (xa, TWO52))
42200 return x;
42201 x2 = (double)(long)x;
42202 if (HONOR_SIGNED_ZEROS (mode))
42203 return copysign (x2, x);
42204 return x2;
42205 */
42206 enum machine_mode mode = GET_MODE (operand0);
42207 rtx xa, xi, TWO52, res, mask;
42208 rtx_code_label *label;
42209
42210 TWO52 = ix86_gen_TWO52 (mode);
42211
42212 /* Temporary for holding the result, initialized to the input
42213 operand to ease control flow. */
42214 res = gen_reg_rtx (mode);
42215 emit_move_insn (res, operand1);
42216
42217 /* xa = abs (operand1) */
42218 xa = ix86_expand_sse_fabs (res, &mask);
42219
42220 /* if (!isless (xa, TWO52)) goto label; */
42221 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42222
42223 /* x = (double)(long)x */
42224 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42225 expand_fix (xi, res, 0);
42226 expand_float (res, xi, 0);
42227
42228 if (HONOR_SIGNED_ZEROS (mode))
42229 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42230
42231 emit_label (label);
42232 LABEL_NUSES (label) = 1;
42233
42234 emit_move_insn (operand0, res);
42235 }
42236
42237 /* Expand SSE sequence for computing trunc from OPERAND1 storing into
42238 OPERAND0, without relying on 64-bit-only DImode truncation (cvttsd2siq). */
42239 void
42240 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42241 {
42242 enum machine_mode mode = GET_MODE (operand0);
42243 rtx xa, mask, TWO52, one, res, smask, tmp;
42244 rtx_code_label *label;
42245
42246 /* C code for SSE variant we expand below.
42247 double xa = fabs (x), xa2, x2;
42248 if (!isless (xa, TWO52))
42249 return x;
42250 xa2 = xa + TWO52 - TWO52;
42251 Compensate:
42252 if (xa2 > xa)
42253 xa2 -= 1.0;
42254 x2 = copysign (xa2, x);
42255 return x2;
42256 */
42257
42258 TWO52 = ix86_gen_TWO52 (mode);
42259
42260 /* Temporary for holding the result, initialized to the input
42261 operand to ease control flow. */
42262 res = gen_reg_rtx (mode);
42263 emit_move_insn (res, operand1);
42264
42265 /* xa = abs (operand1) */
42266 xa = ix86_expand_sse_fabs (res, &smask);
42267
42268 /* if (!isless (xa, TWO52)) goto label; */
42269 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42270
42271 /* res = xa + TWO52 - TWO52; */
42272 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42273 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42274 emit_move_insn (res, tmp);
42275
42276 /* generate 1.0 */
42277 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42278
42279 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42280 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42281 emit_insn (gen_rtx_SET (VOIDmode, mask,
42282 gen_rtx_AND (mode, mask, one)));
42283 tmp = expand_simple_binop (mode, MINUS,
42284 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42285 emit_move_insn (res, tmp);
42286
42287 /* res = copysign (res, operand1) */
42288 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42289
42290 emit_label (label);
42291 LABEL_NUSES (label) = 1;
42292
42293 emit_move_insn (operand0, res);
42294 }
42295
42296 /* Expand SSE sequence for computing round from OPERAND1 storing
42297 into OPERAND0. */
42298 void
42299 ix86_expand_round (rtx operand0, rtx operand1)
42300 {
42301 /* C code for the stuff we're doing below:
42302 double xa = fabs (x);
42303 if (!isless (xa, TWO52))
42304 return x;
42305 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42306 return copysign (xa, x);
42307 */
42308 enum machine_mode mode = GET_MODE (operand0);
42309 rtx res, TWO52, xa, xi, half, mask;
42310 rtx_code_label *label;
42311 const struct real_format *fmt;
42312 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42313
42314 /* Temporary for holding the result, initialized to the input
42315 operand to ease control flow. */
42316 res = gen_reg_rtx (mode);
42317 emit_move_insn (res, operand1);
42318
42319 TWO52 = ix86_gen_TWO52 (mode);
42320 xa = ix86_expand_sse_fabs (res, &mask);
42321 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42322
42323 /* load nextafter (0.5, 0.0) */
42324 fmt = REAL_MODE_FORMAT (mode);
42325 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42326 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42327
42328 /* xa = xa + 0.5 */
42329 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42330 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42331
42332 /* xa = (double)(int64_t)xa */
42333 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42334 expand_fix (xi, xa, 0);
42335 expand_float (xa, xi, 0);
42336
42337 /* res = copysign (xa, operand1) */
42338 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42339
42340 emit_label (label);
42341 LABEL_NUSES (label) = 1;
42342
42343 emit_move_insn (operand0, res);
42344 }
42345
42346 /* Expand SSE sequence for computing round
42347 from OP1 storing into OP0 using sse4 round insn. */
42348 void
42349 ix86_expand_round_sse4 (rtx op0, rtx op1)
42350 {
42351 enum machine_mode mode = GET_MODE (op0);
42352 rtx e1, e2, res, half;
42353 const struct real_format *fmt;
42354 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42355 rtx (*gen_copysign) (rtx, rtx, rtx);
42356 rtx (*gen_round) (rtx, rtx, rtx);
42357
42358 switch (mode)
42359 {
42360 case SFmode:
42361 gen_copysign = gen_copysignsf3;
42362 gen_round = gen_sse4_1_roundsf2;
42363 break;
42364 case DFmode:
42365 gen_copysign = gen_copysigndf3;
42366 gen_round = gen_sse4_1_rounddf2;
42367 break;
42368 default:
42369 gcc_unreachable ();
42370 }
42371
42372 /* round (a) = trunc (a + copysign (0.5, a)) */
42373
42374 /* load nextafter (0.5, 0.0) */
42375 fmt = REAL_MODE_FORMAT (mode);
42376 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42377 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42378 half = const_double_from_real_value (pred_half, mode);
42379
42380 /* e1 = copysign (0.5, op1) */
42381 e1 = gen_reg_rtx (mode);
42382 emit_insn (gen_copysign (e1, half, op1));
42383
42384 /* e2 = op1 + e1 */
42385 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42386
42387 /* res = trunc (e2) */
42388 res = gen_reg_rtx (mode);
42389 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42390
42391 emit_move_insn (op0, res);
42392 }
42393 \f
42394
42395 /* Table of valid machine attributes. */
42396 static const struct attribute_spec ix86_attribute_table[] =
42397 {
42398 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42399 affects_type_identity } */
42400 /* Stdcall attribute says callee is responsible for popping arguments
42401 if they are not variable. */
42402 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42403 true },
42404 /* Fastcall attribute says callee is responsible for popping arguments
42405 if they are not variable. */
42406 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42407 true },
42408 /* Thiscall attribute says callee is responsible for popping arguments
42409 if they are not variable. */
42410 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42411 true },
42412 /* Cdecl attribute says the callee is a normal C declaration. */
42413 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42414 true },
42415 /* Regparm attribute specifies how many integer arguments are to be
42416 passed in registers. */
42417 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42418 true },
42419 /* Sseregparm attribute says we are using x86_64 calling conventions
42420 for FP arguments. */
42421 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42422 true },
42423 /* The transactional memory builtins are implicitly regparm or fastcall
42424 depending on the ABI. Override the generic do-nothing attribute that
42425 these builtins were declared with. */
42426 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42427 true },
42428 /* force_align_arg_pointer says this function realigns the stack at entry. */
42429 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42430 false, true, true, ix86_handle_cconv_attribute, false },
42431 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42432 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42433 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42434 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42435 false },
42436 #endif
42437 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42438 false },
42439 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42440 false },
42441 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42442 SUBTARGET_ATTRIBUTE_TABLE,
42443 #endif
42444 /* ms_abi and sysv_abi calling convention function attributes. */
42445 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42446 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42447 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42448 false },
42449 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42450 ix86_handle_callee_pop_aggregate_return, true },
42451 /* End element. */
42452 { NULL, 0, 0, false, false, false, NULL, false }
42453 };
42454
42455 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42456 static int
42457 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42458 tree vectype, int)
42459 {
42460 unsigned elements;
42461
42462 switch (type_of_cost)
42463 {
42464 case scalar_stmt:
42465 return ix86_cost->scalar_stmt_cost;
42466
42467 case scalar_load:
42468 return ix86_cost->scalar_load_cost;
42469
42470 case scalar_store:
42471 return ix86_cost->scalar_store_cost;
42472
42473 case vector_stmt:
42474 return ix86_cost->vec_stmt_cost;
42475
42476 case vector_load:
42477 return ix86_cost->vec_align_load_cost;
42478
42479 case vector_store:
42480 return ix86_cost->vec_store_cost;
42481
42482 case vec_to_scalar:
42483 return ix86_cost->vec_to_scalar_cost;
42484
42485 case scalar_to_vec:
42486 return ix86_cost->scalar_to_vec_cost;
42487
42488 case unaligned_load:
42489 case unaligned_store:
42490 return ix86_cost->vec_unalign_load_cost;
42491
42492 case cond_branch_taken:
42493 return ix86_cost->cond_taken_branch_cost;
42494
42495 case cond_branch_not_taken:
42496 return ix86_cost->cond_not_taken_branch_cost;
42497
42498 case vec_perm:
42499 case vec_promote_demote:
42500 return ix86_cost->vec_stmt_cost;
42501
42502 case vec_construct:
42503 elements = TYPE_VECTOR_SUBPARTS (vectype);
42504 return elements / 2 + 1;
42505
42506 default:
42507 gcc_unreachable ();
42508 }
42509 }
42510
42511 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42512 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42513 insn every time. */
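/* The cached pattern is created once with dummy operands; expand_vselect
   and expand_vselect_vconcat splice the real operands and the permutation
   into it, ask recog whether the result matches an insn in the active ISA,
   and then restore the dummy operands. */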
42514
42515 static GTY(()) rtx vselect_insn;
42516
42517 /* Initialize vselect_insn. */
42518
42519 static void
42520 init_vselect_insn (void)
42521 {
42522 unsigned i;
42523 rtx x;
42524
42525 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42526 for (i = 0; i < MAX_VECT_LEN; ++i)
42527 XVECEXP (x, 0, i) = const0_rtx;
42528 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42529 const0_rtx), x);
42530 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42531 start_sequence ();
42532 vselect_insn = emit_insn (x);
42533 end_sequence ();
42534 }
42535
42536 /* Construct (set target (vec_select op0 (parallel perm))) and
42537 return true if that's a valid instruction in the active ISA. */
42538
42539 static bool
42540 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42541 unsigned nelt, bool testing_p)
42542 {
42543 unsigned int i;
42544 rtx x, save_vconcat;
42545 int icode;
42546
42547 if (vselect_insn == NULL_RTX)
42548 init_vselect_insn ();
42549
42550 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42551 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42552 for (i = 0; i < nelt; ++i)
42553 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42554 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42555 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42556 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42557 SET_DEST (PATTERN (vselect_insn)) = target;
42558 icode = recog_memoized (vselect_insn);
42559
42560 if (icode >= 0 && !testing_p)
42561 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42562
42563 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42564 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42565 INSN_CODE (vselect_insn) = -1;
42566
42567 return icode >= 0;
42568 }
42569
42570 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42571
42572 static bool
42573 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42574 const unsigned char *perm, unsigned nelt,
42575 bool testing_p)
42576 {
42577 enum machine_mode v2mode;
42578 rtx x;
42579 bool ok;
42580
42581 if (vselect_insn == NULL_RTX)
42582 init_vselect_insn ();
42583
42584 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42585 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42586 PUT_MODE (x, v2mode);
42587 XEXP (x, 0) = op0;
42588 XEXP (x, 1) = op1;
42589 ok = expand_vselect (target, x, perm, nelt, testing_p);
42590 XEXP (x, 0) = const0_rtx;
42591 XEXP (x, 1) = const0_rtx;
42592 return ok;
42593 }
42594
42595 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42596 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42597
42598 static bool
42599 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42600 {
42601 enum machine_mode vmode = d->vmode;
42602 unsigned i, mask, nelt = d->nelt;
42603 rtx target, op0, op1, x;
42604 rtx rperm[32], vperm;
42605
42606 if (d->one_operand_p)
42607 return false;
42608 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42609 ;
42610 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42611 ;
42612 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42613 ;
42614 else
42615 return false;
42616
42617 /* This is a blend, not a permute. Elements must stay in their
42618 respective lanes. */
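/* For example, for V4SFmode the permutation {0, 5, 2, 7} is a valid blend:
   elements 0 and 2 come from op0, elements 1 and 3 from op1, and the
   immediate mask computed below is 0b1010. */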
42619 for (i = 0; i < nelt; ++i)
42620 {
42621 unsigned e = d->perm[i];
42622 if (!(e == i || e == i + nelt))
42623 return false;
42624 }
42625
42626 if (d->testing_p)
42627 return true;
42628
42629 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42630 decision should be extracted elsewhere, so that we only try that
42631 sequence once all budget==3 options have been tried. */
42632 target = d->target;
42633 op0 = d->op0;
42634 op1 = d->op1;
42635 mask = 0;
42636
42637 switch (vmode)
42638 {
42639 case V4DFmode:
42640 case V8SFmode:
42641 case V2DFmode:
42642 case V4SFmode:
42643 case V8HImode:
42644 case V8SImode:
42645 for (i = 0; i < nelt; ++i)
42646 mask |= (d->perm[i] >= nelt) << i;
42647 break;
42648
42649 case V2DImode:
42650 for (i = 0; i < 2; ++i)
42651 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
42652 vmode = V8HImode;
42653 goto do_subreg;
42654
42655 case V4SImode:
42656 for (i = 0; i < 4; ++i)
42657 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42658 vmode = V8HImode;
42659 goto do_subreg;
42660
42661 case V16QImode:
42662 /* See if bytes move in pairs so we can use pblendw with
42663 an immediate argument, rather than pblendvb with a vector
42664 argument. */
42665 for (i = 0; i < 16; i += 2)
42666 if (d->perm[i] + 1 != d->perm[i + 1])
42667 {
42668 use_pblendvb:
42669 for (i = 0; i < nelt; ++i)
42670 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42671
42672 finish_pblendvb:
42673 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42674 vperm = force_reg (vmode, vperm);
42675
42676 if (GET_MODE_SIZE (vmode) == 16)
42677 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42678 else
42679 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42680 if (target != d->target)
42681 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42682 return true;
42683 }
42684
42685 for (i = 0; i < 8; ++i)
42686 mask |= (d->perm[i * 2] >= 16) << i;
42687 vmode = V8HImode;
42688 /* FALLTHRU */
42689
42690 do_subreg:
42691 target = gen_reg_rtx (vmode);
42692 op0 = gen_lowpart (vmode, op0);
42693 op1 = gen_lowpart (vmode, op1);
42694 break;
42695
42696 case V32QImode:
42697 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42698 for (i = 0; i < 32; i += 2)
42699 if (d->perm[i] + 1 != d->perm[i + 1])
42700 goto use_pblendvb;
42701 /* See if bytes move in quadruplets. If yes, vpblendd
42702 with immediate can be used. */
42703 for (i = 0; i < 32; i += 4)
42704 if (d->perm[i] + 2 != d->perm[i + 2])
42705 break;
42706 if (i < 32)
42707 {
42708 /* See if bytes move the same in both lanes. If yes,
42709 vpblendw with immediate can be used. */
42710 for (i = 0; i < 16; i += 2)
42711 if (d->perm[i] + 16 != d->perm[i + 16])
42712 goto use_pblendvb;
42713
42714 /* Use vpblendw. */
42715 for (i = 0; i < 16; ++i)
42716 mask |= (d->perm[i * 2] >= 32) << i;
42717 vmode = V16HImode;
42718 goto do_subreg;
42719 }
42720
42721 /* Use vpblendd. */
42722 for (i = 0; i < 8; ++i)
42723 mask |= (d->perm[i * 4] >= 32) << i;
42724 vmode = V8SImode;
42725 goto do_subreg;
42726
42727 case V16HImode:
42728 /* See if words move in pairs. If yes, vpblendd can be used. */
42729 for (i = 0; i < 16; i += 2)
42730 if (d->perm[i] + 1 != d->perm[i + 1])
42731 break;
42732 if (i < 16)
42733 {
42734 /* See if words move the same in both lanes. If not,
42735 vpblendvb must be used. */
42736 for (i = 0; i < 8; i++)
42737 if (d->perm[i] + 8 != d->perm[i + 8])
42738 {
42739 /* Use vpblendvb. */
42740 for (i = 0; i < 32; ++i)
42741 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42742
42743 vmode = V32QImode;
42744 nelt = 32;
42745 target = gen_reg_rtx (vmode);
42746 op0 = gen_lowpart (vmode, op0);
42747 op1 = gen_lowpart (vmode, op1);
42748 goto finish_pblendvb;
42749 }
42750
42751 /* Use vpblendw. */
42752 for (i = 0; i < 16; ++i)
42753 mask |= (d->perm[i] >= 16) << i;
42754 break;
42755 }
42756
42757 /* Use vpblendd. */
42758 for (i = 0; i < 8; ++i)
42759 mask |= (d->perm[i * 2] >= 16) << i;
42760 vmode = V8SImode;
42761 goto do_subreg;
42762
42763 case V4DImode:
42764 /* Use vpblendd. */
42765 for (i = 0; i < 4; ++i)
42766 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42767 vmode = V8SImode;
42768 goto do_subreg;
42769
42770 default:
42771 gcc_unreachable ();
42772 }
42773
42774 /* This matches five different patterns with the different modes. */
42775 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42776 x = gen_rtx_SET (VOIDmode, target, x);
42777 emit_insn (x);
42778 if (target != d->target)
42779 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42780
42781 return true;
42782 }
42783
42784 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42785 in terms of the variable form of vpermilps.
42786
42787 Note that we will have already failed the immediate input vpermilps,
42788 which requires that the high and low part shuffle be identical; the
42789 variable form doesn't require that. */
42790
42791 static bool
42792 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42793 {
42794 rtx rperm[8], vperm;
42795 unsigned i;
42796
42797 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42798 return false;
42799
42800 /* We can only permute within the 128-bit lane. */
42801 for (i = 0; i < 8; ++i)
42802 {
42803 unsigned e = d->perm[i];
42804 if (i < 4 ? e >= 4 : e < 4)
42805 return false;
42806 }
42807
42808 if (d->testing_p)
42809 return true;
42810
42811 for (i = 0; i < 8; ++i)
42812 {
42813 unsigned e = d->perm[i];
42814
42815 /* Within each 128-bit lane, the elements of op0 are numbered
42816 from 0 and the elements of op1 are numbered from 4. */
42817 if (e >= 8 + 4)
42818 e -= 8;
42819 else if (e >= 4)
42820 e -= 4;
42821
42822 rperm[i] = GEN_INT (e);
42823 }
42824
42825 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42826 vperm = force_reg (V8SImode, vperm);
42827 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42828
42829 return true;
42830 }
42831
42832 /* Return true if permutation D can be performed as VMODE permutation
42833 instead. */
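/* For example, a V16QImode permutation whose indices move in aligned groups
   of four consecutive bytes can equally well be carried out as a V4SImode
   permutation of the same operands. */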
42834
42835 static bool
42836 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42837 {
42838 unsigned int i, j, chunk;
42839
42840 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42841 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42842 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42843 return false;
42844
42845 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42846 return true;
42847
42848 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42849 for (i = 0; i < d->nelt; i += chunk)
42850 if (d->perm[i] & (chunk - 1))
42851 return false;
42852 else
42853 for (j = 1; j < chunk; ++j)
42854 if (d->perm[i] + j != d->perm[i + j])
42855 return false;
42856
42857 return true;
42858 }
42859
42860 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42861 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
42862
42863 static bool
42864 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
42865 {
42866 unsigned i, nelt, eltsz, mask;
42867 unsigned char perm[32];
42868 enum machine_mode vmode = V16QImode;
42869 rtx rperm[32], vperm, target, op0, op1;
42870
42871 nelt = d->nelt;
42872
42873 if (!d->one_operand_p)
42874 {
42875 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
42876 {
42877 if (TARGET_AVX2
42878 && valid_perm_using_mode_p (V2TImode, d))
42879 {
42880 if (d->testing_p)
42881 return true;
42882
42883 /* Use vperm2i128 insn. The pattern uses
42884 V4DImode instead of V2TImode. */
42885 target = d->target;
42886 if (d->vmode != V4DImode)
42887 target = gen_reg_rtx (V4DImode);
42888 op0 = gen_lowpart (V4DImode, d->op0);
42889 op1 = gen_lowpart (V4DImode, d->op1);
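/* Bits 0-1 of the vperm2i128 immediate select the 128-bit source lane for
   the low half of the result, bits 4-5 the lane for the high half; lanes
   0-1 come from op0 and lanes 2-3 from op1. */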
42890 rperm[0]
42891 = GEN_INT ((d->perm[0] / (nelt / 2))
42892 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
42893 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
42894 if (target != d->target)
42895 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42896 return true;
42897 }
42898 return false;
42899 }
42900 }
42901 else
42902 {
42903 if (GET_MODE_SIZE (d->vmode) == 16)
42904 {
42905 if (!TARGET_SSSE3)
42906 return false;
42907 }
42908 else if (GET_MODE_SIZE (d->vmode) == 32)
42909 {
42910 if (!TARGET_AVX2)
42911 return false;
42912
42913 /* V4DImode should be already handled through
42914 expand_vselect by vpermq instruction. */
42915 gcc_assert (d->vmode != V4DImode);
42916
42917 vmode = V32QImode;
42918 if (d->vmode == V8SImode
42919 || d->vmode == V16HImode
42920 || d->vmode == V32QImode)
42921 {
42922 /* First see if vpermq can be used for
42923 V8SImode/V16HImode/V32QImode. */
42924 if (valid_perm_using_mode_p (V4DImode, d))
42925 {
42926 for (i = 0; i < 4; i++)
42927 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
42928 if (d->testing_p)
42929 return true;
42930 target = gen_reg_rtx (V4DImode);
42931 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
42932 perm, 4, false))
42933 {
42934 emit_move_insn (d->target,
42935 gen_lowpart (d->vmode, target));
42936 return true;
42937 }
42938 return false;
42939 }
42940
42941 /* Next see if vpermd can be used. */
42942 if (valid_perm_using_mode_p (V8SImode, d))
42943 vmode = V8SImode;
42944 }
42945 /* Or if vpermps can be used. */
42946 else if (d->vmode == V8SFmode)
42947 vmode = V8SImode;
42948
42949 if (vmode == V32QImode)
42950 {
42951 /* vpshufb only works within 128-bit lanes; it is not
42952 possible to shuffle bytes across lanes. */
42953 for (i = 0; i < nelt; ++i)
42954 if ((d->perm[i] ^ i) & (nelt / 2))
42955 return false;
42956 }
42957 }
42958 else
42959 return false;
42960 }
42961
42962 if (d->testing_p)
42963 return true;
42964
42965 if (vmode == V8SImode)
42966 for (i = 0; i < 8; ++i)
42967 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
42968 else
42969 {
42970 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
42971 if (!d->one_operand_p)
42972 mask = 2 * nelt - 1;
42973 else if (vmode == V16QImode)
42974 mask = nelt - 1;
42975 else
42976 mask = nelt / 2 - 1;
42977
42978 for (i = 0; i < nelt; ++i)
42979 {
42980 unsigned j, e = d->perm[i] & mask;
42981 for (j = 0; j < eltsz; ++j)
42982 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
42983 }
42984 }
42985
42986 vperm = gen_rtx_CONST_VECTOR (vmode,
42987 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
42988 vperm = force_reg (vmode, vperm);
42989
42990 target = d->target;
42991 if (d->vmode != vmode)
42992 target = gen_reg_rtx (vmode);
42993 op0 = gen_lowpart (vmode, d->op0);
42994 if (d->one_operand_p)
42995 {
42996 if (vmode == V16QImode)
42997 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
42998 else if (vmode == V32QImode)
42999 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
43000 else if (vmode == V8SFmode)
43001 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
43002 else
43003 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
43004 }
43005 else
43006 {
43007 op1 = gen_lowpart (vmode, d->op1);
43008 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
43009 }
43010 if (target != d->target)
43011 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43012
43013 return true;
43014 }
43015
43016 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
43017 in a single instruction. */
43018
43019 static bool
43020 expand_vec_perm_1 (struct expand_vec_perm_d *d)
43021 {
43022 unsigned i, nelt = d->nelt;
43023 unsigned char perm2[MAX_VECT_LEN];
43024
43025 /* Check plain VEC_SELECT first, because AVX has instructions that could
43026 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
43027 input where SEL+CONCAT may not. */
43028 if (d->one_operand_p)
43029 {
43030 int mask = nelt - 1;
43031 bool identity_perm = true;
43032 bool broadcast_perm = true;
43033
43034 for (i = 0; i < nelt; i++)
43035 {
43036 perm2[i] = d->perm[i] & mask;
43037 if (perm2[i] != i)
43038 identity_perm = false;
43039 if (perm2[i])
43040 broadcast_perm = false;
43041 }
43042
43043 if (identity_perm)
43044 {
43045 if (!d->testing_p)
43046 emit_move_insn (d->target, d->op0);
43047 return true;
43048 }
43049 else if (broadcast_perm && TARGET_AVX2)
43050 {
43051 /* Use vpbroadcast{b,w,d}. */
43052 rtx (*gen) (rtx, rtx) = NULL;
43053 switch (d->vmode)
43054 {
43055 case V32QImode:
43056 gen = gen_avx2_pbroadcastv32qi_1;
43057 break;
43058 case V16HImode:
43059 gen = gen_avx2_pbroadcastv16hi_1;
43060 break;
43061 case V8SImode:
43062 gen = gen_avx2_pbroadcastv8si_1;
43063 break;
43064 case V16QImode:
43065 gen = gen_avx2_pbroadcastv16qi;
43066 break;
43067 case V8HImode:
43068 gen = gen_avx2_pbroadcastv8hi;
43069 break;
43070 case V8SFmode:
43071 gen = gen_avx2_vec_dupv8sf_1;
43072 break;
43073 /* For other modes prefer other shuffles this function creates. */
43074 default: break;
43075 }
43076 if (gen != NULL)
43077 {
43078 if (!d->testing_p)
43079 emit_insn (gen (d->target, d->op0));
43080 return true;
43081 }
43082 }
43083
43084 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
43085 return true;
43086
43087 /* There are plenty of patterns in sse.md that are written for
43088 SEL+CONCAT and are not replicated for a single op. Perhaps
43089 that should be changed, to avoid the nastiness here. */
43090
43091 /* Recognize interleave style patterns, which means incrementing
43092 every other permutation operand. */
43093 for (i = 0; i < nelt; i += 2)
43094 {
43095 perm2[i] = d->perm[i] & mask;
43096 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
43097 }
43098 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43099 d->testing_p))
43100 return true;
43101
43102 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
43103 if (nelt >= 4)
43104 {
43105 for (i = 0; i < nelt; i += 4)
43106 {
43107 perm2[i + 0] = d->perm[i + 0] & mask;
43108 perm2[i + 1] = d->perm[i + 1] & mask;
43109 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
43110 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
43111 }
43112
43113 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43114 d->testing_p))
43115 return true;
43116 }
43117 }
43118
43119 /* Finally, try the fully general two operand permute. */
43120 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43121 d->testing_p))
43122 return true;
43123
43124 /* Recognize interleave style patterns with reversed operands. */
43125 if (!d->one_operand_p)
43126 {
43127 for (i = 0; i < nelt; ++i)
43128 {
43129 unsigned e = d->perm[i];
43130 if (e >= nelt)
43131 e -= nelt;
43132 else
43133 e += nelt;
43134 perm2[i] = e;
43135 }
43136
43137 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43138 d->testing_p))
43139 return true;
43140 }
43141
43142 /* Try the SSE4.1 blend variable merge instructions. */
43143 if (expand_vec_perm_blend (d))
43144 return true;
43145
43146 /* Try one of the AVX vpermil variable permutations. */
43147 if (expand_vec_perm_vpermil (d))
43148 return true;
43149
43150 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43151 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43152 if (expand_vec_perm_pshufb (d))
43153 return true;
43154
43155 /* Try the AVX512F vpermi2 instructions. */
43156 rtx vec[64];
43157 enum machine_mode mode = d->vmode;
43158 if (mode == V8DFmode)
43159 mode = V8DImode;
43160 else if (mode == V16SFmode)
43161 mode = V16SImode;
43162 for (i = 0; i < nelt; ++i)
43163 vec[i] = GEN_INT (d->perm[i]);
43164 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
43165 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
43166 return true;
43167
43168 return false;
43169 }
43170
43171 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43172 in terms of a pair of pshuflw + pshufhw instructions. */
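/* For example, the V8HImode permutation {2, 1, 3, 0, 7, 5, 4, 6} keeps the
   low four and the high four words within their own 64-bit halves, so it
   can be expanded as a pshuflw followed by a pshufhw. */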
43173
43174 static bool
43175 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43176 {
43177 unsigned char perm2[MAX_VECT_LEN];
43178 unsigned i;
43179 bool ok;
43180
43181 if (d->vmode != V8HImode || !d->one_operand_p)
43182 return false;
43183
43184 /* The two permutations only operate in 64-bit lanes. */
43185 for (i = 0; i < 4; ++i)
43186 if (d->perm[i] >= 4)
43187 return false;
43188 for (i = 4; i < 8; ++i)
43189 if (d->perm[i] < 4)
43190 return false;
43191
43192 if (d->testing_p)
43193 return true;
43194
43195 /* Emit the pshuflw. */
43196 memcpy (perm2, d->perm, 4);
43197 for (i = 4; i < 8; ++i)
43198 perm2[i] = i;
43199 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43200 gcc_assert (ok);
43201
43202 /* Emit the pshufhw. */
43203 memcpy (perm2 + 4, d->perm + 4, 4);
43204 for (i = 0; i < 4; ++i)
43205 perm2[i] = i;
43206 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43207 gcc_assert (ok);
43208
43209 return true;
43210 }
43211
43212 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43213 the permutation using the SSSE3 palignr instruction. This succeeds
43214 when all of the elements in PERM fit within one vector and we merely
43215 need to shift them down so that a single vector permutation has a
43216 chance to succeed. */
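/* For example, a V16QImode permutation selecting bytes 3 .. 18 of the
   op0/op1 pair has min 3 and max 18; after a palignr by 3 bytes the
   remaining single-operand permutation is simply the identity. */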
43217
43218 static bool
43219 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43220 {
43221 unsigned i, nelt = d->nelt;
43222 unsigned min, max;
43223 bool in_order, ok;
43224 rtx shift, target;
43225 struct expand_vec_perm_d dcopy;
43226
43227 /* Even with AVX, palignr only operates on 128-bit vectors. */
43228 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43229 return false;
43230
43231 min = nelt, max = 0;
43232 for (i = 0; i < nelt; ++i)
43233 {
43234 unsigned e = d->perm[i];
43235 if (e < min)
43236 min = e;
43237 if (e > max)
43238 max = e;
43239 }
43240 if (min == 0 || max - min >= nelt)
43241 return false;
43242
43243 /* Given that we have SSSE3, we know we'll be able to implement the
43244 single operand permutation after the palignr with pshufb. */
43245 if (d->testing_p)
43246 return true;
43247
43248 dcopy = *d;
43249 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43250 target = gen_reg_rtx (TImode);
43251 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43252 gen_lowpart (TImode, d->op0), shift));
43253
43254 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43255 dcopy.one_operand_p = true;
43256
43257 in_order = true;
43258 for (i = 0; i < nelt; ++i)
43259 {
43260 unsigned e = dcopy.perm[i] - min;
43261 if (e != i)
43262 in_order = false;
43263 dcopy.perm[i] = e;
43264 }
43265
43266 /* Test for the degenerate case where the alignment by itself
43267 produces the desired permutation. */
43268 if (in_order)
43269 {
43270 emit_move_insn (d->target, dcopy.op0);
43271 return true;
43272 }
43273
43274 ok = expand_vec_perm_1 (&dcopy);
43275 gcc_assert (ok);
43276
43277 return ok;
43278 }
43279
43280 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
43281 the permutation using the SSE4_1 pblendv instruction. Potentially
43282 reduces the permutation from 2 pshufb + or to 1 pshufb + pblendv. */
43283
43284 static bool
43285 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
43286 {
43287 unsigned i, which, nelt = d->nelt;
43288 struct expand_vec_perm_d dcopy, dcopy1;
43289 enum machine_mode vmode = d->vmode;
43290 bool ok;
43291
43292 /* Use the same checks as in expand_vec_perm_blend, but skipping
43293 AVX and AVX2 as they require more than 2 instructions. */
43294 if (d->one_operand_p)
43295 return false;
43296 if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
43297 ;
43298 else
43299 return false;
43300
43301 /* Figure out which permutation elements do not stay in their
43302 respective lanes. */
43303 for (i = 0, which = 0; i < nelt; ++i)
43304 {
43305 unsigned e = d->perm[i];
43306 if (e != i)
43307 which |= (e < nelt ? 1 : 2);
43308 }
43309 /* We can pblend the part whose elements do not stay in their
43310 respective lanes only when those elements all come from the same
43311 half of the permutation.
43312 {0 1 8 3 4 5 9 7} is OK: 8 and 9 are not in their respective
43313 lanes, but both are >= 8.
43314 {0 1 8 3 4 5 2 7} is not OK: 2 and 8 are not in their respective
43315 lanes, and 8 >= 8 but 2 is not. */
43316 if (which != 1 && which != 2)
43317 return false;
43318 if (d->testing_p)
43319 return true;
43320
43321 /* First apply a one-operand permutation to the part whose
43322 elements do not stay in their respective lanes. */
43323 dcopy = *d;
43324 if (which == 2)
43325 dcopy.op0 = dcopy.op1 = d->op1;
43326 else
43327 dcopy.op0 = dcopy.op1 = d->op0;
43328 dcopy.one_operand_p = true;
43329
43330 for (i = 0; i < nelt; ++i)
43331 dcopy.perm[i] = d->perm[i] & (nelt - 1);
43332
43333 ok = expand_vec_perm_1 (&dcopy);
43334 gcc_assert (ok);
43335
43336 /* Next we put permuted elements into their positions. */
43337 dcopy1 = *d;
43338 if (which == 2)
43339 dcopy1.op1 = dcopy.target;
43340 else
43341 dcopy1.op0 = dcopy.target;
43342
43343 for (i = 0; i < nelt; ++i)
43344 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
43345
43346 ok = expand_vec_perm_blend (&dcopy1);
43347 gcc_assert (ok);
43348
43349 return true;
43350 }
43351
43352 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43353
43354 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43355 a two vector permutation into a single vector permutation by using
43356 an interleave operation to merge the vectors. */
43357
43358 static bool
43359 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43360 {
43361 struct expand_vec_perm_d dremap, dfinal;
43362 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43363 unsigned HOST_WIDE_INT contents;
43364 unsigned char remap[2 * MAX_VECT_LEN];
43365 rtx_insn *seq;
43366 bool ok, same_halves = false;
43367
43368 if (GET_MODE_SIZE (d->vmode) == 16)
43369 {
43370 if (d->one_operand_p)
43371 return false;
43372 }
43373 else if (GET_MODE_SIZE (d->vmode) == 32)
43374 {
43375 if (!TARGET_AVX)
43376 return false;
43377 /* For 32-byte modes allow even d->one_operand_p.
43378 The lack of cross-lane shuffling in some instructions
43379 might prevent a single insn shuffle. */
43380 dfinal = *d;
43381 dfinal.testing_p = true;
43382 /* If expand_vec_perm_interleave3 can expand this into
43383 a 3-insn sequence, give up and let it be expanded that
43384 way. While that is one insn longer, it doesn't need a
43385 memory operand, and in the common case where both the
43386 interleave-low and interleave-high permutations with the
43387 same operands are adjacent, the pair needs only 4 insns
43388 after CSE. */
43389 if (expand_vec_perm_interleave3 (&dfinal))
43390 return false;
43391 }
43392 else
43393 return false;
43394
43395 /* Examine from whence the elements come. */
43396 contents = 0;
43397 for (i = 0; i < nelt; ++i)
43398 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43399
43400 memset (remap, 0xff, sizeof (remap));
43401 dremap = *d;
43402
43403 if (GET_MODE_SIZE (d->vmode) == 16)
43404 {
43405 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43406
43407 /* Split the two input vectors into 4 halves. */
43408 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43409 h2 = h1 << nelt2;
43410 h3 = h2 << nelt2;
43411 h4 = h3 << nelt2;
43412
43413 /* If all the elements come from the low halves, use interleave low; use
43414 interleave high if they all come from the high halves. If the elements
43415 come from mismatched halves, use shufps for V4SF/V4SI or a DImode shuffle. */
43416 if ((contents & (h1 | h3)) == contents)
43417 {
43418 /* punpckl* */
43419 for (i = 0; i < nelt2; ++i)
43420 {
43421 remap[i] = i * 2;
43422 remap[i + nelt] = i * 2 + 1;
43423 dremap.perm[i * 2] = i;
43424 dremap.perm[i * 2 + 1] = i + nelt;
43425 }
43426 if (!TARGET_SSE2 && d->vmode == V4SImode)
43427 dremap.vmode = V4SFmode;
43428 }
43429 else if ((contents & (h2 | h4)) == contents)
43430 {
43431 /* punpckh* */
43432 for (i = 0; i < nelt2; ++i)
43433 {
43434 remap[i + nelt2] = i * 2;
43435 remap[i + nelt + nelt2] = i * 2 + 1;
43436 dremap.perm[i * 2] = i + nelt2;
43437 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43438 }
43439 if (!TARGET_SSE2 && d->vmode == V4SImode)
43440 dremap.vmode = V4SFmode;
43441 }
43442 else if ((contents & (h1 | h4)) == contents)
43443 {
43444 /* shufps */
43445 for (i = 0; i < nelt2; ++i)
43446 {
43447 remap[i] = i;
43448 remap[i + nelt + nelt2] = i + nelt2;
43449 dremap.perm[i] = i;
43450 dremap.perm[i + nelt2] = i + nelt + nelt2;
43451 }
43452 if (nelt != 4)
43453 {
43454 /* shufpd */
43455 dremap.vmode = V2DImode;
43456 dremap.nelt = 2;
43457 dremap.perm[0] = 0;
43458 dremap.perm[1] = 3;
43459 }
43460 }
43461 else if ((contents & (h2 | h3)) == contents)
43462 {
43463 /* shufps */
43464 for (i = 0; i < nelt2; ++i)
43465 {
43466 remap[i + nelt2] = i;
43467 remap[i + nelt] = i + nelt2;
43468 dremap.perm[i] = i + nelt2;
43469 dremap.perm[i + nelt2] = i + nelt;
43470 }
43471 if (nelt != 4)
43472 {
43473 /* shufpd */
43474 dremap.vmode = V2DImode;
43475 dremap.nelt = 2;
43476 dremap.perm[0] = 1;
43477 dremap.perm[1] = 2;
43478 }
43479 }
43480 else
43481 return false;
43482 }
43483 else
43484 {
43485 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43486 unsigned HOST_WIDE_INT q[8];
43487 unsigned int nonzero_halves[4];
43488
43489 /* Split the two input vectors into 8 quarters. */
43490 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43491 for (i = 1; i < 8; ++i)
43492 q[i] = q[0] << (nelt4 * i);
43493 for (i = 0; i < 4; ++i)
43494 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43495 {
43496 nonzero_halves[nzcnt] = i;
43497 ++nzcnt;
43498 }
43499
43500 if (nzcnt == 1)
43501 {
43502 gcc_assert (d->one_operand_p);
43503 nonzero_halves[1] = nonzero_halves[0];
43504 same_halves = true;
43505 }
43506 else if (d->one_operand_p)
43507 {
43508 gcc_assert (nonzero_halves[0] == 0);
43509 gcc_assert (nonzero_halves[1] == 1);
43510 }
43511
43512 if (nzcnt <= 2)
43513 {
43514 if (d->perm[0] / nelt2 == nonzero_halves[1])
43515 {
43516 /* Attempt to increase the likelihood that dfinal
43517 shuffle will be intra-lane. */
43518 char tmph = nonzero_halves[0];
43519 nonzero_halves[0] = nonzero_halves[1];
43520 nonzero_halves[1] = tmph;
43521 }
43522
43523 /* vperm2f128 or vperm2i128. */
43524 for (i = 0; i < nelt2; ++i)
43525 {
43526 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43527 remap[i + nonzero_halves[0] * nelt2] = i;
43528 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43529 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43530 }
43531
43532 if (d->vmode != V8SFmode
43533 && d->vmode != V4DFmode
43534 && d->vmode != V8SImode)
43535 {
43536 dremap.vmode = V8SImode;
43537 dremap.nelt = 8;
43538 for (i = 0; i < 4; ++i)
43539 {
43540 dremap.perm[i] = i + nonzero_halves[0] * 4;
43541 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43542 }
43543 }
43544 }
43545 else if (d->one_operand_p)
43546 return false;
43547 else if (TARGET_AVX2
43548 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43549 {
43550 /* vpunpckl* */
43551 for (i = 0; i < nelt4; ++i)
43552 {
43553 remap[i] = i * 2;
43554 remap[i + nelt] = i * 2 + 1;
43555 remap[i + nelt2] = i * 2 + nelt2;
43556 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43557 dremap.perm[i * 2] = i;
43558 dremap.perm[i * 2 + 1] = i + nelt;
43559 dremap.perm[i * 2 + nelt2] = i + nelt2;
43560 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43561 }
43562 }
43563 else if (TARGET_AVX2
43564 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43565 {
43566 /* vpunpckh* */
43567 for (i = 0; i < nelt4; ++i)
43568 {
43569 remap[i + nelt4] = i * 2;
43570 remap[i + nelt + nelt4] = i * 2 + 1;
43571 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43572 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43573 dremap.perm[i * 2] = i + nelt4;
43574 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43575 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43576 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43577 }
43578 }
43579 else
43580 return false;
43581 }
43582
43583 /* Use the remapping array set up above to move the elements from their
43584 swizzled locations into their final destinations. */
43585 dfinal = *d;
43586 for (i = 0; i < nelt; ++i)
43587 {
43588 unsigned e = remap[d->perm[i]];
43589 gcc_assert (e < nelt);
43590 /* If same_halves is true, both halves of the remapped vector are the
43591 same. Avoid cross-lane accesses if possible. */
43592 if (same_halves && i >= nelt2)
43593 {
43594 gcc_assert (e < nelt2);
43595 dfinal.perm[i] = e + nelt2;
43596 }
43597 else
43598 dfinal.perm[i] = e;
43599 }
43600 if (!d->testing_p)
43601 {
43602 dremap.target = gen_reg_rtx (dremap.vmode);
43603 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43604 }
43605 dfinal.op1 = dfinal.op0;
43606 dfinal.one_operand_p = true;
43607
43608 /* Test if the final remap can be done with a single insn. For V4SFmode or
43609 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43610 start_sequence ();
43611 ok = expand_vec_perm_1 (&dfinal);
43612 seq = get_insns ();
43613 end_sequence ();
43614
43615 if (!ok)
43616 return false;
43617
43618 if (d->testing_p)
43619 return true;
43620
43621 if (dremap.vmode != dfinal.vmode)
43622 {
43623 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43624 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43625 }
43626
43627 ok = expand_vec_perm_1 (&dremap);
43628 gcc_assert (ok);
43629
43630 emit_insn (seq);
43631 return true;
43632 }
43633
43634 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43635 a single vector cross-lane permutation into vpermq followed
43636 by any of the single insn permutations. */
43637
43638 static bool
43639 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43640 {
43641 struct expand_vec_perm_d dremap, dfinal;
43642 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43643 unsigned contents[2];
43644 bool ok;
43645
43646 if (!(TARGET_AVX2
43647 && (d->vmode == V32QImode || d->vmode == V16HImode)
43648 && d->one_operand_p))
43649 return false;
43650
43651 contents[0] = 0;
43652 contents[1] = 0;
43653 for (i = 0; i < nelt2; ++i)
43654 {
43655 contents[0] |= 1u << (d->perm[i] / nelt4);
43656 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43657 }
43658
43659 for (i = 0; i < 2; ++i)
43660 {
43661 unsigned int cnt = 0;
43662 for (j = 0; j < 4; ++j)
43663 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43664 return false;
43665 }
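/* At this point each half of the result draws on at most two of the
four 64-bit quarters of the input, so a single vpermq can gather the
needed quarters before the in-lane shuffle. */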
43666
43667 if (d->testing_p)
43668 return true;
43669
43670 dremap = *d;
43671 dremap.vmode = V4DImode;
43672 dremap.nelt = 4;
43673 dremap.target = gen_reg_rtx (V4DImode);
43674 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43675 dremap.op1 = dremap.op0;
43676 dremap.one_operand_p = true;
43677 for (i = 0; i < 2; ++i)
43678 {
43679 unsigned int cnt = 0;
43680 for (j = 0; j < 4; ++j)
43681 if ((contents[i] & (1u << j)) != 0)
43682 dremap.perm[2 * i + cnt++] = j;
43683 for (; cnt < 2; ++cnt)
43684 dremap.perm[2 * i + cnt] = 0;
43685 }
43686
43687 dfinal = *d;
43688 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43689 dfinal.op1 = dfinal.op0;
43690 dfinal.one_operand_p = true;
43691 for (i = 0, j = 0; i < nelt; ++i)
43692 {
43693 if (i == nelt2)
43694 j = 2;
43695 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43696 if ((d->perm[i] / nelt4) == dremap.perm[j])
43697 ;
43698 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43699 dfinal.perm[i] |= nelt4;
43700 else
43701 gcc_unreachable ();
43702 }
43703
43704 ok = expand_vec_perm_1 (&dremap);
43705 gcc_assert (ok);
43706
43707 ok = expand_vec_perm_1 (&dfinal);
43708 gcc_assert (ok);
43709
43710 return true;
43711 }
43712
43713 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
43714 a vector permutation using two instructions, vperm2f128 or
43715 vperm2i128 followed by any single in-lane permutation. */
43716
43717 static bool
43718 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43719 {
43720 struct expand_vec_perm_d dfirst, dsecond;
43721 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43722 bool ok;
43723
43724 if (!TARGET_AVX
43725 || GET_MODE_SIZE (d->vmode) != 32
43726 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43727 return false;
43728
43729 dsecond = *d;
43730 dsecond.one_operand_p = false;
43731 dsecond.testing_p = true;
43732
43733 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43734 immediate. For perm < 16 the second permutation uses
43735 d->op0 as first operand, for perm >= 16 it uses d->op1
43736 as first operand. The second operand is the result of
43737 vperm2[fi]128. */
43738 for (perm = 0; perm < 32; perm++)
43739 {
43740 /* Ignore permutations which do not move anything cross-lane. */
43741 if (perm < 16)
43742 {
43743 /* The second shuffle for e.g. V4DFmode has
43744 0123 and ABCD operands.
43745 Ignore AB23, as 23 is already in the second lane
43746 of the first operand. */
43747 if ((perm & 0xc) == (1 << 2)) continue;
43748 /* And 01CD, as 01 is in the first lane of the first
43749 operand. */
43750 if ((perm & 3) == 0) continue;
43751 /* And 4567, as then the vperm2[fi]128 doesn't change
43752 anything on the original 4567 second operand. */
43753 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43754 }
43755 else
43756 {
43757 /* The second shuffle for e.g. V4DFmode has
43758 4567 and ABCD operands.
43759 Ignore AB67, as 67 is already in the second lane
43760 of the first operand. */
43761 if ((perm & 0xc) == (3 << 2)) continue;
43762 /* And 45CD, as 45 is in the first lane of the first
43763 operand. */
43764 if ((perm & 3) == 2) continue;
43765 /* And 0123, as then the vperm2[fi]128 doesn't change
43766 anything on the original 0123 first operand. */
43767 if ((perm & 0xf) == (1 << 2)) continue;
43768 }
43769
43770 for (i = 0; i < nelt; i++)
43771 {
43772 j = d->perm[i] / nelt2;
43773 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43774 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43775 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43776 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43777 else
43778 break;
43779 }
43780
43781 if (i == nelt)
43782 {
43783 start_sequence ();
43784 ok = expand_vec_perm_1 (&dsecond);
43785 end_sequence ();
43786 }
43787 else
43788 ok = false;
43789
43790 if (ok)
43791 {
43792 if (d->testing_p)
43793 return true;
43794
43795 /* Found a usable second shuffle. dfirst will be
43796 vperm2f128 on d->op0 and d->op1. */
43797 dsecond.testing_p = false;
43798 dfirst = *d;
43799 dfirst.target = gen_reg_rtx (d->vmode);
43800 for (i = 0; i < nelt; i++)
43801 dfirst.perm[i] = (i & (nelt2 - 1))
43802 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43803
43804 ok = expand_vec_perm_1 (&dfirst);
43805 gcc_assert (ok);
43806
43807 /* And dsecond is some single insn shuffle, taking
43808 d->op0 and result of vperm2f128 (if perm < 16) or
43809 d->op1 and result of vperm2f128 (otherwise). */
43810 dsecond.op1 = dfirst.target;
43811 if (perm >= 16)
43812 dsecond.op0 = dfirst.op1;
43813
43814 ok = expand_vec_perm_1 (&dsecond);
43815 gcc_assert (ok);
43816
43817 return true;
43818 }
43819
43820 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
43821 if (d->one_operand_p)
43822 return false;
43823 }
43824
43825 return false;
43826 }
43827
43828 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43829 a two vector permutation using 2 intra-lane interleave insns
43830 and cross-lane shuffle for 32-byte vectors. */
43831
43832 static bool
43833 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43834 {
43835 unsigned i, nelt;
43836 rtx (*gen) (rtx, rtx, rtx);
43837
43838 if (d->one_operand_p)
43839 return false;
43840 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43841 ;
43842 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43843 ;
43844 else
43845 return false;
43846
43847 nelt = d->nelt;
43848 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43849 return false;
43850 for (i = 0; i < nelt; i += 2)
43851 if (d->perm[i] != d->perm[0] + i / 2
43852 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43853 return false;
43854
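/* The permutation is now known to be an interleave of the two operands,
e.g. for V8SImode either { 0, 8, 1, 9, 2, 10, 3, 11 } (low) or
{ 4, 12, 5, 13, 6, 14, 7, 15 } (high). */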
43855 if (d->testing_p)
43856 return true;
43857
43858 switch (d->vmode)
43859 {
43860 case V32QImode:
43861 if (d->perm[0])
43862 gen = gen_vec_interleave_highv32qi;
43863 else
43864 gen = gen_vec_interleave_lowv32qi;
43865 break;
43866 case V16HImode:
43867 if (d->perm[0])
43868 gen = gen_vec_interleave_highv16hi;
43869 else
43870 gen = gen_vec_interleave_lowv16hi;
43871 break;
43872 case V8SImode:
43873 if (d->perm[0])
43874 gen = gen_vec_interleave_highv8si;
43875 else
43876 gen = gen_vec_interleave_lowv8si;
43877 break;
43878 case V4DImode:
43879 if (d->perm[0])
43880 gen = gen_vec_interleave_highv4di;
43881 else
43882 gen = gen_vec_interleave_lowv4di;
43883 break;
43884 case V8SFmode:
43885 if (d->perm[0])
43886 gen = gen_vec_interleave_highv8sf;
43887 else
43888 gen = gen_vec_interleave_lowv8sf;
43889 break;
43890 case V4DFmode:
43891 if (d->perm[0])
43892 gen = gen_vec_interleave_highv4df;
43893 else
43894 gen = gen_vec_interleave_lowv4df;
43895 break;
43896 default:
43897 gcc_unreachable ();
43898 }
43899
43900 emit_insn (gen (d->target, d->op0, d->op1));
43901 return true;
43902 }
43903
43904 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
43905 a single vector permutation using a single intra-lane vector
43906 permutation, vperm2f128 swapping the lanes and vblend* insn blending
43907 the non-swapped and swapped vectors together. */
43908
43909 static bool
43910 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
43911 {
43912 struct expand_vec_perm_d dfirst, dsecond;
43913 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
43914 rtx_insn *seq;
43915 bool ok;
43916 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
43917
43918 if (!TARGET_AVX
43919 || TARGET_AVX2
43920 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
43921 || !d->one_operand_p)
43922 return false;
43923
43924 dfirst = *d;
43925 for (i = 0; i < nelt; i++)
43926 dfirst.perm[i] = 0xff;
43927 for (i = 0, msk = 0; i < nelt; i++)
43928 {
43929 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
43930 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
43931 return false;
43932 dfirst.perm[j] = d->perm[i];
43933 if (j != i)
43934 msk |= (1 << i);
43935 }
43936 for (i = 0; i < nelt; i++)
43937 if (dfirst.perm[i] == 0xff)
43938 dfirst.perm[i] = i;
43939
43940 if (!d->testing_p)
43941 dfirst.target = gen_reg_rtx (dfirst.vmode);
43942
43943 start_sequence ();
43944 ok = expand_vec_perm_1 (&dfirst);
43945 seq = get_insns ();
43946 end_sequence ();
43947
43948 if (!ok)
43949 return false;
43950
43951 if (d->testing_p)
43952 return true;
43953
43954 emit_insn (seq);
43955
43956 dsecond = *d;
43957 dsecond.op0 = dfirst.target;
43958 dsecond.op1 = dfirst.target;
43959 dsecond.one_operand_p = true;
43960 dsecond.target = gen_reg_rtx (dsecond.vmode);
43961 for (i = 0; i < nelt; i++)
43962 dsecond.perm[i] = i ^ nelt2;
43963
43964 ok = expand_vec_perm_1 (&dsecond);
43965 gcc_assert (ok);
43966
43967 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
43968 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
43969 return true;
43970 }
43971
43972 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
43973 permutation using two vperm2f128, followed by a vshufpd insn blending
43974 the two vectors together. */
43975
43976 static bool
43977 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
43978 {
43979 struct expand_vec_perm_d dfirst, dsecond, dthird;
43980 bool ok;
43981
43982 if (!TARGET_AVX || (d->vmode != V4DFmode))
43983 return false;
43984
43985 if (d->testing_p)
43986 return true;
43987
43988 dfirst = *d;
43989 dsecond = *d;
43990 dthird = *d;
43991
43992 dfirst.perm[0] = (d->perm[0] & ~1);
43993 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
43994 dfirst.perm[2] = (d->perm[2] & ~1);
43995 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
43996 dsecond.perm[0] = (d->perm[1] & ~1);
43997 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
43998 dsecond.perm[2] = (d->perm[3] & ~1);
43999 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
44000 dthird.perm[0] = (d->perm[0] % 2);
44001 dthird.perm[1] = (d->perm[1] % 2) + 4;
44002 dthird.perm[2] = (d->perm[2] % 2) + 2;
44003 dthird.perm[3] = (d->perm[3] % 2) + 6;
44004
44005 dfirst.target = gen_reg_rtx (dfirst.vmode);
44006 dsecond.target = gen_reg_rtx (dsecond.vmode);
44007 dthird.op0 = dfirst.target;
44008 dthird.op1 = dsecond.target;
44009 dthird.one_operand_p = false;
44010
44011 canonicalize_perm (&dfirst);
44012 canonicalize_perm (&dsecond);
44013
44014 ok = expand_vec_perm_1 (&dfirst)
44015 && expand_vec_perm_1 (&dsecond)
44016 && expand_vec_perm_1 (&dthird);
44017
44018 gcc_assert (ok);
44019
44020 return true;
44021 }
44022
44023 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
44024 permutation with two pshufb insns and an ior. We should have already
44025 failed all two instruction sequences. */
44026
44027 static bool
44028 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
44029 {
44030 rtx rperm[2][16], vperm, l, h, op, m128;
44031 unsigned int i, nelt, eltsz;
44032
44033 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
44034 return false;
44035 gcc_assert (!d->one_operand_p);
44036
44037 if (d->testing_p)
44038 return true;
44039
44040 nelt = d->nelt;
44041 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44042
44043 /* Generate two permutation masks. If the required element is within
44044 the given vector it is shuffled into the proper lane. If the required
44045 element is in the other vector, force a zero into the lane by setting
44046 bit 7 in the permutation mask. */
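/* For example, extracting the even halfwords of two V8HImode operands
(perm { 0, 2, 4, 6, 8, 10, 12, 14 }) gives the byte masks
{ 0, 1, 4, 5, 8, 9, 12, 13, -128 x 8 } for op0 and
{ -128 x 8, 0, 1, 4, 5, 8, 9, 12, 13 } for op1; or-ing the two
pshufb results then merges them. */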
44047 m128 = GEN_INT (-128);
44048 for (i = 0; i < nelt; ++i)
44049 {
44050 unsigned j, e = d->perm[i];
44051 unsigned which = (e >= nelt);
44052 if (e >= nelt)
44053 e -= nelt;
44054
44055 for (j = 0; j < eltsz; ++j)
44056 {
44057 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
44058 rperm[1-which][i*eltsz + j] = m128;
44059 }
44060 }
44061
44062 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
44063 vperm = force_reg (V16QImode, vperm);
44064
44065 l = gen_reg_rtx (V16QImode);
44066 op = gen_lowpart (V16QImode, d->op0);
44067 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
44068
44069 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
44070 vperm = force_reg (V16QImode, vperm);
44071
44072 h = gen_reg_rtx (V16QImode);
44073 op = gen_lowpart (V16QImode, d->op1);
44074 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
44075
44076 op = d->target;
44077 if (d->vmode != V16QImode)
44078 op = gen_reg_rtx (V16QImode);
44079 emit_insn (gen_iorv16qi3 (op, l, h));
44080 if (op != d->target)
44081 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44082
44083 return true;
44084 }
44085
44086 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
44087 with two vpshufb insns, vpermq and vpor. We should have already failed
44088 all two or three instruction sequences. */
44089
44090 static bool
44091 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
44092 {
44093 rtx rperm[2][32], vperm, l, h, hp, op, m128;
44094 unsigned int i, nelt, eltsz;
44095
44096 if (!TARGET_AVX2
44097 || !d->one_operand_p
44098 || (d->vmode != V32QImode && d->vmode != V16HImode))
44099 return false;
44100
44101 if (d->testing_p)
44102 return true;
44103
44104 nelt = d->nelt;
44105 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44106
44107 /* Generate two permutation masks. If the required element is within
44108 the same lane, it is shuffled in. If the required element is from the
44109 other lane, force a zero by setting bit 7 in the permutation mask.
44110 The other mask has non-negative elements where the element is
44111 requested from the other lane, but also moved to the other lane,
44112 so that the result of vpshufb can have its two V2TImode halves
44113 swapped. */
44114 m128 = GEN_INT (-128);
44115 for (i = 0; i < nelt; ++i)
44116 {
44117 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44118 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44119
44120 for (j = 0; j < eltsz; ++j)
44121 {
44122 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
44123 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
44124 }
44125 }
44126
44127 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44128 vperm = force_reg (V32QImode, vperm);
44129
44130 h = gen_reg_rtx (V32QImode);
44131 op = gen_lowpart (V32QImode, d->op0);
44132 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44133
44134 /* Swap the 128-bit lanes of h into hp. */
44135 hp = gen_reg_rtx (V4DImode);
44136 op = gen_lowpart (V4DImode, h);
44137 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
44138 const1_rtx));
44139
44140 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44141 vperm = force_reg (V32QImode, vperm);
44142
44143 l = gen_reg_rtx (V32QImode);
44144 op = gen_lowpart (V32QImode, d->op0);
44145 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44146
44147 op = d->target;
44148 if (d->vmode != V32QImode)
44149 op = gen_reg_rtx (V32QImode);
44150 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
44151 if (op != d->target)
44152 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44153
44154 return true;
44155 }
44156
44157 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
44158 and extract-odd permutations of two V32QImode or V16HImode operands
44159 with two vpshufb insns, vpor and vpermq. We should have already
44160 failed all two or three instruction sequences. */
44161
44162 static bool
44163 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
44164 {
44165 rtx rperm[2][32], vperm, l, h, ior, op, m128;
44166 unsigned int i, nelt, eltsz;
44167
44168 if (!TARGET_AVX2
44169 || d->one_operand_p
44170 || (d->vmode != V32QImode && d->vmode != V16HImode))
44171 return false;
44172
44173 for (i = 0; i < d->nelt; ++i)
44174 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
44175 return false;
44176
44177 if (d->testing_p)
44178 return true;
44179
44180 nelt = d->nelt;
44181 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44182
44183 /* Generate two permutation masks. In the first permutation mask
44184 the first quarter will contain indexes for the first half
44185 of the op0, the second quarter will contain bit 7 set, third quarter
44186 will contain indexes for the second half of the op0 and the
44187 last quarter bit 7 set. In the second permutation mask
44188 the first quarter will contain bit 7 set, the second quarter
44189 indexes for the first half of the op1, the third quarter bit 7 set
44190 and last quarter indexes for the second half of the op1.
44191 I.e. the first mask e.g. for V32QImode extract even will be:
44192 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44193 (all values masked with 0xf except for -128) and second mask
44194 for extract even will be
44195 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44196 m128 = GEN_INT (-128);
44197 for (i = 0; i < nelt; ++i)
44198 {
44199 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44200 unsigned which = d->perm[i] >= nelt;
44201 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
44202
44203 for (j = 0; j < eltsz; ++j)
44204 {
44205 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44206 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44207 }
44208 }
44209
44210 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44211 vperm = force_reg (V32QImode, vperm);
44212
44213 l = gen_reg_rtx (V32QImode);
44214 op = gen_lowpart (V32QImode, d->op0);
44215 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44216
44217 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44218 vperm = force_reg (V32QImode, vperm);
44219
44220 h = gen_reg_rtx (V32QImode);
44221 op = gen_lowpart (V32QImode, d->op1);
44222 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44223
44224 ior = gen_reg_rtx (V32QImode);
44225 emit_insn (gen_iorv32qi3 (ior, l, h));
44226
44227 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
44228 op = gen_reg_rtx (V4DImode);
44229 ior = gen_lowpart (V4DImode, ior);
44230 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44231 const1_rtx, GEN_INT (3)));
44232 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44233
44234 return true;
44235 }
44236
44237 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
44238 and extract-odd permutations. */
44239
44240 static bool
44241 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44242 {
44243 rtx t1, t2, t3, t4, t5;
44244
44245 switch (d->vmode)
44246 {
44247 case V4DFmode:
44248 if (d->testing_p)
44249 break;
44250 t1 = gen_reg_rtx (V4DFmode);
44251 t2 = gen_reg_rtx (V4DFmode);
44252
44253 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44254 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44255 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44256
44257 /* Now an unpck[lh]pd will produce the result required. */
44258 if (odd)
44259 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44260 else
44261 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44262 emit_insn (t3);
44263 break;
44264
44265 case V8SFmode:
44266 {
44267 int mask = odd ? 0xdd : 0x88;
44268
44269 if (d->testing_p)
44270 break;
44271 t1 = gen_reg_rtx (V8SFmode);
44272 t2 = gen_reg_rtx (V8SFmode);
44273 t3 = gen_reg_rtx (V8SFmode);
44274
44275 /* Shuffle within the 128-bit lanes to produce:
44276 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44277 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44278 GEN_INT (mask)));
44279
44280 /* Shuffle the lanes around to produce:
44281 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44282 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44283 GEN_INT (0x3)));
44284
44285 /* Shuffle within the 128-bit lanes to produce:
44286 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44287 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44288
44289 /* Shuffle within the 128-bit lanes to produce:
44290 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44291 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44292
44293 /* Shuffle the lanes around to produce:
44294 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44295 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44296 GEN_INT (0x20)));
44297 }
44298 break;
44299
44300 case V2DFmode:
44301 case V4SFmode:
44302 case V2DImode:
44303 case V4SImode:
44304 /* These are always directly implementable by expand_vec_perm_1. */
44305 gcc_unreachable ();
44306
44307 case V8HImode:
44308 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44309 return expand_vec_perm_pshufb2 (d);
44310 else
44311 {
44312 if (d->testing_p)
44313 break;
44314 /* We need 2*log2(N)-1 operations to achieve odd/even
44315 with interleave. */
44316 t1 = gen_reg_rtx (V8HImode);
44317 t2 = gen_reg_rtx (V8HImode);
44318 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44319 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44320 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44321 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44322 if (odd)
44323 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44324 else
44325 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44326 emit_insn (t3);
44327 }
44328 break;
44329
44330 case V16QImode:
44331 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44332 return expand_vec_perm_pshufb2 (d);
44333 else
44334 {
44335 if (d->testing_p)
44336 break;
44337 t1 = gen_reg_rtx (V16QImode);
44338 t2 = gen_reg_rtx (V16QImode);
44339 t3 = gen_reg_rtx (V16QImode);
44340 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44341 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44342 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44343 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44344 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44345 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44346 if (odd)
44347 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44348 else
44349 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44350 emit_insn (t3);
44351 }
44352 break;
44353
44354 case V16HImode:
44355 case V32QImode:
44356 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44357
44358 case V4DImode:
44359 if (!TARGET_AVX2)
44360 {
44361 struct expand_vec_perm_d d_copy = *d;
44362 d_copy.vmode = V4DFmode;
44363 if (d->testing_p)
44364 d_copy.target = gen_lowpart (V4DFmode, d->target);
44365 else
44366 d_copy.target = gen_reg_rtx (V4DFmode);
44367 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44368 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44369 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44370 {
44371 if (!d->testing_p)
44372 emit_move_insn (d->target,
44373 gen_lowpart (V4DImode, d_copy.target));
44374 return true;
44375 }
44376 return false;
44377 }
44378
44379 if (d->testing_p)
44380 break;
44381
44382 t1 = gen_reg_rtx (V4DImode);
44383 t2 = gen_reg_rtx (V4DImode);
44384
44385 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44386 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44387 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44388
44389 /* Now an vpunpck[lh]qdq will produce the result required. */
44390 if (odd)
44391 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44392 else
44393 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44394 emit_insn (t3);
44395 break;
44396
44397 case V8SImode:
44398 if (!TARGET_AVX2)
44399 {
44400 struct expand_vec_perm_d d_copy = *d;
44401 d_copy.vmode = V8SFmode;
44402 if (d->testing_p)
44403 d_copy.target = gen_lowpart (V8SFmode, d->target);
44404 else
44405 d_copy.target = gen_reg_rtx (V8SFmode);
44406 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44407 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44408 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44409 {
44410 if (!d->testing_p)
44411 emit_move_insn (d->target,
44412 gen_lowpart (V8SImode, d_copy.target));
44413 return true;
44414 }
44415 return false;
44416 }
44417
44418 if (d->testing_p)
44419 break;
44420
44421 t1 = gen_reg_rtx (V8SImode);
44422 t2 = gen_reg_rtx (V8SImode);
44423 t3 = gen_reg_rtx (V4DImode);
44424 t4 = gen_reg_rtx (V4DImode);
44425 t5 = gen_reg_rtx (V4DImode);
44426
44427 /* Shuffle the lanes around into
44428 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44429 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44430 gen_lowpart (V4DImode, d->op1),
44431 GEN_INT (0x20)));
44432 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44433 gen_lowpart (V4DImode, d->op1),
44434 GEN_INT (0x31)));
44435
44436 /* Swap the 2nd and 3rd position in each lane into
44437 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44438 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44439 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44440 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44441 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44442
44443 /* Now an vpunpck[lh]qdq will produce
44444 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44445 if (odd)
44446 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44447 gen_lowpart (V4DImode, t2));
44448 else
44449 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44450 gen_lowpart (V4DImode, t2));
44451 emit_insn (t3);
44452 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44453 break;
44454
44455 default:
44456 gcc_unreachable ();
44457 }
44458
44459 return true;
44460 }
44461
44462 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44463 extract-even and extract-odd permutations. */
44464
44465 static bool
44466 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44467 {
44468 unsigned i, odd, nelt = d->nelt;
44469
44470 odd = d->perm[0];
44471 if (odd != 0 && odd != 1)
44472 return false;
44473
44474 for (i = 1; i < nelt; ++i)
44475 if (d->perm[i] != 2 * i + odd)
44476 return false;
44477
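/* E.g. for V8HImode, extract-even is the selector
{ 0, 2, 4, 6, 8, 10, 12, 14 } and extract-odd is
{ 1, 3, 5, 7, 9, 11, 13, 15 }. */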
44478 return expand_vec_perm_even_odd_1 (d, odd);
44479 }
44480
44481 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
44482 permutations. We assume that expand_vec_perm_1 has already failed. */
44483
44484 static bool
44485 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44486 {
44487 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44488 enum machine_mode vmode = d->vmode;
44489 unsigned char perm2[4];
44490 rtx op0 = d->op0, dest;
44491 bool ok;
44492
44493 switch (vmode)
44494 {
44495 case V4DFmode:
44496 case V8SFmode:
44497 /* These are special-cased in sse.md so that we can optionally
44498 use the vbroadcast instruction. They expand to two insns
44499 if the input happens to be in a register. */
44500 gcc_unreachable ();
44501
44502 case V2DFmode:
44503 case V2DImode:
44504 case V4SFmode:
44505 case V4SImode:
44506 /* These are always implementable using standard shuffle patterns. */
44507 gcc_unreachable ();
44508
44509 case V8HImode:
44510 case V16QImode:
44511 /* These can be implemented via interleave. We save one insn by
44512 stopping once we have promoted to V4SImode and then use pshufd. */
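/* E.g. broadcasting byte 5 of a V16QImode vector takes a punpcklbw
(byte 5 becomes word 5), a punpckhwd (word 5 becomes dword 1) and
finally a pshufd replicating dword 1. */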
44513 if (d->testing_p)
44514 return true;
44515 do
44516 {
44517 rtx dest;
44518 rtx (*gen) (rtx, rtx, rtx)
44519 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44520 : gen_vec_interleave_lowv8hi;
44521
44522 if (elt >= nelt2)
44523 {
44524 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44525 : gen_vec_interleave_highv8hi;
44526 elt -= nelt2;
44527 }
44528 nelt2 /= 2;
44529
44530 dest = gen_reg_rtx (vmode);
44531 emit_insn (gen (dest, op0, op0));
44532 vmode = get_mode_wider_vector (vmode);
44533 op0 = gen_lowpart (vmode, dest);
44534 }
44535 while (vmode != V4SImode);
44536
44537 memset (perm2, elt, 4);
44538 dest = gen_reg_rtx (V4SImode);
44539 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44540 gcc_assert (ok);
44541 if (!d->testing_p)
44542 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44543 return true;
44544
44545 case V32QImode:
44546 case V16HImode:
44547 case V8SImode:
44548 case V4DImode:
44549 /* For AVX2, broadcasts of the first element should have been
44550 handled by expand_vec_perm_1 using vpbroadcast* or vpermq. */
44551 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44552 return false;
44553
44554 default:
44555 gcc_unreachable ();
44556 }
44557 }
44558
44559 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44560 broadcast permutations. */
44561
44562 static bool
44563 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44564 {
44565 unsigned i, elt, nelt = d->nelt;
44566
44567 if (!d->one_operand_p)
44568 return false;
44569
44570 elt = d->perm[0];
44571 for (i = 1; i < nelt; ++i)
44572 if (d->perm[i] != elt)
44573 return false;
44574
44575 return expand_vec_perm_broadcast_1 (d);
44576 }
44577
44578 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
44579 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44580 all the shorter instruction sequences. */
44581
44582 static bool
44583 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44584 {
44585 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44586 unsigned int i, nelt, eltsz;
44587 bool used[4];
44588
44589 if (!TARGET_AVX2
44590 || d->one_operand_p
44591 || (d->vmode != V32QImode && d->vmode != V16HImode))
44592 return false;
44593
44594 if (d->testing_p)
44595 return true;
44596
44597 nelt = d->nelt;
44598 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44599
44600 /* Generate 4 permutation masks. If the required element is within
44601 the same lane, it is shuffled in. If the required element is from the
44602 other lane, force a zero by setting bit 7 in the permutation mask.
44603 The other masks have non-negative elements where the element is
44604 requested from the other lane, but also moved to the other lane,
44605 so that the result of vpshufb can have its two V2TImode halves
44606 swapped. */
44607 m128 = GEN_INT (-128);
44608 for (i = 0; i < 32; ++i)
44609 {
44610 rperm[0][i] = m128;
44611 rperm[1][i] = m128;
44612 rperm[2][i] = m128;
44613 rperm[3][i] = m128;
44614 }
44615 used[0] = false;
44616 used[1] = false;
44617 used[2] = false;
44618 used[3] = false;
44619 for (i = 0; i < nelt; ++i)
44620 {
44621 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44622 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44623 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44624
44625 for (j = 0; j < eltsz; ++j)
44626 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44627 used[which] = true;
44628 }
44629
44630 for (i = 0; i < 2; ++i)
44631 {
44632 if (!used[2 * i + 1])
44633 {
44634 h[i] = NULL_RTX;
44635 continue;
44636 }
44637 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44638 gen_rtvec_v (32, rperm[2 * i + 1]));
44639 vperm = force_reg (V32QImode, vperm);
44640 h[i] = gen_reg_rtx (V32QImode);
44641 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44642 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44643 }
44644
44645 /* Swap the 128-bit lanes of h[X]. */
44646 for (i = 0; i < 2; ++i)
44647 {
44648 if (h[i] == NULL_RTX)
44649 continue;
44650 op = gen_reg_rtx (V4DImode);
44651 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44652 const2_rtx, GEN_INT (3), const0_rtx,
44653 const1_rtx));
44654 h[i] = gen_lowpart (V32QImode, op);
44655 }
44656
44657 for (i = 0; i < 2; ++i)
44658 {
44659 if (!used[2 * i])
44660 {
44661 l[i] = NULL_RTX;
44662 continue;
44663 }
44664 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44665 vperm = force_reg (V32QImode, vperm);
44666 l[i] = gen_reg_rtx (V32QImode);
44667 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44668 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44669 }
44670
44671 for (i = 0; i < 2; ++i)
44672 {
44673 if (h[i] && l[i])
44674 {
44675 op = gen_reg_rtx (V32QImode);
44676 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44677 l[i] = op;
44678 }
44679 else if (h[i])
44680 l[i] = h[i];
44681 }
44682
44683 gcc_assert (l[0] && l[1]);
44684 op = d->target;
44685 if (d->vmode != V32QImode)
44686 op = gen_reg_rtx (V32QImode);
44687 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44688 if (op != d->target)
44689 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44690 return true;
44691 }
44692
44693 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44694 With all of the interface bits taken care of, perform the expansion
44695 in D and return true on success. */
44696
44697 static bool
44698 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44699 {
44700 /* Try a single instruction expansion. */
44701 if (expand_vec_perm_1 (d))
44702 return true;
44703
44704 /* Try sequences of two instructions. */
44705
44706 if (expand_vec_perm_pshuflw_pshufhw (d))
44707 return true;
44708
44709 if (expand_vec_perm_palignr (d))
44710 return true;
44711
44712 if (expand_vec_perm_interleave2 (d))
44713 return true;
44714
44715 if (expand_vec_perm_broadcast (d))
44716 return true;
44717
44718 if (expand_vec_perm_vpermq_perm_1 (d))
44719 return true;
44720
44721 if (expand_vec_perm_vperm2f128 (d))
44722 return true;
44723
44724 if (expand_vec_perm_pblendv (d))
44725 return true;
44726
44727 /* Try sequences of three instructions. */
44728
44729 if (expand_vec_perm_2vperm2f128_vshuf (d))
44730 return true;
44731
44732 if (expand_vec_perm_pshufb2 (d))
44733 return true;
44734
44735 if (expand_vec_perm_interleave3 (d))
44736 return true;
44737
44738 if (expand_vec_perm_vperm2f128_vblend (d))
44739 return true;
44740
44741 /* Try sequences of four instructions. */
44742
44743 if (expand_vec_perm_vpshufb2_vpermq (d))
44744 return true;
44745
44746 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44747 return true;
44748
44749 /* ??? Look for narrow permutations whose element orderings would
44750 allow the promotion to a wider mode. */
44751
44752 /* ??? Look for sequences of interleave or a wider permute that place
44753 the data into the correct lanes for a half-vector shuffle like
44754 pshuf[lh]w or vpermilps. */
44755
44756 /* ??? Look for sequences of interleave that produce the desired results.
44757 The combinatorics of punpck[lh] get pretty ugly... */
44758
44759 if (expand_vec_perm_even_odd (d))
44760 return true;
44761
44762 /* Even longer sequences. */
44763 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44764 return true;
44765
44766 return false;
44767 }
44768
44769 /* If a permutation only uses one operand, make it clear. Returns true
44770 if the permutation references both operands. */
44771
44772 static bool
44773 canonicalize_perm (struct expand_vec_perm_d *d)
44774 {
44775 int i, which, nelt = d->nelt;
44776
44777 for (i = which = 0; i < nelt; ++i)
44778 which |= (d->perm[i] < nelt ? 1 : 2);
44779
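/* E.g. for V4SImode the selector { 4, 5, 6, 7 } yields WHICH == 2 and
is folded below to { 0, 1, 2, 3 } on a single operand, while
{ 0, 5, 1, 4 } yields WHICH == 3 and, for distinct operands, keeps
them both. */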
44780 d->one_operand_p = true;
44781 switch (which)
44782 {
44783 default:
44784 gcc_unreachable ();
44785
44786 case 3:
44787 if (!rtx_equal_p (d->op0, d->op1))
44788 {
44789 d->one_operand_p = false;
44790 break;
44791 }
44792 /* The elements of PERM do not suggest that only the first operand
44793 is used, but both operands are identical. Allow easier matching
44794 of the permutation by folding the permutation into the single
44795 input vector. */
44796 /* FALLTHRU */
44797
44798 case 2:
44799 for (i = 0; i < nelt; ++i)
44800 d->perm[i] &= nelt - 1;
44801 d->op0 = d->op1;
44802 break;
44803
44804 case 1:
44805 d->op1 = d->op0;
44806 break;
44807 }
44808
44809 return (which == 3);
44810 }
44811
44812 bool
44813 ix86_expand_vec_perm_const (rtx operands[4])
44814 {
44815 struct expand_vec_perm_d d;
44816 unsigned char perm[MAX_VECT_LEN];
44817 int i, nelt;
44818 bool two_args;
44819 rtx sel;
44820
44821 d.target = operands[0];
44822 d.op0 = operands[1];
44823 d.op1 = operands[2];
44824 sel = operands[3];
44825
44826 d.vmode = GET_MODE (d.target);
44827 gcc_assert (VECTOR_MODE_P (d.vmode));
44828 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44829 d.testing_p = false;
44830
44831 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
44832 gcc_assert (XVECLEN (sel, 0) == nelt);
44833 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
44834
44835 for (i = 0; i < nelt; ++i)
44836 {
44837 rtx e = XVECEXP (sel, 0, i);
44838 int ei = INTVAL (e) & (2 * nelt - 1);
44839 d.perm[i] = ei;
44840 perm[i] = ei;
44841 }
44842
44843 two_args = canonicalize_perm (&d);
44844
44845 if (ix86_expand_vec_perm_const_1 (&d))
44846 return true;
44847
44848 /* If the selector says both arguments are needed, but the operands are the
44849 same, the above tried to expand with one_operand_p and flattened selector.
44850 If that didn't work, retry without one_operand_p; we succeeded with that
44851 during testing. */
44852 if (two_args && d.one_operand_p)
44853 {
44854 d.one_operand_p = false;
44855 memcpy (d.perm, perm, sizeof (perm));
44856 return ix86_expand_vec_perm_const_1 (&d);
44857 }
44858
44859 return false;
44860 }
44861
44862 /* Implement targetm.vectorize.vec_perm_const_ok. */
44863
44864 static bool
44865 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
44866 const unsigned char *sel)
44867 {
44868 struct expand_vec_perm_d d;
44869 unsigned int i, nelt, which;
44870 bool ret;
44871
44872 d.vmode = vmode;
44873 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44874 d.testing_p = true;
44875
44876 /* Given sufficient ISA support we can just return true here
44877 for selected vector modes. */
44878 if (d.vmode == V16SImode || d.vmode == V16SFmode
44879 || d.vmode == V8DFmode || d.vmode == V8DImode)
44880 /* All implementable with a single vpermi2 insn. */
44881 return true;
44882 if (GET_MODE_SIZE (d.vmode) == 16)
44883 {
44884 /* All implementable with a single vpperm insn. */
44885 if (TARGET_XOP)
44886 return true;
44887 /* All implementable with 2 pshufb + 1 ior. */
44888 if (TARGET_SSSE3)
44889 return true;
44890 /* All implementable with shufpd or unpck[lh]pd. */
44891 if (d.nelt == 2)
44892 return true;
44893 }
44894
44895 /* Extract the values from the vector CST into the permutation
44896 array in D. */
44897 memcpy (d.perm, sel, nelt);
44898 for (i = which = 0; i < nelt; ++i)
44899 {
44900 unsigned char e = d.perm[i];
44901 gcc_assert (e < 2 * nelt);
44902 which |= (e < nelt ? 1 : 2);
44903 }
44904
44905 /* If all elements are from the second vector, fold them to the first. */
44906 if (which == 2)
44907 for (i = 0; i < nelt; ++i)
44908 d.perm[i] -= nelt;
44909
44910 /* Check whether the mask can be applied to the vector type. */
44911 d.one_operand_p = (which != 3);
44912
44913 /* Implementable with shufps or pshufd. */
44914 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
44915 return true;
44916
44917 /* Otherwise we have to go through the motions and see if we can
44918 figure out how to generate the requested permutation. */
44919 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
44920 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
44921 if (!d.one_operand_p)
44922 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
44923
44924 start_sequence ();
44925 ret = ix86_expand_vec_perm_const_1 (&d);
44926 end_sequence ();
44927
44928 return ret;
44929 }
44930
44931 void
44932 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
44933 {
44934 struct expand_vec_perm_d d;
44935 unsigned i, nelt;
44936
44937 d.target = targ;
44938 d.op0 = op0;
44939 d.op1 = op1;
44940 d.vmode = GET_MODE (targ);
44941 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44942 d.one_operand_p = false;
44943 d.testing_p = false;
44944
44945 for (i = 0; i < nelt; ++i)
44946 d.perm[i] = i * 2 + odd;
44947
44948 /* We'll either be able to implement the permutation directly... */
44949 if (expand_vec_perm_1 (&d))
44950 return;
44951
44952 /* ... or we use the special-case patterns. */
44953 expand_vec_perm_even_odd_1 (&d, odd);
44954 }
44955
44956 static void
44957 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
44958 {
44959 struct expand_vec_perm_d d;
44960 unsigned i, nelt, base;
44961 bool ok;
44962
44963 d.target = targ;
44964 d.op0 = op0;
44965 d.op1 = op1;
44966 d.vmode = GET_MODE (targ);
44967 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44968 d.one_operand_p = false;
44969 d.testing_p = false;
44970
44971 base = high_p ? nelt / 2 : 0;
44972 for (i = 0; i < nelt / 2; ++i)
44973 {
44974 d.perm[i * 2] = i + base;
44975 d.perm[i * 2 + 1] = i + base + nelt;
44976 }
44977
44978 /* Note that for AVX this isn't one instruction. */
44979 ok = ix86_expand_vec_perm_const_1 (&d);
44980 gcc_assert (ok);
44981 }
44982
44983
44984 /* Expand a vector operation CODE for a V*QImode in terms of the
44985 same operation on V*HImode. */
44986
44987 void
44988 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
44989 {
44990 enum machine_mode qimode = GET_MODE (dest);
44991 enum machine_mode himode;
44992 rtx (*gen_il) (rtx, rtx, rtx);
44993 rtx (*gen_ih) (rtx, rtx, rtx);
44994 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
44995 struct expand_vec_perm_d d;
44996 bool ok, full_interleave;
44997 bool uns_p = false;
44998 int i;
44999
45000 switch (qimode)
45001 {
45002 case V16QImode:
45003 himode = V8HImode;
45004 gen_il = gen_vec_interleave_lowv16qi;
45005 gen_ih = gen_vec_interleave_highv16qi;
45006 break;
45007 case V32QImode:
45008 himode = V16HImode;
45009 gen_il = gen_avx2_interleave_lowv32qi;
45010 gen_ih = gen_avx2_interleave_highv32qi;
45011 break;
45012 default:
45013 gcc_unreachable ();
45014 }
45015
45016 op2_l = op2_h = op2;
45017 switch (code)
45018 {
45019 case MULT:
45020 /* Unpack data such that we've got a source byte in each low byte of
45021 each word. We don't care what goes into the high byte of each word.
45022 Rather than trying to get zero in there, most convenient is to let
45023 it be a copy of the low byte. */
45024 op2_l = gen_reg_rtx (qimode);
45025 op2_h = gen_reg_rtx (qimode);
45026 emit_insn (gen_il (op2_l, op2, op2));
45027 emit_insn (gen_ih (op2_h, op2, op2));
45028 /* FALLTHRU */
45029
45030 op1_l = gen_reg_rtx (qimode);
45031 op1_h = gen_reg_rtx (qimode);
45032 emit_insn (gen_il (op1_l, op1, op1));
45033 emit_insn (gen_ih (op1_h, op1, op1));
45034 full_interleave = qimode == V16QImode;
45035 break;
45036
45037 case ASHIFT:
45038 case LSHIFTRT:
45039 uns_p = true;
45040 /* FALLTHRU */
45041 case ASHIFTRT:
45042 op1_l = gen_reg_rtx (himode);
45043 op1_h = gen_reg_rtx (himode);
45044 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
45045 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
45046 full_interleave = true;
45047 break;
45048 default:
45049 gcc_unreachable ();
45050 }
45051
45052 /* Perform the operation. */
45053 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
45054 1, OPTAB_DIRECT);
45055 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
45056 1, OPTAB_DIRECT);
45057 gcc_assert (res_l && res_h);
45058
45059 /* Merge the data back into the right place. */
45060 d.target = dest;
45061 d.op0 = gen_lowpart (qimode, res_l);
45062 d.op1 = gen_lowpart (qimode, res_h);
45063 d.vmode = qimode;
45064 d.nelt = GET_MODE_NUNITS (qimode);
45065 d.one_operand_p = false;
45066 d.testing_p = false;
45067
45068 if (full_interleave)
45069 {
45070 /* For SSE2, we used a full interleave, so the desired
45071 results are in the even elements. */
45072 for (i = 0; i < 32; ++i)
45073 d.perm[i] = i * 2;
45074 }
45075 else
45076 {
45077 /* For AVX, the interleave used above was not cross-lane, so we extract
45078 the even elements, but with the second and third quarters swapped.
45079 Happily, that is even one insn shorter than a plain even extraction. */
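/* I.e. for V32QImode the selector becomes { 0, 2, ..., 14, 32, 34, ...,
46, 16, 18, ..., 30, 48, 50, ..., 62 }. */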
45080 for (i = 0; i < 32; ++i)
45081 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
45082 }
45083
45084 ok = ix86_expand_vec_perm_const_1 (&d);
45085 gcc_assert (ok);
45086
45087 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45088 gen_rtx_fmt_ee (code, qimode, op1, op2));
45089 }
45090
45091 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
45092 if op is CONST_VECTOR with all odd elements equal to their
45093 preceding element. */
45094
45095 static bool
45096 const_vector_equal_evenodd_p (rtx op)
45097 {
45098 enum machine_mode mode = GET_MODE (op);
45099 int i, nunits = GET_MODE_NUNITS (mode);
45100 if (GET_CODE (op) != CONST_VECTOR
45101 || nunits != CONST_VECTOR_NUNITS (op))
45102 return false;
45103 for (i = 0; i < nunits; i += 2)
45104 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
45105 return false;
45106 return true;
45107 }
45108
45109 void
45110 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
45111 bool uns_p, bool odd_p)
45112 {
45113 enum machine_mode mode = GET_MODE (op1);
45114 enum machine_mode wmode = GET_MODE (dest);
45115 rtx x;
45116 rtx orig_op1 = op1, orig_op2 = op2;
45117
45118 if (!nonimmediate_operand (op1, mode))
45119 op1 = force_reg (mode, op1);
45120 if (!nonimmediate_operand (op2, mode))
45121 op2 = force_reg (mode, op2);
45122
45123 /* We only play even/odd games with vectors of SImode. */
45124 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
45125
45126 /* If we're looking for the odd results, shift those members down to
45127 the even slots. For some cpus this is faster than a PSHUFD. */
45128 if (odd_p)
45129 {
45130 /* For XOP use vpmacsdqh, but only for smult, as it is only
45131 signed. */
45132 if (TARGET_XOP && mode == V4SImode && !uns_p)
45133 {
45134 x = force_reg (wmode, CONST0_RTX (wmode));
45135 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
45136 return;
45137 }
45138
45139 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
45140 if (!const_vector_equal_evenodd_p (orig_op1))
45141 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
45142 x, NULL, 1, OPTAB_DIRECT);
45143 if (!const_vector_equal_evenodd_p (orig_op2))
45144 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
45145 x, NULL, 1, OPTAB_DIRECT);
45146 op1 = gen_lowpart (mode, op1);
45147 op2 = gen_lowpart (mode, op2);
45148 }
45149
45150 if (mode == V16SImode)
45151 {
45152 if (uns_p)
45153 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
45154 else
45155 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
45156 }
45157 else if (mode == V8SImode)
45158 {
45159 if (uns_p)
45160 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
45161 else
45162 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
45163 }
45164 else if (uns_p)
45165 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
45166 else if (TARGET_SSE4_1)
45167 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
45168 else
45169 {
45170 rtx s1, s2, t0, t1, t2;
45171
45172 /* The easiest way to implement this without PMULDQ is to go through
45173 the motions as if we were performing a full 64-bit multiply, except
45174 that we need to do less shuffling of the elements. */
45175
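/* The identity used below: for 32-bit signed A and B, the signed
64-bit product equals the zero-extended product
(uint64) (uint32) A * (uint32) B
minus (A < 0 ? (uint64) B << 32 : 0)
minus (B < 0 ? (uint64) A << 32 : 0), all modulo 2^64.
The all-ones compare results S1 and S2 below supply exactly these
correction terms, since (0xffffffffULL * (uint32) B) << 32
== -((uint64) B << 32) modulo 2^64. */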
45176 /* Compute the sign-extension, aka highparts, of the two operands. */
45177 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45178 op1, pc_rtx, pc_rtx);
45179 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45180 op2, pc_rtx, pc_rtx);
45181
45182 /* Multiply LO(A) * HI(B), and vice-versa. */
45183 t1 = gen_reg_rtx (wmode);
45184 t2 = gen_reg_rtx (wmode);
45185 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
45186 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
45187
45188 /* Multiply LO(A) * LO(B). */
45189 t0 = gen_reg_rtx (wmode);
45190 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
45191
45192 /* Combine and shift the highparts into place. */
45193 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
45194 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
45195 1, OPTAB_DIRECT);
45196
45197 /* Combine high and low parts. */
45198 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
45199 return;
45200 }
45201 emit_insn (x);
45202 }
45203
45204 void
45205 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
45206 bool uns_p, bool high_p)
45207 {
45208 enum machine_mode wmode = GET_MODE (dest);
45209 enum machine_mode mode = GET_MODE (op1);
45210 rtx t1, t2, t3, t4, mask;
45211
45212 switch (mode)
45213 {
45214 case V4SImode:
45215 t1 = gen_reg_rtx (mode);
45216 t2 = gen_reg_rtx (mode);
45217 if (TARGET_XOP && !uns_p)
45218 {
45219 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45220 shuffle the elements once so that all elements are in the right
45221 place for immediate use: { A C B D }. */
45222 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45223 const1_rtx, GEN_INT (3)));
45224 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45225 const1_rtx, GEN_INT (3)));
45226 }
45227 else
45228 {
45229 /* Put the elements into place for the multiply. */
45230 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45231 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45232 high_p = false;
45233 }
45234 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45235 break;
45236
45237 case V8SImode:
45238 /* Shuffle the elements between the lanes. After this we
45239 have { A B E F | C D G H } for each operand. */
45240 t1 = gen_reg_rtx (V4DImode);
45241 t2 = gen_reg_rtx (V4DImode);
45242 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45243 const0_rtx, const2_rtx,
45244 const1_rtx, GEN_INT (3)));
45245 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45246 const0_rtx, const2_rtx,
45247 const1_rtx, GEN_INT (3)));
45248
45249 /* Shuffle the elements within the lanes. After this we
45250 have { A A B B | C C D D } or { E E F F | G G H H }. */
45251 t3 = gen_reg_rtx (V8SImode);
45252 t4 = gen_reg_rtx (V8SImode);
45253 mask = GEN_INT (high_p
45254 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45255 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45256 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45257 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45258
45259 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45260 break;
45261
45262 case V8HImode:
45263 case V16HImode:
45264 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45265 uns_p, OPTAB_DIRECT);
45266 t2 = expand_binop (mode,
45267 uns_p ? umul_highpart_optab : smul_highpart_optab,
45268 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45269 gcc_assert (t1 && t2);
45270
45271 t3 = gen_reg_rtx (mode);
45272 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45273 emit_move_insn (dest, gen_lowpart (wmode, t3));
45274 break;
45275
45276 case V16QImode:
45277 case V32QImode:
45278 t1 = gen_reg_rtx (wmode);
45279 t2 = gen_reg_rtx (wmode);
45280 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45281 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45282
45283 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45284 break;
45285
45286 default:
45287 gcc_unreachable ();
45288 }
45289 }
45290
45291 void
45292 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45293 {
45294 rtx res_1, res_2, res_3, res_4;
45295
45296 res_1 = gen_reg_rtx (V4SImode);
45297 res_2 = gen_reg_rtx (V4SImode);
45298 res_3 = gen_reg_rtx (V2DImode);
45299 res_4 = gen_reg_rtx (V2DImode);
45300 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45301 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45302
45303 /* Move the results in element 2 down to element 1; we don't care
45304 what goes in elements 2 and 3. Then we can merge the parts
45305 back together with an interleave.
45306
45307 Note that two other sequences were tried:
45308 (1) Use interleaves at the start instead of psrldq, which allows
45309 us to use a single shufps to merge things back at the end.
45310 (2) Use shufps here to combine the two vectors, then pshufd to
45311 put the elements in the correct order.
45312 In both cases the cost of the reformatting stall was too high
45313 and the overall sequence slower. */
45314
45315 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45316 const0_rtx, const2_rtx,
45317 const0_rtx, const0_rtx));
45318 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45319 const0_rtx, const2_rtx,
45320 const0_rtx, const0_rtx));
45321 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45322
45323 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45324 }
45325
45326 void
45327 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45328 {
45329 enum machine_mode mode = GET_MODE (op0);
45330 rtx t1, t2, t3, t4, t5, t6;
45331
45332 if (TARGET_XOP && mode == V2DImode)
45333 {
45334 /* op1: A,B,C,D, op2: E,F,G,H */
45335 op1 = gen_lowpart (V4SImode, op1);
45336 op2 = gen_lowpart (V4SImode, op2);
45337
45338 t1 = gen_reg_rtx (V4SImode);
45339 t2 = gen_reg_rtx (V4SImode);
45340 t3 = gen_reg_rtx (V2DImode);
45341 t4 = gen_reg_rtx (V2DImode);
45342
45343 /* t1: B,A,D,C */
45344 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45345 GEN_INT (1),
45346 GEN_INT (0),
45347 GEN_INT (3),
45348 GEN_INT (2)));
45349
45350 /* t2: (B*E),(A*F),(D*G),(C*H) */
45351 emit_insn (gen_mulv4si3 (t2, t1, op2));
45352
45353 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45354 emit_insn (gen_xop_phadddq (t3, t2));
45355
45356 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45357 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45358
45359 /* Multiply lower parts and add all. */
45360 t5 = gen_reg_rtx (V2DImode);
45361 emit_insn (gen_vec_widen_umult_even_v4si (t5,
45362 gen_lowpart (V4SImode, op1),
45363 gen_lowpart (V4SImode, op2)));
45364 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
45365
45366 }
45367 else
45368 {
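/* Decomposition used below: writing each 64-bit element as hi*2^32 + lo,
   the product modulo 2^64 is lo1*lo2 + ((lo1*hi2 + hi1*lo2) << 32), so
   only 32x32->64 unsigned widening multiplies are needed.  */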
45369 enum machine_mode nmode;
45370 rtx (*umul) (rtx, rtx, rtx);
45371
45372 if (mode == V2DImode)
45373 {
45374 umul = gen_vec_widen_umult_even_v4si;
45375 nmode = V4SImode;
45376 }
45377 else if (mode == V4DImode)
45378 {
45379 umul = gen_vec_widen_umult_even_v8si;
45380 nmode = V8SImode;
45381 }
45382 else if (mode == V8DImode)
45383 {
45384 umul = gen_vec_widen_umult_even_v16si;
45385 nmode = V16SImode;
45386 }
45387 else
45388 gcc_unreachable ();
45389
45390
45391 /* Multiply low parts. */
45392 t1 = gen_reg_rtx (mode);
45393 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45394
45395 /* Shift input vectors right 32 bits so we can multiply high parts. */
45396 t6 = GEN_INT (32);
45397 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45398 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45399
45400 /* Multiply high parts by low parts. */
45401 t4 = gen_reg_rtx (mode);
45402 t5 = gen_reg_rtx (mode);
45403 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45404 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45405
45406 /* Combine and shift the highparts back. */
45407 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45408 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45409
45410 /* Combine high and low parts. */
45411 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45412 }
45413
45414 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45415 gen_rtx_MULT (mode, op1, op2));
45416 }
45417
45418 /* Calculate integer abs() using only SSE2 instructions. */
45419
45420 void
45421 ix86_expand_sse2_abs (rtx target, rtx input)
45422 {
45423 enum machine_mode mode = GET_MODE (target);
45424 rtx tmp0, tmp1, x;
45425
45426 switch (mode)
45427 {
45428 /* For 32-bit signed integer X, the best way to calculate the absolute
45429 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
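/* For example, with X = -5: T = X >> 31 = -1, X ^ T = 4, and
   4 - T = 5 = abs (X).  */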
45430 case V4SImode:
45431 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45432 GEN_INT (GET_MODE_BITSIZE
45433 (GET_MODE_INNER (mode)) - 1),
45434 NULL, 0, OPTAB_DIRECT);
45435 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45436 NULL, 0, OPTAB_DIRECT);
45437 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45438 target, 0, OPTAB_DIRECT);
45439 break;
45440
45441 /* For 16-bit signed integer X, the best way to calculate the absolute
45442 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45443 case V8HImode:
45444 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45445
45446 x = expand_simple_binop (mode, SMAX, tmp0, input,
45447 target, 0, OPTAB_DIRECT);
45448 break;
45449
45450 /* For 8-bit signed integer X, the best way to calculate the absolute
45451 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45452 as SSE2 provides the PMINUB insn. */
45453 case V16QImode:
45454 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45455
45456 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45457 target, 0, OPTAB_DIRECT);
45458 break;
45459
45460 default:
45461 gcc_unreachable ();
45462 }
45463
45464 if (x != target)
45465 emit_move_insn (target, x);
45466 }
45467
45468 /* Expand an insert into a vector register through pinsr insn.
45469 Return true if successful. */
45470
45471 bool
45472 ix86_expand_pinsr (rtx *operands)
45473 {
45474 rtx dst = operands[0];
45475 rtx src = operands[3];
45476
45477 unsigned int size = INTVAL (operands[1]);
45478 unsigned int pos = INTVAL (operands[2]);
45479
45480 if (GET_CODE (dst) == SUBREG)
45481 {
45482 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45483 dst = SUBREG_REG (dst);
45484 }
45485
45486 if (GET_CODE (src) == SUBREG)
45487 src = SUBREG_REG (src);
45488
45489 switch (GET_MODE (dst))
45490 {
45491 case V16QImode:
45492 case V8HImode:
45493 case V4SImode:
45494 case V2DImode:
45495 {
45496 enum machine_mode srcmode, dstmode;
45497 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45498
45499 srcmode = mode_for_size (size, MODE_INT, 0);
45500
45501 switch (srcmode)
45502 {
45503 case QImode:
45504 if (!TARGET_SSE4_1)
45505 return false;
45506 dstmode = V16QImode;
45507 pinsr = gen_sse4_1_pinsrb;
45508 break;
45509
45510 case HImode:
45511 if (!TARGET_SSE2)
45512 return false;
45513 dstmode = V8HImode;
45514 pinsr = gen_sse2_pinsrw;
45515 break;
45516
45517 case SImode:
45518 if (!TARGET_SSE4_1)
45519 return false;
45520 dstmode = V4SImode;
45521 pinsr = gen_sse4_1_pinsrd;
45522 break;
45523
45524 case DImode:
45525 gcc_assert (TARGET_64BIT);
45526 if (!TARGET_SSE4_1)
45527 return false;
45528 dstmode = V2DImode;
45529 pinsr = gen_sse4_1_pinsrq;
45530 break;
45531
45532 default:
45533 return false;
45534 }
45535
45536 rtx d = dst;
45537 if (GET_MODE (dst) != dstmode)
45538 d = gen_reg_rtx (dstmode);
45539 src = gen_lowpart (srcmode, src);
45540
45541 pos /= size;
45542
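/* The pinsr insn patterns here expect the element position encoded as a
   one-hot merge mask, hence GEN_INT (1 << pos) rather than the raw index.  */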
45543 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45544 GEN_INT (1 << pos)));
45545 if (d != dst)
45546 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45547 return true;
45548 }
45549
45550 default:
45551 return false;
45552 }
45553 }
45554 \f
45555 /* This function returns the calling-ABI-specific va_list type node.
45556 It returns the FNDECL-specific va_list type. */
45557
45558 static tree
45559 ix86_fn_abi_va_list (tree fndecl)
45560 {
45561 if (!TARGET_64BIT)
45562 return va_list_type_node;
45563 gcc_assert (fndecl != NULL_TREE);
45564
45565 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45566 return ms_va_list_type_node;
45567 else
45568 return sysv_va_list_type_node;
45569 }
45570
45571 /* Returns the canonical va_list type specified by TYPE. If there
45572 is no valid TYPE provided, it returns NULL_TREE. */
45573
45574 static tree
45575 ix86_canonical_va_list_type (tree type)
45576 {
45577 tree wtype, htype;
45578
45579 /* Resolve references and pointers to va_list type. */
45580 if (TREE_CODE (type) == MEM_REF)
45581 type = TREE_TYPE (type);
45582 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
45583 type = TREE_TYPE (type);
45584 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45585 type = TREE_TYPE (type);
45586
45587 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45588 {
45589 wtype = va_list_type_node;
45590 gcc_assert (wtype != NULL_TREE);
45591 htype = type;
45592 if (TREE_CODE (wtype) == ARRAY_TYPE)
45593 {
45594 /* If va_list is an array type, the argument may have decayed
45595 to a pointer type, e.g. by being passed to another function.
45596 In that case, unwrap both types so that we can compare the
45597 underlying records. */
45598 if (TREE_CODE (htype) == ARRAY_TYPE
45599 || POINTER_TYPE_P (htype))
45600 {
45601 wtype = TREE_TYPE (wtype);
45602 htype = TREE_TYPE (htype);
45603 }
45604 }
45605 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45606 return va_list_type_node;
45607 wtype = sysv_va_list_type_node;
45608 gcc_assert (wtype != NULL_TREE);
45609 htype = type;
45610 if (TREE_CODE (wtype) == ARRAY_TYPE)
45611 {
45612 /* If va_list is an array type, the argument may have decayed
45613 to a pointer type, e.g. by being passed to another function.
45614 In that case, unwrap both types so that we can compare the
45615 underlying records. */
45616 if (TREE_CODE (htype) == ARRAY_TYPE
45617 || POINTER_TYPE_P (htype))
45618 {
45619 wtype = TREE_TYPE (wtype);
45620 htype = TREE_TYPE (htype);
45621 }
45622 }
45623 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45624 return sysv_va_list_type_node;
45625 wtype = ms_va_list_type_node;
45626 gcc_assert (wtype != NULL_TREE);
45627 htype = type;
45628 if (TREE_CODE (wtype) == ARRAY_TYPE)
45629 {
45630 /* If va_list is an array type, the argument may have decayed
45631 to a pointer type, e.g. by being passed to another function.
45632 In that case, unwrap both types so that we can compare the
45633 underlying records. */
45634 if (TREE_CODE (htype) == ARRAY_TYPE
45635 || POINTER_TYPE_P (htype))
45636 {
45637 wtype = TREE_TYPE (wtype);
45638 htype = TREE_TYPE (htype);
45639 }
45640 }
45641 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45642 return ms_va_list_type_node;
45643 return NULL_TREE;
45644 }
45645 return std_canonical_va_list_type (type);
45646 }
45647
45648 /* Iterate through the target-specific builtin types for va_list.
45649 IDX denotes the iterator, *PTREE is set to the result type of
45650 the va_list builtin, and *PNAME to its internal type.
45651 Returns zero if there is no element for this index, otherwise
45652 IDX should be increased upon the next call.
45653 Note, do not iterate a base builtin's name like __builtin_va_list.
45654 Used from c_common_nodes_and_builtins. */
45655
45656 static int
45657 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45658 {
45659 if (TARGET_64BIT)
45660 {
45661 switch (idx)
45662 {
45663 default:
45664 break;
45665
45666 case 0:
45667 *ptree = ms_va_list_type_node;
45668 *pname = "__builtin_ms_va_list";
45669 return 1;
45670
45671 case 1:
45672 *ptree = sysv_va_list_type_node;
45673 *pname = "__builtin_sysv_va_list";
45674 return 1;
45675 }
45676 }
45677
45678 return 0;
45679 }
45680
45681 #undef TARGET_SCHED_DISPATCH
45682 #define TARGET_SCHED_DISPATCH has_dispatch
45683 #undef TARGET_SCHED_DISPATCH_DO
45684 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45685 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45686 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45687 #undef TARGET_SCHED_REORDER
45688 #define TARGET_SCHED_REORDER ix86_sched_reorder
45689 #undef TARGET_SCHED_ADJUST_PRIORITY
45690 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45691 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45692 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45693 ix86_dependencies_evaluation_hook
45694
45695 /* The size of the dispatch window is the total number of bytes of
45696 object code allowed in a window. */
45697 #define DISPATCH_WINDOW_SIZE 16
45698
45699 /* Number of dispatch windows considered for scheduling. */
45700 #define MAX_DISPATCH_WINDOWS 3
45701
45702 /* Maximum number of instructions in a window. */
45703 #define MAX_INSN 4
45704
45705 /* Maximum number of immediate operands in a window. */
45706 #define MAX_IMM 4
45707
45708 /* Maximum number of immediate bits allowed in a window. */
45709 #define MAX_IMM_SIZE 128
45710
45711 /* Maximum number of 32 bit immediates allowed in a window. */
45712 #define MAX_IMM_32 4
45713
45714 /* Maximum number of 64 bit immediates allowed in a window. */
45715 #define MAX_IMM_64 2
45716
45717 /* Maximum total of loads or prefetches allowed in a window. */
45718 #define MAX_LOAD 2
45719
45720 /* Maximum total of stores allowed in a window. */
45721 #define MAX_STORE 1
45722
45723 #undef BIG
45724 #define BIG 100
45725
45726
45727 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
45728 enum dispatch_group {
45729 disp_no_group = 0,
45730 disp_load,
45731 disp_store,
45732 disp_load_store,
45733 disp_prefetch,
45734 disp_imm,
45735 disp_imm_32,
45736 disp_imm_64,
45737 disp_branch,
45738 disp_cmp,
45739 disp_jcc,
45740 disp_last
45741 };
45742
45743 /* Number of allowable groups in a dispatch window. It is an array
45744 indexed by dispatch_group enum. 100 is used as a big number,
45745 because the number of these kinds of operations does not have any
45746 effect in the dispatch window, but we need them for other reasons in
45747 the table. */
45748 static unsigned int num_allowable_groups[disp_last] = {
45749 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45750 };
45751
45752 char group_name[disp_last + 1][16] = {
45753 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45754 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45755 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45756 };
45757
45758 /* Instruction path. */
45759 enum insn_path {
45760 no_path = 0,
45761 path_single, /* Single micro op. */
45762 path_double, /* Double micro op. */
45763 path_multi, /* Instructions with more than 2 micro ops. */
45764 last_path
45765 };
45766
45767 /* sched_insn_info defines a window to the instructions scheduled in
45768 the basic block. It contains a pointer to the insn_info table and
45769 the instruction scheduled.
45770
45771 Windows are allocated for each basic block and are linked
45772 together. */
45773 typedef struct sched_insn_info_s {
45774 rtx insn;
45775 enum dispatch_group group;
45776 enum insn_path path;
45777 int byte_len;
45778 int imm_bytes;
45779 } sched_insn_info;
45780
45781 /* Linked list of dispatch windows. This is a two way list of
45782 dispatch windows of a basic block. It contains information about
45783 the number of uops in the window and the total number of
45784 instructions and of bytes in the object code for this dispatch
45785 window. */
45786 typedef struct dispatch_windows_s {
45787 int num_insn; /* Number of insn in the window. */
45788 int num_uops; /* Number of uops in the window. */
45789 int window_size; /* Number of bytes in the window. */
45790 int window_num; /* Window number, either 0 or 1. */
45791 int num_imm; /* Number of immediates in an insn. */
45792 int num_imm_32; /* Number of 32 bit immediates in an insn. */
45793 int num_imm_64; /* Number of 64 bit immediates in an insn. */
45794 int imm_size; /* Total immediates in the window. */
45795 int num_loads; /* Total memory loads in the window. */
45796 int num_stores; /* Total memory stores in the window. */
45797 int violation; /* Violation exists in window. */
45798 sched_insn_info *window; /* Pointer to the window. */
45799 struct dispatch_windows_s *next;
45800 struct dispatch_windows_s *prev;
45801 } dispatch_windows;
45802
45803 /* Immediate values used in an insn. */
45804 typedef struct imm_info_s
45805 {
45806 int imm;
45807 int imm32;
45808 int imm64;
45809 } imm_info;
45810
45811 static dispatch_windows *dispatch_window_list;
45812 static dispatch_windows *dispatch_window_list1;
45813
45814 /* Get dispatch group of insn. */
45815
45816 static enum dispatch_group
45817 get_mem_group (rtx insn)
45818 {
45819 enum attr_memory memory;
45820
45821 if (INSN_CODE (insn) < 0)
45822 return disp_no_group;
45823 memory = get_attr_memory (insn);
45824 if (memory == MEMORY_STORE)
45825 return disp_store;
45826
45827 if (memory == MEMORY_LOAD)
45828 return disp_load;
45829
45830 if (memory == MEMORY_BOTH)
45831 return disp_load_store;
45832
45833 return disp_no_group;
45834 }
45835
45836 /* Return true if insn is a compare instruction. */
45837
45838 static bool
45839 is_cmp (rtx insn)
45840 {
45841 enum attr_type type;
45842
45843 type = get_attr_type (insn);
45844 return (type == TYPE_TEST
45845 || type == TYPE_ICMP
45846 || type == TYPE_FCMP
45847 || GET_CODE (PATTERN (insn)) == COMPARE);
45848 }
45849
45850 /* Return true if a dispatch violation was encountered. */
45851
45852 static bool
45853 dispatch_violation (void)
45854 {
45855 if (dispatch_window_list->next)
45856 return dispatch_window_list->next->violation;
45857 return dispatch_window_list->violation;
45858 }
45859
45860 /* Return true if insn is a branch instruction. */
45861
45862 static bool
45863 is_branch (rtx insn)
45864 {
45865 return (CALL_P (insn) || JUMP_P (insn));
45866 }
45867
45868 /* Return true if insn is a prefetch instruction. */
45869
45870 static bool
45871 is_prefetch (rtx insn)
45872 {
45873 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
45874 }
45875
45876 /* This function initializes a dispatch window and the list container holding a
45877 pointer to the window. */
45878
45879 static void
45880 init_window (int window_num)
45881 {
45882 int i;
45883 dispatch_windows *new_list;
45884
45885 if (window_num == 0)
45886 new_list = dispatch_window_list;
45887 else
45888 new_list = dispatch_window_list1;
45889
45890 new_list->num_insn = 0;
45891 new_list->num_uops = 0;
45892 new_list->window_size = 0;
45893 new_list->next = NULL;
45894 new_list->prev = NULL;
45895 new_list->window_num = window_num;
45896 new_list->num_imm = 0;
45897 new_list->num_imm_32 = 0;
45898 new_list->num_imm_64 = 0;
45899 new_list->imm_size = 0;
45900 new_list->num_loads = 0;
45901 new_list->num_stores = 0;
45902 new_list->violation = false;
45903
45904 for (i = 0; i < MAX_INSN; i++)
45905 {
45906 new_list->window[i].insn = NULL;
45907 new_list->window[i].group = disp_no_group;
45908 new_list->window[i].path = no_path;
45909 new_list->window[i].byte_len = 0;
45910 new_list->window[i].imm_bytes = 0;
45911 }
45912 return;
45913 }
45914
45915 /* This function allocates and initializes a dispatch window and the
45916 list container holding a pointer to the window. */
45917
45918 static dispatch_windows *
45919 allocate_window (void)
45920 {
45921 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
45922 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
45923
45924 return new_list;
45925 }
45926
45927 /* This routine initializes the dispatch scheduling information. It
45928 initiates building dispatch scheduler tables and constructs the
45929 first dispatch window. */
45930
45931 static void
45932 init_dispatch_sched (void)
45933 {
45934 /* Allocate a dispatch list and a window. */
45935 dispatch_window_list = allocate_window ();
45936 dispatch_window_list1 = allocate_window ();
45937 init_window (0);
45938 init_window (1);
45939 }
45940
45941 /* This function returns true if a branch is detected. The end of a basic
45942 block does not have to be a branch, but here we assume only branches end a
45943 window. */
45944
45945 static bool
45946 is_end_basic_block (enum dispatch_group group)
45947 {
45948 return group == disp_branch;
45949 }
45950
45951 /* This function is called when the end of window processing is reached. */
45952
45953 static void
45954 process_end_window (void)
45955 {
45956 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
45957 if (dispatch_window_list->next)
45958 {
45959 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
45960 gcc_assert (dispatch_window_list->window_size
45961 + dispatch_window_list1->window_size <= 48);
45962 init_window (1);
45963 }
45964 init_window (0);
45965 }
45966
45967 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
45968 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
45969 for 48 bytes of instructions. Note that these windows are not dispatch
45970 windows whose sizes are DISPATCH_WINDOW_SIZE. */
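/* The 48-byte limit corresponds to MAX_DISPATCH_WINDOWS (3) times
   DISPATCH_WINDOW_SIZE (16) bytes.  */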
45971
45972 static dispatch_windows *
45973 allocate_next_window (int window_num)
45974 {
45975 if (window_num == 0)
45976 {
45977 if (dispatch_window_list->next)
45978 init_window (1);
45979 init_window (0);
45980 return dispatch_window_list;
45981 }
45982
45983 dispatch_window_list->next = dispatch_window_list1;
45984 dispatch_window_list1->prev = dispatch_window_list;
45985
45986 return dispatch_window_list1;
45987 }
45988
45989 /* Increment the number of immediate operands of an instruction. */
45990
45991 static int
45992 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
45993 {
45994 if (*in_rtx == 0)
45995 return 0;
45996
45997 switch (GET_CODE (*in_rtx))
45998 {
45999 case CONST:
46000 case SYMBOL_REF:
46001 case CONST_INT:
46002 (imm_values->imm)++;
46003 if (x86_64_immediate_operand (*in_rtx, SImode))
46004 (imm_values->imm32)++;
46005 else
46006 (imm_values->imm64)++;
46007 break;
46008
46009 case CONST_DOUBLE:
46010 (imm_values->imm)++;
46011 (imm_values->imm64)++;
46012 break;
46013
46014 case CODE_LABEL:
46015 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
46016 {
46017 (imm_values->imm)++;
46018 (imm_values->imm32)++;
46019 }
46020 break;
46021
46022 default:
46023 break;
46024 }
46025
46026 return 0;
46027 }
46028
46029 /* Compute number of immediate operands of an instruction. */
46030
46031 static void
46032 find_constant (rtx in_rtx, imm_info *imm_values)
46033 {
46034 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
46035 (rtx_function) find_constant_1, (void *) imm_values);
46036 }
46037
46038 /* Return total size of immediate operands of an instruction along with number
46039 of corresponding immediate-operands. It initializes its parameters to zero
46040 before calling FIND_CONSTANT.
46041 INSN is the input instruction. IMM is the total of immediates.
46042 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
46043 bit immediates. */
46044
46045 static int
46046 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
46047 {
46048 imm_info imm_values = {0, 0, 0};
46049
46050 find_constant (insn, &imm_values);
46051 *imm = imm_values.imm;
46052 *imm32 = imm_values.imm32;
46053 *imm64 = imm_values.imm64;
46054 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
46055 }
46056
46057 /* Return true if an operand of instruction INSN is an
46058 immediate. */
46059
46060 static bool
46061 has_immediate (rtx insn)
46062 {
46063 int num_imm_operand;
46064 int num_imm32_operand;
46065 int num_imm64_operand;
46066
46067 if (insn)
46068 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46069 &num_imm64_operand);
46070 return false;
46071 }
46072
46073 /* Return the decode path (single, double or multi uop) for an insn. */
46074
46075 static enum insn_path
46076 get_insn_path (rtx insn)
46077 {
46078 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
46079
46080 if ((int)path == 0)
46081 return path_single;
46082
46083 if ((int)path == 1)
46084 return path_double;
46085
46086 return path_multi;
46087 }
46088
46089 /* Return insn dispatch group. */
46090
46091 static enum dispatch_group
46092 get_insn_group (rtx insn)
46093 {
46094 enum dispatch_group group = get_mem_group (insn);
46095 if (group)
46096 return group;
46097
46098 if (is_branch (insn))
46099 return disp_branch;
46100
46101 if (is_cmp (insn))
46102 return disp_cmp;
46103
46104 if (has_immediate (insn))
46105 return disp_imm;
46106
46107 if (is_prefetch (insn))
46108 return disp_prefetch;
46109
46110 return disp_no_group;
46111 }
46112
46113 /* Count number of GROUP restricted instructions in a dispatch
46114 window WINDOW_LIST. */
46115
46116 static int
46117 count_num_restricted (rtx insn, dispatch_windows *window_list)
46118 {
46119 enum dispatch_group group = get_insn_group (insn);
46120 int imm_size;
46121 int num_imm_operand;
46122 int num_imm32_operand;
46123 int num_imm64_operand;
46124
46125 if (group == disp_no_group)
46126 return 0;
46127
46128 if (group == disp_imm)
46129 {
46130 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46131 &num_imm64_operand);
46132 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
46133 || num_imm_operand + window_list->num_imm > MAX_IMM
46134 || (num_imm32_operand > 0
46135 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
46136 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
46137 || (num_imm64_operand > 0
46138 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
46139 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
46140 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
46141 && num_imm64_operand > 0
46142 && ((window_list->num_imm_64 > 0
46143 && window_list->num_insn >= 2)
46144 || window_list->num_insn >= 3)))
46145 return BIG;
46146
46147 return 1;
46148 }
46149
46150 if ((group == disp_load_store
46151 && (window_list->num_loads >= MAX_LOAD
46152 || window_list->num_stores >= MAX_STORE))
46153 || ((group == disp_load
46154 || group == disp_prefetch)
46155 && window_list->num_loads >= MAX_LOAD)
46156 || (group == disp_store
46157 && window_list->num_stores >= MAX_STORE))
46158 return BIG;
46159
46160 return 1;
46161 }
46162
46163 /* This function returns true if insn satisfies dispatch rules on the
46164 last window scheduled. */
46165
46166 static bool
46167 fits_dispatch_window (rtx insn)
46168 {
46169 dispatch_windows *window_list = dispatch_window_list;
46170 dispatch_windows *window_list_next = dispatch_window_list->next;
46171 unsigned int num_restrict;
46172 enum dispatch_group group = get_insn_group (insn);
46173 enum insn_path path = get_insn_path (insn);
46174 int sum;
46175
46176 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
46177 instructions should be given the lowest priority in the
46178 scheduling process in the Haifa scheduler to make sure they will be
46179 scheduled in the same dispatch window as the reference to them. */
46180 if (group == disp_jcc || group == disp_cmp)
46181 return false;
46182
46183 /* Check nonrestricted. */
46184 if (group == disp_no_group || group == disp_branch)
46185 return true;
46186
46187 /* Get last dispatch window. */
46188 if (window_list_next)
46189 window_list = window_list_next;
46190
46191 if (window_list->window_num == 1)
46192 {
46193 sum = window_list->prev->window_size + window_list->window_size;
46194
46195 if (sum == 32
46196 || (min_insn_size (insn) + sum) >= 48)
46197 /* Window 1 is full. Go for next window. */
46198 return true;
46199 }
46200
46201 num_restrict = count_num_restricted (insn, window_list);
46202
46203 if (num_restrict > num_allowable_groups[group])
46204 return false;
46205
46206 /* See if it fits in the first window. */
46207 if (window_list->window_num == 0)
46208 {
46209 /* The first window should have only single and double path
46210 uops. */
46211 if (path == path_double
46212 && (window_list->num_uops + 2) > MAX_INSN)
46213 return false;
46214 else if (path != path_single)
46215 return false;
46216 }
46217 return true;
46218 }
46219
46220 /* Add an instruction INSN with NUM_UOPS micro-operations to the
46221 dispatch window WINDOW_LIST. */
46222
46223 static void
46224 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
46225 {
46226 int byte_len = min_insn_size (insn);
46227 int num_insn = window_list->num_insn;
46228 int imm_size;
46229 sched_insn_info *window = window_list->window;
46230 enum dispatch_group group = get_insn_group (insn);
46231 enum insn_path path = get_insn_path (insn);
46232 int num_imm_operand;
46233 int num_imm32_operand;
46234 int num_imm64_operand;
46235
46236 if (!window_list->violation && group != disp_cmp
46237 && !fits_dispatch_window (insn))
46238 window_list->violation = true;
46239
46240 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46241 &num_imm64_operand);
46242
46243 /* Initialize window with new instruction. */
46244 window[num_insn].insn = insn;
46245 window[num_insn].byte_len = byte_len;
46246 window[num_insn].group = group;
46247 window[num_insn].path = path;
46248 window[num_insn].imm_bytes = imm_size;
46249
46250 window_list->window_size += byte_len;
46251 window_list->num_insn = num_insn + 1;
46252 window_list->num_uops = window_list->num_uops + num_uops;
46253 window_list->imm_size += imm_size;
46254 window_list->num_imm += num_imm_operand;
46255 window_list->num_imm_32 += num_imm32_operand;
46256 window_list->num_imm_64 += num_imm64_operand;
46257
46258 if (group == disp_store)
46259 window_list->num_stores += 1;
46260 else if (group == disp_load
46261 || group == disp_prefetch)
46262 window_list->num_loads += 1;
46263 else if (group == disp_load_store)
46264 {
46265 window_list->num_stores += 1;
46266 window_list->num_loads += 1;
46267 }
46268 }
46269
46270 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46271 If the total bytes of instructions or the number of instructions in
46272 the window exceed the allowable limits, it allocates a new window.
46273
46274 static void
46275 add_to_dispatch_window (rtx insn)
46276 {
46277 int byte_len;
46278 dispatch_windows *window_list;
46279 dispatch_windows *next_list;
46280 dispatch_windows *window0_list;
46281 enum insn_path path;
46282 enum dispatch_group insn_group;
46283 bool insn_fits;
46284 int num_insn;
46285 int num_uops;
46286 int window_num;
46287 int insn_num_uops;
46288 int sum;
46289
46290 if (INSN_CODE (insn) < 0)
46291 return;
46292
46293 byte_len = min_insn_size (insn);
46294 window_list = dispatch_window_list;
46295 next_list = window_list->next;
46296 path = get_insn_path (insn);
46297 insn_group = get_insn_group (insn);
46298
46299 /* Get the last dispatch window. */
46300 if (next_list)
46301 window_list = dispatch_window_list->next;
46302
46303 if (path == path_single)
46304 insn_num_uops = 1;
46305 else if (path == path_double)
46306 insn_num_uops = 2;
46307 else
46308 insn_num_uops = (int) path;
46309
46310 /* If the current window is full, get a new window.
46311 Window number zero is full if MAX_INSN uops are scheduled in it.
46312 Window number one is full if window zero's bytes plus window
46313 one's bytes equal 32, or if adding the bytes of the new instruction
46314 to the total makes it greater than 48, or if it already has MAX_INSN
46315 instructions in it. */
46316 num_insn = window_list->num_insn;
46317 num_uops = window_list->num_uops;
46318 window_num = window_list->window_num;
46319 insn_fits = fits_dispatch_window (insn);
46320
46321 if (num_insn >= MAX_INSN
46322 || num_uops + insn_num_uops > MAX_INSN
46323 || !(insn_fits))
46324 {
46325 window_num = ~window_num & 1;
46326 window_list = allocate_next_window (window_num);
46327 }
46328
46329 if (window_num == 0)
46330 {
46331 add_insn_window (insn, window_list, insn_num_uops);
46332 if (window_list->num_insn >= MAX_INSN
46333 && insn_group == disp_branch)
46334 {
46335 process_end_window ();
46336 return;
46337 }
46338 }
46339 else if (window_num == 1)
46340 {
46341 window0_list = window_list->prev;
46342 sum = window0_list->window_size + window_list->window_size;
46343 if (sum == 32
46344 || (byte_len + sum) >= 48)
46345 {
46346 process_end_window ();
46347 window_list = dispatch_window_list;
46348 }
46349
46350 add_insn_window (insn, window_list, insn_num_uops);
46351 }
46352 else
46353 gcc_unreachable ();
46354
46355 if (is_end_basic_block (insn_group))
46356 {
46357 /* End of basic block is reached; do end-basic-block processing. */
46358 process_end_window ();
46359 return;
46360 }
46361 }
46362
46363 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46364
46365 DEBUG_FUNCTION static void
46366 debug_dispatch_window_file (FILE *file, int window_num)
46367 {
46368 dispatch_windows *list;
46369 int i;
46370
46371 if (window_num == 0)
46372 list = dispatch_window_list;
46373 else
46374 list = dispatch_window_list1;
46375
46376 fprintf (file, "Window #%d:\n", list->window_num);
46377 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46378 list->num_insn, list->num_uops, list->window_size);
46379 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46380 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46381
46382 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46383 list->num_stores);
46384 fprintf (file, " insn info:\n");
46385
46386 for (i = 0; i < MAX_INSN; i++)
46387 {
46388 if (!list->window[i].insn)
46389 break;
46390 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46391 i, group_name[list->window[i].group],
46392 i, (void *)list->window[i].insn,
46393 i, list->window[i].path,
46394 i, list->window[i].byte_len,
46395 i, list->window[i].imm_bytes);
46396 }
46397 }
46398
46399 /* Print to stdout a dispatch window. */
46400
46401 DEBUG_FUNCTION void
46402 debug_dispatch_window (int window_num)
46403 {
46404 debug_dispatch_window_file (stdout, window_num);
46405 }
46406
46407 /* Print INSN dispatch information to FILE. */
46408
46409 DEBUG_FUNCTION static void
46410 debug_insn_dispatch_info_file (FILE *file, rtx insn)
46411 {
46412 int byte_len;
46413 enum insn_path path;
46414 enum dispatch_group group;
46415 int imm_size;
46416 int num_imm_operand;
46417 int num_imm32_operand;
46418 int num_imm64_operand;
46419
46420 if (INSN_CODE (insn) < 0)
46421 return;
46422
46423 byte_len = min_insn_size (insn);
46424 path = get_insn_path (insn);
46425 group = get_insn_group (insn);
46426 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46427 &num_imm64_operand);
46428
46429 fprintf (file, " insn info:\n");
46430 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46431 group_name[group], path, byte_len);
46432 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46433 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46434 }
46435
46436 /* Print to STDOUT the status of the ready list with respect to
46437 dispatch windows. */
46438
46439 DEBUG_FUNCTION void
46440 debug_ready_dispatch (void)
46441 {
46442 int i;
46443 int no_ready = number_in_ready ();
46444
46445 fprintf (stdout, "Number of ready: %d\n", no_ready);
46446
46447 for (i = 0; i < no_ready; i++)
46448 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46449 }
46450
46451 /* This routine is the driver of the dispatch scheduler. */
46452
46453 static void
46454 do_dispatch (rtx insn, int mode)
46455 {
46456 if (mode == DISPATCH_INIT)
46457 init_dispatch_sched ();
46458 else if (mode == ADD_TO_DISPATCH_WINDOW)
46459 add_to_dispatch_window (insn);
46460 }
46461
46462 /* Return TRUE if Dispatch Scheduling is supported. */
46463
46464 static bool
46465 has_dispatch (rtx insn, int action)
46466 {
46467 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46468 && flag_dispatch_scheduler)
46469 switch (action)
46470 {
46471 default:
46472 return false;
46473
46474 case IS_DISPATCH_ON:
46475 return true;
46476 break;
46477
46478 case IS_CMP:
46479 return is_cmp (insn);
46480
46481 case DISPATCH_VIOLATION:
46482 return dispatch_violation ();
46483
46484 case FITS_DISPATCH_WINDOW:
46485 return fits_dispatch_window (insn);
46486 }
46487
46488 return false;
46489 }
46490
46491 /* Implementation of reassociation_width target hook used by
46492 reassoc phase to identify parallelism level in reassociated
46493 tree. The statement's tree_code is passed in OPC. The argument's type
46494 is passed in MODE.
46495
46496 Currently parallel reassociation is enabled for Atom
46497 processors only and we set reassociation width to be 2
46498 because Atom may issue up to 2 instructions per cycle.
46499
46500 Return value should be fixed if parallel reassociation is
46501 enabled for other processors. */
46502
46503 static int
46504 ix86_reassociation_width (unsigned int, enum machine_mode mode)
46505 {
46506 int res = 1;
46507
46508 /* Vector part. */
46509 if (VECTOR_MODE_P (mode))
46510 {
46511 if (TARGET_VECTOR_PARALLEL_EXECUTION)
46512 return 2;
46513 else
46514 return 1;
46515 }
46516
46517 /* Scalar part. */
46518 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46519 res = 2;
46520 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46521 res = 2;
46522
46523 return res;
46524 }
46525
46526 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46527 place emms and femms instructions. */
46528
46529 static enum machine_mode
46530 ix86_preferred_simd_mode (enum machine_mode mode)
46531 {
46532 if (!TARGET_SSE)
46533 return word_mode;
46534
46535 switch (mode)
46536 {
46537 case QImode:
46538 return TARGET_AVX512BW ? V64QImode :
46539 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46540 case HImode:
46541 return TARGET_AVX512BW ? V32HImode :
46542 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46543 case SImode:
46544 return TARGET_AVX512F ? V16SImode :
46545 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46546 case DImode:
46547 return TARGET_AVX512F ? V8DImode :
46548 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46549
46550 case SFmode:
46551 if (TARGET_AVX512F)
46552 return V16SFmode;
46553 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46554 return V8SFmode;
46555 else
46556 return V4SFmode;
46557
46558 case DFmode:
46559 if (!TARGET_VECTORIZE_DOUBLE)
46560 return word_mode;
46561 else if (TARGET_AVX512F)
46562 return V8DFmode;
46563 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46564 return V4DFmode;
46565 else if (TARGET_SSE2)
46566 return V2DFmode;
46567 /* FALLTHRU */
46568
46569 default:
46570 return word_mode;
46571 }
46572 }
46573
46574 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46575 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46576 256bit and 128bit vectors. */
46577
46578 static unsigned int
46579 ix86_autovectorize_vector_sizes (void)
46580 {
46581 return TARGET_AVX512F ? 64 | 32 | 16 :
46582 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
46583 }
46584
46585 \f
46586
46587 /* Return class of registers which could be used for pseudo of MODE
46588 and of class RCLASS for spilling instead of memory. Return NO_REGS
46589 if it is not possible or not profitable. */
46590 static reg_class_t
46591 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46592 {
46593 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46594 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46595 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
46596 return ALL_SSE_REGS;
46597 return NO_REGS;
46598 }
46599
46600 /* Implement targetm.vectorize.init_cost. */
46601
46602 static void *
46603 ix86_init_cost (struct loop *)
46604 {
46605 unsigned *cost = XNEWVEC (unsigned, 3);
46606 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46607 return cost;
46608 }
46609
46610 /* Implement targetm.vectorize.add_stmt_cost. */
46611
46612 static unsigned
46613 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46614 struct _stmt_vec_info *stmt_info, int misalign,
46615 enum vect_cost_model_location where)
46616 {
46617 unsigned *cost = (unsigned *) data;
46618 unsigned retval = 0;
46619
46620 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46621 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46622
46623 /* Statements in an inner loop relative to the loop being
46624 vectorized are weighted more heavily. The value here is
46625 arbitrary and could potentially be improved with analysis. */
46626 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46627 count *= 50; /* FIXME. */
46628
46629 retval = (unsigned) (count * stmt_cost);
46630
46631 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
46632 for Silvermont as it has an out-of-order integer pipeline and can execute
46633 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
46634 if (TARGET_SILVERMONT || TARGET_INTEL)
46635 if (stmt_info && stmt_info->stmt)
46636 {
46637 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
46638 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
46639 retval = (retval * 17) / 10;
46640 }
46641
46642 cost[where] += retval;
46643
46644 return retval;
46645 }
46646
46647 /* Implement targetm.vectorize.finish_cost. */
46648
46649 static void
46650 ix86_finish_cost (void *data, unsigned *prologue_cost,
46651 unsigned *body_cost, unsigned *epilogue_cost)
46652 {
46653 unsigned *cost = (unsigned *) data;
46654 *prologue_cost = cost[vect_prologue];
46655 *body_cost = cost[vect_body];
46656 *epilogue_cost = cost[vect_epilogue];
46657 }
46658
46659 /* Implement targetm.vectorize.destroy_cost_data. */
46660
46661 static void
46662 ix86_destroy_cost_data (void *data)
46663 {
46664 free (data);
46665 }
46666
46667 /* Validate target specific memory model bits in VAL. */
46668
46669 static unsigned HOST_WIDE_INT
46670 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46671 {
46672 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46673 bool strong;
46674
46675 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46676 |MEMMODEL_MASK)
46677 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46678 {
46679 warning (OPT_Winvalid_memory_model,
46680 "Unknown architecture specific memory model");
46681 return MEMMODEL_SEQ_CST;
46682 }
46683 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46684 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46685 {
46686 warning (OPT_Winvalid_memory_model,
46687 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46688 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46689 }
46690 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46691 {
46692 warning (OPT_Winvalid_memory_model,
46693 "HLE_RELEASE not used with RELEASE or stronger memory model");
46694 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46695 }
46696 return val;
46697 }
46698
46699 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46700 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
46701 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46702 or number of vecsize_mangle variants that should be emitted. */
46703
46704 static int
46705 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46706 struct cgraph_simd_clone *clonei,
46707 tree base_type, int num)
46708 {
46709 int ret = 1;
46710
46711 if (clonei->simdlen
46712 && (clonei->simdlen < 2
46713 || clonei->simdlen > 16
46714 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46715 {
46716 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46717 "unsupported simdlen %d", clonei->simdlen);
46718 return 0;
46719 }
46720
46721 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46722 if (TREE_CODE (ret_type) != VOID_TYPE)
46723 switch (TYPE_MODE (ret_type))
46724 {
46725 case QImode:
46726 case HImode:
46727 case SImode:
46728 case DImode:
46729 case SFmode:
46730 case DFmode:
46731 /* case SCmode: */
46732 /* case DCmode: */
46733 break;
46734 default:
46735 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46736 "unsupported return type %qT for simd\n", ret_type);
46737 return 0;
46738 }
46739
46740 tree t;
46741 int i;
46742
46743 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46744 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46745 switch (TYPE_MODE (TREE_TYPE (t)))
46746 {
46747 case QImode:
46748 case HImode:
46749 case SImode:
46750 case DImode:
46751 case SFmode:
46752 case DFmode:
46753 /* case SCmode: */
46754 /* case DCmode: */
46755 break;
46756 default:
46757 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46758 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46759 return 0;
46760 }
46761
46762 if (clonei->cilk_elemental)
46763 {
46764 /* Parse the processor clause here. If not present, default to 'b'. */
46765 clonei->vecsize_mangle = 'b';
46766 }
46767 else if (!TREE_PUBLIC (node->decl))
46768 {
46769 /* If the function isn't exported, we can pick up just one ISA
46770 for the clones. */
46771 if (TARGET_AVX2)
46772 clonei->vecsize_mangle = 'd';
46773 else if (TARGET_AVX)
46774 clonei->vecsize_mangle = 'c';
46775 else
46776 clonei->vecsize_mangle = 'b';
46777 ret = 1;
46778 }
46779 else
46780 {
46781 clonei->vecsize_mangle = "bcd"[num];
46782 ret = 3;
46783 }
46784 switch (clonei->vecsize_mangle)
46785 {
46786 case 'b':
46787 clonei->vecsize_int = 128;
46788 clonei->vecsize_float = 128;
46789 break;
46790 case 'c':
46791 clonei->vecsize_int = 128;
46792 clonei->vecsize_float = 256;
46793 break;
46794 case 'd':
46795 clonei->vecsize_int = 256;
46796 clonei->vecsize_float = 256;
46797 break;
46798 }
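/* For example, for the 'd' (AVX2) variant with a double base type this
   computes simdlen = 256 / 64 = 4 when the user did not specify one.  */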
46799 if (clonei->simdlen == 0)
46800 {
46801 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
46802 clonei->simdlen = clonei->vecsize_int;
46803 else
46804 clonei->simdlen = clonei->vecsize_float;
46805 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
46806 if (clonei->simdlen > 16)
46807 clonei->simdlen = 16;
46808 }
46809 return ret;
46810 }
46811
46812 /* Add target attribute to SIMD clone NODE if needed. */
46813
46814 static void
46815 ix86_simd_clone_adjust (struct cgraph_node *node)
46816 {
46817 const char *str = NULL;
46818 gcc_assert (node->decl == cfun->decl);
46819 switch (node->simdclone->vecsize_mangle)
46820 {
46821 case 'b':
46822 if (!TARGET_SSE2)
46823 str = "sse2";
46824 break;
46825 case 'c':
46826 if (!TARGET_AVX)
46827 str = "avx";
46828 break;
46829 case 'd':
46830 if (!TARGET_AVX2)
46831 str = "avx2";
46832 break;
46833 default:
46834 gcc_unreachable ();
46835 }
46836 if (str == NULL)
46837 return;
46838 push_cfun (NULL);
46839 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
46840 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
46841 gcc_assert (ok);
46842 pop_cfun ();
46843 ix86_previous_fndecl = NULL_TREE;
46844 ix86_set_current_function (node->decl);
46845 }
46846
46847 /* If SIMD clone NODE can't be used in a vectorized loop
46848 in the current function, return -1, otherwise return the badness of using it
46849 (0 if it is most desirable from vecsize_mangle point of view, 1
46850 slightly less desirable, etc.). */
46851
46852 static int
46853 ix86_simd_clone_usable (struct cgraph_node *node)
46854 {
46855 switch (node->simdclone->vecsize_mangle)
46856 {
46857 case 'b':
46858 if (!TARGET_SSE2)
46859 return -1;
46860 if (!TARGET_AVX)
46861 return 0;
46862 return TARGET_AVX2 ? 2 : 1;
46863 case 'c':
46864 if (!TARGET_AVX)
46865 return -1;
46866 return TARGET_AVX2 ? 1 : 0;
46867 break;
46868 case 'd':
46869 if (!TARGET_AVX2)
46870 return -1;
46871 return 0;
46872 default:
46873 gcc_unreachable ();
46874 }
46875 }
46876
46877 /* This function counts the number of memory references.
46878 This value determines the unrolling factor for
46879 bdver3 and bdver4 architectures. */
46880
46881 static int
46882 ix86_loop_memcount (rtx *x, unsigned *mem_count)
46883 {
46884 if (*x != NULL_RTX && MEM_P (*x))
46885 {
46886 enum machine_mode mode;
46887 unsigned int n_words;
46888
46889 mode = GET_MODE (*x);
46890 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46891
46892 if (n_words > 4)
46893 (*mem_count)+=2;
46894 else
46895 (*mem_count)+=1;
46896 }
46897 return 0;
46898 }
46899
46900 /* This function adjusts the unroll factor based on
46901 the hardware capabilities. For example, bdver3 has
46902 a loop buffer which makes unrolling of smaller
46903 loops less important. This function decides the
46904 unroll factor using the number of memory references
46905 (the value 32 is used) as a heuristic. */
46906
46907 static unsigned
46908 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
46909 {
46910 basic_block *bbs;
46911 rtx_insn *insn;
46912 unsigned i;
46913 unsigned mem_count = 0;
46914
46915 if (!TARGET_ADJUST_UNROLL)
46916 return nunroll;
46917
46918 /* Count the number of memory references within the loop body. */
46919 bbs = get_loop_body (loop);
46920 for (i = 0; i < loop->num_nodes; i++)
46921 {
46922 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
46923 if (NONDEBUG_INSN_P (insn))
46924 for_each_rtx_in_insn (&insn, (rtx_function) ix86_loop_memcount,
46925 &mem_count);
46926 }
46927 free (bbs);
46928
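/* Cap the unroll factor so the unrolled body stays within roughly 32
   memory references; e.g. a body with 8 references is unrolled at most
   4 times.  */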
46929 if (mem_count && mem_count <= 32)
46930 return 32 / mem_count;
46931
46932 return nunroll;
46933 }
46934
46935
46936 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
46937
46938 static bool
46939 ix86_float_exceptions_rounding_supported_p (void)
46940 {
46941 /* For x87 floating point with standard excess precision handling,
46942 there is no adddf3 pattern (since x87 floating point only has
46943 XFmode operations) so the default hook implementation gets this
46944 wrong. */
46945 return TARGET_80387 || TARGET_SSE_MATH;
46946 }
46947
46948 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
46949
46950 static void
46951 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
46952 {
46953 if (!TARGET_80387 && !TARGET_SSE_MATH)
46954 return;
46955 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
46956 if (TARGET_80387)
46957 {
46958 tree fenv_index_type = build_index_type (size_int (6));
46959 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
46960 tree fenv_var = create_tmp_var (fenv_type, NULL);
46961 mark_addressable (fenv_var);
46962 tree fenv_ptr = build_pointer_type (fenv_type);
46963 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
46964 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
46965 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
46966 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
46967 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
46968 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
46969 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
46970 tree hold_fnclex = build_call_expr (fnclex, 0);
46971 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
46972 hold_fnclex);
46973 *clear = build_call_expr (fnclex, 0);
46974 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
46975 tree fnstsw_call = build_call_expr (fnstsw, 0);
46976 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
46977 sw_var, fnstsw_call);
46978 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
46979 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
46980 exceptions_var, exceptions_x87);
46981 *update = build2 (COMPOUND_EXPR, integer_type_node,
46982 sw_mod, update_mod);
46983 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
46984 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
46985 }
46986 if (TARGET_SSE_MATH)
46987 {
46988 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
46989 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
46990 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
46991 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
46992 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
46993 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
46994 mxcsr_orig_var, stmxcsr_hold_call);
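/* In MXCSR, bits 7-12 (0x1f80) are the exception mask bits and bits 0-5
   are the exception status flags; the held value masks all exceptions
   and clears any pending flags.  */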
46995 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
46996 mxcsr_orig_var,
46997 build_int_cst (unsigned_type_node, 0x1f80));
46998 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
46999 build_int_cst (unsigned_type_node, 0xffffffc0));
47000 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
47001 mxcsr_mod_var, hold_mod_val);
47002 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
47003 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
47004 hold_assign_orig, hold_assign_mod);
47005 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
47006 ldmxcsr_hold_call);
47007 if (*hold)
47008 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
47009 else
47010 *hold = hold_all;
47011 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
47012 if (*clear)
47013 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
47014 ldmxcsr_clear_call);
47015 else
47016 *clear = ldmxcsr_clear_call;
47017 tree stmxcsr_update_call = build_call_expr (stmxcsr, 0);
47018 tree exceptions_sse = fold_convert (integer_type_node,
47019 stmxcsr_update_call);
47020 if (*update)
47021 {
47022 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
47023 exceptions_var, exceptions_sse);
47024 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
47025 exceptions_var, exceptions_mod);
47026 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
47027 exceptions_assign);
47028 }
47029 else
47030 *update = build2 (MODIFY_EXPR, integer_type_node,
47031 exceptions_var, exceptions_sse);
47032 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
47033 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
47034 ldmxcsr_update_call);
47035 }
47036 tree atomic_feraiseexcept
47037 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
47038 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
47039 1, exceptions_var);
47040 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
47041 atomic_feraiseexcept_call);
47042 }
47043
47044 /* Initialize the GCC target structure. */
47045 #undef TARGET_RETURN_IN_MEMORY
47046 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
47047
47048 #undef TARGET_LEGITIMIZE_ADDRESS
47049 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
47050
47051 #undef TARGET_ATTRIBUTE_TABLE
47052 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
47053 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
47054 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
47055 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47056 # undef TARGET_MERGE_DECL_ATTRIBUTES
47057 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
47058 #endif
47059
47060 #undef TARGET_COMP_TYPE_ATTRIBUTES
47061 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
47062
47063 #undef TARGET_INIT_BUILTINS
47064 #define TARGET_INIT_BUILTINS ix86_init_builtins
47065 #undef TARGET_BUILTIN_DECL
47066 #define TARGET_BUILTIN_DECL ix86_builtin_decl
47067 #undef TARGET_EXPAND_BUILTIN
47068 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
47069
47070 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
47071 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
47072 ix86_builtin_vectorized_function
47073
47074 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
47075 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
47076
47077 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
47078 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
47079
47080 #undef TARGET_VECTORIZE_BUILTIN_GATHER
47081 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
47082
47083 #undef TARGET_BUILTIN_RECIPROCAL
47084 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
47085
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

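/* Instruction scheduling hooks.  */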
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead
#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_MEMMODEL_CHECK
#define TARGET_MEMMODEL_CHECK ix86_memmodel_check

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv

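/* TLS support and constant pool handling hooks.  */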
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

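/* Function multiversioning hooks.  */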
#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

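/* Argument passing and calling convention hooks.  */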
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

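/* Register class and reload hooks.  */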
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

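/* Vectorizer cost model and SIMD mode selection hooks.  */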
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

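/* Function-specific target option and inlining hooks.  */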
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

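/* SIMD clone support hooks.  */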
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

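/* Mode switching hooks.  */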
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

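/* Build the complete target vector from the hooks defined above.  */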
struct gcc_target targetm = TARGET_INITIALIZER;
\f
#include "gt-i386.h"