1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "pointer-set.h"
56 #include "hash-table.h"
57 #include "vec.h"
58 #include "basic-block.h"
59 #include "tree-ssa-alias.h"
60 #include "internal-fn.h"
61 #include "gimple-fold.h"
62 #include "tree-eh.h"
63 #include "gimple-expr.h"
64 #include "is-a.h"
65 #include "gimple.h"
66 #include "gimplify.h"
67 #include "cfgloop.h"
68 #include "dwarf2.h"
69 #include "df.h"
70 #include "tm-constrs.h"
71 #include "params.h"
72 #include "cselib.h"
73 #include "debug.h"
74 #include "sched-int.h"
75 #include "sbitmap.h"
76 #include "fibheap.h"
77 #include "opts.h"
78 #include "diagnostic.h"
79 #include "dumpfile.h"
80 #include "tree-pass.h"
81 #include "wide-int.h"
82 #include "context.h"
83 #include "pass_manager.h"
84 #include "target-globals.h"
85 #include "tree-vectorizer.h"
86 #include "shrink-wrap.h"
87
88 static rtx legitimize_dllimport_symbol (rtx, bool);
89 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
90 static rtx legitimize_pe_coff_symbol (rtx, bool);
91
92 #ifndef CHECK_STACK_LIMIT
93 #define CHECK_STACK_LIMIT (-1)
94 #endif
95
96 /* Return the index of the given mode in the mult and division cost tables.  */
97 #define MODE_INDEX(mode) \
98 ((mode) == QImode ? 0 \
99 : (mode) == HImode ? 1 \
100 : (mode) == SImode ? 2 \
101 : (mode) == DImode ? 3 \
102 : 4)
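
/* A minimal illustrative sketch (not used by the compiler) of how MODE_INDEX
   selects the per-mode entry of a cost table.  It assumes the mult_init[] and
   divide[] arrays of struct processor_costs declared in i386.h, which is how
   later code in this file performs such lookups.  */
#if 0
static inline int
example_mult_cost (const struct processor_costs *cost, enum machine_mode mode)
{
  /* QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3, other modes -> 4.  */
  return cost->mult_init[MODE_INDEX (mode)];
}
#endif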
103
104 /* Processor costs (relative to an add) */
105 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
106 #define COSTS_N_BYTES(N) ((N) * 2)
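
/* Worked example (illustrative only, under the assumption stated above that
   COSTS_N_INSNS (N) expands to (N) * 4): a 2-byte add scores
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so the size-oriented and
   speed-oriented cost tables stay on a comparable scale.  */
#if 0
_Static_assert (COSTS_N_BYTES (2) == COSTS_N_INSNS (1),
                "a 2-byte add and a one-insn add cost the same");
#endif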
107
108 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
109
110 static stringop_algs ix86_size_memcpy[2] = {
111 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
113 static stringop_algs ix86_size_memset[2] = {
114 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
115 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
116
117 const
118 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
119 COSTS_N_BYTES (2), /* cost of an add instruction */
120 COSTS_N_BYTES (3), /* cost of a lea instruction */
121 COSTS_N_BYTES (2), /* variable shift costs */
122 COSTS_N_BYTES (3), /* constant shift costs */
123 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
124 COSTS_N_BYTES (3), /* HI */
125 COSTS_N_BYTES (3), /* SI */
126 COSTS_N_BYTES (3), /* DI */
127 COSTS_N_BYTES (5)}, /* other */
128 0, /* cost of multiply per each bit set */
129 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
130 COSTS_N_BYTES (3), /* HI */
131 COSTS_N_BYTES (3), /* SI */
132 COSTS_N_BYTES (3), /* DI */
133 COSTS_N_BYTES (5)}, /* other */
134 COSTS_N_BYTES (3), /* cost of movsx */
135 COSTS_N_BYTES (3), /* cost of movzx */
136 0, /* "large" insn */
137 2, /* MOVE_RATIO */
138 2, /* cost for loading QImode using movzbl */
139 {2, 2, 2}, /* cost of loading integer registers
140 in QImode, HImode and SImode.
141 Relative to reg-reg move (2). */
142 {2, 2, 2}, /* cost of storing integer registers */
143 2, /* cost of reg,reg fld/fst */
144 {2, 2, 2}, /* cost of loading fp registers
145 in SFmode, DFmode and XFmode */
146 {2, 2, 2}, /* cost of storing fp registers
147 in SFmode, DFmode and XFmode */
148 3, /* cost of moving MMX register */
149 {3, 3}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {3, 3}, /* cost of storing MMX registers
152 in SImode and DImode */
153 3, /* cost of moving SSE register */
154 {3, 3, 3}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {3, 3, 3}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of l1 cache */
160 0, /* size of l2 cache */
161 0, /* size of prefetch block */
162 0, /* number of parallel prefetches */
163 2, /* Branch cost */
164 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
165 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
166 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
167 COSTS_N_BYTES (2), /* cost of FABS instruction. */
168 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
169 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
170 ix86_size_memcpy,
171 ix86_size_memset,
172 1, /* scalar_stmt_cost. */
173 1, /* scalar load_cost. */
174 1, /* scalar_store_cost. */
175 1, /* vec_stmt_cost. */
176 1, /* vec_to_scalar_cost. */
177 1, /* scalar_to_vec_cost. */
178 1, /* vec_align_load_cost. */
179 1, /* vec_unalign_load_cost. */
180 1, /* vec_store_cost. */
181 1, /* cond_taken_branch_cost. */
182 1, /* cond_not_taken_branch_cost. */
183 };
184
185 /* Processor costs (relative to an add) */
186 static stringop_algs i386_memcpy[2] = {
187 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
188 DUMMY_STRINGOP_ALGS};
189 static stringop_algs i386_memset[2] = {
190 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
191 DUMMY_STRINGOP_ALGS};
192
193 static const
194 struct processor_costs i386_cost = { /* 386 specific costs */
195 COSTS_N_INSNS (1), /* cost of an add instruction */
196 COSTS_N_INSNS (1), /* cost of a lea instruction */
197 COSTS_N_INSNS (3), /* variable shift costs */
198 COSTS_N_INSNS (2), /* constant shift costs */
199 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
200 COSTS_N_INSNS (6), /* HI */
201 COSTS_N_INSNS (6), /* SI */
202 COSTS_N_INSNS (6), /* DI */
203 COSTS_N_INSNS (6)}, /* other */
204 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
205 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
206 COSTS_N_INSNS (23), /* HI */
207 COSTS_N_INSNS (23), /* SI */
208 COSTS_N_INSNS (23), /* DI */
209 COSTS_N_INSNS (23)}, /* other */
210 COSTS_N_INSNS (3), /* cost of movsx */
211 COSTS_N_INSNS (2), /* cost of movzx */
212 15, /* "large" insn */
213 3, /* MOVE_RATIO */
214 4, /* cost for loading QImode using movzbl */
215 {2, 4, 2}, /* cost of loading integer registers
216 in QImode, HImode and SImode.
217 Relative to reg-reg move (2). */
218 {2, 4, 2}, /* cost of storing integer registers */
219 2, /* cost of reg,reg fld/fst */
220 {8, 8, 8}, /* cost of loading fp registers
221 in SFmode, DFmode and XFmode */
222 {8, 8, 8}, /* cost of storing fp registers
223 in SFmode, DFmode and XFmode */
224 2, /* cost of moving MMX register */
225 {4, 8}, /* cost of loading MMX registers
226 in SImode and DImode */
227 {4, 8}, /* cost of storing MMX registers
228 in SImode and DImode */
229 2, /* cost of moving SSE register */
230 {4, 8, 16}, /* cost of loading SSE registers
231 in SImode, DImode and TImode */
232 {4, 8, 16}, /* cost of storing SSE registers
233 in SImode, DImode and TImode */
234 3, /* MMX or SSE register to integer */
235 0, /* size of l1 cache */
236 0, /* size of l2 cache */
237 0, /* size of prefetch block */
238 0, /* number of parallel prefetches */
239 1, /* Branch cost */
240 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
241 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
242 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
243 COSTS_N_INSNS (22), /* cost of FABS instruction. */
244 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
245 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
246 i386_memcpy,
247 i386_memset,
248 1, /* scalar_stmt_cost. */
249 1, /* scalar load_cost. */
250 1, /* scalar_store_cost. */
251 1, /* vec_stmt_cost. */
252 1, /* vec_to_scalar_cost. */
253 1, /* scalar_to_vec_cost. */
254 1, /* vec_align_load_cost. */
255 2, /* vec_unalign_load_cost. */
256 1, /* vec_store_cost. */
257 3, /* cond_taken_branch_cost. */
258 1, /* cond_not_taken_branch_cost. */
259 };
260
261 static stringop_algs i486_memcpy[2] = {
262 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
263 DUMMY_STRINGOP_ALGS};
264 static stringop_algs i486_memset[2] = {
265 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
266 DUMMY_STRINGOP_ALGS};
267
268 static const
269 struct processor_costs i486_cost = { /* 486 specific costs */
270 COSTS_N_INSNS (1), /* cost of an add instruction */
271 COSTS_N_INSNS (1), /* cost of a lea instruction */
272 COSTS_N_INSNS (3), /* variable shift costs */
273 COSTS_N_INSNS (2), /* constant shift costs */
274 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
275 COSTS_N_INSNS (12), /* HI */
276 COSTS_N_INSNS (12), /* SI */
277 COSTS_N_INSNS (12), /* DI */
278 COSTS_N_INSNS (12)}, /* other */
279 1, /* cost of multiply per each bit set */
280 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
281 COSTS_N_INSNS (40), /* HI */
282 COSTS_N_INSNS (40), /* SI */
283 COSTS_N_INSNS (40), /* DI */
284 COSTS_N_INSNS (40)}, /* other */
285 COSTS_N_INSNS (3), /* cost of movsx */
286 COSTS_N_INSNS (2), /* cost of movzx */
287 15, /* "large" insn */
288 3, /* MOVE_RATIO */
289 4, /* cost for loading QImode using movzbl */
290 {2, 4, 2}, /* cost of loading integer registers
291 in QImode, HImode and SImode.
292 Relative to reg-reg move (2). */
293 {2, 4, 2}, /* cost of storing integer registers */
294 2, /* cost of reg,reg fld/fst */
295 {8, 8, 8}, /* cost of loading fp registers
296 in SFmode, DFmode and XFmode */
297 {8, 8, 8}, /* cost of storing fp registers
298 in SFmode, DFmode and XFmode */
299 2, /* cost of moving MMX register */
300 {4, 8}, /* cost of loading MMX registers
301 in SImode and DImode */
302 {4, 8}, /* cost of storing MMX registers
303 in SImode and DImode */
304 2, /* cost of moving SSE register */
305 {4, 8, 16}, /* cost of loading SSE registers
306 in SImode, DImode and TImode */
307 {4, 8, 16}, /* cost of storing SSE registers
308 in SImode, DImode and TImode */
309 3, /* MMX or SSE register to integer */
310 4, /* size of l1 cache. 486 has 8kB cache
311 shared for code and data, so 4kB is
312 not really precise. */
313 4, /* size of l2 cache */
314 0, /* size of prefetch block */
315 0, /* number of parallel prefetches */
316 1, /* Branch cost */
317 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
318 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
319 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
320 COSTS_N_INSNS (3), /* cost of FABS instruction. */
321 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
322 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
323 i486_memcpy,
324 i486_memset,
325 1, /* scalar_stmt_cost. */
326 1, /* scalar load_cost. */
327 1, /* scalar_store_cost. */
328 1, /* vec_stmt_cost. */
329 1, /* vec_to_scalar_cost. */
330 1, /* scalar_to_vec_cost. */
331 1, /* vec_align_load_cost. */
332 2, /* vec_unalign_load_cost. */
333 1, /* vec_store_cost. */
334 3, /* cond_taken_branch_cost. */
335 1, /* cond_not_taken_branch_cost. */
336 };
337
338 static stringop_algs pentium_memcpy[2] = {
339 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
340 DUMMY_STRINGOP_ALGS};
341 static stringop_algs pentium_memset[2] = {
342 {libcall, {{-1, rep_prefix_4_byte, false}}},
343 DUMMY_STRINGOP_ALGS};
344
345 static const
346 struct processor_costs pentium_cost = {
347 COSTS_N_INSNS (1), /* cost of an add instruction */
348 COSTS_N_INSNS (1), /* cost of a lea instruction */
349 COSTS_N_INSNS (4), /* variable shift costs */
350 COSTS_N_INSNS (1), /* constant shift costs */
351 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
352 COSTS_N_INSNS (11), /* HI */
353 COSTS_N_INSNS (11), /* SI */
354 COSTS_N_INSNS (11), /* DI */
355 COSTS_N_INSNS (11)}, /* other */
356 0, /* cost of multiply per each bit set */
357 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
358 COSTS_N_INSNS (25), /* HI */
359 COSTS_N_INSNS (25), /* SI */
360 COSTS_N_INSNS (25), /* DI */
361 COSTS_N_INSNS (25)}, /* other */
362 COSTS_N_INSNS (3), /* cost of movsx */
363 COSTS_N_INSNS (2), /* cost of movzx */
364 8, /* "large" insn */
365 6, /* MOVE_RATIO */
366 6, /* cost for loading QImode using movzbl */
367 {2, 4, 2}, /* cost of loading integer registers
368 in QImode, HImode and SImode.
369 Relative to reg-reg move (2). */
370 {2, 4, 2}, /* cost of storing integer registers */
371 2, /* cost of reg,reg fld/fst */
372 {2, 2, 6}, /* cost of loading fp registers
373 in SFmode, DFmode and XFmode */
374 {4, 4, 6}, /* cost of storing fp registers
375 in SFmode, DFmode and XFmode */
376 8, /* cost of moving MMX register */
377 {8, 8}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {8, 8}, /* cost of storing MMX registers
380 in SImode and DImode */
381 2, /* cost of moving SSE register */
382 {4, 8, 16}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {4, 8, 16}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 3, /* MMX or SSE register to integer */
387 8, /* size of l1 cache. */
388 8, /* size of l2 cache */
389 0, /* size of prefetch block */
390 0, /* number of parallel prefetches */
391 2, /* Branch cost */
392 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
393 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
394 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
395 COSTS_N_INSNS (1), /* cost of FABS instruction. */
396 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
397 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
398 pentium_memcpy,
399 pentium_memset,
400 1, /* scalar_stmt_cost. */
401 1, /* scalar load_cost. */
402 1, /* scalar_store_cost. */
403 1, /* vec_stmt_cost. */
404 1, /* vec_to_scalar_cost. */
405 1, /* scalar_to_vec_cost. */
406 1, /* vec_align_load_cost. */
407 2, /* vec_unalign_load_cost. */
408 1, /* vec_store_cost. */
409 3, /* cond_taken_branch_cost. */
410 1, /* cond_not_taken_branch_cost. */
411 };
412
413 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
414 (we ensure the alignment).  For small blocks an inline loop is still a
415 noticeable win; for bigger blocks either rep movsl or rep movsb is the way
416 to go.  Rep movsb apparently has a more expensive startup time in the CPU,
417 but after 4K the difference is down in the noise. */
418 static stringop_algs pentiumpro_memcpy[2] = {
419 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
420 {8192, rep_prefix_4_byte, false},
421 {-1, rep_prefix_1_byte, false}}},
422 DUMMY_STRINGOP_ALGS};
423 static stringop_algs pentiumpro_memset[2] = {
424 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
425 {8192, rep_prefix_4_byte, false},
426 {-1, libcall, false}}},
427 DUMMY_STRINGOP_ALGS};
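
/* Sketch of how tables like the two above are consumed (illustrative only;
   the real selection logic in this file, decide_alg, also weighs tuning flags
   and alignment).  Each array is assumed to be indexed by TARGET_64BIT
   ([0] for 32-bit, [1] for 64-bit code); within a variant, entries are
   scanned in order and the first one whose max covers the block size wins,
   with max == -1 meaning "any remaining size".  */
#if 0
static enum stringop_alg
example_pick_alg (const stringop_algs *algs, unsigned HOST_WIDE_INT n)
{
  for (int i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1
        || n <= (unsigned HOST_WIDE_INT) algs->size[i].max)
      return algs->size[i].alg;
  return algs->unknown_size;  /* fallback when the size is not a constant */
}
#endif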
428 static const
429 struct processor_costs pentiumpro_cost = {
430 COSTS_N_INSNS (1), /* cost of an add instruction */
431 COSTS_N_INSNS (1), /* cost of a lea instruction */
432 COSTS_N_INSNS (1), /* variable shift costs */
433 COSTS_N_INSNS (1), /* constant shift costs */
434 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
435 COSTS_N_INSNS (4), /* HI */
436 COSTS_N_INSNS (4), /* SI */
437 COSTS_N_INSNS (4), /* DI */
438 COSTS_N_INSNS (4)}, /* other */
439 0, /* cost of multiply per each bit set */
440 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
441 COSTS_N_INSNS (17), /* HI */
442 COSTS_N_INSNS (17), /* SI */
443 COSTS_N_INSNS (17), /* DI */
444 COSTS_N_INSNS (17)}, /* other */
445 COSTS_N_INSNS (1), /* cost of movsx */
446 COSTS_N_INSNS (1), /* cost of movzx */
447 8, /* "large" insn */
448 6, /* MOVE_RATIO */
449 2, /* cost for loading QImode using movzbl */
450 {4, 4, 4}, /* cost of loading integer registers
451 in QImode, HImode and SImode.
452 Relative to reg-reg move (2). */
453 {2, 2, 2}, /* cost of storing integer registers */
454 2, /* cost of reg,reg fld/fst */
455 {2, 2, 6}, /* cost of loading fp registers
456 in SFmode, DFmode and XFmode */
457 {4, 4, 6}, /* cost of storing fp registers
458 in SFmode, DFmode and XFmode */
459 2, /* cost of moving MMX register */
460 {2, 2}, /* cost of loading MMX registers
461 in SImode and DImode */
462 {2, 2}, /* cost of storing MMX registers
463 in SImode and DImode */
464 2, /* cost of moving SSE register */
465 {2, 2, 8}, /* cost of loading SSE registers
466 in SImode, DImode and TImode */
467 {2, 2, 8}, /* cost of storing SSE registers
468 in SImode, DImode and TImode */
469 3, /* MMX or SSE register to integer */
470 8, /* size of l1 cache. */
471 256, /* size of l2 cache */
472 32, /* size of prefetch block */
473 6, /* number of parallel prefetches */
474 2, /* Branch cost */
475 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
476 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
477 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
478 COSTS_N_INSNS (2), /* cost of FABS instruction. */
479 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
480 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
481 pentiumpro_memcpy,
482 pentiumpro_memset,
483 1, /* scalar_stmt_cost. */
484 1, /* scalar load_cost. */
485 1, /* scalar_store_cost. */
486 1, /* vec_stmt_cost. */
487 1, /* vec_to_scalar_cost. */
488 1, /* scalar_to_vec_cost. */
489 1, /* vec_align_load_cost. */
490 2, /* vec_unalign_load_cost. */
491 1, /* vec_store_cost. */
492 3, /* cond_taken_branch_cost. */
493 1, /* cond_not_taken_branch_cost. */
494 };
495
496 static stringop_algs geode_memcpy[2] = {
497 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
498 DUMMY_STRINGOP_ALGS};
499 static stringop_algs geode_memset[2] = {
500 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
501 DUMMY_STRINGOP_ALGS};
502 static const
503 struct processor_costs geode_cost = {
504 COSTS_N_INSNS (1), /* cost of an add instruction */
505 COSTS_N_INSNS (1), /* cost of a lea instruction */
506 COSTS_N_INSNS (2), /* variable shift costs */
507 COSTS_N_INSNS (1), /* constant shift costs */
508 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
509 COSTS_N_INSNS (4), /* HI */
510 COSTS_N_INSNS (7), /* SI */
511 COSTS_N_INSNS (7), /* DI */
512 COSTS_N_INSNS (7)}, /* other */
513 0, /* cost of multiply per each bit set */
514 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
515 COSTS_N_INSNS (23), /* HI */
516 COSTS_N_INSNS (39), /* SI */
517 COSTS_N_INSNS (39), /* DI */
518 COSTS_N_INSNS (39)}, /* other */
519 COSTS_N_INSNS (1), /* cost of movsx */
520 COSTS_N_INSNS (1), /* cost of movzx */
521 8, /* "large" insn */
522 4, /* MOVE_RATIO */
523 1, /* cost for loading QImode using movzbl */
524 {1, 1, 1}, /* cost of loading integer registers
525 in QImode, HImode and SImode.
526 Relative to reg-reg move (2). */
527 {1, 1, 1}, /* cost of storing integer registers */
528 1, /* cost of reg,reg fld/fst */
529 {1, 1, 1}, /* cost of loading fp registers
530 in SFmode, DFmode and XFmode */
531 {4, 6, 6}, /* cost of storing fp registers
532 in SFmode, DFmode and XFmode */
533
534 1, /* cost of moving MMX register */
535 {1, 1}, /* cost of loading MMX registers
536 in SImode and DImode */
537 {1, 1}, /* cost of storing MMX registers
538 in SImode and DImode */
539 1, /* cost of moving SSE register */
540 {1, 1, 1}, /* cost of loading SSE registers
541 in SImode, DImode and TImode */
542 {1, 1, 1}, /* cost of storing SSE registers
543 in SImode, DImode and TImode */
544 1, /* MMX or SSE register to integer */
545 64, /* size of l1 cache. */
546 128, /* size of l2 cache. */
547 32, /* size of prefetch block */
548 1, /* number of parallel prefetches */
549 1, /* Branch cost */
550 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
551 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
552 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
553 COSTS_N_INSNS (1), /* cost of FABS instruction. */
554 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
555 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
556 geode_memcpy,
557 geode_memset,
558 1, /* scalar_stmt_cost. */
559 1, /* scalar load_cost. */
560 1, /* scalar_store_cost. */
561 1, /* vec_stmt_cost. */
562 1, /* vec_to_scalar_cost. */
563 1, /* scalar_to_vec_cost. */
564 1, /* vec_align_load_cost. */
565 2, /* vec_unalign_load_cost. */
566 1, /* vec_store_cost. */
567 3, /* cond_taken_branch_cost. */
568 1, /* cond_not_taken_branch_cost. */
569 };
570
571 static stringop_algs k6_memcpy[2] = {
572 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
573 DUMMY_STRINGOP_ALGS};
574 static stringop_algs k6_memset[2] = {
575 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
576 DUMMY_STRINGOP_ALGS};
577 static const
578 struct processor_costs k6_cost = {
579 COSTS_N_INSNS (1), /* cost of an add instruction */
580 COSTS_N_INSNS (2), /* cost of a lea instruction */
581 COSTS_N_INSNS (1), /* variable shift costs */
582 COSTS_N_INSNS (1), /* constant shift costs */
583 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
584 COSTS_N_INSNS (3), /* HI */
585 COSTS_N_INSNS (3), /* SI */
586 COSTS_N_INSNS (3), /* DI */
587 COSTS_N_INSNS (3)}, /* other */
588 0, /* cost of multiply per each bit set */
589 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
590 COSTS_N_INSNS (18), /* HI */
591 COSTS_N_INSNS (18), /* SI */
592 COSTS_N_INSNS (18), /* DI */
593 COSTS_N_INSNS (18)}, /* other */
594 COSTS_N_INSNS (2), /* cost of movsx */
595 COSTS_N_INSNS (2), /* cost of movzx */
596 8, /* "large" insn */
597 4, /* MOVE_RATIO */
598 3, /* cost for loading QImode using movzbl */
599 {4, 5, 4}, /* cost of loading integer registers
600 in QImode, HImode and SImode.
601 Relative to reg-reg move (2). */
602 {2, 3, 2}, /* cost of storing integer registers */
603 4, /* cost of reg,reg fld/fst */
604 {6, 6, 6}, /* cost of loading fp registers
605 in SFmode, DFmode and XFmode */
606 {4, 4, 4}, /* cost of storing fp registers
607 in SFmode, DFmode and XFmode */
608 2, /* cost of moving MMX register */
609 {2, 2}, /* cost of loading MMX registers
610 in SImode and DImode */
611 {2, 2}, /* cost of storing MMX registers
612 in SImode and DImode */
613 2, /* cost of moving SSE register */
614 {2, 2, 8}, /* cost of loading SSE registers
615 in SImode, DImode and TImode */
616 {2, 2, 8}, /* cost of storing SSE registers
617 in SImode, DImode and TImode */
618 6, /* MMX or SSE register to integer */
619 32, /* size of l1 cache. */
620 32, /* size of l2 cache. Some models
621 have integrated l2 cache, but
622 optimizing for k6 is not important
623 enough to worry about that. */
624 32, /* size of prefetch block */
625 1, /* number of parallel prefetches */
626 1, /* Branch cost */
627 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
628 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
629 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
630 COSTS_N_INSNS (2), /* cost of FABS instruction. */
631 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
632 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
633 k6_memcpy,
634 k6_memset,
635 1, /* scalar_stmt_cost. */
636 1, /* scalar load_cost. */
637 1, /* scalar_store_cost. */
638 1, /* vec_stmt_cost. */
639 1, /* vec_to_scalar_cost. */
640 1, /* scalar_to_vec_cost. */
641 1, /* vec_align_load_cost. */
642 2, /* vec_unalign_load_cost. */
643 1, /* vec_store_cost. */
644 3, /* cond_taken_branch_cost. */
645 1, /* cond_not_taken_branch_cost. */
646 };
647
648 /* For some reason, Athlon deals better with the REP prefix (relative to
649 loops) than K8 does.  Alignment becomes important after 8 bytes for memcpy
650 and 128 bytes for memset. */
651 static stringop_algs athlon_memcpy[2] = {
652 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
653 DUMMY_STRINGOP_ALGS};
654 static stringop_algs athlon_memset[2] = {
655 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
656 DUMMY_STRINGOP_ALGS};
657 static const
658 struct processor_costs athlon_cost = {
659 COSTS_N_INSNS (1), /* cost of an add instruction */
660 COSTS_N_INSNS (2), /* cost of a lea instruction */
661 COSTS_N_INSNS (1), /* variable shift costs */
662 COSTS_N_INSNS (1), /* constant shift costs */
663 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
664 COSTS_N_INSNS (5), /* HI */
665 COSTS_N_INSNS (5), /* SI */
666 COSTS_N_INSNS (5), /* DI */
667 COSTS_N_INSNS (5)}, /* other */
668 0, /* cost of multiply per each bit set */
669 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
670 COSTS_N_INSNS (26), /* HI */
671 COSTS_N_INSNS (42), /* SI */
672 COSTS_N_INSNS (74), /* DI */
673 COSTS_N_INSNS (74)}, /* other */
674 COSTS_N_INSNS (1), /* cost of movsx */
675 COSTS_N_INSNS (1), /* cost of movzx */
676 8, /* "large" insn */
677 9, /* MOVE_RATIO */
678 4, /* cost for loading QImode using movzbl */
679 {3, 4, 3}, /* cost of loading integer registers
680 in QImode, HImode and SImode.
681 Relative to reg-reg move (2). */
682 {3, 4, 3}, /* cost of storing integer registers */
683 4, /* cost of reg,reg fld/fst */
684 {4, 4, 12}, /* cost of loading fp registers
685 in SFmode, DFmode and XFmode */
686 {6, 6, 8}, /* cost of storing fp registers
687 in SFmode, DFmode and XFmode */
688 2, /* cost of moving MMX register */
689 {4, 4}, /* cost of loading MMX registers
690 in SImode and DImode */
691 {4, 4}, /* cost of storing MMX registers
692 in SImode and DImode */
693 2, /* cost of moving SSE register */
694 {4, 4, 6}, /* cost of loading SSE registers
695 in SImode, DImode and TImode */
696 {4, 4, 5}, /* cost of storing SSE registers
697 in SImode, DImode and TImode */
698 5, /* MMX or SSE register to integer */
699 64, /* size of l1 cache. */
700 256, /* size of l2 cache. */
701 64, /* size of prefetch block */
702 6, /* number of parallel prefetches */
703 5, /* Branch cost */
704 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
705 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
706 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
707 COSTS_N_INSNS (2), /* cost of FABS instruction. */
708 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
709 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
710 athlon_memcpy,
711 athlon_memset,
712 1, /* scalar_stmt_cost. */
713 1, /* scalar load_cost. */
714 1, /* scalar_store_cost. */
715 1, /* vec_stmt_cost. */
716 1, /* vec_to_scalar_cost. */
717 1, /* scalar_to_vec_cost. */
718 1, /* vec_align_load_cost. */
719 2, /* vec_unalign_load_cost. */
720 1, /* vec_store_cost. */
721 3, /* cond_taken_branch_cost. */
722 1, /* cond_not_taken_branch_cost. */
723 };
724
725 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
726 small blocks it is better to use a loop.  For large blocks, a libcall can
727 do non-temporal accesses and beat inline code considerably. */
728 static stringop_algs k8_memcpy[2] = {
729 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
730 {-1, rep_prefix_4_byte, false}}},
731 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
732 {-1, libcall, false}}}};
733 static stringop_algs k8_memset[2] = {
734 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
735 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
736 {libcall, {{48, unrolled_loop, false},
737 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
738 static const
739 struct processor_costs k8_cost = {
740 COSTS_N_INSNS (1), /* cost of an add instruction */
741 COSTS_N_INSNS (2), /* cost of a lea instruction */
742 COSTS_N_INSNS (1), /* variable shift costs */
743 COSTS_N_INSNS (1), /* constant shift costs */
744 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
745 COSTS_N_INSNS (4), /* HI */
746 COSTS_N_INSNS (3), /* SI */
747 COSTS_N_INSNS (4), /* DI */
748 COSTS_N_INSNS (5)}, /* other */
749 0, /* cost of multiply per each bit set */
750 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
751 COSTS_N_INSNS (26), /* HI */
752 COSTS_N_INSNS (42), /* SI */
753 COSTS_N_INSNS (74), /* DI */
754 COSTS_N_INSNS (74)}, /* other */
755 COSTS_N_INSNS (1), /* cost of movsx */
756 COSTS_N_INSNS (1), /* cost of movzx */
757 8, /* "large" insn */
758 9, /* MOVE_RATIO */
759 4, /* cost for loading QImode using movzbl */
760 {3, 4, 3}, /* cost of loading integer registers
761 in QImode, HImode and SImode.
762 Relative to reg-reg move (2). */
763 {3, 4, 3}, /* cost of storing integer registers */
764 4, /* cost of reg,reg fld/fst */
765 {4, 4, 12}, /* cost of loading fp registers
766 in SFmode, DFmode and XFmode */
767 {6, 6, 8}, /* cost of storing fp registers
768 in SFmode, DFmode and XFmode */
769 2, /* cost of moving MMX register */
770 {3, 3}, /* cost of loading MMX registers
771 in SImode and DImode */
772 {4, 4}, /* cost of storing MMX registers
773 in SImode and DImode */
774 2, /* cost of moving SSE register */
775 {4, 3, 6}, /* cost of loading SSE registers
776 in SImode, DImode and TImode */
777 {4, 4, 5}, /* cost of storing SSE registers
778 in SImode, DImode and TImode */
779 5, /* MMX or SSE register to integer */
780 64, /* size of l1 cache. */
781 512, /* size of l2 cache. */
782 64, /* size of prefetch block */
783 /* New AMD processors never drop prefetches; if they cannot be performed
784 immediately, they are queued.  We set the number of simultaneous prefetches
785 to a large constant to reflect this (it is probably not a good idea to leave
786 the number of prefetches completely unlimited, as their execution also
787 takes some time). */
788 100, /* number of parallel prefetches */
789 3, /* Branch cost */
790 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
791 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
792 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
793 COSTS_N_INSNS (2), /* cost of FABS instruction. */
794 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
795 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
796
797 k8_memcpy,
798 k8_memset,
799 4, /* scalar_stmt_cost. */
800 2, /* scalar load_cost. */
801 2, /* scalar_store_cost. */
802 5, /* vec_stmt_cost. */
803 0, /* vec_to_scalar_cost. */
804 2, /* scalar_to_vec_cost. */
805 2, /* vec_align_load_cost. */
806 3, /* vec_unalign_load_cost. */
807 3, /* vec_store_cost. */
808 3, /* cond_taken_branch_cost. */
809 2, /* cond_not_taken_branch_cost. */
810 };
811
812 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
813 very small blocks it is better to use a loop.  For large blocks, a libcall
814 can do non-temporal accesses and beat inline code considerably. */
815 static stringop_algs amdfam10_memcpy[2] = {
816 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
817 {-1, rep_prefix_4_byte, false}}},
818 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
819 {-1, libcall, false}}}};
820 static stringop_algs amdfam10_memset[2] = {
821 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
822 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
823 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
824 {-1, libcall, false}}}};
825 struct processor_costs amdfam10_cost = {
826 COSTS_N_INSNS (1), /* cost of an add instruction */
827 COSTS_N_INSNS (2), /* cost of a lea instruction */
828 COSTS_N_INSNS (1), /* variable shift costs */
829 COSTS_N_INSNS (1), /* constant shift costs */
830 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
831 COSTS_N_INSNS (4), /* HI */
832 COSTS_N_INSNS (3), /* SI */
833 COSTS_N_INSNS (4), /* DI */
834 COSTS_N_INSNS (5)}, /* other */
835 0, /* cost of multiply per each bit set */
836 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
837 COSTS_N_INSNS (35), /* HI */
838 COSTS_N_INSNS (51), /* SI */
839 COSTS_N_INSNS (83), /* DI */
840 COSTS_N_INSNS (83)}, /* other */
841 COSTS_N_INSNS (1), /* cost of movsx */
842 COSTS_N_INSNS (1), /* cost of movzx */
843 8, /* "large" insn */
844 9, /* MOVE_RATIO */
845 4, /* cost for loading QImode using movzbl */
846 {3, 4, 3}, /* cost of loading integer registers
847 in QImode, HImode and SImode.
848 Relative to reg-reg move (2). */
849 {3, 4, 3}, /* cost of storing integer registers */
850 4, /* cost of reg,reg fld/fst */
851 {4, 4, 12}, /* cost of loading fp registers
852 in SFmode, DFmode and XFmode */
853 {6, 6, 8}, /* cost of storing fp registers
854 in SFmode, DFmode and XFmode */
855 2, /* cost of moving MMX register */
856 {3, 3}, /* cost of loading MMX registers
857 in SImode and DImode */
858 {4, 4}, /* cost of storing MMX registers
859 in SImode and DImode */
860 2, /* cost of moving SSE register */
861 {4, 4, 3}, /* cost of loading SSE registers
862 in SImode, DImode and TImode */
863 {4, 4, 5}, /* cost of storing SSE registers
864 in SImode, DImode and TImode */
865 3, /* MMX or SSE register to integer */
866 /* On K8:
867 MOVD reg64, xmmreg Double FSTORE 4
868 MOVD reg32, xmmreg Double FSTORE 4
869 On AMDFAM10:
870 MOVD reg64, xmmreg Double FADD 3
871 1/1 1/1
872 MOVD reg32, xmmreg Double FADD 3
873 1/1 1/1 */
874 64, /* size of l1 cache. */
875 512, /* size of l2 cache. */
876 64, /* size of prefetch block */
877 /* New AMD processors never drop prefetches; if they cannot be performed
878 immediately, they are queued.  We set the number of simultaneous prefetches
879 to a large constant to reflect this (it is probably not a good idea to leave
880 the number of prefetches completely unlimited, as their execution also
881 takes some time). */
882 100, /* number of parallel prefetches */
883 2, /* Branch cost */
884 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
885 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
886 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
887 COSTS_N_INSNS (2), /* cost of FABS instruction. */
888 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
889 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
890
891 amdfam10_memcpy,
892 amdfam10_memset,
893 4, /* scalar_stmt_cost. */
894 2, /* scalar load_cost. */
895 2, /* scalar_store_cost. */
896 6, /* vec_stmt_cost. */
897 0, /* vec_to_scalar_cost. */
898 2, /* scalar_to_vec_cost. */
899 2, /* vec_align_load_cost. */
900 2, /* vec_unalign_load_cost. */
901 2, /* vec_store_cost. */
902 2, /* cond_taken_branch_cost. */
903 1, /* cond_not_taken_branch_cost. */
904 };
905
906 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
907 very small blocks it is better to use a loop.  For large blocks, a libcall
908 can do non-temporal accesses and beat inline code considerably. */
909 static stringop_algs bdver1_memcpy[2] = {
910 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
911 {-1, rep_prefix_4_byte, false}}},
912 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
913 {-1, libcall, false}}}};
914 static stringop_algs bdver1_memset[2] = {
915 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
916 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
917 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
918 {-1, libcall, false}}}};
919
920 const struct processor_costs bdver1_cost = {
921 COSTS_N_INSNS (1), /* cost of an add instruction */
922 COSTS_N_INSNS (1), /* cost of a lea instruction */
923 COSTS_N_INSNS (1), /* variable shift costs */
924 COSTS_N_INSNS (1), /* constant shift costs */
925 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
926 COSTS_N_INSNS (4), /* HI */
927 COSTS_N_INSNS (4), /* SI */
928 COSTS_N_INSNS (6), /* DI */
929 COSTS_N_INSNS (6)}, /* other */
930 0, /* cost of multiply per each bit set */
931 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
932 COSTS_N_INSNS (35), /* HI */
933 COSTS_N_INSNS (51), /* SI */
934 COSTS_N_INSNS (83), /* DI */
935 COSTS_N_INSNS (83)}, /* other */
936 COSTS_N_INSNS (1), /* cost of movsx */
937 COSTS_N_INSNS (1), /* cost of movzx */
938 8, /* "large" insn */
939 9, /* MOVE_RATIO */
940 4, /* cost for loading QImode using movzbl */
941 {5, 5, 4}, /* cost of loading integer registers
942 in QImode, HImode and SImode.
943 Relative to reg-reg move (2). */
944 {4, 4, 4}, /* cost of storing integer registers */
945 2, /* cost of reg,reg fld/fst */
946 {5, 5, 12}, /* cost of loading fp registers
947 in SFmode, DFmode and XFmode */
948 {4, 4, 8}, /* cost of storing fp registers
949 in SFmode, DFmode and XFmode */
950 2, /* cost of moving MMX register */
951 {4, 4}, /* cost of loading MMX registers
952 in SImode and DImode */
953 {4, 4}, /* cost of storing MMX registers
954 in SImode and DImode */
955 2, /* cost of moving SSE register */
956 {4, 4, 4}, /* cost of loading SSE registers
957 in SImode, DImode and TImode */
958 {4, 4, 4}, /* cost of storing SSE registers
959 in SImode, DImode and TImode */
960 2, /* MMX or SSE register to integer */
961 /* On K8:
962 MOVD reg64, xmmreg Double FSTORE 4
963 MOVD reg32, xmmreg Double FSTORE 4
964 On AMDFAM10:
965 MOVD reg64, xmmreg Double FADD 3
966 1/1 1/1
967 MOVD reg32, xmmreg Double FADD 3
968 1/1 1/1 */
969 16, /* size of l1 cache. */
970 2048, /* size of l2 cache. */
971 64, /* size of prefetch block */
972 /* New AMD processors never drop prefetches; if they cannot be performed
973 immediately, they are queued.  We set the number of simultaneous prefetches
974 to a large constant to reflect this (it is probably not a good idea to leave
975 the number of prefetches completely unlimited, as their execution also
976 takes some time). */
977 100, /* number of parallel prefetches */
978 2, /* Branch cost */
979 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
980 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
981 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
982 COSTS_N_INSNS (2), /* cost of FABS instruction. */
983 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
984 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
985
986 bdver1_memcpy,
987 bdver1_memset,
988 6, /* scalar_stmt_cost. */
989 4, /* scalar load_cost. */
990 4, /* scalar_store_cost. */
991 6, /* vec_stmt_cost. */
992 0, /* vec_to_scalar_cost. */
993 2, /* scalar_to_vec_cost. */
994 4, /* vec_align_load_cost. */
995 4, /* vec_unalign_load_cost. */
996 4, /* vec_store_cost. */
997 2, /* cond_taken_branch_cost. */
998 1, /* cond_not_taken_branch_cost. */
999 };
1000
1001 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1002 very small blocks it is better to use a loop.  For large blocks, a libcall
1003 can do non-temporal accesses and beat inline code considerably. */
1004
1005 static stringop_algs bdver2_memcpy[2] = {
1006 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1007 {-1, rep_prefix_4_byte, false}}},
1008 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1009 {-1, libcall, false}}}};
1010 static stringop_algs bdver2_memset[2] = {
1011 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1012 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1013 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1014 {-1, libcall, false}}}};
1015
1016 const struct processor_costs bdver2_cost = {
1017 COSTS_N_INSNS (1), /* cost of an add instruction */
1018 COSTS_N_INSNS (1), /* cost of a lea instruction */
1019 COSTS_N_INSNS (1), /* variable shift costs */
1020 COSTS_N_INSNS (1), /* constant shift costs */
1021 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1022 COSTS_N_INSNS (4), /* HI */
1023 COSTS_N_INSNS (4), /* SI */
1024 COSTS_N_INSNS (6), /* DI */
1025 COSTS_N_INSNS (6)}, /* other */
1026 0, /* cost of multiply per each bit set */
1027 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1028 COSTS_N_INSNS (35), /* HI */
1029 COSTS_N_INSNS (51), /* SI */
1030 COSTS_N_INSNS (83), /* DI */
1031 COSTS_N_INSNS (83)}, /* other */
1032 COSTS_N_INSNS (1), /* cost of movsx */
1033 COSTS_N_INSNS (1), /* cost of movzx */
1034 8, /* "large" insn */
1035 9, /* MOVE_RATIO */
1036 4, /* cost for loading QImode using movzbl */
1037 {5, 5, 4}, /* cost of loading integer registers
1038 in QImode, HImode and SImode.
1039 Relative to reg-reg move (2). */
1040 {4, 4, 4}, /* cost of storing integer registers */
1041 2, /* cost of reg,reg fld/fst */
1042 {5, 5, 12}, /* cost of loading fp registers
1043 in SFmode, DFmode and XFmode */
1044 {4, 4, 8}, /* cost of storing fp registers
1045 in SFmode, DFmode and XFmode */
1046 2, /* cost of moving MMX register */
1047 {4, 4}, /* cost of loading MMX registers
1048 in SImode and DImode */
1049 {4, 4}, /* cost of storing MMX registers
1050 in SImode and DImode */
1051 2, /* cost of moving SSE register */
1052 {4, 4, 4}, /* cost of loading SSE registers
1053 in SImode, DImode and TImode */
1054 {4, 4, 4}, /* cost of storing SSE registers
1055 in SImode, DImode and TImode */
1056 2, /* MMX or SSE register to integer */
1057 /* On K8:
1058 MOVD reg64, xmmreg Double FSTORE 4
1059 MOVD reg32, xmmreg Double FSTORE 4
1060 On AMDFAM10:
1061 MOVD reg64, xmmreg Double FADD 3
1062 1/1 1/1
1063 MOVD reg32, xmmreg Double FADD 3
1064 1/1 1/1 */
1065 16, /* size of l1 cache. */
1066 2048, /* size of l2 cache. */
1067 64, /* size of prefetch block */
1068 /* New AMD processors never drop prefetches; if they cannot be performed
1069 immediately, they are queued.  We set the number of simultaneous prefetches
1070 to a large constant to reflect this (it is probably not a good idea to leave
1071 the number of prefetches completely unlimited, as their execution also
1072 takes some time). */
1073 100, /* number of parallel prefetches */
1074 2, /* Branch cost */
1075 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1076 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1077 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1078 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1079 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1080 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1081
1082 bdver2_memcpy,
1083 bdver2_memset,
1084 6, /* scalar_stmt_cost. */
1085 4, /* scalar load_cost. */
1086 4, /* scalar_store_cost. */
1087 6, /* vec_stmt_cost. */
1088 0, /* vec_to_scalar_cost. */
1089 2, /* scalar_to_vec_cost. */
1090 4, /* vec_align_load_cost. */
1091 4, /* vec_unalign_load_cost. */
1092 4, /* vec_store_cost. */
1093 2, /* cond_taken_branch_cost. */
1094 1, /* cond_not_taken_branch_cost. */
1095 };
1096
1097
1098 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1099 very small blocks it is better to use a loop.  For large blocks, a libcall
1100 can do non-temporal accesses and beat inline code considerably. */
1101 static stringop_algs bdver3_memcpy[2] = {
1102 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1103 {-1, rep_prefix_4_byte, false}}},
1104 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1105 {-1, libcall, false}}}};
1106 static stringop_algs bdver3_memset[2] = {
1107 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1108 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1109 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1110 {-1, libcall, false}}}};
1111 struct processor_costs bdver3_cost = {
1112 COSTS_N_INSNS (1), /* cost of an add instruction */
1113 COSTS_N_INSNS (1), /* cost of a lea instruction */
1114 COSTS_N_INSNS (1), /* variable shift costs */
1115 COSTS_N_INSNS (1), /* constant shift costs */
1116 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1117 COSTS_N_INSNS (4), /* HI */
1118 COSTS_N_INSNS (4), /* SI */
1119 COSTS_N_INSNS (6), /* DI */
1120 COSTS_N_INSNS (6)}, /* other */
1121 0, /* cost of multiply per each bit set */
1122 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1123 COSTS_N_INSNS (35), /* HI */
1124 COSTS_N_INSNS (51), /* SI */
1125 COSTS_N_INSNS (83), /* DI */
1126 COSTS_N_INSNS (83)}, /* other */
1127 COSTS_N_INSNS (1), /* cost of movsx */
1128 COSTS_N_INSNS (1), /* cost of movzx */
1129 8, /* "large" insn */
1130 9, /* MOVE_RATIO */
1131 4, /* cost for loading QImode using movzbl */
1132 {5, 5, 4}, /* cost of loading integer registers
1133 in QImode, HImode and SImode.
1134 Relative to reg-reg move (2). */
1135 {4, 4, 4}, /* cost of storing integer registers */
1136 2, /* cost of reg,reg fld/fst */
1137 {5, 5, 12}, /* cost of loading fp registers
1138 in SFmode, DFmode and XFmode */
1139 {4, 4, 8}, /* cost of storing fp registers
1140 in SFmode, DFmode and XFmode */
1141 2, /* cost of moving MMX register */
1142 {4, 4}, /* cost of loading MMX registers
1143 in SImode and DImode */
1144 {4, 4}, /* cost of storing MMX registers
1145 in SImode and DImode */
1146 2, /* cost of moving SSE register */
1147 {4, 4, 4}, /* cost of loading SSE registers
1148 in SImode, DImode and TImode */
1149 {4, 4, 4}, /* cost of storing SSE registers
1150 in SImode, DImode and TImode */
1151 2, /* MMX or SSE register to integer */
1152 16, /* size of l1 cache. */
1153 2048, /* size of l2 cache. */
1154 64, /* size of prefetch block */
1155 /* New AMD processors never drop prefetches; if they cannot be performed
1156 immediately, they are queued.  We set the number of simultaneous prefetches
1157 to a large constant to reflect this (it is probably not a good idea to leave
1158 the number of prefetches completely unlimited, as their execution also
1159 takes some time). */
1160 100, /* number of parallel prefetches */
1161 2, /* Branch cost */
1162 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1168
1169 bdver3_memcpy,
1170 bdver3_memset,
1171 6, /* scalar_stmt_cost. */
1172 4, /* scalar load_cost. */
1173 4, /* scalar_store_cost. */
1174 6, /* vec_stmt_cost. */
1175 0, /* vec_to_scalar_cost. */
1176 2, /* scalar_to_vec_cost. */
1177 4, /* vec_align_load_cost. */
1178 4, /* vec_unalign_load_cost. */
1179 4, /* vec_store_cost. */
1180 2, /* cond_taken_branch_cost. */
1181 1, /* cond_not_taken_branch_cost. */
1182 };
1183
1184 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1185 very small blocks it is better to use a loop.  For large blocks, a libcall
1186 can do non-temporal accesses and beat inline code considerably. */
1187 static stringop_algs bdver4_memcpy[2] = {
1188 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1189 {-1, rep_prefix_4_byte, false}}},
1190 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1191 {-1, libcall, false}}}};
1192 static stringop_algs bdver4_memset[2] = {
1193 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1194 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1195 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1196 {-1, libcall, false}}}};
1197 struct processor_costs bdver4_cost = {
1198 COSTS_N_INSNS (1), /* cost of an add instruction */
1199 COSTS_N_INSNS (1), /* cost of a lea instruction */
1200 COSTS_N_INSNS (1), /* variable shift costs */
1201 COSTS_N_INSNS (1), /* constant shift costs */
1202 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1203 COSTS_N_INSNS (4), /* HI */
1204 COSTS_N_INSNS (4), /* SI */
1205 COSTS_N_INSNS (6), /* DI */
1206 COSTS_N_INSNS (6)}, /* other */
1207 0, /* cost of multiply per each bit set */
1208 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1209 COSTS_N_INSNS (35), /* HI */
1210 COSTS_N_INSNS (51), /* SI */
1211 COSTS_N_INSNS (83), /* DI */
1212 COSTS_N_INSNS (83)}, /* other */
1213 COSTS_N_INSNS (1), /* cost of movsx */
1214 COSTS_N_INSNS (1), /* cost of movzx */
1215 8, /* "large" insn */
1216 9, /* MOVE_RATIO */
1217 4, /* cost for loading QImode using movzbl */
1218 {5, 5, 4}, /* cost of loading integer registers
1219 in QImode, HImode and SImode.
1220 Relative to reg-reg move (2). */
1221 {4, 4, 4}, /* cost of storing integer registers */
1222 2, /* cost of reg,reg fld/fst */
1223 {5, 5, 12}, /* cost of loading fp registers
1224 in SFmode, DFmode and XFmode */
1225 {4, 4, 8}, /* cost of storing fp registers
1226 in SFmode, DFmode and XFmode */
1227 2, /* cost of moving MMX register */
1228 {4, 4}, /* cost of loading MMX registers
1229 in SImode and DImode */
1230 {4, 4}, /* cost of storing MMX registers
1231 in SImode and DImode */
1232 2, /* cost of moving SSE register */
1233 {4, 4, 4}, /* cost of loading SSE registers
1234 in SImode, DImode and TImode */
1235 {4, 4, 4}, /* cost of storing SSE registers
1236 in SImode, DImode and TImode */
1237 2, /* MMX or SSE register to integer */
1238 16, /* size of l1 cache. */
1239 2048, /* size of l2 cache. */
1240 64, /* size of prefetch block */
1241 /* New AMD processors never drop prefetches; if they cannot be performed
1242 immediately, they are queued.  We set the number of simultaneous prefetches
1243 to a large constant to reflect this (it is probably not a good idea to leave
1244 the number of prefetches completely unlimited, as their execution also
1245 takes some time). */
1246 100, /* number of parallel prefetches */
1247 2, /* Branch cost */
1248 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1249 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1250 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1251 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1252 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1253 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1254
1255 bdver4_memcpy,
1256 bdver4_memset,
1257 6, /* scalar_stmt_cost. */
1258 4, /* scalar load_cost. */
1259 4, /* scalar_store_cost. */
1260 6, /* vec_stmt_cost. */
1261 0, /* vec_to_scalar_cost. */
1262 2, /* scalar_to_vec_cost. */
1263 4, /* vec_align_load_cost. */
1264 4, /* vec_unalign_load_cost. */
1265 4, /* vec_store_cost. */
1266 2, /* cond_taken_branch_cost. */
1267 1, /* cond_not_taken_branch_cost. */
1268 };
1269
1270 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1271 very small blocks it is better to use a loop.  For large blocks, a libcall
1272 can do non-temporal accesses and beat inline code considerably. */
1273 static stringop_algs btver1_memcpy[2] = {
1274 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1275 {-1, rep_prefix_4_byte, false}}},
1276 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1277 {-1, libcall, false}}}};
1278 static stringop_algs btver1_memset[2] = {
1279 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1280 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1281 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1282 {-1, libcall, false}}}};
1283 const struct processor_costs btver1_cost = {
1284 COSTS_N_INSNS (1), /* cost of an add instruction */
1285 COSTS_N_INSNS (2), /* cost of a lea instruction */
1286 COSTS_N_INSNS (1), /* variable shift costs */
1287 COSTS_N_INSNS (1), /* constant shift costs */
1288 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1289 COSTS_N_INSNS (4), /* HI */
1290 COSTS_N_INSNS (3), /* SI */
1291 COSTS_N_INSNS (4), /* DI */
1292 COSTS_N_INSNS (5)}, /* other */
1293 0, /* cost of multiply per each bit set */
1294 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1295 COSTS_N_INSNS (35), /* HI */
1296 COSTS_N_INSNS (51), /* SI */
1297 COSTS_N_INSNS (83), /* DI */
1298 COSTS_N_INSNS (83)}, /* other */
1299 COSTS_N_INSNS (1), /* cost of movsx */
1300 COSTS_N_INSNS (1), /* cost of movzx */
1301 8, /* "large" insn */
1302 9, /* MOVE_RATIO */
1303 4, /* cost for loading QImode using movzbl */
1304 {3, 4, 3}, /* cost of loading integer registers
1305 in QImode, HImode and SImode.
1306 Relative to reg-reg move (2). */
1307 {3, 4, 3}, /* cost of storing integer registers */
1308 4, /* cost of reg,reg fld/fst */
1309 {4, 4, 12}, /* cost of loading fp registers
1310 in SFmode, DFmode and XFmode */
1311 {6, 6, 8}, /* cost of storing fp registers
1312 in SFmode, DFmode and XFmode */
1313 2, /* cost of moving MMX register */
1314 {3, 3}, /* cost of loading MMX registers
1315 in SImode and DImode */
1316 {4, 4}, /* cost of storing MMX registers
1317 in SImode and DImode */
1318 2, /* cost of moving SSE register */
1319 {4, 4, 3}, /* cost of loading SSE registers
1320 in SImode, DImode and TImode */
1321 {4, 4, 5}, /* cost of storing SSE registers
1322 in SImode, DImode and TImode */
1323 3, /* MMX or SSE register to integer */
1324 /* On K8:
1325 MOVD reg64, xmmreg Double FSTORE 4
1326 MOVD reg32, xmmreg Double FSTORE 4
1327 On AMDFAM10:
1328 MOVD reg64, xmmreg Double FADD 3
1329 1/1 1/1
1330 MOVD reg32, xmmreg Double FADD 3
1331 1/1 1/1 */
1332 32, /* size of l1 cache. */
1333 512, /* size of l2 cache. */
1334 64, /* size of prefetch block */
1335 100, /* number of parallel prefetches */
1336 2, /* Branch cost */
1337 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1338 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1339 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1340 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1341 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1342 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1343
1344 btver1_memcpy,
1345 btver1_memset,
1346 4, /* scalar_stmt_cost. */
1347 2, /* scalar load_cost. */
1348 2, /* scalar_store_cost. */
1349 6, /* vec_stmt_cost. */
1350 0, /* vec_to_scalar_cost. */
1351 2, /* scalar_to_vec_cost. */
1352 2, /* vec_align_load_cost. */
1353 2, /* vec_unalign_load_cost. */
1354 2, /* vec_store_cost. */
1355 2, /* cond_taken_branch_cost. */
1356 1, /* cond_not_taken_branch_cost. */
1357 };
1358
1359 static stringop_algs btver2_memcpy[2] = {
1360 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1361 {-1, rep_prefix_4_byte, false}}},
1362 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1363 {-1, libcall, false}}}};
1364 static stringop_algs btver2_memset[2] = {
1365 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1366 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1367 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1368 {-1, libcall, false}}}};
1369 const struct processor_costs btver2_cost = {
1370 COSTS_N_INSNS (1), /* cost of an add instruction */
1371 COSTS_N_INSNS (2), /* cost of a lea instruction */
1372 COSTS_N_INSNS (1), /* variable shift costs */
1373 COSTS_N_INSNS (1), /* constant shift costs */
1374 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1375 COSTS_N_INSNS (4), /* HI */
1376 COSTS_N_INSNS (3), /* SI */
1377 COSTS_N_INSNS (4), /* DI */
1378 COSTS_N_INSNS (5)}, /* other */
1379 0, /* cost of multiply per each bit set */
1380 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1381 COSTS_N_INSNS (35), /* HI */
1382 COSTS_N_INSNS (51), /* SI */
1383 COSTS_N_INSNS (83), /* DI */
1384 COSTS_N_INSNS (83)}, /* other */
1385 COSTS_N_INSNS (1), /* cost of movsx */
1386 COSTS_N_INSNS (1), /* cost of movzx */
1387 8, /* "large" insn */
1388 9, /* MOVE_RATIO */
1389 4, /* cost for loading QImode using movzbl */
1390 {3, 4, 3}, /* cost of loading integer registers
1391 in QImode, HImode and SImode.
1392 Relative to reg-reg move (2). */
1393 {3, 4, 3}, /* cost of storing integer registers */
1394 4, /* cost of reg,reg fld/fst */
1395 {4, 4, 12}, /* cost of loading fp registers
1396 in SFmode, DFmode and XFmode */
1397 {6, 6, 8}, /* cost of storing fp registers
1398 in SFmode, DFmode and XFmode */
1399 2, /* cost of moving MMX register */
1400 {3, 3}, /* cost of loading MMX registers
1401 in SImode and DImode */
1402 {4, 4}, /* cost of storing MMX registers
1403 in SImode and DImode */
1404 2, /* cost of moving SSE register */
1405 {4, 4, 3}, /* cost of loading SSE registers
1406 in SImode, DImode and TImode */
1407 {4, 4, 5}, /* cost of storing SSE registers
1408 in SImode, DImode and TImode */
1409 3, /* MMX or SSE register to integer */
1410 /* On K8:
1411 MOVD reg64, xmmreg Double FSTORE 4
1412 MOVD reg32, xmmreg Double FSTORE 4
1413 On AMDFAM10:
1414 MOVD reg64, xmmreg Double FADD 3
1415 1/1 1/1
1416 MOVD reg32, xmmreg Double FADD 3
1417 1/1 1/1 */
1418 32, /* size of l1 cache. */
1419 2048, /* size of l2 cache. */
1420 64, /* size of prefetch block */
1421 100, /* number of parallel prefetches */
1422 2, /* Branch cost */
1423 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1424 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1425 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1426 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1427 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1428 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1429 btver2_memcpy,
1430 btver2_memset,
1431 4, /* scalar_stmt_cost. */
1432 2, /* scalar load_cost. */
1433 2, /* scalar_store_cost. */
1434 6, /* vec_stmt_cost. */
1435 0, /* vec_to_scalar_cost. */
1436 2, /* scalar_to_vec_cost. */
1437 2, /* vec_align_load_cost. */
1438 2, /* vec_unalign_load_cost. */
1439 2, /* vec_store_cost. */
1440 2, /* cond_taken_branch_cost. */
1441 1, /* cond_not_taken_branch_cost. */
1442 };
1443
1444 static stringop_algs pentium4_memcpy[2] = {
1445 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1446 DUMMY_STRINGOP_ALGS};
1447 static stringop_algs pentium4_memset[2] = {
1448 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1449 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1450 DUMMY_STRINGOP_ALGS};
1451
1452 static const
1453 struct processor_costs pentium4_cost = {
1454 COSTS_N_INSNS (1), /* cost of an add instruction */
1455 COSTS_N_INSNS (3), /* cost of a lea instruction */
1456 COSTS_N_INSNS (4), /* variable shift costs */
1457 COSTS_N_INSNS (4), /* constant shift costs */
1458 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1459 COSTS_N_INSNS (15), /* HI */
1460 COSTS_N_INSNS (15), /* SI */
1461 COSTS_N_INSNS (15), /* DI */
1462 COSTS_N_INSNS (15)}, /* other */
1463 0, /* cost of multiply per each bit set */
1464 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1465 COSTS_N_INSNS (56), /* HI */
1466 COSTS_N_INSNS (56), /* SI */
1467 COSTS_N_INSNS (56), /* DI */
1468 COSTS_N_INSNS (56)}, /* other */
1469 COSTS_N_INSNS (1), /* cost of movsx */
1470 COSTS_N_INSNS (1), /* cost of movzx */
1471 16, /* "large" insn */
1472 6, /* MOVE_RATIO */
1473 2, /* cost for loading QImode using movzbl */
1474 {4, 5, 4}, /* cost of loading integer registers
1475 in QImode, HImode and SImode.
1476 Relative to reg-reg move (2). */
1477 {2, 3, 2}, /* cost of storing integer registers */
1478 2, /* cost of reg,reg fld/fst */
1479 {2, 2, 6}, /* cost of loading fp registers
1480 in SFmode, DFmode and XFmode */
1481 {4, 4, 6}, /* cost of storing fp registers
1482 in SFmode, DFmode and XFmode */
1483 2, /* cost of moving MMX register */
1484 {2, 2}, /* cost of loading MMX registers
1485 in SImode and DImode */
1486 {2, 2}, /* cost of storing MMX registers
1487 in SImode and DImode */
1488 12, /* cost of moving SSE register */
1489 {12, 12, 12}, /* cost of loading SSE registers
1490 in SImode, DImode and TImode */
1491 {2, 2, 8}, /* cost of storing SSE registers
1492 in SImode, DImode and TImode */
1493 10, /* MMX or SSE register to integer */
1494 8, /* size of l1 cache. */
1495 256, /* size of l2 cache. */
1496 64, /* size of prefetch block */
1497 6, /* number of parallel prefetches */
1498 2, /* Branch cost */
1499 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1500 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1501 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1502 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1503 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1504 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1505 pentium4_memcpy,
1506 pentium4_memset,
1507 1, /* scalar_stmt_cost. */
1508 1, /* scalar load_cost. */
1509 1, /* scalar_store_cost. */
1510 1, /* vec_stmt_cost. */
1511 1, /* vec_to_scalar_cost. */
1512 1, /* scalar_to_vec_cost. */
1513 1, /* vec_align_load_cost. */
1514 2, /* vec_unalign_load_cost. */
1515 1, /* vec_store_cost. */
1516 3, /* cond_taken_branch_cost. */
1517 1, /* cond_not_taken_branch_cost. */
1518 };
1519
1520 static stringop_algs nocona_memcpy[2] = {
1521 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1522 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1523 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1524
1525 static stringop_algs nocona_memset[2] = {
1526 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1527 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1528 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1529 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1530
1531 static const
1532 struct processor_costs nocona_cost = {
1533 COSTS_N_INSNS (1), /* cost of an add instruction */
1534 COSTS_N_INSNS (1), /* cost of a lea instruction */
1535 COSTS_N_INSNS (1), /* variable shift costs */
1536 COSTS_N_INSNS (1), /* constant shift costs */
1537 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1538 COSTS_N_INSNS (10), /* HI */
1539 COSTS_N_INSNS (10), /* SI */
1540 COSTS_N_INSNS (10), /* DI */
1541 COSTS_N_INSNS (10)}, /* other */
1542 0, /* cost of multiply per each bit set */
1543 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1544 COSTS_N_INSNS (66), /* HI */
1545 COSTS_N_INSNS (66), /* SI */
1546 COSTS_N_INSNS (66), /* DI */
1547 COSTS_N_INSNS (66)}, /* other */
1548 COSTS_N_INSNS (1), /* cost of movsx */
1549 COSTS_N_INSNS (1), /* cost of movzx */
1550 16, /* "large" insn */
1551 17, /* MOVE_RATIO */
1552 4, /* cost for loading QImode using movzbl */
1553 {4, 4, 4}, /* cost of loading integer registers
1554 in QImode, HImode and SImode.
1555 Relative to reg-reg move (2). */
1556 {4, 4, 4}, /* cost of storing integer registers */
1557 3, /* cost of reg,reg fld/fst */
1558 {12, 12, 12}, /* cost of loading fp registers
1559 in SFmode, DFmode and XFmode */
1560 {4, 4, 4}, /* cost of storing fp registers
1561 in SFmode, DFmode and XFmode */
1562 6, /* cost of moving MMX register */
1563 {12, 12}, /* cost of loading MMX registers
1564 in SImode and DImode */
1565 {12, 12}, /* cost of storing MMX registers
1566 in SImode and DImode */
1567 6, /* cost of moving SSE register */
1568 {12, 12, 12}, /* cost of loading SSE registers
1569 in SImode, DImode and TImode */
1570 {12, 12, 12}, /* cost of storing SSE registers
1571 in SImode, DImode and TImode */
1572 8, /* MMX or SSE register to integer */
1573 8, /* size of l1 cache. */
1574 1024, /* size of l2 cache. */
1575 64, /* size of prefetch block */
1576 8, /* number of parallel prefetches */
1577 1, /* Branch cost */
1578 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1579 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1580 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1581 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1582 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1583 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1584 nocona_memcpy,
1585 nocona_memset,
1586 1, /* scalar_stmt_cost. */
1587 1, /* scalar load_cost. */
1588 1, /* scalar_store_cost. */
1589 1, /* vec_stmt_cost. */
1590 1, /* vec_to_scalar_cost. */
1591 1, /* scalar_to_vec_cost. */
1592 1, /* vec_align_load_cost. */
1593 2, /* vec_unalign_load_cost. */
1594 1, /* vec_store_cost. */
1595 3, /* cond_taken_branch_cost. */
1596 1, /* cond_not_taken_branch_cost. */
1597 };
1598
1599 static stringop_algs atom_memcpy[2] = {
1600 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1601 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1602 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1603 static stringop_algs atom_memset[2] = {
1604 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1605 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1606 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1607 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1608 static const
1609 struct processor_costs atom_cost = {
1610 COSTS_N_INSNS (1), /* cost of an add instruction */
1611 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1612 COSTS_N_INSNS (1), /* variable shift costs */
1613 COSTS_N_INSNS (1), /* constant shift costs */
1614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1615 COSTS_N_INSNS (4), /* HI */
1616 COSTS_N_INSNS (3), /* SI */
1617 COSTS_N_INSNS (4), /* DI */
1618 COSTS_N_INSNS (2)}, /* other */
1619 0, /* cost of multiply per each bit set */
1620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1621 COSTS_N_INSNS (26), /* HI */
1622 COSTS_N_INSNS (42), /* SI */
1623 COSTS_N_INSNS (74), /* DI */
1624 COSTS_N_INSNS (74)}, /* other */
1625 COSTS_N_INSNS (1), /* cost of movsx */
1626 COSTS_N_INSNS (1), /* cost of movzx */
1627 8, /* "large" insn */
1628 17, /* MOVE_RATIO */
1629 4, /* cost for loading QImode using movzbl */
1630 {4, 4, 4}, /* cost of loading integer registers
1631 in QImode, HImode and SImode.
1632 Relative to reg-reg move (2). */
1633 {4, 4, 4}, /* cost of storing integer registers */
1634 4, /* cost of reg,reg fld/fst */
1635 {12, 12, 12}, /* cost of loading fp registers
1636 in SFmode, DFmode and XFmode */
1637 {6, 6, 8}, /* cost of storing fp registers
1638 in SFmode, DFmode and XFmode */
1639 2, /* cost of moving MMX register */
1640 {8, 8}, /* cost of loading MMX registers
1641 in SImode and DImode */
1642 {8, 8}, /* cost of storing MMX registers
1643 in SImode and DImode */
1644 2, /* cost of moving SSE register */
1645 {8, 8, 8}, /* cost of loading SSE registers
1646 in SImode, DImode and TImode */
1647 {8, 8, 8}, /* cost of storing SSE registers
1648 in SImode, DImode and TImode */
1649 5, /* MMX or SSE register to integer */
1650 32, /* size of l1 cache. */
1651 256, /* size of l2 cache. */
1652 64, /* size of prefetch block */
1653 6, /* number of parallel prefetches */
1654 3, /* Branch cost */
1655 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1656 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1657 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1658 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1659 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1660 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1661 atom_memcpy,
1662 atom_memset,
1663 1, /* scalar_stmt_cost. */
1664 1, /* scalar load_cost. */
1665 1, /* scalar_store_cost. */
1666 1, /* vec_stmt_cost. */
1667 1, /* vec_to_scalar_cost. */
1668 1, /* scalar_to_vec_cost. */
1669 1, /* vec_align_load_cost. */
1670 2, /* vec_unalign_load_cost. */
1671 1, /* vec_store_cost. */
1672 3, /* cond_taken_branch_cost. */
1673 1, /* cond_not_taken_branch_cost. */
1674 };
1675
1676 static stringop_algs slm_memcpy[2] = {
1677 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1678 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1679 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1680 static stringop_algs slm_memset[2] = {
1681 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1682 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1683 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1684 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1685 static const
1686 struct processor_costs slm_cost = {
1687 COSTS_N_INSNS (1), /* cost of an add instruction */
1688 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1689 COSTS_N_INSNS (1), /* variable shift costs */
1690 COSTS_N_INSNS (1), /* constant shift costs */
1691 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1692 COSTS_N_INSNS (3), /* HI */
1693 COSTS_N_INSNS (3), /* SI */
1694 COSTS_N_INSNS (4), /* DI */
1695 COSTS_N_INSNS (2)}, /* other */
1696 0, /* cost of multiply per each bit set */
1697 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1698 COSTS_N_INSNS (26), /* HI */
1699 COSTS_N_INSNS (42), /* SI */
1700 COSTS_N_INSNS (74), /* DI */
1701 COSTS_N_INSNS (74)}, /* other */
1702 COSTS_N_INSNS (1), /* cost of movsx */
1703 COSTS_N_INSNS (1), /* cost of movzx */
1704 8, /* "large" insn */
1705 17, /* MOVE_RATIO */
1706 4, /* cost for loading QImode using movzbl */
1707 {4, 4, 4}, /* cost of loading integer registers
1708 in QImode, HImode and SImode.
1709 Relative to reg-reg move (2). */
1710 {4, 4, 4}, /* cost of storing integer registers */
1711 4, /* cost of reg,reg fld/fst */
1712 {12, 12, 12}, /* cost of loading fp registers
1713 in SFmode, DFmode and XFmode */
1714 {6, 6, 8}, /* cost of storing fp registers
1715 in SFmode, DFmode and XFmode */
1716 2, /* cost of moving MMX register */
1717 {8, 8}, /* cost of loading MMX registers
1718 in SImode and DImode */
1719 {8, 8}, /* cost of storing MMX registers
1720 in SImode and DImode */
1721 2, /* cost of moving SSE register */
1722 {8, 8, 8}, /* cost of loading SSE registers
1723 in SImode, DImode and TImode */
1724 {8, 8, 8}, /* cost of storing SSE registers
1725 in SImode, DImode and TImode */
1726 5, /* MMX or SSE register to integer */
1727 32, /* size of l1 cache. */
1728 256, /* size of l2 cache. */
1729 64, /* size of prefetch block */
1730 6, /* number of parallel prefetches */
1731 3, /* Branch cost */
1732 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1733 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1734 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1735 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1736 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1737 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1738 slm_memcpy,
1739 slm_memset,
1740 1, /* scalar_stmt_cost. */
1741 1, /* scalar load_cost. */
1742 1, /* scalar_store_cost. */
1743 1, /* vec_stmt_cost. */
1744 4, /* vec_to_scalar_cost. */
1745 1, /* scalar_to_vec_cost. */
1746 1, /* vec_align_load_cost. */
1747 2, /* vec_unalign_load_cost. */
1748 1, /* vec_store_cost. */
1749 3, /* cond_taken_branch_cost. */
1750 1, /* cond_not_taken_branch_cost. */
1751 };
1752
1753 static stringop_algs intel_memcpy[2] = {
1754 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1755 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1756 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1757 static stringop_algs intel_memset[2] = {
1758 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1759 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1760 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1761 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1762 static const
1763 struct processor_costs intel_cost = {
1764 COSTS_N_INSNS (1), /* cost of an add instruction */
1765 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1766 COSTS_N_INSNS (1), /* variable shift costs */
1767 COSTS_N_INSNS (1), /* constant shift costs */
1768 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1769 COSTS_N_INSNS (3), /* HI */
1770 COSTS_N_INSNS (3), /* SI */
1771 COSTS_N_INSNS (4), /* DI */
1772 COSTS_N_INSNS (2)}, /* other */
1773 0, /* cost of multiply per each bit set */
1774 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1775 COSTS_N_INSNS (26), /* HI */
1776 COSTS_N_INSNS (42), /* SI */
1777 COSTS_N_INSNS (74), /* DI */
1778 COSTS_N_INSNS (74)}, /* other */
1779 COSTS_N_INSNS (1), /* cost of movsx */
1780 COSTS_N_INSNS (1), /* cost of movzx */
1781 8, /* "large" insn */
1782 17, /* MOVE_RATIO */
1783 4, /* cost for loading QImode using movzbl */
1784 {4, 4, 4}, /* cost of loading integer registers
1785 in QImode, HImode and SImode.
1786 Relative to reg-reg move (2). */
1787 {4, 4, 4}, /* cost of storing integer registers */
1788 4, /* cost of reg,reg fld/fst */
1789 {12, 12, 12}, /* cost of loading fp registers
1790 in SFmode, DFmode and XFmode */
1791 {6, 6, 8}, /* cost of storing fp registers
1792 in SFmode, DFmode and XFmode */
1793 2, /* cost of moving MMX register */
1794 {8, 8}, /* cost of loading MMX registers
1795 in SImode and DImode */
1796 {8, 8}, /* cost of storing MMX registers
1797 in SImode and DImode */
1798 2, /* cost of moving SSE register */
1799 {8, 8, 8}, /* cost of loading SSE registers
1800 in SImode, DImode and TImode */
1801 {8, 8, 8}, /* cost of storing SSE registers
1802 in SImode, DImode and TImode */
1803 5, /* MMX or SSE register to integer */
1804 32, /* size of l1 cache. */
1805 256, /* size of l2 cache. */
1806 64, /* size of prefetch block */
1807 6, /* number of parallel prefetches */
1808 3, /* Branch cost */
1809 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1810 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1811 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1812 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1813 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1814 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1815 intel_memcpy,
1816 intel_memset,
1817 1, /* scalar_stmt_cost. */
1818 1, /* scalar load_cost. */
1819 1, /* scalar_store_cost. */
1820 1, /* vec_stmt_cost. */
1821 4, /* vec_to_scalar_cost. */
1822 1, /* scalar_to_vec_cost. */
1823 1, /* vec_align_load_cost. */
1824 2, /* vec_unalign_load_cost. */
1825 1, /* vec_store_cost. */
1826 3, /* cond_taken_branch_cost. */
1827 1, /* cond_not_taken_branch_cost. */
1828 };
1829
1830 /* Generic should produce code tuned for Core-i7 (and newer chips)
1831 and btver1 (and newer chips). */
1832
1833 static stringop_algs generic_memcpy[2] = {
1834 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1835 {-1, libcall, false}}},
1836 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1837 {-1, libcall, false}}}};
1838 static stringop_algs generic_memset[2] = {
1839 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1840 {-1, libcall, false}}},
1841 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1842 {-1, libcall, false}}}};
1843 static const
1844 struct processor_costs generic_cost = {
1845 COSTS_N_INSNS (1), /* cost of an add instruction */
1846 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1847 that cost, however, our current implementation of synth_mult results in
1848 the use of unnecessary temporary registers, causing regressions on several
1849 SPECfp benchmarks. */
1850 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1851 COSTS_N_INSNS (1), /* variable shift costs */
1852 COSTS_N_INSNS (1), /* constant shift costs */
1853 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1854 COSTS_N_INSNS (4), /* HI */
1855 COSTS_N_INSNS (3), /* SI */
1856 COSTS_N_INSNS (4), /* DI */
1857 COSTS_N_INSNS (2)}, /* other */
1858 0, /* cost of multiply per each bit set */
1859 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1860 COSTS_N_INSNS (26), /* HI */
1861 COSTS_N_INSNS (42), /* SI */
1862 COSTS_N_INSNS (74), /* DI */
1863 COSTS_N_INSNS (74)}, /* other */
1864 COSTS_N_INSNS (1), /* cost of movsx */
1865 COSTS_N_INSNS (1), /* cost of movzx */
1866 8, /* "large" insn */
1867 17, /* MOVE_RATIO */
1868 4, /* cost for loading QImode using movzbl */
1869 {4, 4, 4}, /* cost of loading integer registers
1870 in QImode, HImode and SImode.
1871 Relative to reg-reg move (2). */
1872 {4, 4, 4}, /* cost of storing integer registers */
1873 4, /* cost of reg,reg fld/fst */
1874 {12, 12, 12}, /* cost of loading fp registers
1875 in SFmode, DFmode and XFmode */
1876 {6, 6, 8}, /* cost of storing fp registers
1877 in SFmode, DFmode and XFmode */
1878 2, /* cost of moving MMX register */
1879 {8, 8}, /* cost of loading MMX registers
1880 in SImode and DImode */
1881 {8, 8}, /* cost of storing MMX registers
1882 in SImode and DImode */
1883 2, /* cost of moving SSE register */
1884 {8, 8, 8}, /* cost of loading SSE registers
1885 in SImode, DImode and TImode */
1886 {8, 8, 8}, /* cost of storing SSE registers
1887 in SImode, DImode and TImode */
1888 5, /* MMX or SSE register to integer */
1889 32, /* size of l1 cache. */
1890 512, /* size of l2 cache. */
1891 64, /* size of prefetch block */
1892 6, /* number of parallel prefetches */
1893 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1894 value is increased to the perhaps more appropriate value of 5. */
1895 3, /* Branch cost */
1896 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1897 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1898 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1899 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1900 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1901 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1902 generic_memcpy,
1903 generic_memset,
1904 1, /* scalar_stmt_cost. */
1905 1, /* scalar load_cost. */
1906 1, /* scalar_store_cost. */
1907 1, /* vec_stmt_cost. */
1908 1, /* vec_to_scalar_cost. */
1909 1, /* scalar_to_vec_cost. */
1910 1, /* vec_align_load_cost. */
1911 2, /* vec_unalign_load_cost. */
1912 1, /* vec_store_cost. */
1913 3, /* cond_taken_branch_cost. */
1914 1, /* cond_not_taken_branch_cost. */
1915 };
1916
1917 /* core_cost should produce code tuned for the Core family of CPUs. */
1918 static stringop_algs core_memcpy[2] = {
1919 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1920 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1921 {-1, libcall, false}}}};
1922 static stringop_algs core_memset[2] = {
1923 {libcall, {{6, loop_1_byte, true},
1924 {24, loop, true},
1925 {8192, rep_prefix_4_byte, true},
1926 {-1, libcall, false}}},
1927 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1928 {-1, libcall, false}}}};
1929
1930 static const
1931 struct processor_costs core_cost = {
1932 COSTS_N_INSNS (1), /* cost of an add instruction */
1933 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1934 that cost, however, our current implementation of synth_mult results in
1935 the use of unnecessary temporary registers, causing regressions on several
1936 SPECfp benchmarks. */
1937 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1938 COSTS_N_INSNS (1), /* variable shift costs */
1939 COSTS_N_INSNS (1), /* constant shift costs */
1940 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1941 COSTS_N_INSNS (4), /* HI */
1942 COSTS_N_INSNS (3), /* SI */
1943 COSTS_N_INSNS (4), /* DI */
1944 COSTS_N_INSNS (2)}, /* other */
1945 0, /* cost of multiply per each bit set */
1946 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1947 COSTS_N_INSNS (26), /* HI */
1948 COSTS_N_INSNS (42), /* SI */
1949 COSTS_N_INSNS (74), /* DI */
1950 COSTS_N_INSNS (74)}, /* other */
1951 COSTS_N_INSNS (1), /* cost of movsx */
1952 COSTS_N_INSNS (1), /* cost of movzx */
1953 8, /* "large" insn */
1954 17, /* MOVE_RATIO */
1955 4, /* cost for loading QImode using movzbl */
1956 {4, 4, 4}, /* cost of loading integer registers
1957 in QImode, HImode and SImode.
1958 Relative to reg-reg move (2). */
1959 {4, 4, 4}, /* cost of storing integer registers */
1960 4, /* cost of reg,reg fld/fst */
1961 {12, 12, 12}, /* cost of loading fp registers
1962 in SFmode, DFmode and XFmode */
1963 {6, 6, 8}, /* cost of storing fp registers
1964 in SFmode, DFmode and XFmode */
1965 2, /* cost of moving MMX register */
1966 {8, 8}, /* cost of loading MMX registers
1967 in SImode and DImode */
1968 {8, 8}, /* cost of storing MMX registers
1969 in SImode and DImode */
1970 2, /* cost of moving SSE register */
1971 {8, 8, 8}, /* cost of loading SSE registers
1972 in SImode, DImode and TImode */
1973 {8, 8, 8}, /* cost of storing SSE registers
1974 in SImode, DImode and TImode */
1975 5, /* MMX or SSE register to integer */
1976 64, /* size of l1 cache. */
1977 512, /* size of l2 cache. */
1978 64, /* size of prefetch block */
1979 6, /* number of parallel prefetches */
1980 /* FIXME perhaps more appropriate value is 5. */
1981 3, /* Branch cost */
1982 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1983 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1984 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1985 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1986 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1987 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1988 core_memcpy,
1989 core_memset,
1990 1, /* scalar_stmt_cost. */
1991 1, /* scalar load_cost. */
1992 1, /* scalar_store_cost. */
1993 1, /* vec_stmt_cost. */
1994 1, /* vec_to_scalar_cost. */
1995 1, /* scalar_to_vec_cost. */
1996 1, /* vec_align_load_cost. */
1997 2, /* vec_unalign_load_cost. */
1998 1, /* vec_store_cost. */
1999 3, /* cond_taken_branch_cost. */
2000 1, /* cond_not_taken_branch_cost. */
2001 };
2002
2003
2004 /* Set by -mtune. */
2005 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2006
2007 /* Set by -mtune or -Os. */
2008 const struct processor_costs *ix86_cost = &pentium_cost;
2009
2010 /* Processor feature/optimization bitmasks. */
2011 #define m_386 (1<<PROCESSOR_I386)
2012 #define m_486 (1<<PROCESSOR_I486)
2013 #define m_PENT (1<<PROCESSOR_PENTIUM)
2014 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2015 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2016 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2017 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2018 #define m_CORE2 (1<<PROCESSOR_CORE2)
2019 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2020 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2021 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2022 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2023 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2024 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2025 #define m_INTEL (1<<PROCESSOR_INTEL)
2026
2027 #define m_GEODE (1<<PROCESSOR_GEODE)
2028 #define m_K6 (1<<PROCESSOR_K6)
2029 #define m_K6_GEODE (m_K6 | m_GEODE)
2030 #define m_K8 (1<<PROCESSOR_K8)
2031 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2032 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2033 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2034 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2035 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2036 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2037 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2038 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2039 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2040 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2041 #define m_BTVER (m_BTVER1 | m_BTVER2)
2042 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2043
2044 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2045
2046 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2047 #undef DEF_TUNE
2048 #define DEF_TUNE(tune, name, selector) name,
2049 #include "x86-tune.def"
2050 #undef DEF_TUNE
2051 };
2052
2053 /* Feature tests against the various tunings. */
2054 unsigned char ix86_tune_features[X86_TUNE_LAST];
2055
2056 /* Feature tests against the various tunings used to create ix86_tune_features
2057 based on the processor mask. */
2058 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2059 #undef DEF_TUNE
2060 #define DEF_TUNE(tune, name, selector) selector,
2061 #include "x86-tune.def"
2062 #undef DEF_TUNE
2063 };
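
/* For reference: each line of x86-tune.def expands through DEF_TUNE into
   one entry of ix86_tune_feature_names and one selector mask above, along
   the lines of (illustrative entry, not an actual one from the file):
     DEF_TUNE (X86_TUNE_FOO, "foo", m_CORE_ALL | m_GENERIC)  */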
2064
2065 /* Feature tests against the various architecture variations. */
2066 unsigned char ix86_arch_features[X86_ARCH_LAST];
2067
2068 /* Feature tests against the various architecture variations, used to create
2069 ix86_arch_features based on the processor mask. */
2070 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2071 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2072 ~(m_386 | m_486 | m_PENT | m_K6),
2073
2074 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2075 ~m_386,
2076
2077 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2078 ~(m_386 | m_486),
2079
2080 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2081 ~m_386,
2082
2083 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2084 ~m_386,
2085 };
2086
2087 /* In case the average insn count for a single function invocation is
2088 lower than this constant, emit fast (but longer) prologue and
2089 epilogue code. */
2090 #define FAST_PROLOGUE_INSN_COUNT 20
2091
2092 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2093 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2094 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2095 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2096
2097 /* Array of the smallest class containing reg number REGNO, indexed by
2098 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2099
2100 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2101 {
2102 /* ax, dx, cx, bx */
2103 AREG, DREG, CREG, BREG,
2104 /* si, di, bp, sp */
2105 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2106 /* FP registers */
2107 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2108 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2109 /* arg pointer */
2110 NON_Q_REGS,
2111 /* flags, fpsr, fpcr, frame */
2112 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2113 /* SSE registers */
2114 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2115 SSE_REGS, SSE_REGS,
2116 /* MMX registers */
2117 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2118 MMX_REGS, MMX_REGS,
2119 /* REX registers */
2120 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2121 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2122 /* SSE REX registers */
2123 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2124 SSE_REGS, SSE_REGS,
2125 /* AVX-512 SSE registers */
2126 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2127 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2128 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2129 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2130 /* Mask registers. */
2131 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2132 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2133 };
2134
2135 /* The "default" register map used in 32bit mode. */
2136
2137 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2138 {
2139 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2140 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2141 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2142 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2143 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2144 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2145 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2146 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2147 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2148 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2149 };
2150
2151 /* The "default" register map used in 64bit mode. */
2152
2153 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2154 {
2155 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2156 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2157 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2158 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2159 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2160 8,9,10,11,12,13,14,15, /* extended integer registers */
2161 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2162 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2163 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2164 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2165 };
2166
2167 /* Define the register numbers to be used in Dwarf debugging information.
2168 The SVR4 reference port C compiler uses the following register numbers
2169 in its Dwarf output code:
2170 0 for %eax (gcc regno = 0)
2171 1 for %ecx (gcc regno = 2)
2172 2 for %edx (gcc regno = 1)
2173 3 for %ebx (gcc regno = 3)
2174 4 for %esp (gcc regno = 7)
2175 5 for %ebp (gcc regno = 6)
2176 6 for %esi (gcc regno = 4)
2177 7 for %edi (gcc regno = 5)
2178 The following three DWARF register numbers are never generated by
2179 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2180 believes these numbers have these meanings.
2181 8 for %eip (no gcc equivalent)
2182 9 for %eflags (gcc regno = 17)
2183 10 for %trapno (no gcc equivalent)
2184 It is not at all clear how we should number the FP stack registers
2185 for the x86 architecture. If the version of SDB on x86/svr4 were
2186 a bit less brain dead with respect to floating-point then we would
2187 have a precedent to follow with respect to DWARF register numbers
2188 for x86 FP registers, but the SDB on x86/svr4 is so completely
2189 broken with respect to FP registers that it is hardly worth thinking
2190 of it as something to strive for compatibility with.
2191 The version of x86/svr4 SDB I have at the moment does (partially)
2192 seem to believe that DWARF register number 11 is associated with
2193 the x86 register %st(0), but that's about all. Higher DWARF
2194 register numbers don't seem to be associated with anything in
2195 particular, and even for DWARF regno 11, SDB only seems to under-
2196 stand that it should say that a variable lives in %st(0) (when
2197 asked via an `=' command) if we said it was in DWARF regno 11,
2198 but SDB still prints garbage when asked for the value of the
2199 variable in question (via a `/' command).
2200 (Also note that the labels SDB prints for various FP stack regs
2201 when doing an `x' command are all wrong.)
2202 Note that these problems generally don't affect the native SVR4
2203 C compiler because it doesn't allow the use of -O with -g and
2204 because when it is *not* optimizing, it allocates a memory
2205 location for each floating-point variable, and the memory
2206 location is what gets described in the DWARF AT_location
2207 attribute for the variable in question.
2208 Regardless of the severe mental illness of the x86/svr4 SDB, we
2209 do something sensible here and we use the following DWARF
2210 register numbers. Note that these are all stack-top-relative
2211 numbers.
2212 11 for %st(0) (gcc regno = 8)
2213 12 for %st(1) (gcc regno = 9)
2214 13 for %st(2) (gcc regno = 10)
2215 14 for %st(3) (gcc regno = 11)
2216 15 for %st(4) (gcc regno = 12)
2217 16 for %st(5) (gcc regno = 13)
2218 17 for %st(6) (gcc regno = 14)
2219 18 for %st(7) (gcc regno = 15)
2220 */
2221 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2222 {
2223 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2224 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2225 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2226 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2227 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2228 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2229 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2230 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2231 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2232 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2233 };
2234
2235 /* Define parameter passing and return registers. */
2236
2237 static int const x86_64_int_parameter_registers[6] =
2238 {
2239 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2240 };
2241
2242 static int const x86_64_ms_abi_int_parameter_registers[4] =
2243 {
2244 CX_REG, DX_REG, R8_REG, R9_REG
2245 };
2246
2247 static int const x86_64_int_return_registers[4] =
2248 {
2249 AX_REG, DX_REG, DI_REG, SI_REG
2250 };
2251
2252 /* Additional registers that are clobbered by SYSV calls. */
2253
2254 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2255 {
2256 SI_REG, DI_REG,
2257 XMM6_REG, XMM7_REG,
2258 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2259 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2260 };
2261
2262 /* Define the structure for the machine field in struct function. */
2263
2264 struct GTY(()) stack_local_entry {
2265 unsigned short mode;
2266 unsigned short n;
2267 rtx rtl;
2268 struct stack_local_entry *next;
2269 };
2270
2271 /* Structure describing stack frame layout.
2272 Stack grows downward:
2273
2274 [arguments]
2275 <- ARG_POINTER
2276 saved pc
2277
2278 saved static chain if ix86_static_chain_on_stack
2279
2280 saved frame pointer if frame_pointer_needed
2281 <- HARD_FRAME_POINTER
2282 [saved regs]
2283 <- regs_save_offset
2284 [padding0]
2285
2286 [saved SSE regs]
2287 <- sse_regs_save_offset
2288 [padding1] |
2289 | <- FRAME_POINTER
2290 [va_arg registers] |
2291 |
2292 [frame] |
2293 |
2294 [padding2] | = to_allocate
2295 <- STACK_POINTER
2296 */
2297 struct ix86_frame
2298 {
2299 int nsseregs;
2300 int nregs;
2301 int va_arg_size;
2302 int red_zone_size;
2303 int outgoing_arguments_size;
2304
2305 /* The offsets relative to ARG_POINTER. */
2306 HOST_WIDE_INT frame_pointer_offset;
2307 HOST_WIDE_INT hard_frame_pointer_offset;
2308 HOST_WIDE_INT stack_pointer_offset;
2309 HOST_WIDE_INT hfp_save_offset;
2310 HOST_WIDE_INT reg_save_offset;
2311 HOST_WIDE_INT sse_reg_save_offset;
2312
2313 /* When save_regs_using_mov is set, emit prologue using
2314 move instead of push instructions. */
2315 bool save_regs_using_mov;
2316 };
2317
2318 /* Which cpu are we scheduling for. */
2319 enum attr_cpu ix86_schedule;
2320
2321 /* Which cpu are we optimizing for. */
2322 enum processor_type ix86_tune;
2323
2324 /* Which instruction set architecture to use. */
2325 enum processor_type ix86_arch;
2326
2327 /* True if processor has SSE prefetch instruction. */
2328 unsigned char x86_prefetch_sse;
2329
2330 /* -mstackrealign option */
2331 static const char ix86_force_align_arg_pointer_string[]
2332 = "force_align_arg_pointer";
2333
2334 static rtx (*ix86_gen_leave) (void);
2335 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2336 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2337 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2338 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2339 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2340 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2342 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2343 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2344 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2345 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2346
2347 /* Preferred alignment for stack boundary in bits. */
2348 unsigned int ix86_preferred_stack_boundary;
2349
2350 /* Alignment for incoming stack boundary in bits specified at
2351 command line. */
2352 static unsigned int ix86_user_incoming_stack_boundary;
2353
2354 /* Default alignment for incoming stack boundary in bits. */
2355 static unsigned int ix86_default_incoming_stack_boundary;
2356
2357 /* Alignment for incoming stack boundary in bits. */
2358 unsigned int ix86_incoming_stack_boundary;
2359
2360 /* Calling abi specific va_list type nodes. */
2361 static GTY(()) tree sysv_va_list_type_node;
2362 static GTY(()) tree ms_va_list_type_node;
2363
2364 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2365 char internal_label_prefix[16];
2366 int internal_label_prefix_len;
2367
2368 /* Fence to use after loop using movnt. */
2369 tree x86_mfence;
2370
2371 /* Register class used for passing a given 64bit part of the argument.
2372 These represent classes as documented by the PS ABI, with the exception of
2373 the SSESF and SSEDF classes, which are basically the SSE class, except that
2374 gcc will use SF or DFmode moves instead of DImode to avoid reformatting penalties.
2375
2376 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2377 whenever possible (the upper half then contains only padding). */
2378 enum x86_64_reg_class
2379 {
2380 X86_64_NO_CLASS,
2381 X86_64_INTEGER_CLASS,
2382 X86_64_INTEGERSI_CLASS,
2383 X86_64_SSE_CLASS,
2384 X86_64_SSESF_CLASS,
2385 X86_64_SSEDF_CLASS,
2386 X86_64_SSEUP_CLASS,
2387 X86_64_X87_CLASS,
2388 X86_64_X87UP_CLASS,
2389 X86_64_COMPLEX_X87_CLASS,
2390 X86_64_MEMORY_CLASS
2391 };
2392
2393 #define MAX_CLASSES 8
2394
2395 /* Table of constants used by fldpi, fldln2, etc.... */
2396 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2397 static bool ext_80387_constants_init = 0;
2398
2399 \f
2400 static struct machine_function * ix86_init_machine_status (void);
2401 static rtx ix86_function_value (const_tree, const_tree, bool);
2402 static bool ix86_function_value_regno_p (const unsigned int);
2403 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2404 const_tree);
2405 static rtx ix86_static_chain (const_tree, bool);
2406 static int ix86_function_regparm (const_tree, const_tree);
2407 static void ix86_compute_frame_layout (struct ix86_frame *);
2408 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2409 rtx, rtx, int);
2410 static void ix86_add_new_builtins (HOST_WIDE_INT);
2411 static tree ix86_canonical_va_list_type (tree);
2412 static void predict_jump (int);
2413 static unsigned int split_stack_prologue_scratch_regno (void);
2414 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2415
2416 enum ix86_function_specific_strings
2417 {
2418 IX86_FUNCTION_SPECIFIC_ARCH,
2419 IX86_FUNCTION_SPECIFIC_TUNE,
2420 IX86_FUNCTION_SPECIFIC_MAX
2421 };
2422
2423 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2424 const char *, enum fpmath_unit, bool);
2425 static void ix86_function_specific_save (struct cl_target_option *,
2426 struct gcc_options *opts);
2427 static void ix86_function_specific_restore (struct gcc_options *opts,
2428 struct cl_target_option *);
2429 static void ix86_function_specific_print (FILE *, int,
2430 struct cl_target_option *);
2431 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2432 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2433 struct gcc_options *,
2434 struct gcc_options *,
2435 struct gcc_options *);
2436 static bool ix86_can_inline_p (tree, tree);
2437 static void ix86_set_current_function (tree);
2438 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2439
2440 static enum calling_abi ix86_function_abi (const_tree);
2441
2442 \f
2443 #ifndef SUBTARGET32_DEFAULT_CPU
2444 #define SUBTARGET32_DEFAULT_CPU "i386"
2445 #endif
2446
2447 /* Whether -mtune= or -march= were specified */
2448 static int ix86_tune_defaulted;
2449 static int ix86_arch_specified;
2450
2451 /* Vectorization library interface and handlers. */
2452 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2453
2454 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2455 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2456
2457 /* Processor target table, indexed by processor number */
2458 struct ptt
2459 {
2460 const char *const name; /* processor name */
2461 const struct processor_costs *cost; /* Processor costs */
2462 const int align_loop; /* Default alignments. */
2463 const int align_loop_max_skip;
2464 const int align_jump;
2465 const int align_jump_max_skip;
2466 const int align_func;
2467 };
2468
2469 /* This table must be in sync with enum processor_type in i386.h. */
2470 static const struct ptt processor_target_table[PROCESSOR_max] =
2471 {
2472 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2473 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2474 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2475 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2476 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2477 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2478 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2479 {"core2", &core_cost, 16, 10, 16, 10, 16},
2480 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2481 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2482 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2483 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2484 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2485 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2486 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2487 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2488 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2489 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2490 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2491 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2492 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2493 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2494 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2495 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2496 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2497 };
2498 \f
2499 static unsigned int
2500 rest_of_handle_insert_vzeroupper (void)
2501 {
2502 int i;
2503
2504 /* vzeroupper instructions are inserted immediately after reload to
2505 account for possible spills from 256bit registers. The pass
2506 reuses the mode switching infrastructure by re-running the mode
2507 insertion pass, so disable entities that have already been processed. */
2508 for (i = 0; i < MAX_386_ENTITIES; i++)
2509 ix86_optimize_mode_switching[i] = 0;
2510
2511 ix86_optimize_mode_switching[AVX_U128] = 1;
2512
2513 /* Call optimize_mode_switching. */
2514 g->get_passes ()->execute_pass_mode_switching ();
2515 return 0;
2516 }
2517
2518 namespace {
2519
2520 const pass_data pass_data_insert_vzeroupper =
2521 {
2522 RTL_PASS, /* type */
2523 "vzeroupper", /* name */
2524 OPTGROUP_NONE, /* optinfo_flags */
2525 true, /* has_execute */
2526 TV_NONE, /* tv_id */
2527 0, /* properties_required */
2528 0, /* properties_provided */
2529 0, /* properties_destroyed */
2530 0, /* todo_flags_start */
2531 TODO_df_finish, /* todo_flags_finish */
2532 };
2533
2534 class pass_insert_vzeroupper : public rtl_opt_pass
2535 {
2536 public:
2537 pass_insert_vzeroupper(gcc::context *ctxt)
2538 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2539 {}
2540
2541 /* opt_pass methods: */
2542 virtual bool gate (function *)
2543 {
2544 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2545 }
2546
2547 virtual unsigned int execute (function *)
2548 {
2549 return rest_of_handle_insert_vzeroupper ();
2550 }
2551
2552 }; // class pass_insert_vzeroupper
2553
2554 } // anon namespace
2555
2556 rtl_opt_pass *
2557 make_pass_insert_vzeroupper (gcc::context *ctxt)
2558 {
2559 return new pass_insert_vzeroupper (ctxt);
2560 }
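
/* A sketch of how this pass is typically hooked in right after reload
   (illustrative only; the actual registration is done during option
   override via the pass manager, not here):

     opt_pass *p = make_pass_insert_vzeroupper (g);
     struct register_pass_info info
       = { p, "reload", 1, PASS_POS_INSERT_AFTER };
     register_pass (&info);  */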
2561
2562 /* Return true if a red-zone is in use. */
2563
2564 static inline bool
2565 ix86_using_red_zone (void)
2566 {
2567 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2568 }
2569 \f
2570 /* Return a string that documents the current -m options. The caller is
2571 responsible for freeing the string. */
2572
2573 static char *
2574 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2575 const char *tune, enum fpmath_unit fpmath,
2576 bool add_nl_p)
2577 {
2578 struct ix86_target_opts
2579 {
2580 const char *option; /* option string */
2581 HOST_WIDE_INT mask; /* isa mask options */
2582 };
2583
2584 /* This table is ordered so that options like -msse4.2, which imply
2585 other options, are matched first. */
2586 static struct ix86_target_opts isa_opts[] =
2587 {
2588 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2589 { "-mfma", OPTION_MASK_ISA_FMA },
2590 { "-mxop", OPTION_MASK_ISA_XOP },
2591 { "-mlwp", OPTION_MASK_ISA_LWP },
2592 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2593 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2594 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2595 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2596 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2597 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2598 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2599 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2600 { "-msse3", OPTION_MASK_ISA_SSE3 },
2601 { "-msse2", OPTION_MASK_ISA_SSE2 },
2602 { "-msse", OPTION_MASK_ISA_SSE },
2603 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2604 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2605 { "-mmmx", OPTION_MASK_ISA_MMX },
2606 { "-mabm", OPTION_MASK_ISA_ABM },
2607 { "-mbmi", OPTION_MASK_ISA_BMI },
2608 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2609 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2610 { "-mhle", OPTION_MASK_ISA_HLE },
2611 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2612 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2613 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2614 { "-madx", OPTION_MASK_ISA_ADX },
2615 { "-mtbm", OPTION_MASK_ISA_TBM },
2616 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2617 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2618 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2619 { "-maes", OPTION_MASK_ISA_AES },
2620 { "-msha", OPTION_MASK_ISA_SHA },
2621 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2622 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2623 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2624 { "-mf16c", OPTION_MASK_ISA_F16C },
2625 { "-mrtm", OPTION_MASK_ISA_RTM },
2626 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2627 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2628 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2629 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2630 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2631 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2632 };
2633
2634 /* Flag options. */
2635 static struct ix86_target_opts flag_opts[] =
2636 {
2637 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2638 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2639 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2640 { "-m80387", MASK_80387 },
2641 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2642 { "-malign-double", MASK_ALIGN_DOUBLE },
2643 { "-mcld", MASK_CLD },
2644 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2645 { "-mieee-fp", MASK_IEEE_FP },
2646 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2647 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2648 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2649 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2650 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2651 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2652 { "-mno-red-zone", MASK_NO_RED_ZONE },
2653 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2654 { "-mrecip", MASK_RECIP },
2655 { "-mrtd", MASK_RTD },
2656 { "-msseregparm", MASK_SSEREGPARM },
2657 { "-mstack-arg-probe", MASK_STACK_PROBE },
2658 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2659 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2660 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2661 { "-mvzeroupper", MASK_VZEROUPPER },
2662 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2663 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2664 { "-mprefer-avx128", MASK_PREFER_AVX128},
2665 };
2666
2667 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2668
2669 char isa_other[40];
2670 char target_other[40];
2671 unsigned num = 0;
2672 unsigned i, j;
2673 char *ret;
2674 char *ptr;
2675 size_t len;
2676 size_t line_len;
2677 size_t sep_len;
2678 const char *abi;
2679
2680 memset (opts, '\0', sizeof (opts));
2681
2682 /* Add -march= option. */
2683 if (arch)
2684 {
2685 opts[num][0] = "-march=";
2686 opts[num++][1] = arch;
2687 }
2688
2689 /* Add -mtune= option. */
2690 if (tune)
2691 {
2692 opts[num][0] = "-mtune=";
2693 opts[num++][1] = tune;
2694 }
2695
2696 /* Add -m32/-m64/-mx32. */
2697 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2698 {
2699 if ((isa & OPTION_MASK_ABI_64) != 0)
2700 abi = "-m64";
2701 else
2702 abi = "-mx32";
2703 isa &= ~ (OPTION_MASK_ISA_64BIT
2704 | OPTION_MASK_ABI_64
2705 | OPTION_MASK_ABI_X32);
2706 }
2707 else
2708 abi = "-m32";
2709 opts[num++][0] = abi;
2710
2711 /* Pick out the options in isa options. */
2712 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2713 {
2714 if ((isa & isa_opts[i].mask) != 0)
2715 {
2716 opts[num++][0] = isa_opts[i].option;
2717 isa &= ~ isa_opts[i].mask;
2718 }
2719 }
2720
2721 if (isa && add_nl_p)
2722 {
2723 opts[num++][0] = isa_other;
2724 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2725 isa);
2726 }
2727
2728 /* Add flag options. */
2729 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2730 {
2731 if ((flags & flag_opts[i].mask) != 0)
2732 {
2733 opts[num++][0] = flag_opts[i].option;
2734 flags &= ~ flag_opts[i].mask;
2735 }
2736 }
2737
2738 if (flags && add_nl_p)
2739 {
2740 opts[num++][0] = target_other;
2741 sprintf (target_other, "(other flags: %#x)", flags);
2742 }
2743
2744 /* Add -fpmath= option. */
2745 if (fpmath)
2746 {
2747 opts[num][0] = "-mfpmath=";
2748 switch ((int) fpmath)
2749 {
2750 case FPMATH_387:
2751 opts[num++][1] = "387";
2752 break;
2753
2754 case FPMATH_SSE:
2755 opts[num++][1] = "sse";
2756 break;
2757
2758 case FPMATH_387 | FPMATH_SSE:
2759 opts[num++][1] = "sse+387";
2760 break;
2761
2762 default:
2763 gcc_unreachable ();
2764 }
2765 }
2766
2767 /* Any options? */
2768 if (num == 0)
2769 return NULL;
2770
2771 gcc_assert (num < ARRAY_SIZE (opts));
2772
2773 /* Size the string. */
2774 len = 0;
2775 sep_len = (add_nl_p) ? 3 : 1;
2776 for (i = 0; i < num; i++)
2777 {
2778 len += sep_len;
2779 for (j = 0; j < 2; j++)
2780 if (opts[i][j])
2781 len += strlen (opts[i][j]);
2782 }
2783
2784 /* Build the string. */
2785 ret = ptr = (char *) xmalloc (len);
2786 line_len = 0;
2787
2788 for (i = 0; i < num; i++)
2789 {
2790 size_t len2[2];
2791
2792 for (j = 0; j < 2; j++)
2793 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2794
2795 if (i != 0)
2796 {
2797 *ptr++ = ' ';
2798 line_len++;
2799
2800 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2801 {
2802 *ptr++ = '\\';
2803 *ptr++ = '\n';
2804 line_len = 0;
2805 }
2806 }
2807
2808 for (j = 0; j < 2; j++)
2809 if (opts[i][j])
2810 {
2811 memcpy (ptr, opts[i][j], len2[j]);
2812 ptr += len2[j];
2813 line_len += len2[j];
2814 }
2815 }
2816
2817 *ptr = '\0';
2818 gcc_assert (ret + len >= ptr);
2819
2820 return ret;
2821 }
2822
2823 /* Return true if profiling code should be emitted before the
2824 prologue, and false otherwise.
2825 Note: for x86, the "hotfix" (hot patching) combination is diagnosed with sorry (). */
2826 static bool
2827 ix86_profile_before_prologue (void)
2828 {
2829 return flag_fentry != 0;
2830 }
2831
2832 /* Function that is callable from the debugger to print the current
2833 options. */
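/* (Typically invoked by hand from a debugger, e.g. "call ix86_debug_options ()"
   under gdb.)  */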
2834 void ATTRIBUTE_UNUSED
2835 ix86_debug_options (void)
2836 {
2837 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2838 ix86_arch_string, ix86_tune_string,
2839 ix86_fpmath, true);
2840
2841 if (opts)
2842 {
2843 fprintf (stderr, "%s\n\n", opts);
2844 free (opts);
2845 }
2846 else
2847 fputs ("<no options>\n\n", stderr);
2848
2849 return;
2850 }
2851
2852 static const char *stringop_alg_names[] = {
2853 #define DEF_ENUM
2854 #define DEF_ALG(alg, name) #name,
2855 #include "stringop.def"
2856 #undef DEF_ENUM
2857 #undef DEF_ALG
2858 };
2859
2860 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2861 The string is of the following form (or comma separated list of it):
2862
2863 strategy_alg:max_size:[align|noalign]
2864
2865 where the full size range for the strategy is either [0, max_size] or
2866 [min_size, max_size], in which min_size is the max_size + 1 of the
2867 preceding range. The last size range must have max_size == -1.
2868
2869 Examples:
2870
2871 1.
2872 -mmemcpy-strategy=libcall:-1:noalign
2873
2874 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2875
2876
2877 2.
2878 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2879
2880 This is to tell the compiler to use the following strategy for memset
2881 1) when the expected size is between [1, 16], use rep_8byte strategy;
2882 2) when the size is between [17, 2048], use vector_loop;
2883 3) when the size is > 2048, use libcall. */
2884
2885 struct stringop_size_range
2886 {
2887 int max;
2888 stringop_alg alg;
2889 bool noalign;
2890 };
2891
2892 static void
2893 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2894 {
2895 const struct stringop_algs *default_algs;
2896 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2897 char *curr_range_str, *next_range_str;
2898 int i = 0, n = 0;
2899
2900 if (is_memset)
2901 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2902 else
2903 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2904
2905 curr_range_str = strategy_str;
2906
2907 do
2908 {
2909 int maxs;
2910 char alg_name[128];
2911 char align[16];
2912 next_range_str = strchr (curr_range_str, ',');
2913 if (next_range_str)
2914 *next_range_str++ = '\0';
2915
2916 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2917 alg_name, &maxs, align))
2918 {
2919 error ("wrong arg %s to option %s", curr_range_str,
2920 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2921 return;
2922 }
2923
2924 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2925 {
2926 error ("size ranges of option %s should be increasing",
2927 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2928 return;
2929 }
2930
2931 for (i = 0; i < last_alg; i++)
2932 if (!strcmp (alg_name, stringop_alg_names[i]))
2933 break;
2934
2935 if (i == last_alg)
2936 {
2937 error ("wrong stringop strategy name %s specified for option %s",
2938 alg_name,
2939 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2940 return;
2941 }
2942
2943 input_ranges[n].max = maxs;
2944 input_ranges[n].alg = (stringop_alg) i;
2945 if (!strcmp (align, "align"))
2946 input_ranges[n].noalign = false;
2947 else if (!strcmp (align, "noalign"))
2948 input_ranges[n].noalign = true;
2949 else
2950 {
2951 error ("unknown alignment %s specified for option %s",
2952 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2953 return;
2954 }
2955 n++;
2956 curr_range_str = next_range_str;
2957 }
2958 while (curr_range_str);
2959
2960 if (input_ranges[n - 1].max != -1)
2961 {
2962 error ("the max value for the last size range should be -1"
2963 " for option %s",
2964 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2965 return;
2966 }
2967
2968 if (n > MAX_STRINGOP_ALGS)
2969 {
2970 error ("too many size ranges specified in option %s",
2971 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2972 return;
2973 }
2974
2975 /* Now override the default algs array. */
2976 for (i = 0; i < n; i++)
2977 {
2978 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2979 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2980 = input_ranges[i].alg;
2981 *const_cast<int *>(&default_algs->size[i].noalign)
2982 = input_ranges[i].noalign;
2983 }
2984 }
2985
2986 \f
2987 /* Parse the -mtune-ctrl= option. When DUMP is true,
2988 print the features that are explicitly set. */
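/* The string is a comma-separated list of feature names taken from
   ix86_tune_feature_names[]; a leading '^' clears the named feature
   instead of setting it.  For instance, with hypothetical feature names
   FOO and BAR:

     -mtune-ctrl=foo,^bar

   would set FOO and clear BAR.  */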
2989
2990 static void
2991 parse_mtune_ctrl_str (bool dump)
2992 {
2993 if (!ix86_tune_ctrl_string)
2994 return;
2995
2996 char *next_feature_string = NULL;
2997 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
2998 char *orig = curr_feature_string;
2999 int i;
3000 do
3001 {
3002 bool clear = false;
3003
3004 next_feature_string = strchr (curr_feature_string, ',');
3005 if (next_feature_string)
3006 *next_feature_string++ = '\0';
3007 if (*curr_feature_string == '^')
3008 {
3009 curr_feature_string++;
3010 clear = true;
3011 }
3012 for (i = 0; i < X86_TUNE_LAST; i++)
3013 {
3014 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3015 {
3016 ix86_tune_features[i] = !clear;
3017 if (dump)
3018 fprintf (stderr, "Explicitly %s feature %s\n",
3019 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3020 break;
3021 }
3022 }
3023 if (i == X86_TUNE_LAST)
3024 error ("unknown parameter to option -mtune-ctrl: %s",
3025 clear ? curr_feature_string - 1 : curr_feature_string);
3026 curr_feature_string = next_feature_string;
3027 }
3028 while (curr_feature_string);
3029 free (orig);
3030 }
3031
3032 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3033 processor type. */
3034
3035 static void
3036 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3037 {
3038 unsigned int ix86_tune_mask = 1u << ix86_tune;
3039 int i;
3040
3041 for (i = 0; i < X86_TUNE_LAST; ++i)
3042 {
3043 if (ix86_tune_no_default)
3044 ix86_tune_features[i] = 0;
3045 else
3046 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3047 }
3048
3049 if (dump)
3050 {
3051 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3052 for (i = 0; i < X86_TUNE_LAST; i++)
3053 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3054 ix86_tune_features[i] ? "on" : "off");
3055 }
3056
3057 parse_mtune_ctrl_str (dump);
3058 }
3059
3060
3061 /* Override various settings based on options. If MAIN_ARGS_P, the
3062 options are from the command line, otherwise they are from
3063 attributes. */
3064
3065 static void
3066 ix86_option_override_internal (bool main_args_p,
3067 struct gcc_options *opts,
3068 struct gcc_options *opts_set)
3069 {
3070 int i;
3071 unsigned int ix86_arch_mask;
3072 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3073 const char *prefix;
3074 const char *suffix;
3075 const char *sw;
3076
3077 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3078 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3079 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3080 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3081 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3082 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3083 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3084 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3085 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3086 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3087 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3088 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3089 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3090 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3091 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3092 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3093 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3094 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3095 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3096 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3097 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3098 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3099 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3100 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3101 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3102 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3103 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3104 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3105 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3106 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3107 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3108 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3109 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3110 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3111 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3112 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3113 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3114 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3115 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3116 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3117 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3118 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3119 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3120 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3121 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3122 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3123 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3124 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3125 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3126
3127 #define PTA_CORE2 \
3128 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3129 | PTA_CX16 | PTA_FXSR)
3130 #define PTA_NEHALEM \
3131 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3132 #define PTA_WESTMERE \
3133 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3134 #define PTA_SANDYBRIDGE \
3135 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3136 #define PTA_IVYBRIDGE \
3137 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3138 #define PTA_HASWELL \
3139 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3140 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3141 #define PTA_BROADWELL \
3142 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3143 #define PTA_BONNELL \
3144 (PTA_CORE2 | PTA_MOVBE)
3145 #define PTA_SILVERMONT \
3146 (PTA_WESTMERE | PTA_MOVBE)
3147
3148 /* If this reaches 64, we need to widen the flags field of struct pta below. */
3149
3150 static struct pta
3151 {
3152 const char *const name; /* processor name or nickname. */
3153 const enum processor_type processor;
3154 const enum attr_cpu schedule;
3155 const unsigned HOST_WIDE_INT flags;
3156 }
3157 const processor_alias_table[] =
3158 {
3159 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3160 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3161 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3162 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3163 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3164 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3165 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3166 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3167 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3168 PTA_MMX | PTA_SSE | PTA_FXSR},
3169 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3170 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3171 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3172 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3173 PTA_MMX | PTA_SSE | PTA_FXSR},
3174 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3175 PTA_MMX | PTA_SSE | PTA_FXSR},
3176 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3177 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3178 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3179 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3180 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3181 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3182 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3183 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3184 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3185 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3186 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3187 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3188 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3189 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3190 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3191 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3192 PTA_SANDYBRIDGE},
3193 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3194 PTA_SANDYBRIDGE},
3195 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3196 PTA_IVYBRIDGE},
3197 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3198 PTA_IVYBRIDGE},
3199 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3200 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3201 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3202 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3203 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3204 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3205 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3206 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3207 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3208 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3209 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3210 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3211 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3212 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3213 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3214 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3215 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3216 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3217 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3218 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3219 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3220 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3221 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3222 {"x86-64", PROCESSOR_K8, CPU_K8,
3223 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3224 {"k8", PROCESSOR_K8, CPU_K8,
3225 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3226 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3227 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3228 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3229 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3230 {"opteron", PROCESSOR_K8, CPU_K8,
3231 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3232 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3233 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3234 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3235 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3236 {"athlon64", PROCESSOR_K8, CPU_K8,
3237 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3238 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3239 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3240 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3241 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3242 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3243 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3244 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3245 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3246 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3247 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3248 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3249 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3250 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3251 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3252 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3253 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3254 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3255 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3256 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3257 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3258 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3259 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3260 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3261 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3262 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3263 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3264 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3265 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3266 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3267 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3268 | PTA_XSAVEOPT | PTA_FSGSBASE},
3269 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3270 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3271 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3272 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3273 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3274 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3275 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE},
3276 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3277 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3278 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3279 | PTA_FXSR | PTA_XSAVE},
3280 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3281 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3282 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3283 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3284 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3285 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3286
3287 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3288 PTA_64BIT
3289 | PTA_HLE /* flags are only used for -march switch. */ },
3290 };
3291
3292 /* -mrecip options. */
3293 static struct
3294 {
3295 const char *string; /* option name */
3296 unsigned int mask; /* mask bits to set */
3297 }
3298 const recip_options[] =
3299 {
3300 { "all", RECIP_MASK_ALL },
3301 { "none", RECIP_MASK_NONE },
3302 { "div", RECIP_MASK_DIV },
3303 { "sqrt", RECIP_MASK_SQRT },
3304 { "vec-div", RECIP_MASK_VEC_DIV },
3305 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3306 };
3307
3308 int const pta_size = ARRAY_SIZE (processor_alias_table);
3309
3310 /* Set up prefix/suffix so the error messages refer to either the command
3311 line argument, or the attribute(target). */
3312 if (main_args_p)
3313 {
3314 prefix = "-m";
3315 suffix = "";
3316 sw = "switch";
3317 }
3318 else
3319 {
3320 prefix = "option(\"";
3321 suffix = "\")";
3322 sw = "attribute";
3323 }
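/* Roughly speaking, a bad -march value is then reported as
   "-march=..." when it comes from the command line, but as
   "option("arch=...")" when it comes from attribute((target)).  */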
3324
3325 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3326 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3327 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3328 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3329 #ifdef TARGET_BI_ARCH
3330 else
3331 {
3332 #if TARGET_BI_ARCH == 1
3333 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3334 is on and OPTION_MASK_ABI_X32 is off. We turn off
3335 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3336 -mx32. */
3337 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3338 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3339 #else
3340 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3341 on and OPTION_MASK_ABI_64 is off. We turn off
3342 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3343 -m64. */
3344 if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3345 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3346 #endif
3347 }
3348 #endif
3349
3350 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3351 {
3352 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3353 OPTION_MASK_ABI_64 for TARGET_X32. */
3354 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3355 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3356 }
3357 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3358 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3359 | OPTION_MASK_ABI_X32
3360 | OPTION_MASK_ABI_64);
3361 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3362 {
3363 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3364 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3365 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3366 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3367 }
3368
3369 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3370 SUBTARGET_OVERRIDE_OPTIONS;
3371 #endif
3372
3373 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3374 SUBSUBTARGET_OVERRIDE_OPTIONS;
3375 #endif
3376
3377 /* -fPIC is the default for x86_64. */
3378 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3379 opts->x_flag_pic = 2;
3380
3381 /* Need to check -mtune=generic first. */
3382 if (opts->x_ix86_tune_string)
3383 {
3384 /* As special support for cross compilers we read -mtune=native
3385 as -mtune=generic. With native compilers we won't see the
3386 -mtune=native, as it was changed by the driver. */
3387 if (!strcmp (opts->x_ix86_tune_string, "native"))
3388 {
3389 opts->x_ix86_tune_string = "generic";
3390 }
3391 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3392 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3393 "%stune=k8%s or %stune=generic%s instead as appropriate",
3394 prefix, suffix, prefix, suffix, prefix, suffix);
3395 }
3396 else
3397 {
3398 if (opts->x_ix86_arch_string)
3399 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3400 if (!opts->x_ix86_tune_string)
3401 {
3402 opts->x_ix86_tune_string
3403 = processor_target_table[TARGET_CPU_DEFAULT].name;
3404 ix86_tune_defaulted = 1;
3405 }
3406
3407 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3408 or defaulted. We need to use a sensible tune option. */
3409 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3410 {
3411 opts->x_ix86_tune_string = "generic";
3412 }
3413 }
3414
3415 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3416 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3417 {
3418 /* rep; movq isn't available in 32-bit code. */
3419 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3420 opts->x_ix86_stringop_alg = no_stringop;
3421 }
3422
3423 if (!opts->x_ix86_arch_string)
3424 opts->x_ix86_arch_string
3425 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3426 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3427 else
3428 ix86_arch_specified = 1;
3429
3430 if (opts_set->x_ix86_pmode)
3431 {
3432 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3433 && opts->x_ix86_pmode == PMODE_SI)
3434 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3435 && opts->x_ix86_pmode == PMODE_DI))
3436 error ("address mode %qs not supported in the %s bit mode",
3437 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3438 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3439 }
3440 else
3441 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3442 ? PMODE_DI : PMODE_SI;
3443
3444 if (!opts_set->x_ix86_abi)
3445 opts->x_ix86_abi = DEFAULT_ABI;
3446
3447 /* For targets using the MS ABI, enable ms-extensions unless it was
3448 explicitly turned off. For non-MS ABIs we turn this option
3449 off. */
3450 if (!opts_set->x_flag_ms_extensions)
3451 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3452
3453 if (opts_set->x_ix86_cmodel)
3454 {
3455 switch (opts->x_ix86_cmodel)
3456 {
3457 case CM_SMALL:
3458 case CM_SMALL_PIC:
3459 if (opts->x_flag_pic)
3460 opts->x_ix86_cmodel = CM_SMALL_PIC;
3461 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3462 error ("code model %qs not supported in the %s bit mode",
3463 "small", "32");
3464 break;
3465
3466 case CM_MEDIUM:
3467 case CM_MEDIUM_PIC:
3468 if (opts->x_flag_pic)
3469 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3470 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3471 error ("code model %qs not supported in the %s bit mode",
3472 "medium", "32");
3473 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3474 error ("code model %qs not supported in x32 mode",
3475 "medium");
3476 break;
3477
3478 case CM_LARGE:
3479 case CM_LARGE_PIC:
3480 if (opts->x_flag_pic)
3481 opts->x_ix86_cmodel = CM_LARGE_PIC;
3482 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3483 error ("code model %qs not supported in the %s bit mode",
3484 "large", "32");
3485 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3486 error ("code model %qs not supported in x32 mode",
3487 "large");
3488 break;
3489
3490 case CM_32:
3491 if (opts->x_flag_pic)
3492 error ("code model %s does not support PIC mode", "32");
3493 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3494 error ("code model %qs not supported in the %s bit mode",
3495 "32", "64");
3496 break;
3497
3498 case CM_KERNEL:
3499 if (opts->x_flag_pic)
3500 {
3501 error ("code model %s does not support PIC mode", "kernel");
3502 opts->x_ix86_cmodel = CM_32;
3503 }
3504 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3505 error ("code model %qs not supported in the %s bit mode",
3506 "kernel", "32");
3507 break;
3508
3509 default:
3510 gcc_unreachable ();
3511 }
3512 }
3513 else
3514 {
3515 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3516 use of rip-relative addressing. This eliminates fixups that
3517 would otherwise be needed if this object is to be placed in a
3518 DLL, and is essentially just as efficient as direct addressing. */
3519 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3520 && (TARGET_RDOS || TARGET_PECOFF))
3521 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3522 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3523 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3524 else
3525 opts->x_ix86_cmodel = CM_32;
3526 }
3527 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3528 {
3529 error ("-masm=intel not supported in this configuration");
3530 opts->x_ix86_asm_dialect = ASM_ATT;
3531 }
3532 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3533 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3534 sorry ("%i-bit mode not compiled in",
3535 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3536
3537 for (i = 0; i < pta_size; i++)
3538 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3539 {
3540 ix86_schedule = processor_alias_table[i].schedule;
3541 ix86_arch = processor_alias_table[i].processor;
3542 /* Default cpu tuning to the architecture. */
3543 ix86_tune = ix86_arch;
3544
3545 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3546 && !(processor_alias_table[i].flags & PTA_64BIT))
3547 error ("CPU you selected does not support x86-64 "
3548 "instruction set");
3549
3550 if (processor_alias_table[i].flags & PTA_MMX
3551 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3552 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3553 if (processor_alias_table[i].flags & PTA_3DNOW
3554 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3555 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3556 if (processor_alias_table[i].flags & PTA_3DNOW_A
3557 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3558 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3559 if (processor_alias_table[i].flags & PTA_SSE
3560 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3561 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3562 if (processor_alias_table[i].flags & PTA_SSE2
3563 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3564 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3565 if (processor_alias_table[i].flags & PTA_SSE3
3566 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3567 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3568 if (processor_alias_table[i].flags & PTA_SSSE3
3569 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3570 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3571 if (processor_alias_table[i].flags & PTA_SSE4_1
3572 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3573 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3574 if (processor_alias_table[i].flags & PTA_SSE4_2
3575 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3576 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3577 if (processor_alias_table[i].flags & PTA_AVX
3578 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3579 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3580 if (processor_alias_table[i].flags & PTA_AVX2
3581 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3582 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3583 if (processor_alias_table[i].flags & PTA_FMA
3584 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3585 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3586 if (processor_alias_table[i].flags & PTA_SSE4A
3587 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3588 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3589 if (processor_alias_table[i].flags & PTA_FMA4
3590 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3591 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3592 if (processor_alias_table[i].flags & PTA_XOP
3593 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3594 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3595 if (processor_alias_table[i].flags & PTA_LWP
3596 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3597 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3598 if (processor_alias_table[i].flags & PTA_ABM
3599 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3600 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3601 if (processor_alias_table[i].flags & PTA_BMI
3602 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3603 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3604 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3605 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3606 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3607 if (processor_alias_table[i].flags & PTA_TBM
3608 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3609 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3610 if (processor_alias_table[i].flags & PTA_BMI2
3611 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3612 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3613 if (processor_alias_table[i].flags & PTA_CX16
3614 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3615 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3616 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3617 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3618 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3619 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3620 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3621 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3622 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3623 if (processor_alias_table[i].flags & PTA_MOVBE
3624 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3625 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3626 if (processor_alias_table[i].flags & PTA_AES
3627 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3628 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3629 if (processor_alias_table[i].flags & PTA_SHA
3630 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3631 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3632 if (processor_alias_table[i].flags & PTA_PCLMUL
3633 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3634 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3635 if (processor_alias_table[i].flags & PTA_FSGSBASE
3636 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3637 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3638 if (processor_alias_table[i].flags & PTA_RDRND
3639 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3640 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3641 if (processor_alias_table[i].flags & PTA_F16C
3642 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3643 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3644 if (processor_alias_table[i].flags & PTA_RTM
3645 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3646 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3647 if (processor_alias_table[i].flags & PTA_HLE
3648 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3649 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3650 if (processor_alias_table[i].flags & PTA_PRFCHW
3651 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3652 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3653 if (processor_alias_table[i].flags & PTA_RDSEED
3654 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3655 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3656 if (processor_alias_table[i].flags & PTA_ADX
3657 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3658 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3659 if (processor_alias_table[i].flags & PTA_FXSR
3660 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3661 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3662 if (processor_alias_table[i].flags & PTA_XSAVE
3663 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3664 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3665 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3666 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3667 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3668 if (processor_alias_table[i].flags & PTA_AVX512F
3669 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3670 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3671 if (processor_alias_table[i].flags & PTA_AVX512ER
3672 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3673 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3674 if (processor_alias_table[i].flags & PTA_AVX512PF
3675 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3676 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3677 if (processor_alias_table[i].flags & PTA_AVX512CD
3678 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3679 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3680 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3681 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3682 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3683 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3684 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3685 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3686 if (processor_alias_table[i].flags & PTA_XSAVEC
3687 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3688 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3689 if (processor_alias_table[i].flags & PTA_XSAVES
3690 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3691 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3692 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3693 x86_prefetch_sse = true;
3694
3695 break;
3696 }
3697
3698 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3699 error ("generic CPU can be used only for %stune=%s %s",
3700 prefix, suffix, sw);
3701 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3702 error ("intel CPU can be used only for %stune=%s %s",
3703 prefix, suffix, sw);
3704 else if (i == pta_size)
3705 error ("bad value (%s) for %sarch=%s %s",
3706 opts->x_ix86_arch_string, prefix, suffix, sw);
3707
3708 ix86_arch_mask = 1u << ix86_arch;
3709 for (i = 0; i < X86_ARCH_LAST; ++i)
3710 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3711
3712 for (i = 0; i < pta_size; i++)
3713 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3714 {
3715 ix86_schedule = processor_alias_table[i].schedule;
3716 ix86_tune = processor_alias_table[i].processor;
3717 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3718 {
3719 if (!(processor_alias_table[i].flags & PTA_64BIT))
3720 {
3721 if (ix86_tune_defaulted)
3722 {
3723 opts->x_ix86_tune_string = "x86-64";
3724 for (i = 0; i < pta_size; i++)
3725 if (! strcmp (opts->x_ix86_tune_string,
3726 processor_alias_table[i].name))
3727 break;
3728 ix86_schedule = processor_alias_table[i].schedule;
3729 ix86_tune = processor_alias_table[i].processor;
3730 }
3731 else
3732 error ("CPU you selected does not support x86-64 "
3733 "instruction set");
3734 }
3735 }
3736 /* Intel CPUs have always interpreted SSE prefetch instructions as
3737 NOPs; so, we can enable SSE prefetch instructions even when
3738 -mtune (rather than -march) points us to a processor that has them.
3739 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3740 higher processors. */
3741 if (TARGET_CMOV
3742 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3743 x86_prefetch_sse = true;
3744 break;
3745 }
3746
3747 if (ix86_tune_specified && i == pta_size)
3748 error ("bad value (%s) for %stune=%s %s",
3749 opts->x_ix86_tune_string, prefix, suffix, sw);
3750
3751 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3752
3753 #ifndef USE_IX86_FRAME_POINTER
3754 #define USE_IX86_FRAME_POINTER 0
3755 #endif
3756
3757 #ifndef USE_X86_64_FRAME_POINTER
3758 #define USE_X86_64_FRAME_POINTER 0
3759 #endif
3760
3761 /* Set the default values for switches whose default depends on TARGET_64BIT
3762 in case they weren't overwritten by command line options. */
3763 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3764 {
3765 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3766 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3767 if (opts->x_flag_asynchronous_unwind_tables
3768 && !opts_set->x_flag_unwind_tables
3769 && TARGET_64BIT_MS_ABI)
3770 opts->x_flag_unwind_tables = 1;
3771 if (opts->x_flag_asynchronous_unwind_tables == 2)
3772 opts->x_flag_unwind_tables
3773 = opts->x_flag_asynchronous_unwind_tables = 1;
3774 if (opts->x_flag_pcc_struct_return == 2)
3775 opts->x_flag_pcc_struct_return = 0;
3776 }
3777 else
3778 {
3779 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3780 opts->x_flag_omit_frame_pointer
3781 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3782 if (opts->x_flag_asynchronous_unwind_tables == 2)
3783 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3784 if (opts->x_flag_pcc_struct_return == 2)
3785 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3786 }
3787
3788 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3789 if (opts->x_optimize_size)
3790 ix86_cost = &ix86_size_cost;
3791 else
3792 ix86_cost = ix86_tune_cost;
3793
3794 /* Arrange to set up i386_stack_locals for all functions. */
3795 init_machine_status = ix86_init_machine_status;
3796
3797 /* Validate -mregparm= value. */
3798 if (opts_set->x_ix86_regparm)
3799 {
3800 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3801 warning (0, "-mregparm is ignored in 64-bit mode");
3802 if (opts->x_ix86_regparm > REGPARM_MAX)
3803 {
3804 error ("-mregparm=%d is not between 0 and %d",
3805 opts->x_ix86_regparm, REGPARM_MAX);
3806 opts->x_ix86_regparm = 0;
3807 }
3808 }
3809 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3810 opts->x_ix86_regparm = REGPARM_MAX;
3811
3812 /* Default align_* from the processor table. */
3813 if (opts->x_align_loops == 0)
3814 {
3815 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3816 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3817 }
3818 if (opts->x_align_jumps == 0)
3819 {
3820 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3821 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3822 }
3823 if (opts->x_align_functions == 0)
3824 {
3825 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3826 }
3827
3828 /* Provide default for -mbranch-cost= value. */
3829 if (!opts_set->x_ix86_branch_cost)
3830 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3831
3832 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3833 {
3834 opts->x_target_flags
3835 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3836
3837 /* Enable by default the SSE and MMX builtins. Do allow the user to
3838 explicitly disable any of these. In particular, disabling SSE and
3839 MMX for kernel code is extremely useful. */
3840 if (!ix86_arch_specified)
3841 opts->x_ix86_isa_flags
3842 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3843 | TARGET_SUBTARGET64_ISA_DEFAULT)
3844 & ~opts->x_ix86_isa_flags_explicit);
3845
3846 if (TARGET_RTD_P (opts->x_target_flags))
3847 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3848 }
3849 else
3850 {
3851 opts->x_target_flags
3852 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3853
3854 if (!ix86_arch_specified)
3855 opts->x_ix86_isa_flags
3856 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3857
3858 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3859 when the programmer takes care to keep the stack from being destroyed. */
3860 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3861 opts->x_target_flags |= MASK_NO_RED_ZONE;
3862 }
3863
3864 /* Keep nonleaf frame pointers. */
3865 if (opts->x_flag_omit_frame_pointer)
3866 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3867 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3868 opts->x_flag_omit_frame_pointer = 1;
3869
3870 /* If we're doing fast math, we don't care about comparison order
3871 wrt NaNs. This lets us use a shorter comparison sequence. */
3872 if (opts->x_flag_finite_math_only)
3873 opts->x_target_flags &= ~MASK_IEEE_FP;
3874
3875 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3876 since the insns won't need emulation. */
3877 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3878 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3879
3880 /* Likewise, if the target doesn't have a 387, or we've specified
3881 software floating point, don't use 387 inline intrinsics. */
3882 if (!TARGET_80387_P (opts->x_target_flags))
3883 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3884
3885 /* Turn on MMX builtins for -msse. */
3886 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3887 opts->x_ix86_isa_flags
3888 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3889
3890 /* Enable SSE prefetch. */
3891 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3892 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3893 x86_prefetch_sse = true;
3894
3895 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3896 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3897 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3898 opts->x_ix86_isa_flags
3899 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3900
3901 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3902 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3903 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3904 opts->x_ix86_isa_flags
3905 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3906
3907 /* Enable lzcnt instruction for -mabm. */
3908 if (TARGET_ABM_P (opts->x_ix86_isa_flags))
3909 opts->x_ix86_isa_flags
3910 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3911
3912 /* Validate -mpreferred-stack-boundary= value or default it to
3913 PREFERRED_STACK_BOUNDARY_DEFAULT. */
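/* The argument is a power-of-two exponent measured in bytes: e.g. a value
   of 4 requests a 2**4 = 16-byte boundary, stored below as 128 bits.  */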
3914 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3915 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3916 {
3917 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3918 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3919 int max = (TARGET_SEH ? 4 : 12);
3920
3921 if (opts->x_ix86_preferred_stack_boundary_arg < min
3922 || opts->x_ix86_preferred_stack_boundary_arg > max)
3923 {
3924 if (min == max)
3925 error ("-mpreferred-stack-boundary is not supported "
3926 "for this target");
3927 else
3928 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3929 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3930 }
3931 else
3932 ix86_preferred_stack_boundary
3933 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3934 }
3935
3936 /* Set the default value for -mstackrealign. */
3937 if (opts->x_ix86_force_align_arg_pointer == -1)
3938 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3939
3940 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3941
3942 /* Validate -mincoming-stack-boundary= value or default it to
3943 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3944 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3945 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3946 {
3947 if (opts->x_ix86_incoming_stack_boundary_arg
3948 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3949 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3950 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3951 opts->x_ix86_incoming_stack_boundary_arg,
3952 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3953 else
3954 {
3955 ix86_user_incoming_stack_boundary
3956 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3957 ix86_incoming_stack_boundary
3958 = ix86_user_incoming_stack_boundary;
3959 }
3960 }
3961
3962 /* Accept -msseregparm only if at least SSE support is enabled. */
3963 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3964 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3965 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3966
3967 if (opts_set->x_ix86_fpmath)
3968 {
3969 if (opts->x_ix86_fpmath & FPMATH_SSE)
3970 {
3971 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3972 {
3973 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3974 opts->x_ix86_fpmath = FPMATH_387;
3975 }
3976 else if ((opts->x_ix86_fpmath & FPMATH_387)
3977 && !TARGET_80387_P (opts->x_target_flags))
3978 {
3979 warning (0, "387 instruction set disabled, using SSE arithmetics");
3980 opts->x_ix86_fpmath = FPMATH_SSE;
3981 }
3982 }
3983 }
3984 /* For all chips supporting SSE2, -mfpmath=sse performs better than
3985 -mfpmath=387. The latter is nevertheless the default on many targets,
3986 since the extra 80-bit precision of temporaries is considered part of the ABI.
3987 Override the default at least for -ffast-math.
3988 TODO: -mfpmath=both seems to produce equally performing code with slightly
3989 smaller binaries. It is however not clear if register allocation is
3990 ready for this setting.
3991 Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
3992 codegen. We may switch to 387 with -ffast-math for size-optimized
3993 functions. */
3994 else if (fast_math_flags_set_p (&global_options)
3995 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
3996 opts->x_ix86_fpmath = FPMATH_SSE;
3997 else
3998 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
3999
4000 /* If the i387 is disabled, then do not return values in it. */
4001 if (!TARGET_80387_P (opts->x_target_flags))
4002 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4003
4004 /* Use an external vectorized library when vectorizing intrinsics. */
4005 if (opts_set->x_ix86_veclibabi_type)
4006 switch (opts->x_ix86_veclibabi_type)
4007 {
4008 case ix86_veclibabi_type_svml:
4009 ix86_veclib_handler = ix86_veclibabi_svml;
4010 break;
4011
4012 case ix86_veclibabi_type_acml:
4013 ix86_veclib_handler = ix86_veclibabi_acml;
4014 break;
4015
4016 default:
4017 gcc_unreachable ();
4018 }
4019
4020 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4021 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4022 && !opts->x_optimize_size)
4023 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4024
4025 /* If stack probes are required, the space used for large function
4026 arguments on the stack must also be probed, so enable
4027 -maccumulate-outgoing-args so this happens in the prologue. */
4028 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4029 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4030 {
4031 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4032 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4033 "for correctness", prefix, suffix);
4034 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4035 }
4036
4037 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4038 {
4039 char *p;
4040 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4041 p = strchr (internal_label_prefix, 'X');
4042 internal_label_prefix_len = p - internal_label_prefix;
4043 *p = '\0';
4044 }
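/* For example, if ASM_GENERATE_INTERNAL_LABEL were to produce ".LX0"
   here, the recorded prefix would be ".L" with length 2.  */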
4045
4046 /* When the scheduling description is not available, disable the scheduler
4047 pass so it won't slow down the compilation and make x87 code slower. */
4048 if (!TARGET_SCHEDULE)
4049 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4050
4051 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4052 ix86_tune_cost->simultaneous_prefetches,
4053 opts->x_param_values,
4054 opts_set->x_param_values);
4055 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4056 ix86_tune_cost->prefetch_block,
4057 opts->x_param_values,
4058 opts_set->x_param_values);
4059 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4060 ix86_tune_cost->l1_cache_size,
4061 opts->x_param_values,
4062 opts_set->x_param_values);
4063 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4064 ix86_tune_cost->l2_cache_size,
4065 opts->x_param_values,
4066 opts_set->x_param_values);
4067
4068 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4069 if (opts->x_flag_prefetch_loop_arrays < 0
4070 && HAVE_prefetch
4071 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4072 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4073 opts->x_flag_prefetch_loop_arrays = 1;
4074
4075 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4076 can be optimized to ap = __builtin_next_arg (0). */
4077 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4078 targetm.expand_builtin_va_start = NULL;
4079
4080 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4081 {
4082 ix86_gen_leave = gen_leave_rex64;
4083 if (Pmode == DImode)
4084 {
4085 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4086 ix86_gen_tls_local_dynamic_base_64
4087 = gen_tls_local_dynamic_base_64_di;
4088 }
4089 else
4090 {
4091 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4092 ix86_gen_tls_local_dynamic_base_64
4093 = gen_tls_local_dynamic_base_64_si;
4094 }
4095 }
4096 else
4097 ix86_gen_leave = gen_leave;
4098
4099 if (Pmode == DImode)
4100 {
4101 ix86_gen_add3 = gen_adddi3;
4102 ix86_gen_sub3 = gen_subdi3;
4103 ix86_gen_sub3_carry = gen_subdi3_carry;
4104 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4105 ix86_gen_andsp = gen_anddi3;
4106 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4107 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4108 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4109 ix86_gen_monitor = gen_sse3_monitor_di;
4110 }
4111 else
4112 {
4113 ix86_gen_add3 = gen_addsi3;
4114 ix86_gen_sub3 = gen_subsi3;
4115 ix86_gen_sub3_carry = gen_subsi3_carry;
4116 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4117 ix86_gen_andsp = gen_andsi3;
4118 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4119 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4120 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4121 ix86_gen_monitor = gen_sse3_monitor_si;
4122 }
4123
4124 #ifdef USE_IX86_CLD
4125 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4126 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4127 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4128 #endif
4129
4130 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4131 {
4132 if (opts->x_flag_fentry > 0)
4133 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4134 "with -fpic");
4135 opts->x_flag_fentry = 0;
4136 }
4137 else if (TARGET_SEH)
4138 {
4139 if (opts->x_flag_fentry == 0)
4140 sorry ("-mno-fentry isn%'t compatible with SEH");
4141 opts->x_flag_fentry = 1;
4142 }
4143 else if (opts->x_flag_fentry < 0)
4144 {
4145 #if defined(PROFILE_BEFORE_PROLOGUE)
4146 opts->x_flag_fentry = 1;
4147 #else
4148 opts->x_flag_fentry = 0;
4149 #endif
4150 }
4151
4152 /* When not optimizing for size, enable vzeroupper optimization for
4153 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4154 AVX unaligned loads/stores. */
4155 if (!opts->x_optimize_size)
4156 {
4157 if (flag_expensive_optimizations
4158 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4159 opts->x_target_flags |= MASK_VZEROUPPER;
4160 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4161 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4162 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4163 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4164 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4165 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4166 /* Enable 128-bit AVX instruction generation
4167 for the auto-vectorizer. */
4168 if (TARGET_AVX128_OPTIMAL
4169 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4170 opts->x_target_flags |= MASK_PREFER_AVX128;
4171 }
4172
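/* Parse -mrecip=...: a comma-separated list of the names in recip_options[]
   above, each optionally prefixed with '!' to clear the corresponding bits
   instead of setting them; "default" behaves like "all".  For example,
   something like -mrecip=all,!sqrt would enable every reciprocal
   approximation except the scalar square root.  */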
4173 if (opts->x_ix86_recip_name)
4174 {
4175 char *p = ASTRDUP (opts->x_ix86_recip_name);
4176 char *q;
4177 unsigned int mask, i;
4178 bool invert;
4179
4180 while ((q = strtok (p, ",")) != NULL)
4181 {
4182 p = NULL;
4183 if (*q == '!')
4184 {
4185 invert = true;
4186 q++;
4187 }
4188 else
4189 invert = false;
4190
4191 if (!strcmp (q, "default"))
4192 mask = RECIP_MASK_ALL;
4193 else
4194 {
4195 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4196 if (!strcmp (q, recip_options[i].string))
4197 {
4198 mask = recip_options[i].mask;
4199 break;
4200 }
4201
4202 if (i == ARRAY_SIZE (recip_options))
4203 {
4204 error ("unknown option for -mrecip=%s", q);
4205 invert = false;
4206 mask = RECIP_MASK_NONE;
4207 }
4208 }
4209
4210 opts->x_recip_mask_explicit |= mask;
4211 if (invert)
4212 opts->x_recip_mask &= ~mask;
4213 else
4214 opts->x_recip_mask |= mask;
4215 }
4216 }
4217
4218 if (TARGET_RECIP_P (opts->x_target_flags))
4219 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4220 else if (opts_set->x_target_flags & MASK_RECIP)
4221 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4222
4223 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4224 for 64-bit Bionic. */
4225 if (TARGET_HAS_BIONIC
4226 && !(opts_set->x_target_flags
4227 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4228 opts->x_target_flags |= (TARGET_64BIT
4229 ? MASK_LONG_DOUBLE_128
4230 : MASK_LONG_DOUBLE_64);
4231
4232 /* Only one of them can be active. */
4233 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4234 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4235
4236 /* Save the initial options in case the user does function specific
4237 options. */
4238 if (main_args_p)
4239 target_option_default_node = target_option_current_node
4240 = build_target_option_node (opts);
4241
4242 /* Handle stack protector */
4243 if (!opts_set->x_ix86_stack_protector_guard)
4244 opts->x_ix86_stack_protector_guard
4245 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4246
4247 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4248 if (opts->x_ix86_tune_memcpy_strategy)
4249 {
4250 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4251 ix86_parse_stringop_strategy_string (str, false);
4252 free (str);
4253 }
4254
4255 if (opts->x_ix86_tune_memset_strategy)
4256 {
4257 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4258 ix86_parse_stringop_strategy_string (str, true);
4259 free (str);
4260 }
4261 }
4262
4263 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4264
4265 static void
4266 ix86_option_override (void)
4267 {
4268 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4269 static struct register_pass_info insert_vzeroupper_info
4270 = { pass_insert_vzeroupper, "reload",
4271 1, PASS_POS_INSERT_AFTER
4272 };
4273
4274 ix86_option_override_internal (true, &global_options, &global_options_set);
4275
4276
4277 /* This needs to be done at start up. It's convenient to do it here. */
4278 register_pass (&insert_vzeroupper_info);
4279 }
4280
4281 /* Update register usage after having seen the compiler flags. */
4282
4283 static void
4284 ix86_conditional_register_usage (void)
4285 {
4286 int i, c_mask;
4287 unsigned int j;
4288
4289 /* The PIC register, if it exists, is fixed. */
4290 j = PIC_OFFSET_TABLE_REGNUM;
4291 if (j != INVALID_REGNUM)
4292 fixed_regs[j] = call_used_regs[j] = 1;
4293
4294 /* For 32-bit targets, squash the REX registers. */
4295 if (! TARGET_64BIT)
4296 {
4297 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4298 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4299 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4300 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4301 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4302 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4303 }
4304
4305 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4306 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4307 : TARGET_64BIT ? (1 << 2)
4308 : (1 << 1));
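/* Entries in CALL_USED_REGISTERS greater than 1 are bit masks; the single
   bit selected here (MS 64-bit ABI, other 64-bit, or 32-bit) decides in
   the loop below whether such a conditionally call-used register is
   treated as call-used for the current ABI.  */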
4309
4310 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4311
4312 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4313 {
4314 /* Set/reset conditionally defined registers from
4315 CALL_USED_REGISTERS initializer. */
4316 if (call_used_regs[i] > 1)
4317 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4318
4319 /* Calculate registers of CLOBBERED_REGS register set
4320 as call used registers from GENERAL_REGS register set. */
4321 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4322 && call_used_regs[i])
4323 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4324 }
4325
4326 /* If MMX is disabled, squash the registers. */
4327 if (! TARGET_MMX)
4328 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4329 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4330 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4331
4332 /* If SSE is disabled, squash the registers. */
4333 if (! TARGET_SSE)
4334 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4335 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4336 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4337
4338 /* If the FPU is disabled, squash the registers. */
4339 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4340 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4341 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4342 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4343
4344 /* If AVX512F is disabled, squash the registers. */
4345 if (! TARGET_AVX512F)
4346 {
4347 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4348 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4349
4350 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4351 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4352 }
4353 }
4354
4355 \f
4356 /* Save the current options */
4357
4358 static void
4359 ix86_function_specific_save (struct cl_target_option *ptr,
4360 struct gcc_options *opts)
4361 {
4362 ptr->arch = ix86_arch;
4363 ptr->schedule = ix86_schedule;
4364 ptr->tune = ix86_tune;
4365 ptr->branch_cost = ix86_branch_cost;
4366 ptr->tune_defaulted = ix86_tune_defaulted;
4367 ptr->arch_specified = ix86_arch_specified;
4368 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4369 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4370 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4371 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4372 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4373 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4374 ptr->x_ix86_abi = opts->x_ix86_abi;
4375 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4376 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4377 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4378 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4379 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4380 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4381 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4382 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4383 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4384 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4385 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4386 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4387 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4388 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4389 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4390 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4391 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4392 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4393 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4394 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4395
4396 /* The fields are char but the variables are not; make sure the
4397 values fit in the fields. */
4398 gcc_assert (ptr->arch == ix86_arch);
4399 gcc_assert (ptr->schedule == ix86_schedule);
4400 gcc_assert (ptr->tune == ix86_tune);
4401 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4402 }
4403
4404 /* Restore the current options */
4405
4406 static void
4407 ix86_function_specific_restore (struct gcc_options *opts,
4408 struct cl_target_option *ptr)
4409 {
4410 enum processor_type old_tune = ix86_tune;
4411 enum processor_type old_arch = ix86_arch;
4412 unsigned int ix86_arch_mask;
4413 int i;
4414
4415 /* We don't change -fPIC. */
4416 opts->x_flag_pic = flag_pic;
4417
4418 ix86_arch = (enum processor_type) ptr->arch;
4419 ix86_schedule = (enum attr_cpu) ptr->schedule;
4420 ix86_tune = (enum processor_type) ptr->tune;
4421 opts->x_ix86_branch_cost = ptr->branch_cost;
4422 ix86_tune_defaulted = ptr->tune_defaulted;
4423 ix86_arch_specified = ptr->arch_specified;
4424 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4425 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4426 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4427 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4428 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4429 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4430 opts->x_ix86_abi = ptr->x_ix86_abi;
4431 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4432 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4433 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4434 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4435 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4436 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4437 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4438 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4439 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4440 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4441 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4442 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4443 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4444 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4445 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4446 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4447 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4448 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4449 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4450 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4451
4452 /* Recreate the arch feature tests if the arch changed */
4453 if (old_arch != ix86_arch)
4454 {
4455 ix86_arch_mask = 1u << ix86_arch;
4456 for (i = 0; i < X86_ARCH_LAST; ++i)
4457 ix86_arch_features[i]
4458 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4459 }
4460
4461 /* Recreate the tune optimization tests */
4462 if (old_tune != ix86_tune)
4463 set_ix86_tune_features (ix86_tune, false);
4464 }
4465
4466 /* Print the current options */
4467
4468 static void
4469 ix86_function_specific_print (FILE *file, int indent,
4470 struct cl_target_option *ptr)
4471 {
4472 char *target_string
4473 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4474 NULL, NULL, ptr->x_ix86_fpmath, false);
4475
4476 gcc_assert (ptr->arch < PROCESSOR_max);
4477 fprintf (file, "%*sarch = %d (%s)\n",
4478 indent, "",
4479 ptr->arch, processor_target_table[ptr->arch].name);
4480
4481 gcc_assert (ptr->tune < PROCESSOR_max);
4482 fprintf (file, "%*stune = %d (%s)\n",
4483 indent, "",
4484 ptr->tune, processor_target_table[ptr->tune].name);
4485
4486 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4487
4488 if (target_string)
4489 {
4490 fprintf (file, "%*s%s\n", indent, "", target_string);
4491 free (target_string);
4492 }
4493 }
4494
4495 \f
4496 /* Inner function to process the attribute((target(...))), take an argument and
4497 set the current options from the argument. If we have a list, recursively go
4498 over the list. */
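/* For illustration only (an assumed example, not taken from this file):

     __attribute__((target ("avx2,no-3dnow")))
     void fast_path (void);

   would be handled here by splitting the string at commas, stripping an
   optional "no-" prefix, and looking each name up in the attrs[] table
   defined in the function below.  */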
4499
4500 static bool
4501 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4502 struct gcc_options *opts,
4503 struct gcc_options *opts_set,
4504 struct gcc_options *enum_opts_set)
4505 {
4506 char *next_optstr;
4507 bool ret = true;
4508
4509 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4510 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4511 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4512 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4513 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4514
4515 enum ix86_opt_type
4516 {
4517 ix86_opt_unknown,
4518 ix86_opt_yes,
4519 ix86_opt_no,
4520 ix86_opt_str,
4521 ix86_opt_enum,
4522 ix86_opt_isa
4523 };
4524
4525 static const struct
4526 {
4527 const char *string;
4528 size_t len;
4529 enum ix86_opt_type type;
4530 int opt;
4531 int mask;
4532 } attrs[] = {
4533 /* isa options */
4534 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4535 IX86_ATTR_ISA ("abm", OPT_mabm),
4536 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4537 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4538 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4539 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4540 IX86_ATTR_ISA ("aes", OPT_maes),
4541 IX86_ATTR_ISA ("sha", OPT_msha),
4542 IX86_ATTR_ISA ("avx", OPT_mavx),
4543 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4544 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4545 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4546 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4547 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4548 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4549 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4550 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4551 IX86_ATTR_ISA ("sse", OPT_msse),
4552 IX86_ATTR_ISA ("sse2", OPT_msse2),
4553 IX86_ATTR_ISA ("sse3", OPT_msse3),
4554 IX86_ATTR_ISA ("sse4", OPT_msse4),
4555 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4556 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4557 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4558 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4559 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4560 IX86_ATTR_ISA ("fma", OPT_mfma),
4561 IX86_ATTR_ISA ("xop", OPT_mxop),
4562 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4563 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4564 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4565 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4566 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4567 IX86_ATTR_ISA ("hle", OPT_mhle),
4568 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4569 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4570 IX86_ATTR_ISA ("adx", OPT_madx),
4571 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4572 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4573 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4574 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4575 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4576 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4577 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4578
4579 /* enum options */
4580 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4581
4582 /* string options */
4583 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4584 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4585
4586 /* flag options */
4587 IX86_ATTR_YES ("cld",
4588 OPT_mcld,
4589 MASK_CLD),
4590
4591 IX86_ATTR_NO ("fancy-math-387",
4592 OPT_mfancy_math_387,
4593 MASK_NO_FANCY_MATH_387),
4594
4595 IX86_ATTR_YES ("ieee-fp",
4596 OPT_mieee_fp,
4597 MASK_IEEE_FP),
4598
4599 IX86_ATTR_YES ("inline-all-stringops",
4600 OPT_minline_all_stringops,
4601 MASK_INLINE_ALL_STRINGOPS),
4602
4603 IX86_ATTR_YES ("inline-stringops-dynamically",
4604 OPT_minline_stringops_dynamically,
4605 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4606
4607 IX86_ATTR_NO ("align-stringops",
4608 OPT_mno_align_stringops,
4609 MASK_NO_ALIGN_STRINGOPS),
4610
4611 IX86_ATTR_YES ("recip",
4612 OPT_mrecip,
4613 MASK_RECIP),
4614
4615 };
4616
4617 /* If this is a list, recurse to get the options. */
4618 if (TREE_CODE (args) == TREE_LIST)
4619 {
4620 bool ret = true;
4621
4622 for (; args; args = TREE_CHAIN (args))
4623 if (TREE_VALUE (args)
4624 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4625 p_strings, opts, opts_set,
4626 enum_opts_set))
4627 ret = false;
4628
4629 return ret;
4630 }
4631
4632 else if (TREE_CODE (args) != STRING_CST)
4633 {
4634 error ("attribute %<target%> argument not a string");
4635 return false;
4636 }
4637
4638 /* Handle multiple arguments separated by commas. */
4639 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4640
4641 while (next_optstr && *next_optstr != '\0')
4642 {
4643 char *p = next_optstr;
4644 char *orig_p = p;
4645 char *comma = strchr (next_optstr, ',');
4646 const char *opt_string;
4647 size_t len, opt_len;
4648 int opt;
4649 bool opt_set_p;
4650 char ch;
4651 unsigned i;
4652 enum ix86_opt_type type = ix86_opt_unknown;
4653 int mask = 0;
4654
4655 if (comma)
4656 {
4657 *comma = '\0';
4658 len = comma - next_optstr;
4659 next_optstr = comma + 1;
4660 }
4661 else
4662 {
4663 len = strlen (p);
4664 next_optstr = NULL;
4665 }
4666
4667 /* Recognize no-xxx. */
4668 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4669 {
4670 opt_set_p = false;
4671 p += 3;
4672 len -= 3;
4673 }
4674 else
4675 opt_set_p = true;
4676
4677 /* Find the option. */
4678 ch = *p;
4679 opt = N_OPTS;
4680 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4681 {
4682 type = attrs[i].type;
4683 opt_len = attrs[i].len;
4684 if (ch == attrs[i].string[0]
4685 && ((type != ix86_opt_str && type != ix86_opt_enum)
4686 ? len == opt_len
4687 : len > opt_len)
4688 && memcmp (p, attrs[i].string, opt_len) == 0)
4689 {
4690 opt = attrs[i].opt;
4691 mask = attrs[i].mask;
4692 opt_string = attrs[i].string;
4693 break;
4694 }
4695 }
4696
4697 /* Process the option. */
4698 if (opt == N_OPTS)
4699 {
4700 error ("attribute(target(\"%s\")) is unknown", orig_p);
4701 ret = false;
4702 }
4703
4704 else if (type == ix86_opt_isa)
4705 {
4706 struct cl_decoded_option decoded;
4707
4708 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4709 ix86_handle_option (opts, opts_set,
4710 &decoded, input_location);
4711 }
4712
4713 else if (type == ix86_opt_yes || type == ix86_opt_no)
4714 {
4715 if (type == ix86_opt_no)
4716 opt_set_p = !opt_set_p;
4717
4718 if (opt_set_p)
4719 opts->x_target_flags |= mask;
4720 else
4721 opts->x_target_flags &= ~mask;
4722 }
4723
4724 else if (type == ix86_opt_str)
4725 {
4726 if (p_strings[opt])
4727 {
4728 error ("option(\"%s\") was already specified", opt_string);
4729 ret = false;
4730 }
4731 else
4732 p_strings[opt] = xstrdup (p + opt_len);
4733 }
4734
4735 else if (type == ix86_opt_enum)
4736 {
4737 bool arg_ok;
4738 int value;
4739
4740 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4741 if (arg_ok)
4742 set_option (opts, enum_opts_set, opt, value,
4743 p + opt_len, DK_UNSPECIFIED, input_location,
4744 global_dc);
4745 else
4746 {
4747 error ("attribute(target(\"%s\")) is unknown", orig_p);
4748 ret = false;
4749 }
4750 }
4751
4752 else
4753 gcc_unreachable ();
4754 }
4755
4756 return ret;
4757 }
4758
4759 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4760
4761 tree
4762 ix86_valid_target_attribute_tree (tree args,
4763 struct gcc_options *opts,
4764 struct gcc_options *opts_set)
4765 {
4766 const char *orig_arch_string = opts->x_ix86_arch_string;
4767 const char *orig_tune_string = opts->x_ix86_tune_string;
4768 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4769 int orig_tune_defaulted = ix86_tune_defaulted;
4770 int orig_arch_specified = ix86_arch_specified;
4771 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4772 tree t = NULL_TREE;
4773 int i;
4774 struct cl_target_option *def
4775 = TREE_TARGET_OPTION (target_option_default_node);
4776 struct gcc_options enum_opts_set;
4777
4778 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4779
4780 /* Process each of the options on the chain. */
4781 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4782 opts_set, &enum_opts_set))
4783 return error_mark_node;
4784
4785 /* If the changed options are different from the default, rerun
4786 ix86_option_override_internal, and then save the options away.
4787 The string options are attribute options, and will be undone
4788 when we copy the save structure. */
4789 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4790 || opts->x_target_flags != def->x_target_flags
4791 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4792 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4793 || enum_opts_set.x_ix86_fpmath)
4794 {
4795 /* If we are using the default tune= or arch=, undo the string assigned,
4796 and use the default. */
4797 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4798 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4799 else if (!orig_arch_specified)
4800 opts->x_ix86_arch_string = NULL;
4801
4802 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4803 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4804 else if (orig_tune_defaulted)
4805 opts->x_ix86_tune_string = NULL;
4806
4807 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4808 if (enum_opts_set.x_ix86_fpmath)
4809 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4810 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4811 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4812 {
4813 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4814 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4815 }
4816
4817 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4818 ix86_option_override_internal (false, opts, opts_set);
4819
4820 /* Add any builtin functions with the new isa if any. */
4821 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4822
4823 /* Save the current options unless we are validating options for
4824 #pragma. */
4825 t = build_target_option_node (opts);
4826
4827 opts->x_ix86_arch_string = orig_arch_string;
4828 opts->x_ix86_tune_string = orig_tune_string;
4829 opts_set->x_ix86_fpmath = orig_fpmath_set;
4830
4831 /* Free up memory allocated to hold the strings */
4832 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4833 free (option_strings[i]);
4834 }
4835
4836 return t;
4837 }
4838
4839 /* Hook to validate attribute((target("string"))). */
4840
4841 static bool
4842 ix86_valid_target_attribute_p (tree fndecl,
4843 tree ARG_UNUSED (name),
4844 tree args,
4845 int ARG_UNUSED (flags))
4846 {
4847 struct gcc_options func_options;
4848 tree new_target, new_optimize;
4849 bool ret = true;
4850
4851 /* attribute((target("default"))) does nothing, beyond
4852 affecting multi-versioning. */
4853 if (TREE_VALUE (args)
4854 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4855 && TREE_CHAIN (args) == NULL_TREE
4856 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4857 return true;
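  /* Sketch of the use-case this special-cases (an assumed example; GCC's
     function multi-versioning via the target attribute is a C++ front-end
     feature):

       __attribute__((target ("default"))) int foo (void) { return 0; }
       __attribute__((target ("avx2")))    int foo (void) { return 2; }

     The "default" version carries no target options of its own, so no
     target option node is built for it here.  */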
4858
4859 tree old_optimize = build_optimization_node (&global_options);
4860
4861 /* Get the optimization options of the current function. */
4862 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4863
4864 if (!func_optimize)
4865 func_optimize = old_optimize;
4866
4867 /* Init func_options. */
4868 memset (&func_options, 0, sizeof (func_options));
4869 init_options_struct (&func_options, NULL);
4870 lang_hooks.init_options_struct (&func_options);
4871
4872 cl_optimization_restore (&func_options,
4873 TREE_OPTIMIZATION (func_optimize));
4874
4875 /* Initialize func_options to the default before its target options can
4876 be set. */
4877 cl_target_option_restore (&func_options,
4878 TREE_TARGET_OPTION (target_option_default_node));
4879
4880 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4881 &global_options_set);
4882
4883 new_optimize = build_optimization_node (&func_options);
4884
4885 if (new_target == error_mark_node)
4886 ret = false;
4887
4888 else if (fndecl && new_target)
4889 {
4890 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4891
4892 if (old_optimize != new_optimize)
4893 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4894 }
4895
4896 return ret;
4897 }
4898
4899 \f
4900 /* Hook to determine if one function can safely inline another. */
4901
4902 static bool
4903 ix86_can_inline_p (tree caller, tree callee)
4904 {
4905 bool ret = false;
4906 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4907 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4908
4909 /* If callee has no option attributes, then it is ok to inline. */
4910 if (!callee_tree)
4911 ret = true;
4912
4913 /* If caller has no option attributes, but callee does then it is not ok to
4914 inline. */
4915 else if (!caller_tree)
4916 ret = false;
4917
4918 else
4919 {
4920 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4921 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4922
4923 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4924 function can inline an SSE2 function but an SSE2 function can't inline
4925 an SSE4 function. */
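      /* Worked example with illustrative flag values: if the caller enables
	 SSE2|SSE4.2 and the callee only SSE2, then
	 (caller_flags & callee_flags) == callee_flags holds and inlining is
	 allowed; with the roles swapped the test below fails.  */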
4926 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4927 != callee_opts->x_ix86_isa_flags)
4928 ret = false;
4929
4930 /* See if we have the same non-isa options. */
4931 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4932 ret = false;
4933
4934 /* See if arch, tune, etc. are the same. */
4935 else if (caller_opts->arch != callee_opts->arch)
4936 ret = false;
4937
4938 else if (caller_opts->tune != callee_opts->tune)
4939 ret = false;
4940
4941 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4942 ret = false;
4943
4944 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4945 ret = false;
4946
4947 else
4948 ret = true;
4949 }
4950
4951 return ret;
4952 }
4953
4954 \f
4955 /* Remember the last target of ix86_set_current_function. */
4956 static GTY(()) tree ix86_previous_fndecl;
4957
4958 /* Invalidate ix86_previous_fndecl cache. */
4959 void
4960 ix86_reset_previous_fndecl (void)
4961 {
4962 ix86_previous_fndecl = NULL_TREE;
4963 }
4964
4965 /* Establish appropriate back-end context for processing the function
4966 FNDECL. The argument might be NULL to indicate processing at top
4967 level, outside of any function scope. */
4968 static void
4969 ix86_set_current_function (tree fndecl)
4970 {
4971 /* Only change the context if the function changes. This hook is called
4972 several times in the course of compiling a function, and we don't want to
4973 slow things down too much or call target_reinit when it isn't safe. */
4974 if (fndecl && fndecl != ix86_previous_fndecl)
4975 {
4976 tree old_tree = (ix86_previous_fndecl
4977 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4978 : NULL_TREE);
4979
4980 tree new_tree = (fndecl
4981 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4982 : NULL_TREE);
4983
4984 ix86_previous_fndecl = fndecl;
4985 if (old_tree == new_tree)
4986 ;
4987
4988 else if (new_tree)
4989 {
4990 cl_target_option_restore (&global_options,
4991 TREE_TARGET_OPTION (new_tree));
4992 if (TREE_TARGET_GLOBALS (new_tree))
4993 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4994 else
4995 TREE_TARGET_GLOBALS (new_tree)
4996 = save_target_globals_default_opts ();
4997 }
4998
4999 else if (old_tree)
5000 {
5001 new_tree = target_option_current_node;
5002 cl_target_option_restore (&global_options,
5003 TREE_TARGET_OPTION (new_tree));
5004 if (TREE_TARGET_GLOBALS (new_tree))
5005 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5006 else if (new_tree == target_option_default_node)
5007 restore_target_globals (&default_target_globals);
5008 else
5009 TREE_TARGET_GLOBALS (new_tree)
5010 = save_target_globals_default_opts ();
5011 }
5012 }
5013 }
5014
5015 \f
5016 /* Return true if this goes in large data/bss. */
5017
5018 static bool
5019 ix86_in_large_data_p (tree exp)
5020 {
5021 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5022 return false;
5023
5024 /* Functions are never large data. */
5025 if (TREE_CODE (exp) == FUNCTION_DECL)
5026 return false;
5027
5028 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5029 {
5030 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
5031 if (strcmp (section, ".ldata") == 0
5032 || strcmp (section, ".lbss") == 0)
5033 return true;
5034 return false;
5035 }
5036 else
5037 {
5038 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5039
5040 /* If this is an incomplete type with size 0, then we can't put it
5041 in data because it might be too big when completed. */
5042 if (!size || size > ix86_section_threshold)
5043 return true;
5044 }
5045
5046 return false;
5047 }
5048
5049 /* Switch to the appropriate section for output of DECL.
5050 DECL is either a `VAR_DECL' node or a constant of some sort.
5051 RELOC indicates whether forming the initial value of DECL requires
5052 link-time relocations. */
5053
5054 ATTRIBUTE_UNUSED static section *
5055 x86_64_elf_select_section (tree decl, int reloc,
5056 unsigned HOST_WIDE_INT align)
5057 {
5058 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5059 && ix86_in_large_data_p (decl))
5060 {
5061 const char *sname = NULL;
5062 unsigned int flags = SECTION_WRITE;
5063 switch (categorize_decl_for_section (decl, reloc))
5064 {
5065 case SECCAT_DATA:
5066 sname = ".ldata";
5067 break;
5068 case SECCAT_DATA_REL:
5069 sname = ".ldata.rel";
5070 break;
5071 case SECCAT_DATA_REL_LOCAL:
5072 sname = ".ldata.rel.local";
5073 break;
5074 case SECCAT_DATA_REL_RO:
5075 sname = ".ldata.rel.ro";
5076 break;
5077 case SECCAT_DATA_REL_RO_LOCAL:
5078 sname = ".ldata.rel.ro.local";
5079 break;
5080 case SECCAT_BSS:
5081 sname = ".lbss";
5082 flags |= SECTION_BSS;
5083 break;
5084 case SECCAT_RODATA:
5085 case SECCAT_RODATA_MERGE_STR:
5086 case SECCAT_RODATA_MERGE_STR_INIT:
5087 case SECCAT_RODATA_MERGE_CONST:
5088 sname = ".lrodata";
5089 flags = 0;
5090 break;
5091 case SECCAT_SRODATA:
5092 case SECCAT_SDATA:
5093 case SECCAT_SBSS:
5094 gcc_unreachable ();
5095 case SECCAT_TEXT:
5096 case SECCAT_TDATA:
5097 case SECCAT_TBSS:
5098 /* We don't split these for the medium model. Place them into
5099 default sections and hope for the best. */
5100 break;
5101 }
5102 if (sname)
5103 {
5104 /* We might get called with string constants, but get_named_section
5105 doesn't like them as they are not DECLs. Also, we need to set
5106 flags in that case. */
5107 if (!DECL_P (decl))
5108 return get_section (sname, flags, NULL);
5109 return get_named_section (decl, sname, reloc);
5110 }
5111 }
5112 return default_elf_select_section (decl, reloc, align);
5113 }
5114
5115 /* Select a set of attributes for section NAME based on the properties
5116 of DECL and whether or not RELOC indicates that DECL's initializer
5117 might contain runtime relocations. */
5118
5119 static unsigned int ATTRIBUTE_UNUSED
5120 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5121 {
5122 unsigned int flags = default_section_type_flags (decl, name, reloc);
5123
5124 if (decl == NULL_TREE
5125 && (strcmp (name, ".ldata.rel.ro") == 0
5126 || strcmp (name, ".ldata.rel.ro.local") == 0))
5127 flags |= SECTION_RELRO;
5128
5129 if (strcmp (name, ".lbss") == 0
5130 || strncmp (name, ".lbss.", 6) == 0
5131 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5132 flags |= SECTION_BSS;
5133
5134 return flags;
5135 }
5136
5137 /* Build up a unique section name, expressed as a
5138 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5139 RELOC indicates whether the initial value of EXP requires
5140 link-time relocations. */
5141
5142 static void ATTRIBUTE_UNUSED
5143 x86_64_elf_unique_section (tree decl, int reloc)
5144 {
5145 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5146 && ix86_in_large_data_p (decl))
5147 {
5148 const char *prefix = NULL;
5149 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5150 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
5151
5152 switch (categorize_decl_for_section (decl, reloc))
5153 {
5154 case SECCAT_DATA:
5155 case SECCAT_DATA_REL:
5156 case SECCAT_DATA_REL_LOCAL:
5157 case SECCAT_DATA_REL_RO:
5158 case SECCAT_DATA_REL_RO_LOCAL:
5159 prefix = one_only ? ".ld" : ".ldata";
5160 break;
5161 case SECCAT_BSS:
5162 prefix = one_only ? ".lb" : ".lbss";
5163 break;
5164 case SECCAT_RODATA:
5165 case SECCAT_RODATA_MERGE_STR:
5166 case SECCAT_RODATA_MERGE_STR_INIT:
5167 case SECCAT_RODATA_MERGE_CONST:
5168 prefix = one_only ? ".lr" : ".lrodata";
5169 break;
5170 case SECCAT_SRODATA:
5171 case SECCAT_SDATA:
5172 case SECCAT_SBSS:
5173 gcc_unreachable ();
5174 case SECCAT_TEXT:
5175 case SECCAT_TDATA:
5176 case SECCAT_TBSS:
5177 /* We don't split these for the medium model. Place them into
5178 default sections and hope for the best. */
5179 break;
5180 }
5181 if (prefix)
5182 {
5183 const char *name, *linkonce;
5184 char *string;
5185
5186 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5187 name = targetm.strip_name_encoding (name);
5188
5189 /* If we're using one_only, then there needs to be a .gnu.linkonce
5190 prefix to the section name. */
5191 linkonce = one_only ? ".gnu.linkonce" : "";
5192
5193 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5194
5195 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
5196 return;
5197 }
5198 }
5199 default_unique_section (decl, reloc);
5200 }
5201
5202 #ifdef COMMON_ASM_OP
5203 /* This says how to output assembler code to declare an
5204 uninitialized external linkage data object.
5205
5206 For medium model x86-64 we need to use the .largecomm directive for
5207 large objects. */
5208 void
5209 x86_elf_aligned_common (FILE *file,
5210 const char *name, unsigned HOST_WIDE_INT size,
5211 int align)
5212 {
5213 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5214 && size > (unsigned int)ix86_section_threshold)
5215 fputs (".largecomm\t", file);
5216 else
5217 fputs (COMMON_ASM_OP, file);
5218 assemble_name (file, name);
5219 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5220 size, align / BITS_PER_UNIT);
5221 }
5222 #endif
5223
5224 /* Utility function for targets to use in implementing
5225 ASM_OUTPUT_ALIGNED_BSS. */
5226
5227 void
5228 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5229 const char *name, unsigned HOST_WIDE_INT size,
5230 int align)
5231 {
5232 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5233 && size > (unsigned int)ix86_section_threshold)
5234 switch_to_section (get_named_section (decl, ".lbss", 0));
5235 else
5236 switch_to_section (bss_section);
5237 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5238 #ifdef ASM_DECLARE_OBJECT_NAME
5239 last_assemble_variable_decl = decl;
5240 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5241 #else
5242 /* Standard thing is just output label for the object. */
5243 ASM_OUTPUT_LABEL (file, name);
5244 #endif /* ASM_DECLARE_OBJECT_NAME */
5245 ASM_OUTPUT_SKIP (file, size ? size : 1);
5246 }
5247 \f
5248 /* Decide whether we must probe the stack before any space allocation
5249 on this target. It's essentially TARGET_STACK_PROBE except when
5250 -fstack-check causes the stack to be already probed differently. */
5251
5252 bool
5253 ix86_target_stack_probe (void)
5254 {
5255 /* Do not probe the stack twice if static stack checking is enabled. */
5256 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5257 return false;
5258
5259 return TARGET_STACK_PROBE;
5260 }
5261 \f
5262 /* Decide whether we can make a sibling call to a function. DECL is the
5263 declaration of the function being targeted by the call and EXP is the
5264 CALL_EXPR representing the call. */
5265
5266 static bool
5267 ix86_function_ok_for_sibcall (tree decl, tree exp)
5268 {
5269 tree type, decl_or_type;
5270 rtx a, b;
5271
5272 /* If we are generating position-independent code, we cannot sibcall
5273 optimize any indirect call, or a direct call to a global function,
5274 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5275 if (!TARGET_MACHO
5276 && !TARGET_64BIT
5277 && flag_pic
5278 && (!decl || !targetm.binds_local_p (decl)))
5279 return false;
5280
5281 /* If we need to align the outgoing stack, then sibcalling would
5282 unalign the stack, which may break the called function. */
5283 if (ix86_minimum_incoming_stack_boundary (true)
5284 < PREFERRED_STACK_BOUNDARY)
5285 return false;
5286
5287 if (decl)
5288 {
5289 decl_or_type = decl;
5290 type = TREE_TYPE (decl);
5291 }
5292 else
5293 {
5294 /* We're looking at the CALL_EXPR, we need the type of the function. */
5295 type = CALL_EXPR_FN (exp); /* pointer expression */
5296 type = TREE_TYPE (type); /* pointer type */
5297 type = TREE_TYPE (type); /* function type */
5298 decl_or_type = type;
5299 }
5300
5301 /* Check that the return value locations are the same. Like
5302 if we are returning floats on the 80387 register stack, we cannot
5303 make a sibcall from a function that doesn't return a float to a
5304 function that does or, conversely, from a function that does return
5305 a float to a function that doesn't; the necessary stack adjustment
5306 would not be executed. This is also the place we notice
5307 differences in the return value ABI. Note that it is ok for one
5308 of the functions to have void return type as long as the return
5309 value of the other is passed in a register. */
5310 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5311 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5312 cfun->decl, false);
5313 if (STACK_REG_P (a) || STACK_REG_P (b))
5314 {
5315 if (!rtx_equal_p (a, b))
5316 return false;
5317 }
5318 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5319 ;
5320 else if (!rtx_equal_p (a, b))
5321 return false;
5322
5323 if (TARGET_64BIT)
5324 {
5325 /* The SYSV ABI has more call-clobbered registers;
5326 disallow sibcalls from MS to SYSV. */
5327 if (cfun->machine->call_abi == MS_ABI
5328 && ix86_function_type_abi (type) == SYSV_ABI)
5329 return false;
5330 }
5331 else
5332 {
5333 /* If this call is indirect, we'll need to be able to use a
5334 call-clobbered register for the address of the target function.
5335 Make sure that all such registers are not used for passing
5336 parameters. Note that DLLIMPORT functions are indirect. */
5337 if (!decl
5338 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5339 {
5340 if (ix86_function_regparm (type, NULL) >= 3)
5341 {
5342 /* ??? Need to count the actual number of registers to be used,
5343 not the possible number of registers. Fix later. */
5344 return false;
5345 }
5346 }
5347 }
5348
5349 /* Otherwise okay. That also includes certain types of indirect calls. */
5350 return true;
5351 }
5352
5353 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5354 and "sseregparm" calling convention attributes;
5355 arguments as in struct attribute_spec.handler. */
5356
5357 static tree
5358 ix86_handle_cconv_attribute (tree *node, tree name,
5359 tree args,
5360 int flags ATTRIBUTE_UNUSED,
5361 bool *no_add_attrs)
5362 {
5363 if (TREE_CODE (*node) != FUNCTION_TYPE
5364 && TREE_CODE (*node) != METHOD_TYPE
5365 && TREE_CODE (*node) != FIELD_DECL
5366 && TREE_CODE (*node) != TYPE_DECL)
5367 {
5368 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5369 name);
5370 *no_add_attrs = true;
5371 return NULL_TREE;
5372 }
5373
5374 /* Can combine regparm with all attributes but fastcall and thiscall. */
5375 if (is_attribute_p ("regparm", name))
5376 {
5377 tree cst;
5378
5379 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5380 {
5381 error ("fastcall and regparm attributes are not compatible");
5382 }
5383
5384 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5385 {
5386 error ("regparm and thiscall attributes are not compatible");
5387 }
5388
5389 cst = TREE_VALUE (args);
5390 if (TREE_CODE (cst) != INTEGER_CST)
5391 {
5392 warning (OPT_Wattributes,
5393 "%qE attribute requires an integer constant argument",
5394 name);
5395 *no_add_attrs = true;
5396 }
5397 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5398 {
5399 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5400 name, REGPARM_MAX);
5401 *no_add_attrs = true;
5402 }
5403
5404 return NULL_TREE;
5405 }
5406
5407 if (TARGET_64BIT)
5408 {
5409 /* Do not warn when emulating the MS ABI. */
5410 if ((TREE_CODE (*node) != FUNCTION_TYPE
5411 && TREE_CODE (*node) != METHOD_TYPE)
5412 || ix86_function_type_abi (*node) != MS_ABI)
5413 warning (OPT_Wattributes, "%qE attribute ignored",
5414 name);
5415 *no_add_attrs = true;
5416 return NULL_TREE;
5417 }
5418
5419 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5420 if (is_attribute_p ("fastcall", name))
5421 {
5422 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5423 {
5424 error ("fastcall and cdecl attributes are not compatible");
5425 }
5426 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5427 {
5428 error ("fastcall and stdcall attributes are not compatible");
5429 }
5430 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5431 {
5432 error ("fastcall and regparm attributes are not compatible");
5433 }
5434 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5435 {
5436 error ("fastcall and thiscall attributes are not compatible");
5437 }
5438 }
5439
5440 /* Can combine stdcall with fastcall (redundant), regparm and
5441 sseregparm. */
5442 else if (is_attribute_p ("stdcall", name))
5443 {
5444 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5445 {
5446 error ("stdcall and cdecl attributes are not compatible");
5447 }
5448 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5449 {
5450 error ("stdcall and fastcall attributes are not compatible");
5451 }
5452 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5453 {
5454 error ("stdcall and thiscall attributes are not compatible");
5455 }
5456 }
5457
5458 /* Can combine cdecl with regparm and sseregparm. */
5459 else if (is_attribute_p ("cdecl", name))
5460 {
5461 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5462 {
5463 error ("stdcall and cdecl attributes are not compatible");
5464 }
5465 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5466 {
5467 error ("fastcall and cdecl attributes are not compatible");
5468 }
5469 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5470 {
5471 error ("cdecl and thiscall attributes are not compatible");
5472 }
5473 }
5474 else if (is_attribute_p ("thiscall", name))
5475 {
5476 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5477 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5478 name);
5479 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5480 {
5481 error ("stdcall and thiscall attributes are not compatible");
5482 }
5483 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5484 {
5485 error ("fastcall and thiscall attributes are not compatible");
5486 }
5487 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5488 {
5489 error ("cdecl and thiscall attributes are not compatible");
5490 }
5491 }
5492
5493 /* Can combine sseregparm with all attributes. */
5494
5495 return NULL_TREE;
5496 }
5497
5498 /* The transactional memory builtins are implicitly regparm or fastcall
5499 depending on the ABI. Override the generic do-nothing attribute that
5500 these builtins were declared with, and replace it with one of the two
5501 attributes that we expect elsewhere. */
5502
5503 static tree
5504 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5505 tree args ATTRIBUTE_UNUSED,
5506 int flags, bool *no_add_attrs)
5507 {
5508 tree alt;
5509
5510 /* In no case do we want to add the placeholder attribute. */
5511 *no_add_attrs = true;
5512
5513 /* The 64-bit ABI is unchanged for transactional memory. */
5514 if (TARGET_64BIT)
5515 return NULL_TREE;
5516
5517 /* ??? Is there a better way to validate 32-bit Windows? We have
5518 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5519 if (CHECK_STACK_LIMIT > 0)
5520 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5521 else
5522 {
5523 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5524 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5525 }
5526 decl_attributes (node, alt, flags);
5527
5528 return NULL_TREE;
5529 }
5530
5531 /* This function determines from TYPE the calling-convention. */
5532
5533 unsigned int
5534 ix86_get_callcvt (const_tree type)
5535 {
5536 unsigned int ret = 0;
5537 bool is_stdarg;
5538 tree attrs;
5539
5540 if (TARGET_64BIT)
5541 return IX86_CALLCVT_CDECL;
5542
5543 attrs = TYPE_ATTRIBUTES (type);
5544 if (attrs != NULL_TREE)
5545 {
5546 if (lookup_attribute ("cdecl", attrs))
5547 ret |= IX86_CALLCVT_CDECL;
5548 else if (lookup_attribute ("stdcall", attrs))
5549 ret |= IX86_CALLCVT_STDCALL;
5550 else if (lookup_attribute ("fastcall", attrs))
5551 ret |= IX86_CALLCVT_FASTCALL;
5552 else if (lookup_attribute ("thiscall", attrs))
5553 ret |= IX86_CALLCVT_THISCALL;
5554
5555 /* Regparm isn't allowed for thiscall and fastcall. */
5556 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5557 {
5558 if (lookup_attribute ("regparm", attrs))
5559 ret |= IX86_CALLCVT_REGPARM;
5560 if (lookup_attribute ("sseregparm", attrs))
5561 ret |= IX86_CALLCVT_SSEREGPARM;
5562 }
5563
5564 if (IX86_BASE_CALLCVT(ret) != 0)
5565 return ret;
5566 }
5567
5568 is_stdarg = stdarg_p (type);
5569 if (TARGET_RTD && !is_stdarg)
5570 return IX86_CALLCVT_STDCALL | ret;
5571
5572 if (ret != 0
5573 || is_stdarg
5574 || TREE_CODE (type) != METHOD_TYPE
5575 || ix86_function_type_abi (type) != MS_ABI)
5576 return IX86_CALLCVT_CDECL | ret;
5577
5578 return IX86_CALLCVT_THISCALL;
5579 }
5580
5581 /* Return 0 if the attributes for two types are incompatible, 1 if they
5582 are compatible, and 2 if they are nearly compatible (which causes a
5583 warning to be generated). */
5584
5585 static int
5586 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5587 {
5588 unsigned int ccvt1, ccvt2;
5589
5590 if (TREE_CODE (type1) != FUNCTION_TYPE
5591 && TREE_CODE (type1) != METHOD_TYPE)
5592 return 1;
5593
5594 ccvt1 = ix86_get_callcvt (type1);
5595 ccvt2 = ix86_get_callcvt (type2);
5596 if (ccvt1 != ccvt2)
5597 return 0;
5598 if (ix86_function_regparm (type1, NULL)
5599 != ix86_function_regparm (type2, NULL))
5600 return 0;
5601
5602 return 1;
5603 }
5604 \f
5605 /* Return the regparm value for a function with the indicated TYPE and DECL.
5606 DECL may be NULL when calling function indirectly
5607 or considering a libcall. */
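/* For context, an assumed usage example (not part of the original comment):

     void __attribute__((regparm (3))) f (int a, int b, int c);

   requests up to three integer arguments in registers on 32-bit targets,
   while fastcall implies two and thiscall one, matching the values
   returned below.  */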
5608
5609 static int
5610 ix86_function_regparm (const_tree type, const_tree decl)
5611 {
5612 tree attr;
5613 int regparm;
5614 unsigned int ccvt;
5615
5616 if (TARGET_64BIT)
5617 return (ix86_function_type_abi (type) == SYSV_ABI
5618 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5619 ccvt = ix86_get_callcvt (type);
5620 regparm = ix86_regparm;
5621
5622 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5623 {
5624 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5625 if (attr)
5626 {
5627 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5628 return regparm;
5629 }
5630 }
5631 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5632 return 2;
5633 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5634 return 1;
5635
5636 /* Use register calling convention for local functions when possible. */
5637 if (decl
5638 && TREE_CODE (decl) == FUNCTION_DECL
5639 /* Caller and callee must agree on the calling convention, so
5640 checking just the optimize flag here would mean that with
5641 __attribute__((optimize (...))) the caller could use the regparm
5642 convention and the callee not, or vice versa. Instead look at
5643 whether the callee is optimized or not. */
5644 && opt_for_fn (decl, optimize)
5645 && !(profile_flag && !flag_fentry))
5646 {
5647 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5648 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5649 if (i && i->local && i->can_change_signature)
5650 {
5651 int local_regparm, globals = 0, regno;
5652
5653 /* Make sure no regparm register is taken by a
5654 fixed register variable. */
5655 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5656 if (fixed_regs[local_regparm])
5657 break;
5658
5659 /* We don't want to use regparm(3) for nested functions as
5660 these use a static chain pointer in the third argument. */
5661 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5662 local_regparm = 2;
5663
5664 /* In 32-bit mode save a register for the split stack. */
5665 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5666 local_regparm = 2;
5667
5668 /* Each fixed register usage increases register pressure,
5669 so fewer registers should be used for argument passing.
5670 This functionality can be overridden by an explicit
5671 regparm value. */
5672 for (regno = AX_REG; regno <= DI_REG; regno++)
5673 if (fixed_regs[regno])
5674 globals++;
5675
5676 local_regparm
5677 = globals < local_regparm ? local_regparm - globals : 0;
5678
5679 if (local_regparm > regparm)
5680 regparm = local_regparm;
5681 }
5682 }
5683
5684 return regparm;
5685 }
5686
5687 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5688 DFmode (2) arguments in SSE registers for a function with the
5689 indicated TYPE and DECL. DECL may be NULL when calling function
5690 indirectly or considering a libcall. Otherwise return 0. */
5691
5692 static int
5693 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5694 {
5695 gcc_assert (!TARGET_64BIT);
5696
5697 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5698 by the sseregparm attribute. */
5699 if (TARGET_SSEREGPARM
5700 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5701 {
5702 if (!TARGET_SSE)
5703 {
5704 if (warn)
5705 {
5706 if (decl)
5707 error ("calling %qD with attribute sseregparm without "
5708 "SSE/SSE2 enabled", decl);
5709 else
5710 error ("calling %qT with attribute sseregparm without "
5711 "SSE/SSE2 enabled", type);
5712 }
5713 return 0;
5714 }
5715
5716 return 2;
5717 }
5718
5719 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5720 (and DFmode for SSE2) arguments in SSE registers. */
5721 if (decl && TARGET_SSE_MATH && optimize
5722 && !(profile_flag && !flag_fentry))
5723 {
5724 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5725 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5726 if (i && i->local && i->can_change_signature)
5727 return TARGET_SSE2 ? 2 : 1;
5728 }
5729
5730 return 0;
5731 }
5732
5733 /* Return true if EAX is live at the start of the function. Used by
5734 ix86_expand_prologue to determine if we need special help before
5735 calling allocate_stack_worker. */
5736
5737 static bool
5738 ix86_eax_live_at_start_p (void)
5739 {
5740 /* Cheat. Don't bother working forward from ix86_function_regparm
5741 to the function type to whether an actual argument is located in
5742 eax. Instead just look at cfg info, which is still close enough
5743 to correct at this point. This gives false positives for broken
5744 functions that might use uninitialized data that happens to be
5745 allocated in eax, but who cares? */
5746 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5747 }
5748
5749 static bool
5750 ix86_keep_aggregate_return_pointer (tree fntype)
5751 {
5752 tree attr;
5753
5754 if (!TARGET_64BIT)
5755 {
5756 attr = lookup_attribute ("callee_pop_aggregate_return",
5757 TYPE_ATTRIBUTES (fntype));
5758 if (attr)
5759 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5760
5761 /* For 32-bit MS-ABI the default is to keep aggregate
5762 return pointer. */
5763 if (ix86_function_type_abi (fntype) == MS_ABI)
5764 return true;
5765 }
5766 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5767 }
5768
5769 /* Value is the number of bytes of arguments automatically
5770 popped when returning from a subroutine call.
5771 FUNDECL is the declaration node of the function (as a tree),
5772 FUNTYPE is the data type of the function (as a tree),
5773 or for a library call it is an identifier node for the subroutine name.
5774 SIZE is the number of bytes of arguments passed on the stack.
5775
5776 On the 80386, the RTD insn may be used to pop them if the number
5777 of args is fixed, but if the number is variable then the caller
5778 must pop them all. RTD can't be used for library calls now
5779 because the library is compiled with the Unix compiler.
5780 Use of RTD is a selectable option, since it is incompatible with
5781 standard Unix calling sequences. If the option is not selected,
5782 the caller must always pop the args.
5783
5784 The attribute stdcall is equivalent to RTD on a per module basis. */
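/* An assumed example for illustration: a 32-bit function declared

     void __attribute__((stdcall)) f (int a, int b);

   returns with "ret $8", so this hook reports 8 bytes popped by the
   callee, whereas under the default cdecl convention the caller pops
   the arguments and the hook returns 0.  */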
5785
5786 static int
5787 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5788 {
5789 unsigned int ccvt;
5790
5791 /* None of the 64-bit ABIs pop arguments. */
5792 if (TARGET_64BIT)
5793 return 0;
5794
5795 ccvt = ix86_get_callcvt (funtype);
5796
5797 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5798 | IX86_CALLCVT_THISCALL)) != 0
5799 && ! stdarg_p (funtype))
5800 return size;
5801
5802 /* Lose any fake structure return argument if it is passed on the stack. */
5803 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5804 && !ix86_keep_aggregate_return_pointer (funtype))
5805 {
5806 int nregs = ix86_function_regparm (funtype, fundecl);
5807 if (nregs == 0)
5808 return GET_MODE_SIZE (Pmode);
5809 }
5810
5811 return 0;
5812 }
5813
5814 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5815
5816 static bool
5817 ix86_legitimate_combined_insn (rtx insn)
5818 {
5819 /* Check operand constraints in case hard registers were propagated
5820 into insn pattern. This check prevents combine pass from
5821 generating insn patterns with invalid hard register operands.
5822 These invalid insns can eventually confuse reload to error out
5823 with a spill failure. See also PRs 46829 and 46843. */
5824 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5825 {
5826 int i;
5827
5828 extract_insn (insn);
5829 preprocess_constraints ();
5830
5831 for (i = 0; i < recog_data.n_operands; i++)
5832 {
5833 rtx op = recog_data.operand[i];
5834 enum machine_mode mode = GET_MODE (op);
5835 struct operand_alternative *op_alt;
5836 int offset = 0;
5837 bool win;
5838 int j;
5839
5840 /* For pre-AVX disallow unaligned loads/stores where the
5841 instructions don't support it. */
5842 if (!TARGET_AVX
5843 && VECTOR_MODE_P (GET_MODE (op))
5844 && misaligned_operand (op, GET_MODE (op)))
5845 {
5846 int min_align = get_attr_ssememalign (insn);
5847 if (min_align == 0)
5848 return false;
5849 }
5850
5851 /* A unary operator may be accepted by the predicate, but it
5852 is irrelevant for matching constraints. */
5853 if (UNARY_P (op))
5854 op = XEXP (op, 0);
5855
5856 if (GET_CODE (op) == SUBREG)
5857 {
5858 if (REG_P (SUBREG_REG (op))
5859 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5860 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5861 GET_MODE (SUBREG_REG (op)),
5862 SUBREG_BYTE (op),
5863 GET_MODE (op));
5864 op = SUBREG_REG (op);
5865 }
5866
5867 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5868 continue;
5869
5870 op_alt = recog_op_alt[i];
5871
5872 /* Operand has no constraints, anything is OK. */
5873 win = !recog_data.n_alternatives;
5874
5875 for (j = 0; j < recog_data.n_alternatives; j++)
5876 {
5877 if (op_alt[j].anything_ok
5878 || (op_alt[j].matches != -1
5879 && operands_match_p
5880 (recog_data.operand[i],
5881 recog_data.operand[op_alt[j].matches]))
5882 || reg_fits_class_p (op, op_alt[j].cl, offset, mode))
5883 {
5884 win = true;
5885 break;
5886 }
5887 }
5888
5889 if (!win)
5890 return false;
5891 }
5892 }
5893
5894 return true;
5895 }
5896 \f
5897 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5898
5899 static unsigned HOST_WIDE_INT
5900 ix86_asan_shadow_offset (void)
5901 {
5902 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5903 : HOST_WIDE_INT_C (0x7fff8000))
5904 : (HOST_WIDE_INT_1 << 29);
5905 }
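/* Background note (the generic AddressSanitizer mapping, stated here for
   context rather than taken from this file): the shadow byte for an
   address is found at (addr >> 3) + shadow_offset, so the constants above
   place the shadow region where the LP64, Mach-O/LP64 and ILP32 memory
   layouts expect it.  */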
5906 \f
5907 /* Argument support functions. */
5908
5909 /* Return true when register may be used to pass function parameters. */
5910 bool
5911 ix86_function_arg_regno_p (int regno)
5912 {
5913 int i;
5914 const int *parm_regs;
5915
5916 if (!TARGET_64BIT)
5917 {
5918 if (TARGET_MACHO)
5919 return (regno < REGPARM_MAX
5920 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5921 else
5922 return (regno < REGPARM_MAX
5923 || (TARGET_MMX && MMX_REGNO_P (regno)
5924 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5925 || (TARGET_SSE && SSE_REGNO_P (regno)
5926 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5927 }
5928
5929 if (TARGET_SSE && SSE_REGNO_P (regno)
5930 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5931 return true;
5932
5933 /* TODO: The function should depend on current function ABI but
5934 builtins.c would need updating then. Therefore we use the
5935 default ABI. */
5936
5937 /* RAX is used as hidden argument to va_arg functions. */
5938 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5939 return true;
5940
5941 if (ix86_abi == MS_ABI)
5942 parm_regs = x86_64_ms_abi_int_parameter_registers;
5943 else
5944 parm_regs = x86_64_int_parameter_registers;
5945 for (i = 0; i < (ix86_abi == MS_ABI
5946 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5947 if (regno == parm_regs[i])
5948 return true;
5949 return false;
5950 }
5951
5952 /* Return true if we do not know how to pass TYPE solely in registers. */
5953
5954 static bool
5955 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5956 {
5957 if (must_pass_in_stack_var_size_or_pad (mode, type))
5958 return true;
5959
5960 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5961 The layout_type routine is crafty and tries to trick us into passing
5962 currently unsupported vector types on the stack by using TImode. */
5963 return (!TARGET_64BIT && mode == TImode
5964 && type && TREE_CODE (type) != VECTOR_TYPE);
5965 }
5966
5967 /* Return the size, in bytes, of the area reserved for arguments passed
5968 in registers for the function represented by FNDECL, depending on the
5969 ABI in use. */
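/* Note for context: the 32 bytes returned below for the 64-bit MS ABI are
   the four 8-byte "home" slots that ABI reserves on the stack for the
   register arguments passed in RCX, RDX, R8 and R9.  */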
5970 int
5971 ix86_reg_parm_stack_space (const_tree fndecl)
5972 {
5973 enum calling_abi call_abi = SYSV_ABI;
5974 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5975 call_abi = ix86_function_abi (fndecl);
5976 else
5977 call_abi = ix86_function_type_abi (fndecl);
5978 if (TARGET_64BIT && call_abi == MS_ABI)
5979 return 32;
5980 return 0;
5981 }
5982
5983 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5984 call ABI used. */
5985 enum calling_abi
5986 ix86_function_type_abi (const_tree fntype)
5987 {
5988 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5989 {
5990 enum calling_abi abi = ix86_abi;
5991 if (abi == SYSV_ABI)
5992 {
5993 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5994 abi = MS_ABI;
5995 }
5996 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5997 abi = SYSV_ABI;
5998 return abi;
5999 }
6000 return ix86_abi;
6001 }
6002
6003 /* We add this as a workaround in order to use libc_has_function
6004 hook in i386.md. */
6005 bool
6006 ix86_libc_has_function (enum function_class fn_class)
6007 {
6008 return targetm.libc_has_function (fn_class);
6009 }
6010
6011 static bool
6012 ix86_function_ms_hook_prologue (const_tree fn)
6013 {
6014 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6015 {
6016 if (decl_function_context (fn) != NULL_TREE)
6017 error_at (DECL_SOURCE_LOCATION (fn),
6018 "ms_hook_prologue is not compatible with nested function");
6019 else
6020 return true;
6021 }
6022 return false;
6023 }
6024
6025 static enum calling_abi
6026 ix86_function_abi (const_tree fndecl)
6027 {
6028 if (! fndecl)
6029 return ix86_abi;
6030 return ix86_function_type_abi (TREE_TYPE (fndecl));
6031 }
6032
6033 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
6034 call ABI used. */
6035 enum calling_abi
6036 ix86_cfun_abi (void)
6037 {
6038 if (! cfun)
6039 return ix86_abi;
6040 return cfun->machine->call_abi;
6041 }
6042
6043 /* Write the extra assembler code needed to declare a function properly. */
6044
6045 void
6046 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6047 tree decl)
6048 {
6049 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6050
6051 if (is_ms_hook)
6052 {
6053 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6054 unsigned int filler_cc = 0xcccccccc;
6055
6056 for (i = 0; i < filler_count; i += 4)
6057 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6058 }
6059
6060 #ifdef SUBTARGET_ASM_UNWIND_INIT
6061 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6062 #endif
6063
6064 ASM_OUTPUT_LABEL (asm_out_file, fname);
6065
6066 /* Output magic byte marker, if hot-patch attribute is set. */
6067 if (is_ms_hook)
6068 {
6069 if (TARGET_64BIT)
6070 {
6071 /* leaq [%rsp + 0], %rsp */
6072 asm_fprintf (asm_out_file, ASM_BYTE
6073 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6074 }
6075 else
6076 {
6077 /* movl.s %edi, %edi
6078 push %ebp
6079 movl.s %esp, %ebp */
6080 asm_fprintf (asm_out_file, ASM_BYTE
6081 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6082 }
6083 }
6084 }
6085
6086 /* regclass.c */
6087 extern void init_regs (void);
6088
6089 /* Implementation of the call ABI switching target hook. The call-used
6090 register set specific to FNDECL is selected. See also
6091 ix86_conditional_register_usage for more details. */
6092 void
6093 ix86_call_abi_override (const_tree fndecl)
6094 {
6095 if (fndecl == NULL_TREE)
6096 cfun->machine->call_abi = ix86_abi;
6097 else
6098 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6099 }
6100
6101 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6102 expensive re-initialization of init_regs each time we switch function context
6103 since this is needed only during RTL expansion. */
6104 static void
6105 ix86_maybe_switch_abi (void)
6106 {
6107 if (TARGET_64BIT &&
6108 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6109 reinit_regs ();
6110 }
6111
6112 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6113 for a call to a function whose data type is FNTYPE.
6114 For a library call, FNTYPE is 0. */
6115
6116 void
6117 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6118 tree fntype, /* tree ptr for function decl */
6119 rtx libname, /* SYMBOL_REF of library name or 0 */
6120 tree fndecl,
6121 int caller)
6122 {
6123 struct cgraph_local_info *i;
6124
6125 memset (cum, 0, sizeof (*cum));
6126
6127 if (fndecl)
6128 {
6129 i = cgraph_local_info (fndecl);
6130 cum->call_abi = ix86_function_abi (fndecl);
6131 }
6132 else
6133 {
6134 i = NULL;
6135 cum->call_abi = ix86_function_type_abi (fntype);
6136 }
6137
6138 cum->caller = caller;
6139
6140 /* Set up the number of registers to use for passing arguments. */
6141 cum->nregs = ix86_regparm;
6142 if (TARGET_64BIT)
6143 {
6144 cum->nregs = (cum->call_abi == SYSV_ABI
6145 ? X86_64_REGPARM_MAX
6146 : X86_64_MS_REGPARM_MAX);
6147 }
6148 if (TARGET_SSE)
6149 {
6150 cum->sse_nregs = SSE_REGPARM_MAX;
6151 if (TARGET_64BIT)
6152 {
6153 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6154 ? X86_64_SSE_REGPARM_MAX
6155 : X86_64_MS_SSE_REGPARM_MAX);
6156 }
6157 }
6158 if (TARGET_MMX)
6159 cum->mmx_nregs = MMX_REGPARM_MAX;
6160 cum->warn_avx512f = true;
6161 cum->warn_avx = true;
6162 cum->warn_sse = true;
6163 cum->warn_mmx = true;
6164
6165 /* Because the type might mismatch between caller and callee, we need to
6166 use the actual type of the function for local calls.
6167 FIXME: cgraph_analyze can be told to actually record if a function uses
6168 va_start, so for local functions maybe_vaarg can be made more aggressive,
6169 helping K&R code.
6170 FIXME: once the type system is fixed, we won't need this code anymore. */
6171 if (i && i->local && i->can_change_signature)
6172 fntype = TREE_TYPE (fndecl);
6173 cum->maybe_vaarg = (fntype
6174 ? (!prototype_p (fntype) || stdarg_p (fntype))
6175 : !libname);
6176
6177 if (!TARGET_64BIT)
6178 {
6179 /* If there are variable arguments, then we won't pass anything
6180 in registers in 32-bit mode. */
6181 if (stdarg_p (fntype))
6182 {
6183 cum->nregs = 0;
6184 cum->sse_nregs = 0;
6185 cum->mmx_nregs = 0;
6186 cum->warn_avx512f = false;
6187 cum->warn_avx = false;
6188 cum->warn_sse = false;
6189 cum->warn_mmx = false;
6190 return;
6191 }
6192
6193 /* Use ecx and edx registers if function has fastcall attribute,
6194 else look for regparm information. */
6195 if (fntype)
6196 {
6197 unsigned int ccvt = ix86_get_callcvt (fntype);
6198 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6199 {
6200 cum->nregs = 1;
6201 cum->fastcall = 1; /* Same first register as in fastcall. */
6202 }
6203 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6204 {
6205 cum->nregs = 2;
6206 cum->fastcall = 1;
6207 }
6208 else
6209 cum->nregs = ix86_function_regparm (fntype, fndecl);
6210 }
6211
6212 /* Set up the number of SSE registers used for passing SFmode
6213 and DFmode arguments. Warn for mismatching ABI. */
6214 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6215 }
6216 }
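
/* Illustrative sketch of the 32-bit register-passing conventions set up
   above (hypothetical declaration, not compiled here):

     int __attribute__((fastcall)) f (int a, int b, int c);

   Here A is passed in %ecx, B in %edx and C on the stack, so CUM->nregs
   starts at 2 with CUM->fastcall set; a thiscall function instead gets a
   single register (%ecx) for its first argument.  Variadic 32-bit
   functions pass everything on the stack, which is why all the register
   counts are cleared for stdarg functions above.  */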
6217
6218 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6219 But in the case of vector types, it is some vector mode.
6220
6221 When we have only some of our vector isa extensions enabled, then there
6222 are some modes for which vector_mode_supported_p is false. For these
6223 modes, the generic vector support in gcc will choose some non-vector mode
6224 in order to implement the type. By computing the natural mode, we'll
6225 select the proper ABI location for the operand and not depend on whatever
6226 the middle-end decides to do with these vector types.
6227
6228 The middle end can't deal with vector types larger than 16 bytes. In
6229 this case, we return the original mode and warn about the ABI change if
6230 CUM isn't NULL.
6231
6232 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6233 available for the function return value. */
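
/* Illustrative sketch (hypothetical typedef, for illustration only):
   for a generic vector type such as

     typedef float v4sf __attribute__ ((vector_size (16)));

   the middle end may have picked a non-vector TYPE_MODE when the
   matching ISA extension is disabled; this function still selects the
   corresponding vector mode (here V4SFmode) so that the argument keeps
   a consistent ABI slot, emitting one of the -Wpsabi warnings below
   when the required extension is not enabled.  */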
6234
6235 static enum machine_mode
6236 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6237 bool in_return)
6238 {
6239 enum machine_mode mode = TYPE_MODE (type);
6240
6241 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6242 {
6243 HOST_WIDE_INT size = int_size_in_bytes (type);
6244 if ((size == 8 || size == 16 || size == 32 || size == 64)
6245 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6246 && TYPE_VECTOR_SUBPARTS (type) > 1)
6247 {
6248 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6249
6250 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6251 mode = MIN_MODE_VECTOR_FLOAT;
6252 else
6253 mode = MIN_MODE_VECTOR_INT;
6254
6255 /* Get the mode which has this inner mode and number of units. */
6256 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6257 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6258 && GET_MODE_INNER (mode) == innermode)
6259 {
6260 if (size == 64 && !TARGET_AVX512F)
6261 {
6262 static bool warnedavx512f;
6263 static bool warnedavx512f_ret;
6264
6265 if (cum && cum->warn_avx512f && !warnedavx512f)
6266 {
6267 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6268 "without AVX512F enabled changes the ABI"))
6269 warnedavx512f = true;
6270 }
6271 else if (in_return && !warnedavx512f_ret)
6272 {
6273 if (warning (OPT_Wpsabi, "AVX512F vector return "
6274 "without AVX512F enabled changes the ABI"))
6275 warnedavx512f_ret = true;
6276 }
6277
6278 return TYPE_MODE (type);
6279 }
6280 else if (size == 32 && !TARGET_AVX)
6281 {
6282 static bool warnedavx;
6283 static bool warnedavx_ret;
6284
6285 if (cum && cum->warn_avx && !warnedavx)
6286 {
6287 if (warning (OPT_Wpsabi, "AVX vector argument "
6288 "without AVX enabled changes the ABI"))
6289 warnedavx = true;
6290 }
6291 else if (in_return && !warnedavx_ret)
6292 {
6293 if (warning (OPT_Wpsabi, "AVX vector return "
6294 "without AVX enabled changes the ABI"))
6295 warnedavx_ret = true;
6296 }
6297
6298 return TYPE_MODE (type);
6299 }
6300 else if (((size == 8 && TARGET_64BIT) || size == 16)
6301 && !TARGET_SSE)
6302 {
6303 static bool warnedsse;
6304 static bool warnedsse_ret;
6305
6306 if (cum && cum->warn_sse && !warnedsse)
6307 {
6308 if (warning (OPT_Wpsabi, "SSE vector argument "
6309 "without SSE enabled changes the ABI"))
6310 warnedsse = true;
6311 }
6312 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6313 {
6314 if (warning (OPT_Wpsabi, "SSE vector return "
6315 "without SSE enabled changes the ABI"))
6316 warnedsse_ret = true;
6317 }
6318 }
6319 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6320 {
6321 static bool warnedmmx;
6322 static bool warnedmmx_ret;
6323
6324 if (cum && cum->warn_mmx && !warnedmmx)
6325 {
6326 if (warning (OPT_Wpsabi, "MMX vector argument "
6327 "without MMX enabled changes the ABI"))
6328 warnedmmx = true;
6329 }
6330 else if (in_return && !warnedmmx_ret)
6331 {
6332 if (warning (OPT_Wpsabi, "MMX vector return "
6333 "without MMX enabled changes the ABI"))
6334 warnedmmx_ret = true;
6335 }
6336 }
6337 return mode;
6338 }
6339
6340 gcc_unreachable ();
6341 }
6342 }
6343
6344 return mode;
6345 }
6346
6347 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6348 this may not agree with the mode that the type system has chosen for the
6349 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6350 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6351
6352 static rtx
6353 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6354 unsigned int regno)
6355 {
6356 rtx tmp;
6357
6358 if (orig_mode != BLKmode)
6359 tmp = gen_rtx_REG (orig_mode, regno);
6360 else
6361 {
6362 tmp = gen_rtx_REG (mode, regno);
6363 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6364 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6365 }
6366
6367 return tmp;
6368 }
6369
6370 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6371 of this code is to classify each 8bytes of incoming argument by the register
6372 class and assign registers accordingly. */
6373
6374 /* Return the union class of CLASS1 and CLASS2.
6375 See the x86-64 PS ABI for details. */
6376
6377 static enum x86_64_reg_class
6378 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6379 {
6380 /* Rule #1: If both classes are equal, this is the resulting class. */
6381 if (class1 == class2)
6382 return class1;
6383
6384 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6385 the other class. */
6386 if (class1 == X86_64_NO_CLASS)
6387 return class2;
6388 if (class2 == X86_64_NO_CLASS)
6389 return class1;
6390
6391 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6392 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6393 return X86_64_MEMORY_CLASS;
6394
6395 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6396 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6397 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6398 return X86_64_INTEGERSI_CLASS;
6399 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6400 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6401 return X86_64_INTEGER_CLASS;
6402
6403 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6404 MEMORY is used. */
6405 if (class1 == X86_64_X87_CLASS
6406 || class1 == X86_64_X87UP_CLASS
6407 || class1 == X86_64_COMPLEX_X87_CLASS
6408 || class2 == X86_64_X87_CLASS
6409 || class2 == X86_64_X87UP_CLASS
6410 || class2 == X86_64_COMPLEX_X87_CLASS)
6411 return X86_64_MEMORY_CLASS;
6412
6413 /* Rule #6: Otherwise class SSE is used. */
6414 return X86_64_SSE_CLASS;
6415 }
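
/* A few worked examples of the merge rules above (illustration only):
     merge_classes (X86_64_NO_CLASS, X86_64_SSEDF_CLASS)
       == X86_64_SSEDF_CLASS                                (rule #2)
     merge_classes (X86_64_INTEGER_CLASS, X86_64_SSE_CLASS)
       == X86_64_INTEGER_CLASS                              (rule #4)
     merge_classes (X86_64_X87_CLASS, X86_64_SSE_CLASS)
       == X86_64_MEMORY_CLASS                               (rule #5)  */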
6416
6417 /* Classify the argument of type TYPE and mode MODE.
6418 CLASSES will be filled by the register class used to pass each word
6419 of the operand. The number of words is returned. In case the parameter
6420 should be passed in memory, 0 is returned. As a special case for zero
6421 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6422
6423 BIT_OFFSET is used internally for handling records; it gives the offset
6424 in bits, taken modulo 512 to avoid overflow cases.
6425
6426 See the x86-64 PS ABI for details.
6427 */
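
/* Worked example (an illustrative sketch, not a normative statement of
   the psABI): for

     struct s { double d; long l; };

   classify_argument looks at each eightbyte and produces

     classes[0] = X86_64_SSEDF_CLASS    (the double)
     classes[1] = X86_64_INTEGER_CLASS  (the long)

   and returns 2, so the struct is passed with the first eightbyte in an
   SSE register and the second in a general-purpose register.  */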
6428
6429 static int
6430 classify_argument (enum machine_mode mode, const_tree type,
6431 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6432 {
6433 HOST_WIDE_INT bytes =
6434 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6435 int words
6436 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6437
6438 /* Variable sized entities are always passed/returned in memory. */
6439 if (bytes < 0)
6440 return 0;
6441
6442 if (mode != VOIDmode
6443 && targetm.calls.must_pass_in_stack (mode, type))
6444 return 0;
6445
6446 if (type && AGGREGATE_TYPE_P (type))
6447 {
6448 int i;
6449 tree field;
6450 enum x86_64_reg_class subclasses[MAX_CLASSES];
6451
6452 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6453 if (bytes > 64)
6454 return 0;
6455
6456 for (i = 0; i < words; i++)
6457 classes[i] = X86_64_NO_CLASS;
6458
6459 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6460 signal the memory class, so handle this as a special case. */
6461 if (!words)
6462 {
6463 classes[0] = X86_64_NO_CLASS;
6464 return 1;
6465 }
6466
6467 /* Classify each field of record and merge classes. */
6468 switch (TREE_CODE (type))
6469 {
6470 case RECORD_TYPE:
6471 /* And now merge the fields of structure. */
6472 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6473 {
6474 if (TREE_CODE (field) == FIELD_DECL)
6475 {
6476 int num;
6477
6478 if (TREE_TYPE (field) == error_mark_node)
6479 continue;
6480
6481 /* Bitfields are always classified as integer. Handle them
6482 early, since later code would consider them to be
6483 misaligned integers. */
6484 if (DECL_BIT_FIELD (field))
6485 {
6486 for (i = (int_bit_position (field)
6487 + (bit_offset % 64)) / 8 / 8;
6488 i < ((int_bit_position (field) + (bit_offset % 64))
6489 + tree_to_shwi (DECL_SIZE (field))
6490 + 63) / 8 / 8; i++)
6491 classes[i] =
6492 merge_classes (X86_64_INTEGER_CLASS,
6493 classes[i]);
6494 }
6495 else
6496 {
6497 int pos;
6498
6499 type = TREE_TYPE (field);
6500
6501 /* Flexible array member is ignored. */
6502 if (TYPE_MODE (type) == BLKmode
6503 && TREE_CODE (type) == ARRAY_TYPE
6504 && TYPE_SIZE (type) == NULL_TREE
6505 && TYPE_DOMAIN (type) != NULL_TREE
6506 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6507 == NULL_TREE))
6508 {
6509 static bool warned;
6510
6511 if (!warned && warn_psabi)
6512 {
6513 warned = true;
6514 inform (input_location,
6515 "the ABI of passing struct with"
6516 " a flexible array member has"
6517 " changed in GCC 4.4");
6518 }
6519 continue;
6520 }
6521 num = classify_argument (TYPE_MODE (type), type,
6522 subclasses,
6523 (int_bit_position (field)
6524 + bit_offset) % 512);
6525 if (!num)
6526 return 0;
6527 pos = (int_bit_position (field)
6528 + (bit_offset % 64)) / 8 / 8;
6529 for (i = 0; i < num && (i + pos) < words; i++)
6530 classes[i + pos] =
6531 merge_classes (subclasses[i], classes[i + pos]);
6532 }
6533 }
6534 }
6535 break;
6536
6537 case ARRAY_TYPE:
6538 /* Arrays are handled as small records. */
6539 {
6540 int num;
6541 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6542 TREE_TYPE (type), subclasses, bit_offset);
6543 if (!num)
6544 return 0;
6545
6546 /* The partial classes are now full classes. */
6547 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6548 subclasses[0] = X86_64_SSE_CLASS;
6549 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6550 && !((bit_offset % 64) == 0 && bytes == 4))
6551 subclasses[0] = X86_64_INTEGER_CLASS;
6552
6553 for (i = 0; i < words; i++)
6554 classes[i] = subclasses[i % num];
6555
6556 break;
6557 }
6558 case UNION_TYPE:
6559 case QUAL_UNION_TYPE:
6560 /* Unions are similar to RECORD_TYPE, but the offset is always 0. */
6562 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6563 {
6564 if (TREE_CODE (field) == FIELD_DECL)
6565 {
6566 int num;
6567
6568 if (TREE_TYPE (field) == error_mark_node)
6569 continue;
6570
6571 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6572 TREE_TYPE (field), subclasses,
6573 bit_offset);
6574 if (!num)
6575 return 0;
6576 for (i = 0; i < num; i++)
6577 classes[i] = merge_classes (subclasses[i], classes[i]);
6578 }
6579 }
6580 break;
6581
6582 default:
6583 gcc_unreachable ();
6584 }
6585
6586 if (words > 2)
6587 {
6588 /* When the size exceeds 16 bytes, if the first class isn't
6589 X86_64_SSE_CLASS or any of the remaining classes isn't
6590 X86_64_SSEUP_CLASS, everything should be passed in
6591 memory. */
6592 if (classes[0] != X86_64_SSE_CLASS)
6593 return 0;
6594
6595 for (i = 1; i < words; i++)
6596 if (classes[i] != X86_64_SSEUP_CLASS)
6597 return 0;
6598 }
6599
6600 /* Final merger cleanup. */
6601 for (i = 0; i < words; i++)
6602 {
6603 /* If one class is MEMORY, everything should be passed in
6604 memory. */
6605 if (classes[i] == X86_64_MEMORY_CLASS)
6606 return 0;
6607
6608 /* X86_64_SSEUP_CLASS should always be preceded by
6609 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6610 if (classes[i] == X86_64_SSEUP_CLASS
6611 && classes[i - 1] != X86_64_SSE_CLASS
6612 && classes[i - 1] != X86_64_SSEUP_CLASS)
6613 {
6614 /* The first one should never be X86_64_SSEUP_CLASS. */
6615 gcc_assert (i != 0);
6616 classes[i] = X86_64_SSE_CLASS;
6617 }
6618
6619 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6620 everything should be passed in memory. */
6621 if (classes[i] == X86_64_X87UP_CLASS
6622 && (classes[i - 1] != X86_64_X87_CLASS))
6623 {
6624 static bool warned;
6625
6626 /* The first one should never be X86_64_X87UP_CLASS. */
6627 gcc_assert (i != 0);
6628 if (!warned && warn_psabi)
6629 {
6630 warned = true;
6631 inform (input_location,
6632 "the ABI of passing union with long double"
6633 " has changed in GCC 4.4");
6634 }
6635 return 0;
6636 }
6637 }
6638 return words;
6639 }
6640
6641 /* Compute the alignment needed. We align all types to their natural
6642 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
6643 if (mode != VOIDmode && mode != BLKmode)
6644 {
6645 int mode_alignment = GET_MODE_BITSIZE (mode);
6646
6647 if (mode == XFmode)
6648 mode_alignment = 128;
6649 else if (mode == XCmode)
6650 mode_alignment = 256;
6651 if (COMPLEX_MODE_P (mode))
6652 mode_alignment /= 2;
6653 /* Misaligned fields are always returned in memory. */
6654 if (bit_offset % mode_alignment)
6655 return 0;
6656 }
6657
6658 /* For V1xx modes, just use the base mode. */
6659 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6660 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6661 mode = GET_MODE_INNER (mode);
6662
6663 /* Classification of atomic types. */
6664 switch (mode)
6665 {
6666 case SDmode:
6667 case DDmode:
6668 classes[0] = X86_64_SSE_CLASS;
6669 return 1;
6670 case TDmode:
6671 classes[0] = X86_64_SSE_CLASS;
6672 classes[1] = X86_64_SSEUP_CLASS;
6673 return 2;
6674 case DImode:
6675 case SImode:
6676 case HImode:
6677 case QImode:
6678 case CSImode:
6679 case CHImode:
6680 case CQImode:
6681 {
6682 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6683
6684 /* Analyze last 128 bits only. */
6685 size = (size - 1) & 0x7f;
6686
6687 if (size < 32)
6688 {
6689 classes[0] = X86_64_INTEGERSI_CLASS;
6690 return 1;
6691 }
6692 else if (size < 64)
6693 {
6694 classes[0] = X86_64_INTEGER_CLASS;
6695 return 1;
6696 }
6697 else if (size < 64+32)
6698 {
6699 classes[0] = X86_64_INTEGER_CLASS;
6700 classes[1] = X86_64_INTEGERSI_CLASS;
6701 return 2;
6702 }
6703 else if (size < 64+64)
6704 {
6705 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6706 return 2;
6707 }
6708 else
6709 gcc_unreachable ();
6710 }
6711 case CDImode:
6712 case TImode:
6713 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6714 return 2;
6715 case COImode:
6716 case OImode:
6717 /* OImode shouldn't be used directly. */
6718 gcc_unreachable ();
6719 case CTImode:
6720 return 0;
6721 case SFmode:
6722 if (!(bit_offset % 64))
6723 classes[0] = X86_64_SSESF_CLASS;
6724 else
6725 classes[0] = X86_64_SSE_CLASS;
6726 return 1;
6727 case DFmode:
6728 classes[0] = X86_64_SSEDF_CLASS;
6729 return 1;
6730 case XFmode:
6731 classes[0] = X86_64_X87_CLASS;
6732 classes[1] = X86_64_X87UP_CLASS;
6733 return 2;
6734 case TFmode:
6735 classes[0] = X86_64_SSE_CLASS;
6736 classes[1] = X86_64_SSEUP_CLASS;
6737 return 2;
6738 case SCmode:
6739 classes[0] = X86_64_SSE_CLASS;
6740 if (!(bit_offset % 64))
6741 return 1;
6742 else
6743 {
6744 static bool warned;
6745
6746 if (!warned && warn_psabi)
6747 {
6748 warned = true;
6749 inform (input_location,
6750 "the ABI of passing structure with complex float"
6751 " member has changed in GCC 4.4");
6752 }
6753 classes[1] = X86_64_SSESF_CLASS;
6754 return 2;
6755 }
6756 case DCmode:
6757 classes[0] = X86_64_SSEDF_CLASS;
6758 classes[1] = X86_64_SSEDF_CLASS;
6759 return 2;
6760 case XCmode:
6761 classes[0] = X86_64_COMPLEX_X87_CLASS;
6762 return 1;
6763 case TCmode:
6764 /* This mode is larger than 16 bytes. */
6765 return 0;
6766 case V8SFmode:
6767 case V8SImode:
6768 case V32QImode:
6769 case V16HImode:
6770 case V4DFmode:
6771 case V4DImode:
6772 classes[0] = X86_64_SSE_CLASS;
6773 classes[1] = X86_64_SSEUP_CLASS;
6774 classes[2] = X86_64_SSEUP_CLASS;
6775 classes[3] = X86_64_SSEUP_CLASS;
6776 return 4;
6777 case V8DFmode:
6778 case V16SFmode:
6779 case V8DImode:
6780 case V16SImode:
6781 case V32HImode:
6782 case V64QImode:
6783 classes[0] = X86_64_SSE_CLASS;
6784 classes[1] = X86_64_SSEUP_CLASS;
6785 classes[2] = X86_64_SSEUP_CLASS;
6786 classes[3] = X86_64_SSEUP_CLASS;
6787 classes[4] = X86_64_SSEUP_CLASS;
6788 classes[5] = X86_64_SSEUP_CLASS;
6789 classes[6] = X86_64_SSEUP_CLASS;
6790 classes[7] = X86_64_SSEUP_CLASS;
6791 return 8;
6792 case V4SFmode:
6793 case V4SImode:
6794 case V16QImode:
6795 case V8HImode:
6796 case V2DFmode:
6797 case V2DImode:
6798 classes[0] = X86_64_SSE_CLASS;
6799 classes[1] = X86_64_SSEUP_CLASS;
6800 return 2;
6801 case V1TImode:
6802 case V1DImode:
6803 case V2SFmode:
6804 case V2SImode:
6805 case V4HImode:
6806 case V8QImode:
6807 classes[0] = X86_64_SSE_CLASS;
6808 return 1;
6809 case BLKmode:
6810 case VOIDmode:
6811 return 0;
6812 default:
6813 gcc_assert (VECTOR_MODE_P (mode));
6814
6815 if (bytes > 16)
6816 return 0;
6817
6818 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6819
6820 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6821 classes[0] = X86_64_INTEGERSI_CLASS;
6822 else
6823 classes[0] = X86_64_INTEGER_CLASS;
6824 classes[1] = X86_64_INTEGER_CLASS;
6825 return 1 + (bytes > 8);
6826 }
6827 }
6828
6829 /* Examine the argument and set the number of registers required in each
6830 class. Return true iff the parameter should be passed in memory. */
6831
6832 static bool
6833 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6834 int *int_nregs, int *sse_nregs)
6835 {
6836 enum x86_64_reg_class regclass[MAX_CLASSES];
6837 int n = classify_argument (mode, type, regclass, 0);
6838
6839 *int_nregs = 0;
6840 *sse_nregs = 0;
6841
6842 if (!n)
6843 return true;
6844 for (n--; n >= 0; n--)
6845 switch (regclass[n])
6846 {
6847 case X86_64_INTEGER_CLASS:
6848 case X86_64_INTEGERSI_CLASS:
6849 (*int_nregs)++;
6850 break;
6851 case X86_64_SSE_CLASS:
6852 case X86_64_SSESF_CLASS:
6853 case X86_64_SSEDF_CLASS:
6854 (*sse_nregs)++;
6855 break;
6856 case X86_64_NO_CLASS:
6857 case X86_64_SSEUP_CLASS:
6858 break;
6859 case X86_64_X87_CLASS:
6860 case X86_64_X87UP_CLASS:
6861 case X86_64_COMPLEX_X87_CLASS:
6862 if (!in_return)
6863 return true;
6864 break;
6865 case X86_64_MEMORY_CLASS:
6866 gcc_unreachable ();
6867 }
6868
6869 return false;
6870 }
6871
6872 /* Construct a container for the argument as used by the GCC calling
6873 interface. See FUNCTION_ARG for the detailed description. */
6874
6875 static rtx
6876 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6877 const_tree type, int in_return, int nintregs, int nsseregs,
6878 const int *intreg, int sse_regno)
6879 {
6880 /* The following variables hold the static issued_error state. */
6881 static bool issued_sse_arg_error;
6882 static bool issued_sse_ret_error;
6883 static bool issued_x87_ret_error;
6884
6885 enum machine_mode tmpmode;
6886 int bytes =
6887 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6888 enum x86_64_reg_class regclass[MAX_CLASSES];
6889 int n;
6890 int i;
6891 int nexps = 0;
6892 int needed_sseregs, needed_intregs;
6893 rtx exp[MAX_CLASSES];
6894 rtx ret;
6895
6896 n = classify_argument (mode, type, regclass, 0);
6897 if (!n)
6898 return NULL;
6899 if (examine_argument (mode, type, in_return, &needed_intregs,
6900 &needed_sseregs))
6901 return NULL;
6902 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6903 return NULL;
6904
6905 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6906 some less clueful developer tries to use floating-point anyway. */
6907 if (needed_sseregs && !TARGET_SSE)
6908 {
6909 if (in_return)
6910 {
6911 if (!issued_sse_ret_error)
6912 {
6913 error ("SSE register return with SSE disabled");
6914 issued_sse_ret_error = true;
6915 }
6916 }
6917 else if (!issued_sse_arg_error)
6918 {
6919 error ("SSE register argument with SSE disabled");
6920 issued_sse_arg_error = true;
6921 }
6922 return NULL;
6923 }
6924
6925 /* Likewise, error if the ABI requires us to return values in the
6926 x87 registers and the user specified -mno-80387. */
6927 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6928 for (i = 0; i < n; i++)
6929 if (regclass[i] == X86_64_X87_CLASS
6930 || regclass[i] == X86_64_X87UP_CLASS
6931 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6932 {
6933 if (!issued_x87_ret_error)
6934 {
6935 error ("x87 register return with x87 disabled");
6936 issued_x87_ret_error = true;
6937 }
6938 return NULL;
6939 }
6940
6941 /* First construct the simple cases. Avoid SCmode, since we want to use
6942 a single register to pass this type. */
6943 if (n == 1 && mode != SCmode)
6944 switch (regclass[0])
6945 {
6946 case X86_64_INTEGER_CLASS:
6947 case X86_64_INTEGERSI_CLASS:
6948 return gen_rtx_REG (mode, intreg[0]);
6949 case X86_64_SSE_CLASS:
6950 case X86_64_SSESF_CLASS:
6951 case X86_64_SSEDF_CLASS:
6952 if (mode != BLKmode)
6953 return gen_reg_or_parallel (mode, orig_mode,
6954 SSE_REGNO (sse_regno));
6955 break;
6956 case X86_64_X87_CLASS:
6957 case X86_64_COMPLEX_X87_CLASS:
6958 return gen_rtx_REG (mode, FIRST_STACK_REG);
6959 case X86_64_NO_CLASS:
6960 /* Zero sized array, struct or class. */
6961 return NULL;
6962 default:
6963 gcc_unreachable ();
6964 }
6965 if (n == 2
6966 && regclass[0] == X86_64_SSE_CLASS
6967 && regclass[1] == X86_64_SSEUP_CLASS
6968 && mode != BLKmode)
6969 return gen_reg_or_parallel (mode, orig_mode,
6970 SSE_REGNO (sse_regno));
6971 if (n == 4
6972 && regclass[0] == X86_64_SSE_CLASS
6973 && regclass[1] == X86_64_SSEUP_CLASS
6974 && regclass[2] == X86_64_SSEUP_CLASS
6975 && regclass[3] == X86_64_SSEUP_CLASS
6976 && mode != BLKmode)
6977 return gen_reg_or_parallel (mode, orig_mode,
6978 SSE_REGNO (sse_regno));
6979 if (n == 8
6980 && regclass[0] == X86_64_SSE_CLASS
6981 && regclass[1] == X86_64_SSEUP_CLASS
6982 && regclass[2] == X86_64_SSEUP_CLASS
6983 && regclass[3] == X86_64_SSEUP_CLASS
6984 && regclass[4] == X86_64_SSEUP_CLASS
6985 && regclass[5] == X86_64_SSEUP_CLASS
6986 && regclass[6] == X86_64_SSEUP_CLASS
6987 && regclass[7] == X86_64_SSEUP_CLASS
6988 && mode != BLKmode)
6989 return gen_reg_or_parallel (mode, orig_mode,
6990 SSE_REGNO (sse_regno));
6991 if (n == 2
6992 && regclass[0] == X86_64_X87_CLASS
6993 && regclass[1] == X86_64_X87UP_CLASS)
6994 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6995
6996 if (n == 2
6997 && regclass[0] == X86_64_INTEGER_CLASS
6998 && regclass[1] == X86_64_INTEGER_CLASS
6999 && (mode == CDImode || mode == TImode)
7000 && intreg[0] + 1 == intreg[1])
7001 return gen_rtx_REG (mode, intreg[0]);
7002
7003 /* Otherwise figure out the entries of the PARALLEL. */
7004 for (i = 0; i < n; i++)
7005 {
7006 int pos;
7007
7008 switch (regclass[i])
7009 {
7010 case X86_64_NO_CLASS:
7011 break;
7012 case X86_64_INTEGER_CLASS:
7013 case X86_64_INTEGERSI_CLASS:
7014 /* Merge TImodes on aligned occasions here too. */
7015 if (i * 8 + 8 > bytes)
7016 tmpmode
7017 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7018 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7019 tmpmode = SImode;
7020 else
7021 tmpmode = DImode;
7022 /* We've requested 24 bytes for which we
7023 don't have a mode. Use DImode. */
7024 if (tmpmode == BLKmode)
7025 tmpmode = DImode;
7026 exp [nexps++]
7027 = gen_rtx_EXPR_LIST (VOIDmode,
7028 gen_rtx_REG (tmpmode, *intreg),
7029 GEN_INT (i*8));
7030 intreg++;
7031 break;
7032 case X86_64_SSESF_CLASS:
7033 exp [nexps++]
7034 = gen_rtx_EXPR_LIST (VOIDmode,
7035 gen_rtx_REG (SFmode,
7036 SSE_REGNO (sse_regno)),
7037 GEN_INT (i*8));
7038 sse_regno++;
7039 break;
7040 case X86_64_SSEDF_CLASS:
7041 exp [nexps++]
7042 = gen_rtx_EXPR_LIST (VOIDmode,
7043 gen_rtx_REG (DFmode,
7044 SSE_REGNO (sse_regno)),
7045 GEN_INT (i*8));
7046 sse_regno++;
7047 break;
7048 case X86_64_SSE_CLASS:
7049 pos = i;
7050 switch (n)
7051 {
7052 case 1:
7053 tmpmode = DImode;
7054 break;
7055 case 2:
7056 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7057 {
7058 tmpmode = TImode;
7059 i++;
7060 }
7061 else
7062 tmpmode = DImode;
7063 break;
7064 case 4:
7065 gcc_assert (i == 0
7066 && regclass[1] == X86_64_SSEUP_CLASS
7067 && regclass[2] == X86_64_SSEUP_CLASS
7068 && regclass[3] == X86_64_SSEUP_CLASS);
7069 tmpmode = OImode;
7070 i += 3;
7071 break;
7072 case 8:
7073 gcc_assert (i == 0
7074 && regclass[1] == X86_64_SSEUP_CLASS
7075 && regclass[2] == X86_64_SSEUP_CLASS
7076 && regclass[3] == X86_64_SSEUP_CLASS
7077 && regclass[4] == X86_64_SSEUP_CLASS
7078 && regclass[5] == X86_64_SSEUP_CLASS
7079 && regclass[6] == X86_64_SSEUP_CLASS
7080 && regclass[7] == X86_64_SSEUP_CLASS);
7081 tmpmode = XImode;
7082 i += 7;
7083 break;
7084 default:
7085 gcc_unreachable ();
7086 }
7087 exp [nexps++]
7088 = gen_rtx_EXPR_LIST (VOIDmode,
7089 gen_rtx_REG (tmpmode,
7090 SSE_REGNO (sse_regno)),
7091 GEN_INT (pos*8));
7092 sse_regno++;
7093 break;
7094 default:
7095 gcc_unreachable ();
7096 }
7097 }
7098
7099 /* Empty aligned struct, union or class. */
7100 if (nexps == 0)
7101 return NULL;
7102
7103 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7104 for (i = 0; i < nexps; i++)
7105 XVECEXP (ret, 0, i) = exp [i];
7106 return ret;
7107 }
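
/* Illustrative sketch of the kind of PARALLEL this builds: for the
   struct { double d; long l; } example used above, passed as the first
   argument under the SysV ABI, the result is roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   i.e. the double travels in %xmm0 and the long in %rdi, with each
   EXPR_LIST recording the byte offset of its piece inside the struct.  */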
7108
7109 /* Update the data in CUM to advance over an argument of mode MODE
7110 and data type TYPE. (TYPE is null for libcalls where that information
7111 may not be available.) */
7112
7113 static void
7114 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7115 const_tree type, HOST_WIDE_INT bytes,
7116 HOST_WIDE_INT words)
7117 {
7118 switch (mode)
7119 {
7120 default:
7121 break;
7122
7123 case BLKmode:
7124 if (bytes < 0)
7125 break;
7126 /* FALLTHRU */
7127
7128 case DImode:
7129 case SImode:
7130 case HImode:
7131 case QImode:
7132 cum->words += words;
7133 cum->nregs -= words;
7134 cum->regno += words;
7135
7136 if (cum->nregs <= 0)
7137 {
7138 cum->nregs = 0;
7139 cum->regno = 0;
7140 }
7141 break;
7142
7143 case OImode:
7144 /* OImode shouldn't be used directly. */
7145 gcc_unreachable ();
7146
7147 case DFmode:
7148 if (cum->float_in_sse < 2)
7149 break;
7150 case SFmode:
7151 if (cum->float_in_sse < 1)
7152 break;
7153 /* FALLTHRU */
7154
7155 case V8SFmode:
7156 case V8SImode:
7157 case V64QImode:
7158 case V32HImode:
7159 case V16SImode:
7160 case V8DImode:
7161 case V16SFmode:
7162 case V8DFmode:
7163 case V32QImode:
7164 case V16HImode:
7165 case V4DFmode:
7166 case V4DImode:
7167 case TImode:
7168 case V16QImode:
7169 case V8HImode:
7170 case V4SImode:
7171 case V2DImode:
7172 case V4SFmode:
7173 case V2DFmode:
7174 if (!type || !AGGREGATE_TYPE_P (type))
7175 {
7176 cum->sse_words += words;
7177 cum->sse_nregs -= 1;
7178 cum->sse_regno += 1;
7179 if (cum->sse_nregs <= 0)
7180 {
7181 cum->sse_nregs = 0;
7182 cum->sse_regno = 0;
7183 }
7184 }
7185 break;
7186
7187 case V8QImode:
7188 case V4HImode:
7189 case V2SImode:
7190 case V2SFmode:
7191 case V1TImode:
7192 case V1DImode:
7193 if (!type || !AGGREGATE_TYPE_P (type))
7194 {
7195 cum->mmx_words += words;
7196 cum->mmx_nregs -= 1;
7197 cum->mmx_regno += 1;
7198 if (cum->mmx_nregs <= 0)
7199 {
7200 cum->mmx_nregs = 0;
7201 cum->mmx_regno = 0;
7202 }
7203 }
7204 break;
7205 }
7206 }
7207
7208 static void
7209 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7210 const_tree type, HOST_WIDE_INT words, bool named)
7211 {
7212 int int_nregs, sse_nregs;
7213
7214 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
7215 if (!named && (VALID_AVX512F_REG_MODE (mode)
7216 || VALID_AVX256_REG_MODE (mode)))
7217 return;
7218
7219 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7220 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7221 {
7222 cum->nregs -= int_nregs;
7223 cum->sse_nregs -= sse_nregs;
7224 cum->regno += int_nregs;
7225 cum->sse_regno += sse_nregs;
7226 }
7227 else
7228 {
7229 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7230 cum->words = (cum->words + align - 1) & ~(align - 1);
7231 cum->words += words;
7232 }
7233 }
7234
7235 static void
7236 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7237 HOST_WIDE_INT words)
7238 {
7239 /* Otherwise, this should be passed indirect. */
7240 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7241
7242 cum->words += words;
7243 if (cum->nregs > 0)
7244 {
7245 cum->nregs -= 1;
7246 cum->regno += 1;
7247 }
7248 }
7249
7250 /* Update the data in CUM to advance over an argument of mode MODE and
7251 data type TYPE. (TYPE is null for libcalls where that information
7252 may not be available.) */
7253
7254 static void
7255 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7256 const_tree type, bool named)
7257 {
7258 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7259 HOST_WIDE_INT bytes, words;
7260
7261 if (mode == BLKmode)
7262 bytes = int_size_in_bytes (type);
7263 else
7264 bytes = GET_MODE_SIZE (mode);
7265 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7266
7267 if (type)
7268 mode = type_natural_mode (type, NULL, false);
7269
7270 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7271 function_arg_advance_ms_64 (cum, bytes, words);
7272 else if (TARGET_64BIT)
7273 function_arg_advance_64 (cum, mode, type, words, named);
7274 else
7275 function_arg_advance_32 (cum, mode, type, bytes, words);
7276 }
7277
7278 /* Define where to put the arguments to a function.
7279 Value is zero to push the argument on the stack,
7280 or a hard register in which to store the argument.
7281
7282 MODE is the argument's machine mode.
7283 TYPE is the data type of the argument (as a tree).
7284 This is null for libcalls where that information may
7285 not be available.
7286 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7287 the preceding args and about the function being called.
7288 NAMED is nonzero if this argument is a named parameter
7289 (otherwise it is an extra parameter matching an ellipsis). */
7290
7291 static rtx
7292 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7293 enum machine_mode orig_mode, const_tree type,
7294 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7295 {
7296 /* Avoid the AL settings for the Unix64 ABI. */
7297 if (mode == VOIDmode)
7298 return constm1_rtx;
7299
7300 switch (mode)
7301 {
7302 default:
7303 break;
7304
7305 case BLKmode:
7306 if (bytes < 0)
7307 break;
7308 /* FALLTHRU */
7309 case DImode:
7310 case SImode:
7311 case HImode:
7312 case QImode:
7313 if (words <= cum->nregs)
7314 {
7315 int regno = cum->regno;
7316
7317 /* Fastcall allocates the first two DWORD (SImode) or
7318 smaller arguments to ECX and EDX if the argument isn't an
7319 aggregate type. */
7320 if (cum->fastcall)
7321 {
7322 if (mode == BLKmode
7323 || mode == DImode
7324 || (type && AGGREGATE_TYPE_P (type)))
7325 break;
7326
7327 /* ECX, not EAX, is the first allocated register. */
7328 if (regno == AX_REG)
7329 regno = CX_REG;
7330 }
7331 return gen_rtx_REG (mode, regno);
7332 }
7333 break;
7334
7335 case DFmode:
7336 if (cum->float_in_sse < 2)
7337 break;
7338 case SFmode:
7339 if (cum->float_in_sse < 1)
7340 break;
7341 /* FALLTHRU */
7342 case TImode:
7343 /* In 32bit, we pass TImode in xmm registers. */
7344 case V16QImode:
7345 case V8HImode:
7346 case V4SImode:
7347 case V2DImode:
7348 case V4SFmode:
7349 case V2DFmode:
7350 if (!type || !AGGREGATE_TYPE_P (type))
7351 {
7352 if (cum->sse_nregs)
7353 return gen_reg_or_parallel (mode, orig_mode,
7354 cum->sse_regno + FIRST_SSE_REG);
7355 }
7356 break;
7357
7358 case OImode:
7359 case XImode:
7360 /* OImode and XImode shouldn't be used directly. */
7361 gcc_unreachable ();
7362
7363 case V64QImode:
7364 case V32HImode:
7365 case V16SImode:
7366 case V8DImode:
7367 case V16SFmode:
7368 case V8DFmode:
7369 case V8SFmode:
7370 case V8SImode:
7371 case V32QImode:
7372 case V16HImode:
7373 case V4DFmode:
7374 case V4DImode:
7375 if (!type || !AGGREGATE_TYPE_P (type))
7376 {
7377 if (cum->sse_nregs)
7378 return gen_reg_or_parallel (mode, orig_mode,
7379 cum->sse_regno + FIRST_SSE_REG);
7380 }
7381 break;
7382
7383 case V8QImode:
7384 case V4HImode:
7385 case V2SImode:
7386 case V2SFmode:
7387 case V1TImode:
7388 case V1DImode:
7389 if (!type || !AGGREGATE_TYPE_P (type))
7390 {
7391 if (cum->mmx_nregs)
7392 return gen_reg_or_parallel (mode, orig_mode,
7393 cum->mmx_regno + FIRST_MMX_REG);
7394 }
7395 break;
7396 }
7397
7398 return NULL_RTX;
7399 }
7400
7401 static rtx
7402 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7403 enum machine_mode orig_mode, const_tree type, bool named)
7404 {
7405 /* Handle a hidden AL argument containing number of registers
7406 for varargs x86-64 functions. */
7407 if (mode == VOIDmode)
7408 return GEN_INT (cum->maybe_vaarg
7409 ? (cum->sse_nregs < 0
7410 ? X86_64_SSE_REGPARM_MAX
7411 : cum->sse_regno)
7412 : -1);
7413
7414 switch (mode)
7415 {
7416 default:
7417 break;
7418
7419 case V8SFmode:
7420 case V8SImode:
7421 case V32QImode:
7422 case V16HImode:
7423 case V4DFmode:
7424 case V4DImode:
7425 case V16SFmode:
7426 case V16SImode:
7427 case V64QImode:
7428 case V32HImode:
7429 case V8DFmode:
7430 case V8DImode:
7431 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7432 if (!named)
7433 return NULL;
7434 break;
7435 }
7436
7437 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7438 cum->sse_nregs,
7439 &x86_64_int_parameter_registers [cum->regno],
7440 cum->sse_regno);
7441 }
7442
7443 static rtx
7444 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7445 enum machine_mode orig_mode, bool named,
7446 HOST_WIDE_INT bytes)
7447 {
7448 unsigned int regno;
7449
7450 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7451 We use the value -2 to specify that the current function call is MS_ABI. */
7452 if (mode == VOIDmode)
7453 return GEN_INT (-2);
7454
7455 /* If we've run out of registers, it goes on the stack. */
7456 if (cum->nregs == 0)
7457 return NULL_RTX;
7458
7459 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7460
7461 /* Only floating point modes are passed in anything but integer regs. */
7462 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7463 {
7464 if (named)
7465 regno = cum->regno + FIRST_SSE_REG;
7466 else
7467 {
7468 rtx t1, t2;
7469
7470 /* Unnamed floating parameters are passed in both the
7471 SSE and integer registers. */
7472 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7473 t2 = gen_rtx_REG (mode, regno);
7474 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7475 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7476 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7477 }
7478 }
7479 /* Handle aggregate types passed in registers. */
7480 if (orig_mode == BLKmode)
7481 {
7482 if (bytes > 0 && bytes <= 8)
7483 mode = (bytes > 4 ? DImode : SImode);
7484 if (mode == BLKmode)
7485 mode = DImode;
7486 }
7487
7488 return gen_reg_or_parallel (mode, orig_mode, regno);
7489 }
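
/* Illustrative sketch of the Microsoft x64 convention handled above:
   the first four arguments go in %rcx, %rdx, %r8 and %r9 (an SFmode or
   DFmode argument instead occupies the corresponding %xmm0-%xmm3 slot),
   and for an unnamed floating-point argument of a varargs function the
   value is passed in both registers, which is what the two-element
   PARALLEL built above expresses.  */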
7490
7491 /* Return where to put the arguments to a function.
7492 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7493
7494 MODE is the argument's machine mode. TYPE is the data type of the
7495 argument. It is null for libcalls where that information may not be
7496 available. CUM gives information about the preceding args and about
7497 the function being called. NAMED is nonzero if this argument is a
7498 named parameter (otherwise it is an extra parameter matching an
7499 ellipsis). */
7500
7501 static rtx
7502 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7503 const_tree type, bool named)
7504 {
7505 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7506 enum machine_mode mode = omode;
7507 HOST_WIDE_INT bytes, words;
7508 rtx arg;
7509
7510 if (mode == BLKmode)
7511 bytes = int_size_in_bytes (type);
7512 else
7513 bytes = GET_MODE_SIZE (mode);
7514 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7515
7516 /* To simplify the code below, represent vector types with a vector mode
7517 even if MMX/SSE are not active. */
7518 if (type && TREE_CODE (type) == VECTOR_TYPE)
7519 mode = type_natural_mode (type, cum, false);
7520
7521 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7522 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7523 else if (TARGET_64BIT)
7524 arg = function_arg_64 (cum, mode, omode, type, named);
7525 else
7526 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7527
7528 return arg;
7529 }
7530
7531 /* A C expression that indicates when an argument must be passed by
7532 reference. If nonzero for an argument, a copy of that argument is
7533 made in memory and a pointer to the argument is passed instead of
7534 the argument itself. The pointer is passed in whatever way is
7535 appropriate for passing a pointer to that type. */
7536
7537 static bool
7538 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7539 const_tree type, bool named ATTRIBUTE_UNUSED)
7540 {
7541 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7542
7543 /* See Windows x64 Software Convention. */
7544 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7545 {
7546 int msize = (int) GET_MODE_SIZE (mode);
7547 if (type)
7548 {
7549 /* Arrays are passed by reference. */
7550 if (TREE_CODE (type) == ARRAY_TYPE)
7551 return true;
7552
7553 if (AGGREGATE_TYPE_P (type))
7554 {
7555 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7556 are passed by reference. */
7557 msize = int_size_in_bytes (type);
7558 }
7559 }
7560
7561 /* __m128 is passed by reference. */
7562 switch (msize) {
7563 case 1: case 2: case 4: case 8:
7564 break;
7565 default:
7566 return true;
7567 }
7568 }
7569 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7570 return true;
7571
7572 return false;
7573 }
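
/* Illustrative examples of the rules above for the Microsoft x64 ABI
   (hypothetical declarations):

     void f (__m128 v);           -- 16 bytes, passed by reference
     struct s3 { char c[3]; };    -- size 3, passed by reference
     struct s8 { long long l; };  -- size 8, passed by value in a register

   On the SysV side, only variable-sized types (int_size_in_bytes == -1)
   are forced by this hook to be passed by reference.  */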
7574
7575 /* Return true when TYPE should be 128bit aligned for 32bit argument
7576 passing ABI. XXX: This function is obsolete and is only used for
7577 checking psABI compatibility with previous versions of GCC. */
7578
7579 static bool
7580 ix86_compat_aligned_value_p (const_tree type)
7581 {
7582 enum machine_mode mode = TYPE_MODE (type);
7583 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7584 || mode == TDmode
7585 || mode == TFmode
7586 || mode == TCmode)
7587 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7588 return true;
7589 if (TYPE_ALIGN (type) < 128)
7590 return false;
7591
7592 if (AGGREGATE_TYPE_P (type))
7593 {
7594 /* Walk the aggregates recursively. */
7595 switch (TREE_CODE (type))
7596 {
7597 case RECORD_TYPE:
7598 case UNION_TYPE:
7599 case QUAL_UNION_TYPE:
7600 {
7601 tree field;
7602
7603 /* Walk all the structure fields. */
7604 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7605 {
7606 if (TREE_CODE (field) == FIELD_DECL
7607 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7608 return true;
7609 }
7610 break;
7611 }
7612
7613 case ARRAY_TYPE:
7614 /* Just for use if some language passes arrays by value. */
7615 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7616 return true;
7617 break;
7618
7619 default:
7620 gcc_unreachable ();
7621 }
7622 }
7623 return false;
7624 }
7625
7626 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7627 XXX: This function is obsolete and is only used for checking psABI
7628 compatibility with previous versions of GCC. */
7629
7630 static unsigned int
7631 ix86_compat_function_arg_boundary (enum machine_mode mode,
7632 const_tree type, unsigned int align)
7633 {
7634 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7635 natural boundaries. */
7636 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7637 {
7638 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7639 make an exception for SSE modes since these require 128bit
7640 alignment.
7641
7642 The handling here differs from field_alignment. ICC aligns MMX
7643 arguments to 4 byte boundaries, while structure fields are aligned
7644 to 8 byte boundaries. */
7645 if (!type)
7646 {
7647 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7648 align = PARM_BOUNDARY;
7649 }
7650 else
7651 {
7652 if (!ix86_compat_aligned_value_p (type))
7653 align = PARM_BOUNDARY;
7654 }
7655 }
7656 if (align > BIGGEST_ALIGNMENT)
7657 align = BIGGEST_ALIGNMENT;
7658 return align;
7659 }
7660
7661 /* Return true when TYPE should be 128bit aligned for 32bit argument
7662 passing ABI. */
7663
7664 static bool
7665 ix86_contains_aligned_value_p (const_tree type)
7666 {
7667 enum machine_mode mode = TYPE_MODE (type);
7668
7669 if (mode == XFmode || mode == XCmode)
7670 return false;
7671
7672 if (TYPE_ALIGN (type) < 128)
7673 return false;
7674
7675 if (AGGREGATE_TYPE_P (type))
7676 {
7677 /* Walk the aggregates recursively. */
7678 switch (TREE_CODE (type))
7679 {
7680 case RECORD_TYPE:
7681 case UNION_TYPE:
7682 case QUAL_UNION_TYPE:
7683 {
7684 tree field;
7685
7686 /* Walk all the structure fields. */
7687 for (field = TYPE_FIELDS (type);
7688 field;
7689 field = DECL_CHAIN (field))
7690 {
7691 if (TREE_CODE (field) == FIELD_DECL
7692 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7693 return true;
7694 }
7695 break;
7696 }
7697
7698 case ARRAY_TYPE:
7699 /* Just for use if some language passes arrays by value. */
7700 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7701 return true;
7702 break;
7703
7704 default:
7705 gcc_unreachable ();
7706 }
7707 }
7708 else
7709 return TYPE_ALIGN (type) >= 128;
7710
7711 return false;
7712 }
7713
7714 /* Gives the alignment boundary, in bits, of an argument with the
7715 specified mode and type. */
7716
7717 static unsigned int
7718 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7719 {
7720 unsigned int align;
7721 if (type)
7722 {
7723 /* Since the main variant type is used for the call, convert the type
7724 to its main variant. */
7725 type = TYPE_MAIN_VARIANT (type);
7726 align = TYPE_ALIGN (type);
7727 }
7728 else
7729 align = GET_MODE_ALIGNMENT (mode);
7730 if (align < PARM_BOUNDARY)
7731 align = PARM_BOUNDARY;
7732 else
7733 {
7734 static bool warned;
7735 unsigned int saved_align = align;
7736
7737 if (!TARGET_64BIT)
7738 {
7739 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7740 if (!type)
7741 {
7742 if (mode == XFmode || mode == XCmode)
7743 align = PARM_BOUNDARY;
7744 }
7745 else if (!ix86_contains_aligned_value_p (type))
7746 align = PARM_BOUNDARY;
7747
7748 if (align < 128)
7749 align = PARM_BOUNDARY;
7750 }
7751
7752 if (warn_psabi
7753 && !warned
7754 && align != ix86_compat_function_arg_boundary (mode, type,
7755 saved_align))
7756 {
7757 warned = true;
7758 inform (input_location,
7759 "The ABI for passing parameters with %d-byte"
7760 " alignment has changed in GCC 4.6",
7761 align / BITS_PER_UNIT);
7762 }
7763 }
7764
7765 return align;
7766 }
7767
7768 /* Return true if N is a possible register number of function value. */
7769
7770 static bool
7771 ix86_function_value_regno_p (const unsigned int regno)
7772 {
7773 switch (regno)
7774 {
7775 case AX_REG:
7776 case DX_REG:
7777 return true;
7778 case DI_REG:
7779 case SI_REG:
7780 return TARGET_64BIT && ix86_abi != MS_ABI;
7781
7782 /* Complex values are returned in %st(0)/%st(1) pair. */
7783 case ST0_REG:
7784 case ST1_REG:
7785 /* TODO: The function should depend on the current function's ABI, but
7786 builtins.c would then need updating. Therefore we use the
7787 default ABI. */
7788 if (TARGET_64BIT && ix86_abi == MS_ABI)
7789 return false;
7790 return TARGET_FLOAT_RETURNS_IN_80387;
7791
7792 /* Complex values are returned in %xmm0/%xmm1 pair. */
7793 case XMM0_REG:
7794 case XMM1_REG:
7795 return TARGET_SSE;
7796
7797 case MM0_REG:
7798 if (TARGET_MACHO || TARGET_64BIT)
7799 return false;
7800 return TARGET_MMX;
7801 }
7802
7803 return false;
7804 }
7805
7806 /* Define how to find the value returned by a function.
7807 VALTYPE is the data type of the value (as a tree).
7808 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7809 otherwise, FUNC is 0. */
7810
7811 static rtx
7812 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7813 const_tree fntype, const_tree fn)
7814 {
7815 unsigned int regno;
7816
7817 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7818 we normally prevent this case when mmx is not available. However
7819 some ABIs may require the result to be returned like DImode. */
7820 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7821 regno = FIRST_MMX_REG;
7822
7823 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7824 we prevent this case when sse is not available. However some ABIs
7825 may require the result to be returned like integer TImode. */
7826 else if (mode == TImode
7827 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7828 regno = FIRST_SSE_REG;
7829
7830 /* 32-byte vector modes in %ymm0. */
7831 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7832 regno = FIRST_SSE_REG;
7833
7834 /* 64-byte vector modes in %zmm0. */
7835 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7836 regno = FIRST_SSE_REG;
7837
7838 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7839 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7840 regno = FIRST_FLOAT_REG;
7841 else
7842 /* Most things go in %eax. */
7843 regno = AX_REG;
7844
7845 /* Override FP return register with %xmm0 for local functions when
7846 SSE math is enabled or for functions with sseregparm attribute. */
7847 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7848 {
7849 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7850 if ((sse_level >= 1 && mode == SFmode)
7851 || (sse_level == 2 && mode == DFmode))
7852 regno = FIRST_SSE_REG;
7853 }
7854
7855 /* OImode shouldn't be used directly. */
7856 gcc_assert (mode != OImode);
7857
7858 return gen_rtx_REG (orig_mode, regno);
7859 }
7860
7861 static rtx
7862 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7863 const_tree valtype)
7864 {
7865 rtx ret;
7866
7867 /* Handle libcalls, which don't provide a type node. */
7868 if (valtype == NULL)
7869 {
7870 unsigned int regno;
7871
7872 switch (mode)
7873 {
7874 case SFmode:
7875 case SCmode:
7876 case DFmode:
7877 case DCmode:
7878 case TFmode:
7879 case SDmode:
7880 case DDmode:
7881 case TDmode:
7882 regno = FIRST_SSE_REG;
7883 break;
7884 case XFmode:
7885 case XCmode:
7886 regno = FIRST_FLOAT_REG;
7887 break;
7888 case TCmode:
7889 return NULL;
7890 default:
7891 regno = AX_REG;
7892 }
7893
7894 return gen_rtx_REG (mode, regno);
7895 }
7896 else if (POINTER_TYPE_P (valtype))
7897 {
7898 /* Pointers are always returned in word_mode. */
7899 mode = word_mode;
7900 }
7901
7902 ret = construct_container (mode, orig_mode, valtype, 1,
7903 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7904 x86_64_int_return_registers, 0);
7905
7906 /* For zero sized structures, construct_container returns NULL, but we
7907 need to keep the rest of the compiler happy by returning a meaningful value. */
7908 if (!ret)
7909 ret = gen_rtx_REG (orig_mode, AX_REG);
7910
7911 return ret;
7912 }
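
/* Illustrative examples of the SysV x86-64 return-value placement
   implemented above: a DFmode libcall value comes back in %xmm0, an
   XFmode (long double) value in %st(0), integral and pointer values in
   %rax (pointers having been widened to word_mode first), and a small
   aggregate is spread across registers by construct_container just as
   it would be when passed as an argument.  */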
7913
7914 static rtx
7915 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7916 const_tree valtype)
7917 {
7918 unsigned int regno = AX_REG;
7919
7920 if (TARGET_SSE)
7921 {
7922 switch (GET_MODE_SIZE (mode))
7923 {
7924 case 16:
7925 if (valtype != NULL_TREE
7926 && !VECTOR_INTEGER_TYPE_P (valtype)
7928 && !INTEGRAL_TYPE_P (valtype)
7929 && !VECTOR_FLOAT_TYPE_P (valtype))
7930 break;
7931 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7932 && !COMPLEX_MODE_P (mode))
7933 regno = FIRST_SSE_REG;
7934 break;
7935 case 8:
7936 case 4:
7937 if (mode == SFmode || mode == DFmode)
7938 regno = FIRST_SSE_REG;
7939 break;
7940 default:
7941 break;
7942 }
7943 }
7944 return gen_rtx_REG (orig_mode, regno);
7945 }
7946
7947 static rtx
7948 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7949 enum machine_mode orig_mode, enum machine_mode mode)
7950 {
7951 const_tree fn, fntype;
7952
7953 fn = NULL_TREE;
7954 if (fntype_or_decl && DECL_P (fntype_or_decl))
7955 fn = fntype_or_decl;
7956 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7957
7958 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7959 return function_value_ms_64 (orig_mode, mode, valtype);
7960 else if (TARGET_64BIT)
7961 return function_value_64 (orig_mode, mode, valtype);
7962 else
7963 return function_value_32 (orig_mode, mode, fntype, fn);
7964 }
7965
7966 static rtx
7967 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7968 bool outgoing ATTRIBUTE_UNUSED)
7969 {
7970 enum machine_mode mode, orig_mode;
7971
7972 orig_mode = TYPE_MODE (valtype);
7973 mode = type_natural_mode (valtype, NULL, true);
7974 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7975 }
7976
7977 /* Pointer function arguments and return values are promoted to
7978 word_mode. */
7979
7980 static enum machine_mode
7981 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7982 int *punsignedp, const_tree fntype,
7983 int for_return)
7984 {
7985 if (type != NULL_TREE && POINTER_TYPE_P (type))
7986 {
7987 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7988 return word_mode;
7989 }
7990 return default_promote_function_mode (type, mode, punsignedp, fntype,
7991 for_return);
7992 }
7993
7994 /* Return true if a structure, union or array with MODE containing FIELD
7995 should be accessed using BLKmode. */
7996
7997 static bool
7998 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
7999 {
8000 /* Union with XFmode must be in BLKmode. */
8001 return (mode == XFmode
8002 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8003 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8004 }
8005
8006 rtx
8007 ix86_libcall_value (enum machine_mode mode)
8008 {
8009 return ix86_function_value_1 (NULL, NULL, mode, mode);
8010 }
8011
8012 /* Return true iff type is returned in memory. */
8013
8014 static bool
8015 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8016 {
8017 #ifdef SUBTARGET_RETURN_IN_MEMORY
8018 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8019 #else
8020 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8021 HOST_WIDE_INT size;
8022
8023 if (TARGET_64BIT)
8024 {
8025 if (ix86_function_type_abi (fntype) == MS_ABI)
8026 {
8027 size = int_size_in_bytes (type);
8028
8029 /* __m128 is returned in xmm0. */
8030 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8031 || INTEGRAL_TYPE_P (type)
8032 || VECTOR_FLOAT_TYPE_P (type))
8033 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8034 && !COMPLEX_MODE_P (mode)
8035 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8036 return false;
8037
8038 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
8039 return size != 1 && size != 2 && size != 4 && size != 8;
8040 }
8041 else
8042 {
8043 int needed_intregs, needed_sseregs;
8044
8045 return examine_argument (mode, type, 1,
8046 &needed_intregs, &needed_sseregs);
8047 }
8048 }
8049 else
8050 {
8051 if (mode == BLKmode)
8052 return true;
8053
8054 size = int_size_in_bytes (type);
8055
8056 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8057 return false;
8058
8059 if (VECTOR_MODE_P (mode) || mode == TImode)
8060 {
8061 /* User-created vectors small enough to fit in EAX. */
8062 if (size < 8)
8063 return false;
8064
8065 /* Unless the ABI prescribes otherwise,
8066 MMX/3dNow values are returned in MM0 if available. */
8067
8068 if (size == 8)
8069 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8070
8071 /* SSE values are returned in XMM0 if available. */
8072 if (size == 16)
8073 return !TARGET_SSE;
8074
8075 /* AVX values are returned in YMM0 if available. */
8076 if (size == 32)
8077 return !TARGET_AVX;
8078
8079 /* AVX512F values are returned in ZMM0 if available. */
8080 if (size == 64)
8081 return !TARGET_AVX512F;
8082 }
8083
8084 if (mode == XFmode)
8085 return false;
8086
8087 if (size > 12)
8088 return true;
8089
8090 /* OImode shouldn't be used directly. */
8091 gcc_assert (mode != OImode);
8092
8093 return false;
8094 }
8095 #endif
8096 }
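
/* Illustrative examples of the rules above: under the 64-bit MS ABI a
   return value must be 1, 2, 4 or 8 bytes (or a 16-byte vector) to stay
   in registers, so e.g. a 24-byte struct is returned in memory; under
   the SysV x86-64 ABI a struct such as { long a, b, c, d; } (32 bytes,
   classified MEMORY) is likewise returned via a hidden pointer, while
   { double d; long l; } still comes back in %xmm0/%rax.  */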
8097
8098 \f
8099 /* Create the va_list data type. */
8100
8101 /* Returns the calling-convention-specific va_list data type.
8102 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8103
8104 static tree
8105 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8106 {
8107 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8108
8109 /* For i386 we use a plain pointer to the argument area. */
8110 if (!TARGET_64BIT || abi == MS_ABI)
8111 return build_pointer_type (char_type_node);
8112
8113 record = lang_hooks.types.make_type (RECORD_TYPE);
8114 type_decl = build_decl (BUILTINS_LOCATION,
8115 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8116
8117 f_gpr = build_decl (BUILTINS_LOCATION,
8118 FIELD_DECL, get_identifier ("gp_offset"),
8119 unsigned_type_node);
8120 f_fpr = build_decl (BUILTINS_LOCATION,
8121 FIELD_DECL, get_identifier ("fp_offset"),
8122 unsigned_type_node);
8123 f_ovf = build_decl (BUILTINS_LOCATION,
8124 FIELD_DECL, get_identifier ("overflow_arg_area"),
8125 ptr_type_node);
8126 f_sav = build_decl (BUILTINS_LOCATION,
8127 FIELD_DECL, get_identifier ("reg_save_area"),
8128 ptr_type_node);
8129
8130 va_list_gpr_counter_field = f_gpr;
8131 va_list_fpr_counter_field = f_fpr;
8132
8133 DECL_FIELD_CONTEXT (f_gpr) = record;
8134 DECL_FIELD_CONTEXT (f_fpr) = record;
8135 DECL_FIELD_CONTEXT (f_ovf) = record;
8136 DECL_FIELD_CONTEXT (f_sav) = record;
8137
8138 TYPE_STUB_DECL (record) = type_decl;
8139 TYPE_NAME (record) = type_decl;
8140 TYPE_FIELDS (record) = f_gpr;
8141 DECL_CHAIN (f_gpr) = f_fpr;
8142 DECL_CHAIN (f_fpr) = f_ovf;
8143 DECL_CHAIN (f_ovf) = f_sav;
8144
8145 layout_type (record);
8146
8147 /* The correct type is an array type of one element. */
8148 return build_array_type (record, build_index_type (size_zero_node));
8149 }
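
/* For illustration, the record built above corresponds to the familiar
   SysV x86-64 va_list layout (a sketch in standard C spelling):

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];

   i.e. an array of one element, so that va_list decays to a pointer
   when passed to functions such as vfprintf.  */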
8150
8151 /* Set up the builtin va_list data type and, for 64-bit, the additional
8152 calling-convention-specific va_list data types. */
8153
8154 static tree
8155 ix86_build_builtin_va_list (void)
8156 {
8157 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8158
8159 /* Initialize ABI-specific va_list builtin types. */
8160 if (TARGET_64BIT)
8161 {
8162 tree t;
8163 if (ix86_abi == MS_ABI)
8164 {
8165 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8166 if (TREE_CODE (t) != RECORD_TYPE)
8167 t = build_variant_type_copy (t);
8168 sysv_va_list_type_node = t;
8169 }
8170 else
8171 {
8172 t = ret;
8173 if (TREE_CODE (t) != RECORD_TYPE)
8174 t = build_variant_type_copy (t);
8175 sysv_va_list_type_node = t;
8176 }
8177 if (ix86_abi != MS_ABI)
8178 {
8179 t = ix86_build_builtin_va_list_abi (MS_ABI);
8180 if (TREE_CODE (t) != RECORD_TYPE)
8181 t = build_variant_type_copy (t);
8182 ms_va_list_type_node = t;
8183 }
8184 else
8185 {
8186 t = ret;
8187 if (TREE_CODE (t) != RECORD_TYPE)
8188 t = build_variant_type_copy (t);
8189 ms_va_list_type_node = t;
8190 }
8191 }
8192
8193 return ret;
8194 }
8195
8196 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8197
8198 static void
8199 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8200 {
8201 rtx save_area, mem;
8202 alias_set_type set;
8203 int i, max;
8204
8205 /* GPR size of varargs save area. */
8206 if (cfun->va_list_gpr_size)
8207 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8208 else
8209 ix86_varargs_gpr_size = 0;
8210
8211 /* FPR size of varargs save area. We don't need it if we don't pass
8212 anything in SSE registers. */
8213 if (TARGET_SSE && cfun->va_list_fpr_size)
8214 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8215 else
8216 ix86_varargs_fpr_size = 0;
8217
8218 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8219 return;
8220
8221 save_area = frame_pointer_rtx;
8222 set = get_varargs_alias_set ();
8223
8224 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8225 if (max > X86_64_REGPARM_MAX)
8226 max = X86_64_REGPARM_MAX;
8227
8228 for (i = cum->regno; i < max; i++)
8229 {
8230 mem = gen_rtx_MEM (word_mode,
8231 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8232 MEM_NOTRAP_P (mem) = 1;
8233 set_mem_alias_set (mem, set);
8234 emit_move_insn (mem,
8235 gen_rtx_REG (word_mode,
8236 x86_64_int_parameter_registers[i]));
8237 }
8238
8239 if (ix86_varargs_fpr_size)
8240 {
8241 enum machine_mode smode;
8242 rtx label, test;
8243
8244 /* Now emit code to save SSE registers. The AX parameter contains the
8245 number of SSE parameter registers used to call this function, though
8246 all we actually check here is the zero/non-zero status. */
8247
8248 label = gen_label_rtx ();
8249 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8250 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8251 label));
8252
8253 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8254 we used movdqa (i.e. TImode) instead? Perhaps even better would
8255 be if we could determine the real mode of the data, via a hook
8256 into pass_stdarg. Ignore all that for now. */
8257 smode = V4SFmode;
8258 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8259 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8260
8261 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8262 if (max > X86_64_SSE_REGPARM_MAX)
8263 max = X86_64_SSE_REGPARM_MAX;
8264
8265 for (i = cum->sse_regno; i < max; ++i)
8266 {
8267 mem = plus_constant (Pmode, save_area,
8268 i * 16 + ix86_varargs_gpr_size);
8269 mem = gen_rtx_MEM (smode, mem);
8270 MEM_NOTRAP_P (mem) = 1;
8271 set_mem_alias_set (mem, set);
8272 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8273
8274 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8275 }
8276
8277 emit_label (label);
8278 }
8279 }
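/* A rough sketch of the save area written above (assuming the usual SysV
   values X86_64_REGPARM_MAX == 6 and X86_64_SSE_REGPARM_MAX == 8), laid out
   relative to the frame pointer:

       bytes   0 .. 47    rdi, rsi, rdx, rcx, r8, r9   (8 bytes each)
       bytes  48 .. 175   xmm0 .. xmm7                 (16 bytes each)

   The gp_offset and fp_offset fields of the va_list index into this block.  */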
8280
8281 static void
8282 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8283 {
8284 alias_set_type set = get_varargs_alias_set ();
8285 int i;
8286
8287 /* Reset to zero, as a SysV va_arg may have been used
8288 before. */
8289 ix86_varargs_gpr_size = 0;
8290 ix86_varargs_fpr_size = 0;
8291
8292 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8293 {
8294 rtx reg, mem;
8295
8296 mem = gen_rtx_MEM (Pmode,
8297 plus_constant (Pmode, virtual_incoming_args_rtx,
8298 i * UNITS_PER_WORD));
8299 MEM_NOTRAP_P (mem) = 1;
8300 set_mem_alias_set (mem, set);
8301
8302 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8303 emit_move_insn (mem, reg);
8304 }
8305 }
8306
8307 static void
8308 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8309 tree type, int *pretend_size ATTRIBUTE_UNUSED,
8310 int no_rtl)
8311 {
8312 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8313 CUMULATIVE_ARGS next_cum;
8314 tree fntype;
8315
8316 /* This argument doesn't appear to be used anymore, which is good,
8317 because the old code here didn't suppress rtl generation. */
8318 gcc_assert (!no_rtl);
8319
8320 if (!TARGET_64BIT)
8321 return;
8322
8323 fntype = TREE_TYPE (current_function_decl);
8324
8325 /* For varargs, we do not want to skip the dummy va_dcl argument.
8326 For stdargs, we do want to skip the last named argument. */
8327 next_cum = *cum;
8328 if (stdarg_p (fntype))
8329 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8330 true);
8331
8332 if (cum->call_abi == MS_ABI)
8333 setup_incoming_varargs_ms_64 (&next_cum);
8334 else
8335 setup_incoming_varargs_64 (&next_cum);
8336 }
8337
8338 /* Check whether TYPE is a va_list of kind char *. */
8339
8340 static bool
8341 is_va_list_char_pointer (tree type)
8342 {
8343 tree canonic;
8344
8345 /* For 32-bit it is always true. */
8346 if (!TARGET_64BIT)
8347 return true;
8348 canonic = ix86_canonical_va_list_type (type);
8349 return (canonic == ms_va_list_type_node
8350 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8351 }
8352
8353 /* Implement va_start. */
8354
8355 static void
8356 ix86_va_start (tree valist, rtx nextarg)
8357 {
8358 HOST_WIDE_INT words, n_gpr, n_fpr;
8359 tree f_gpr, f_fpr, f_ovf, f_sav;
8360 tree gpr, fpr, ovf, sav, t;
8361 tree type;
8362 rtx ovf_rtx;
8363
8364 if (flag_split_stack
8365 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8366 {
8367 unsigned int scratch_regno;
8368
8369 /* When we are splitting the stack, we can't refer to the stack
8370 arguments using internal_arg_pointer, because they may be on
8371 the old stack. The split stack prologue will arrange to
8372 leave a pointer to the old stack arguments in a scratch
8373 register, which we here copy to a pseudo-register. The split
8374 stack prologue can't set the pseudo-register directly because
8375 it (the prologue) runs before any registers have been saved. */
8376
8377 scratch_regno = split_stack_prologue_scratch_regno ();
8378 if (scratch_regno != INVALID_REGNUM)
8379 {
8380 rtx reg, seq;
8381
8382 reg = gen_reg_rtx (Pmode);
8383 cfun->machine->split_stack_varargs_pointer = reg;
8384
8385 start_sequence ();
8386 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8387 seq = get_insns ();
8388 end_sequence ();
8389
8390 push_topmost_sequence ();
8391 emit_insn_after (seq, entry_of_function ());
8392 pop_topmost_sequence ();
8393 }
8394 }
8395
8396 /* Only the 64-bit target needs something special. */
8397 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8398 {
8399 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8400 std_expand_builtin_va_start (valist, nextarg);
8401 else
8402 {
8403 rtx va_r, next;
8404
8405 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8406 next = expand_binop (ptr_mode, add_optab,
8407 cfun->machine->split_stack_varargs_pointer,
8408 crtl->args.arg_offset_rtx,
8409 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8410 convert_move (va_r, next, 0);
8411 }
8412 return;
8413 }
8414
8415 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8416 f_fpr = DECL_CHAIN (f_gpr);
8417 f_ovf = DECL_CHAIN (f_fpr);
8418 f_sav = DECL_CHAIN (f_ovf);
8419
8420 valist = build_simple_mem_ref (valist);
8421 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8422 /* The following should be folded into the MEM_REF offset. */
8423 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8424 f_gpr, NULL_TREE);
8425 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8426 f_fpr, NULL_TREE);
8427 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8428 f_ovf, NULL_TREE);
8429 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8430 f_sav, NULL_TREE);
8431
8432 /* Count number of gp and fp argument registers used. */
8433 words = crtl->args.info.words;
8434 n_gpr = crtl->args.info.regno;
8435 n_fpr = crtl->args.info.sse_regno;
8436
8437 if (cfun->va_list_gpr_size)
8438 {
8439 type = TREE_TYPE (gpr);
8440 t = build2 (MODIFY_EXPR, type,
8441 gpr, build_int_cst (type, n_gpr * 8));
8442 TREE_SIDE_EFFECTS (t) = 1;
8443 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8444 }
8445
8446 if (TARGET_SSE && cfun->va_list_fpr_size)
8447 {
8448 type = TREE_TYPE (fpr);
8449 t = build2 (MODIFY_EXPR, type, fpr,
8450 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8451 TREE_SIDE_EFFECTS (t) = 1;
8452 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8453 }
8454
8455 /* Find the overflow area. */
8456 type = TREE_TYPE (ovf);
8457 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8458 ovf_rtx = crtl->args.internal_arg_pointer;
8459 else
8460 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8461 t = make_tree (type, ovf_rtx);
8462 if (words != 0)
8463 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8464 t = build2 (MODIFY_EXPR, type, ovf, t);
8465 TREE_SIDE_EFFECTS (t) = 1;
8466 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8467
8468 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8469 {
8470 /* Find the register save area.
8471 The function prologue saves it right above the stack frame. */
8472 type = TREE_TYPE (sav);
8473 t = make_tree (type, frame_pointer_rtx);
8474 if (!ix86_varargs_gpr_size)
8475 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8476 t = build2 (MODIFY_EXPR, type, sav, t);
8477 TREE_SIDE_EFFECTS (t) = 1;
8478 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8479 }
8480 }
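/* A simplified sketch of what the expansion above computes for a SysV
   function that consumed N_GPR integer and N_FPR SSE argument registers
   before the ellipsis (assuming X86_64_REGPARM_MAX == 6; field names follow
   the va_list record built earlier):

       ap->gp_offset = n_gpr * 8;
       ap->fp_offset = 48 + n_fpr * 16;
       ap->overflow_arg_area = incoming_args + words * UNITS_PER_WORD;
       ap->reg_save_area = register save area (frame pointer based);
  */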
8481
8482 /* Implement va_arg. */
8483
8484 static tree
8485 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8486 gimple_seq *post_p)
8487 {
8488 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8489 tree f_gpr, f_fpr, f_ovf, f_sav;
8490 tree gpr, fpr, ovf, sav, t;
8491 int size, rsize;
8492 tree lab_false, lab_over = NULL_TREE;
8493 tree addr, t2;
8494 rtx container;
8495 int indirect_p = 0;
8496 tree ptrtype;
8497 enum machine_mode nat_mode;
8498 unsigned int arg_boundary;
8499
8500 /* Only the 64-bit target needs something special. */
8501 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8502 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8503
8504 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8505 f_fpr = DECL_CHAIN (f_gpr);
8506 f_ovf = DECL_CHAIN (f_fpr);
8507 f_sav = DECL_CHAIN (f_ovf);
8508
8509 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8510 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8511 valist = build_va_arg_indirect_ref (valist);
8512 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8513 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8514 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8515
8516 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8517 if (indirect_p)
8518 type = build_pointer_type (type);
8519 size = int_size_in_bytes (type);
8520 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8521
8522 nat_mode = type_natural_mode (type, NULL, false);
8523 switch (nat_mode)
8524 {
8525 case V8SFmode:
8526 case V8SImode:
8527 case V32QImode:
8528 case V16HImode:
8529 case V4DFmode:
8530 case V4DImode:
8531 case V16SFmode:
8532 case V16SImode:
8533 case V64QImode:
8534 case V32HImode:
8535 case V8DFmode:
8536 case V8DImode:
8537 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
8538 if (!TARGET_64BIT_MS_ABI)
8539 {
8540 container = NULL;
8541 break;
8542 }
8543
8544 default:
8545 container = construct_container (nat_mode, TYPE_MODE (type),
8546 type, 0, X86_64_REGPARM_MAX,
8547 X86_64_SSE_REGPARM_MAX, intreg,
8548 0);
8549 break;
8550 }
8551
8552 /* Pull the value out of the saved registers. */
8553
8554 addr = create_tmp_var (ptr_type_node, "addr");
8555
8556 if (container)
8557 {
8558 int needed_intregs, needed_sseregs;
8559 bool need_temp;
8560 tree int_addr, sse_addr;
8561
8562 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8563 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8564
8565 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8566
8567 need_temp = (!REG_P (container)
8568 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8569 || TYPE_ALIGN (type) > 128));
8570
8571 /* If we are passing a structure, verify that it is a consecutive block
8572 in the register save area. If not, we need to do moves. */
8573 if (!need_temp && !REG_P (container))
8574 {
8575 /* Verify that all registers are strictly consecutive. */
8576 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8577 {
8578 int i;
8579
8580 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8581 {
8582 rtx slot = XVECEXP (container, 0, i);
8583 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8584 || INTVAL (XEXP (slot, 1)) != i * 16)
8585 need_temp = 1;
8586 }
8587 }
8588 else
8589 {
8590 int i;
8591
8592 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8593 {
8594 rtx slot = XVECEXP (container, 0, i);
8595 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8596 || INTVAL (XEXP (slot, 1)) != i * 8)
8597 need_temp = 1;
8598 }
8599 }
8600 }
8601 if (!need_temp)
8602 {
8603 int_addr = addr;
8604 sse_addr = addr;
8605 }
8606 else
8607 {
8608 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8609 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8610 }
8611
8612 /* First ensure that we fit completely in registers. */
8613 if (needed_intregs)
8614 {
8615 t = build_int_cst (TREE_TYPE (gpr),
8616 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8617 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8618 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8619 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8620 gimplify_and_add (t, pre_p);
8621 }
8622 if (needed_sseregs)
8623 {
8624 t = build_int_cst (TREE_TYPE (fpr),
8625 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8626 + X86_64_REGPARM_MAX * 8);
8627 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8628 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8629 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8630 gimplify_and_add (t, pre_p);
8631 }
8632
8633 /* Compute index to start of area used for integer regs. */
8634 if (needed_intregs)
8635 {
8636 /* int_addr = gpr + sav; */
8637 t = fold_build_pointer_plus (sav, gpr);
8638 gimplify_assign (int_addr, t, pre_p);
8639 }
8640 if (needed_sseregs)
8641 {
8642 /* sse_addr = fpr + sav; */
8643 t = fold_build_pointer_plus (sav, fpr);
8644 gimplify_assign (sse_addr, t, pre_p);
8645 }
8646 if (need_temp)
8647 {
8648 int i, prev_size = 0;
8649 tree temp = create_tmp_var (type, "va_arg_tmp");
8650
8651 /* addr = &temp; */
8652 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8653 gimplify_assign (addr, t, pre_p);
8654
8655 for (i = 0; i < XVECLEN (container, 0); i++)
8656 {
8657 rtx slot = XVECEXP (container, 0, i);
8658 rtx reg = XEXP (slot, 0);
8659 enum machine_mode mode = GET_MODE (reg);
8660 tree piece_type;
8661 tree addr_type;
8662 tree daddr_type;
8663 tree src_addr, src;
8664 int src_offset;
8665 tree dest_addr, dest;
8666 int cur_size = GET_MODE_SIZE (mode);
8667
8668 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8669 prev_size = INTVAL (XEXP (slot, 1));
8670 if (prev_size + cur_size > size)
8671 {
8672 cur_size = size - prev_size;
8673 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8674 if (mode == BLKmode)
8675 mode = QImode;
8676 }
8677 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8678 if (mode == GET_MODE (reg))
8679 addr_type = build_pointer_type (piece_type);
8680 else
8681 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8682 true);
8683 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8684 true);
8685
8686 if (SSE_REGNO_P (REGNO (reg)))
8687 {
8688 src_addr = sse_addr;
8689 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8690 }
8691 else
8692 {
8693 src_addr = int_addr;
8694 src_offset = REGNO (reg) * 8;
8695 }
8696 src_addr = fold_convert (addr_type, src_addr);
8697 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8698
8699 dest_addr = fold_convert (daddr_type, addr);
8700 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8701 if (cur_size == GET_MODE_SIZE (mode))
8702 {
8703 src = build_va_arg_indirect_ref (src_addr);
8704 dest = build_va_arg_indirect_ref (dest_addr);
8705
8706 gimplify_assign (dest, src, pre_p);
8707 }
8708 else
8709 {
8710 tree copy
8711 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8712 3, dest_addr, src_addr,
8713 size_int (cur_size));
8714 gimplify_and_add (copy, pre_p);
8715 }
8716 prev_size += cur_size;
8717 }
8718 }
8719
8720 if (needed_intregs)
8721 {
8722 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8723 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8724 gimplify_assign (gpr, t, pre_p);
8725 }
8726
8727 if (needed_sseregs)
8728 {
8729 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8730 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8731 gimplify_assign (fpr, t, pre_p);
8732 }
8733
8734 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8735
8736 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8737 }
8738
8739 /* ... otherwise out of the overflow area. */
8740
8741 /* When the caller aligns a parameter on the stack, a parameter whose
8742 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT is only
8743 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee with
8744 the caller here. */
8745 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8746 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8747 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8748
8749 /* Care for on-stack alignment if needed. */
8750 if (arg_boundary <= 64 || size == 0)
8751 t = ovf;
8752 else
8753 {
8754 HOST_WIDE_INT align = arg_boundary / 8;
8755 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8756 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8757 build_int_cst (TREE_TYPE (t), -align));
8758 }
8759
8760 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8761 gimplify_assign (addr, t, pre_p);
8762
8763 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8764 gimplify_assign (unshare_expr (ovf), t, pre_p);
8765
8766 if (container)
8767 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8768
8769 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8770 addr = fold_convert (ptrtype, addr);
8771
8772 if (indirect_p)
8773 addr = build_va_arg_indirect_ref (addr);
8774 return build_va_arg_indirect_ref (addr);
8775 }
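/* For reference, a simplified C-like sketch of the control flow generated
   above for a value that may live in registers (this mirrors the SysV psABI
   va_arg algorithm; the names used are illustrative only):

       if (ap->gp_offset <= 48 - needed_int * 8
           && ap->fp_offset <= 48 + 8 * 16 - needed_sse * 16)
         {
           addr = ap->reg_save_area + offsets from gp_offset / fp_offset;
           ap->gp_offset += needed_int * 8;
           ap->fp_offset += needed_sse * 16;
         }
       else
         {
           addr = align (ap->overflow_arg_area, arg_boundary);
           ap->overflow_arg_area = addr + rounded_size;
         }
       result = *(type *) addr;
  */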
8776 \f
8777 /* Return true if OPNUM's MEM should be matched
8778 in movabs* patterns. */
8779
8780 bool
8781 ix86_check_movabs (rtx insn, int opnum)
8782 {
8783 rtx set, mem;
8784
8785 set = PATTERN (insn);
8786 if (GET_CODE (set) == PARALLEL)
8787 set = XVECEXP (set, 0, 0);
8788 gcc_assert (GET_CODE (set) == SET);
8789 mem = XEXP (set, opnum);
8790 while (GET_CODE (mem) == SUBREG)
8791 mem = SUBREG_REG (mem);
8792 gcc_assert (MEM_P (mem));
8793 return volatile_ok || !MEM_VOLATILE_P (mem);
8794 }
8795 \f
8796 /* Initialize the table of extra 80387 mathematical constants. */
8797
8798 static void
8799 init_ext_80387_constants (void)
8800 {
8801 static const char * cst[5] =
8802 {
8803 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8804 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8805 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8806 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8807 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8808 };
8809 int i;
8810
8811 for (i = 0; i < 5; i++)
8812 {
8813 real_from_string (&ext_80387_constants_table[i], cst[i]);
8814 /* Ensure each constant is rounded to XFmode precision. */
8815 real_convert (&ext_80387_constants_table[i],
8816 XFmode, &ext_80387_constants_table[i]);
8817 }
8818
8819 ext_80387_constants_init = 1;
8820 }
8821
8822 /* Return non-zero if the constant is something that
8823 can be loaded with a special instruction. */
8824
8825 int
8826 standard_80387_constant_p (rtx x)
8827 {
8828 enum machine_mode mode = GET_MODE (x);
8829
8830 REAL_VALUE_TYPE r;
8831
8832 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8833 return -1;
8834
8835 if (x == CONST0_RTX (mode))
8836 return 1;
8837 if (x == CONST1_RTX (mode))
8838 return 2;
8839
8840 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8841
8842 /* For XFmode constants, try to find a special 80387 instruction when
8843 optimizing for size or on those CPUs that benefit from them. */
8844 if (mode == XFmode
8845 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8846 {
8847 int i;
8848
8849 if (! ext_80387_constants_init)
8850 init_ext_80387_constants ();
8851
8852 for (i = 0; i < 5; i++)
8853 if (real_identical (&r, &ext_80387_constants_table[i]))
8854 return i + 3;
8855 }
8856
8857 /* A load of the constant -0.0 or -1.0 will be split into an
8858 fldz;fchs or fld1;fchs sequence. */
8859 if (real_isnegzero (&r))
8860 return 8;
8861 if (real_identical (&r, &dconstm1))
8862 return 9;
8863
8864 return 0;
8865 }
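/* For reference, the return values used above map to the following loads
   (decoded by standard_80387_constant_opcode and standard_80387_constant_rtx
   below):
       1: fldz (+0.0)      2: fld1 (+1.0)      3: fldlg2 (log10 2)
       4: fldln2 (ln 2)    5: fldl2e (log2 e)  6: fldl2t (log2 10)
       7: fldpi (pi)       8: -0.0 (split as fldz; fchs)
       9: -1.0 (split as fld1; fchs)           0: no special instruction  */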
8866
8867 /* Return the opcode of the special instruction to be used to load
8868 the constant X. */
8869
8870 const char *
8871 standard_80387_constant_opcode (rtx x)
8872 {
8873 switch (standard_80387_constant_p (x))
8874 {
8875 case 1:
8876 return "fldz";
8877 case 2:
8878 return "fld1";
8879 case 3:
8880 return "fldlg2";
8881 case 4:
8882 return "fldln2";
8883 case 5:
8884 return "fldl2e";
8885 case 6:
8886 return "fldl2t";
8887 case 7:
8888 return "fldpi";
8889 case 8:
8890 case 9:
8891 return "#";
8892 default:
8893 gcc_unreachable ();
8894 }
8895 }
8896
8897 /* Return the CONST_DOUBLE representing the 80387 constant that is
8898 loaded by the specified special instruction. The argument IDX
8899 matches the return value from standard_80387_constant_p. */
8900
8901 rtx
8902 standard_80387_constant_rtx (int idx)
8903 {
8904 int i;
8905
8906 if (! ext_80387_constants_init)
8907 init_ext_80387_constants ();
8908
8909 switch (idx)
8910 {
8911 case 3:
8912 case 4:
8913 case 5:
8914 case 6:
8915 case 7:
8916 i = idx - 3;
8917 break;
8918
8919 default:
8920 gcc_unreachable ();
8921 }
8922
8923 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8924 XFmode);
8925 }
8926
8927 /* Return 1 if X is all 0s and 2 if X is all 1s
8928 in a supported SSE/AVX vector mode. */
8929
8930 int
8931 standard_sse_constant_p (rtx x)
8932 {
8933 enum machine_mode mode = GET_MODE (x);
8934
8935 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8936 return 1;
8937 if (vector_all_ones_operand (x, mode))
8938 switch (mode)
8939 {
8940 case V16QImode:
8941 case V8HImode:
8942 case V4SImode:
8943 case V2DImode:
8944 if (TARGET_SSE2)
8945 return 2;
8946 case V32QImode:
8947 case V16HImode:
8948 case V8SImode:
8949 case V4DImode:
8950 if (TARGET_AVX2)
8951 return 2;
8952 case V64QImode:
8953 case V32HImode:
8954 case V16SImode:
8955 case V8DImode:
8956 if (TARGET_AVX512F)
8957 return 2;
8958 default:
8959 break;
8960 }
8961
8962 return 0;
8963 }
8964
8965 /* Return the opcode of the special instruction to be used to load
8966 the constant X. */
8967
8968 const char *
8969 standard_sse_constant_opcode (rtx insn, rtx x)
8970 {
8971 switch (standard_sse_constant_p (x))
8972 {
8973 case 1:
8974 switch (get_attr_mode (insn))
8975 {
8976 case MODE_XI:
8977 case MODE_V16SF:
8978 return "vpxord\t%g0, %g0, %g0";
8979 case MODE_V8DF:
8980 return "vpxorq\t%g0, %g0, %g0";
8981 case MODE_TI:
8982 return "%vpxor\t%0, %d0";
8983 case MODE_V2DF:
8984 return "%vxorpd\t%0, %d0";
8985 case MODE_V4SF:
8986 return "%vxorps\t%0, %d0";
8987
8988 case MODE_OI:
8989 return "vpxor\t%x0, %x0, %x0";
8990 case MODE_V4DF:
8991 return "vxorpd\t%x0, %x0, %x0";
8992 case MODE_V8SF:
8993 return "vxorps\t%x0, %x0, %x0";
8994
8995 default:
8996 break;
8997 }
8998
8999 case 2:
9000 if (get_attr_mode (insn) == MODE_XI
9001 || get_attr_mode (insn) == MODE_V8DF
9002 || get_attr_mode (insn) == MODE_V16SF)
9003 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9004 if (TARGET_AVX)
9005 return "vpcmpeqd\t%0, %0, %0";
9006 else
9007 return "pcmpeqd\t%0, %0";
9008
9009 default:
9010 break;
9011 }
9012 gcc_unreachable ();
9013 }
9014
9015 /* Return true if OP contains a symbol reference. */
9016
9017 bool
9018 symbolic_reference_mentioned_p (rtx op)
9019 {
9020 const char *fmt;
9021 int i;
9022
9023 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9024 return true;
9025
9026 fmt = GET_RTX_FORMAT (GET_CODE (op));
9027 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9028 {
9029 if (fmt[i] == 'E')
9030 {
9031 int j;
9032
9033 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9034 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9035 return true;
9036 }
9037
9038 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9039 return true;
9040 }
9041
9042 return false;
9043 }
9044
9045 /* Return true if it is appropriate to emit `ret' instructions in the
9046 body of a function. Do this only if the epilogue is simple, needing a
9047 couple of insns. Prior to reloading, we can't tell how many registers
9048 must be saved, so return false then. Return false if there is no frame
9049 marker to de-allocate. */
9050
9051 bool
9052 ix86_can_use_return_insn_p (void)
9053 {
9054 struct ix86_frame frame;
9055
9056 if (! reload_completed || frame_pointer_needed)
9057 return 0;
9058
9059 /* Don't allow more than 32k pop, since that's all we can do
9060 with one instruction. */
9061 if (crtl->args.pops_args && crtl->args.size >= 32768)
9062 return 0;
9063
9064 ix86_compute_frame_layout (&frame);
9065 return (frame.stack_pointer_offset == UNITS_PER_WORD
9066 && (frame.nregs + frame.nsseregs) == 0);
9067 }
9068 \f
9069 /* Value should be nonzero if functions must have frame pointers.
9070 Zero means the frame pointer need not be set up (and parms may
9071 be accessed via the stack pointer) in functions that seem suitable. */
9072
9073 static bool
9074 ix86_frame_pointer_required (void)
9075 {
9076 /* If we accessed previous frames, then the generated code expects
9077 to be able to access the saved ebp value in our frame. */
9078 if (cfun->machine->accesses_prev_frame)
9079 return true;
9080
9081 /* Several x86 OSes need a frame pointer for other reasons,
9082 usually pertaining to setjmp. */
9083 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9084 return true;
9085
9086 /* For older 32-bit runtimes, setjmp requires a valid frame pointer. */
9087 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9088 return true;
9089
9090 /* With Win64 SEH, very large frames need a frame pointer, as the maximum
9091 stack allocation is 4GB. */
9092 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9093 return true;
9094
9095 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9096 turns off the frame pointer by default. Turn it back on now if
9097 we've not got a leaf function. */
9098 if (TARGET_OMIT_LEAF_FRAME_POINTER
9099 && (!crtl->is_leaf
9100 || ix86_current_function_calls_tls_descriptor))
9101 return true;
9102
9103 if (crtl->profile && !flag_fentry)
9104 return true;
9105
9106 return false;
9107 }
9108
9109 /* Record that the current function accesses previous call frames. */
9110
9111 void
9112 ix86_setup_frame_addresses (void)
9113 {
9114 cfun->machine->accesses_prev_frame = 1;
9115 }
9116 \f
9117 #ifndef USE_HIDDEN_LINKONCE
9118 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9119 # define USE_HIDDEN_LINKONCE 1
9120 # else
9121 # define USE_HIDDEN_LINKONCE 0
9122 # endif
9123 #endif
9124
9125 static int pic_labels_used;
9126
9127 /* Fills in the label name that should be used for a pc thunk for
9128 the given register. */
9129
9130 static void
9131 get_pc_thunk_name (char name[32], unsigned int regno)
9132 {
9133 gcc_assert (!TARGET_64BIT);
9134
9135 if (USE_HIDDEN_LINKONCE)
9136 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9137 else
9138 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9139 }
9140
9141
9142 /* This function generates the per-register pc thunks used for -fpic; each
9143 thunk loads its register with the return address of the caller and then
returns. */
9144
9145 static void
9146 ix86_code_end (void)
9147 {
9148 rtx xops[2];
9149 int regno;
9150
9151 for (regno = AX_REG; regno <= SP_REG; regno++)
9152 {
9153 char name[32];
9154 tree decl;
9155
9156 if (!(pic_labels_used & (1 << regno)))
9157 continue;
9158
9159 get_pc_thunk_name (name, regno);
9160
9161 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9162 get_identifier (name),
9163 build_function_type_list (void_type_node, NULL_TREE));
9164 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9165 NULL_TREE, void_type_node);
9166 TREE_PUBLIC (decl) = 1;
9167 TREE_STATIC (decl) = 1;
9168 DECL_IGNORED_P (decl) = 1;
9169
9170 #if TARGET_MACHO
9171 if (TARGET_MACHO)
9172 {
9173 switch_to_section (darwin_sections[text_coal_section]);
9174 fputs ("\t.weak_definition\t", asm_out_file);
9175 assemble_name (asm_out_file, name);
9176 fputs ("\n\t.private_extern\t", asm_out_file);
9177 assemble_name (asm_out_file, name);
9178 putc ('\n', asm_out_file);
9179 ASM_OUTPUT_LABEL (asm_out_file, name);
9180 DECL_WEAK (decl) = 1;
9181 }
9182 else
9183 #endif
9184 if (USE_HIDDEN_LINKONCE)
9185 {
9186 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
9187
9188 targetm.asm_out.unique_section (decl, 0);
9189 switch_to_section (get_named_section (decl, NULL, 0));
9190
9191 targetm.asm_out.globalize_label (asm_out_file, name);
9192 fputs ("\t.hidden\t", asm_out_file);
9193 assemble_name (asm_out_file, name);
9194 putc ('\n', asm_out_file);
9195 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9196 }
9197 else
9198 {
9199 switch_to_section (text_section);
9200 ASM_OUTPUT_LABEL (asm_out_file, name);
9201 }
9202
9203 DECL_INITIAL (decl) = make_node (BLOCK);
9204 current_function_decl = decl;
9205 init_function_start (decl);
9206 first_function_block_is_cold = false;
9207 /* Make sure unwind info is emitted for the thunk if needed. */
9208 final_start_function (emit_barrier (), asm_out_file, 1);
9209
9210 /* Pad stack IP move with 4 instructions (two NOPs count
9211 as one instruction). */
9212 if (TARGET_PAD_SHORT_FUNCTION)
9213 {
9214 int i = 8;
9215
9216 while (i--)
9217 fputs ("\tnop\n", asm_out_file);
9218 }
9219
9220 xops[0] = gen_rtx_REG (Pmode, regno);
9221 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9222 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9223 fputs ("\tret\n", asm_out_file);
9224 final_end_function ();
9225 init_insn_lengths ();
9226 free_after_compilation (cfun);
9227 set_cfun (NULL);
9228 current_function_decl = NULL;
9229 }
9230
9231 if (flag_split_stack)
9232 file_end_indicate_split_stack ();
9233 }
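/* For a 32-bit PIC build the loop above typically emits, for each used
   register, a hidden comdat thunk of the following shape (shown for %ebx;
   this is a sketch of the expected output, not text copied from an object
   file):

       __x86.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret
  */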
9234
9235 /* Emit code for the SET_GOT patterns. */
9236
9237 const char *
9238 output_set_got (rtx dest, rtx label)
9239 {
9240 rtx xops[3];
9241
9242 xops[0] = dest;
9243
9244 if (TARGET_VXWORKS_RTP && flag_pic)
9245 {
9246 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9247 xops[2] = gen_rtx_MEM (Pmode,
9248 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9249 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9250
9251 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9252 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9253 an unadorned address. */
9254 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9255 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9256 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9257 return "";
9258 }
9259
9260 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9261
9262 if (!flag_pic)
9263 {
9264 if (TARGET_MACHO)
9265 /* We don't need a pic base, we're not producing pic. */
9266 gcc_unreachable ();
9267
9268 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9269 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9270 targetm.asm_out.internal_label (asm_out_file, "L",
9271 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9272 }
9273 else
9274 {
9275 char name[32];
9276 get_pc_thunk_name (name, REGNO (dest));
9277 pic_labels_used |= 1 << REGNO (dest);
9278
9279 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9280 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9281 output_asm_insn ("call\t%X2", xops);
9282
9283 #if TARGET_MACHO
9284 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9285 This is what will be referenced by the Mach-O PIC subsystem. */
9286 if (machopic_should_output_picbase_label () || !label)
9287 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9288
9289 /* When we are restoring the pic base at the site of a nonlocal label,
9290 and we decided to emit the pic base above, we will still output a
9291 local label used for calculating the correction offset (even though
9292 the offset will be 0 in that case). */
9293 if (label)
9294 targetm.asm_out.internal_label (asm_out_file, "L",
9295 CODE_LABEL_NUMBER (label));
9296 #endif
9297 }
9298
9299 if (!TARGET_MACHO)
9300 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9301
9302 return "";
9303 }
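/* In the common non-Mach-O PIC case the sequence produced above looks
   roughly like this when DEST is %ebx (illustrative AT&T syntax):

       call    __x86.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   The thunk supplies the address of the following insn, and the add rebases
   it to the GOT via the GOT_SYMBOL_NAME symbol.  */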
9304
9305 /* Generate a "push" pattern for input ARG. */
9306
9307 static rtx
9308 gen_push (rtx arg)
9309 {
9310 struct machine_function *m = cfun->machine;
9311
9312 if (m->fs.cfa_reg == stack_pointer_rtx)
9313 m->fs.cfa_offset += UNITS_PER_WORD;
9314 m->fs.sp_offset += UNITS_PER_WORD;
9315
9316 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9317 arg = gen_rtx_REG (word_mode, REGNO (arg));
9318
9319 return gen_rtx_SET (VOIDmode,
9320 gen_rtx_MEM (word_mode,
9321 gen_rtx_PRE_DEC (Pmode,
9322 stack_pointer_rtx)),
9323 arg);
9324 }
9325
9326 /* Generate a "pop" pattern for input ARG. */
9327
9328 static rtx
9329 gen_pop (rtx arg)
9330 {
9331 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9332 arg = gen_rtx_REG (word_mode, REGNO (arg));
9333
9334 return gen_rtx_SET (VOIDmode,
9335 arg,
9336 gen_rtx_MEM (word_mode,
9337 gen_rtx_POST_INC (Pmode,
9338 stack_pointer_rtx)));
9339 }
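/* The two helpers above build the canonical push/pop RTL. For a 64-bit
   target the result is, schematically (illustrative RTL dump syntax):

       gen_push (di):  (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI di))
       gen_pop (di):   (set (reg:DI di) (mem:DI (post_inc:DI (reg:DI sp))))

   Note that the address uses Pmode while the moved value uses word_mode.  */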
9340
9341 /* Return >= 0 if there is an unused call-clobbered register available
9342 for the entire function. */
9343
9344 static unsigned int
9345 ix86_select_alt_pic_regnum (void)
9346 {
9347 if (crtl->is_leaf
9348 && !crtl->profile
9349 && !ix86_current_function_calls_tls_descriptor)
9350 {
9351 int i, drap;
9352 /* Can't use the same register for both PIC and DRAP. */
9353 if (crtl->drap_reg)
9354 drap = REGNO (crtl->drap_reg);
9355 else
9356 drap = -1;
9357 for (i = 2; i >= 0; --i)
9358 if (i != drap && !df_regs_ever_live_p (i))
9359 return i;
9360 }
9361
9362 return INVALID_REGNUM;
9363 }
9364
9365 /* Return TRUE if we need to save REGNO. */
9366
9367 static bool
9368 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9369 {
9370 if (pic_offset_table_rtx
9371 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9372 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9373 || crtl->profile
9374 || crtl->calls_eh_return
9375 || crtl->uses_const_pool
9376 || cfun->has_nonlocal_label))
9377 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9378
9379 if (crtl->calls_eh_return && maybe_eh_return)
9380 {
9381 unsigned i;
9382 for (i = 0; ; i++)
9383 {
9384 unsigned test = EH_RETURN_DATA_REGNO (i);
9385 if (test == INVALID_REGNUM)
9386 break;
9387 if (test == regno)
9388 return true;
9389 }
9390 }
9391
9392 if (crtl->drap_reg
9393 && regno == REGNO (crtl->drap_reg)
9394 && !cfun->machine->no_drap_save_restore)
9395 return true;
9396
9397 return (df_regs_ever_live_p (regno)
9398 && !call_used_regs[regno]
9399 && !fixed_regs[regno]
9400 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9401 }
9402
9403 /* Return the number of saved general purpose registers. */
9404
9405 static int
9406 ix86_nsaved_regs (void)
9407 {
9408 int nregs = 0;
9409 int regno;
9410
9411 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9412 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9413 nregs ++;
9414 return nregs;
9415 }
9416
9417 /* Return the number of saved SSE registers. */
9418
9419 static int
9420 ix86_nsaved_sseregs (void)
9421 {
9422 int nregs = 0;
9423 int regno;
9424
9425 if (!TARGET_64BIT_MS_ABI)
9426 return 0;
9427 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9428 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9429 nregs ++;
9430 return nregs;
9431 }
9432
9433 /* Given FROM and TO register numbers, say whether this elimination is
9434 allowed. If stack alignment is needed, we can only replace argument
9435 pointer with hard frame pointer, or replace frame pointer with stack
9436 pointer. Otherwise, frame pointer elimination is automatically
9437 handled and all other eliminations are valid. */
9438
9439 static bool
9440 ix86_can_eliminate (const int from, const int to)
9441 {
9442 if (stack_realign_fp)
9443 return ((from == ARG_POINTER_REGNUM
9444 && to == HARD_FRAME_POINTER_REGNUM)
9445 || (from == FRAME_POINTER_REGNUM
9446 && to == STACK_POINTER_REGNUM));
9447 else
9448 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9449 }
9450
9451 /* Return the offset between two registers, one to be eliminated, and the other
9452 its replacement, at the start of a routine. */
9453
9454 HOST_WIDE_INT
9455 ix86_initial_elimination_offset (int from, int to)
9456 {
9457 struct ix86_frame frame;
9458 ix86_compute_frame_layout (&frame);
9459
9460 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9461 return frame.hard_frame_pointer_offset;
9462 else if (from == FRAME_POINTER_REGNUM
9463 && to == HARD_FRAME_POINTER_REGNUM)
9464 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9465 else
9466 {
9467 gcc_assert (to == STACK_POINTER_REGNUM);
9468
9469 if (from == ARG_POINTER_REGNUM)
9470 return frame.stack_pointer_offset;
9471
9472 gcc_assert (from == FRAME_POINTER_REGNUM);
9473 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9474 }
9475 }
9476
9477 /* In a dynamically-aligned function, we can't know the offset from
9478 stack pointer to frame pointer, so we must ensure that setjmp
9479 eliminates fp against the hard fp (%ebp) rather than trying to
9480 index from %esp up to the top of the frame across a gap that is
9481 of unknown (at compile-time) size. */
9482 static rtx
9483 ix86_builtin_setjmp_frame_value (void)
9484 {
9485 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9486 }
9487
9488 /* When using -fsplit-stack, the allocation routines set a field in
9489 the TCB to the bottom of the stack plus this much space, measured
9490 in bytes. */
9491
9492 #define SPLIT_STACK_AVAILABLE 256
9493
9494 /* Fill in the ix86_frame structure describing the frame of the current function. */
9495
9496 static void
9497 ix86_compute_frame_layout (struct ix86_frame *frame)
9498 {
9499 unsigned HOST_WIDE_INT stack_alignment_needed;
9500 HOST_WIDE_INT offset;
9501 unsigned HOST_WIDE_INT preferred_alignment;
9502 HOST_WIDE_INT size = get_frame_size ();
9503 HOST_WIDE_INT to_allocate;
9504
9505 frame->nregs = ix86_nsaved_regs ();
9506 frame->nsseregs = ix86_nsaved_sseregs ();
9507
9508 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
9509 except for function prologues and leaf functions. */
9510 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9511 && (!crtl->is_leaf || cfun->calls_alloca != 0
9512 || ix86_current_function_calls_tls_descriptor))
9513 {
9514 crtl->preferred_stack_boundary = 128;
9515 crtl->stack_alignment_needed = 128;
9516 }
9517 /* preferred_stack_boundary is never updated for calls
9518 expanded from a TLS descriptor. Update it here. We don't update it
9519 during the expand stage because, according to the comments before
9520 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
9521 away. */
9522 else if (ix86_current_function_calls_tls_descriptor
9523 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9524 {
9525 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9526 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9527 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9528 }
9529
9530 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9531 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9532
9533 gcc_assert (!size || stack_alignment_needed);
9534 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9535 gcc_assert (preferred_alignment <= stack_alignment_needed);
9536
9537 /* For SEH we have to limit the amount of code movement into the prologue.
9538 At present we do this via a BLOCKAGE, at which point there's very little
9539 scheduling that can be done, which means that there's very little point
9540 in doing anything except PUSHs. */
9541 if (TARGET_SEH)
9542 cfun->machine->use_fast_prologue_epilogue = false;
9543
9544 /* During reload iterations the number of registers saved can change.
9545 Recompute the value as needed. Do not recompute when the number of
9546 registers didn't change, as reload makes multiple calls to this function
9547 and does not expect the decision to change within a single iteration. */
9548 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9549 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9550 {
9551 int count = frame->nregs;
9552 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9553
9554 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9555
9556 /* The fast prologue uses move instead of push to save registers. This
9557 is significantly longer, but also executes faster as modern hardware
9558 can execute the moves in parallel, but can't do that for push/pop.
9559
9560 Be careful about choosing which prologue to emit: when the function
9561 takes many instructions to execute, we may use the slow version, as
9562 well as when the function is known to be outside a hot spot (this is
9563 known with feedback only). Weight the size of the function by the
9564 number of registers to save, as it is cheap to use one or two push
9565 instructions but very slow to use many of them. */
9566 if (count)
9567 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9568 if (node->frequency < NODE_FREQUENCY_NORMAL
9569 || (flag_branch_probabilities
9570 && node->frequency < NODE_FREQUENCY_HOT))
9571 cfun->machine->use_fast_prologue_epilogue = false;
9572 else
9573 cfun->machine->use_fast_prologue_epilogue
9574 = !expensive_function_p (count);
9575 }
9576
9577 frame->save_regs_using_mov
9578 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9579 /* If static stack checking is enabled and done with probes,
9580 the registers need to be saved before allocating the frame. */
9581 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9582
9583 /* Skip return address. */
9584 offset = UNITS_PER_WORD;
9585
9586 /* Skip pushed static chain. */
9587 if (ix86_static_chain_on_stack)
9588 offset += UNITS_PER_WORD;
9589
9590 /* Skip saved base pointer. */
9591 if (frame_pointer_needed)
9592 offset += UNITS_PER_WORD;
9593 frame->hfp_save_offset = offset;
9594
9595 /* The traditional frame pointer location is at the top of the frame. */
9596 frame->hard_frame_pointer_offset = offset;
9597
9598 /* Register save area */
9599 offset += frame->nregs * UNITS_PER_WORD;
9600 frame->reg_save_offset = offset;
9601
9602 /* On SEH target, registers are pushed just before the frame pointer
9603 location. */
9604 if (TARGET_SEH)
9605 frame->hard_frame_pointer_offset = offset;
9606
9607 /* Align and set SSE register save area. */
9608 if (frame->nsseregs)
9609 {
9610 /* The only ABI that has saved SSE registers (Win64) also has a
9611 16-byte aligned default stack, and thus we don't need to be
9612 within the re-aligned local stack frame to save them. */
9613 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9614 offset = (offset + 16 - 1) & -16;
9615 offset += frame->nsseregs * 16;
9616 }
9617 frame->sse_reg_save_offset = offset;
9618
9619 /* The re-aligned stack starts here. Values before this point are not
9620 directly comparable with values below this point. In order to make
9621 sure that no value happens to be the same before and after, force
9622 the alignment computation below to add a non-zero value. */
9623 if (stack_realign_fp)
9624 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9625
9626 /* Va-arg area */
9627 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9628 offset += frame->va_arg_size;
9629
9630 /* Align start of frame for local function. */
9631 if (stack_realign_fp
9632 || offset != frame->sse_reg_save_offset
9633 || size != 0
9634 || !crtl->is_leaf
9635 || cfun->calls_alloca
9636 || ix86_current_function_calls_tls_descriptor)
9637 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9638
9639 /* Frame pointer points here. */
9640 frame->frame_pointer_offset = offset;
9641
9642 offset += size;
9643
9644 /* Add the outgoing arguments area. This can be skipped if we eliminated
9645 all the function calls as dead code.
9646 Skipping is, however, impossible when the function calls alloca: the
9647 alloca expander assumes that the last crtl->outgoing_args_size bytes
9648 of the stack frame are unused. */
9649 if (ACCUMULATE_OUTGOING_ARGS
9650 && (!crtl->is_leaf || cfun->calls_alloca
9651 || ix86_current_function_calls_tls_descriptor))
9652 {
9653 offset += crtl->outgoing_args_size;
9654 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9655 }
9656 else
9657 frame->outgoing_arguments_size = 0;
9658
9659 /* Align stack boundary. Only needed if we're calling another function
9660 or using alloca. */
9661 if (!crtl->is_leaf || cfun->calls_alloca
9662 || ix86_current_function_calls_tls_descriptor)
9663 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9664
9665 /* We've reached end of stack frame. */
9666 frame->stack_pointer_offset = offset;
9667
9668 /* Size prologue needs to allocate. */
9669 to_allocate = offset - frame->sse_reg_save_offset;
9670
9671 if ((!to_allocate && frame->nregs <= 1)
9672 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9673 frame->save_regs_using_mov = false;
9674
9675 if (ix86_using_red_zone ()
9676 && crtl->sp_is_unchanging
9677 && crtl->is_leaf
9678 && !ix86_current_function_calls_tls_descriptor)
9679 {
9680 frame->red_zone_size = to_allocate;
9681 if (frame->save_regs_using_mov)
9682 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9683 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9684 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9685 }
9686 else
9687 frame->red_zone_size = 0;
9688 frame->stack_pointer_offset -= frame->red_zone_size;
9689
9690 /* The SEH frame pointer location is near the bottom of the frame.
9691 This is enforced by the fact that the difference between the
9692 stack pointer and the frame pointer is limited to 240 bytes in
9693 the unwind data structure. */
9694 if (TARGET_SEH)
9695 {
9696 HOST_WIDE_INT diff;
9697
9698 /* If we can leave the frame pointer where it is, do so. Also, returns
9699 the establisher frame for __builtin_frame_address (0). */
9700 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9701 if (diff <= SEH_MAX_FRAME_SIZE
9702 && (diff > 240 || (diff & 15) != 0)
9703 && !crtl->accesses_prior_frames)
9704 {
9705 /* Ideally we'd determine what portion of the local stack frame
9706 (within the constraint of the lowest 240) is most heavily used.
9707 But without that complication, simply bias the frame pointer
9708 by 128 bytes so as to maximize the amount of the local stack
9709 frame that is addressable with 8-bit offsets. */
9710 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9711 }
9712 }
9713 }
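/* A rough sketch of the frame layout computed above, from higher to lower
   addresses (which areas are present depends on the function; offsets name
   the fields filled in above):

       return address
       [pushed static chain]
       [saved frame pointer]               <- hard_frame_pointer_offset (non-SEH)
       GP register save area               <- reg_save_offset
       SSE register save area (16-aligned) <- sse_reg_save_offset
       va_arg save area
       local variables                     <- frame_pointer_offset
       outgoing arguments
                                           <- stack_pointer_offset
       [red zone, below the stack pointer]  */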
9714
9715 /* This is semi-inlined memory_address_length, but simplified
9716 since we know that we're always dealing with reg+offset, and
9717 to avoid having to create and discard all that rtl. */
9718
9719 static inline int
9720 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9721 {
9722 int len = 4;
9723
9724 if (offset == 0)
9725 {
9726 /* EBP and R13 cannot be encoded without an offset. */
9727 len = (regno == BP_REG || regno == R13_REG);
9728 }
9729 else if (IN_RANGE (offset, -128, 127))
9730 len = 1;
9731
9732 /* ESP and R12 must be encoded with a SIB byte. */
9733 if (regno == SP_REG || regno == R12_REG)
9734 len++;
9735
9736 return len;
9737 }
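/* A few concrete cases of the length computed above (bytes of displacement
   plus SIB, on top of the base ModRM byte; illustrative only):

       (%eax)            -> 0   no displacement needed
       (%ebp), (%r13)    -> 1   a disp8 of 0 is required
       8(%ecx)           -> 1   disp8
       1024(%ecx)        -> 4   disp32
       (%esp), (%r12)    -> +1  SIB byte always required
  */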
9738
9739 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9740 The valid base registers are taken from CFUN->MACHINE->FS. */
9741
9742 static rtx
9743 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9744 {
9745 const struct machine_function *m = cfun->machine;
9746 rtx base_reg = NULL;
9747 HOST_WIDE_INT base_offset = 0;
9748
9749 if (m->use_fast_prologue_epilogue)
9750 {
9751 /* Choose the base register most likely to allow the most scheduling
9752 opportunities. Generally FP is valid throughout the function,
9753 while DRAP must be reloaded within the epilogue. But choose either
9754 over the SP due to increased encoding size. */
9755
9756 if (m->fs.fp_valid)
9757 {
9758 base_reg = hard_frame_pointer_rtx;
9759 base_offset = m->fs.fp_offset - cfa_offset;
9760 }
9761 else if (m->fs.drap_valid)
9762 {
9763 base_reg = crtl->drap_reg;
9764 base_offset = 0 - cfa_offset;
9765 }
9766 else if (m->fs.sp_valid)
9767 {
9768 base_reg = stack_pointer_rtx;
9769 base_offset = m->fs.sp_offset - cfa_offset;
9770 }
9771 }
9772 else
9773 {
9774 HOST_WIDE_INT toffset;
9775 int len = 16, tlen;
9776
9777 /* Choose the base register with the smallest address encoding.
9778 With a tie, choose FP > DRAP > SP. */
9779 if (m->fs.sp_valid)
9780 {
9781 base_reg = stack_pointer_rtx;
9782 base_offset = m->fs.sp_offset - cfa_offset;
9783 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9784 }
9785 if (m->fs.drap_valid)
9786 {
9787 toffset = 0 - cfa_offset;
9788 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9789 if (tlen <= len)
9790 {
9791 base_reg = crtl->drap_reg;
9792 base_offset = toffset;
9793 len = tlen;
9794 }
9795 }
9796 if (m->fs.fp_valid)
9797 {
9798 toffset = m->fs.fp_offset - cfa_offset;
9799 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9800 if (tlen <= len)
9801 {
9802 base_reg = hard_frame_pointer_rtx;
9803 base_offset = toffset;
9804 len = tlen;
9805 }
9806 }
9807 }
9808 gcc_assert (base_reg != NULL);
9809
9810 return plus_constant (Pmode, base_reg, base_offset);
9811 }
9812
9813 /* Emit code to save registers in the prologue. */
9814
9815 static void
9816 ix86_emit_save_regs (void)
9817 {
9818 unsigned int regno;
9819 rtx insn;
9820
9821 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9822 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9823 {
9824 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9825 RTX_FRAME_RELATED_P (insn) = 1;
9826 }
9827 }
9828
9829 /* Emit a single register save at CFA - CFA_OFFSET. */
9830
9831 static void
9832 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9833 HOST_WIDE_INT cfa_offset)
9834 {
9835 struct machine_function *m = cfun->machine;
9836 rtx reg = gen_rtx_REG (mode, regno);
9837 rtx mem, addr, base, insn;
9838
9839 addr = choose_baseaddr (cfa_offset);
9840 mem = gen_frame_mem (mode, addr);
9841
9842 /* For SSE saves, we need to indicate the 128-bit alignment. */
9843 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9844
9845 insn = emit_move_insn (mem, reg);
9846 RTX_FRAME_RELATED_P (insn) = 1;
9847
9848 base = addr;
9849 if (GET_CODE (base) == PLUS)
9850 base = XEXP (base, 0);
9851 gcc_checking_assert (REG_P (base));
9852
9853 /* When saving registers into a re-aligned local stack frame, avoid
9854 any tricky guessing by dwarf2out. */
9855 if (m->fs.realigned)
9856 {
9857 gcc_checking_assert (stack_realign_drap);
9858
9859 if (regno == REGNO (crtl->drap_reg))
9860 {
9861 /* A bit of a hack. We force the DRAP register to be saved in
9862 the re-aligned stack frame, which provides us with a copy
9863 of the CFA that will last past the prologue. Install it. */
9864 gcc_checking_assert (cfun->machine->fs.fp_valid);
9865 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9866 cfun->machine->fs.fp_offset - cfa_offset);
9867 mem = gen_rtx_MEM (mode, addr);
9868 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9869 }
9870 else
9871 {
9872 /* The frame pointer is a stable reference within the
9873 aligned frame. Use it. */
9874 gcc_checking_assert (cfun->machine->fs.fp_valid);
9875 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9876 cfun->machine->fs.fp_offset - cfa_offset);
9877 mem = gen_rtx_MEM (mode, addr);
9878 add_reg_note (insn, REG_CFA_EXPRESSION,
9879 gen_rtx_SET (VOIDmode, mem, reg));
9880 }
9881 }
9882
9883 /* The memory may not be relative to the current CFA register,
9884 which means that we may need to generate a new pattern for
9885 use by the unwind info. */
9886 else if (base != m->fs.cfa_reg)
9887 {
9888 addr = plus_constant (Pmode, m->fs.cfa_reg,
9889 m->fs.cfa_offset - cfa_offset);
9890 mem = gen_rtx_MEM (mode, addr);
9891 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9892 }
9893 }
9894
9895 /* Emit code to save registers using MOV insns.
9896 First register is stored at CFA - CFA_OFFSET. */
9897 static void
9898 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9899 {
9900 unsigned int regno;
9901
9902 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9903 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9904 {
9905 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9906 cfa_offset -= UNITS_PER_WORD;
9907 }
9908 }
9909
9910 /* Emit code to save SSE registers using MOV insns.
9911 First register is stored at CFA - CFA_OFFSET. */
9912 static void
9913 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9914 {
9915 unsigned int regno;
9916
9917 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9918 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9919 {
9920 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9921 cfa_offset -= 16;
9922 }
9923 }
9924
9925 static GTY(()) rtx queued_cfa_restores;
9926
9927 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9928 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9929 Don't add the note if the previously saved value will be left untouched
9930 within the stack red zone until return, as unwinders can find the same
9931 value in the register and on the stack. */
9932
9933 static void
9934 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9935 {
9936 if (!crtl->shrink_wrapped
9937 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9938 return;
9939
9940 if (insn)
9941 {
9942 add_reg_note (insn, REG_CFA_RESTORE, reg);
9943 RTX_FRAME_RELATED_P (insn) = 1;
9944 }
9945 else
9946 queued_cfa_restores
9947 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9948 }
9949
9950 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9951
9952 static void
9953 ix86_add_queued_cfa_restore_notes (rtx insn)
9954 {
9955 rtx last;
9956 if (!queued_cfa_restores)
9957 return;
9958 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9959 ;
9960 XEXP (last, 1) = REG_NOTES (insn);
9961 REG_NOTES (insn) = queued_cfa_restores;
9962 queued_cfa_restores = NULL_RTX;
9963 RTX_FRAME_RELATED_P (insn) = 1;
9964 }
9965
9966 /* Expand a prologue or epilogue stack adjustment.
9967 The pattern exists to put a dependency on all ebp-based memory accesses.
9968 STYLE should be negative if instructions should be marked as frame
9969 related, zero if the %r11 register is live and cannot be freely used,
9970 and positive otherwise. */
9971
9972 static void
9973 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9974 int style, bool set_cfa)
9975 {
9976 struct machine_function *m = cfun->machine;
9977 rtx insn;
9978 bool add_frame_related_expr = false;
9979
9980 if (Pmode == SImode)
9981 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9982 else if (x86_64_immediate_operand (offset, DImode))
9983 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9984 else
9985 {
9986 rtx tmp;
9987 /* r11 is used by indirect sibcall return as well, set before the
9988 epilogue and used after the epilogue. */
9989 if (style)
9990 tmp = gen_rtx_REG (DImode, R11_REG);
9991 else
9992 {
9993 gcc_assert (src != hard_frame_pointer_rtx
9994 && dest != hard_frame_pointer_rtx);
9995 tmp = hard_frame_pointer_rtx;
9996 }
9997 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9998 if (style < 0)
9999 add_frame_related_expr = true;
10000
10001 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10002 }
10003
10004 insn = emit_insn (insn);
10005 if (style >= 0)
10006 ix86_add_queued_cfa_restore_notes (insn);
10007
10008 if (set_cfa)
10009 {
10010 rtx r;
10011
10012 gcc_assert (m->fs.cfa_reg == src);
10013 m->fs.cfa_offset += INTVAL (offset);
10014 m->fs.cfa_reg = dest;
10015
10016 r = gen_rtx_PLUS (Pmode, src, offset);
10017 r = gen_rtx_SET (VOIDmode, dest, r);
10018 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10019 RTX_FRAME_RELATED_P (insn) = 1;
10020 }
10021 else if (style < 0)
10022 {
10023 RTX_FRAME_RELATED_P (insn) = 1;
10024 if (add_frame_related_expr)
10025 {
10026 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10027 r = gen_rtx_SET (VOIDmode, dest, r);
10028 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10029 }
10030 }
10031
10032 if (dest == stack_pointer_rtx)
10033 {
10034 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10035 bool valid = m->fs.sp_valid;
10036
10037 if (src == hard_frame_pointer_rtx)
10038 {
10039 valid = m->fs.fp_valid;
10040 ooffset = m->fs.fp_offset;
10041 }
10042 else if (src == crtl->drap_reg)
10043 {
10044 valid = m->fs.drap_valid;
10045 ooffset = 0;
10046 }
10047 else
10048 {
10049 /* Else there are two possibilities: SP itself, which we set
10050 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10051 taken care of by hand along the eh_return path. */
10052 gcc_checking_assert (src == stack_pointer_rtx
10053 || offset == const0_rtx);
10054 }
10055
10056 m->fs.sp_offset = ooffset - INTVAL (offset);
10057 m->fs.sp_valid = valid;
10058 }
10059 }
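      /* As a sketch of typical use (taken from ix86_expand_prologue below):
	 allocating the frame while the stack pointer is still the CFA
	 register looks like

	   pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				      GEN_INT (-allocate), -1,
				      m->fs.cfa_reg == stack_pointer_rtx);

	 where a negative STYLE marks the adjustment as frame related.  */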
10060
10061 /* Find an available register to be used as the dynamic realign argument
10062 pointer register. Such a register will be written in the prologue and
10063 used at the beginning of the body, so it must not be
10064 1. a parameter passing register.
10065 2. the GOT pointer.
10066 We reuse the static-chain register if it is available. Otherwise we
10067 use DI for i386 and R13 for x86-64. We chose R13 since it has a
10068 shorter encoding.
10069 
10070 Return: the regno of the chosen register. */
10071
10072 static unsigned int
10073 find_drap_reg (void)
10074 {
10075 tree decl = cfun->decl;
10076
10077 if (TARGET_64BIT)
10078 {
10079 /* Use R13 for a nested function or a function that needs a static chain.
10080 Since a function with a tail call may use any caller-saved
10081 register in the epilogue, DRAP must not use a caller-saved
10082 register in such a case. */
10083 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10084 return R13_REG;
10085
10086 return R10_REG;
10087 }
10088 else
10089 {
10090 /* Use DI for a nested function or a function that needs a static chain.
10091 Since a function with a tail call may use any caller-saved
10092 register in the epilogue, DRAP must not use a caller-saved
10093 register in such a case. */
10094 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10095 return DI_REG;
10096
10097 /* Reuse static chain register if it isn't used for parameter
10098 passing. */
10099 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10100 {
10101 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10102 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10103 return CX_REG;
10104 }
10105 return DI_REG;
10106 }
10107 }
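      /* In concrete terms (a summary of the choices above): a plain 32-bit
	 cdecl function with no static chain and regparm <= 2 gets %ecx, a
	 nested 32-bit function gets %edi, and 64-bit code gets %r10, or %r13
	 when a static chain or a tail call is involved.  */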
10108
10109 /* Return minimum incoming stack alignment. */
10110
10111 static unsigned int
10112 ix86_minimum_incoming_stack_boundary (bool sibcall)
10113 {
10114 unsigned int incoming_stack_boundary;
10115
10116 /* Prefer the one specified at command line. */
10117 if (ix86_user_incoming_stack_boundary)
10118 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10119 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10120 if -mstackrealign is used, this is not the sibcall check, and the
10121 estimated stack alignment is 128 bits. */
10122 else if (!sibcall
10123 && !TARGET_64BIT
10124 && ix86_force_align_arg_pointer
10125 && crtl->stack_alignment_estimated == 128)
10126 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10127 else
10128 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10129
10130 /* Incoming stack alignment can be changed on individual functions
10131 via force_align_arg_pointer attribute. We use the smallest
10132 incoming stack boundary. */
10133 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10134 && lookup_attribute (ix86_force_align_arg_pointer_string,
10135 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10136 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10137
10138 /* The incoming stack frame has to be aligned at least at
10139 parm_stack_boundary. */
10140 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10141 incoming_stack_boundary = crtl->parm_stack_boundary;
10142
10143 /* The stack at the entry of main is aligned by the runtime. We use the
10144 smallest incoming stack boundary. */
10145 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10146 && DECL_NAME (current_function_decl)
10147 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10148 && DECL_FILE_SCOPE_P (current_function_decl))
10149 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10150
10151 return incoming_stack_boundary;
10152 }
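      /* In short: start from the user-specified or default boundary, drop to
	 MIN_STACK_BOUNDARY for -mstackrealign and force_align_arg_pointer
	 functions, never go below crtl->parm_stack_boundary, and cap the
	 file-scope main at MAIN_STACK_BOUNDARY since the runtime aligns its
	 stack.  */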
10153
10154 /* Update incoming stack boundary and estimated stack alignment. */
10155
10156 static void
10157 ix86_update_stack_boundary (void)
10158 {
10159 ix86_incoming_stack_boundary
10160 = ix86_minimum_incoming_stack_boundary (false);
10161
10162 /* x86_64 varargs need 16-byte stack alignment for the register save
10163 area. */
10164 if (TARGET_64BIT
10165 && cfun->stdarg
10166 && crtl->stack_alignment_estimated < 128)
10167 crtl->stack_alignment_estimated = 128;
10168 }
10169
10170 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10171 needed or an rtx for DRAP otherwise. */
10172
10173 static rtx
10174 ix86_get_drap_rtx (void)
10175 {
10176 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10177 crtl->need_drap = true;
10178
10179 if (stack_realign_drap)
10180 {
10181 /* Assign DRAP to vDRAP and return vDRAP. */
10182 unsigned int regno = find_drap_reg ();
10183 rtx drap_vreg;
10184 rtx arg_ptr;
10185 rtx seq, insn;
10186
10187 arg_ptr = gen_rtx_REG (Pmode, regno);
10188 crtl->drap_reg = arg_ptr;
10189
10190 start_sequence ();
10191 drap_vreg = copy_to_reg (arg_ptr);
10192 seq = get_insns ();
10193 end_sequence ();
10194
10195 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10196 if (!optimize)
10197 {
10198 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10199 RTX_FRAME_RELATED_P (insn) = 1;
10200 }
10201 return drap_vreg;
10202 }
10203 else
10204 return NULL;
10205 }
10206
10207 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10208
10209 static rtx
10210 ix86_internal_arg_pointer (void)
10211 {
10212 return virtual_incoming_args_rtx;
10213 }
10214
10215 struct scratch_reg {
10216 rtx reg;
10217 bool saved;
10218 };
10219
10220 /* Return a short-lived scratch register for use on function entry.
10221 In 32-bit mode, it is valid only after the registers are saved
10222 in the prologue. This register must be released by means of
10223 release_scratch_register_on_entry once it is dead. */
10224
10225 static void
10226 get_scratch_register_on_entry (struct scratch_reg *sr)
10227 {
10228 int regno;
10229
10230 sr->saved = false;
10231
10232 if (TARGET_64BIT)
10233 {
10234 /* We always use R11 in 64-bit mode. */
10235 regno = R11_REG;
10236 }
10237 else
10238 {
10239 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10240 bool fastcall_p
10241 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10242 bool thiscall_p
10243 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10244 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10245 int regparm = ix86_function_regparm (fntype, decl);
10246 int drap_regno
10247 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10248
10249 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10250 for the static chain register. */
10251 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10252 && drap_regno != AX_REG)
10253 regno = AX_REG;
10254 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10255 for the static chain register. */
10256 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10257 regno = AX_REG;
10258 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10259 regno = DX_REG;
10260 /* ecx is the static chain register. */
10261 else if (regparm < 3 && !fastcall_p && !thiscall_p
10262 && !static_chain_p
10263 && drap_regno != CX_REG)
10264 regno = CX_REG;
10265 else if (ix86_save_reg (BX_REG, true))
10266 regno = BX_REG;
10267 /* esi is the static chain register. */
10268 else if (!(regparm == 3 && static_chain_p)
10269 && ix86_save_reg (SI_REG, true))
10270 regno = SI_REG;
10271 else if (ix86_save_reg (DI_REG, true))
10272 regno = DI_REG;
10273 else
10274 {
10275 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10276 sr->saved = true;
10277 }
10278 }
10279
10280 sr->reg = gen_rtx_REG (Pmode, regno);
10281 if (sr->saved)
10282 {
10283 rtx insn = emit_insn (gen_push (sr->reg));
10284 RTX_FRAME_RELATED_P (insn) = 1;
10285 }
10286 }
10287
10288 /* Release a scratch register obtained from the preceding function. */
10289
10290 static void
10291 release_scratch_register_on_entry (struct scratch_reg *sr)
10292 {
10293 if (sr->saved)
10294 {
10295 struct machine_function *m = cfun->machine;
10296 rtx x, insn = emit_insn (gen_pop (sr->reg));
10297
10298 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10299 RTX_FRAME_RELATED_P (insn) = 1;
10300 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10301 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10302 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10303 m->fs.sp_offset -= UNITS_PER_WORD;
10304 }
10305 }
10306
10307 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
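      /* With the default STACK_CHECK_PROBE_INTERVAL_EXP of 12 this is 4096
	 bytes, i.e. one probe per page; the examples below assume that
	 default.  */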
10308
10309 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10310
10311 static void
10312 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10313 {
10314 /* We skip the probe for the first interval + a small dope of 4 words and
10315 probe that many bytes past the specified size to maintain a protection
10316 area at the bottom of the stack. */
10317 const int dope = 4 * UNITS_PER_WORD;
10318 rtx size_rtx = GEN_INT (size), last;
10319
10320 /* See if we have a constant small number of probes to generate. If so,
10321 that's the easy case. The run-time loop is made up of 11 insns in the
10322 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10323 for n # of intervals. */
10324 if (size <= 5 * PROBE_INTERVAL)
10325 {
10326 HOST_WIDE_INT i, adjust;
10327 bool first_probe = true;
10328
10329 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10330 values of N from 1 until it exceeds SIZE. If only one probe is
10331 needed, this will not generate any code. Then adjust and probe
10332 to PROBE_INTERVAL + SIZE. */
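	  /* A worked example, assuming the default 4096-byte PROBE_INTERVAL
	     and 8-byte words (dope == 32): SIZE == 8192 expands to roughly
	       sub $8224, %sp ; probe	(2*PROBE_INTERVAL + dope)
	       sub $4096, %sp ; probe	(SIZE + PROBE_INTERVAL - i)
	       add $4128, %sp		(PROBE_INTERVAL + dope)
	     for a net adjustment of -SIZE == -8192 bytes.  */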
10333 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10334 {
10335 if (first_probe)
10336 {
10337 adjust = 2 * PROBE_INTERVAL + dope;
10338 first_probe = false;
10339 }
10340 else
10341 adjust = PROBE_INTERVAL;
10342
10343 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10344 plus_constant (Pmode, stack_pointer_rtx,
10345 -adjust)));
10346 emit_stack_probe (stack_pointer_rtx);
10347 }
10348
10349 if (first_probe)
10350 adjust = size + PROBE_INTERVAL + dope;
10351 else
10352 adjust = size + PROBE_INTERVAL - i;
10353
10354 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10355 plus_constant (Pmode, stack_pointer_rtx,
10356 -adjust)));
10357 emit_stack_probe (stack_pointer_rtx);
10358
10359 /* Adjust back to account for the additional first interval. */
10360 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10361 plus_constant (Pmode, stack_pointer_rtx,
10362 PROBE_INTERVAL + dope)));
10363 }
10364
10365 /* Otherwise, do the same as above, but in a loop. Note that we must be
10366 extra careful with variables wrapping around because we might be at
10367 the very top (or the very bottom) of the address space and we have
10368 to be able to handle this case properly; in particular, we use an
10369 equality test for the loop condition. */
10370 else
10371 {
10372 HOST_WIDE_INT rounded_size;
10373 struct scratch_reg sr;
10374
10375 get_scratch_register_on_entry (&sr);
10376
10377
10378 /* Step 1: round SIZE to the previous multiple of the interval. */
10379
10380 rounded_size = size & -PROBE_INTERVAL;
10381
10382
10383 /* Step 2: compute initial and final value of the loop counter. */
10384
10385 /* SP = SP_0 + PROBE_INTERVAL. */
10386 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10387 plus_constant (Pmode, stack_pointer_rtx,
10388 - (PROBE_INTERVAL + dope))));
10389
10390 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10391 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10392 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10393 gen_rtx_PLUS (Pmode, sr.reg,
10394 stack_pointer_rtx)));
10395
10396
10397 /* Step 3: the loop
10398
10399 while (SP != LAST_ADDR)
10400 {
10401 SP = SP + PROBE_INTERVAL
10402 probe at SP
10403 }
10404
10405 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10406 values of N from 1 until it is equal to ROUNDED_SIZE. */
10407
10408 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10409
10410
10411 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10412 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10413
10414 if (size != rounded_size)
10415 {
10416 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10417 plus_constant (Pmode, stack_pointer_rtx,
10418 rounded_size - size)));
10419 emit_stack_probe (stack_pointer_rtx);
10420 }
10421
10422 /* Adjust back to account for the additional first interval. */
10423 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10424 plus_constant (Pmode, stack_pointer_rtx,
10425 PROBE_INTERVAL + dope)));
10426
10427 release_scratch_register_on_entry (&sr);
10428 }
10429
10430 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10431
10432 /* Even if the stack pointer isn't the CFA register, we need to correctly
10433 describe the adjustments made to it, in particular differentiate the
10434 frame-related ones from the frame-unrelated ones. */
10435 if (size > 0)
10436 {
10437 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10438 XVECEXP (expr, 0, 0)
10439 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10440 plus_constant (Pmode, stack_pointer_rtx, -size));
10441 XVECEXP (expr, 0, 1)
10442 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10443 plus_constant (Pmode, stack_pointer_rtx,
10444 PROBE_INTERVAL + dope + size));
10445 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10446 RTX_FRAME_RELATED_P (last) = 1;
10447
10448 cfun->machine->fs.sp_offset += size;
10449 }
10450
10451 /* Make sure nothing is scheduled before we are done. */
10452 emit_insn (gen_blockage ());
10453 }
10454
10455 /* Adjust the stack pointer up to REG while probing it. */
10456
10457 const char *
10458 output_adjust_stack_and_probe (rtx reg)
10459 {
10460 static int labelno = 0;
10461 char loop_lab[32], end_lab[32];
10462 rtx xops[2];
10463
10464 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10465 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10466
10467 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10468
10469 /* Jump to END_LAB if SP == LAST_ADDR. */
10470 xops[0] = stack_pointer_rtx;
10471 xops[1] = reg;
10472 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10473 fputs ("\tje\t", asm_out_file);
10474 assemble_name_raw (asm_out_file, end_lab);
10475 fputc ('\n', asm_out_file);
10476
10477 /* SP = SP + PROBE_INTERVAL. */
10478 xops[1] = GEN_INT (PROBE_INTERVAL);
10479 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10480
10481 /* Probe at SP. */
10482 xops[1] = const0_rtx;
10483 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10484
10485 fprintf (asm_out_file, "\tjmp\t");
10486 assemble_name_raw (asm_out_file, loop_lab);
10487 fputc ('\n', asm_out_file);
10488
10489 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10490
10491 return "";
10492 }
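/* As a sketch, on a 32-bit target with the scratch register in %eax and the
   default 4096-byte PROBE_INTERVAL, the loop printed above comes out roughly
   as (AT&T syntax):

	.LPSRL0:	cmpl	%eax, %esp
			je	.LPSRE0
			subl	$4096, %esp
			orl	$0, (%esp)
			jmp	.LPSRL0
	.LPSRE0:
*/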
10493
10494 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10495 inclusive. These are offsets from the current stack pointer. */
10496
10497 static void
10498 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10499 {
10500 /* See if we have a constant small number of probes to generate. If so,
10501 that's the easy case. The run-time loop is made up of 7 insns in the
10502 generic case while the compile-time loop is made up of n insns for n #
10503 of intervals. */
10504 if (size <= 7 * PROBE_INTERVAL)
10505 {
10506 HOST_WIDE_INT i;
10507
10508 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10509 it exceeds SIZE. If only one probe is needed, this will not
10510 generate any code. Then probe at FIRST + SIZE. */
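      /* For example, with the default 4096-byte interval, FIRST == 4096 and
	 SIZE == 8192 produce just two probes, at sp - 8192 and sp - 12288,
	 i.e. at FIRST + PROBE_INTERVAL and FIRST + SIZE.  */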
10511 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10512 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10513 -(first + i)));
10514
10515 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10516 -(first + size)));
10517 }
10518
10519 /* Otherwise, do the same as above, but in a loop. Note that we must be
10520 extra careful with variables wrapping around because we might be at
10521 the very top (or the very bottom) of the address space and we have
10522 to be able to handle this case properly; in particular, we use an
10523 equality test for the loop condition. */
10524 else
10525 {
10526 HOST_WIDE_INT rounded_size, last;
10527 struct scratch_reg sr;
10528
10529 get_scratch_register_on_entry (&sr);
10530
10531
10532 /* Step 1: round SIZE to the previous multiple of the interval. */
10533
10534 rounded_size = size & -PROBE_INTERVAL;
10535
10536
10537 /* Step 2: compute initial and final value of the loop counter. */
10538
10539 /* TEST_OFFSET = FIRST. */
10540 emit_move_insn (sr.reg, GEN_INT (-first));
10541
10542 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10543 last = first + rounded_size;
10544
10545
10546 /* Step 3: the loop
10547
10548 while (TEST_ADDR != LAST_ADDR)
10549 {
10550 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10551 probe at TEST_ADDR
10552 }
10553
10554 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10555 until it is equal to ROUNDED_SIZE. */
10556
10557 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10558
10559
10560 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10561 that SIZE is equal to ROUNDED_SIZE. */
10562
10563 if (size != rounded_size)
10564 emit_stack_probe (plus_constant (Pmode,
10565 gen_rtx_PLUS (Pmode,
10566 stack_pointer_rtx,
10567 sr.reg),
10568 rounded_size - size));
10569
10570 release_scratch_register_on_entry (&sr);
10571 }
10572
10573 /* Make sure nothing is scheduled before we are done. */
10574 emit_insn (gen_blockage ());
10575 }
10576
10577 /* Probe a range of stack addresses from REG to END, inclusive. These are
10578 offsets from the current stack pointer. */
10579
10580 const char *
10581 output_probe_stack_range (rtx reg, rtx end)
10582 {
10583 static int labelno = 0;
10584 char loop_lab[32], end_lab[32];
10585 rtx xops[3];
10586
10587 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10588 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10589
10590 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10591
10592 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10593 xops[0] = reg;
10594 xops[1] = end;
10595 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10596 fputs ("\tje\t", asm_out_file);
10597 assemble_name_raw (asm_out_file, end_lab);
10598 fputc ('\n', asm_out_file);
10599
10600 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10601 xops[1] = GEN_INT (PROBE_INTERVAL);
10602 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10603
10604 /* Probe at TEST_ADDR. */
10605 xops[0] = stack_pointer_rtx;
10606 xops[1] = reg;
10607 xops[2] = const0_rtx;
10608 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10609
10610 fprintf (asm_out_file, "\tjmp\t");
10611 assemble_name_raw (asm_out_file, loop_lab);
10612 fputc ('\n', asm_out_file);
10613
10614 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10615
10616 return "";
10617 }
10618
10619 /* Finalize the stack_realign_needed flag, which guides the prologue and
10620 epilogue to be generated in the correct form. */
10621 static void
10622 ix86_finalize_stack_realign_flags (void)
10623 {
10624 /* Check if stack realignment is really needed after reload, and
10625 store the result in cfun. */
10626 unsigned int incoming_stack_boundary
10627 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10628 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10629 unsigned int stack_realign = (incoming_stack_boundary
10630 < (crtl->is_leaf
10631 ? crtl->max_used_stack_slot_alignment
10632 : crtl->stack_alignment_needed));
10633
10634 if (crtl->stack_realign_finalized)
10635 {
10636 /* After stack_realign_needed is finalized, we can no longer
10637 change it. */
10638 gcc_assert (crtl->stack_realign_needed == stack_realign);
10639 return;
10640 }
10641
10642 /* If the only reason for frame_pointer_needed is that we conservatively
10643 assumed stack realignment might be needed, but in the end nothing that
10644 needed the stack alignment had been spilled, clear frame_pointer_needed
10645 and say we don't need stack realignment. */
10646 if (stack_realign
10647 && frame_pointer_needed
10648 && crtl->is_leaf
10649 && flag_omit_frame_pointer
10650 && crtl->sp_is_unchanging
10651 && !ix86_current_function_calls_tls_descriptor
10652 && !crtl->accesses_prior_frames
10653 && !cfun->calls_alloca
10654 && !crtl->calls_eh_return
10655 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10656 && !ix86_frame_pointer_required ()
10657 && get_frame_size () == 0
10658 && ix86_nsaved_sseregs () == 0
10659 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10660 {
10661 HARD_REG_SET set_up_by_prologue, prologue_used;
10662 basic_block bb;
10663
10664 CLEAR_HARD_REG_SET (prologue_used);
10665 CLEAR_HARD_REG_SET (set_up_by_prologue);
10666 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10667 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10668 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10669 HARD_FRAME_POINTER_REGNUM);
10670 FOR_EACH_BB_FN (bb, cfun)
10671 {
10672 rtx insn;
10673 FOR_BB_INSNS (bb, insn)
10674 if (NONDEBUG_INSN_P (insn)
10675 && requires_stack_frame_p (insn, prologue_used,
10676 set_up_by_prologue))
10677 {
10678 crtl->stack_realign_needed = stack_realign;
10679 crtl->stack_realign_finalized = true;
10680 return;
10681 }
10682 }
10683
10684 /* If drap has been set, but it actually isn't live at the start
10685 of the function, there is no reason to set it up. */
10686 if (crtl->drap_reg)
10687 {
10688 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10689 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10690 {
10691 crtl->drap_reg = NULL_RTX;
10692 crtl->need_drap = false;
10693 }
10694 }
10695 else
10696 cfun->machine->no_drap_save_restore = true;
10697
10698 frame_pointer_needed = false;
10699 stack_realign = false;
10700 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10701 crtl->stack_alignment_needed = incoming_stack_boundary;
10702 crtl->stack_alignment_estimated = incoming_stack_boundary;
10703 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10704 crtl->preferred_stack_boundary = incoming_stack_boundary;
10705 df_finish_pass (true);
10706 df_scan_alloc (NULL);
10707 df_scan_blocks ();
10708 df_compute_regs_ever_live (true);
10709 df_analyze ();
10710 }
10711
10712 crtl->stack_realign_needed = stack_realign;
10713 crtl->stack_realign_finalized = true;
10714 }
10715
10716 /* Expand the prologue into a bunch of separate insns. */
10717
10718 void
10719 ix86_expand_prologue (void)
10720 {
10721 struct machine_function *m = cfun->machine;
10722 rtx insn, t;
10723 bool pic_reg_used;
10724 struct ix86_frame frame;
10725 HOST_WIDE_INT allocate;
10726 bool int_registers_saved;
10727 bool sse_registers_saved;
10728
10729 ix86_finalize_stack_realign_flags ();
10730
10731 /* DRAP should not coexist with stack_realign_fp */
10732 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10733
10734 memset (&m->fs, 0, sizeof (m->fs));
10735
10736 /* Initialize CFA state for before the prologue. */
10737 m->fs.cfa_reg = stack_pointer_rtx;
10738 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10739
10740 /* Track SP offset to the CFA. We continue tracking this after we've
10741 swapped the CFA register away from SP. In the case of re-alignment
10742 this is fudged; we're interested in offsets within the local frame. */
10743 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10744 m->fs.sp_valid = true;
10745
10746 ix86_compute_frame_layout (&frame);
10747
10748 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10749 {
10750 /* We should have already generated an error for any use of
10751 ms_hook on a nested function. */
10752 gcc_checking_assert (!ix86_static_chain_on_stack);
10753
10754 /* Check if profiling is active and we shall use the profile-before-prologue
10755 variant. If so, sorry. */
10756 if (crtl->profile && flag_fentry != 0)
10757 sorry ("ms_hook_prologue attribute isn%'t compatible "
10758 "with -mfentry for 32-bit");
10759
10760 /* In ix86_asm_output_function_label we emitted:
10761 8b ff movl.s %edi,%edi
10762 55 push %ebp
10763 8b ec movl.s %esp,%ebp
10764
10765 This matches the hookable function prologue in Win32 API
10766 functions in Microsoft Windows XP Service Pack 2 and newer.
10767 Wine uses this to enable Windows apps to hook the Win32 API
10768 functions provided by Wine.
10769
10770 What that means is that we've already set up the frame pointer. */
10771
10772 if (frame_pointer_needed
10773 && !(crtl->drap_reg && crtl->stack_realign_needed))
10774 {
10775 rtx push, mov;
10776
10777 /* We've decided to use the frame pointer already set up.
10778 Describe this to the unwinder by pretending that both
10779 push and mov insns happen right here.
10780
10781 Putting the unwind info here at the end of the ms_hook
10782 is done so that we can make absolutely certain we get
10783 the required byte sequence at the start of the function,
10784 rather than relying on an assembler that can produce
10785 the exact encoding required.
10786
10787 However it does mean (in the unpatched case) that we have
10788 a 1 insn window where the asynchronous unwind info is
10789 incorrect. However, if we placed the unwind info at
10790 its correct location we would have incorrect unwind info
10791 in the patched case. Which is probably all moot since
10792 I don't expect Wine generates dwarf2 unwind info for the
10793 system libraries that use this feature. */
10794
10795 insn = emit_insn (gen_blockage ());
10796
10797 push = gen_push (hard_frame_pointer_rtx);
10798 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10799 stack_pointer_rtx);
10800 RTX_FRAME_RELATED_P (push) = 1;
10801 RTX_FRAME_RELATED_P (mov) = 1;
10802
10803 RTX_FRAME_RELATED_P (insn) = 1;
10804 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10805 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10806
10807 /* Note that gen_push incremented m->fs.cfa_offset, even
10808 though we didn't emit the push insn here. */
10809 m->fs.cfa_reg = hard_frame_pointer_rtx;
10810 m->fs.fp_offset = m->fs.cfa_offset;
10811 m->fs.fp_valid = true;
10812 }
10813 else
10814 {
10815 /* The frame pointer is not needed so pop %ebp again.
10816 This leaves us with a pristine state. */
10817 emit_insn (gen_pop (hard_frame_pointer_rtx));
10818 }
10819 }
10820
10821 /* The first insn of a function that accepts its static chain on the
10822 stack is to push the register that would be filled in by a direct
10823 call. This insn will be skipped by the trampoline. */
10824 else if (ix86_static_chain_on_stack)
10825 {
10826 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10827 emit_insn (gen_blockage ());
10828
10829 /* We don't want to interpret this push insn as a register save,
10830 only as a stack adjustment. The real copy of the register as
10831 a save will be done later, if needed. */
10832 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10833 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10834 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10835 RTX_FRAME_RELATED_P (insn) = 1;
10836 }
10837
10838 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10839 DRAP is needed and stack realignment is really needed after reload. */
10840 if (stack_realign_drap)
10841 {
10842 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10843
10844 /* Only need to push the parameter pointer reg if it is call-saved. */
10845 if (!call_used_regs[REGNO (crtl->drap_reg)])
10846 {
10847 /* Push arg pointer reg */
10848 insn = emit_insn (gen_push (crtl->drap_reg));
10849 RTX_FRAME_RELATED_P (insn) = 1;
10850 }
10851
10852 /* Grab the argument pointer. */
10853 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10854 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10855 RTX_FRAME_RELATED_P (insn) = 1;
10856 m->fs.cfa_reg = crtl->drap_reg;
10857 m->fs.cfa_offset = 0;
10858
10859 /* Align the stack. */
10860 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10861 stack_pointer_rtx,
10862 GEN_INT (-align_bytes)));
10863 RTX_FRAME_RELATED_P (insn) = 1;
10864
10865 /* Replicate the return address on the stack so that the return
10866 address can be reached via the (argp - 1) slot. This is needed
10867 to implement the macro RETURN_ADDR_RTX and the intrinsic function
10868 expand_builtin_return_addr, etc. */
10869 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10870 t = gen_frame_mem (word_mode, t);
10871 insn = emit_insn (gen_push (t));
10872 RTX_FRAME_RELATED_P (insn) = 1;
10873
10874 /* For the purposes of frame and register save area addressing,
10875 we've started over with a new frame. */
10876 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10877 m->fs.realigned = true;
10878 }
10879
10880 int_registers_saved = (frame.nregs == 0);
10881 sse_registers_saved = (frame.nsseregs == 0);
10882
10883 if (frame_pointer_needed && !m->fs.fp_valid)
10884 {
10885 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10886 slower on all targets. Also sdb doesn't like it. */
10887 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10888 RTX_FRAME_RELATED_P (insn) = 1;
10889
10890 /* Push registers now, before setting the frame pointer
10891 on SEH target. */
10892 if (!int_registers_saved
10893 && TARGET_SEH
10894 && !frame.save_regs_using_mov)
10895 {
10896 ix86_emit_save_regs ();
10897 int_registers_saved = true;
10898 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10899 }
10900
10901 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10902 {
10903 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10904 RTX_FRAME_RELATED_P (insn) = 1;
10905
10906 if (m->fs.cfa_reg == stack_pointer_rtx)
10907 m->fs.cfa_reg = hard_frame_pointer_rtx;
10908 m->fs.fp_offset = m->fs.sp_offset;
10909 m->fs.fp_valid = true;
10910 }
10911 }
10912
10913 if (!int_registers_saved)
10914 {
10915 /* If saving registers via PUSH, do so now. */
10916 if (!frame.save_regs_using_mov)
10917 {
10918 ix86_emit_save_regs ();
10919 int_registers_saved = true;
10920 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10921 }
10922
10923 /* When using the red zone we may start register saving before allocating
10924 the stack frame, saving one cycle of the prologue. However, avoid
10925 doing this if we have to probe the stack; at least on x86_64 the
10926 stack probe can turn into a call that clobbers a red zone location. */
10927 else if (ix86_using_red_zone ()
10928 && (! TARGET_STACK_PROBE
10929 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10930 {
10931 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10932 int_registers_saved = true;
10933 }
10934 }
10935
10936 if (stack_realign_fp)
10937 {
10938 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10939 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10940
10941 /* The computation of the size of the re-aligned stack frame means
10942 that we must allocate the size of the register save area before
10943 performing the actual alignment. Otherwise we cannot guarantee
10944 that there's enough storage above the realignment point. */
10945 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10946 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10947 GEN_INT (m->fs.sp_offset
10948 - frame.sse_reg_save_offset),
10949 -1, false);
10950
10951 /* Align the stack. */
10952 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10953 stack_pointer_rtx,
10954 GEN_INT (-align_bytes)));
10955
10956 /* For the purposes of register save area addressing, the stack
10957 pointer is no longer valid. As for the value of sp_offset,
10958 see ix86_compute_frame_layout, which we need to match in order
10959 to pass verification of stack_pointer_offset at the end. */
10960 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10961 m->fs.sp_valid = false;
10962 }
10963
10964 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10965
10966 if (flag_stack_usage_info)
10967 {
10968 /* We start to count from ARG_POINTER. */
10969 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10970
10971 /* If it was realigned, take into account the fake frame. */
10972 if (stack_realign_drap)
10973 {
10974 if (ix86_static_chain_on_stack)
10975 stack_size += UNITS_PER_WORD;
10976
10977 if (!call_used_regs[REGNO (crtl->drap_reg)])
10978 stack_size += UNITS_PER_WORD;
10979
10980 /* This over-estimates by 1 minimal-stack-alignment-unit but
10981 mitigates that by counting in the new return address slot. */
10982 current_function_dynamic_stack_size
10983 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10984 }
10985
10986 current_function_static_stack_size = stack_size;
10987 }
10988
10989 /* On SEH target with very large frame size, allocate an area to save
10990 SSE registers (as the very large allocation won't be described). */
10991 if (TARGET_SEH
10992 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
10993 && !sse_registers_saved)
10994 {
10995 HOST_WIDE_INT sse_size =
10996 frame.sse_reg_save_offset - frame.reg_save_offset;
10997
10998 gcc_assert (int_registers_saved);
10999
11000 /* No need to do stack checking as the area will be immediately
11001 written. */
11002 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11003 GEN_INT (-sse_size), -1,
11004 m->fs.cfa_reg == stack_pointer_rtx);
11005 allocate -= sse_size;
11006 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11007 sse_registers_saved = true;
11008 }
11009
11010 /* The stack has already been decremented by the instruction calling us,
11011 so probe if the size is non-negative to preserve the protection area. */
11012 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11013 {
11014 /* We expect the registers to be saved when probes are used. */
11015 gcc_assert (int_registers_saved);
11016
11017 if (STACK_CHECK_MOVING_SP)
11018 {
11019 if (!(crtl->is_leaf && !cfun->calls_alloca
11020 && allocate <= PROBE_INTERVAL))
11021 {
11022 ix86_adjust_stack_and_probe (allocate);
11023 allocate = 0;
11024 }
11025 }
11026 else
11027 {
11028 HOST_WIDE_INT size = allocate;
11029
11030 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11031 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11032
11033 if (TARGET_STACK_PROBE)
11034 {
11035 if (crtl->is_leaf && !cfun->calls_alloca)
11036 {
11037 if (size > PROBE_INTERVAL)
11038 ix86_emit_probe_stack_range (0, size);
11039 }
11040 else
11041 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11042 }
11043 else
11044 {
11045 if (crtl->is_leaf && !cfun->calls_alloca)
11046 {
11047 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11048 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11049 size - STACK_CHECK_PROTECT);
11050 }
11051 else
11052 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11053 }
11054 }
11055 }
11056
11057 if (allocate == 0)
11058 ;
11059 else if (!ix86_target_stack_probe ()
11060 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11061 {
11062 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11063 GEN_INT (-allocate), -1,
11064 m->fs.cfa_reg == stack_pointer_rtx);
11065 }
11066 else
11067 {
11068 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11069 rtx r10 = NULL;
11070 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11071 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11072 bool eax_live = ix86_eax_live_at_start_p ();
11073 bool r10_live = false;
11074
11075 if (TARGET_64BIT)
11076 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11077
11078 if (eax_live)
11079 {
11080 insn = emit_insn (gen_push (eax));
11081 allocate -= UNITS_PER_WORD;
11082 /* Note that SEH directives need to continue tracking the stack
11083 pointer even after the frame pointer has been set up. */
11084 if (sp_is_cfa_reg || TARGET_SEH)
11085 {
11086 if (sp_is_cfa_reg)
11087 m->fs.cfa_offset += UNITS_PER_WORD;
11088 RTX_FRAME_RELATED_P (insn) = 1;
11089 }
11090 }
11091
11092 if (r10_live)
11093 {
11094 r10 = gen_rtx_REG (Pmode, R10_REG);
11095 insn = emit_insn (gen_push (r10));
11096 allocate -= UNITS_PER_WORD;
11097 if (sp_is_cfa_reg || TARGET_SEH)
11098 {
11099 if (sp_is_cfa_reg)
11100 m->fs.cfa_offset += UNITS_PER_WORD;
11101 RTX_FRAME_RELATED_P (insn) = 1;
11102 }
11103 }
11104
11105 emit_move_insn (eax, GEN_INT (allocate));
11106 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11107
11108 /* Use the fact that AX still contains ALLOCATE. */
11109 adjust_stack_insn = (Pmode == DImode
11110 ? gen_pro_epilogue_adjust_stack_di_sub
11111 : gen_pro_epilogue_adjust_stack_si_sub);
11112
11113 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11114 stack_pointer_rtx, eax));
11115
11116 if (sp_is_cfa_reg || TARGET_SEH)
11117 {
11118 if (sp_is_cfa_reg)
11119 m->fs.cfa_offset += allocate;
11120 RTX_FRAME_RELATED_P (insn) = 1;
11121 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11122 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11123 plus_constant (Pmode, stack_pointer_rtx,
11124 -allocate)));
11125 }
11126 m->fs.sp_offset += allocate;
11127
11128 /* Use stack_pointer_rtx for relative addressing so that code
11129 works for realigned stack, too. */
11130 if (r10_live && eax_live)
11131 {
11132 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11133 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11134 gen_frame_mem (word_mode, t));
11135 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11136 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11137 gen_frame_mem (word_mode, t));
11138 }
11139 else if (eax_live || r10_live)
11140 {
11141 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11142 emit_move_insn (gen_rtx_REG (word_mode,
11143 (eax_live ? AX_REG : R10_REG)),
11144 gen_frame_mem (word_mode, t));
11145 }
11146 }
11147 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11148
11149 /* If we haven't already set up the frame pointer, do so now. */
11150 if (frame_pointer_needed && !m->fs.fp_valid)
11151 {
11152 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11153 GEN_INT (frame.stack_pointer_offset
11154 - frame.hard_frame_pointer_offset));
11155 insn = emit_insn (insn);
11156 RTX_FRAME_RELATED_P (insn) = 1;
11157 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11158
11159 if (m->fs.cfa_reg == stack_pointer_rtx)
11160 m->fs.cfa_reg = hard_frame_pointer_rtx;
11161 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11162 m->fs.fp_valid = true;
11163 }
11164
11165 if (!int_registers_saved)
11166 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11167 if (!sse_registers_saved)
11168 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11169
11170 pic_reg_used = false;
11171 /* We don't use the PIC register for the pe-coff target. */
11172 if (pic_offset_table_rtx
11173 && !TARGET_PECOFF
11174 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11175 || crtl->profile))
11176 {
11177 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11178
11179 if (alt_pic_reg_used != INVALID_REGNUM)
11180 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11181
11182 pic_reg_used = true;
11183 }
11184
11185 if (pic_reg_used)
11186 {
11187 if (TARGET_64BIT)
11188 {
11189 if (ix86_cmodel == CM_LARGE_PIC)
11190 {
11191 rtx label, tmp_reg;
11192
11193 gcc_assert (Pmode == DImode);
11194 label = gen_label_rtx ();
11195 emit_label (label);
11196 LABEL_PRESERVE_P (label) = 1;
11197 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11198 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11199 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11200 label));
11201 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11202 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11203 pic_offset_table_rtx, tmp_reg));
11204 }
11205 else
11206 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11207 }
11208 else
11209 {
11210 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11211 RTX_FRAME_RELATED_P (insn) = 1;
11212 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11213 }
11214 }
11215
11216 /* In the pic_reg_used case, make sure that the GOT load isn't deleted
11217 when mcount needs it. A blockage to avoid call movement across the
11218 mcount call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11219 note. */
11220 if (crtl->profile && !flag_fentry && pic_reg_used)
11221 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11222
11223 if (crtl->drap_reg && !crtl->stack_realign_needed)
11224 {
11225 /* vDRAP is set up, but after reload it turns out stack realignment
11226 isn't necessary; here we emit prologue code to set up DRAP
11227 without the stack realignment adjustment. */
11228 t = choose_baseaddr (0);
11229 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11230 }
11231
11232 /* Prevent instructions from being scheduled into the register save push
11233 sequence when access to the red zone area is done through the frame
11234 pointer. The offset between the frame pointer and the stack pointer is
11235 calculated relative to the value of the stack pointer at the end of the
11236 function prologue, and moving instructions that access the red zone area
11237 via the frame pointer inside the push sequence violates this assumption. */
11238 if (frame_pointer_needed && frame.red_zone_size)
11239 emit_insn (gen_memory_blockage ());
11240
11241 /* Emit cld instruction if stringops are used in the function. */
11242 if (TARGET_CLD && ix86_current_function_needs_cld)
11243 emit_insn (gen_cld ());
11244
11245 /* SEH requires that the prologue end within 256 bytes of the start of
11246 the function. Prevent instruction schedules that would extend that.
11247 Further, prevent alloca modifications to the stack pointer from being
11248 combined with prologue modifications. */
11249 if (TARGET_SEH)
11250 emit_insn (gen_prologue_use (stack_pointer_rtx));
11251 }
11252
11253 /* Emit code to restore REG using a POP insn. */
11254
11255 static void
11256 ix86_emit_restore_reg_using_pop (rtx reg)
11257 {
11258 struct machine_function *m = cfun->machine;
11259 rtx insn = emit_insn (gen_pop (reg));
11260
11261 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11262 m->fs.sp_offset -= UNITS_PER_WORD;
11263
11264 if (m->fs.cfa_reg == crtl->drap_reg
11265 && REGNO (reg) == REGNO (crtl->drap_reg))
11266 {
11267 /* Previously we'd represented the CFA as an expression
11268 like *(%ebp - 8). We've just popped that value from
11269 the stack, which means we need to reset the CFA to
11270 the drap register. This will remain until we restore
11271 the stack pointer. */
11272 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11273 RTX_FRAME_RELATED_P (insn) = 1;
11274
11275 /* This means that the DRAP register is valid for addressing too. */
11276 m->fs.drap_valid = true;
11277 return;
11278 }
11279
11280 if (m->fs.cfa_reg == stack_pointer_rtx)
11281 {
11282 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11283 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11284 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11285 RTX_FRAME_RELATED_P (insn) = 1;
11286
11287 m->fs.cfa_offset -= UNITS_PER_WORD;
11288 }
11289
11290 /* When the frame pointer is the CFA, and we pop it, we are
11291 swapping back to the stack pointer as the CFA. This happens
11292 for stack frames that don't allocate other data, so we assume
11293 the stack pointer is now pointing at the return address, i.e.
11294 the function entry state, which makes the offset one word. */
11295 if (reg == hard_frame_pointer_rtx)
11296 {
11297 m->fs.fp_valid = false;
11298 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11299 {
11300 m->fs.cfa_reg = stack_pointer_rtx;
11301 m->fs.cfa_offset -= UNITS_PER_WORD;
11302
11303 add_reg_note (insn, REG_CFA_DEF_CFA,
11304 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11305 GEN_INT (m->fs.cfa_offset)));
11306 RTX_FRAME_RELATED_P (insn) = 1;
11307 }
11308 }
11309 }
11310
11311 /* Emit code to restore saved registers using POP insns. */
11312
11313 static void
11314 ix86_emit_restore_regs_using_pop (void)
11315 {
11316 unsigned int regno;
11317
11318 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11319 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11320 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11321 }
11322
11323 /* Emit code and notes for the LEAVE instruction. */
11324
11325 static void
11326 ix86_emit_leave (void)
11327 {
11328 struct machine_function *m = cfun->machine;
11329 rtx insn = emit_insn (ix86_gen_leave ());
11330
11331 ix86_add_queued_cfa_restore_notes (insn);
11332
11333 gcc_assert (m->fs.fp_valid);
11334 m->fs.sp_valid = true;
11335 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11336 m->fs.fp_valid = false;
11337
11338 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11339 {
11340 m->fs.cfa_reg = stack_pointer_rtx;
11341 m->fs.cfa_offset = m->fs.sp_offset;
11342
11343 add_reg_note (insn, REG_CFA_DEF_CFA,
11344 plus_constant (Pmode, stack_pointer_rtx,
11345 m->fs.sp_offset));
11346 RTX_FRAME_RELATED_P (insn) = 1;
11347 }
11348 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11349 m->fs.fp_offset);
11350 }
11351
11352 /* Emit code to restore saved registers using MOV insns.
11353 First register is restored from CFA - CFA_OFFSET. */
11354 static void
11355 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11356 bool maybe_eh_return)
11357 {
11358 struct machine_function *m = cfun->machine;
11359 unsigned int regno;
11360
11361 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11362 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11363 {
11364 rtx reg = gen_rtx_REG (word_mode, regno);
11365 rtx insn, mem;
11366
11367 mem = choose_baseaddr (cfa_offset);
11368 mem = gen_frame_mem (word_mode, mem);
11369 insn = emit_move_insn (reg, mem);
11370
11371 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11372 {
11373 /* Previously we'd represented the CFA as an expression
11374 like *(%ebp - 8). We've just popped that value from
11375 the stack, which means we need to reset the CFA to
11376 the drap register. This will remain until we restore
11377 the stack pointer. */
11378 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11379 RTX_FRAME_RELATED_P (insn) = 1;
11380
11381 /* This means that the DRAP register is valid for addressing. */
11382 m->fs.drap_valid = true;
11383 }
11384 else
11385 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11386
11387 cfa_offset -= UNITS_PER_WORD;
11388 }
11389 }
11390
11391 /* Emit code to restore saved registers using MOV insns.
11392 First register is restored from CFA - CFA_OFFSET. */
11393 static void
11394 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11395 bool maybe_eh_return)
11396 {
11397 unsigned int regno;
11398
11399 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11400 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11401 {
11402 rtx reg = gen_rtx_REG (V4SFmode, regno);
11403 rtx mem;
11404
11405 mem = choose_baseaddr (cfa_offset);
11406 mem = gen_rtx_MEM (V4SFmode, mem);
11407 set_mem_align (mem, 128);
11408 emit_move_insn (reg, mem);
11409
11410 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11411
11412 cfa_offset -= 16;
11413 }
11414 }
11415
11416 /* Restore function stack, frame, and registers. */
11417
11418 void
11419 ix86_expand_epilogue (int style)
11420 {
11421 struct machine_function *m = cfun->machine;
11422 struct machine_frame_state frame_state_save = m->fs;
11423 struct ix86_frame frame;
11424 bool restore_regs_via_mov;
11425 bool using_drap;
11426
11427 ix86_finalize_stack_realign_flags ();
11428 ix86_compute_frame_layout (&frame);
11429
11430 m->fs.sp_valid = (!frame_pointer_needed
11431 || (crtl->sp_is_unchanging
11432 && !stack_realign_fp));
11433 gcc_assert (!m->fs.sp_valid
11434 || m->fs.sp_offset == frame.stack_pointer_offset);
11435
11436 /* The FP must be valid if the frame pointer is present. */
11437 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11438 gcc_assert (!m->fs.fp_valid
11439 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11440
11441 /* We must have *some* valid pointer to the stack frame. */
11442 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11443
11444 /* The DRAP is never valid at this point. */
11445 gcc_assert (!m->fs.drap_valid);
11446
11447 /* See the comment about red zone and frame
11448 pointer usage in ix86_expand_prologue. */
11449 if (frame_pointer_needed && frame.red_zone_size)
11450 emit_insn (gen_memory_blockage ());
11451
11452 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11453 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11454
11455 /* Determine the CFA offset of the end of the red-zone. */
11456 m->fs.red_zone_offset = 0;
11457 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11458 {
11459 /* The red-zone begins below the return address. */
11460 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
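      /* With the 128-byte x86-64 red zone and 8-byte words, this comes out
	 to 136 bytes.  */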
11461
11462 /* When the register save area is in the aligned portion of
11463 the stack, determine the maximum runtime displacement that
11464 matches up with the aligned frame. */
11465 if (stack_realign_drap)
11466 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11467 + UNITS_PER_WORD);
11468 }
11469
11470 /* Special care must be taken for the normal return case of a function
11471 using eh_return: the eax and edx registers are marked as saved, but
11472 not restored along this path. Adjust the save location to match. */
11473 if (crtl->calls_eh_return && style != 2)
11474 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11475
11476 /* EH_RETURN requires the use of moves to function properly. */
11477 if (crtl->calls_eh_return)
11478 restore_regs_via_mov = true;
11479 /* SEH requires the use of pops to identify the epilogue. */
11480 else if (TARGET_SEH)
11481 restore_regs_via_mov = false;
11482 /* If we're only restoring one register and sp is not valid, then
11483 use a move instruction to restore the register, since it's
11484 less work than reloading sp and popping the register. */
11485 else if (!m->fs.sp_valid && frame.nregs <= 1)
11486 restore_regs_via_mov = true;
11487 else if (TARGET_EPILOGUE_USING_MOVE
11488 && cfun->machine->use_fast_prologue_epilogue
11489 && (frame.nregs > 1
11490 || m->fs.sp_offset != frame.reg_save_offset))
11491 restore_regs_via_mov = true;
11492 else if (frame_pointer_needed
11493 && !frame.nregs
11494 && m->fs.sp_offset != frame.reg_save_offset)
11495 restore_regs_via_mov = true;
11496 else if (frame_pointer_needed
11497 && TARGET_USE_LEAVE
11498 && cfun->machine->use_fast_prologue_epilogue
11499 && frame.nregs == 1)
11500 restore_regs_via_mov = true;
11501 else
11502 restore_regs_via_mov = false;
11503
11504 if (restore_regs_via_mov || frame.nsseregs)
11505 {
11506 /* Ensure that the entire register save area is addressable via
11507 the stack pointer, if we will restore via sp. */
11508 if (TARGET_64BIT
11509 && m->fs.sp_offset > 0x7fffffff
11510 && !(m->fs.fp_valid || m->fs.drap_valid)
11511 && (frame.nsseregs + frame.nregs) != 0)
11512 {
11513 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11514 GEN_INT (m->fs.sp_offset
11515 - frame.sse_reg_save_offset),
11516 style,
11517 m->fs.cfa_reg == stack_pointer_rtx);
11518 }
11519 }
11520
11521 /* If there are any SSE registers to restore, then we have to do it
11522 via moves, since there's obviously no pop for SSE regs. */
11523 if (frame.nsseregs)
11524 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11525 style == 2);
11526
11527 if (restore_regs_via_mov)
11528 {
11529 rtx t;
11530
11531 if (frame.nregs)
11532 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11533
11534 /* eh_return epilogues need %ecx added to the stack pointer. */
11535 if (style == 2)
11536 {
11537 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11538
11539 /* Stack align doesn't work with eh_return. */
11540 gcc_assert (!stack_realign_drap);
11541 /* Neither do regparm nested functions. */
11542 gcc_assert (!ix86_static_chain_on_stack);
11543
11544 if (frame_pointer_needed)
11545 {
11546 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11547 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11548 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11549
11550 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11551 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11552
11553 /* Note that we use SA as a temporary CFA, as the return
11554 address is at the proper place relative to it. We
11555 pretend this happens at the FP restore insn because
11556 prior to this insn the FP would be stored at the wrong
11557 offset relative to SA, and after this insn we have no
11558 other reasonable register to use for the CFA. We don't
11559 bother resetting the CFA to the SP for the duration of
11560 the return insn. */
11561 add_reg_note (insn, REG_CFA_DEF_CFA,
11562 plus_constant (Pmode, sa, UNITS_PER_WORD));
11563 ix86_add_queued_cfa_restore_notes (insn);
11564 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11565 RTX_FRAME_RELATED_P (insn) = 1;
11566
11567 m->fs.cfa_reg = sa;
11568 m->fs.cfa_offset = UNITS_PER_WORD;
11569 m->fs.fp_valid = false;
11570
11571 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11572 const0_rtx, style, false);
11573 }
11574 else
11575 {
11576 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11577 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11578 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11579 ix86_add_queued_cfa_restore_notes (insn);
11580
11581 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11582 if (m->fs.cfa_offset != UNITS_PER_WORD)
11583 {
11584 m->fs.cfa_offset = UNITS_PER_WORD;
11585 add_reg_note (insn, REG_CFA_DEF_CFA,
11586 plus_constant (Pmode, stack_pointer_rtx,
11587 UNITS_PER_WORD));
11588 RTX_FRAME_RELATED_P (insn) = 1;
11589 }
11590 }
11591 m->fs.sp_offset = UNITS_PER_WORD;
11592 m->fs.sp_valid = true;
11593 }
11594 }
11595 else
11596 {
11597 /* SEH requires that the function end with (1) a stack adjustment
11598 if necessary, (2) a sequence of pops, and (3) a return or
11599 jump instruction. Prevent insns from the function body from
11600 being scheduled into this sequence. */
11601 if (TARGET_SEH)
11602 {
11603 /* Prevent a catch region from being adjacent to the standard
11604 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11605 several other flags that would be interesting to test are
11606 set up yet. */
11607 if (flag_non_call_exceptions)
11608 emit_insn (gen_nops (const1_rtx));
11609 else
11610 emit_insn (gen_blockage ());
11611 }
11612
11613 /* The first step is to deallocate the stack frame so that we can
11614 pop the registers. Also do it on the SEH target for a very large
11615 frame, as the emitted instructions aren't allowed by the ABI in
11616 epilogues. */
11617 if (!m->fs.sp_valid
11618 || (TARGET_SEH
11619 && (m->fs.sp_offset - frame.reg_save_offset
11620 >= SEH_MAX_FRAME_SIZE)))
11621 {
11622 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11623 GEN_INT (m->fs.fp_offset
11624 - frame.reg_save_offset),
11625 style, false);
11626 }
11627 else if (m->fs.sp_offset != frame.reg_save_offset)
11628 {
11629 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11630 GEN_INT (m->fs.sp_offset
11631 - frame.reg_save_offset),
11632 style,
11633 m->fs.cfa_reg == stack_pointer_rtx);
11634 }
11635
11636 ix86_emit_restore_regs_using_pop ();
11637 }
11638
11639 /* If we used a frame pointer and haven't already got rid of it,
11640 then do so now. */
11641 if (m->fs.fp_valid)
11642 {
11643 /* If the stack pointer is valid and pointing at the frame
11644 pointer store address, then we only need a pop. */
11645 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11646 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11647 /* Leave results in shorter dependency chains on CPUs that are
11648 able to grok it fast. */
11649 else if (TARGET_USE_LEAVE
11650 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11651 || !cfun->machine->use_fast_prologue_epilogue)
11652 ix86_emit_leave ();
11653 else
11654 {
11655 pro_epilogue_adjust_stack (stack_pointer_rtx,
11656 hard_frame_pointer_rtx,
11657 const0_rtx, style, !using_drap);
11658 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11659 }
11660 }
11661
11662 if (using_drap)
11663 {
11664 int param_ptr_offset = UNITS_PER_WORD;
11665 rtx insn;
11666
11667 gcc_assert (stack_realign_drap);
11668
11669 if (ix86_static_chain_on_stack)
11670 param_ptr_offset += UNITS_PER_WORD;
11671 if (!call_used_regs[REGNO (crtl->drap_reg)])
11672 param_ptr_offset += UNITS_PER_WORD;
11673
11674 insn = emit_insn (gen_rtx_SET
11675 (VOIDmode, stack_pointer_rtx,
11676 gen_rtx_PLUS (Pmode,
11677 crtl->drap_reg,
11678 GEN_INT (-param_ptr_offset))));
11679 m->fs.cfa_reg = stack_pointer_rtx;
11680 m->fs.cfa_offset = param_ptr_offset;
11681 m->fs.sp_offset = param_ptr_offset;
11682 m->fs.realigned = false;
11683
11684 add_reg_note (insn, REG_CFA_DEF_CFA,
11685 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11686 GEN_INT (param_ptr_offset)));
11687 RTX_FRAME_RELATED_P (insn) = 1;
11688
11689 if (!call_used_regs[REGNO (crtl->drap_reg)])
11690 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11691 }
11692
11693 /* At this point the stack pointer must be valid, and we must have
11694 restored all of the registers. We may not have deallocated the
11695 entire stack frame. We've delayed this until now because it may
11696 be possible to merge the local stack deallocation with the
11697 deallocation forced by ix86_static_chain_on_stack. */
11698 gcc_assert (m->fs.sp_valid);
11699 gcc_assert (!m->fs.fp_valid);
11700 gcc_assert (!m->fs.realigned);
11701 if (m->fs.sp_offset != UNITS_PER_WORD)
11702 {
11703 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11704 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11705 style, true);
11706 }
11707 else
11708 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11709
11710 /* Sibcall epilogues don't want a return instruction. */
11711 if (style == 0)
11712 {
11713 m->fs = frame_state_save;
11714 return;
11715 }
11716
11717 if (crtl->args.pops_args && crtl->args.size)
11718 {
11719 rtx popc = GEN_INT (crtl->args.pops_args);
11720
11721 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11722 address, do explicit add, and jump indirectly to the caller. */
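/* As a rough illustration (sketch only; the exact insns are generated
   by the code below, not by this comment), for N == crtl->args.pops_args
   greater than 64K the intended sequence is approximately:

       popl  %ecx          # fetch the return address
       addl  $N, %esp      # pop the N bytes of arguments
       jmp   *%ecx         # return to the caller

   while the common case below simply uses "ret $N".  */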
11723
11724 if (crtl->args.pops_args >= 65536)
11725 {
11726 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11727 rtx insn;
11728
11729 /* There is no "pascal" calling convention in any 64bit ABI. */
11730 gcc_assert (!TARGET_64BIT);
11731
11732 insn = emit_insn (gen_pop (ecx));
11733 m->fs.cfa_offset -= UNITS_PER_WORD;
11734 m->fs.sp_offset -= UNITS_PER_WORD;
11735
11736 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11737 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11738 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11739 add_reg_note (insn, REG_CFA_REGISTER,
11740 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11741 RTX_FRAME_RELATED_P (insn) = 1;
11742
11743 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11744 popc, -1, true);
11745 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11746 }
11747 else
11748 emit_jump_insn (gen_simple_return_pop_internal (popc));
11749 }
11750 else
11751 emit_jump_insn (gen_simple_return_internal ());
11752
11753 /* Restore the state back to the state from the prologue,
11754 so that it's correct for the next epilogue. */
11755 m->fs = frame_state_save;
11756 }
11757
11758 /* Reset from the function's potential modifications. */
11759
11760 static void
11761 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11762 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11763 {
11764 if (pic_offset_table_rtx)
11765 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11766 #if TARGET_MACHO
11767 /* Mach-O doesn't support labels at the end of objects, so if
11768 it looks like we might want one, insert a NOP. */
11769 {
11770 rtx insn = get_last_insn ();
11771 rtx deleted_debug_label = NULL_RTX;
11772 while (insn
11773 && NOTE_P (insn)
11774 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11775 {
11776 /* Don't insert a nop just for NOTE_INSN_DELETED_DEBUG_LABEL
11777 notes; instead set their CODE_LABEL_NUMBER to -1,
11778 otherwise there would be code generation differences
11779 between -g and -g0. */
11780 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11781 deleted_debug_label = insn;
11782 insn = PREV_INSN (insn);
11783 }
11784 if (insn
11785 && (LABEL_P (insn)
11786 || (NOTE_P (insn)
11787 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11788 fputs ("\tnop\n", file);
11789 else if (deleted_debug_label)
11790 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11791 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11792 CODE_LABEL_NUMBER (insn) = -1;
11793 }
11794 #endif
11795
11796 }
11797
11798 /* Return a scratch register to use in the split stack prologue. The
11799 split stack prologue is used for -fsplit-stack. It consists of the
11800 first instructions in the function, even before the regular prologue.
11801 The scratch register can be any caller-saved register which is not
11802 used for parameters or for the static chain. */
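/* As an informal summary of the selection logic below (derived from the
   code itself, not from a separate specification):
     64-bit:                        %r11
     32-bit fastcall:               %eax (nested functions unsupported)
     32-bit thiscall:               %edx, or %eax with a static chain
     32-bit regparm < 3:            %ecx, or %edx with a static chain
                                    (regparm >= 2 plus a static chain
                                    is unsupported)
     32-bit regparm == 3:           unsupported  */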
11803
11804 static unsigned int
11805 split_stack_prologue_scratch_regno (void)
11806 {
11807 if (TARGET_64BIT)
11808 return R11_REG;
11809 else
11810 {
11811 bool is_fastcall, is_thiscall;
11812 int regparm;
11813
11814 is_fastcall = (lookup_attribute ("fastcall",
11815 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11816 != NULL);
11817 is_thiscall = (lookup_attribute ("thiscall",
11818 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11819 != NULL);
11820 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11821
11822 if (is_fastcall)
11823 {
11824 if (DECL_STATIC_CHAIN (cfun->decl))
11825 {
11826 sorry ("-fsplit-stack does not support fastcall with "
11827 "nested function");
11828 return INVALID_REGNUM;
11829 }
11830 return AX_REG;
11831 }
11832 else if (is_thiscall)
11833 {
11834 if (!DECL_STATIC_CHAIN (cfun->decl))
11835 return DX_REG;
11836 return AX_REG;
11837 }
11838 else if (regparm < 3)
11839 {
11840 if (!DECL_STATIC_CHAIN (cfun->decl))
11841 return CX_REG;
11842 else
11843 {
11844 if (regparm >= 2)
11845 {
11846 sorry ("-fsplit-stack does not support 2 register "
11847 "parameters for a nested function");
11848 return INVALID_REGNUM;
11849 }
11850 return DX_REG;
11851 }
11852 }
11853 else
11854 {
11855 /* FIXME: We could make this work by pushing a register
11856 around the addition and comparison. */
11857 sorry ("-fsplit-stack does not support 3 register parameters");
11858 return INVALID_REGNUM;
11859 }
11860 }
11861 }
11862
11863 /* A SYMBOL_REF for the function which allocates new stack space for
11864 -fsplit-stack. */
11865
11866 static GTY(()) rtx split_stack_fn;
11867
11868 /* A SYMBOL_REF for the __morestack_large_model function, used when
11869 compiling for the large code model. */
11870
11871 static GTY(()) rtx split_stack_fn_large;
11872
11873 /* Handle -fsplit-stack. These are the first instructions in the
11874 function, even before the regular prologue. */
11875
11876 void
11877 ix86_expand_split_stack_prologue (void)
11878 {
11879 struct ix86_frame frame;
11880 HOST_WIDE_INT allocate;
11881 unsigned HOST_WIDE_INT args_size;
11882 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11883 rtx scratch_reg = NULL_RTX;
11884 rtx varargs_label = NULL_RTX;
11885 rtx fn;
11886
11887 gcc_assert (flag_split_stack && reload_completed);
11888
11889 ix86_finalize_stack_realign_flags ();
11890 ix86_compute_frame_layout (&frame);
11891 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11892
11893 /* This is the label we will branch to if we have enough stack
11894 space. We expect the basic block reordering pass to reverse this
11895 branch if optimizing, so that we branch in the unlikely case. */
11896 label = gen_label_rtx ();
11897
11898 /* We need to compare the stack pointer minus the frame size with
11899 the stack boundary in the TCB. The stack boundary always gives
11900 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11901 can compare directly. Otherwise we need to do an addition. */
11902
11903 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11904 UNSPEC_STACK_CHECK);
11905 limit = gen_rtx_CONST (Pmode, limit);
11906 limit = gen_rtx_MEM (Pmode, limit);
11907 if (allocate < SPLIT_STACK_AVAILABLE)
11908 current = stack_pointer_rtx;
11909 else
11910 {
11911 unsigned int scratch_regno;
11912 rtx offset;
11913
11914 /* We need a scratch register to hold the stack pointer minus
11915 the required frame size. Since this is the very start of the
11916 function, the scratch register can be any caller-saved
11917 register which is not used for parameters. */
11918 offset = GEN_INT (- allocate);
11919 scratch_regno = split_stack_prologue_scratch_regno ();
11920 if (scratch_regno == INVALID_REGNUM)
11921 return;
11922 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11923 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11924 {
11925 /* We don't use ix86_gen_add3 in this case because it will
11926 want to split to lea, but when not optimizing the insn
11927 will not be split after this point. */
11928 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11929 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11930 offset)));
11931 }
11932 else
11933 {
11934 emit_move_insn (scratch_reg, offset);
11935 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11936 stack_pointer_rtx));
11937 }
11938 current = scratch_reg;
11939 }
11940
11941 ix86_expand_branch (GEU, current, limit, label);
11942 jump_insn = get_last_insn ();
11943 JUMP_LABEL (jump_insn) = label;
11944
11945 /* Mark the jump as very likely to be taken. */
11946 add_int_reg_note (jump_insn, REG_BR_PROB,
11947 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
11948
11949 if (split_stack_fn == NULL_RTX)
11950 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11951 fn = split_stack_fn;
11952
11953 /* Get more stack space. We pass in the desired stack space and the
11954 size of the arguments to copy to the new stack. In 32-bit mode
11955 we push the parameters; __morestack will return on a new stack
11956 anyhow. In 64-bit mode we pass the parameters in r10 and
11957 r11. */
11958 allocate_rtx = GEN_INT (allocate);
11959 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11960 call_fusage = NULL_RTX;
11961 if (TARGET_64BIT)
11962 {
11963 rtx reg10, reg11;
11964
11965 reg10 = gen_rtx_REG (Pmode, R10_REG);
11966 reg11 = gen_rtx_REG (Pmode, R11_REG);
11967
11968 /* If this function uses a static chain, it will be in %r10.
11969 Preserve it across the call to __morestack. */
11970 if (DECL_STATIC_CHAIN (cfun->decl))
11971 {
11972 rtx rax;
11973
11974 rax = gen_rtx_REG (word_mode, AX_REG);
11975 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
11976 use_reg (&call_fusage, rax);
11977 }
11978
11979 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11980 && !TARGET_PECOFF)
11981 {
11982 HOST_WIDE_INT argval;
11983
11984 gcc_assert (Pmode == DImode);
11985 /* When using the large model we need to load the address
11986 into a register, and we've run out of registers. So we
11987 switch to a different calling convention, and we call a
11988 different function: __morestack_large. We pass the
11989 argument size in the upper 32 bits of r10 and pass the
11990 frame size in the lower 32 bits. */
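/* For example (illustrative values only): with args_size == 0x40 and
   allocate == 0x2000, the value computed below is
       argval = (0x40 << 32) + 0x2000 == 0x0000004000002000,
   so %r10 carries the argument size in its upper half and the frame
   size in its lower half.  */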
11991 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11992 gcc_assert ((args_size & 0xffffffff) == args_size);
11993
11994 if (split_stack_fn_large == NULL_RTX)
11995 split_stack_fn_large =
11996 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11997
11998 if (ix86_cmodel == CM_LARGE_PIC)
11999 {
12000 rtx label, x;
12001
12002 label = gen_label_rtx ();
12003 emit_label (label);
12004 LABEL_PRESERVE_P (label) = 1;
12005 emit_insn (gen_set_rip_rex64 (reg10, label));
12006 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12007 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12008 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12009 UNSPEC_GOT);
12010 x = gen_rtx_CONST (Pmode, x);
12011 emit_move_insn (reg11, x);
12012 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12013 x = gen_const_mem (Pmode, x);
12014 emit_move_insn (reg11, x);
12015 }
12016 else
12017 emit_move_insn (reg11, split_stack_fn_large);
12018
12019 fn = reg11;
12020
12021 argval = ((args_size << 16) << 16) + allocate;
12022 emit_move_insn (reg10, GEN_INT (argval));
12023 }
12024 else
12025 {
12026 emit_move_insn (reg10, allocate_rtx);
12027 emit_move_insn (reg11, GEN_INT (args_size));
12028 use_reg (&call_fusage, reg11);
12029 }
12030
12031 use_reg (&call_fusage, reg10);
12032 }
12033 else
12034 {
12035 emit_insn (gen_push (GEN_INT (args_size)));
12036 emit_insn (gen_push (allocate_rtx));
12037 }
12038 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12039 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12040 NULL_RTX, false);
12041 add_function_usage_to (call_insn, call_fusage);
12042
12043 /* In order to make call/return prediction work right, we now need
12044 to execute a return instruction. See
12045 libgcc/config/i386/morestack.S for the details on how this works.
12046
12047 For flow purposes gcc must not see this as a return
12048 instruction--we need control flow to continue at the subsequent
12049 label. Therefore, we use an unspec. */
12050 gcc_assert (crtl->args.pops_args < 65536);
12051 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12052
12053 /* If we are in 64-bit mode and this function uses a static chain,
12054 we saved %r10 in %rax before calling __morestack. */
12055 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12056 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12057 gen_rtx_REG (word_mode, AX_REG));
12058
12059 /* If this function calls va_start, we need to store a pointer to
12060 the arguments on the old stack, because they may not have been
12061 all copied to the new stack. At this point the old stack can be
12062 found at the frame pointer value used by __morestack, because
12063 __morestack has set that up before calling back to us. Here we
12064 store that pointer in a scratch register, and in
12065 ix86_expand_prologue we store the scratch register in a stack
12066 slot. */
12067 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12068 {
12069 unsigned int scratch_regno;
12070 rtx frame_reg;
12071 int words;
12072
12073 scratch_regno = split_stack_prologue_scratch_regno ();
12074 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12075 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12076
12077 /* 64-bit:
12078 fp -> old fp value
12079 return address within this function
12080 return address of caller of this function
12081 stack arguments
12082 So we add three words to get to the stack arguments.
12083
12084 32-bit:
12085 fp -> old fp value
12086 return address within this function
12087 first argument to __morestack
12088 second argument to __morestack
12089 return address of caller of this function
12090 stack arguments
12091 So we add five words to get to the stack arguments.
12092 */
12093 words = TARGET_64BIT ? 3 : 5;
12094 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12095 gen_rtx_PLUS (Pmode, frame_reg,
12096 GEN_INT (words * UNITS_PER_WORD))));
12097
12098 varargs_label = gen_label_rtx ();
12099 emit_jump_insn (gen_jump (varargs_label));
12100 JUMP_LABEL (get_last_insn ()) = varargs_label;
12101
12102 emit_barrier ();
12103 }
12104
12105 emit_label (label);
12106 LABEL_NUSES (label) = 1;
12107
12108 /* If this function calls va_start, we now have to set the scratch
12109 register for the case where we do not call __morestack. In this
12110 case we need to set it based on the stack pointer. */
12111 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12112 {
12113 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12114 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12115 GEN_INT (UNITS_PER_WORD))));
12116
12117 emit_label (varargs_label);
12118 LABEL_NUSES (varargs_label) = 1;
12119 }
12120 }
12121
12122 /* We may have to tell the dataflow pass that the split stack prologue
12123 is initializing a scratch register. */
12124
12125 static void
12126 ix86_live_on_entry (bitmap regs)
12127 {
12128 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12129 {
12130 gcc_assert (flag_split_stack);
12131 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12132 }
12133 }
12134 \f
12135 /* Extract the parts of an RTL expression that is a valid memory address
12136 for an instruction. Return 0 if the structure of the address is
12137 grossly off. Return -1 if the address contains ASHIFT, so it is not
12138 strictly valid, but is still used for computing the length of an lea instruction. */
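/* For example (illustrative RTL, written with register names rather
   than register numbers), the address

       (plus:SI (plus:SI (mult:SI (reg:SI %ecx) (const_int 4))
                         (reg:SI %eax))
                (const_int 16))

   decomposes into base = %eax, index = %ecx, scale = 4 and disp = 16,
   i.e. the operand written 16(%eax,%ecx,4) in AT&T syntax.  */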
12139
12140 int
12141 ix86_decompose_address (rtx addr, struct ix86_address *out)
12142 {
12143 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12144 rtx base_reg, index_reg;
12145 HOST_WIDE_INT scale = 1;
12146 rtx scale_rtx = NULL_RTX;
12147 rtx tmp;
12148 int retval = 1;
12149 enum ix86_address_seg seg = SEG_DEFAULT;
12150
12151 /* Allow zero-extended SImode addresses;
12152 they will be emitted with the addr32 prefix. */
12153 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12154 {
12155 if (GET_CODE (addr) == ZERO_EXTEND
12156 && GET_MODE (XEXP (addr, 0)) == SImode)
12157 {
12158 addr = XEXP (addr, 0);
12159 if (CONST_INT_P (addr))
12160 return 0;
12161 }
12162 else if (GET_CODE (addr) == AND
12163 && const_32bit_mask (XEXP (addr, 1), DImode))
12164 {
12165 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12166 if (addr == NULL_RTX)
12167 return 0;
12168
12169 if (CONST_INT_P (addr))
12170 return 0;
12171 }
12172 }
12173
12174 /* Allow SImode subregs of DImode addresses;
12175 they will be emitted with the addr32 prefix. */
12176 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12177 {
12178 if (GET_CODE (addr) == SUBREG
12179 && GET_MODE (SUBREG_REG (addr)) == DImode)
12180 {
12181 addr = SUBREG_REG (addr);
12182 if (CONST_INT_P (addr))
12183 return 0;
12184 }
12185 }
12186
12187 if (REG_P (addr))
12188 base = addr;
12189 else if (GET_CODE (addr) == SUBREG)
12190 {
12191 if (REG_P (SUBREG_REG (addr)))
12192 base = addr;
12193 else
12194 return 0;
12195 }
12196 else if (GET_CODE (addr) == PLUS)
12197 {
12198 rtx addends[4], op;
12199 int n = 0, i;
12200
12201 op = addr;
12202 do
12203 {
12204 if (n >= 4)
12205 return 0;
12206 addends[n++] = XEXP (op, 1);
12207 op = XEXP (op, 0);
12208 }
12209 while (GET_CODE (op) == PLUS);
12210 if (n >= 4)
12211 return 0;
12212 addends[n] = op;
12213
12214 for (i = n; i >= 0; --i)
12215 {
12216 op = addends[i];
12217 switch (GET_CODE (op))
12218 {
12219 case MULT:
12220 if (index)
12221 return 0;
12222 index = XEXP (op, 0);
12223 scale_rtx = XEXP (op, 1);
12224 break;
12225
12226 case ASHIFT:
12227 if (index)
12228 return 0;
12229 index = XEXP (op, 0);
12230 tmp = XEXP (op, 1);
12231 if (!CONST_INT_P (tmp))
12232 return 0;
12233 scale = INTVAL (tmp);
12234 if ((unsigned HOST_WIDE_INT) scale > 3)
12235 return 0;
12236 scale = 1 << scale;
12237 break;
12238
12239 case ZERO_EXTEND:
12240 op = XEXP (op, 0);
12241 if (GET_CODE (op) != UNSPEC)
12242 return 0;
12243 /* FALLTHRU */
12244
12245 case UNSPEC:
12246 if (XINT (op, 1) == UNSPEC_TP
12247 && TARGET_TLS_DIRECT_SEG_REFS
12248 && seg == SEG_DEFAULT)
12249 seg = DEFAULT_TLS_SEG_REG;
12250 else
12251 return 0;
12252 break;
12253
12254 case SUBREG:
12255 if (!REG_P (SUBREG_REG (op)))
12256 return 0;
12257 /* FALLTHRU */
12258
12259 case REG:
12260 if (!base)
12261 base = op;
12262 else if (!index)
12263 index = op;
12264 else
12265 return 0;
12266 break;
12267
12268 case CONST:
12269 case CONST_INT:
12270 case SYMBOL_REF:
12271 case LABEL_REF:
12272 if (disp)
12273 return 0;
12274 disp = op;
12275 break;
12276
12277 default:
12278 return 0;
12279 }
12280 }
12281 }
12282 else if (GET_CODE (addr) == MULT)
12283 {
12284 index = XEXP (addr, 0); /* index*scale */
12285 scale_rtx = XEXP (addr, 1);
12286 }
12287 else if (GET_CODE (addr) == ASHIFT)
12288 {
12289 /* We're called for lea too, which implements ashift on occasion. */
12290 index = XEXP (addr, 0);
12291 tmp = XEXP (addr, 1);
12292 if (!CONST_INT_P (tmp))
12293 return 0;
12294 scale = INTVAL (tmp);
12295 if ((unsigned HOST_WIDE_INT) scale > 3)
12296 return 0;
12297 scale = 1 << scale;
12298 retval = -1;
12299 }
12300 else
12301 disp = addr; /* displacement */
12302
12303 if (index)
12304 {
12305 if (REG_P (index))
12306 ;
12307 else if (GET_CODE (index) == SUBREG
12308 && REG_P (SUBREG_REG (index)))
12309 ;
12310 else
12311 return 0;
12312 }
12313
12314 /* Extract the integral value of scale. */
12315 if (scale_rtx)
12316 {
12317 if (!CONST_INT_P (scale_rtx))
12318 return 0;
12319 scale = INTVAL (scale_rtx);
12320 }
12321
12322 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12323 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12324
12325 /* Avoid useless 0 displacement. */
12326 if (disp == const0_rtx && (base || index))
12327 disp = NULL_RTX;
12328
12329 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12330 if (base_reg && index_reg && scale == 1
12331 && (index_reg == arg_pointer_rtx
12332 || index_reg == frame_pointer_rtx
12333 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12334 {
12335 rtx tmp;
12336 tmp = base, base = index, index = tmp;
12337 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12338 }
12339
12340 /* Special case: %ebp cannot be encoded as a base without a displacement.
12341 Similarly %r13. */
12342 if (!disp
12343 && base_reg
12344 && (base_reg == hard_frame_pointer_rtx
12345 || base_reg == frame_pointer_rtx
12346 || base_reg == arg_pointer_rtx
12347 || (REG_P (base_reg)
12348 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12349 || REGNO (base_reg) == R13_REG))))
12350 disp = const0_rtx;
12351
12352 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12353 Avoid this by transforming it to [%esi+0].
12354 Reload calls address legitimization without cfun defined, so we need
12355 to test cfun for being non-NULL. */
12356 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12357 && base_reg && !index_reg && !disp
12358 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12359 disp = const0_rtx;
12360
12361 /* Special case: encode reg+reg instead of reg*2. */
12362 if (!base && index && scale == 2)
12363 base = index, base_reg = index_reg, scale = 1;
12364
12365 /* Special case: scaling cannot be encoded without base or displacement. */
12366 if (!base && !disp && index && scale != 1)
12367 disp = const0_rtx;
12368
12369 out->base = base;
12370 out->index = index;
12371 out->disp = disp;
12372 out->scale = scale;
12373 out->seg = seg;
12374
12375 return retval;
12376 }
12377 \f
12378 /* Return the cost of the memory address x.
12379 For i386, it is better to use a complex address than to let gcc copy
12380 the address into a reg and make a new pseudo. But not if the address
12381 requires two regs - that would mean more pseudos with longer
12382 lifetimes. */
12383 static int
12384 ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
12385 addr_space_t as ATTRIBUTE_UNUSED,
12386 bool speed ATTRIBUTE_UNUSED)
12387 {
12388 struct ix86_address parts;
12389 int cost = 1;
12390 int ok = ix86_decompose_address (x, &parts);
12391
12392 gcc_assert (ok);
12393
12394 if (parts.base && GET_CODE (parts.base) == SUBREG)
12395 parts.base = SUBREG_REG (parts.base);
12396 if (parts.index && GET_CODE (parts.index) == SUBREG)
12397 parts.index = SUBREG_REG (parts.index);
12398
12399 /* Attempt to minimize number of registers in the address. */
12400 if ((parts.base
12401 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12402 || (parts.index
12403 && (!REG_P (parts.index)
12404 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12405 cost++;
12406
12407 if (parts.base
12408 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12409 && parts.index
12410 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12411 && parts.base != parts.index)
12412 cost++;
12413
12414 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12415 since its predecode logic can't detect the length of such instructions
12416 and they degenerate to vector decoding. Increase the cost of such
12417 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12418 to split such addresses, or even to refuse them entirely.
12419 
12420 The following addressing modes are affected:
12421 [base+scale*index]
12422 [scale*index+disp]
12423 [base+index]
12424 
12425 The first and last cases may be avoidable by explicitly coding the zero
12426 displacement in the memory address, but I don't have an AMD K6 machine
12427 handy to check this theory. */
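/* For instance (illustrative AT&T operands, not an exhaustive list),
   the affected modes above correspond to operands such as
       (%eax,%ecx,4)      # [base+scale*index]
       16(,%ecx,4)        # [scale*index+disp]
       (%eax,%ecx)        # [base+index]
   Encoding an explicit zero displacement (mod = 01 with an 8-bit
   displacement of 0) would avoid the 00_xxx_100b ModR/M form for the
   first and last cases, which is the idea alluded to above.  */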
12428
12429 if (TARGET_K6
12430 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12431 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12432 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12433 cost += 10;
12434
12435 return cost;
12436 }
12437 \f
12438 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
12439 this is used to form addresses to local data when -fPIC is in
12440 use. */
12441
12442 static bool
12443 darwin_local_data_pic (rtx disp)
12444 {
12445 return (GET_CODE (disp) == UNSPEC
12446 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12447 }
12448
12449 /* Determine if a given RTX is a valid constant. We already know this
12450 satisfies CONSTANT_P. */
12451
12452 static bool
12453 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
12454 {
12455 switch (GET_CODE (x))
12456 {
12457 case CONST:
12458 x = XEXP (x, 0);
12459
12460 if (GET_CODE (x) == PLUS)
12461 {
12462 if (!CONST_INT_P (XEXP (x, 1)))
12463 return false;
12464 x = XEXP (x, 0);
12465 }
12466
12467 if (TARGET_MACHO && darwin_local_data_pic (x))
12468 return true;
12469
12470 /* Only some unspecs are valid as "constants". */
12471 if (GET_CODE (x) == UNSPEC)
12472 switch (XINT (x, 1))
12473 {
12474 case UNSPEC_GOT:
12475 case UNSPEC_GOTOFF:
12476 case UNSPEC_PLTOFF:
12477 return TARGET_64BIT;
12478 case UNSPEC_TPOFF:
12479 case UNSPEC_NTPOFF:
12480 x = XVECEXP (x, 0, 0);
12481 return (GET_CODE (x) == SYMBOL_REF
12482 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12483 case UNSPEC_DTPOFF:
12484 x = XVECEXP (x, 0, 0);
12485 return (GET_CODE (x) == SYMBOL_REF
12486 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12487 default:
12488 return false;
12489 }
12490
12491 /* We must have drilled down to a symbol. */
12492 if (GET_CODE (x) == LABEL_REF)
12493 return true;
12494 if (GET_CODE (x) != SYMBOL_REF)
12495 return false;
12496 /* FALLTHRU */
12497
12498 case SYMBOL_REF:
12499 /* TLS symbols are never valid. */
12500 if (SYMBOL_REF_TLS_MODEL (x))
12501 return false;
12502
12503 /* DLLIMPORT symbols are never valid. */
12504 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12505 && SYMBOL_REF_DLLIMPORT_P (x))
12506 return false;
12507
12508 #if TARGET_MACHO
12509 /* mdynamic-no-pic */
12510 if (MACHO_DYNAMIC_NO_PIC_P)
12511 return machopic_symbol_defined_p (x);
12512 #endif
12513 break;
12514
12515 case CONST_DOUBLE:
12516 if (GET_MODE (x) == TImode
12517 && x != CONST0_RTX (TImode)
12518 && !TARGET_64BIT)
12519 return false;
12520 break;
12521
12522 case CONST_VECTOR:
12523 if (!standard_sse_constant_p (x))
12524 return false;
12525
12526 default:
12527 break;
12528 }
12529
12530 /* Otherwise we handle everything else in the move patterns. */
12531 return true;
12532 }
12533
12534 /* Determine if it's legal to put X into the constant pool. This
12535 is not possible for the address of thread-local symbols, which
12536 is checked above. */
12537
12538 static bool
12539 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12540 {
12541 /* We can always put integral constants and vectors in memory. */
12542 switch (GET_CODE (x))
12543 {
12544 case CONST_INT:
12545 case CONST_DOUBLE:
12546 case CONST_VECTOR:
12547 return false;
12548
12549 default:
12550 break;
12551 }
12552 return !ix86_legitimate_constant_p (mode, x);
12553 }
12554
12555 /* Nonzero if the symbol is marked as dllimport, or as a stub variable,
12556 otherwise zero. */
12557
12558 static bool
12559 is_imported_p (rtx x)
12560 {
12561 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12562 || GET_CODE (x) != SYMBOL_REF)
12563 return false;
12564
12565 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12566 }
12567
12568
12569 /* Nonzero if the constant value X is a legitimate general operand
12570 when generating PIC code. It is given that flag_pic is on and
12571 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12572
12573 bool
12574 legitimate_pic_operand_p (rtx x)
12575 {
12576 rtx inner;
12577
12578 switch (GET_CODE (x))
12579 {
12580 case CONST:
12581 inner = XEXP (x, 0);
12582 if (GET_CODE (inner) == PLUS
12583 && CONST_INT_P (XEXP (inner, 1)))
12584 inner = XEXP (inner, 0);
12585
12586 /* Only some unspecs are valid as "constants". */
12587 if (GET_CODE (inner) == UNSPEC)
12588 switch (XINT (inner, 1))
12589 {
12590 case UNSPEC_GOT:
12591 case UNSPEC_GOTOFF:
12592 case UNSPEC_PLTOFF:
12593 return TARGET_64BIT;
12594 case UNSPEC_TPOFF:
12595 x = XVECEXP (inner, 0, 0);
12596 return (GET_CODE (x) == SYMBOL_REF
12597 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12598 case UNSPEC_MACHOPIC_OFFSET:
12599 return legitimate_pic_address_disp_p (x);
12600 default:
12601 return false;
12602 }
12603 /* FALLTHRU */
12604
12605 case SYMBOL_REF:
12606 case LABEL_REF:
12607 return legitimate_pic_address_disp_p (x);
12608
12609 default:
12610 return true;
12611 }
12612 }
12613
12614 /* Determine if a given CONST RTX is a valid memory displacement
12615 in PIC mode. */
12616
12617 bool
12618 legitimate_pic_address_disp_p (rtx disp)
12619 {
12620 bool saw_plus;
12621
12622 /* In 64bit mode we can allow direct addresses of symbols and labels
12623 when they are not dynamic symbols. */
12624 if (TARGET_64BIT)
12625 {
12626 rtx op0 = disp, op1;
12627
12628 switch (GET_CODE (disp))
12629 {
12630 case LABEL_REF:
12631 return true;
12632
12633 case CONST:
12634 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12635 break;
12636 op0 = XEXP (XEXP (disp, 0), 0);
12637 op1 = XEXP (XEXP (disp, 0), 1);
12638 if (!CONST_INT_P (op1)
12639 || INTVAL (op1) >= 16*1024*1024
12640 || INTVAL (op1) < -16*1024*1024)
12641 break;
12642 if (GET_CODE (op0) == LABEL_REF)
12643 return true;
12644 if (GET_CODE (op0) == CONST
12645 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12646 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12647 return true;
12648 if (GET_CODE (op0) == UNSPEC
12649 && XINT (op0, 1) == UNSPEC_PCREL)
12650 return true;
12651 if (GET_CODE (op0) != SYMBOL_REF)
12652 break;
12653 /* FALLTHRU */
12654
12655 case SYMBOL_REF:
12656 /* TLS references should always be enclosed in UNSPEC.
12657 A dllimported symbol always needs to be resolved. */
12658 if (SYMBOL_REF_TLS_MODEL (op0)
12659 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12660 return false;
12661
12662 if (TARGET_PECOFF)
12663 {
12664 if (is_imported_p (op0))
12665 return true;
12666
12667 if (SYMBOL_REF_FAR_ADDR_P (op0)
12668 || !SYMBOL_REF_LOCAL_P (op0))
12669 break;
12670
12671 /* Function symbols need to be resolved only for
12672 the large model.
12673 For the small model we don't need to resolve anything
12674 here. */
12675 if ((ix86_cmodel != CM_LARGE_PIC
12676 && SYMBOL_REF_FUNCTION_P (op0))
12677 || ix86_cmodel == CM_SMALL_PIC)
12678 return true;
12679 /* Non-external symbols don't need to be resolved for
12680 the large and medium models. */
12681 if ((ix86_cmodel == CM_LARGE_PIC
12682 || ix86_cmodel == CM_MEDIUM_PIC)
12683 && !SYMBOL_REF_EXTERNAL_P (op0))
12684 return true;
12685 }
12686 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12687 && SYMBOL_REF_LOCAL_P (op0)
12688 && ix86_cmodel != CM_LARGE_PIC)
12689 return true;
12690 break;
12691
12692 default:
12693 break;
12694 }
12695 }
12696 if (GET_CODE (disp) != CONST)
12697 return false;
12698 disp = XEXP (disp, 0);
12699
12700 if (TARGET_64BIT)
12701 {
12702 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
12703 of GOT tables. We should not need these anyway. */
12704 if (GET_CODE (disp) != UNSPEC
12705 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12706 && XINT (disp, 1) != UNSPEC_GOTOFF
12707 && XINT (disp, 1) != UNSPEC_PCREL
12708 && XINT (disp, 1) != UNSPEC_PLTOFF))
12709 return false;
12710
12711 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12712 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12713 return false;
12714 return true;
12715 }
12716
12717 saw_plus = false;
12718 if (GET_CODE (disp) == PLUS)
12719 {
12720 if (!CONST_INT_P (XEXP (disp, 1)))
12721 return false;
12722 disp = XEXP (disp, 0);
12723 saw_plus = true;
12724 }
12725
12726 if (TARGET_MACHO && darwin_local_data_pic (disp))
12727 return true;
12728
12729 if (GET_CODE (disp) != UNSPEC)
12730 return false;
12731
12732 switch (XINT (disp, 1))
12733 {
12734 case UNSPEC_GOT:
12735 if (saw_plus)
12736 return false;
12737 /* We need to check for both symbols and labels because VxWorks loads
12738 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12739 details. */
12740 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12741 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12742 case UNSPEC_GOTOFF:
12743 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12744 While the ABI also specifies a 32bit relocation, we don't produce it in
12745 the small PIC model at all. */
12746 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12747 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12748 && !TARGET_64BIT)
12749 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12750 return false;
12751 case UNSPEC_GOTTPOFF:
12752 case UNSPEC_GOTNTPOFF:
12753 case UNSPEC_INDNTPOFF:
12754 if (saw_plus)
12755 return false;
12756 disp = XVECEXP (disp, 0, 0);
12757 return (GET_CODE (disp) == SYMBOL_REF
12758 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12759 case UNSPEC_NTPOFF:
12760 disp = XVECEXP (disp, 0, 0);
12761 return (GET_CODE (disp) == SYMBOL_REF
12762 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12763 case UNSPEC_DTPOFF:
12764 disp = XVECEXP (disp, 0, 0);
12765 return (GET_CODE (disp) == SYMBOL_REF
12766 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12767 }
12768
12769 return false;
12770 }
12771
12772 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns true if the
12773 invalid parts of address X were pushed as reloads, in which case the
12774 caller treats the address as handled; returns false if X was left
12775 unchanged and the default reload processing should continue. */
12776
12777 bool
12778 ix86_legitimize_reload_address (rtx x,
12779 enum machine_mode mode ATTRIBUTE_UNUSED,
12780 int opnum, int type,
12781 int ind_levels ATTRIBUTE_UNUSED)
12782 {
12783 /* Reload can generate:
12784
12785 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12786 (reg:DI 97))
12787 (reg:DI 2 cx))
12788
12789 This RTX is rejected by ix86_legitimate_address_p because base
12790 register 97 is a pseudo and fails the strict base register check.
12791 Following this rejection, reload pushes all three components into
12792 separate registers, creating an invalid memory address RTX.
12793 
12794 The following code reloads only the invalid part of the
12795 memory address RTX. */
12796
12797 if (GET_CODE (x) == PLUS
12798 && REG_P (XEXP (x, 1))
12799 && GET_CODE (XEXP (x, 0)) == PLUS
12800 && REG_P (XEXP (XEXP (x, 0), 1)))
12801 {
12802 rtx base, index;
12803 bool something_reloaded = false;
12804
12805 base = XEXP (XEXP (x, 0), 1);
12806 if (!REG_OK_FOR_BASE_STRICT_P (base))
12807 {
12808 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12809 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12810 opnum, (enum reload_type) type);
12811 something_reloaded = true;
12812 }
12813
12814 index = XEXP (x, 1);
12815 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12816 {
12817 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12818 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12819 opnum, (enum reload_type) type);
12820 something_reloaded = true;
12821 }
12822
12823 gcc_assert (something_reloaded);
12824 return true;
12825 }
12826
12827 return false;
12828 }
12829
12830 /* Determine if OP is a suitable RTX for an address register.
12831 Return the naked register if a register or a register subreg is
12832 found, otherwise return NULL_RTX. */
12833
12834 static rtx
12835 ix86_validate_address_register (rtx op)
12836 {
12837 enum machine_mode mode = GET_MODE (op);
12838
12839 /* Only SImode or DImode registers can form the address. */
12840 if (mode != SImode && mode != DImode)
12841 return NULL_RTX;
12842
12843 if (REG_P (op))
12844 return op;
12845 else if (GET_CODE (op) == SUBREG)
12846 {
12847 rtx reg = SUBREG_REG (op);
12848
12849 if (!REG_P (reg))
12850 return NULL_RTX;
12851
12852 mode = GET_MODE (reg);
12853
12854 /* Don't allow SUBREGs that span more than a word. It can
12855 lead to spill failures when the register is one word out
12856 of a two word structure. */
12857 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12858 return NULL_RTX;
12859
12860 /* Allow only SUBREGs of non-eliminable hard registers. */
12861 if (register_no_elim_operand (reg, mode))
12862 return reg;
12863 }
12864
12865 /* Op is not a register. */
12866 return NULL_RTX;
12867 }
12868
12869 /* Recognizes RTL expressions that are valid memory addresses for an
12870 instruction. The MODE argument is the machine mode for the MEM
12871 expression that wants to use this address.
12872
12873 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12874 convert common non-canonical forms to canonical form so that they will
12875 be recognized. */
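/* For instance (illustrative only): an address whose decomposition
   yields scale == 3, or whose index decomposes to the stack pointer,
   is rejected by the checks below, while an ordinary operand such as
   8(%eax,%ecx,4) (base %eax, index %ecx, scale 4, disp 8) passes
   them all.  */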
12876
12877 static bool
12878 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12879 rtx addr, bool strict)
12880 {
12881 struct ix86_address parts;
12882 rtx base, index, disp;
12883 HOST_WIDE_INT scale;
12884 enum ix86_address_seg seg;
12885
12886 if (ix86_decompose_address (addr, &parts) <= 0)
12887 /* Decomposition failed. */
12888 return false;
12889
12890 base = parts.base;
12891 index = parts.index;
12892 disp = parts.disp;
12893 scale = parts.scale;
12894 seg = parts.seg;
12895
12896 /* Validate base register. */
12897 if (base)
12898 {
12899 rtx reg = ix86_validate_address_register (base);
12900
12901 if (reg == NULL_RTX)
12902 return false;
12903
12904 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12905 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12906 /* Base is not valid. */
12907 return false;
12908 }
12909
12910 /* Validate index register. */
12911 if (index)
12912 {
12913 rtx reg = ix86_validate_address_register (index);
12914
12915 if (reg == NULL_RTX)
12916 return false;
12917
12918 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12919 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12920 /* Index is not valid. */
12921 return false;
12922 }
12923
12924 /* Index and base should have the same mode. */
12925 if (base && index
12926 && GET_MODE (base) != GET_MODE (index))
12927 return false;
12928
12929 /* Address override works only on the (%reg) part of %fs:(%reg). */
12930 if (seg != SEG_DEFAULT
12931 && ((base && GET_MODE (base) != word_mode)
12932 || (index && GET_MODE (index) != word_mode)))
12933 return false;
12934
12935 /* Validate scale factor. */
12936 if (scale != 1)
12937 {
12938 if (!index)
12939 /* Scale without index. */
12940 return false;
12941
12942 if (scale != 2 && scale != 4 && scale != 8)
12943 /* Scale is not a valid multiplier. */
12944 return false;
12945 }
12946
12947 /* Validate displacement. */
12948 if (disp)
12949 {
12950 if (GET_CODE (disp) == CONST
12951 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12952 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12953 switch (XINT (XEXP (disp, 0), 1))
12954 {
12955 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12956 used. While the ABI also specifies 32bit relocations, we don't produce
12957 them at all and use IP-relative addressing instead. */
12958 case UNSPEC_GOT:
12959 case UNSPEC_GOTOFF:
12960 gcc_assert (flag_pic);
12961 if (!TARGET_64BIT)
12962 goto is_legitimate_pic;
12963
12964 /* 64bit address unspec. */
12965 return false;
12966
12967 case UNSPEC_GOTPCREL:
12968 case UNSPEC_PCREL:
12969 gcc_assert (flag_pic);
12970 goto is_legitimate_pic;
12971
12972 case UNSPEC_GOTTPOFF:
12973 case UNSPEC_GOTNTPOFF:
12974 case UNSPEC_INDNTPOFF:
12975 case UNSPEC_NTPOFF:
12976 case UNSPEC_DTPOFF:
12977 break;
12978
12979 case UNSPEC_STACK_CHECK:
12980 gcc_assert (flag_split_stack);
12981 break;
12982
12983 default:
12984 /* Invalid address unspec. */
12985 return false;
12986 }
12987
12988 else if (SYMBOLIC_CONST (disp)
12989 && (flag_pic
12990 || (TARGET_MACHO
12991 #if TARGET_MACHO
12992 && MACHOPIC_INDIRECT
12993 && !machopic_operand_p (disp)
12994 #endif
12995 )))
12996 {
12997
12998 is_legitimate_pic:
12999 if (TARGET_64BIT && (index || base))
13000 {
13001 /* foo@dtpoff(%rX) is ok. */
13002 if (GET_CODE (disp) != CONST
13003 || GET_CODE (XEXP (disp, 0)) != PLUS
13004 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13005 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13006 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13007 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13008 /* Non-constant pic memory reference. */
13009 return false;
13010 }
13011 else if ((!TARGET_MACHO || flag_pic)
13012 && ! legitimate_pic_address_disp_p (disp))
13013 /* Displacement is an invalid pic construct. */
13014 return false;
13015 #if TARGET_MACHO
13016 else if (MACHO_DYNAMIC_NO_PIC_P
13017 && !ix86_legitimate_constant_p (Pmode, disp))
13018 /* displacement must be referenced via non_lazy_pointer */
13019 return false;
13020 #endif
13021
13022 /* This code used to verify that a symbolic pic displacement
13023 includes the pic_offset_table_rtx register.
13024
13025 While this is a good idea, unfortunately these constructs may
13026 be created by the "adds using lea" optimization for incorrect
13027 code like:
13028
13029 int a;
13030 int foo(int i)
13031 {
13032 return *(&a+i);
13033 }
13034
13035 This code is nonsensical, but it results in addressing the
13036 GOT table with a pic_offset_table_rtx base. We can't
13037 easily refuse it, since it gets matched by the
13038 "addsi3" pattern, which later gets split to lea when the
13039 output register differs from the input. While this
13040 could be handled by a separate addsi pattern for this case
13041 that never results in lea, disabling this test seems to be
13042 the easier and correct fix for the crash. */
13043 }
13044 else if (GET_CODE (disp) != LABEL_REF
13045 && !CONST_INT_P (disp)
13046 && (GET_CODE (disp) != CONST
13047 || !ix86_legitimate_constant_p (Pmode, disp))
13048 && (GET_CODE (disp) != SYMBOL_REF
13049 || !ix86_legitimate_constant_p (Pmode, disp)))
13050 /* Displacement is not constant. */
13051 return false;
13052 else if (TARGET_64BIT
13053 && !x86_64_immediate_operand (disp, VOIDmode))
13054 /* Displacement is out of range. */
13055 return false;
13056 /* In x32 mode, constant addresses are sign extended to 64bit, so
13057 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13058 else if (TARGET_X32 && !(index || base)
13059 && CONST_INT_P (disp)
13060 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13061 return false;
13062 }
13063
13064 /* Everything looks valid. */
13065 return true;
13066 }
13067
13068 /* Determine if a given RTX is a valid constant address. */
13069
13070 bool
13071 constant_address_p (rtx x)
13072 {
13073 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13074 }
13075 \f
13076 /* Return a unique alias set for the GOT. */
13077
13078 static alias_set_type
13079 ix86_GOT_alias_set (void)
13080 {
13081 static alias_set_type set = -1;
13082 if (set == -1)
13083 set = new_alias_set ();
13084 return set;
13085 }
13086
13087 /* Return a legitimate reference for ORIG (an address) using the
13088 register REG. If REG is 0, a new pseudo is generated.
13089
13090 There are two types of references that must be handled:
13091
13092 1. Global data references must load the address from the GOT, via
13093 the PIC reg. An insn is emitted to do this load, and the reg is
13094 returned.
13095
13096 2. Static data references, constant pool addresses, and code labels
13097 compute the address as an offset from the GOT, whose base is in
13098 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13099 differentiate them from global data objects. The returned
13100 address is the PIC reg + an unspec constant.
13101
13102 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13103 reg also appears in the address. */
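/* As a rough 32-bit illustration (the precise assembly depends on the
   target assembler and is only a sketch): a global symbol "glob" is
   typically reached through the GOT, e.g.
       movl glob@GOT(%ebx), %eax      # case 1: load the address from the GOT
   while a local static symbol "loc" is addressed as an offset from the
   GOT base kept in the PIC register, e.g.
       leal loc@GOTOFF(%ebx), %eax    # case 2: PIC reg + unspec constant
   Case 2 corresponds to the UNSPEC_GOTOFF constants built below.  */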
13104
13105 static rtx
13106 legitimize_pic_address (rtx orig, rtx reg)
13107 {
13108 rtx addr = orig;
13109 rtx new_rtx = orig;
13110
13111 #if TARGET_MACHO
13112 if (TARGET_MACHO && !TARGET_64BIT)
13113 {
13114 if (reg == 0)
13115 reg = gen_reg_rtx (Pmode);
13116 /* Use the generic Mach-O PIC machinery. */
13117 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13118 }
13119 #endif
13120
13121 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13122 {
13123 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13124 if (tmp)
13125 return tmp;
13126 }
13127
13128 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13129 new_rtx = addr;
13130 else if (TARGET_64BIT && !TARGET_PECOFF
13131 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13132 {
13133 rtx tmpreg;
13134 /* This symbol may be referenced via a displacement from the PIC
13135 base address (@GOTOFF). */
13136
13137 if (reload_in_progress)
13138 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13139 if (GET_CODE (addr) == CONST)
13140 addr = XEXP (addr, 0);
13141 if (GET_CODE (addr) == PLUS)
13142 {
13143 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13144 UNSPEC_GOTOFF);
13145 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13146 }
13147 else
13148 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13149 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13150 if (!reg)
13151 tmpreg = gen_reg_rtx (Pmode);
13152 else
13153 tmpreg = reg;
13154 emit_move_insn (tmpreg, new_rtx);
13155
13156 if (reg != 0)
13157 {
13158 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13159 tmpreg, 1, OPTAB_DIRECT);
13160 new_rtx = reg;
13161 }
13162 else
13163 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13164 }
13165 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13166 {
13167 /* This symbol may be referenced via a displacement from the PIC
13168 base address (@GOTOFF). */
13169
13170 if (reload_in_progress)
13171 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13172 if (GET_CODE (addr) == CONST)
13173 addr = XEXP (addr, 0);
13174 if (GET_CODE (addr) == PLUS)
13175 {
13176 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13177 UNSPEC_GOTOFF);
13178 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13179 }
13180 else
13181 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13182 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13183 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13184
13185 if (reg != 0)
13186 {
13187 emit_move_insn (reg, new_rtx);
13188 new_rtx = reg;
13189 }
13190 }
13191 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13192 /* We can't use @GOTOFF for text labels on VxWorks;
13193 see gotoff_operand. */
13194 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13195 {
13196 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13197 if (tmp)
13198 return tmp;
13199
13200 /* For x64 PE-COFF there is no GOT table, so we use the address
13201 directly. */
13202 if (TARGET_64BIT && TARGET_PECOFF)
13203 {
13204 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13205 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13206
13207 if (reg == 0)
13208 reg = gen_reg_rtx (Pmode);
13209 emit_move_insn (reg, new_rtx);
13210 new_rtx = reg;
13211 }
13212 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13213 {
13214 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13215 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13216 new_rtx = gen_const_mem (Pmode, new_rtx);
13217 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13218
13219 if (reg == 0)
13220 reg = gen_reg_rtx (Pmode);
13221 /* Use gen_movsi directly, otherwise the address is loaded
13222 into a register for CSE. We don't want to CSE these addresses;
13223 instead we CSE addresses from the GOT table, so skip this. */
13224 emit_insn (gen_movsi (reg, new_rtx));
13225 new_rtx = reg;
13226 }
13227 else
13228 {
13229 /* This symbol must be referenced via a load from the
13230 Global Offset Table (@GOT). */
13231
13232 if (reload_in_progress)
13233 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13234 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13235 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13236 if (TARGET_64BIT)
13237 new_rtx = force_reg (Pmode, new_rtx);
13238 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13239 new_rtx = gen_const_mem (Pmode, new_rtx);
13240 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13241
13242 if (reg == 0)
13243 reg = gen_reg_rtx (Pmode);
13244 emit_move_insn (reg, new_rtx);
13245 new_rtx = reg;
13246 }
13247 }
13248 else
13249 {
13250 if (CONST_INT_P (addr)
13251 && !x86_64_immediate_operand (addr, VOIDmode))
13252 {
13253 if (reg)
13254 {
13255 emit_move_insn (reg, addr);
13256 new_rtx = reg;
13257 }
13258 else
13259 new_rtx = force_reg (Pmode, addr);
13260 }
13261 else if (GET_CODE (addr) == CONST)
13262 {
13263 addr = XEXP (addr, 0);
13264
13265 /* We must match stuff we generate before. Assume the only
13266 unspecs that can get here are ours. Not that we could do
13267 anything with them anyway.... */
13268 if (GET_CODE (addr) == UNSPEC
13269 || (GET_CODE (addr) == PLUS
13270 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13271 return orig;
13272 gcc_assert (GET_CODE (addr) == PLUS);
13273 }
13274 if (GET_CODE (addr) == PLUS)
13275 {
13276 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13277
13278 /* Check first to see if this is a constant offset from a @GOTOFF
13279 symbol reference. */
13280 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13281 && CONST_INT_P (op1))
13282 {
13283 if (!TARGET_64BIT)
13284 {
13285 if (reload_in_progress)
13286 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13287 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13288 UNSPEC_GOTOFF);
13289 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13290 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13291 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13292
13293 if (reg != 0)
13294 {
13295 emit_move_insn (reg, new_rtx);
13296 new_rtx = reg;
13297 }
13298 }
13299 else
13300 {
13301 if (INTVAL (op1) < -16*1024*1024
13302 || INTVAL (op1) >= 16*1024*1024)
13303 {
13304 if (!x86_64_immediate_operand (op1, Pmode))
13305 op1 = force_reg (Pmode, op1);
13306 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13307 }
13308 }
13309 }
13310 else
13311 {
13312 rtx base = legitimize_pic_address (op0, reg);
13313 enum machine_mode mode = GET_MODE (base);
13314 new_rtx
13315 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13316
13317 if (CONST_INT_P (new_rtx))
13318 {
13319 if (INTVAL (new_rtx) < -16*1024*1024
13320 || INTVAL (new_rtx) >= 16*1024*1024)
13321 {
13322 if (!x86_64_immediate_operand (new_rtx, mode))
13323 new_rtx = force_reg (mode, new_rtx);
13324 new_rtx
13325 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13326 }
13327 else
13328 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13329 }
13330 else
13331 {
13332 if (GET_CODE (new_rtx) == PLUS
13333 && CONSTANT_P (XEXP (new_rtx, 1)))
13334 {
13335 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13336 new_rtx = XEXP (new_rtx, 1);
13337 }
13338 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13339 }
13340 }
13341 }
13342 }
13343 return new_rtx;
13344 }
13345 \f
13346 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13347
13348 static rtx
13349 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13350 {
13351 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13352
13353 if (GET_MODE (tp) != tp_mode)
13354 {
13355 gcc_assert (GET_MODE (tp) == SImode);
13356 gcc_assert (tp_mode == DImode);
13357
13358 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13359 }
13360
13361 if (to_reg)
13362 tp = copy_to_mode_reg (tp_mode, tp);
13363
13364 return tp;
13365 }
13366
13367 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13368
13369 static GTY(()) rtx ix86_tls_symbol;
13370
13371 static rtx
13372 ix86_tls_get_addr (void)
13373 {
13374 if (!ix86_tls_symbol)
13375 {
13376 const char *sym
13377 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13378 ? "___tls_get_addr" : "__tls_get_addr");
13379
13380 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13381 }
13382
13383 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13384 {
13385 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13386 UNSPEC_PLTOFF);
13387 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13388 gen_rtx_CONST (Pmode, unspec));
13389 }
13390
13391 return ix86_tls_symbol;
13392 }
13393
13394 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13395
13396 static GTY(()) rtx ix86_tls_module_base_symbol;
13397
13398 rtx
13399 ix86_tls_module_base (void)
13400 {
13401 if (!ix86_tls_module_base_symbol)
13402 {
13403 ix86_tls_module_base_symbol
13404 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13405
13406 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13407 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13408 }
13409
13410 return ix86_tls_module_base_symbol;
13411 }
13412
13413 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13414 false if we expect this to be used for a memory address and true if
13415 we expect to load the address into a register. */
13416
13417 static rtx
13418 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13419 {
13420 rtx dest, base, off;
13421 rtx pic = NULL_RTX, tp = NULL_RTX;
13422 enum machine_mode tp_mode = Pmode;
13423 int type;
13424
13425 /* Fall back to the global dynamic model if the toolchain cannot support
13426 local dynamic. */
13427 if (TARGET_SUN_TLS && !TARGET_64BIT
13428 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13429 && model == TLS_MODEL_LOCAL_DYNAMIC)
13430 model = TLS_MODEL_GLOBAL_DYNAMIC;
13431
13432 switch (model)
13433 {
13434 case TLS_MODEL_GLOBAL_DYNAMIC:
13435 dest = gen_reg_rtx (Pmode);
13436
13437 if (!TARGET_64BIT)
13438 {
13439 if (flag_pic && !TARGET_PECOFF)
13440 pic = pic_offset_table_rtx;
13441 else
13442 {
13443 pic = gen_reg_rtx (Pmode);
13444 emit_insn (gen_set_got (pic));
13445 }
13446 }
13447
13448 if (TARGET_GNU2_TLS)
13449 {
13450 if (TARGET_64BIT)
13451 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13452 else
13453 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13454
13455 tp = get_thread_pointer (Pmode, true);
13456 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13457
13458 if (GET_MODE (x) != Pmode)
13459 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13460
13461 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13462 }
13463 else
13464 {
13465 rtx caddr = ix86_tls_get_addr ();
13466
13467 if (TARGET_64BIT)
13468 {
13469 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13470 rtx insns;
13471
13472 start_sequence ();
13473 emit_call_insn
13474 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13475 insns = get_insns ();
13476 end_sequence ();
13477
13478 if (GET_MODE (x) != Pmode)
13479 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13480
13481 RTL_CONST_CALL_P (insns) = 1;
13482 emit_libcall_block (insns, dest, rax, x);
13483 }
13484 else
13485 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13486 }
13487 break;
13488
13489 case TLS_MODEL_LOCAL_DYNAMIC:
13490 base = gen_reg_rtx (Pmode);
13491
13492 if (!TARGET_64BIT)
13493 {
13494 if (flag_pic)
13495 pic = pic_offset_table_rtx;
13496 else
13497 {
13498 pic = gen_reg_rtx (Pmode);
13499 emit_insn (gen_set_got (pic));
13500 }
13501 }
13502
13503 if (TARGET_GNU2_TLS)
13504 {
13505 rtx tmp = ix86_tls_module_base ();
13506
13507 if (TARGET_64BIT)
13508 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13509 else
13510 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13511
13512 tp = get_thread_pointer (Pmode, true);
13513 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13514 gen_rtx_MINUS (Pmode, tmp, tp));
13515 }
13516 else
13517 {
13518 rtx caddr = ix86_tls_get_addr ();
13519
13520 if (TARGET_64BIT)
13521 {
13522 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13523 rtx insns, eqv;
13524
13525 start_sequence ();
13526 emit_call_insn
13527 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13528 insns = get_insns ();
13529 end_sequence ();
13530
13531 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13532 share the LD_BASE result with other LD model accesses. */
13533 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13534 UNSPEC_TLS_LD_BASE);
13535
13536 RTL_CONST_CALL_P (insns) = 1;
13537 emit_libcall_block (insns, base, rax, eqv);
13538 }
13539 else
13540 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13541 }
13542
13543 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13544 off = gen_rtx_CONST (Pmode, off);
13545
13546 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13547
13548 if (TARGET_GNU2_TLS)
13549 {
13550 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13551
13552 if (GET_MODE (x) != Pmode)
13553 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13554
13555 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13556 }
13557 break;
13558
13559 case TLS_MODEL_INITIAL_EXEC:
13560 if (TARGET_64BIT)
13561 {
13562 if (TARGET_SUN_TLS && !TARGET_X32)
13563 {
13564 /* The Sun linker took the AMD64 TLS spec literally
13565 and can only handle %rax as the destination of the
13566 initial-exec code sequence. */
13567
13568 dest = gen_reg_rtx (DImode);
13569 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13570 return dest;
13571 }
13572
13573 /* Generate DImode references to avoid %fs:(%reg32)
13574 problems and the linker IE->LE relaxation bug. */
13575 tp_mode = DImode;
13576 pic = NULL;
13577 type = UNSPEC_GOTNTPOFF;
13578 }
13579 else if (flag_pic)
13580 {
13581 if (reload_in_progress)
13582 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13583 pic = pic_offset_table_rtx;
13584 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13585 }
13586 else if (!TARGET_ANY_GNU_TLS)
13587 {
13588 pic = gen_reg_rtx (Pmode);
13589 emit_insn (gen_set_got (pic));
13590 type = UNSPEC_GOTTPOFF;
13591 }
13592 else
13593 {
13594 pic = NULL;
13595 type = UNSPEC_INDNTPOFF;
13596 }
13597
13598 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13599 off = gen_rtx_CONST (tp_mode, off);
13600 if (pic)
13601 off = gen_rtx_PLUS (tp_mode, pic, off);
13602 off = gen_const_mem (tp_mode, off);
13603 set_mem_alias_set (off, ix86_GOT_alias_set ());
13604
13605 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13606 {
13607 base = get_thread_pointer (tp_mode,
13608 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13609 off = force_reg (tp_mode, off);
13610 return gen_rtx_PLUS (tp_mode, base, off);
13611 }
13612 else
13613 {
13614 base = get_thread_pointer (Pmode, true);
13615 dest = gen_reg_rtx (Pmode);
13616 emit_insn (ix86_gen_sub3 (dest, base, off));
13617 }
13618 break;
13619
13620 case TLS_MODEL_LOCAL_EXEC:
13621 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13622 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13623 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13624 off = gen_rtx_CONST (Pmode, off);
13625
13626 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13627 {
13628 base = get_thread_pointer (Pmode,
13629 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13630 return gen_rtx_PLUS (Pmode, base, off);
13631 }
13632 else
13633 {
13634 base = get_thread_pointer (Pmode, true);
13635 dest = gen_reg_rtx (Pmode);
13636 emit_insn (ix86_gen_sub3 (dest, base, off));
13637 }
13638 break;
13639
13640 default:
13641 gcc_unreachable ();
13642 }
13643
13644 return dest;
13645 }
13646
13647 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13648 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13649 unique refptr-DECL symbol corresponding to symbol DECL. */
13650
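/* Illustrative sketch (an assumption for exposition, not from the original
   sources): given a hypothetical declaration

     __declspec (dllimport) extern int bar;

   references to "bar" are redirected through the import-table slot
   "__imp_bar" (or "__imp__bar" when a user label prefix is in effect),
   i.e. conceptually "*(&__imp_bar)".  The refptr variant below works the
   same way but indirects through a locally emitted "refptr.bar" slot.  */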
13651 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13652 htab_t dllimport_map;
13653
13654 static tree
13655 get_dllimport_decl (tree decl, bool beimport)
13656 {
13657 struct tree_map *h, in;
13658 void **loc;
13659 const char *name;
13660 const char *prefix;
13661 size_t namelen, prefixlen;
13662 char *imp_name;
13663 tree to;
13664 rtx rtl;
13665
13666 if (!dllimport_map)
13667 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13668
13669 in.hash = htab_hash_pointer (decl);
13670 in.base.from = decl;
13671 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13672 h = (struct tree_map *) *loc;
13673 if (h)
13674 return h->to;
13675
13676 *loc = h = ggc_alloc<tree_map> ();
13677 h->hash = in.hash;
13678 h->base.from = decl;
13679 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13680 VAR_DECL, NULL, ptr_type_node);
13681 DECL_ARTIFICIAL (to) = 1;
13682 DECL_IGNORED_P (to) = 1;
13683 DECL_EXTERNAL (to) = 1;
13684 TREE_READONLY (to) = 1;
13685
13686 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13687 name = targetm.strip_name_encoding (name);
13688 if (beimport)
13689 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13690 ? "*__imp_" : "*__imp__";
13691 else
13692 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13693 namelen = strlen (name);
13694 prefixlen = strlen (prefix);
13695 imp_name = (char *) alloca (namelen + prefixlen + 1);
13696 memcpy (imp_name, prefix, prefixlen);
13697 memcpy (imp_name + prefixlen, name, namelen + 1);
13698
13699 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13700 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13701 SET_SYMBOL_REF_DECL (rtl, to);
13702 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13703 if (!beimport)
13704 {
13705 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13706 #ifdef SUB_TARGET_RECORD_STUB
13707 SUB_TARGET_RECORD_STUB (name);
13708 #endif
13709 }
13710
13711 rtl = gen_const_mem (Pmode, rtl);
13712 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13713
13714 SET_DECL_RTL (to, rtl);
13715 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13716
13717 return to;
13718 }
13719
13720 /* Expand SYMBOL into its corresponding far-address symbol.
13721 WANT_REG is true if we require the result to be a register. */
13722
13723 static rtx
13724 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13725 {
13726 tree imp_decl;
13727 rtx x;
13728
13729 gcc_assert (SYMBOL_REF_DECL (symbol));
13730 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13731
13732 x = DECL_RTL (imp_decl);
13733 if (want_reg)
13734 x = force_reg (Pmode, x);
13735 return x;
13736 }
13737
13738 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13739 true if we require the result to be a register. */
13740
13741 static rtx
13742 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13743 {
13744 tree imp_decl;
13745 rtx x;
13746
13747 gcc_assert (SYMBOL_REF_DECL (symbol));
13748 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13749
13750 x = DECL_RTL (imp_decl);
13751 if (want_reg)
13752 x = force_reg (Pmode, x);
13753 return x;
13754 }
13755
13756 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
13757 is true if we require the result to be a register. */
13758
13759 static rtx
13760 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13761 {
13762 if (!TARGET_PECOFF)
13763 return NULL_RTX;
13764
13765 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13766 {
13767 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13768 return legitimize_dllimport_symbol (addr, inreg);
13769 if (GET_CODE (addr) == CONST
13770 && GET_CODE (XEXP (addr, 0)) == PLUS
13771 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13772 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13773 {
13774 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13775 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13776 }
13777 }
13778
13779 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13780 return NULL_RTX;
13781 if (GET_CODE (addr) == SYMBOL_REF
13782 && !is_imported_p (addr)
13783 && SYMBOL_REF_EXTERNAL_P (addr)
13784 && SYMBOL_REF_DECL (addr))
13785 return legitimize_pe_coff_extern_decl (addr, inreg);
13786
13787 if (GET_CODE (addr) == CONST
13788 && GET_CODE (XEXP (addr, 0)) == PLUS
13789 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13790 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13791 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13792 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13793 {
13794 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13795 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13796 }
13797 return NULL_RTX;
13798 }
13799
13800 /* Try machine-dependent ways of modifying an illegitimate address
13801 to be legitimate. If we find one, return the new, valid address.
13802 This function is used in only one place: `memory_address' in explow.c.
13803
13804 OLDX is the address as it was before break_out_memory_refs was called.
13805 In some cases it is useful to look at this to decide what needs to be done.
13806
13807 It is always safe for this function to do nothing. It exists to recognize
13808 opportunities to optimize the output.
13809
13810 For the 80386, we handle X+REG by loading X into a register R and
13811 using R+REG. R will go in a general reg and indexing will be used.
13812 However, if REG is a broken-out memory address or multiplication,
13813 nothing needs to be done because REG can certainly go in a general reg.
13814
13815 When -fpic is used, special handling is needed for symbolic references.
13816 See comments by legitimize_pic_address in i386.c for details. */
13817
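/* As an illustration (a sketch with invented register numbers), an address
   such as

     (plus:SI (ashift:SI (reg:SI 58) (const_int 2)) (reg:SI 59))

   is canonicalized by the code below into

     (plus:SI (mult:SI (reg:SI 58) (const_int 4)) (reg:SI 59))

   so that it matches the base + index*scale form of the hardware
   addressing modes.  */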
13818 static rtx
13819 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
13820 enum machine_mode mode)
13821 {
13822 int changed = 0;
13823 unsigned log;
13824
13825 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13826 if (log)
13827 return legitimize_tls_address (x, (enum tls_model) log, false);
13828 if (GET_CODE (x) == CONST
13829 && GET_CODE (XEXP (x, 0)) == PLUS
13830 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13831 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13832 {
13833 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13834 (enum tls_model) log, false);
13835 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13836 }
13837
13838 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13839 {
13840 rtx tmp = legitimize_pe_coff_symbol (x, true);
13841 if (tmp)
13842 return tmp;
13843 }
13844
13845 if (flag_pic && SYMBOLIC_CONST (x))
13846 return legitimize_pic_address (x, 0);
13847
13848 #if TARGET_MACHO
13849 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13850 return machopic_indirect_data_reference (x, 0);
13851 #endif
13852
13853 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13854 if (GET_CODE (x) == ASHIFT
13855 && CONST_INT_P (XEXP (x, 1))
13856 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13857 {
13858 changed = 1;
13859 log = INTVAL (XEXP (x, 1));
13860 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13861 GEN_INT (1 << log));
13862 }
13863
13864 if (GET_CODE (x) == PLUS)
13865 {
13866 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13867
13868 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13869 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13870 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13871 {
13872 changed = 1;
13873 log = INTVAL (XEXP (XEXP (x, 0), 1));
13874 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13875 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13876 GEN_INT (1 << log));
13877 }
13878
13879 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13880 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13881 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13882 {
13883 changed = 1;
13884 log = INTVAL (XEXP (XEXP (x, 1), 1));
13885 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13886 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13887 GEN_INT (1 << log));
13888 }
13889
13890 /* Put multiply first if it isn't already. */
13891 if (GET_CODE (XEXP (x, 1)) == MULT)
13892 {
13893 rtx tmp = XEXP (x, 0);
13894 XEXP (x, 0) = XEXP (x, 1);
13895 XEXP (x, 1) = tmp;
13896 changed = 1;
13897 }
13898
13899 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13900 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13901 created by virtual register instantiation, register elimination, and
13902 similar optimizations. */
13903 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13904 {
13905 changed = 1;
13906 x = gen_rtx_PLUS (Pmode,
13907 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13908 XEXP (XEXP (x, 1), 0)),
13909 XEXP (XEXP (x, 1), 1));
13910 }
13911
13912 /* Canonicalize
13913 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13914 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13915 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13916 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13917 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13918 && CONSTANT_P (XEXP (x, 1)))
13919 {
13920 rtx constant;
13921 rtx other = NULL_RTX;
13922
13923 if (CONST_INT_P (XEXP (x, 1)))
13924 {
13925 constant = XEXP (x, 1);
13926 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13927 }
13928 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13929 {
13930 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13931 other = XEXP (x, 1);
13932 }
13933 else
13934 constant = 0;
13935
13936 if (constant)
13937 {
13938 changed = 1;
13939 x = gen_rtx_PLUS (Pmode,
13940 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13941 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13942 plus_constant (Pmode, other,
13943 INTVAL (constant)));
13944 }
13945 }
13946
13947 if (changed && ix86_legitimate_address_p (mode, x, false))
13948 return x;
13949
13950 if (GET_CODE (XEXP (x, 0)) == MULT)
13951 {
13952 changed = 1;
13953 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
13954 }
13955
13956 if (GET_CODE (XEXP (x, 1)) == MULT)
13957 {
13958 changed = 1;
13959 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
13960 }
13961
13962 if (changed
13963 && REG_P (XEXP (x, 1))
13964 && REG_P (XEXP (x, 0)))
13965 return x;
13966
13967 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13968 {
13969 changed = 1;
13970 x = legitimize_pic_address (x, 0);
13971 }
13972
13973 if (changed && ix86_legitimate_address_p (mode, x, false))
13974 return x;
13975
13976 if (REG_P (XEXP (x, 0)))
13977 {
13978 rtx temp = gen_reg_rtx (Pmode);
13979 rtx val = force_operand (XEXP (x, 1), temp);
13980 if (val != temp)
13981 {
13982 val = convert_to_mode (Pmode, val, 1);
13983 emit_move_insn (temp, val);
13984 }
13985
13986 XEXP (x, 1) = temp;
13987 return x;
13988 }
13989
13990 else if (REG_P (XEXP (x, 1)))
13991 {
13992 rtx temp = gen_reg_rtx (Pmode);
13993 rtx val = force_operand (XEXP (x, 0), temp);
13994 if (val != temp)
13995 {
13996 val = convert_to_mode (Pmode, val, 1);
13997 emit_move_insn (temp, val);
13998 }
13999
14000 XEXP (x, 0) = temp;
14001 return x;
14002 }
14003 }
14004
14005 return x;
14006 }
14007 \f
14008 /* Print an integer constant expression in assembler syntax. Addition
14009 and subtraction are the only arithmetic that may appear in these
14010 expressions. FILE is the stdio stream to write to, X is the rtx, and
14011 CODE is the operand print code from the output string. */
14012
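/* For example (illustration only): a PIC reference wrapped as
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) is printed by this
   routine as "foo@GOTOFF", and an UNSPEC_GOTPCREL wrapper as
   "foo@GOTPCREL(%rip)" in AT&T syntax; the relocation suffix is selected
   in the UNSPEC case below.  */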
14013 static void
14014 output_pic_addr_const (FILE *file, rtx x, int code)
14015 {
14016 char buf[256];
14017
14018 switch (GET_CODE (x))
14019 {
14020 case PC:
14021 gcc_assert (flag_pic);
14022 putc ('.', file);
14023 break;
14024
14025 case SYMBOL_REF:
14026 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14027 output_addr_const (file, x);
14028 else
14029 {
14030 const char *name = XSTR (x, 0);
14031
14032 /* Mark the decl as referenced so that cgraph will
14033 output the function. */
14034 if (SYMBOL_REF_DECL (x))
14035 mark_decl_referenced (SYMBOL_REF_DECL (x));
14036
14037 #if TARGET_MACHO
14038 if (MACHOPIC_INDIRECT
14039 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14040 name = machopic_indirection_name (x, /*stub_p=*/true);
14041 #endif
14042 assemble_name (file, name);
14043 }
14044 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14045 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14046 fputs ("@PLT", file);
14047 break;
14048
14049 case LABEL_REF:
14050 x = XEXP (x, 0);
14051 /* FALLTHRU */
14052 case CODE_LABEL:
14053 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14054 assemble_name (asm_out_file, buf);
14055 break;
14056
14057 case CONST_INT:
14058 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14059 break;
14060
14061 case CONST:
14062 /* This used to output parentheses around the expression,
14063 but that does not work on the 386 (either ATT or BSD assembler). */
14064 output_pic_addr_const (file, XEXP (x, 0), code);
14065 break;
14066
14067 case CONST_DOUBLE:
14068 if (GET_MODE (x) == VOIDmode)
14069 {
14070 /* We can use %d if the number is <32 bits and positive. */
14071 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14072 fprintf (file, "0x%lx%08lx",
14073 (unsigned long) CONST_DOUBLE_HIGH (x),
14074 (unsigned long) CONST_DOUBLE_LOW (x));
14075 else
14076 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14077 }
14078 else
14079 /* We can't handle floating point constants;
14080 TARGET_PRINT_OPERAND must handle them. */
14081 output_operand_lossage ("floating constant misused");
14082 break;
14083
14084 case PLUS:
14085 /* Some assemblers need integer constants to appear first. */
14086 if (CONST_INT_P (XEXP (x, 0)))
14087 {
14088 output_pic_addr_const (file, XEXP (x, 0), code);
14089 putc ('+', file);
14090 output_pic_addr_const (file, XEXP (x, 1), code);
14091 }
14092 else
14093 {
14094 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14095 output_pic_addr_const (file, XEXP (x, 1), code);
14096 putc ('+', file);
14097 output_pic_addr_const (file, XEXP (x, 0), code);
14098 }
14099 break;
14100
14101 case MINUS:
14102 if (!TARGET_MACHO)
14103 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14104 output_pic_addr_const (file, XEXP (x, 0), code);
14105 putc ('-', file);
14106 output_pic_addr_const (file, XEXP (x, 1), code);
14107 if (!TARGET_MACHO)
14108 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14109 break;
14110
14111 case UNSPEC:
14112 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14113 {
14114 bool f = i386_asm_output_addr_const_extra (file, x);
14115 gcc_assert (f);
14116 break;
14117 }
14118
14119 gcc_assert (XVECLEN (x, 0) == 1);
14120 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14121 switch (XINT (x, 1))
14122 {
14123 case UNSPEC_GOT:
14124 fputs ("@GOT", file);
14125 break;
14126 case UNSPEC_GOTOFF:
14127 fputs ("@GOTOFF", file);
14128 break;
14129 case UNSPEC_PLTOFF:
14130 fputs ("@PLTOFF", file);
14131 break;
14132 case UNSPEC_PCREL:
14133 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14134 "(%rip)" : "[rip]", file);
14135 break;
14136 case UNSPEC_GOTPCREL:
14137 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14138 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14139 break;
14140 case UNSPEC_GOTTPOFF:
14141 /* FIXME: This might be @TPOFF in Sun ld too. */
14142 fputs ("@gottpoff", file);
14143 break;
14144 case UNSPEC_TPOFF:
14145 fputs ("@tpoff", file);
14146 break;
14147 case UNSPEC_NTPOFF:
14148 if (TARGET_64BIT)
14149 fputs ("@tpoff", file);
14150 else
14151 fputs ("@ntpoff", file);
14152 break;
14153 case UNSPEC_DTPOFF:
14154 fputs ("@dtpoff", file);
14155 break;
14156 case UNSPEC_GOTNTPOFF:
14157 if (TARGET_64BIT)
14158 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14159 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14160 else
14161 fputs ("@gotntpoff", file);
14162 break;
14163 case UNSPEC_INDNTPOFF:
14164 fputs ("@indntpoff", file);
14165 break;
14166 #if TARGET_MACHO
14167 case UNSPEC_MACHOPIC_OFFSET:
14168 putc ('-', file);
14169 machopic_output_function_base_name (file);
14170 break;
14171 #endif
14172 default:
14173 output_operand_lossage ("invalid UNSPEC as operand");
14174 break;
14175 }
14176 break;
14177
14178 default:
14179 output_operand_lossage ("invalid expression as operand");
14180 }
14181 }
14182
14183 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14184 We need to emit DTP-relative relocations. */
14185
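/* On a typical ELF target, assuming ASM_LONG expands to "\t.long\t", the
   directives emitted below look like

     .long foo@dtpoff        for SIZE == 4
     .long foo@dtpoff, 0     for SIZE == 8

   i.e. the 8-byte case is padded with a zero upper word.  */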
14186 static void ATTRIBUTE_UNUSED
14187 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14188 {
14189 fputs (ASM_LONG, file);
14190 output_addr_const (file, x);
14191 fputs ("@dtpoff", file);
14192 switch (size)
14193 {
14194 case 4:
14195 break;
14196 case 8:
14197 fputs (", 0", file);
14198 break;
14199 default:
14200 gcc_unreachable ();
14201 }
14202 }
14203
14204 /* Return true if X is a representation of the PIC register. This copes
14205 with calls from ix86_find_base_term, where the register might have
14206 been replaced by a cselib value. */
14207
14208 static bool
14209 ix86_pic_register_p (rtx x)
14210 {
14211 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14212 return (pic_offset_table_rtx
14213 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14214 else
14215 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14216 }
14217
14218 /* Helper function for ix86_delegitimize_address.
14219 Attempt to delegitimize TLS local-exec accesses. */
14220
14221 static rtx
14222 ix86_delegitimize_tls_address (rtx orig_x)
14223 {
14224 rtx x = orig_x, unspec;
14225 struct ix86_address addr;
14226
14227 if (!TARGET_TLS_DIRECT_SEG_REFS)
14228 return orig_x;
14229 if (MEM_P (x))
14230 x = XEXP (x, 0);
14231 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14232 return orig_x;
14233 if (ix86_decompose_address (x, &addr) == 0
14234 || addr.seg != DEFAULT_TLS_SEG_REG
14235 || addr.disp == NULL_RTX
14236 || GET_CODE (addr.disp) != CONST)
14237 return orig_x;
14238 unspec = XEXP (addr.disp, 0);
14239 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14240 unspec = XEXP (unspec, 0);
14241 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14242 return orig_x;
14243 x = XVECEXP (unspec, 0, 0);
14244 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14245 if (unspec != XEXP (addr.disp, 0))
14246 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14247 if (addr.index)
14248 {
14249 rtx idx = addr.index;
14250 if (addr.scale != 1)
14251 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14252 x = gen_rtx_PLUS (Pmode, idx, x);
14253 }
14254 if (addr.base)
14255 x = gen_rtx_PLUS (Pmode, addr.base, x);
14256 if (MEM_P (orig_x))
14257 x = replace_equiv_address_nv (orig_x, x);
14258 return x;
14259 }
14260
14261 /* In the name of slightly smaller debug output, and to cater to
14262 general assembler lossage, recognize PIC+GOTOFF and turn it back
14263 into a direct symbol reference.
14264
14265 On Darwin, this is necessary to avoid a crash, because Darwin
14266 has a different PIC label for each routine but the DWARF debugging
14267 information is not associated with any particular routine, so it's
14268 necessary to remove references to the PIC label from RTL stored by
14269 the DWARF output code. */
14270
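/* A minimal sketch of the transformation (register numbers invented): a
   legitimized -m32 PIC address such as

     (plus:SI (reg:SI 3 ebx)
              (const:SI (unspec:SI [(symbol_ref "foo")] UNSPEC_GOTOFF)))

   is turned back into a plain (symbol_ref "foo"), with any constant or
   register addend re-applied, so the debug info refers to the symbol
   rather than to the PIC-register arithmetic.  */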
14271 static rtx
14272 ix86_delegitimize_address (rtx x)
14273 {
14274 rtx orig_x = delegitimize_mem_from_attrs (x);
14275 /* addend is NULL or some rtx if x is something+GOTOFF where
14276 something doesn't include the PIC register. */
14277 rtx addend = NULL_RTX;
14278 /* reg_addend is NULL or a multiple of some register. */
14279 rtx reg_addend = NULL_RTX;
14280 /* const_addend is NULL or a const_int. */
14281 rtx const_addend = NULL_RTX;
14282 /* This is the result, or NULL. */
14283 rtx result = NULL_RTX;
14284
14285 x = orig_x;
14286
14287 if (MEM_P (x))
14288 x = XEXP (x, 0);
14289
14290 if (TARGET_64BIT)
14291 {
14292 if (GET_CODE (x) == CONST
14293 && GET_CODE (XEXP (x, 0)) == PLUS
14294 && GET_MODE (XEXP (x, 0)) == Pmode
14295 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14296 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14297 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14298 {
14299 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14300 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14301 if (MEM_P (orig_x))
14302 x = replace_equiv_address_nv (orig_x, x);
14303 return x;
14304 }
14305
14306 if (GET_CODE (x) == CONST
14307 && GET_CODE (XEXP (x, 0)) == UNSPEC
14308 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14309 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14310 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14311 {
14312 x = XVECEXP (XEXP (x, 0), 0, 0);
14313 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14314 {
14315 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14316 GET_MODE (x), 0);
14317 if (x == NULL_RTX)
14318 return orig_x;
14319 }
14320 return x;
14321 }
14322
14323 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14324 return ix86_delegitimize_tls_address (orig_x);
14325
14326 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14327 and -mcmodel=medium -fpic. */
14328 }
14329
14330 if (GET_CODE (x) != PLUS
14331 || GET_CODE (XEXP (x, 1)) != CONST)
14332 return ix86_delegitimize_tls_address (orig_x);
14333
14334 if (ix86_pic_register_p (XEXP (x, 0)))
14335 /* %ebx + GOT/GOTOFF */
14336 ;
14337 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14338 {
14339 /* %ebx + %reg * scale + GOT/GOTOFF */
14340 reg_addend = XEXP (x, 0);
14341 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14342 reg_addend = XEXP (reg_addend, 1);
14343 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14344 reg_addend = XEXP (reg_addend, 0);
14345 else
14346 {
14347 reg_addend = NULL_RTX;
14348 addend = XEXP (x, 0);
14349 }
14350 }
14351 else
14352 addend = XEXP (x, 0);
14353
14354 x = XEXP (XEXP (x, 1), 0);
14355 if (GET_CODE (x) == PLUS
14356 && CONST_INT_P (XEXP (x, 1)))
14357 {
14358 const_addend = XEXP (x, 1);
14359 x = XEXP (x, 0);
14360 }
14361
14362 if (GET_CODE (x) == UNSPEC
14363 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14364 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14365 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14366 && !MEM_P (orig_x) && !addend)))
14367 result = XVECEXP (x, 0, 0);
14368
14369 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14370 && !MEM_P (orig_x))
14371 result = XVECEXP (x, 0, 0);
14372
14373 if (! result)
14374 return ix86_delegitimize_tls_address (orig_x);
14375
14376 if (const_addend)
14377 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14378 if (reg_addend)
14379 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14380 if (addend)
14381 {
14382 /* If the rest of original X doesn't involve the PIC register, add
14383 addend and subtract pic_offset_table_rtx. This can happen e.g.
14384 for code like:
14385 leal (%ebx, %ecx, 4), %ecx
14386 ...
14387 movl foo@GOTOFF(%ecx), %edx
14388 in which case we return (%ecx - %ebx) + foo. */
14389 if (pic_offset_table_rtx)
14390 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14391 pic_offset_table_rtx),
14392 result);
14393 else
14394 return orig_x;
14395 }
14396 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14397 {
14398 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14399 if (result == NULL_RTX)
14400 return orig_x;
14401 }
14402 return result;
14403 }
14404
14405 /* If X is a machine specific address (i.e. a symbol or label being
14406 referenced as a displacement from the GOT implemented using an
14407 UNSPEC), then return the base term. Otherwise return X. */
14408
14409 rtx
14410 ix86_find_base_term (rtx x)
14411 {
14412 rtx term;
14413
14414 if (TARGET_64BIT)
14415 {
14416 if (GET_CODE (x) != CONST)
14417 return x;
14418 term = XEXP (x, 0);
14419 if (GET_CODE (term) == PLUS
14420 && (CONST_INT_P (XEXP (term, 1))
14421 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14422 term = XEXP (term, 0);
14423 if (GET_CODE (term) != UNSPEC
14424 || (XINT (term, 1) != UNSPEC_GOTPCREL
14425 && XINT (term, 1) != UNSPEC_PCREL))
14426 return x;
14427
14428 return XVECEXP (term, 0, 0);
14429 }
14430
14431 return ix86_delegitimize_address (x);
14432 }
14433 \f
14434 static void
14435 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14436 bool fp, FILE *file)
14437 {
14438 const char *suffix;
14439
14440 if (mode == CCFPmode || mode == CCFPUmode)
14441 {
14442 code = ix86_fp_compare_code_to_integer (code);
14443 mode = CCmode;
14444 }
14445 if (reverse)
14446 code = reverse_condition (code);
14447
14448 switch (code)
14449 {
14450 case EQ:
14451 switch (mode)
14452 {
14453 case CCAmode:
14454 suffix = "a";
14455 break;
14456
14457 case CCCmode:
14458 suffix = "c";
14459 break;
14460
14461 case CCOmode:
14462 suffix = "o";
14463 break;
14464
14465 case CCSmode:
14466 suffix = "s";
14467 break;
14468
14469 default:
14470 suffix = "e";
14471 }
14472 break;
14473 case NE:
14474 switch (mode)
14475 {
14476 case CCAmode:
14477 suffix = "na";
14478 break;
14479
14480 case CCCmode:
14481 suffix = "nc";
14482 break;
14483
14484 case CCOmode:
14485 suffix = "no";
14486 break;
14487
14488 case CCSmode:
14489 suffix = "ns";
14490 break;
14491
14492 default:
14493 suffix = "ne";
14494 }
14495 break;
14496 case GT:
14497 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14498 suffix = "g";
14499 break;
14500 case GTU:
14501 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14502 Those same assemblers have the same but opposite lossage on cmov. */
14503 if (mode == CCmode)
14504 suffix = fp ? "nbe" : "a";
14505 else
14506 gcc_unreachable ();
14507 break;
14508 case LT:
14509 switch (mode)
14510 {
14511 case CCNOmode:
14512 case CCGOCmode:
14513 suffix = "s";
14514 break;
14515
14516 case CCmode:
14517 case CCGCmode:
14518 suffix = "l";
14519 break;
14520
14521 default:
14522 gcc_unreachable ();
14523 }
14524 break;
14525 case LTU:
14526 if (mode == CCmode)
14527 suffix = "b";
14528 else if (mode == CCCmode)
14529 suffix = "c";
14530 else
14531 gcc_unreachable ();
14532 break;
14533 case GE:
14534 switch (mode)
14535 {
14536 case CCNOmode:
14537 case CCGOCmode:
14538 suffix = "ns";
14539 break;
14540
14541 case CCmode:
14542 case CCGCmode:
14543 suffix = "ge";
14544 break;
14545
14546 default:
14547 gcc_unreachable ();
14548 }
14549 break;
14550 case GEU:
14551 if (mode == CCmode)
14552 suffix = fp ? "nb" : "ae";
14553 else if (mode == CCCmode)
14554 suffix = "nc";
14555 else
14556 gcc_unreachable ();
14557 break;
14558 case LE:
14559 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14560 suffix = "le";
14561 break;
14562 case LEU:
14563 if (mode == CCmode)
14564 suffix = "be";
14565 else
14566 gcc_unreachable ();
14567 break;
14568 case UNORDERED:
14569 suffix = fp ? "u" : "p";
14570 break;
14571 case ORDERED:
14572 suffix = fp ? "nu" : "np";
14573 break;
14574 default:
14575 gcc_unreachable ();
14576 }
14577 fputs (suffix, file);
14578 }
14579
14580 /* Print the name of register X to FILE based on its machine mode and number.
14581 If CODE is 'w', pretend the mode is HImode.
14582 If CODE is 'b', pretend the mode is QImode.
14583 If CODE is 'k', pretend the mode is SImode.
14584 If CODE is 'q', pretend the mode is DImode.
14585 If CODE is 'x', pretend the mode is V4SFmode.
14586 If CODE is 't', pretend the mode is V8SFmode.
14587 If CODE is 'g', pretend the mode is V16SFmode.
14588 If CODE is 'h', pretend the reg is the 'high' byte register.
14589 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
14590 If CODE is 'd', duplicate the operand for AVX instruction.
14591 */
14592
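/* For instance (illustration only), with operand register number 0 (the
   "ax" register) the size codes above select:

     'b' -> al     'h' -> ah     'w' -> ax
     'k' -> eax    'q' -> rax

   while 'x', 't' and 'g' pick the xmm/ymm/zmm name of an SSE register
   operand.  */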
14593 void
14594 print_reg (rtx x, int code, FILE *file)
14595 {
14596 const char *reg;
14597 unsigned int regno;
14598 bool duplicated = code == 'd' && TARGET_AVX;
14599
14600 if (ASSEMBLER_DIALECT == ASM_ATT)
14601 putc ('%', file);
14602
14603 if (x == pc_rtx)
14604 {
14605 gcc_assert (TARGET_64BIT);
14606 fputs ("rip", file);
14607 return;
14608 }
14609
14610 regno = true_regnum (x);
14611 gcc_assert (regno != ARG_POINTER_REGNUM
14612 && regno != FRAME_POINTER_REGNUM
14613 && regno != FLAGS_REG
14614 && regno != FPSR_REG
14615 && regno != FPCR_REG);
14616
14617 if (code == 'w' || MMX_REG_P (x))
14618 code = 2;
14619 else if (code == 'b')
14620 code = 1;
14621 else if (code == 'k')
14622 code = 4;
14623 else if (code == 'q')
14624 code = 8;
14625 else if (code == 'y')
14626 code = 3;
14627 else if (code == 'h')
14628 code = 0;
14629 else if (code == 'x')
14630 code = 16;
14631 else if (code == 't')
14632 code = 32;
14633 else if (code == 'g')
14634 code = 64;
14635 else
14636 code = GET_MODE_SIZE (GET_MODE (x));
14637
14638 /* Irritatingly, AMD extended registers use a different naming convention
14639 from the normal registers: "r%d[bwd]". */
14640 if (REX_INT_REGNO_P (regno))
14641 {
14642 gcc_assert (TARGET_64BIT);
14643 putc ('r', file);
14644 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14645 switch (code)
14646 {
14647 case 0:
14648 error ("extended registers have no high halves");
14649 break;
14650 case 1:
14651 putc ('b', file);
14652 break;
14653 case 2:
14654 putc ('w', file);
14655 break;
14656 case 4:
14657 putc ('d', file);
14658 break;
14659 case 8:
14660 /* no suffix */
14661 break;
14662 default:
14663 error ("unsupported operand size for extended register");
14664 break;
14665 }
14666 return;
14667 }
14668
14669 reg = NULL;
14670 switch (code)
14671 {
14672 case 3:
14673 if (STACK_TOP_P (x))
14674 {
14675 reg = "st(0)";
14676 break;
14677 }
14678 /* FALLTHRU */
14679 case 8:
14680 case 4:
14681 case 12:
14682 if (! ANY_FP_REG_P (x))
14683 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14684 /* FALLTHRU */
14685 case 16:
14686 case 2:
14687 normal:
14688 reg = hi_reg_name[regno];
14689 break;
14690 case 1:
14691 if (regno >= ARRAY_SIZE (qi_reg_name))
14692 goto normal;
14693 reg = qi_reg_name[regno];
14694 break;
14695 case 0:
14696 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14697 goto normal;
14698 reg = qi_high_reg_name[regno];
14699 break;
14700 case 32:
14701 if (SSE_REG_P (x))
14702 {
14703 gcc_assert (!duplicated);
14704 putc ('y', file);
14705 fputs (hi_reg_name[regno] + 1, file);
14706 return;
14707 }
14708 case 64:
14709 if (SSE_REG_P (x))
14710 {
14711 gcc_assert (!duplicated);
14712 putc ('z', file);
14713 fputs (hi_reg_name[REGNO (x)] + 1, file);
14714 return;
14715 }
14716 break;
14717 default:
14718 gcc_unreachable ();
14719 }
14720
14721 fputs (reg, file);
14722 if (duplicated)
14723 {
14724 if (ASSEMBLER_DIALECT == ASM_ATT)
14725 fprintf (file, ", %%%s", reg);
14726 else
14727 fprintf (file, ", %s", reg);
14728 }
14729 }
14730
14731 /* Locate some local-dynamic symbol still in use by this function
14732 so that we can print its name in some tls_local_dynamic_base
14733 pattern. */
14734
14735 static int
14736 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
14737 {
14738 rtx x = *px;
14739
14740 if (GET_CODE (x) == SYMBOL_REF
14741 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14742 {
14743 cfun->machine->some_ld_name = XSTR (x, 0);
14744 return 1;
14745 }
14746
14747 return 0;
14748 }
14749
14750 static const char *
14751 get_some_local_dynamic_name (void)
14752 {
14753 rtx insn;
14754
14755 if (cfun->machine->some_ld_name)
14756 return cfun->machine->some_ld_name;
14757
14758 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14759 if (NONDEBUG_INSN_P (insn)
14760 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14761 return cfun->machine->some_ld_name;
14762
14763 return NULL;
14764 }
14765
14766 /* Meaning of CODE:
14767 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14768 C -- print opcode suffix for set/cmov insn.
14769 c -- like C, but print reversed condition
14770 F,f -- likewise, but for floating-point.
14771 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14772 otherwise nothing
14773 R -- print embedded rounding and sae.
14774 r -- print only sae.
14775 z -- print the opcode suffix for the size of the current operand.
14776 Z -- likewise, with special suffixes for x87 instructions.
14777 * -- print a star (in certain assembler syntax)
14778 A -- print an absolute memory reference.
14779 E -- print address with DImode register names if TARGET_64BIT.
14780 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14781 s -- print a shift double count, followed by the assembler's argument
14782 delimiter.
14783 b -- print the QImode name of the register for the indicated operand.
14784 %b0 would print %al if operands[0] is reg 0.
14785 w -- likewise, print the HImode name of the register.
14786 k -- likewise, print the SImode name of the register.
14787 q -- likewise, print the DImode name of the register.
14788 x -- likewise, print the V4SFmode name of the register.
14789 t -- likewise, print the V8SFmode name of the register.
14790 g -- likewise, print the V16SFmode name of the register.
14791 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14792 y -- print "st(0)" instead of "st" as a register.
14793 d -- print duplicated register operand for AVX instruction.
14794 D -- print condition for SSE cmp instruction.
14795 P -- if PIC, print an @PLT suffix.
14796 p -- print raw symbol name.
14797 X -- don't print any sort of PIC '@' suffix for a symbol.
14798 & -- print some in-use local-dynamic symbol name.
14799 H -- print a memory address offset by 8; used for SSE high parts.
14800 Y -- print condition for XOP pcom* instruction.
14801 + -- print a branch hint as 'cs' or 'ds' prefix
14802 ; -- print a semicolon (after prefixes due to bug in older gas).
14803 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14804 @ -- print a segment register of thread base pointer load
14805 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14806 */
14807
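/* A couple of concrete uses, as a sketch: in an insn template "%k1"
   prints operand 1 with its SImode register name (e.g. "%eax" in AT&T
   syntax), "%z0" appends the width suffix of operand 0 to the mnemonic
   ("b", "w", "l" or "q"), and "%+" before a conditional jump may emit a
   "ds"/"cs" branch-hint prefix when the active tuning enables branch
   prediction hints.  */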
14808 void
14809 ix86_print_operand (FILE *file, rtx x, int code)
14810 {
14811 if (code)
14812 {
14813 switch (code)
14814 {
14815 case 'A':
14816 switch (ASSEMBLER_DIALECT)
14817 {
14818 case ASM_ATT:
14819 putc ('*', file);
14820 break;
14821
14822 case ASM_INTEL:
14823 /* Intel syntax. For absolute addresses, registers should not
14824 be surrounded by brackets. */
14825 if (!REG_P (x))
14826 {
14827 putc ('[', file);
14828 ix86_print_operand (file, x, 0);
14829 putc (']', file);
14830 return;
14831 }
14832 break;
14833
14834 default:
14835 gcc_unreachable ();
14836 }
14837
14838 ix86_print_operand (file, x, 0);
14839 return;
14840
14841 case 'E':
14842 /* Wrap address in an UNSPEC to declare special handling. */
14843 if (TARGET_64BIT)
14844 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14845
14846 output_address (x);
14847 return;
14848
14849 case 'L':
14850 if (ASSEMBLER_DIALECT == ASM_ATT)
14851 putc ('l', file);
14852 return;
14853
14854 case 'W':
14855 if (ASSEMBLER_DIALECT == ASM_ATT)
14856 putc ('w', file);
14857 return;
14858
14859 case 'B':
14860 if (ASSEMBLER_DIALECT == ASM_ATT)
14861 putc ('b', file);
14862 return;
14863
14864 case 'Q':
14865 if (ASSEMBLER_DIALECT == ASM_ATT)
14866 putc ('l', file);
14867 return;
14868
14869 case 'S':
14870 if (ASSEMBLER_DIALECT == ASM_ATT)
14871 putc ('s', file);
14872 return;
14873
14874 case 'T':
14875 if (ASSEMBLER_DIALECT == ASM_ATT)
14876 putc ('t', file);
14877 return;
14878
14879 case 'O':
14880 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14881 if (ASSEMBLER_DIALECT != ASM_ATT)
14882 return;
14883
14884 switch (GET_MODE_SIZE (GET_MODE (x)))
14885 {
14886 case 2:
14887 putc ('w', file);
14888 break;
14889
14890 case 4:
14891 putc ('l', file);
14892 break;
14893
14894 case 8:
14895 putc ('q', file);
14896 break;
14897
14898 default:
14899 output_operand_lossage
14900 ("invalid operand size for operand code 'O'");
14901 return;
14902 }
14903
14904 putc ('.', file);
14905 #endif
14906 return;
14907
14908 case 'z':
14909 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14910 {
14911 /* Opcodes don't get size suffixes when using Intel syntax. */
14912 if (ASSEMBLER_DIALECT == ASM_INTEL)
14913 return;
14914
14915 switch (GET_MODE_SIZE (GET_MODE (x)))
14916 {
14917 case 1:
14918 putc ('b', file);
14919 return;
14920
14921 case 2:
14922 putc ('w', file);
14923 return;
14924
14925 case 4:
14926 putc ('l', file);
14927 return;
14928
14929 case 8:
14930 putc ('q', file);
14931 return;
14932
14933 default:
14934 output_operand_lossage
14935 ("invalid operand size for operand code 'z'");
14936 return;
14937 }
14938 }
14939
14940 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14941 warning
14942 (0, "non-integer operand used with operand code 'z'");
14943 /* FALLTHRU */
14944
14945 case 'Z':
14946 /* 387 opcodes don't get size suffixes when using Intel syntax. */
14947 if (ASSEMBLER_DIALECT == ASM_INTEL)
14948 return;
14949
14950 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14951 {
14952 switch (GET_MODE_SIZE (GET_MODE (x)))
14953 {
14954 case 2:
14955 #ifdef HAVE_AS_IX86_FILDS
14956 putc ('s', file);
14957 #endif
14958 return;
14959
14960 case 4:
14961 putc ('l', file);
14962 return;
14963
14964 case 8:
14965 #ifdef HAVE_AS_IX86_FILDQ
14966 putc ('q', file);
14967 #else
14968 fputs ("ll", file);
14969 #endif
14970 return;
14971
14972 default:
14973 break;
14974 }
14975 }
14976 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14977 {
14978 /* 387 opcodes don't get size suffixes
14979 if the operands are registers. */
14980 if (STACK_REG_P (x))
14981 return;
14982
14983 switch (GET_MODE_SIZE (GET_MODE (x)))
14984 {
14985 case 4:
14986 putc ('s', file);
14987 return;
14988
14989 case 8:
14990 putc ('l', file);
14991 return;
14992
14993 case 12:
14994 case 16:
14995 putc ('t', file);
14996 return;
14997
14998 default:
14999 break;
15000 }
15001 }
15002 else
15003 {
15004 output_operand_lossage
15005 ("invalid operand type used with operand code 'Z'");
15006 return;
15007 }
15008
15009 output_operand_lossage
15010 ("invalid operand size for operand code 'Z'");
15011 return;
15012
15013 case 'd':
15014 case 'b':
15015 case 'w':
15016 case 'k':
15017 case 'q':
15018 case 'h':
15019 case 't':
15020 case 'g':
15021 case 'y':
15022 case 'x':
15023 case 'X':
15024 case 'P':
15025 case 'p':
15026 break;
15027
15028 case 's':
15029 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15030 {
15031 ix86_print_operand (file, x, 0);
15032 fputs (", ", file);
15033 }
15034 return;
15035
15036 case 'Y':
15037 switch (GET_CODE (x))
15038 {
15039 case NE:
15040 fputs ("neq", file);
15041 break;
15042 case EQ:
15043 fputs ("eq", file);
15044 break;
15045 case GE:
15046 case GEU:
15047 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15048 break;
15049 case GT:
15050 case GTU:
15051 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15052 break;
15053 case LE:
15054 case LEU:
15055 fputs ("le", file);
15056 break;
15057 case LT:
15058 case LTU:
15059 fputs ("lt", file);
15060 break;
15061 case UNORDERED:
15062 fputs ("unord", file);
15063 break;
15064 case ORDERED:
15065 fputs ("ord", file);
15066 break;
15067 case UNEQ:
15068 fputs ("ueq", file);
15069 break;
15070 case UNGE:
15071 fputs ("nlt", file);
15072 break;
15073 case UNGT:
15074 fputs ("nle", file);
15075 break;
15076 case UNLE:
15077 fputs ("ule", file);
15078 break;
15079 case UNLT:
15080 fputs ("ult", file);
15081 break;
15082 case LTGT:
15083 fputs ("une", file);
15084 break;
15085 default:
15086 output_operand_lossage ("operand is not a condition code, "
15087 "invalid operand code 'Y'");
15088 return;
15089 }
15090 return;
15091
15092 case 'D':
15093 /* Little bit of braindamage here. The SSE compare instructions
15094 use completely different names for the comparisons than the
15095 fp conditional moves do. */
15096 switch (GET_CODE (x))
15097 {
15098 case UNEQ:
15099 if (TARGET_AVX)
15100 {
15101 fputs ("eq_us", file);
15102 break;
15103 }
15104 case EQ:
15105 fputs ("eq", file);
15106 break;
15107 case UNLT:
15108 if (TARGET_AVX)
15109 {
15110 fputs ("nge", file);
15111 break;
15112 }
15113 case LT:
15114 fputs ("lt", file);
15115 break;
15116 case UNLE:
15117 if (TARGET_AVX)
15118 {
15119 fputs ("ngt", file);
15120 break;
15121 }
15122 case LE:
15123 fputs ("le", file);
15124 break;
15125 case UNORDERED:
15126 fputs ("unord", file);
15127 break;
15128 case LTGT:
15129 if (TARGET_AVX)
15130 {
15131 fputs ("neq_oq", file);
15132 break;
15133 }
15134 case NE:
15135 fputs ("neq", file);
15136 break;
15137 case GE:
15138 if (TARGET_AVX)
15139 {
15140 fputs ("ge", file);
15141 break;
15142 }
15143 case UNGE:
15144 fputs ("nlt", file);
15145 break;
15146 case GT:
15147 if (TARGET_AVX)
15148 {
15149 fputs ("gt", file);
15150 break;
15151 }
15152 case UNGT:
15153 fputs ("nle", file);
15154 break;
15155 case ORDERED:
15156 fputs ("ord", file);
15157 break;
15158 default:
15159 output_operand_lossage ("operand is not a condition code, "
15160 "invalid operand code 'D'");
15161 return;
15162 }
15163 return;
15164
15165 case 'F':
15166 case 'f':
15167 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15168 if (ASSEMBLER_DIALECT == ASM_ATT)
15169 putc ('.', file);
15170 #endif
15171
15172 case 'C':
15173 case 'c':
15174 if (!COMPARISON_P (x))
15175 {
15176 output_operand_lossage ("operand is not a condition code, "
15177 "invalid operand code '%c'", code);
15178 return;
15179 }
15180 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15181 code == 'c' || code == 'f',
15182 code == 'F' || code == 'f',
15183 file);
15184 return;
15185
15186 case 'H':
15187 if (!offsettable_memref_p (x))
15188 {
15189 output_operand_lossage ("operand is not an offsettable memory "
15190 "reference, invalid operand code 'H'");
15191 return;
15192 }
15193 /* It doesn't actually matter what mode we use here, as we're
15194 only going to use this for printing. */
15195 x = adjust_address_nv (x, DImode, 8);
15196 /* Output 'qword ptr' for intel assembler dialect. */
15197 if (ASSEMBLER_DIALECT == ASM_INTEL)
15198 code = 'q';
15199 break;
15200
15201 case 'K':
15202 gcc_assert (CONST_INT_P (x));
15203
15204 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15205 #ifdef HAVE_AS_IX86_HLE
15206 fputs ("xacquire ", file);
15207 #else
15208 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15209 #endif
15210 else if (INTVAL (x) & IX86_HLE_RELEASE)
15211 #ifdef HAVE_AS_IX86_HLE
15212 fputs ("xrelease ", file);
15213 #else
15214 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15215 #endif
15216 /* We do not want to print the value of the operand. */
15217 return;
15218
15219 case 'N':
15220 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15221 fputs ("{z}", file);
15222 return;
15223
15224 case 'r':
15225 gcc_assert (CONST_INT_P (x));
15226 gcc_assert (INTVAL (x) == ROUND_SAE);
15227
15228 if (ASSEMBLER_DIALECT == ASM_INTEL)
15229 fputs (", ", file);
15230
15231 fputs ("{sae}", file);
15232
15233 if (ASSEMBLER_DIALECT == ASM_ATT)
15234 fputs (", ", file);
15235
15236 return;
15237
15238 case 'R':
15239 gcc_assert (CONST_INT_P (x));
15240
15241 if (ASSEMBLER_DIALECT == ASM_INTEL)
15242 fputs (", ", file);
15243
15244 switch (INTVAL (x))
15245 {
15246 case ROUND_NEAREST_INT | ROUND_SAE:
15247 fputs ("{rn-sae}", file);
15248 break;
15249 case ROUND_NEG_INF | ROUND_SAE:
15250 fputs ("{rd-sae}", file);
15251 break;
15252 case ROUND_POS_INF | ROUND_SAE:
15253 fputs ("{ru-sae}", file);
15254 break;
15255 case ROUND_ZERO | ROUND_SAE:
15256 fputs ("{rz-sae}", file);
15257 break;
15258 default:
15259 gcc_unreachable ();
15260 }
15261
15262 if (ASSEMBLER_DIALECT == ASM_ATT)
15263 fputs (", ", file);
15264
15265 return;
15266
15267 case '*':
15268 if (ASSEMBLER_DIALECT == ASM_ATT)
15269 putc ('*', file);
15270 return;
15271
15272 case '&':
15273 {
15274 const char *name = get_some_local_dynamic_name ();
15275 if (name == NULL)
15276 output_operand_lossage ("'%%&' used without any "
15277 "local dynamic TLS references");
15278 else
15279 assemble_name (file, name);
15280 return;
15281 }
15282
15283 case '+':
15284 {
15285 rtx x;
15286
15287 if (!optimize
15288 || optimize_function_for_size_p (cfun)
15289 || !TARGET_BRANCH_PREDICTION_HINTS)
15290 return;
15291
15292 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15293 if (x)
15294 {
15295 int pred_val = XINT (x, 0);
15296
15297 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15298 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15299 {
15300 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15301 bool cputaken
15302 = final_forward_branch_p (current_output_insn) == 0;
15303
15304 /* Emit hints only when the default branch prediction
15305 heuristics would fail. */
15306 if (taken != cputaken)
15307 {
15308 /* We use 3e (DS) prefix for taken branches and
15309 2e (CS) prefix for not taken branches. */
15310 if (taken)
15311 fputs ("ds ; ", file);
15312 else
15313 fputs ("cs ; ", file);
15314 }
15315 }
15316 }
15317 return;
15318 }
15319
15320 case ';':
15321 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15322 putc (';', file);
15323 #endif
15324 return;
15325
15326 case '@':
15327 if (ASSEMBLER_DIALECT == ASM_ATT)
15328 putc ('%', file);
15329
15330 /* The kernel uses a different segment register for performance
15331 reasons; a system call would not have to trash the userspace
15332 segment register, which would be expensive. */
15333 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15334 fputs ("fs", file);
15335 else
15336 fputs ("gs", file);
15337 return;
15338
15339 case '~':
15340 putc (TARGET_AVX2 ? 'i' : 'f', file);
15341 return;
15342
15343 case '^':
15344 if (TARGET_64BIT && Pmode != word_mode)
15345 fputs ("addr32 ", file);
15346 return;
15347
15348 default:
15349 output_operand_lossage ("invalid operand code '%c'", code);
15350 }
15351 }
15352
15353 if (REG_P (x))
15354 print_reg (x, code, file);
15355
15356 else if (MEM_P (x))
15357 {
15358 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15359 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15360 && GET_MODE (x) != BLKmode)
15361 {
15362 const char * size;
15363 switch (GET_MODE_SIZE (GET_MODE (x)))
15364 {
15365 case 1: size = "BYTE"; break;
15366 case 2: size = "WORD"; break;
15367 case 4: size = "DWORD"; break;
15368 case 8: size = "QWORD"; break;
15369 case 12: size = "TBYTE"; break;
15370 case 16:
15371 if (GET_MODE (x) == XFmode)
15372 size = "TBYTE";
15373 else
15374 size = "XMMWORD";
15375 break;
15376 case 32: size = "YMMWORD"; break;
15377 case 64: size = "ZMMWORD"; break;
15378 default:
15379 gcc_unreachable ();
15380 }
15381
15382 /* Check for explicit size override (codes 'b', 'w', 'k',
15383 'q' and 'x') */
15384 if (code == 'b')
15385 size = "BYTE";
15386 else if (code == 'w')
15387 size = "WORD";
15388 else if (code == 'k')
15389 size = "DWORD";
15390 else if (code == 'q')
15391 size = "QWORD";
15392 else if (code == 'x')
15393 size = "XMMWORD";
15394
15395 fputs (size, file);
15396 fputs (" PTR ", file);
15397 }
15398
15399 x = XEXP (x, 0);
15400 /* Avoid (%rip) for call operands. */
15401 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15402 && !CONST_INT_P (x))
15403 output_addr_const (file, x);
15404 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15405 output_operand_lossage ("invalid constraints for operand");
15406 else
15407 output_address (x);
15408 }
15409
15410 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15411 {
15412 REAL_VALUE_TYPE r;
15413 long l;
15414
15415 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15416 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15417
15418 if (ASSEMBLER_DIALECT == ASM_ATT)
15419 putc ('$', file);
15420 /* Sign-extend the 32-bit SFmode immediate to 8 bytes. */
15421 if (code == 'q')
15422 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15423 (unsigned long long) (int) l);
15424 else
15425 fprintf (file, "0x%08x", (unsigned int) l);
15426 }
15427
15428 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15429 {
15430 REAL_VALUE_TYPE r;
15431 long l[2];
15432
15433 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15434 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15435
15436 if (ASSEMBLER_DIALECT == ASM_ATT)
15437 putc ('$', file);
15438 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15439 }
15440
15441 /* These float cases don't actually occur as immediate operands. */
15442 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15443 {
15444 char dstr[30];
15445
15446 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15447 fputs (dstr, file);
15448 }
15449
15450 else
15451 {
15452 /* We have patterns that allow zero sets of memory, for instance.
15453 In 64-bit mode, we should probably support all 8-byte vectors,
15454 since we can in fact encode that into an immediate. */
15455 if (GET_CODE (x) == CONST_VECTOR)
15456 {
15457 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15458 x = const0_rtx;
15459 }
15460
15461 if (code != 'P' && code != 'p')
15462 {
15463 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15464 {
15465 if (ASSEMBLER_DIALECT == ASM_ATT)
15466 putc ('$', file);
15467 }
15468 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15469 || GET_CODE (x) == LABEL_REF)
15470 {
15471 if (ASSEMBLER_DIALECT == ASM_ATT)
15472 putc ('$', file);
15473 else
15474 fputs ("OFFSET FLAT:", file);
15475 }
15476 }
15477 if (CONST_INT_P (x))
15478 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15479 else if (flag_pic || MACHOPIC_INDIRECT)
15480 output_pic_addr_const (file, x, code);
15481 else
15482 output_addr_const (file, x);
15483 }
15484 }
15485
15486 static bool
15487 ix86_print_operand_punct_valid_p (unsigned char code)
15488 {
15489 return (code == '@' || code == '*' || code == '+' || code == '&'
15490 || code == ';' || code == '~' || code == '^');
15491 }
15492 \f
15493 /* Print a memory operand whose address is ADDR. */
15494
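/* As an orientation sketch (made-up operands), an address of the form
   base + index*scale + disp is printed as

     AT&T:   disp(%base,%index,scale)    e.g.  16(%rax,%rbx,4)
     Intel:  [base+index*scale+disp]     e.g.  [rax+rbx*4+16]

   with the segment-override, RIP-relative and addr32 special cases
   handled below.  */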
15495 static void
15496 ix86_print_operand_address (FILE *file, rtx addr)
15497 {
15498 struct ix86_address parts;
15499 rtx base, index, disp;
15500 int scale;
15501 int ok;
15502 bool vsib = false;
15503 int code = 0;
15504
15505 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15506 {
15507 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15508 gcc_assert (parts.index == NULL_RTX);
15509 parts.index = XVECEXP (addr, 0, 1);
15510 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15511 addr = XVECEXP (addr, 0, 0);
15512 vsib = true;
15513 }
15514 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15515 {
15516 gcc_assert (TARGET_64BIT);
15517 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15518 code = 'q';
15519 }
15520 else
15521 ok = ix86_decompose_address (addr, &parts);
15522
15523 gcc_assert (ok);
15524
15525 base = parts.base;
15526 index = parts.index;
15527 disp = parts.disp;
15528 scale = parts.scale;
15529
15530 switch (parts.seg)
15531 {
15532 case SEG_DEFAULT:
15533 break;
15534 case SEG_FS:
15535 case SEG_GS:
15536 if (ASSEMBLER_DIALECT == ASM_ATT)
15537 putc ('%', file);
15538 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15539 break;
15540 default:
15541 gcc_unreachable ();
15542 }
15543
15544 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
15545 if (TARGET_64BIT && !base && !index)
15546 {
15547 rtx symbol = disp;
15548
15549 if (GET_CODE (disp) == CONST
15550 && GET_CODE (XEXP (disp, 0)) == PLUS
15551 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15552 symbol = XEXP (XEXP (disp, 0), 0);
15553
15554 if (GET_CODE (symbol) == LABEL_REF
15555 || (GET_CODE (symbol) == SYMBOL_REF
15556 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15557 base = pc_rtx;
15558 }
15559 if (!base && !index)
15560 {
15561 /* A displacement-only address requires special attention. */
15562
15563 if (CONST_INT_P (disp))
15564 {
15565 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15566 fputs ("ds:", file);
15567 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15568 }
15569 else if (flag_pic)
15570 output_pic_addr_const (file, disp, 0);
15571 else
15572 output_addr_const (file, disp);
15573 }
15574 else
15575 {
15576 /* Print SImode register names to force addr32 prefix. */
15577 if (SImode_address_operand (addr, VOIDmode))
15578 {
15579 #ifdef ENABLE_CHECKING
15580 gcc_assert (TARGET_64BIT);
15581 switch (GET_CODE (addr))
15582 {
15583 case SUBREG:
15584 gcc_assert (GET_MODE (addr) == SImode);
15585 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15586 break;
15587 case ZERO_EXTEND:
15588 case AND:
15589 gcc_assert (GET_MODE (addr) == DImode);
15590 break;
15591 default:
15592 gcc_unreachable ();
15593 }
15594 #endif
15595 gcc_assert (!code);
15596 code = 'k';
15597 }
15598 else if (code == 0
15599 && TARGET_X32
15600 && disp
15601 && CONST_INT_P (disp)
15602 && INTVAL (disp) < -16*1024*1024)
15603 {
15604 /* X32 runs in 64-bit mode, where displacement, DISP, in
15605 address DISP(%r64), is encoded as 32-bit immediate sign-
15606 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15607 address is %r64 + 0xffffffffbffffd00. When %r64 <
15608 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15609 which is invalid for x32. The correct address is %r64
15610 - 0x40000300 == 0xf7ffdd64. To properly encode
15611 -0x40000300(%r64) for x32, we zero-extend negative
15612 displacement by forcing addr32 prefix which truncates
15613 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15614 zero-extend all negative displacements, including -1(%rsp).
15615 However, for small negative displacements, sign-extension
15616 won't cause overflow. We only zero-extend negative
15617 displacements if they are < -16*1024*1024, which is also used
15618 to check legitimate address displacements for PIC. */
15619 code = 'k';
15620 }
15621
15622 if (ASSEMBLER_DIALECT == ASM_ATT)
15623 {
15624 if (disp)
15625 {
15626 if (flag_pic)
15627 output_pic_addr_const (file, disp, 0);
15628 else if (GET_CODE (disp) == LABEL_REF)
15629 output_asm_label (disp);
15630 else
15631 output_addr_const (file, disp);
15632 }
15633
15634 putc ('(', file);
15635 if (base)
15636 print_reg (base, code, file);
15637 if (index)
15638 {
15639 putc (',', file);
15640 print_reg (index, vsib ? 0 : code, file);
15641 if (scale != 1 || vsib)
15642 fprintf (file, ",%d", scale);
15643 }
15644 putc (')', file);
15645 }
15646 else
15647 {
15648 rtx offset = NULL_RTX;
15649
15650 if (disp)
15651 {
15652 /* Pull out the offset of a symbol; print any symbol itself. */
15653 if (GET_CODE (disp) == CONST
15654 && GET_CODE (XEXP (disp, 0)) == PLUS
15655 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15656 {
15657 offset = XEXP (XEXP (disp, 0), 1);
15658 disp = gen_rtx_CONST (VOIDmode,
15659 XEXP (XEXP (disp, 0), 0));
15660 }
15661
15662 if (flag_pic)
15663 output_pic_addr_const (file, disp, 0);
15664 else if (GET_CODE (disp) == LABEL_REF)
15665 output_asm_label (disp);
15666 else if (CONST_INT_P (disp))
15667 offset = disp;
15668 else
15669 output_addr_const (file, disp);
15670 }
15671
15672 putc ('[', file);
15673 if (base)
15674 {
15675 print_reg (base, code, file);
15676 if (offset)
15677 {
15678 if (INTVAL (offset) >= 0)
15679 putc ('+', file);
15680 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15681 }
15682 }
15683 else if (offset)
15684 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15685 else
15686 putc ('0', file);
15687
15688 if (index)
15689 {
15690 putc ('+', file);
15691 print_reg (index, vsib ? 0 : code, file);
15692 if (scale != 1 || vsib)
15693 fprintf (file, "*%d", scale);
15694 }
15695 putc (']', file);
15696 }
15697 }
15698 }
15699
15700 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
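/* For example (reading the cases below), an UNSPEC_GOTTPOFF wrapping a
   symbol foo is printed as "foo@gottpoff"; the other TLS unspecs follow
   the same pattern with their own relocation suffixes.  */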
15701
15702 static bool
15703 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15704 {
15705 rtx op;
15706
15707 if (GET_CODE (x) != UNSPEC)
15708 return false;
15709
15710 op = XVECEXP (x, 0, 0);
15711 switch (XINT (x, 1))
15712 {
15713 case UNSPEC_GOTTPOFF:
15714 output_addr_const (file, op);
15715 /* FIXME: This might be @TPOFF in Sun ld. */
15716 fputs ("@gottpoff", file);
15717 break;
15718 case UNSPEC_TPOFF:
15719 output_addr_const (file, op);
15720 fputs ("@tpoff", file);
15721 break;
15722 case UNSPEC_NTPOFF:
15723 output_addr_const (file, op);
15724 if (TARGET_64BIT)
15725 fputs ("@tpoff", file);
15726 else
15727 fputs ("@ntpoff", file);
15728 break;
15729 case UNSPEC_DTPOFF:
15730 output_addr_const (file, op);
15731 fputs ("@dtpoff", file);
15732 break;
15733 case UNSPEC_GOTNTPOFF:
15734 output_addr_const (file, op);
15735 if (TARGET_64BIT)
15736 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15737 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15738 else
15739 fputs ("@gotntpoff", file);
15740 break;
15741 case UNSPEC_INDNTPOFF:
15742 output_addr_const (file, op);
15743 fputs ("@indntpoff", file);
15744 break;
15745 #if TARGET_MACHO
15746 case UNSPEC_MACHOPIC_OFFSET:
15747 output_addr_const (file, op);
15748 putc ('-', file);
15749 machopic_output_function_base_name (file);
15750 break;
15751 #endif
15752
15753 case UNSPEC_STACK_CHECK:
15754 {
15755 int offset;
15756
15757 gcc_assert (flag_split_stack);
15758
15759 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15760 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15761 #else
15762 gcc_unreachable ();
15763 #endif
15764
15765 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15766 }
15767 break;
15768
15769 default:
15770 return false;
15771 }
15772
15773 return true;
15774 }
15775 \f
15776 /* Split one or more double-mode RTL references into pairs of half-mode
15777 references. The RTL can be REG, offsettable MEM, integer constant, or
15778 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15779 split and "num" is its length. lo_half and hi_half are output arrays
15780 that parallel "operands". */
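/* For instance, a DImode operand is split into two SImode halves; for a
   MEM the low half is taken at offset 0 and the high half at offset
   GET_MODE_SIZE (SImode) == 4, matching the little-endian layout of the
   target.  */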
15781
15782 void
15783 split_double_mode (enum machine_mode mode, rtx operands[],
15784 int num, rtx lo_half[], rtx hi_half[])
15785 {
15786 enum machine_mode half_mode;
15787 unsigned int byte;
15788
15789 switch (mode)
15790 {
15791 case TImode:
15792 half_mode = DImode;
15793 break;
15794 case DImode:
15795 half_mode = SImode;
15796 break;
15797 default:
15798 gcc_unreachable ();
15799 }
15800
15801 byte = GET_MODE_SIZE (half_mode);
15802
15803 while (num--)
15804 {
15805 rtx op = operands[num];
15806
15807 /* simplify_subreg refuses to split volatile memory addresses,
15808 but we still have to handle them. */
15809 if (MEM_P (op))
15810 {
15811 lo_half[num] = adjust_address (op, half_mode, 0);
15812 hi_half[num] = adjust_address (op, half_mode, byte);
15813 }
15814 else
15815 {
15816 lo_half[num] = simplify_gen_subreg (half_mode, op,
15817 GET_MODE (op) == VOIDmode
15818 ? mode : GET_MODE (op), 0);
15819 hi_half[num] = simplify_gen_subreg (half_mode, op,
15820 GET_MODE (op) == VOIDmode
15821 ? mode : GET_MODE (op), byte);
15822 }
15823 }
15824 }
15825 \f
15826 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15827 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15828 is the expression of the binary operation. The output may either be
15829 emitted here, or returned to the caller, like all output_* functions.
15830
15831 There is no guarantee that the operands are the same mode, as they
15832 might be within FLOAT or FLOAT_EXTEND expressions. */
15833
15834 #ifndef SYSV386_COMPAT
15835 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15836 wants to fix the assemblers because that causes incompatibility
15837 with gcc. No-one wants to fix gcc because that causes
15838 incompatibility with assemblers... You can use the option of
15839 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15840 #define SYSV386_COMPAT 1
15841 #endif
15842
15843 const char *
15844 output_387_binary_op (rtx insn, rtx *operands)
15845 {
15846 static char buf[40];
15847 const char *p;
15848 const char *ssep;
15849 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15850
15851 #ifdef ENABLE_CHECKING
15852 /* Even if we do not want to check the inputs, this documents the input
15853 constraints, which helps in understanding the following code. */
15854 if (STACK_REG_P (operands[0])
15855 && ((REG_P (operands[1])
15856 && REGNO (operands[0]) == REGNO (operands[1])
15857 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15858 || (REG_P (operands[2])
15859 && REGNO (operands[0]) == REGNO (operands[2])
15860 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15861 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15862 ; /* ok */
15863 else
15864 gcc_assert (is_sse);
15865 #endif
15866
15867 switch (GET_CODE (operands[3]))
15868 {
15869 case PLUS:
15870 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15871 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15872 p = "fiadd";
15873 else
15874 p = "fadd";
15875 ssep = "vadd";
15876 break;
15877
15878 case MINUS:
15879 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15880 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15881 p = "fisub";
15882 else
15883 p = "fsub";
15884 ssep = "vsub";
15885 break;
15886
15887 case MULT:
15888 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15889 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15890 p = "fimul";
15891 else
15892 p = "fmul";
15893 ssep = "vmul";
15894 break;
15895
15896 case DIV:
15897 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15898 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15899 p = "fidiv";
15900 else
15901 p = "fdiv";
15902 ssep = "vdiv";
15903 break;
15904
15905 default:
15906 gcc_unreachable ();
15907 }
15908
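  /* A sketch of the two paths below: for a DFmode PLUS the SSE/AVX path
     builds "vaddsd\t{%2, %1, %0|%0, %1, %2}" with AVX (or
     "addsd\t{%2, %0|%0, %2}" without), while the x87 path afterwards
     assembles the fadd/fsub/fmul/fdiv templates instead.  */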
15909 if (is_sse)
15910 {
15911 if (TARGET_AVX)
15912 {
15913 strcpy (buf, ssep);
15914 if (GET_MODE (operands[0]) == SFmode)
15915 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15916 else
15917 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15918 }
15919 else
15920 {
15921 strcpy (buf, ssep + 1);
15922 if (GET_MODE (operands[0]) == SFmode)
15923 strcat (buf, "ss\t{%2, %0|%0, %2}");
15924 else
15925 strcat (buf, "sd\t{%2, %0|%0, %2}");
15926 }
15927 return buf;
15928 }
15929 strcpy (buf, p);
15930
15931 switch (GET_CODE (operands[3]))
15932 {
15933 case MULT:
15934 case PLUS:
15935 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15936 {
15937 rtx temp = operands[2];
15938 operands[2] = operands[1];
15939 operands[1] = temp;
15940 }
15941
15942 /* We now know that operands[0] == operands[1]. */
15943
15944 if (MEM_P (operands[2]))
15945 {
15946 p = "%Z2\t%2";
15947 break;
15948 }
15949
15950 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15951 {
15952 if (STACK_TOP_P (operands[0]))
15953 /* How is it that we are storing to a dead operand[2]?
15954 Well, presumably operands[1] is dead too. We can't
15955 store the result to st(0) as st(0) gets popped on this
15956 instruction. Instead store to operands[2] (which I
15957 think has to be st(1)). st(1) will be popped later.
15958 gcc <= 2.8.1 didn't have this check and generated
15959 assembly code that the Unixware assembler rejected. */
15960 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15961 else
15962 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15963 break;
15964 }
15965
15966 if (STACK_TOP_P (operands[0]))
15967 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15968 else
15969 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15970 break;
15971
15972 case MINUS:
15973 case DIV:
15974 if (MEM_P (operands[1]))
15975 {
15976 p = "r%Z1\t%1";
15977 break;
15978 }
15979
15980 if (MEM_P (operands[2]))
15981 {
15982 p = "%Z2\t%2";
15983 break;
15984 }
15985
15986 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15987 {
15988 #if SYSV386_COMPAT
15989 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15990 derived assemblers, confusingly reverse the direction of
15991 the operation for fsub{r} and fdiv{r} when the
15992 destination register is not st(0). The Intel assembler
15993 doesn't have this brain damage. Read !SYSV386_COMPAT to
15994 figure out what the hardware really does. */
15995 if (STACK_TOP_P (operands[0]))
15996 p = "{p\t%0, %2|rp\t%2, %0}";
15997 else
15998 p = "{rp\t%2, %0|p\t%0, %2}";
15999 #else
16000 if (STACK_TOP_P (operands[0]))
16001 /* As above for fmul/fadd, we can't store to st(0). */
16002 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16003 else
16004 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16005 #endif
16006 break;
16007 }
16008
16009 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16010 {
16011 #if SYSV386_COMPAT
16012 if (STACK_TOP_P (operands[0]))
16013 p = "{rp\t%0, %1|p\t%1, %0}";
16014 else
16015 p = "{p\t%1, %0|rp\t%0, %1}";
16016 #else
16017 if (STACK_TOP_P (operands[0]))
16018 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16019 else
16020 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16021 #endif
16022 break;
16023 }
16024
16025 if (STACK_TOP_P (operands[0]))
16026 {
16027 if (STACK_TOP_P (operands[1]))
16028 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16029 else
16030 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16031 break;
16032 }
16033 else if (STACK_TOP_P (operands[1]))
16034 {
16035 #if SYSV386_COMPAT
16036 p = "{\t%1, %0|r\t%0, %1}";
16037 #else
16038 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16039 #endif
16040 }
16041 else
16042 {
16043 #if SYSV386_COMPAT
16044 p = "{r\t%2, %0|\t%0, %2}";
16045 #else
16046 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16047 #endif
16048 }
16049 break;
16050
16051 default:
16052 gcc_unreachable ();
16053 }
16054
16055 strcat (buf, p);
16056 return buf;
16057 }
16058
16059 /* Check if a 256bit AVX register is referenced inside of EXP. */
16060
16061 static int
16062 ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED)
16063 {
16064 rtx exp = *pexp;
16065
16066 if (GET_CODE (exp) == SUBREG)
16067 exp = SUBREG_REG (exp);
16068
16069 if (REG_P (exp)
16070 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16071 return 1;
16072
16073 return 0;
16074 }
16075
16076 /* Return needed mode for entity in optimize_mode_switching pass. */
16077
16078 static int
16079 ix86_avx_u128_mode_needed (rtx insn)
16080 {
16081 if (CALL_P (insn))
16082 {
16083 rtx link;
16084
16085 /* Needed mode is set to AVX_U128_CLEAN if there are
16086 no 256bit modes used in function arguments. */
16087 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16088 link;
16089 link = XEXP (link, 1))
16090 {
16091 if (GET_CODE (XEXP (link, 0)) == USE)
16092 {
16093 rtx arg = XEXP (XEXP (link, 0), 0);
16094
16095 if (ix86_check_avx256_register (&arg, NULL))
16096 return AVX_U128_DIRTY;
16097 }
16098 }
16099
16100 return AVX_U128_CLEAN;
16101 }
16102
16103 /* Require DIRTY mode if a 256bit AVX register is referenced. The hardware
16104 changes state only when a 256bit register is written to, but we need
16105 to prevent the compiler from moving the optimal insertion point above
16106 an eventual read from a 256bit register. */
16107 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16108 return AVX_U128_DIRTY;
16109
16110 return AVX_U128_ANY;
16111 }
16112
16113 /* Return mode that i387 must be switched into
16114 prior to the execution of insn. */
16115
16116 static int
16117 ix86_i387_mode_needed (int entity, rtx insn)
16118 {
16119 enum attr_i387_cw mode;
16120
16121 /* The mode UNINITIALIZED is used to store the control word after a
16122 function call or ASM pattern. The mode ANY specifies that the function
16123 has no requirements on the control word and makes no changes in the
16124 bits we are interested in. */
16125
16126 if (CALL_P (insn)
16127 || (NONJUMP_INSN_P (insn)
16128 && (asm_noperands (PATTERN (insn)) >= 0
16129 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16130 return I387_CW_UNINITIALIZED;
16131
16132 if (recog_memoized (insn) < 0)
16133 return I387_CW_ANY;
16134
16135 mode = get_attr_i387_cw (insn);
16136
16137 switch (entity)
16138 {
16139 case I387_TRUNC:
16140 if (mode == I387_CW_TRUNC)
16141 return mode;
16142 break;
16143
16144 case I387_FLOOR:
16145 if (mode == I387_CW_FLOOR)
16146 return mode;
16147 break;
16148
16149 case I387_CEIL:
16150 if (mode == I387_CW_CEIL)
16151 return mode;
16152 break;
16153
16154 case I387_MASK_PM:
16155 if (mode == I387_CW_MASK_PM)
16156 return mode;
16157 break;
16158
16159 default:
16160 gcc_unreachable ();
16161 }
16162
16163 return I387_CW_ANY;
16164 }
16165
16166 /* Return mode that entity must be switched into
16167 prior to the execution of insn. */
16168
16169 static int
16170 ix86_mode_needed (int entity, rtx insn)
16171 {
16172 switch (entity)
16173 {
16174 case AVX_U128:
16175 return ix86_avx_u128_mode_needed (insn);
16176 case I387_TRUNC:
16177 case I387_FLOOR:
16178 case I387_CEIL:
16179 case I387_MASK_PM:
16180 return ix86_i387_mode_needed (entity, insn);
16181 default:
16182 gcc_unreachable ();
16183 }
16184 return 0;
16185 }
16186
16187 /* Check if a 256bit AVX register is referenced in stores. */
16188
16189 static void
16190 ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
16191 {
16192 if (ix86_check_avx256_register (&dest, NULL))
16193 {
16194 bool *used = (bool *) data;
16195 *used = true;
16196 }
16197 }
16198
16199 /* Calculate mode of upper 128bit AVX registers after the insn. */
16200
16201 static int
16202 ix86_avx_u128_mode_after (int mode, rtx insn)
16203 {
16204 rtx pat = PATTERN (insn);
16205
16206 if (vzeroupper_operation (pat, VOIDmode)
16207 || vzeroall_operation (pat, VOIDmode))
16208 return AVX_U128_CLEAN;
16209
16210 /* We know that the state is clean after a CALL insn if no 256bit
16211 register is used for the function return value. */
16212 if (CALL_P (insn))
16213 {
16214 bool avx_reg256_found = false;
16215 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16216
16217 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16218 }
16219
16220 /* Otherwise, return current mode. Remember that if insn
16221 references AVX 256bit registers, the mode was already changed
16222 to DIRTY from MODE_NEEDED. */
16223 return mode;
16224 }
16225
16226 /* Return the mode that an insn results in. */
16227
16228 int
16229 ix86_mode_after (int entity, int mode, rtx insn)
16230 {
16231 switch (entity)
16232 {
16233 case AVX_U128:
16234 return ix86_avx_u128_mode_after (mode, insn);
16235 case I387_TRUNC:
16236 case I387_FLOOR:
16237 case I387_CEIL:
16238 case I387_MASK_PM:
16239 return mode;
16240 default:
16241 gcc_unreachable ();
16242 }
16243 }
16244
16245 static int
16246 ix86_avx_u128_mode_entry (void)
16247 {
16248 tree arg;
16249
16250 /* Entry mode is set to AVX_U128_DIRTY if there are
16251 256bit modes used in function arguments. */
16252 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16253 arg = TREE_CHAIN (arg))
16254 {
16255 rtx incoming = DECL_INCOMING_RTL (arg);
16256
16257 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16258 return AVX_U128_DIRTY;
16259 }
16260
16261 return AVX_U128_CLEAN;
16262 }
16263
16264 /* Return a mode that ENTITY is assumed to be
16265 switched to at function entry. */
16266
16267 static int
16268 ix86_mode_entry (int entity)
16269 {
16270 switch (entity)
16271 {
16272 case AVX_U128:
16273 return ix86_avx_u128_mode_entry ();
16274 case I387_TRUNC:
16275 case I387_FLOOR:
16276 case I387_CEIL:
16277 case I387_MASK_PM:
16278 return I387_CW_ANY;
16279 default:
16280 gcc_unreachable ();
16281 }
16282 }
16283
16284 static int
16285 ix86_avx_u128_mode_exit (void)
16286 {
16287 rtx reg = crtl->return_rtx;
16288
16289 /* Exit mode is set to AVX_U128_DIRTY if there are
16290 256bit modes used in the function return register. */
16291 if (reg && ix86_check_avx256_register (&reg, NULL))
16292 return AVX_U128_DIRTY;
16293
16294 return AVX_U128_CLEAN;
16295 }
16296
16297 /* Return a mode that ENTITY is assumed to be
16298 switched to at function exit. */
16299
16300 static int
16301 ix86_mode_exit (int entity)
16302 {
16303 switch (entity)
16304 {
16305 case AVX_U128:
16306 return ix86_avx_u128_mode_exit ();
16307 case I387_TRUNC:
16308 case I387_FLOOR:
16309 case I387_CEIL:
16310 case I387_MASK_PM:
16311 return I387_CW_ANY;
16312 default:
16313 gcc_unreachable ();
16314 }
16315 }
16316
16317 static int
16318 ix86_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
16319 {
16320 return n;
16321 }
16322
16323 /* Output code to initialize the control word copies used by the trunc?f?i
16324 and rounding patterns. The current control word is saved in STORED_MODE,
16325 while the copy modified for MODE is stored in NEW_MODE. */
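/* Background note (standard x87 facts, not specific to this file): the
   rounding-control field of the control word is bits 11:10 -- 00 nearest,
   01 down, 10 up, 11 truncate -- and bit 5 masks the precision exception.
   That is why the code below uses the masks 0x0c00, 0x0400, 0x0800 and
   0x0020.  */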
16326
16327 static void
16328 emit_i387_cw_initialization (int mode)
16329 {
16330 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16331 rtx new_mode;
16332
16333 enum ix86_stack_slot slot;
16334
16335 rtx reg = gen_reg_rtx (HImode);
16336
16337 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16338 emit_move_insn (reg, copy_rtx (stored_mode));
16339
16340 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16341 || optimize_insn_for_size_p ())
16342 {
16343 switch (mode)
16344 {
16345 case I387_CW_TRUNC:
16346 /* round toward zero (truncate) */
16347 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16348 slot = SLOT_CW_TRUNC;
16349 break;
16350
16351 case I387_CW_FLOOR:
16352 /* round down toward -oo */
16353 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16354 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16355 slot = SLOT_CW_FLOOR;
16356 break;
16357
16358 case I387_CW_CEIL:
16359 /* round up toward +oo */
16360 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16361 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16362 slot = SLOT_CW_CEIL;
16363 break;
16364
16365 case I387_CW_MASK_PM:
16366 /* mask precision exception for nearbyint() */
16367 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16368 slot = SLOT_CW_MASK_PM;
16369 break;
16370
16371 default:
16372 gcc_unreachable ();
16373 }
16374 }
16375 else
16376 {
16377 switch (mode)
16378 {
16379 case I387_CW_TRUNC:
16380 /* round toward zero (truncate) */
16381 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16382 slot = SLOT_CW_TRUNC;
16383 break;
16384
16385 case I387_CW_FLOOR:
16386 /* round down toward -oo */
16387 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16388 slot = SLOT_CW_FLOOR;
16389 break;
16390
16391 case I387_CW_CEIL:
16392 /* round up toward +oo */
16393 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16394 slot = SLOT_CW_CEIL;
16395 break;
16396
16397 case I387_CW_MASK_PM:
16398 /* mask precision exception for nearbyint() */
16399 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16400 slot = SLOT_CW_MASK_PM;
16401 break;
16402
16403 default:
16404 gcc_unreachable ();
16405 }
16406 }
16407
16408 gcc_assert (slot < MAX_386_STACK_LOCALS);
16409
16410 new_mode = assign_386_stack_local (HImode, slot);
16411 emit_move_insn (new_mode, reg);
16412 }
16413
16414 /* Emit vzeroupper. */
16415
16416 void
16417 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16418 {
16419 int i;
16420
16421 /* Cancel automatic vzeroupper insertion if there are
16422 live call-saved SSE registers at the insertion point. */
16423
16424 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16425 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16426 return;
16427
16428 if (TARGET_64BIT)
16429 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16430 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16431 return;
16432
16433 emit_insn (gen_avx_vzeroupper ());
16434 }
16435
16438 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE
16439 is the set of hard registers live at the point where the insn(s)
16440 are to be inserted. */
16441
16442 static void
16443 ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
16444 {
16445 switch (entity)
16446 {
16447 case AVX_U128:
16448 if (mode == AVX_U128_CLEAN)
16449 ix86_avx_emit_vzeroupper (regs_live);
16450 break;
16451 case I387_TRUNC:
16452 case I387_FLOOR:
16453 case I387_CEIL:
16454 case I387_MASK_PM:
16455 if (mode != I387_CW_ANY
16456 && mode != I387_CW_UNINITIALIZED)
16457 emit_i387_cw_initialization (mode);
16458 break;
16459 default:
16460 gcc_unreachable ();
16461 }
16462 }
16463
16464 /* Output code for INSN to convert a float to a signed int. OPERANDS
16465 are the insn operands. The output may be [HSD]Imode and the input
16466 operand may be [SDX]Fmode. */
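/* Roughly, the non-fisttp path below emits "fldcw %3; fistp %0; fldcw %2",
   where operand 3 is assumed to hold the control word with the requested
   rounding mode and operand 2 the saved control word (see
   emit_i387_cw_initialization above).  */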
16467
16468 const char *
16469 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16470 {
16471 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16472 int dimode_p = GET_MODE (operands[0]) == DImode;
16473 int round_mode = get_attr_i387_cw (insn);
16474
16475 /* Jump through a hoop or two for DImode, since the hardware has no
16476 non-popping instruction. We used to do this a different way, but
16477 that was somewhat fragile and broke with post-reload splitters. */
16478 if ((dimode_p || fisttp) && !stack_top_dies)
16479 output_asm_insn ("fld\t%y1", operands);
16480
16481 gcc_assert (STACK_TOP_P (operands[1]));
16482 gcc_assert (MEM_P (operands[0]));
16483 gcc_assert (GET_MODE (operands[1]) != TFmode);
16484
16485 if (fisttp)
16486 output_asm_insn ("fisttp%Z0\t%0", operands);
16487 else
16488 {
16489 if (round_mode != I387_CW_ANY)
16490 output_asm_insn ("fldcw\t%3", operands);
16491 if (stack_top_dies || dimode_p)
16492 output_asm_insn ("fistp%Z0\t%0", operands);
16493 else
16494 output_asm_insn ("fist%Z0\t%0", operands);
16495 if (round_mode != I387_CW_ANY)
16496 output_asm_insn ("fldcw\t%2", operands);
16497 }
16498
16499 return "";
16500 }
16501
16502 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16503 have the values zero or one, indicates the ffreep insn's operand
16504 from the OPERANDS array. */
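/* When the assembler lacks ffreep support, the fallback below emits the
   raw opcode bytes instead: ffreep %st(N) encodes as DF C0+N, and
   ASM_SHORT "0xc<N>df" produces exactly those two bytes on this
   little-endian target.  */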
16505
16506 static const char *
16507 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16508 {
16509 if (TARGET_USE_FFREEP)
16510 #ifdef HAVE_AS_IX86_FFREEP
16511 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16512 #else
16513 {
16514 static char retval[32];
16515 int regno = REGNO (operands[opno]);
16516
16517 gcc_assert (STACK_REGNO_P (regno));
16518
16519 regno -= FIRST_STACK_REG;
16520
16521 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16522 return retval;
16523 }
16524 #endif
16525
16526 return opno ? "fstp\t%y1" : "fstp\t%y0";
16527 }
16528
16529
16530 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16531 should be used. UNORDERED_P is true when fucom should be used. */
16532
16533 const char *
16534 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16535 {
16536 int stack_top_dies;
16537 rtx cmp_op0, cmp_op1;
16538 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16539
16540 if (eflags_p)
16541 {
16542 cmp_op0 = operands[0];
16543 cmp_op1 = operands[1];
16544 }
16545 else
16546 {
16547 cmp_op0 = operands[1];
16548 cmp_op1 = operands[2];
16549 }
16550
16551 if (is_sse)
16552 {
16553 if (GET_MODE (operands[0]) == SFmode)
16554 if (unordered_p)
16555 return "%vucomiss\t{%1, %0|%0, %1}";
16556 else
16557 return "%vcomiss\t{%1, %0|%0, %1}";
16558 else
16559 if (unordered_p)
16560 return "%vucomisd\t{%1, %0|%0, %1}";
16561 else
16562 return "%vcomisd\t{%1, %0|%0, %1}";
16563 }
16564
16565 gcc_assert (STACK_TOP_P (cmp_op0));
16566
16567 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16568
16569 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16570 {
16571 if (stack_top_dies)
16572 {
16573 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16574 return output_387_ffreep (operands, 1);
16575 }
16576 else
16577 return "ftst\n\tfnstsw\t%0";
16578 }
16579
16580 if (STACK_REG_P (cmp_op1)
16581 && stack_top_dies
16582 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16583 && REGNO (cmp_op1) != FIRST_STACK_REG)
16584 {
16585 /* If the top of the 387 stack dies, and the other operand is also
16586 a stack register that dies, then this must be a `fcompp' float
16587 compare. */
16588
16589 if (eflags_p)
16590 {
16591 /* There is no double popping fcomi variant. Fortunately,
16592 eflags is immune from the fstp's cc clobbering. */
16593 if (unordered_p)
16594 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16595 else
16596 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16597 return output_387_ffreep (operands, 0);
16598 }
16599 else
16600 {
16601 if (unordered_p)
16602 return "fucompp\n\tfnstsw\t%0";
16603 else
16604 return "fcompp\n\tfnstsw\t%0";
16605 }
16606 }
16607 else
16608 {
16609 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
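      /* Worked example: eflags_p = 1, integer mode = 0, unordered_p = 1 and
	 stack_top_dies = 1 give mask 8 + 2 + 1 = 11, which selects
	 "fucomip\t{%y1, %0|%0, %y1}" from the table below.  */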
16610
16611 static const char * const alt[16] =
16612 {
16613 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16614 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16615 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16616 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16617
16618 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16619 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16620 NULL,
16621 NULL,
16622
16623 "fcomi\t{%y1, %0|%0, %y1}",
16624 "fcomip\t{%y1, %0|%0, %y1}",
16625 "fucomi\t{%y1, %0|%0, %y1}",
16626 "fucomip\t{%y1, %0|%0, %y1}",
16627
16628 NULL,
16629 NULL,
16630 NULL,
16631 NULL
16632 };
16633
16634 int mask;
16635 const char *ret;
16636
16637 mask = eflags_p << 3;
16638 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16639 mask |= unordered_p << 1;
16640 mask |= stack_top_dies;
16641
16642 gcc_assert (mask < 16);
16643 ret = alt[mask];
16644 gcc_assert (ret);
16645
16646 return ret;
16647 }
16648 }
16649
16650 void
16651 ix86_output_addr_vec_elt (FILE *file, int value)
16652 {
16653 const char *directive = ASM_LONG;
16654
16655 #ifdef ASM_QUAD
16656 if (TARGET_LP64)
16657 directive = ASM_QUAD;
16658 #else
16659 gcc_assert (!TARGET_64BIT);
16660 #endif
16661
16662 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16663 }
16664
16665 void
16666 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16667 {
16668 const char *directive = ASM_LONG;
16669
16670 #ifdef ASM_QUAD
16671 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16672 directive = ASM_QUAD;
16673 #else
16674 gcc_assert (!TARGET_64BIT);
16675 #endif
16676 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16677 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16678 fprintf (file, "%s%s%d-%s%d\n",
16679 directive, LPREFIX, value, LPREFIX, rel);
16680 else if (HAVE_AS_GOTOFF_IN_DATA)
16681 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16682 #if TARGET_MACHO
16683 else if (TARGET_MACHO)
16684 {
16685 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16686 machopic_output_function_base_name (file);
16687 putc ('\n', file);
16688 }
16689 #endif
16690 else
16691 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16692 GOT_SYMBOL_NAME, LPREFIX, value);
16693 }
16694 \f
16695 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16696 for the target. */
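/* Note that the "xor reg, reg" form clobbers the flags, which is why the
   code below wraps the SET in a PARALLEL with a FLAGS_REG clobber whenever
   that form may be chosen.  */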
16697
16698 void
16699 ix86_expand_clear (rtx dest)
16700 {
16701 rtx tmp;
16702
16703 /* We play register width games, which are only valid after reload. */
16704 gcc_assert (reload_completed);
16705
16706 /* Avoid HImode and its attendant prefix byte. */
16707 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16708 dest = gen_rtx_REG (SImode, REGNO (dest));
16709 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16710
16711 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
16712 {
16713 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16714 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16715 }
16716
16717 emit_insn (tmp);
16718 }
16719
16720 /* X is an unchanging MEM. If it is a constant pool reference, return
16721 the constant pool rtx, else NULL. */
16722
16723 rtx
16724 maybe_get_pool_constant (rtx x)
16725 {
16726 x = ix86_delegitimize_address (XEXP (x, 0));
16727
16728 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16729 return get_pool_constant (x);
16730
16731 return NULL_RTX;
16732 }
16733
16734 void
16735 ix86_expand_move (enum machine_mode mode, rtx operands[])
16736 {
16737 rtx op0, op1;
16738 enum tls_model model;
16739
16740 op0 = operands[0];
16741 op1 = operands[1];
16742
16743 if (GET_CODE (op1) == SYMBOL_REF)
16744 {
16745 rtx tmp;
16746
16747 model = SYMBOL_REF_TLS_MODEL (op1);
16748 if (model)
16749 {
16750 op1 = legitimize_tls_address (op1, model, true);
16751 op1 = force_operand (op1, op0);
16752 if (op1 == op0)
16753 return;
16754 op1 = convert_to_mode (mode, op1, 1);
16755 }
16756 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16757 op1 = tmp;
16758 }
16759 else if (GET_CODE (op1) == CONST
16760 && GET_CODE (XEXP (op1, 0)) == PLUS
16761 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16762 {
16763 rtx addend = XEXP (XEXP (op1, 0), 1);
16764 rtx symbol = XEXP (XEXP (op1, 0), 0);
16765 rtx tmp;
16766
16767 model = SYMBOL_REF_TLS_MODEL (symbol);
16768 if (model)
16769 tmp = legitimize_tls_address (symbol, model, true);
16770 else
16771 tmp = legitimize_pe_coff_symbol (symbol, true);
16772
16773 if (tmp)
16774 {
16775 tmp = force_operand (tmp, NULL);
16776 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16777 op0, 1, OPTAB_DIRECT);
16778 if (tmp == op0)
16779 return;
16780 op1 = convert_to_mode (mode, tmp, 1);
16781 }
16782 }
16783
16784 if ((flag_pic || MACHOPIC_INDIRECT)
16785 && symbolic_operand (op1, mode))
16786 {
16787 if (TARGET_MACHO && !TARGET_64BIT)
16788 {
16789 #if TARGET_MACHO
16790 /* dynamic-no-pic */
16791 if (MACHOPIC_INDIRECT)
16792 {
16793 rtx temp = ((reload_in_progress
16794 || ((op0 && REG_P (op0))
16795 && mode == Pmode))
16796 ? op0 : gen_reg_rtx (Pmode));
16797 op1 = machopic_indirect_data_reference (op1, temp);
16798 if (MACHOPIC_PURE)
16799 op1 = machopic_legitimize_pic_address (op1, mode,
16800 temp == op1 ? 0 : temp);
16801 }
16802 if (op0 != op1 && GET_CODE (op0) != MEM)
16803 {
16804 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16805 emit_insn (insn);
16806 return;
16807 }
16808 if (GET_CODE (op0) == MEM)
16809 op1 = force_reg (Pmode, op1);
16810 else
16811 {
16812 rtx temp = op0;
16813 if (GET_CODE (temp) != REG)
16814 temp = gen_reg_rtx (Pmode);
16815 temp = legitimize_pic_address (op1, temp);
16816 if (temp == op0)
16817 return;
16818 op1 = temp;
16819 }
16820 /* dynamic-no-pic */
16821 #endif
16822 }
16823 else
16824 {
16825 if (MEM_P (op0))
16826 op1 = force_reg (mode, op1);
16827 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16828 {
16829 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16830 op1 = legitimize_pic_address (op1, reg);
16831 if (op0 == op1)
16832 return;
16833 op1 = convert_to_mode (mode, op1, 1);
16834 }
16835 }
16836 }
16837 else
16838 {
16839 if (MEM_P (op0)
16840 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16841 || !push_operand (op0, mode))
16842 && MEM_P (op1))
16843 op1 = force_reg (mode, op1);
16844
16845 if (push_operand (op0, mode)
16846 && ! general_no_elim_operand (op1, mode))
16847 op1 = copy_to_mode_reg (mode, op1);
16848
16849 /* Force large constants in 64bit compilation into a register
16850 so that they get CSEed. */
16851 if (can_create_pseudo_p ()
16852 && (mode == DImode) && TARGET_64BIT
16853 && immediate_operand (op1, mode)
16854 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16855 && !register_operand (op0, mode)
16856 && optimize)
16857 op1 = copy_to_mode_reg (mode, op1);
16858
16859 if (can_create_pseudo_p ()
16860 && FLOAT_MODE_P (mode)
16861 && GET_CODE (op1) == CONST_DOUBLE)
16862 {
16863 /* If we are loading a floating point constant to a register,
16864 force the value to memory now, since we'll get better code
16865 out the back end. */
16866
16867 op1 = validize_mem (force_const_mem (mode, op1));
16868 if (!register_operand (op0, mode))
16869 {
16870 rtx temp = gen_reg_rtx (mode);
16871 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16872 emit_move_insn (op0, temp);
16873 return;
16874 }
16875 }
16876 }
16877
16878 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16879 }
16880
16881 void
16882 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16883 {
16884 rtx op0 = operands[0], op1 = operands[1];
16885 unsigned int align = GET_MODE_ALIGNMENT (mode);
16886
16887 if (push_operand (op0, VOIDmode))
16888 op0 = emit_move_resolve_push (mode, op0);
16889
16890 /* Force constants other than zero into memory. We do not know how
16891 the instructions used to build constants modify the upper 64 bits
16892 of the register; once we have that information we may be able
16893 to handle some of them more efficiently. */
16894 if (can_create_pseudo_p ()
16895 && register_operand (op0, mode)
16896 && (CONSTANT_P (op1)
16897 || (GET_CODE (op1) == SUBREG
16898 && CONSTANT_P (SUBREG_REG (op1))))
16899 && !standard_sse_constant_p (op1))
16900 op1 = validize_mem (force_const_mem (mode, op1));
16901
16902 /* We need to check memory alignment for SSE mode since an attribute
16903 can make operands unaligned. */
16904 if (can_create_pseudo_p ()
16905 && SSE_REG_MODE_P (mode)
16906 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16907 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16908 {
16909 rtx tmp[2];
16910
16911 /* ix86_expand_vector_move_misalign() does not like constants ... */
16912 if (CONSTANT_P (op1)
16913 || (GET_CODE (op1) == SUBREG
16914 && CONSTANT_P (SUBREG_REG (op1))))
16915 op1 = validize_mem (force_const_mem (mode, op1));
16916
16917 /* ... nor both arguments in memory. */
16918 if (!register_operand (op0, mode)
16919 && !register_operand (op1, mode))
16920 op1 = force_reg (mode, op1);
16921
16922 tmp[0] = op0; tmp[1] = op1;
16923 ix86_expand_vector_move_misalign (mode, tmp);
16924 return;
16925 }
16926
16927 /* If neither operand is already a register, force operand1 into one. */
16928 if (can_create_pseudo_p ()
16929 && !register_operand (op0, mode)
16930 && !register_operand (op1, mode))
16931 {
16932 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16933 return;
16934 }
16935
16936 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16937 }
16938
16939 /* Split 32-byte AVX unaligned load and store if needed. */
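/* For example, an unaligned 32-byte store is split below into two 16-byte
   vextractf128 stores, and an unaligned 32-byte load into a 16-byte load
   whose result is widened with a VEC_CONCAT (the vinsertf128 idiom).  */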
16940
16941 static void
16942 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16943 {
16944 rtx m;
16945 rtx (*extract) (rtx, rtx, rtx);
16946 rtx (*load_unaligned) (rtx, rtx);
16947 rtx (*store_unaligned) (rtx, rtx);
16948 enum machine_mode mode;
16949
16950 switch (GET_MODE (op0))
16951 {
16952 default:
16953 gcc_unreachable ();
16954 case V32QImode:
16955 extract = gen_avx_vextractf128v32qi;
16956 load_unaligned = gen_avx_loaddquv32qi;
16957 store_unaligned = gen_avx_storedquv32qi;
16958 mode = V16QImode;
16959 break;
16960 case V8SFmode:
16961 extract = gen_avx_vextractf128v8sf;
16962 load_unaligned = gen_avx_loadups256;
16963 store_unaligned = gen_avx_storeups256;
16964 mode = V4SFmode;
16965 break;
16966 case V4DFmode:
16967 extract = gen_avx_vextractf128v4df;
16968 load_unaligned = gen_avx_loadupd256;
16969 store_unaligned = gen_avx_storeupd256;
16970 mode = V2DFmode;
16971 break;
16972 }
16973
16974 if (MEM_P (op1))
16975 {
16976 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16977 {
16978 rtx r = gen_reg_rtx (mode);
16979 m = adjust_address (op1, mode, 0);
16980 emit_move_insn (r, m);
16981 m = adjust_address (op1, mode, 16);
16982 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
16983 emit_move_insn (op0, r);
16984 }
16985 /* Normal *mov<mode>_internal pattern will handle
16986 unaligned loads just fine if misaligned_operand
16987 is true, and without the UNSPEC it can be combined
16988 with arithmetic instructions. */
16989 else if (misaligned_operand (op1, GET_MODE (op1)))
16990 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16991 else
16992 emit_insn (load_unaligned (op0, op1));
16993 }
16994 else if (MEM_P (op0))
16995 {
16996 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
16997 {
16998 m = adjust_address (op0, mode, 0);
16999 emit_insn (extract (m, op1, const0_rtx));
17000 m = adjust_address (op0, mode, 16);
17001 emit_insn (extract (m, op1, const1_rtx));
17002 }
17003 else
17004 emit_insn (store_unaligned (op0, op1));
17005 }
17006 else
17007 gcc_unreachable ();
17008 }
17009
17010 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17011 straight to ix86_expand_vector_move. */
17012 /* Code generation for scalar reg-reg moves of single and double precision data:
17013 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17014 movaps reg, reg
17015 else
17016 movss reg, reg
17017 if (x86_sse_partial_reg_dependency == true)
17018 movapd reg, reg
17019 else
17020 movsd reg, reg
17021
17022 Code generation for scalar loads of double precision data:
17023 if (x86_sse_split_regs == true)
17024 movlpd mem, reg (gas syntax)
17025 else
17026 movsd mem, reg
17027
17028 Code generation for unaligned packed loads of single precision data
17029 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17030 if (x86_sse_unaligned_move_optimal)
17031 movups mem, reg
17032
17033 if (x86_sse_partial_reg_dependency == true)
17034 {
17035 xorps reg, reg
17036 movlps mem, reg
17037 movhps mem+8, reg
17038 }
17039 else
17040 {
17041 movlps mem, reg
17042 movhps mem+8, reg
17043 }
17044
17045 Code generation for unaligned packed loads of double precision data
17046 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17047 if (x86_sse_unaligned_move_optimal)
17048 movupd mem, reg
17049
17050 if (x86_sse_split_regs == true)
17051 {
17052 movlpd mem, reg
17053 movhpd mem+8, reg
17054 }
17055 else
17056 {
17057 movsd mem, reg
17058 movhpd mem+8, reg
17059 }
17060 */
17061
17062 void
17063 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17064 {
17065 rtx op0, op1, orig_op0 = NULL_RTX, m;
17066 rtx (*load_unaligned) (rtx, rtx);
17067 rtx (*store_unaligned) (rtx, rtx);
17068
17069 op0 = operands[0];
17070 op1 = operands[1];
17071
17072 if (GET_MODE_SIZE (mode) == 64)
17073 {
17074 switch (GET_MODE_CLASS (mode))
17075 {
17076 case MODE_VECTOR_INT:
17077 case MODE_INT:
17078 if (GET_MODE (op0) != V16SImode)
17079 {
17080 if (!MEM_P (op0))
17081 {
17082 orig_op0 = op0;
17083 op0 = gen_reg_rtx (V16SImode);
17084 }
17085 else
17086 op0 = gen_lowpart (V16SImode, op0);
17087 }
17088 op1 = gen_lowpart (V16SImode, op1);
17089 /* FALLTHRU */
17090
17091 case MODE_VECTOR_FLOAT:
17092 switch (GET_MODE (op0))
17093 {
17094 default:
17095 gcc_unreachable ();
17096 case V16SImode:
17097 load_unaligned = gen_avx512f_loaddquv16si;
17098 store_unaligned = gen_avx512f_storedquv16si;
17099 break;
17100 case V16SFmode:
17101 load_unaligned = gen_avx512f_loadups512;
17102 store_unaligned = gen_avx512f_storeups512;
17103 break;
17104 case V8DFmode:
17105 load_unaligned = gen_avx512f_loadupd512;
17106 store_unaligned = gen_avx512f_storeupd512;
17107 break;
17108 }
17109
17110 if (MEM_P (op1))
17111 emit_insn (load_unaligned (op0, op1));
17112 else if (MEM_P (op0))
17113 emit_insn (store_unaligned (op0, op1));
17114 else
17115 gcc_unreachable ();
17116 if (orig_op0)
17117 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17118 break;
17119
17120 default:
17121 gcc_unreachable ();
17122 }
17123
17124 return;
17125 }
17126
17127 if (TARGET_AVX
17128 && GET_MODE_SIZE (mode) == 32)
17129 {
17130 switch (GET_MODE_CLASS (mode))
17131 {
17132 case MODE_VECTOR_INT:
17133 case MODE_INT:
17134 if (GET_MODE (op0) != V32QImode)
17135 {
17136 if (!MEM_P (op0))
17137 {
17138 orig_op0 = op0;
17139 op0 = gen_reg_rtx (V32QImode);
17140 }
17141 else
17142 op0 = gen_lowpart (V32QImode, op0);
17143 }
17144 op1 = gen_lowpart (V32QImode, op1);
17145 /* FALLTHRU */
17146
17147 case MODE_VECTOR_FLOAT:
17148 ix86_avx256_split_vector_move_misalign (op0, op1);
17149 if (orig_op0)
17150 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17151 break;
17152
17153 default:
17154 gcc_unreachable ();
17155 }
17156
17157 return;
17158 }
17159
17160 if (MEM_P (op1))
17161 {
17162 /* Normal *mov<mode>_internal pattern will handle
17163 unaligned loads just fine if misaligned_operand
17164 is true, and without the UNSPEC it can be combined
17165 with arithmetic instructions. */
17166 if (TARGET_AVX
17167 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17168 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17169 && misaligned_operand (op1, GET_MODE (op1)))
17170 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17171 /* ??? If we have typed data, then it would appear that using
17172 movdqu is the only way to get unaligned data loaded with
17173 integer type. */
17174 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17175 {
17176 if (GET_MODE (op0) != V16QImode)
17177 {
17178 orig_op0 = op0;
17179 op0 = gen_reg_rtx (V16QImode);
17180 }
17181 op1 = gen_lowpart (V16QImode, op1);
17182 /* We will eventually emit movups based on insn attributes. */
17183 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17184 if (orig_op0)
17185 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17186 }
17187 else if (TARGET_SSE2 && mode == V2DFmode)
17188 {
17189 rtx zero;
17190
17191 if (TARGET_AVX
17192 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17193 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17194 || optimize_insn_for_size_p ())
17195 {
17196 /* We will eventually emit movups based on insn attributes. */
17197 emit_insn (gen_sse2_loadupd (op0, op1));
17198 return;
17199 }
17200
17201 /* When SSE registers are split into halves, we can avoid
17202 writing to the top half twice. */
17203 if (TARGET_SSE_SPLIT_REGS)
17204 {
17205 emit_clobber (op0);
17206 zero = op0;
17207 }
17208 else
17209 {
17210 /* ??? Not sure about the best option for the Intel chips.
17211 The following would seem to satisfy; the register is
17212 entirely cleared, breaking the dependency chain. We
17213 then store to the upper half, with a dependency depth
17214 of one. A rumor has it that Intel recommends two movsd
17215 followed by an unpacklpd, but this is unconfirmed. And
17216 given that the dependency depth of the unpacklpd would
17217 still be one, I'm not sure why this would be better. */
17218 zero = CONST0_RTX (V2DFmode);
17219 }
17220
17221 m = adjust_address (op1, DFmode, 0);
17222 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17223 m = adjust_address (op1, DFmode, 8);
17224 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17225 }
17226 else
17227 {
17228 rtx t;
17229
17230 if (TARGET_AVX
17231 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17232 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17233 || optimize_insn_for_size_p ())
17234 {
17235 if (GET_MODE (op0) != V4SFmode)
17236 {
17237 orig_op0 = op0;
17238 op0 = gen_reg_rtx (V4SFmode);
17239 }
17240 op1 = gen_lowpart (V4SFmode, op1);
17241 emit_insn (gen_sse_loadups (op0, op1));
17242 if (orig_op0)
17243 emit_move_insn (orig_op0,
17244 gen_lowpart (GET_MODE (orig_op0), op0));
17245 return;
17246 }
17247
17248 if (mode != V4SFmode)
17249 t = gen_reg_rtx (V4SFmode);
17250 else
17251 t = op0;
17252
17253 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17254 emit_move_insn (t, CONST0_RTX (V4SFmode));
17255 else
17256 emit_clobber (t);
17257
17258 m = adjust_address (op1, V2SFmode, 0);
17259 emit_insn (gen_sse_loadlps (t, t, m));
17260 m = adjust_address (op1, V2SFmode, 8);
17261 emit_insn (gen_sse_loadhps (t, t, m));
17262 if (mode != V4SFmode)
17263 emit_move_insn (op0, gen_lowpart (mode, t));
17264 }
17265 }
17266 else if (MEM_P (op0))
17267 {
17268 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17269 {
17270 op0 = gen_lowpart (V16QImode, op0);
17271 op1 = gen_lowpart (V16QImode, op1);
17272 /* We will eventually emit movups based on insn attributes. */
17273 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17274 }
17275 else if (TARGET_SSE2 && mode == V2DFmode)
17276 {
17277 if (TARGET_AVX
17278 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17279 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17280 || optimize_insn_for_size_p ())
17281 /* We will eventually emit movups based on insn attributes. */
17282 emit_insn (gen_sse2_storeupd (op0, op1));
17283 else
17284 {
17285 m = adjust_address (op0, DFmode, 0);
17286 emit_insn (gen_sse2_storelpd (m, op1));
17287 m = adjust_address (op0, DFmode, 8);
17288 emit_insn (gen_sse2_storehpd (m, op1));
17289 }
17290 }
17291 else
17292 {
17293 if (mode != V4SFmode)
17294 op1 = gen_lowpart (V4SFmode, op1);
17295
17296 if (TARGET_AVX
17297 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17298 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17299 || optimize_insn_for_size_p ())
17300 {
17301 op0 = gen_lowpart (V4SFmode, op0);
17302 emit_insn (gen_sse_storeups (op0, op1));
17303 }
17304 else
17305 {
17306 m = adjust_address (op0, V2SFmode, 0);
17307 emit_insn (gen_sse_storelps (m, op1));
17308 m = adjust_address (op0, V2SFmode, 8);
17309 emit_insn (gen_sse_storehps (m, op1));
17310 }
17311 }
17312 }
17313 else
17314 gcc_unreachable ();
17315 }
17316
17317 /* Helper function of ix86_fixup_binary_operands to canonicalize
17318 operand order. Returns true if the operands should be swapped. */
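/* For a commutative PLUS where only operands[2] matches the destination,
   swapping turns "dst = src1 + dst" into the two-address friendly
   "dst = dst + src1".  The priority order implemented below is: match the
   destination first, then push immediates and memory references to the
   second source operand.  */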
17319
17320 static bool
17321 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17322 rtx operands[])
17323 {
17324 rtx dst = operands[0];
17325 rtx src1 = operands[1];
17326 rtx src2 = operands[2];
17327
17328 /* If the operation is not commutative, we can't do anything. */
17329 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17330 return false;
17331
17332 /* Highest priority is that src1 should match dst. */
17333 if (rtx_equal_p (dst, src1))
17334 return false;
17335 if (rtx_equal_p (dst, src2))
17336 return true;
17337
17338 /* Next highest priority is that immediate constants come second. */
17339 if (immediate_operand (src2, mode))
17340 return false;
17341 if (immediate_operand (src1, mode))
17342 return true;
17343
17344 /* Lowest priority is that memory references should come second. */
17345 if (MEM_P (src2))
17346 return false;
17347 if (MEM_P (src1))
17348 return true;
17349
17350 return false;
17351 }
17352
17353
17354 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17355 destination to use for the operation. If different from the true
17356 destination in operands[0], a copy operation will be required. */
17357
17358 rtx
17359 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17360 rtx operands[])
17361 {
17362 rtx dst = operands[0];
17363 rtx src1 = operands[1];
17364 rtx src2 = operands[2];
17365
17366 /* Canonicalize operand order. */
17367 if (ix86_swap_binary_operands_p (code, mode, operands))
17368 {
17369 rtx temp;
17370
17371 /* It is invalid to swap operands of different modes. */
17372 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17373
17374 temp = src1;
17375 src1 = src2;
17376 src2 = temp;
17377 }
17378
17379 /* Both source operands cannot be in memory. */
17380 if (MEM_P (src1) && MEM_P (src2))
17381 {
17382 /* Optimization: Only read from memory once. */
17383 if (rtx_equal_p (src1, src2))
17384 {
17385 src2 = force_reg (mode, src2);
17386 src1 = src2;
17387 }
17388 else if (rtx_equal_p (dst, src1))
17389 src2 = force_reg (mode, src2);
17390 else
17391 src1 = force_reg (mode, src1);
17392 }
17393
17394 /* If the destination is memory, and we do not have matching source
17395 operands, do things in registers. */
17396 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17397 dst = gen_reg_rtx (mode);
17398
17399 /* Source 1 cannot be a constant. */
17400 if (CONSTANT_P (src1))
17401 src1 = force_reg (mode, src1);
17402
17403 /* Source 1 cannot be a non-matching memory. */
17404 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17405 src1 = force_reg (mode, src1);
17406
17407 /* Improve address combine. */
17408 if (code == PLUS
17409 && GET_MODE_CLASS (mode) == MODE_INT
17410 && MEM_P (src2))
17411 src2 = force_reg (mode, src2);
17412
17413 operands[1] = src1;
17414 operands[2] = src2;
17415 return dst;
17416 }
17417
17418 /* Similarly, but assume that the destination has already been
17419 set up properly. */
17420
17421 void
17422 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17423 enum machine_mode mode, rtx operands[])
17424 {
17425 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17426 gcc_assert (dst == operands[0]);
17427 }
17428
17429 /* Attempt to expand a binary operator. Make the expansion closer to the
17430 actual machine than just general_operand, which would allow 3 separate
17431 memory references (one output, two input) in a single insn. */
17432
17433 void
17434 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17435 rtx operands[])
17436 {
17437 rtx src1, src2, dst, op, clob;
17438
17439 dst = ix86_fixup_binary_operands (code, mode, operands);
17440 src1 = operands[1];
17441 src2 = operands[2];
17442
17443 /* Emit the instruction. */
17444
17445 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17446 if (reload_in_progress)
17447 {
17448 /* Reload doesn't know about the flags register, and doesn't know that
17449 it doesn't want to clobber it. We can only do this with PLUS. */
17450 gcc_assert (code == PLUS);
17451 emit_insn (op);
17452 }
17453 else if (reload_completed
17454 && code == PLUS
17455 && !rtx_equal_p (dst, src1))
17456 {
17457 /* This is going to be an LEA; avoid splitting it later. */
17458 emit_insn (op);
17459 }
17460 else
17461 {
17462 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17463 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17464 }
17465
17466 /* Fix up the destination if needed. */
17467 if (dst != operands[0])
17468 emit_move_insn (operands[0], dst);
17469 }
17470
17471 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17472 the given OPERANDS. */
17473
17474 void
17475 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17476 rtx operands[])
17477 {
17478 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17479 if (GET_CODE (operands[1]) == SUBREG)
17480 {
17481 op1 = operands[1];
17482 op2 = operands[2];
17483 }
17484 else if (GET_CODE (operands[2]) == SUBREG)
17485 {
17486 op1 = operands[2];
17487 op2 = operands[1];
17488 }
17489 /* Optimize (__m128i) d | (__m128i) e and similar code, when d and e
17490 are float vectors, into a float vector logical insn. In C/C++,
17491 without using intrinsics, there is no other way to express a vector
17492 logical operation on float vectors than to cast them temporarily
17493 to integer vectors. */
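  /* In intrinsic terms this is the (__m128i) a | (__m128i) b idiom applied
     to what are really float vectors; the code below rewrites the operation
     into V4SF/V2DF (or the 256-bit variants) so that, for example, an
     orps/orpd can be emitted instead of a por.  */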
17494 if (op1
17495 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17496 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17497 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17498 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17499 && SUBREG_BYTE (op1) == 0
17500 && (GET_CODE (op2) == CONST_VECTOR
17501 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17502 && SUBREG_BYTE (op2) == 0))
17503 && can_create_pseudo_p ())
17504 {
17505 rtx dst;
17506 switch (GET_MODE (SUBREG_REG (op1)))
17507 {
17508 case V4SFmode:
17509 case V8SFmode:
17510 case V2DFmode:
17511 case V4DFmode:
17512 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17513 if (GET_CODE (op2) == CONST_VECTOR)
17514 {
17515 op2 = gen_lowpart (GET_MODE (dst), op2);
17516 op2 = force_reg (GET_MODE (dst), op2);
17517 }
17518 else
17519 {
17520 op1 = operands[1];
17521 op2 = SUBREG_REG (operands[2]);
17522 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17523 op2 = force_reg (GET_MODE (dst), op2);
17524 }
17525 op1 = SUBREG_REG (op1);
17526 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17527 op1 = force_reg (GET_MODE (dst), op1);
17528 emit_insn (gen_rtx_SET (VOIDmode, dst,
17529 gen_rtx_fmt_ee (code, GET_MODE (dst),
17530 op1, op2)));
17531 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17532 return;
17533 default:
17534 break;
17535 }
17536 }
17537 if (!nonimmediate_operand (operands[1], mode))
17538 operands[1] = force_reg (mode, operands[1]);
17539 if (!nonimmediate_operand (operands[2], mode))
17540 operands[2] = force_reg (mode, operands[2]);
17541 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17542 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17543 gen_rtx_fmt_ee (code, mode, operands[1],
17544 operands[2])));
17545 }
17546
17547 /* Return TRUE or FALSE depending on whether the binary operator meets the
17548 appropriate constraints. */
17549
17550 bool
17551 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17552 rtx operands[3])
17553 {
17554 rtx dst = operands[0];
17555 rtx src1 = operands[1];
17556 rtx src2 = operands[2];
17557
17558 /* Both source operands cannot be in memory. */
17559 if (MEM_P (src1) && MEM_P (src2))
17560 return false;
17561
17562 /* Canonicalize operand order for commutative operators. */
17563 if (ix86_swap_binary_operands_p (code, mode, operands))
17564 {
17565 rtx temp = src1;
17566 src1 = src2;
17567 src2 = temp;
17568 }
17569
17570 /* If the destination is memory, we must have a matching source operand. */
17571 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17572 return false;
17573
17574 /* Source 1 cannot be a constant. */
17575 if (CONSTANT_P (src1))
17576 return false;
17577
17578 /* Source 1 cannot be a non-matching memory. */
17579 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17580 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17581 return (code == AND
17582 && (mode == HImode
17583 || mode == SImode
17584 || (TARGET_64BIT && mode == DImode))
17585 && satisfies_constraint_L (src2));
17586
17587 return true;
17588 }
17589
17590 /* Attempt to expand a unary operator. Make the expansion closer to the
17591 actual machine than just general_operand, which would allow 2 separate
17592 memory references (one output, one input) in a single insn. */
17593
17594 void
17595 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17596 rtx operands[])
17597 {
17598 int matching_memory;
17599 rtx src, dst, op, clob;
17600
17601 dst = operands[0];
17602 src = operands[1];
17603
17604 /* If the destination is memory, and we do not have matching source
17605 operands, do things in registers. */
17606 matching_memory = 0;
17607 if (MEM_P (dst))
17608 {
17609 if (rtx_equal_p (dst, src))
17610 matching_memory = 1;
17611 else
17612 dst = gen_reg_rtx (mode);
17613 }
17614
17615 /* When source operand is memory, destination must match. */
17616 if (MEM_P (src) && !matching_memory)
17617 src = force_reg (mode, src);
17618
17619 /* Emit the instruction. */
17620
17621 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17622 if (reload_in_progress || code == NOT)
17623 {
17624 /* Reload doesn't know about the flags register, and doesn't know that
17625 it doesn't want to clobber it. */
17626 gcc_assert (code == NOT);
17627 emit_insn (op);
17628 }
17629 else
17630 {
17631 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17632 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17633 }
17634
17635 /* Fix up the destination if needed. */
17636 if (dst != operands[0])
17637 emit_move_insn (operands[0], dst);
17638 }
17639
17640 /* Split a 32-bit/64-bit divmod using an 8-bit unsigned divmod if the
17641 dividend and divisor are both within the range [0, 255]. */
17642
17643 void
17644 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17645 bool signed_p)
17646 {
17647 rtx end_label, qimode_label;
17648 rtx insn, div, mod;
17649 rtx scratch, tmp0, tmp1, tmp2;
17650 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17651 rtx (*gen_zero_extend) (rtx, rtx);
17652 rtx (*gen_test_ccno_1) (rtx, rtx);
17653
17654 switch (mode)
17655 {
17656 case SImode:
17657 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17658 gen_test_ccno_1 = gen_testsi_ccno_1;
17659 gen_zero_extend = gen_zero_extendqisi2;
17660 break;
17661 case DImode:
17662 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17663 gen_test_ccno_1 = gen_testdi_ccno_1;
17664 gen_zero_extend = gen_zero_extendqidi2;
17665 break;
17666 default:
17667 gcc_unreachable ();
17668 }
17669
17670 end_label = gen_label_rtx ();
17671 qimode_label = gen_label_rtx ();
17672
17673 scratch = gen_reg_rtx (mode);
17674
17675 /* Use 8-bit unsigned divmod if the dividend and divisor are both within the range [0, 255].
17676 ORing the operands together and testing against ~0xff checks both at once: ZF is set iff neither operand has a bit set above bit 7. */
17677 emit_move_insn (scratch, operands[2]);
17678 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17679 scratch, 1, OPTAB_DIRECT);
17680 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17681 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17682 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17683 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17684 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17685 pc_rtx);
17686 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17687 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17688 JUMP_LABEL (insn) = qimode_label;
17689
17690 /* Generate the original signed/unsigned divmod. */
17691 div = gen_divmod4_1 (operands[0], operands[1],
17692 operands[2], operands[3]);
17693 emit_insn (div);
17694
17695 /* Branch to the end. */
17696 emit_jump_insn (gen_jump (end_label));
17697 emit_barrier ();
17698
17699 /* Generate 8bit unsigned divide. */
17700 emit_label (qimode_label);
17701 /* Don't use operands[0] for the result of the 8-bit divide since not all
17702 registers support QImode ZERO_EXTRACT. */
17703 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17704 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17705 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17706 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17707
17708 if (signed_p)
17709 {
17710 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17711 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17712 }
17713 else
17714 {
17715 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17716 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17717 }
17718
17719 /* Extract remainder from AH. */
17720 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17721 if (REG_P (operands[1]))
17722 insn = emit_move_insn (operands[1], tmp1);
17723 else
17724 {
17725 /* Need a new scratch register since the old one holds the result
17726 of the 8-bit divide. */
17727 scratch = gen_reg_rtx (mode);
17728 emit_move_insn (scratch, tmp1);
17729 insn = emit_move_insn (operands[1], scratch);
17730 }
17731 set_unique_reg_note (insn, REG_EQUAL, mod);
17732
17733 /* Zero extend quotient from AL. */
17734 tmp1 = gen_lowpart (QImode, tmp0);
17735 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17736 set_unique_reg_note (insn, REG_EQUAL, div);
17737
17738 emit_label (end_label);
17739 }
17740
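/* A hedged sketch (illustration only, not emitted verbatim) of the code shape
   ix86_split_idivmod aims for in SImode, assuming the dividend is in %eax and
   the divisor in %ecx; the exact registers and insns depend on the mode and
   on register allocation:

       movl   %ecx, %edx
       orl    %eax, %edx
       testl  $-256, %edx        # any bit above bit 7 in either operand?
       je     .L8bit
       cltd                      # original 32-bit signed divide
       idivl  %ecx
       jmp    .Ldone
   .L8bit:
       divb   %cl                # 8-bit divide: AL = quotient, AH = remainder
       movzbl %al, %eax
   .Ldone:  */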
17741 /* Whether it is OK to emit CFI directives when emitting asm code. */
17742
17743 bool
17744 ix86_emit_cfi ()
17745 {
17746 return dwarf2out_do_cfi_asm ();
17747 }
17748
17749 #define LEA_MAX_STALL (3)
17750 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17751
17752 /* Increase the given DISTANCE in half-cycles according to
17753 dependencies between PREV and NEXT instructions.
17754 Add 1 half-cycle if there is no dependency and
17755 go to the next cycle if there is a dependency. */
17756
17757 static unsigned int
17758 increase_distance (rtx prev, rtx next, unsigned int distance)
17759 {
17760 df_ref *use_rec;
17761 df_ref *def_rec;
17762
17763 if (!prev || !next)
17764 return distance + (distance & 1) + 2;
17765
17766 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17767 return distance + 1;
17768
17769 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
17770 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
17771 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
17772 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
17773 return distance + (distance & 1) + 2;
17774
17775 return distance + 1;
17776 }
17777
17778 /* Return true if instruction INSN defines register number
17779 REGNO1 or REGNO2. */
17780
17781 static bool
17782 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17783 rtx insn)
17784 {
17785 df_ref *def_rec;
17786
17787 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
17788 if (DF_REF_REG_DEF_P (*def_rec)
17789 && !DF_REF_IS_ARTIFICIAL (*def_rec)
17790 && (regno1 == DF_REF_REGNO (*def_rec)
17791 || regno2 == DF_REF_REGNO (*def_rec)))
17792 {
17793 return true;
17794 }
17795
17796 return false;
17797 }
17798
17799 /* Return true if instruction INSN uses register number
17800 REGNO as part of an address expression. */
17801
17802 static bool
17803 insn_uses_reg_mem (unsigned int regno, rtx insn)
17804 {
17805 df_ref *use_rec;
17806
17807 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
17808 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
17809 return true;
17810
17811 return false;
17812 }
17813
17814 /* Search backward for a non-AGU definition of register number REGNO1
17815 or register number REGNO2 in the basic block, starting from instruction
17816 START and going up to the head of the basic block or to instruction INSN.
17817
17818 Set *FOUND to true if a definition was found
17819 and to false otherwise.
17820
17821 The distance in half-cycles between START and the found instruction, or
17822 the head of the BB, is added to DISTANCE and returned. */
17823
17824 static int
17825 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17826 rtx insn, int distance,
17827 rtx start, bool *found)
17828 {
17829 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17830 rtx prev = start;
17831 rtx next = NULL;
17832
17833 *found = false;
17834
17835 while (prev
17836 && prev != insn
17837 && distance < LEA_SEARCH_THRESHOLD)
17838 {
17839 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17840 {
17841 distance = increase_distance (prev, next, distance);
17842 if (insn_defines_reg (regno1, regno2, prev))
17843 {
17844 if (recog_memoized (prev) < 0
17845 || get_attr_type (prev) != TYPE_LEA)
17846 {
17847 *found = true;
17848 return distance;
17849 }
17850 }
17851
17852 next = prev;
17853 }
17854 if (prev == BB_HEAD (bb))
17855 break;
17856
17857 prev = PREV_INSN (prev);
17858 }
17859
17860 return distance;
17861 }
17862
17863 /* Search backward for a non-AGU definition of register number REGNO1
17864 or register number REGNO2 in INSN's basic block until we
17865 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17866 2. Reach the boundary of a neighbouring BB, or
17867 3. Reach an AGU definition.
17868 Return the distance between the non-AGU definition point and INSN.
17869 If there is no definition point, return -1. */
17870
17871 static int
17872 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17873 rtx insn)
17874 {
17875 basic_block bb = BLOCK_FOR_INSN (insn);
17876 int distance = 0;
17877 bool found = false;
17878
17879 if (insn != BB_HEAD (bb))
17880 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17881 distance, PREV_INSN (insn),
17882 &found);
17883
17884 if (!found && distance < LEA_SEARCH_THRESHOLD)
17885 {
17886 edge e;
17887 edge_iterator ei;
17888 bool simple_loop = false;
17889
17890 FOR_EACH_EDGE (e, ei, bb->preds)
17891 if (e->src == bb)
17892 {
17893 simple_loop = true;
17894 break;
17895 }
17896
17897 if (simple_loop)
17898 distance = distance_non_agu_define_in_bb (regno1, regno2,
17899 insn, distance,
17900 BB_END (bb), &found);
17901 else
17902 {
17903 int shortest_dist = -1;
17904 bool found_in_bb = false;
17905
17906 FOR_EACH_EDGE (e, ei, bb->preds)
17907 {
17908 int bb_dist
17909 = distance_non_agu_define_in_bb (regno1, regno2,
17910 insn, distance,
17911 BB_END (e->src),
17912 &found_in_bb);
17913 if (found_in_bb)
17914 {
17915 if (shortest_dist < 0)
17916 shortest_dist = bb_dist;
17917 else if (bb_dist > 0)
17918 shortest_dist = MIN (bb_dist, shortest_dist);
17919
17920 found = true;
17921 }
17922 }
17923
17924 distance = shortest_dist;
17925 }
17926 }
17927
17928 /* get_attr_type may modify recog data. We want to make sure
17929 that recog data is valid for instruction INSN, on which
17930 distance_non_agu_define is called. INSN is unchanged here. */
17931 extract_insn_cached (insn);
17932
17933 if (!found)
17934 return -1;
17935
17936 return distance >> 1;
17937 }
17938
17939 /* Return the distance in half-cycles, added to DISTANCE, between INSN
17940 and the next insn that uses register number REGNO in a memory address.
17941 Return -1 if REGNO is set.
17942
17943 Set *FOUND to true if a register use was found and
17944 to false otherwise.
17945 Set *REDEFINED to true if a register redefinition was
17946 found and to false otherwise. */
17947
17948 static int
17949 distance_agu_use_in_bb (unsigned int regno,
17950 rtx insn, int distance, rtx start,
17951 bool *found, bool *redefined)
17952 {
17953 basic_block bb = NULL;
17954 rtx next = start;
17955 rtx prev = NULL;
17956
17957 *found = false;
17958 *redefined = false;
17959
17960 if (start != NULL_RTX)
17961 {
17962 bb = BLOCK_FOR_INSN (start);
17963 if (start != BB_HEAD (bb))
17964 /* If insn and start belong to the same bb, set prev to insn,
17965 so the call to increase_distance will increase the distance
17966 between insns by 1. */
17967 prev = insn;
17968 }
17969
17970 while (next
17971 && next != insn
17972 && distance < LEA_SEARCH_THRESHOLD)
17973 {
17974 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17975 {
17976 distance = increase_distance(prev, next, distance);
17977 if (insn_uses_reg_mem (regno, next))
17978 {
17979 /* Return DISTANCE if OP0 is used in memory
17980 address in NEXT. */
17981 *found = true;
17982 return distance;
17983 }
17984
17985 if (insn_defines_reg (regno, INVALID_REGNUM, next))
17986 {
17987 /* Return -1 if OP0 is set in NEXT. */
17988 *redefined = true;
17989 return -1;
17990 }
17991
17992 prev = next;
17993 }
17994
17995 if (next == BB_END (bb))
17996 break;
17997
17998 next = NEXT_INSN (next);
17999 }
18000
18001 return distance;
18002 }
18003
18004 /* Return the distance between INSN and the next insn that uses
18005 register number REGNO0 in a memory address. Return -1 if no such
18006 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
18007
18008 static int
18009 distance_agu_use (unsigned int regno0, rtx insn)
18010 {
18011 basic_block bb = BLOCK_FOR_INSN (insn);
18012 int distance = 0;
18013 bool found = false;
18014 bool redefined = false;
18015
18016 if (insn != BB_END (bb))
18017 distance = distance_agu_use_in_bb (regno0, insn, distance,
18018 NEXT_INSN (insn),
18019 &found, &redefined);
18020
18021 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18022 {
18023 edge e;
18024 edge_iterator ei;
18025 bool simple_loop = false;
18026
18027 FOR_EACH_EDGE (e, ei, bb->succs)
18028 if (e->dest == bb)
18029 {
18030 simple_loop = true;
18031 break;
18032 }
18033
18034 if (simple_loop)
18035 distance = distance_agu_use_in_bb (regno0, insn,
18036 distance, BB_HEAD (bb),
18037 &found, &redefined);
18038 else
18039 {
18040 int shortest_dist = -1;
18041 bool found_in_bb = false;
18042 bool redefined_in_bb = false;
18043
18044 FOR_EACH_EDGE (e, ei, bb->succs)
18045 {
18046 int bb_dist
18047 = distance_agu_use_in_bb (regno0, insn,
18048 distance, BB_HEAD (e->dest),
18049 &found_in_bb, &redefined_in_bb);
18050 if (found_in_bb)
18051 {
18052 if (shortest_dist < 0)
18053 shortest_dist = bb_dist;
18054 else if (bb_dist > 0)
18055 shortest_dist = MIN (bb_dist, shortest_dist);
18056
18057 found = true;
18058 }
18059 }
18060
18061 distance = shortest_dist;
18062 }
18063 }
18064
18065 if (!found || redefined)
18066 return -1;
18067
18068 return distance >> 1;
18069 }
18070
18071 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
18072 there is a choice between LEA and ADD:
18073 Negative value: ADD is preferred over LEA
18074 Zero: Neutral
18075 Positive value: LEA is preferred over ADD. */
18076 #define IX86_LEA_PRIORITY 0
18077
18078 /* Return true if using the LEA INSN has a performance advantage
18079 over the equivalent instruction sequence. The instruction sequence
18080 has SPLIT_COST cycles higher latency than the LEA. */
18081
18082 static bool
18083 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
18084 unsigned int regno2, int split_cost, bool has_scale)
18085 {
18086 int dist_define, dist_use;
18087
18088 /* For Silvermont, using a 2-source or 3-source LEA is justified
18089 when it provides a non-destructive destination or when the
18090 ability to use a scale factor is needed. */
18091 if (TARGET_SILVERMONT || TARGET_INTEL)
18092 {
18093 if (has_scale)
18094 return true;
18095 if (split_cost < 1)
18096 return false;
18097 if (regno0 == regno1 || regno0 == regno2)
18098 return false;
18099 return true;
18100 }
18101
18102 dist_define = distance_non_agu_define (regno1, regno2, insn);
18103 dist_use = distance_agu_use (regno0, insn);
18104
18105 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18106 {
18107 /* If there is no non-AGU operand definition, no AGU
18108 operand use and the split cost is 0, then both the lea
18109 and non-lea variants have the same priority. Currently
18110 we prefer lea for 64-bit code and non-lea for 32-bit
18111 code. */
18112 if (dist_use < 0 && split_cost == 0)
18113 return TARGET_64BIT || IX86_LEA_PRIORITY;
18114 else
18115 return true;
18116 }
18117
18118 /* With a longer definition distance, lea becomes preferable.
18119 Adjust the distance here to take the splitting cost and
18120 lea priority into account. */
18121 dist_define += split_cost + IX86_LEA_PRIORITY;
18122
18123 /* If there is no use in a memory address, then we just check
18124 that the split cost exceeds the AGU stall. */
18125 if (dist_use < 0)
18126 return dist_define > LEA_MAX_STALL;
18127
18128 /* If this insn has both a backward non-AGU dependence and a forward
18129 AGU dependence, the one with the shorter distance takes effect. */
18130 return dist_define >= dist_use;
18131 }
18132
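/* A rough illustration of the heuristic above (hypothetical numbers, not from
   any particular CPU): suppose REGNO1 is produced by an ALU insn one cycle
   before the LEA (dist_define = 1) and the split sequence costs one extra
   cycle (split_cost = 1).  If the LEA result feeds an address three cycles
   later (dist_use = 3), then 1 + 1 < 3 and the LEA is split; if the address
   use were only one cycle away, 1 + 1 >= 1 and the LEA is kept, since its
   result is needed on the AGU side almost immediately.  */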
18133 /* Return true if it is legal for INSN to clobber the flags register,
18134 and false otherwise. */
18135
18136 static bool
18137 ix86_ok_to_clobber_flags (rtx insn)
18138 {
18139 basic_block bb = BLOCK_FOR_INSN (insn);
18140 df_ref *use;
18141 bitmap live;
18142
18143 while (insn)
18144 {
18145 if (NONDEBUG_INSN_P (insn))
18146 {
18147 for (use = DF_INSN_USES (insn); *use; use++)
18148 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
18149 return false;
18150
18151 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18152 return true;
18153 }
18154
18155 if (insn == BB_END (bb))
18156 break;
18157
18158 insn = NEXT_INSN (insn);
18159 }
18160
18161 live = df_get_live_out(bb);
18162 return !REGNO_REG_SET_P (live, FLAGS_REG);
18163 }
18164
18165 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18166 move and add to avoid AGU stalls. */
18167
18168 bool
18169 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
18170 {
18171 unsigned int regno0, regno1, regno2;
18172
18173 /* Check if we need to optimize. */
18174 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18175 return false;
18176
18177 /* Check it is correct to split here. */
18178 if (!ix86_ok_to_clobber_flags(insn))
18179 return false;
18180
18181 regno0 = true_regnum (operands[0]);
18182 regno1 = true_regnum (operands[1]);
18183 regno2 = true_regnum (operands[2]);
18184
18185 /* We only need to split adds with a non-destructive
18186 destination operand. */
18187 if (regno0 == regno1 || regno0 == regno2)
18188 return false;
18189 else
18190 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18191 }
18192
18193 /* Return true if we should emit lea instruction instead of mov
18194 instruction. */
18195
18196 bool
18197 ix86_use_lea_for_mov (rtx insn, rtx operands[])
18198 {
18199 unsigned int regno0, regno1;
18200
18201 /* Check if we need to optimize. */
18202 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18203 return false;
18204
18205 /* Use lea for reg to reg moves only. */
18206 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18207 return false;
18208
18209 regno0 = true_regnum (operands[0]);
18210 regno1 = true_regnum (operands[1]);
18211
18212 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18213 }
18214
18215 /* Return true if we need to split lea into a sequence of
18216 instructions to avoid AGU stalls. */
18217
18218 bool
18219 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
18220 {
18221 unsigned int regno0, regno1, regno2;
18222 int split_cost;
18223 struct ix86_address parts;
18224 int ok;
18225
18226 /* Check we need to optimize. */
18227 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18228 return false;
18229
18230 /* The "at least two components" test below might not catch simple
18231 move or zero extension insns if parts.base is non-NULL and parts.disp
18232 is const0_rtx as the only components in the address, e.g. if the
18233 register is %rbp or %r13. As this test is much cheaper and moves or
18234 zero extensions are the common case, do this check first. */
18235 if (REG_P (operands[1])
18236 || (SImode_address_operand (operands[1], VOIDmode)
18237 && REG_P (XEXP (operands[1], 0))))
18238 return false;
18239
18240 /* Check if it is OK to split here. */
18241 if (!ix86_ok_to_clobber_flags (insn))
18242 return false;
18243
18244 ok = ix86_decompose_address (operands[1], &parts);
18245 gcc_assert (ok);
18246
18247 /* There should be at least two components in the address. */
18248 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18249 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18250 return false;
18251
18252 /* We should not split into an add if a non-legitimate PIC
18253 operand is used as the displacement. */
18254 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18255 return false;
18256
18257 regno0 = true_regnum (operands[0]);
18258 regno1 = INVALID_REGNUM;
18259 regno2 = INVALID_REGNUM;
18260
18261 if (parts.base)
18262 regno1 = true_regnum (parts.base);
18263 if (parts.index)
18264 regno2 = true_regnum (parts.index);
18265
18266 split_cost = 0;
18267
18268 /* Compute how many cycles we will add to the execution time
18269 if we split the lea into a sequence of instructions. */
18270 if (parts.base || parts.index)
18271 {
18272 /* Have to use a mov instruction if the non-destructive
18273 destination form is used. */
18274 if (regno1 != regno0 && regno2 != regno0)
18275 split_cost += 1;
18276
18277 /* Have to add index to base if both exist. */
18278 if (parts.base && parts.index)
18279 split_cost += 1;
18280
18281 /* Have to use shift and adds if scale is 2 or greater. */
18282 if (parts.scale > 1)
18283 {
18284 if (regno0 != regno1)
18285 split_cost += 1;
18286 else if (regno2 == regno0)
18287 split_cost += 4;
18288 else
18289 split_cost += parts.scale;
18290 }
18291
18292 /* Have to use an add instruction with an immediate if
18293 disp is non-zero. */
18294 if (parts.disp && parts.disp != const0_rtx)
18295 split_cost += 1;
18296
18297 /* Subtract the price of lea. */
18298 split_cost -= 1;
18299 }
18300
18301 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18302 parts.scale > 1);
18303 }
18304
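/* An illustrative split-cost computation under the rules above (hypothetical
   operands): for lea 0x4(%rbx,%rcx,2), %rax the destination matches neither
   the base nor the index, so we count one mov, one shift for the scale, one
   add to fold in the base, and one add for the displacement, minus one for
   the lea itself, giving split_cost = 3 before ix86_lea_outperforms is
   consulted.  */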
18305 /* Emit x86 binary operator CODE in mode MODE, where the first operand
18306 matches the destination. The emitted RTX includes a clobber of FLAGS_REG. */
18307
18308 static void
18309 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18310 rtx dst, rtx src)
18311 {
18312 rtx op, clob;
18313
18314 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18315 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18316
18317 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18318 }
18319
18320 /* Return true if the definition of REGNO1 is nearest to INSN. */
18321
18322 static bool
18323 find_nearest_reg_def (rtx insn, int regno1, int regno2)
18324 {
18325 rtx prev = insn;
18326 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
18327
18328 if (insn == start)
18329 return false;
18330 while (prev && prev != start)
18331 {
18332 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18333 {
18334 prev = PREV_INSN (prev);
18335 continue;
18336 }
18337 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18338 return true;
18339 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18340 return false;
18341 prev = PREV_INSN (prev);
18342 }
18343
18344 /* None of the regs is defined in the bb. */
18345 return false;
18346 }
18347
18348 /* Split a lea instruction into a sequence of instructions
18349 which are executed on the ALU to avoid AGU stalls.
18350 It is assumed that it is allowed to clobber the flags register
18351 at the lea position. */
18352
18353 void
18354 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
18355 {
18356 unsigned int regno0, regno1, regno2;
18357 struct ix86_address parts;
18358 rtx target, tmp;
18359 int ok, adds;
18360
18361 ok = ix86_decompose_address (operands[1], &parts);
18362 gcc_assert (ok);
18363
18364 target = gen_lowpart (mode, operands[0]);
18365
18366 regno0 = true_regnum (target);
18367 regno1 = INVALID_REGNUM;
18368 regno2 = INVALID_REGNUM;
18369
18370 if (parts.base)
18371 {
18372 parts.base = gen_lowpart (mode, parts.base);
18373 regno1 = true_regnum (parts.base);
18374 }
18375
18376 if (parts.index)
18377 {
18378 parts.index = gen_lowpart (mode, parts.index);
18379 regno2 = true_regnum (parts.index);
18380 }
18381
18382 if (parts.disp)
18383 parts.disp = gen_lowpart (mode, parts.disp);
18384
18385 if (parts.scale > 1)
18386 {
18387 /* Case r1 = r1 + ... */
18388 if (regno1 == regno0)
18389 {
18390 /* In the r1 = r1 + C * r2 case we would have to
18391 use multiplication, which is very
18392 expensive. Assume the cost model is wrong if
18393 such a case gets here. */
18394 gcc_assert (regno2 != regno0);
18395
18396 for (adds = parts.scale; adds > 0; adds--)
18397 ix86_emit_binop (PLUS, mode, target, parts.index);
18398 }
18399 else
18400 {
18401 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18402 if (regno0 != regno2)
18403 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18404
18405 /* Use shift for scaling. */
18406 ix86_emit_binop (ASHIFT, mode, target,
18407 GEN_INT (exact_log2 (parts.scale)));
18408
18409 if (parts.base)
18410 ix86_emit_binop (PLUS, mode, target, parts.base);
18411
18412 if (parts.disp && parts.disp != const0_rtx)
18413 ix86_emit_binop (PLUS, mode, target, parts.disp);
18414 }
18415 }
18416 else if (!parts.base && !parts.index)
18417 {
18418 gcc_assert(parts.disp);
18419 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18420 }
18421 else
18422 {
18423 if (!parts.base)
18424 {
18425 if (regno0 != regno2)
18426 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18427 }
18428 else if (!parts.index)
18429 {
18430 if (regno0 != regno1)
18431 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18432 }
18433 else
18434 {
18435 if (regno0 == regno1)
18436 tmp = parts.index;
18437 else if (regno0 == regno2)
18438 tmp = parts.base;
18439 else
18440 {
18441 rtx tmp1;
18442
18443 /* Find the better operand for the SET instruction, depending
18444 on which definition is farther from the insn. */
18445 if (find_nearest_reg_def (insn, regno1, regno2))
18446 tmp = parts.index, tmp1 = parts.base;
18447 else
18448 tmp = parts.base, tmp1 = parts.index;
18449
18450 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18451
18452 if (parts.disp && parts.disp != const0_rtx)
18453 ix86_emit_binop (PLUS, mode, target, parts.disp);
18454
18455 ix86_emit_binop (PLUS, mode, target, tmp1);
18456 return;
18457 }
18458
18459 ix86_emit_binop (PLUS, mode, target, tmp);
18460 }
18461
18462 if (parts.disp && parts.disp != const0_rtx)
18463 ix86_emit_binop (PLUS, mode, target, parts.disp);
18464 }
18465 }
18466
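/* A hedged sketch of the kind of sequence the splitter above produces for
   lea 0x4(%rbx,%rcx,2), %rax when all three registers are distinct (exact
   insns depend on the mode and operands):

       movq   %rcx, %rax         # copy the index into the destination
       salq   $1, %rax           # apply the scale with a shift
       addq   %rbx, %rax         # add the base
       addq   $4, %rax           # add the displacement
   */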
18467 /* Return true if it is OK to optimize an ADD operation into a LEA
18468 operation to avoid flag-register consumption. For most processors,
18469 ADD is faster than LEA. For processors like BONNELL, if the
18470 destination register of the LEA holds an actual address which will be
18471 used soon, LEA is better; otherwise ADD is better. */
18472
18473 bool
18474 ix86_lea_for_add_ok (rtx insn, rtx operands[])
18475 {
18476 unsigned int regno0 = true_regnum (operands[0]);
18477 unsigned int regno1 = true_regnum (operands[1]);
18478 unsigned int regno2 = true_regnum (operands[2]);
18479
18480 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18481 if (regno0 != regno1 && regno0 != regno2)
18482 return true;
18483
18484 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18485 return false;
18486
18487 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18488 }
18489
18490 /* Return true if destination reg of SET_BODY is shift count of
18491 USE_BODY. */
18492
18493 static bool
18494 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18495 {
18496 rtx set_dest;
18497 rtx shift_rtx;
18498 int i;
18499
18500 /* Retrieve destination of SET_BODY. */
18501 switch (GET_CODE (set_body))
18502 {
18503 case SET:
18504 set_dest = SET_DEST (set_body);
18505 if (!set_dest || !REG_P (set_dest))
18506 return false;
18507 break;
18508 case PARALLEL:
18509 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18510 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18511 use_body))
18512 return true;
18513 default:
18514 return false;
18515 break;
18516 }
18517
18518 /* Retrieve shift count of USE_BODY. */
18519 switch (GET_CODE (use_body))
18520 {
18521 case SET:
18522 shift_rtx = XEXP (use_body, 1);
18523 break;
18524 case PARALLEL:
18525 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18526 if (ix86_dep_by_shift_count_body (set_body,
18527 XVECEXP (use_body, 0, i)))
18528 return true;
18529 default:
18530 return false;
18531 break;
18532 }
18533
18534 if (shift_rtx
18535 && (GET_CODE (shift_rtx) == ASHIFT
18536 || GET_CODE (shift_rtx) == LSHIFTRT
18537 || GET_CODE (shift_rtx) == ASHIFTRT
18538 || GET_CODE (shift_rtx) == ROTATE
18539 || GET_CODE (shift_rtx) == ROTATERT))
18540 {
18541 rtx shift_count = XEXP (shift_rtx, 1);
18542
18543 /* Return true if shift count is dest of SET_BODY. */
18544 if (REG_P (shift_count))
18545 {
18546 /* Add this check since this function can be invoked before
18547 register allocation by the pre-reload scheduler. */
18548 if (reload_completed
18549 && true_regnum (set_dest) == true_regnum (shift_count))
18550 return true;
18551 else if (REGNO (set_dest) == REGNO (shift_count))
18552 return true;
18553 }
18554 }
18555
18556 return false;
18557 }
18558
18559 /* Return true if destination reg of SET_INSN is shift count of
18560 USE_INSN. */
18561
18562 bool
18563 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18564 {
18565 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18566 PATTERN (use_insn));
18567 }
18568
18569 /* Return TRUE or FALSE depending on whether the unary operator meets the
18570 appropriate constraints. */
18571
18572 bool
18573 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
18574 enum machine_mode mode ATTRIBUTE_UNUSED,
18575 rtx operands[2])
18576 {
18577 /* If one of the operands is memory, source and destination must match. */
18578 if ((MEM_P (operands[0])
18579 || MEM_P (operands[1]))
18580 && ! rtx_equal_p (operands[0], operands[1]))
18581 return false;
18582 return true;
18583 }
18584
18585 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18586 are ok, keeping in mind the possible movddup alternative. */
18587
18588 bool
18589 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18590 {
18591 if (MEM_P (operands[0]))
18592 return rtx_equal_p (operands[0], operands[1 + high]);
18593 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18594 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18595 return true;
18596 }
18597
18598 /* Post-reload splitter for converting an SFmode or DFmode value in an
18599 SSE register into an unsigned SImode value. */
18600
18601 void
18602 ix86_split_convert_uns_si_sse (rtx operands[])
18603 {
18604 enum machine_mode vecmode;
18605 rtx value, large, zero_or_two31, input, two31, x;
18606
18607 large = operands[1];
18608 zero_or_two31 = operands[2];
18609 input = operands[3];
18610 two31 = operands[4];
18611 vecmode = GET_MODE (large);
18612 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18613
18614 /* Load up the value into the low element. We must ensure that the other
18615 elements are valid floats -- zero is the easiest such value. */
18616 if (MEM_P (input))
18617 {
18618 if (vecmode == V4SFmode)
18619 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18620 else
18621 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18622 }
18623 else
18624 {
18625 input = gen_rtx_REG (vecmode, REGNO (input));
18626 emit_move_insn (value, CONST0_RTX (vecmode));
18627 if (vecmode == V4SFmode)
18628 emit_insn (gen_sse_movss (value, value, input));
18629 else
18630 emit_insn (gen_sse2_movsd (value, value, input));
18631 }
18632
18633 emit_move_insn (large, two31);
18634 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18635
18636 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18637 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18638
18639 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18640 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18641
18642 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18643 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18644
18645 large = gen_rtx_REG (V4SImode, REGNO (large));
18646 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18647
18648 x = gen_rtx_REG (V4SImode, REGNO (value));
18649 if (vecmode == V4SFmode)
18650 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18651 else
18652 emit_insn (gen_sse2_cvttpd2dq (x, value));
18653 value = x;
18654
18655 emit_insn (gen_xorv4si3 (value, value, large));
18656 }
18657
18658 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18659 Expects the 64-bit DImode to be supplied in a pair of integral
18660 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18661 -mfpmath=sse, !optimize_size only. */
18662
18663 void
18664 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18665 {
18666 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18667 rtx int_xmm, fp_xmm;
18668 rtx biases, exponents;
18669 rtx x;
18670
18671 int_xmm = gen_reg_rtx (V4SImode);
18672 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18673 emit_insn (gen_movdi_to_sse (int_xmm, input));
18674 else if (TARGET_SSE_SPLIT_REGS)
18675 {
18676 emit_clobber (int_xmm);
18677 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18678 }
18679 else
18680 {
18681 x = gen_reg_rtx (V2DImode);
18682 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18683 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18684 }
18685
18686 x = gen_rtx_CONST_VECTOR (V4SImode,
18687 gen_rtvec (4, GEN_INT (0x43300000UL),
18688 GEN_INT (0x45300000UL),
18689 const0_rtx, const0_rtx));
18690 exponents = validize_mem (force_const_mem (V4SImode, x));
18691
18692 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18693 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18694
18695 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18696 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18697 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18698 (0x1.0p84 + double(fp_value_hi_xmm)).
18699 Note these exponents differ by 32. */
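/* A worked example of the bias trick (plain arithmetic, not target-specific):
   for the input 0x1_00000003 (lo = 3, hi = 1) the two lanes read back as the
   doubles 0x1.0p52 + 3 and 0x1.0p84 + 1*0x1.0p32; after the bias subtraction
   below they become 3.0 and 4294967296.0, and the final horizontal add yields
   4294967299.0, i.e. the original 64-bit value.  */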
18700
18701 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18702
18703 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18704 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18705 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18706 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18707 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18708 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18709 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18710 biases = validize_mem (force_const_mem (V2DFmode, biases));
18711 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18712
18713 /* Add the upper and lower DFmode values together. */
18714 if (TARGET_SSE3)
18715 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18716 else
18717 {
18718 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18719 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18720 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18721 }
18722
18723 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18724 }
18725
18726 /* Not used, but eases macroization of patterns. */
18727 void
18728 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
18729 rtx input ATTRIBUTE_UNUSED)
18730 {
18731 gcc_unreachable ();
18732 }
18733
18734 /* Convert an unsigned SImode value into a DFmode. Only currently used
18735 for SSE, but applicable anywhere. */
18736
18737 void
18738 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18739 {
18740 REAL_VALUE_TYPE TWO31r;
18741 rtx x, fp;
18742
18743 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18744 NULL, 1, OPTAB_DIRECT);
18745
18746 fp = gen_reg_rtx (DFmode);
18747 emit_insn (gen_floatsidf2 (fp, x));
18748
18749 real_ldexp (&TWO31r, &dconst1, 31);
18750 x = const_double_from_real_value (TWO31r, DFmode);
18751
18752 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18753 if (x != target)
18754 emit_move_insn (target, x);
18755 }
18756
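/* A worked example of the signed-wraparound trick above (illustrative only):
   for input = 0xffffffff the PLUS wraps to 0x7fffffff, which converts to
   2147483647.0, and adding 0x1.0p31 gives 4294967295.0; for input = 5 the
   PLUS wraps to 0x80000005 (-2147483643), which converts to -2147483643.0,
   and adding 0x1.0p31 restores 5.0.  */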
18757 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18758 32-bit mode; otherwise we have a direct convert instruction. */
18759
18760 void
18761 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18762 {
18763 REAL_VALUE_TYPE TWO32r;
18764 rtx fp_lo, fp_hi, x;
18765
18766 fp_lo = gen_reg_rtx (DFmode);
18767 fp_hi = gen_reg_rtx (DFmode);
18768
18769 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18770
18771 real_ldexp (&TWO32r, &dconst1, 32);
18772 x = const_double_from_real_value (TWO32r, DFmode);
18773 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18774
18775 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18776
18777 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18778 0, OPTAB_DIRECT);
18779 if (x != target)
18780 emit_move_insn (target, x);
18781 }
18782
18783 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18784 For x86_32, -mfpmath=sse, !optimize_size only. */
18785 void
18786 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18787 {
18788 REAL_VALUE_TYPE ONE16r;
18789 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18790
18791 real_ldexp (&ONE16r, &dconst1, 16);
18792 x = const_double_from_real_value (ONE16r, SFmode);
18793 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
18794 NULL, 0, OPTAB_DIRECT);
18795 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
18796 NULL, 0, OPTAB_DIRECT);
18797 fp_hi = gen_reg_rtx (SFmode);
18798 fp_lo = gen_reg_rtx (SFmode);
18799 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18800 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18801 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18802 0, OPTAB_DIRECT);
18803 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18804 0, OPTAB_DIRECT);
18805 if (!rtx_equal_p (target, fp_hi))
18806 emit_move_insn (target, fp_hi);
18807 }
18808
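/* The split above relies on u = hi * 0x10000 + lo with hi, lo < 0x10000, so
   hi * 65536.0f and lo are both exact in SFmode and only the final addition
   rounds.  E.g. u = 0x00030005 gives hi = 3, lo = 5 and the exact result
   3.0f * 65536.0f + 5.0f = 196613.0f.  */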
18809 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18810 a vector of unsigned ints VAL to vector of floats TARGET. */
18811
18812 void
18813 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18814 {
18815 rtx tmp[8];
18816 REAL_VALUE_TYPE TWO16r;
18817 enum machine_mode intmode = GET_MODE (val);
18818 enum machine_mode fltmode = GET_MODE (target);
18819 rtx (*cvt) (rtx, rtx);
18820
18821 if (intmode == V4SImode)
18822 cvt = gen_floatv4siv4sf2;
18823 else
18824 cvt = gen_floatv8siv8sf2;
18825 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18826 tmp[0] = force_reg (intmode, tmp[0]);
18827 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18828 OPTAB_DIRECT);
18829 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18830 NULL_RTX, 1, OPTAB_DIRECT);
18831 tmp[3] = gen_reg_rtx (fltmode);
18832 emit_insn (cvt (tmp[3], tmp[1]));
18833 tmp[4] = gen_reg_rtx (fltmode);
18834 emit_insn (cvt (tmp[4], tmp[2]));
18835 real_ldexp (&TWO16r, &dconst1, 16);
18836 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18837 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18838 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18839 OPTAB_DIRECT);
18840 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18841 OPTAB_DIRECT);
18842 if (tmp[7] != target)
18843 emit_move_insn (target, tmp[7]);
18844 }
18845
18846 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18847 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18848 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18849 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18850
18851 rtx
18852 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18853 {
18854 REAL_VALUE_TYPE TWO31r;
18855 rtx two31r, tmp[4];
18856 enum machine_mode mode = GET_MODE (val);
18857 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18858 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18859 rtx (*cmp) (rtx, rtx, rtx, rtx);
18860 int i;
18861
18862 for (i = 0; i < 3; i++)
18863 tmp[i] = gen_reg_rtx (mode);
18864 real_ldexp (&TWO31r, &dconst1, 31);
18865 two31r = const_double_from_real_value (TWO31r, scalarmode);
18866 two31r = ix86_build_const_vector (mode, 1, two31r);
18867 two31r = force_reg (mode, two31r);
18868 switch (mode)
18869 {
18870 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18871 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18872 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18873 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18874 default: gcc_unreachable ();
18875 }
18876 tmp[3] = gen_rtx_LE (mode, two31r, val);
18877 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18878 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18879 0, OPTAB_DIRECT);
18880 if (intmode == V4SImode || TARGET_AVX2)
18881 *xorp = expand_simple_binop (intmode, ASHIFT,
18882 gen_lowpart (intmode, tmp[0]),
18883 GEN_INT (31), NULL_RTX, 0,
18884 OPTAB_DIRECT);
18885 else
18886 {
18887 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18888 two31 = ix86_build_const_vector (intmode, 1, two31);
18889 *xorp = expand_simple_binop (intmode, AND,
18890 gen_lowpart (intmode, tmp[0]),
18891 two31, NULL_RTX, 0,
18892 OPTAB_DIRECT);
18893 }
18894 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18895 0, OPTAB_DIRECT);
18896 }
18897
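/* A worked example of the adjustment (plain arithmetic): for a lane holding
   3000000000.0, which is >= 0x1.0p31, the compare mask selects that lane, the
   AND/MINUS produce 3000000000.0 - 2147483648.0 = 852516352.0, and *XORP gets
   0x80000000 for that lane; the caller's signed truncation then yields
   852516352, and XORing in 0x80000000 restores 3000000000.  Lanes below
   0x1.0p31 are left unchanged.  */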
18898 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18899 then replicate the value for all elements of the vector
18900 register. */
18901
18902 rtx
18903 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18904 {
18905 int i, n_elt;
18906 rtvec v;
18907 enum machine_mode scalar_mode;
18908
18909 switch (mode)
18910 {
18911 case V64QImode:
18912 case V32QImode:
18913 case V16QImode:
18914 case V32HImode:
18915 case V16HImode:
18916 case V8HImode:
18917 case V16SImode:
18918 case V8SImode:
18919 case V4SImode:
18920 case V8DImode:
18921 case V4DImode:
18922 case V2DImode:
18923 gcc_assert (vect);
18924 case V16SFmode:
18925 case V8SFmode:
18926 case V4SFmode:
18927 case V8DFmode:
18928 case V4DFmode:
18929 case V2DFmode:
18930 n_elt = GET_MODE_NUNITS (mode);
18931 v = rtvec_alloc (n_elt);
18932 scalar_mode = GET_MODE_INNER (mode);
18933
18934 RTVEC_ELT (v, 0) = value;
18935
18936 for (i = 1; i < n_elt; ++i)
18937 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18938
18939 return gen_rtx_CONST_VECTOR (mode, v);
18940
18941 default:
18942 gcc_unreachable ();
18943 }
18944 }
18945
18946 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18947 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18948 for an SSE register. If VECT is true, then replicate the mask for
18949 all elements of the vector register. If INVERT is true, then create
18950 a mask excluding the sign bit. */
18951
18952 rtx
18953 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18954 {
18955 enum machine_mode vec_mode, imode;
18956 HOST_WIDE_INT hi, lo;
18957 int shift = 63;
18958 rtx v;
18959 rtx mask;
18960
18961 /* Find the sign bit, sign extended to 2*HWI. */
18962 switch (mode)
18963 {
18964 case V16SImode:
18965 case V16SFmode:
18966 case V8SImode:
18967 case V4SImode:
18968 case V8SFmode:
18969 case V4SFmode:
18970 vec_mode = mode;
18971 mode = GET_MODE_INNER (mode);
18972 imode = SImode;
18973 lo = 0x80000000, hi = lo < 0;
18974 break;
18975
18976 case V8DImode:
18977 case V4DImode:
18978 case V2DImode:
18979 case V8DFmode:
18980 case V4DFmode:
18981 case V2DFmode:
18982 vec_mode = mode;
18983 mode = GET_MODE_INNER (mode);
18984 imode = DImode;
18985 if (HOST_BITS_PER_WIDE_INT >= 64)
18986 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
18987 else
18988 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18989 break;
18990
18991 case TImode:
18992 case TFmode:
18993 vec_mode = VOIDmode;
18994 if (HOST_BITS_PER_WIDE_INT >= 64)
18995 {
18996 imode = TImode;
18997 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
18998 }
18999 else
19000 {
19001 rtvec vec;
19002
19003 imode = DImode;
19004 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19005
19006 if (invert)
19007 {
19008 lo = ~lo, hi = ~hi;
19009 v = constm1_rtx;
19010 }
19011 else
19012 v = const0_rtx;
19013
19014 mask = immed_double_const (lo, hi, imode);
19015
19016 vec = gen_rtvec (2, v, mask);
19017 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19018 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19019
19020 return v;
19021 }
19022 break;
19023
19024 default:
19025 gcc_unreachable ();
19026 }
19027
19028 if (invert)
19029 lo = ~lo, hi = ~hi;
19030
19031 /* Force this value into the low part of a fp vector constant. */
19032 mask = immed_double_const (lo, hi, imode);
19033 mask = gen_lowpart (mode, mask);
19034
19035 if (vec_mode == VOIDmode)
19036 return force_reg (mode, mask);
19037
19038 v = ix86_build_const_vector (vec_mode, vect, mask);
19039 return force_reg (vec_mode, v);
19040 }
19041
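/* Illustrative mask values for SFmode (the standard SSE sign-bit idiom; the
   exact insn patterns that consume them live in the .md files): the plain
   mask replicates 0x80000000 and the inverted mask replicates 0x7fffffff, so
   negation is an XOR with the former, absolute value is an AND with the
   latter, and copysign combines (x & 0x7fffffff) | (y & 0x80000000).  */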
19042 /* Generate code for floating point ABS or NEG. */
19043
19044 void
19045 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19046 rtx operands[])
19047 {
19048 rtx mask, set, dst, src;
19049 bool use_sse = false;
19050 bool vector_mode = VECTOR_MODE_P (mode);
19051 enum machine_mode vmode = mode;
19052
19053 if (vector_mode)
19054 use_sse = true;
19055 else if (mode == TFmode)
19056 use_sse = true;
19057 else if (TARGET_SSE_MATH)
19058 {
19059 use_sse = SSE_FLOAT_MODE_P (mode);
19060 if (mode == SFmode)
19061 vmode = V4SFmode;
19062 else if (mode == DFmode)
19063 vmode = V2DFmode;
19064 }
19065
19066 /* NEG and ABS performed with SSE use bitwise mask operations.
19067 Create the appropriate mask now. */
19068 if (use_sse)
19069 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19070 else
19071 mask = NULL_RTX;
19072
19073 dst = operands[0];
19074 src = operands[1];
19075
19076 set = gen_rtx_fmt_e (code, mode, src);
19077 set = gen_rtx_SET (VOIDmode, dst, set);
19078
19079 if (mask)
19080 {
19081 rtx use, clob;
19082 rtvec par;
19083
19084 use = gen_rtx_USE (VOIDmode, mask);
19085 if (vector_mode)
19086 par = gen_rtvec (2, set, use);
19087 else
19088 {
19089 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19090 par = gen_rtvec (3, set, use, clob);
19091 }
19092 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19093 }
19094 else
19095 emit_insn (set);
19096 }
19097
19098 /* Expand a copysign operation. Special case operand 0 being a constant. */
19099
19100 void
19101 ix86_expand_copysign (rtx operands[])
19102 {
19103 enum machine_mode mode, vmode;
19104 rtx dest, op0, op1, mask, nmask;
19105
19106 dest = operands[0];
19107 op0 = operands[1];
19108 op1 = operands[2];
19109
19110 mode = GET_MODE (dest);
19111
19112 if (mode == SFmode)
19113 vmode = V4SFmode;
19114 else if (mode == DFmode)
19115 vmode = V2DFmode;
19116 else
19117 vmode = mode;
19118
19119 if (GET_CODE (op0) == CONST_DOUBLE)
19120 {
19121 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19122
19123 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19124 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19125
19126 if (mode == SFmode || mode == DFmode)
19127 {
19128 if (op0 == CONST0_RTX (mode))
19129 op0 = CONST0_RTX (vmode);
19130 else
19131 {
19132 rtx v = ix86_build_const_vector (vmode, false, op0);
19133
19134 op0 = force_reg (vmode, v);
19135 }
19136 }
19137 else if (op0 != CONST0_RTX (mode))
19138 op0 = force_reg (mode, op0);
19139
19140 mask = ix86_build_signbit_mask (vmode, 0, 0);
19141
19142 if (mode == SFmode)
19143 copysign_insn = gen_copysignsf3_const;
19144 else if (mode == DFmode)
19145 copysign_insn = gen_copysigndf3_const;
19146 else
19147 copysign_insn = gen_copysigntf3_const;
19148
19149 emit_insn (copysign_insn (dest, op0, op1, mask));
19150 }
19151 else
19152 {
19153 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19154
19155 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19156 mask = ix86_build_signbit_mask (vmode, 0, 0);
19157
19158 if (mode == SFmode)
19159 copysign_insn = gen_copysignsf3_var;
19160 else if (mode == DFmode)
19161 copysign_insn = gen_copysigndf3_var;
19162 else
19163 copysign_insn = gen_copysigntf3_var;
19164
19165 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19166 }
19167 }
19168
19169 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19170 be a constant, and so has already been expanded into a vector constant. */
19171
19172 void
19173 ix86_split_copysign_const (rtx operands[])
19174 {
19175 enum machine_mode mode, vmode;
19176 rtx dest, op0, mask, x;
19177
19178 dest = operands[0];
19179 op0 = operands[1];
19180 mask = operands[3];
19181
19182 mode = GET_MODE (dest);
19183 vmode = GET_MODE (mask);
19184
19185 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19186 x = gen_rtx_AND (vmode, dest, mask);
19187 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19188
19189 if (op0 != CONST0_RTX (vmode))
19190 {
19191 x = gen_rtx_IOR (vmode, dest, op0);
19192 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19193 }
19194 }
19195
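/* An illustrative reading of the split above: the insn pattern ties the
   sign-source operand to the destination, so at split time DEST already holds
   the value supplying the sign; the AND keeps only its sign bit and the IOR
   merges in the (already non-negative) constant magnitude, i.e.
   result = (sign_source & sign_mask) | abs_constant.  */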
19196 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19197 so we have to do two masks. */
19198
19199 void
19200 ix86_split_copysign_var (rtx operands[])
19201 {
19202 enum machine_mode mode, vmode;
19203 rtx dest, scratch, op0, op1, mask, nmask, x;
19204
19205 dest = operands[0];
19206 scratch = operands[1];
19207 op0 = operands[2];
19208 op1 = operands[3];
19209 nmask = operands[4];
19210 mask = operands[5];
19211
19212 mode = GET_MODE (dest);
19213 vmode = GET_MODE (mask);
19214
19215 if (rtx_equal_p (op0, op1))
19216 {
19217 /* Shouldn't happen often (it's useless, obviously), but when it does
19218 we'd generate incorrect code if we continue below. */
19219 emit_move_insn (dest, op0);
19220 return;
19221 }
19222
19223 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19224 {
19225 gcc_assert (REGNO (op1) == REGNO (scratch));
19226
19227 x = gen_rtx_AND (vmode, scratch, mask);
19228 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19229
19230 dest = mask;
19231 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19232 x = gen_rtx_NOT (vmode, dest);
19233 x = gen_rtx_AND (vmode, x, op0);
19234 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19235 }
19236 else
19237 {
19238 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19239 {
19240 x = gen_rtx_AND (vmode, scratch, mask);
19241 }
19242 else /* alternative 2,4 */
19243 {
19244 gcc_assert (REGNO (mask) == REGNO (scratch));
19245 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19246 x = gen_rtx_AND (vmode, scratch, op1);
19247 }
19248 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19249
19250 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19251 {
19252 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19253 x = gen_rtx_AND (vmode, dest, nmask);
19254 }
19255 else /* alternative 3,4 */
19256 {
19257 gcc_assert (REGNO (nmask) == REGNO (dest));
19258 dest = nmask;
19259 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19260 x = gen_rtx_AND (vmode, dest, op0);
19261 }
19262 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19263 }
19264
19265 x = gen_rtx_IOR (vmode, dest, scratch);
19266 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19267 }
19268
19269 /* Return TRUE or FALSE depending on whether the first SET in INSN
19270 has source and destination with matching CC modes, and whether the
19271 CC mode is at least as constrained as REQ_MODE. */
19272
19273 bool
19274 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19275 {
19276 rtx set;
19277 enum machine_mode set_mode;
19278
19279 set = PATTERN (insn);
19280 if (GET_CODE (set) == PARALLEL)
19281 set = XVECEXP (set, 0, 0);
19282 gcc_assert (GET_CODE (set) == SET);
19283 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19284
19285 set_mode = GET_MODE (SET_DEST (set));
19286 switch (set_mode)
19287 {
19288 case CCNOmode:
19289 if (req_mode != CCNOmode
19290 && (req_mode != CCmode
19291 || XEXP (SET_SRC (set), 1) != const0_rtx))
19292 return false;
19293 break;
19294 case CCmode:
19295 if (req_mode == CCGCmode)
19296 return false;
19297 /* FALLTHRU */
19298 case CCGCmode:
19299 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19300 return false;
19301 /* FALLTHRU */
19302 case CCGOCmode:
19303 if (req_mode == CCZmode)
19304 return false;
19305 /* FALLTHRU */
19306 case CCZmode:
19307 break;
19308
19309 case CCAmode:
19310 case CCCmode:
19311 case CCOmode:
19312 case CCSmode:
19313 if (set_mode != req_mode)
19314 return false;
19315 break;
19316
19317 default:
19318 gcc_unreachable ();
19319 }
19320
19321 return GET_MODE (SET_SRC (set)) == set_mode;
19322 }
19323
19324 /* Generate insn patterns to do an integer compare of OPERANDS. */
19325
19326 static rtx
19327 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19328 {
19329 enum machine_mode cmpmode;
19330 rtx tmp, flags;
19331
19332 cmpmode = SELECT_CC_MODE (code, op0, op1);
19333 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19334
19335 /* This is very simple, but making the interface the same as in the
19336 FP case makes the rest of the code easier. */
19337 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19338 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19339
19340 /* Return the test that should be put into the flags user, i.e.
19341 the bcc, scc, or cmov instruction. */
19342 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19343 }
19344
19345 /* Figure out whether to use ordered or unordered fp comparisons.
19346 Return the appropriate mode to use. */
19347
19348 enum machine_mode
19349 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
19350 {
19351 /* ??? In order to make all comparisons reversible, we do all comparisons
19352 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19353 all forms of trapping and non-trapping comparisons, we can make inequality
19354 comparisons trapping again, since that results in better code when using
19355 FCOM-based compares. */
19356 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19357 }
19358
19359 enum machine_mode
19360 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19361 {
19362 enum machine_mode mode = GET_MODE (op0);
19363
19364 if (SCALAR_FLOAT_MODE_P (mode))
19365 {
19366 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19367 return ix86_fp_compare_mode (code);
19368 }
19369
19370 switch (code)
19371 {
19372 /* Only zero flag is needed. */
19373 case EQ: /* ZF=0 */
19374 case NE: /* ZF!=0 */
19375 return CCZmode;
19376 /* Codes needing carry flag. */
19377 case GEU: /* CF=0 */
19378 case LTU: /* CF=1 */
19379 /* Detect overflow checks. They need just the carry flag. */
19380 if (GET_CODE (op0) == PLUS
19381 && rtx_equal_p (op1, XEXP (op0, 0)))
19382 return CCCmode;
19383 else
19384 return CCmode;
19385 case GTU: /* CF=0 & ZF=0 */
19386 case LEU: /* CF=1 | ZF=1 */
19387 return CCmode;
19388 /* Codes possibly doable only with sign flag when
19389 comparing against zero. */
19390 case GE: /* SF=OF or SF=0 */
19391 case LT: /* SF<>OF or SF=1 */
19392 if (op1 == const0_rtx)
19393 return CCGOCmode;
19394 else
19395 /* For other cases Carry flag is not required. */
19396 return CCGCmode;
19397 /* Codes doable only with the sign flag when comparing
19398 against zero, but for which we miss the jump instruction,
19399 so we need to use relational tests against overflow,
19400 which thus needs to be zero. */
19401 case GT: /* ZF=0 & SF=OF */
19402 case LE: /* ZF=1 | SF<>OF */
19403 if (op1 == const0_rtx)
19404 return CCNOmode;
19405 else
19406 return CCGCmode;
19407 /* The strcmp pattern does (use flags) and combine may ask us for
19408 the proper mode. */
19409 case USE:
19410 return CCmode;
19411 default:
19412 gcc_unreachable ();
19413 }
19414 }
19415
19416 /* Return the fixed registers used for condition codes. */
19417
19418 static bool
19419 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19420 {
19421 *p1 = FLAGS_REG;
19422 *p2 = FPSR_REG;
19423 return true;
19424 }
19425
19426 /* If two condition code modes are compatible, return a condition code
19427 mode which is compatible with both. Otherwise, return
19428 VOIDmode. */
19429
19430 static enum machine_mode
19431 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19432 {
19433 if (m1 == m2)
19434 return m1;
19435
19436 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19437 return VOIDmode;
19438
19439 if ((m1 == CCGCmode && m2 == CCGOCmode)
19440 || (m1 == CCGOCmode && m2 == CCGCmode))
19441 return CCGCmode;
19442
19443 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19444 return m2;
19445 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19446 return m1;
19447
19448 switch (m1)
19449 {
19450 default:
19451 gcc_unreachable ();
19452
19453 case CCmode:
19454 case CCGCmode:
19455 case CCGOCmode:
19456 case CCNOmode:
19457 case CCAmode:
19458 case CCCmode:
19459 case CCOmode:
19460 case CCSmode:
19461 case CCZmode:
19462 switch (m2)
19463 {
19464 default:
19465 return VOIDmode;
19466
19467 case CCmode:
19468 case CCGCmode:
19469 case CCGOCmode:
19470 case CCNOmode:
19471 case CCAmode:
19472 case CCCmode:
19473 case CCOmode:
19474 case CCSmode:
19475 case CCZmode:
19476 return CCmode;
19477 }
19478
19479 case CCFPmode:
19480 case CCFPUmode:
19481 /* These are only compatible with themselves, which we already
19482 checked above. */
19483 return VOIDmode;
19484 }
19485 }
19486
19487
19488 /* Return a comparison we can do that is equivalent to
19489 swap_condition (code), apart possibly from orderedness.
19490 But never change orderedness if TARGET_IEEE_FP, returning
19491 UNKNOWN in that case if necessary. */
19492
19493 static enum rtx_code
19494 ix86_fp_swap_condition (enum rtx_code code)
19495 {
19496 switch (code)
19497 {
19498 case GT: /* GTU - CF=0 & ZF=0 */
19499 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19500 case GE: /* GEU - CF=0 */
19501 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19502 case UNLT: /* LTU - CF=1 */
19503 return TARGET_IEEE_FP ? UNKNOWN : GT;
19504 case UNLE: /* LEU - CF=1 | ZF=1 */
19505 return TARGET_IEEE_FP ? UNKNOWN : GE;
19506 default:
19507 return swap_condition (code);
19508 }
19509 }
19510
19511 /* Return the cost of comparison CODE using the best strategy for performance.
19512 All of the following functions use the number of instructions as the cost metric.
19513 In the future this should be tweaked to compute bytes for optimize_size and
19514 to take into account the performance of various instructions on various CPUs. */
19515
19516 static int
19517 ix86_fp_comparison_cost (enum rtx_code code)
19518 {
19519 int arith_cost;
19520
19521 /* The cost of code using bit-twiddling on %ah. */
19522 switch (code)
19523 {
19524 case UNLE:
19525 case UNLT:
19526 case LTGT:
19527 case GT:
19528 case GE:
19529 case UNORDERED:
19530 case ORDERED:
19531 case UNEQ:
19532 arith_cost = 4;
19533 break;
19534 case LT:
19535 case NE:
19536 case EQ:
19537 case UNGE:
19538 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19539 break;
19540 case LE:
19541 case UNGT:
19542 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19543 break;
19544 default:
19545 gcc_unreachable ();
19546 }
19547
19548 switch (ix86_fp_comparison_strategy (code))
19549 {
19550 case IX86_FPCMP_COMI:
19551 return arith_cost > 4 ? 3 : 2;
19552 case IX86_FPCMP_SAHF:
19553 return arith_cost > 4 ? 4 : 3;
19554 default:
19555 return arith_cost;
19556 }
19557 }
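/* For illustration, a worked instance of the cost computation above: for
   CODE == LE with TARGET_IEEE_FP, arith_cost is 6, so the returned cost
   is 3 under IX86_FPCMP_COMI, 4 under IX86_FPCMP_SAHF and 6 under
   IX86_FPCMP_ARITH; for CODE == UNORDERED, arith_cost is 4 and the
   corresponding costs are 2, 3 and 4.  */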
19558
19559 /* Return the strategy to use for a floating-point comparison. We assume that
19560 fcomi is always preferable where available, since that is also true when
19561 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19562
19563 enum ix86_fpcmp_strategy
19564 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
19565 {
19566 /* Do fcomi/sahf based test when profitable. */
19567
19568 if (TARGET_CMOVE)
19569 return IX86_FPCMP_COMI;
19570
19571 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19572 return IX86_FPCMP_SAHF;
19573
19574 return IX86_FPCMP_ARITH;
19575 }
19576
19577 /* Swap, force into registers, or otherwise massage the two operands
19578 to an fp comparison. The operands are updated in place; the new
19579 comparison code is returned. */
19580
19581 static enum rtx_code
19582 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19583 {
19584 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19585 rtx op0 = *pop0, op1 = *pop1;
19586 enum machine_mode op_mode = GET_MODE (op0);
19587 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19588
19589 /* All of the unordered compare instructions only work on registers.
19590 The same is true of the fcomi compare instructions. The XFmode
19591 compare instructions require registers except when comparing
19592 against zero or when converting operand 1 from fixed point to
19593 floating point. */
19594
19595 if (!is_sse
19596 && (fpcmp_mode == CCFPUmode
19597 || (op_mode == XFmode
19598 && ! (standard_80387_constant_p (op0) == 1
19599 || standard_80387_constant_p (op1) == 1)
19600 && GET_CODE (op1) != FLOAT)
19601 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19602 {
19603 op0 = force_reg (op_mode, op0);
19604 op1 = force_reg (op_mode, op1);
19605 }
19606 else
19607 {
19608 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19609 things around if they appear profitable, otherwise force op0
19610 into a register. */
19611
19612 if (standard_80387_constant_p (op0) == 0
19613 || (MEM_P (op0)
19614 && ! (standard_80387_constant_p (op1) == 0
19615 || MEM_P (op1))))
19616 {
19617 enum rtx_code new_code = ix86_fp_swap_condition (code);
19618 if (new_code != UNKNOWN)
19619 {
19620 rtx tmp;
19621 tmp = op0, op0 = op1, op1 = tmp;
19622 code = new_code;
19623 }
19624 }
19625
19626 if (!REG_P (op0))
19627 op0 = force_reg (op_mode, op0);
19628
19629 if (CONSTANT_P (op1))
19630 {
19631 int tmp = standard_80387_constant_p (op1);
19632 if (tmp == 0)
19633 op1 = validize_mem (force_const_mem (op_mode, op1));
19634 else if (tmp == 1)
19635 {
19636 if (TARGET_CMOVE)
19637 op1 = force_reg (op_mode, op1);
19638 }
19639 else
19640 op1 = force_reg (op_mode, op1);
19641 }
19642 }
19643
19644 /* Try to rearrange the comparison to make it cheaper. */
19645 if (ix86_fp_comparison_cost (code)
19646 > ix86_fp_comparison_cost (swap_condition (code))
19647 && (REG_P (op1) || can_create_pseudo_p ()))
19648 {
19649 rtx tmp;
19650 tmp = op0, op0 = op1, op1 = tmp;
19651 code = swap_condition (code);
19652 if (!REG_P (op0))
19653 op0 = force_reg (op_mode, op0);
19654 }
19655
19656 *pop0 = op0;
19657 *pop1 = op1;
19658 return code;
19659 }
19660
19661 /* Convert a comparison code we use to represent an FP comparison into the
19662 integer code that will result in a proper branch. Return UNKNOWN if no
19663 such code is available. */
19664
19665 enum rtx_code
19666 ix86_fp_compare_code_to_integer (enum rtx_code code)
19667 {
19668 switch (code)
19669 {
19670 case GT:
19671 return GTU;
19672 case GE:
19673 return GEU;
19674 case ORDERED:
19675 case UNORDERED:
19676 return code;
19677 break;
19678 case UNEQ:
19679 return EQ;
19680 break;
19681 case UNLT:
19682 return LTU;
19683 break;
19684 case UNLE:
19685 return LEU;
19686 break;
19687 case LTGT:
19688 return NE;
19689 break;
19690 default:
19691 return UNKNOWN;
19692 }
19693 }
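/* Background for the mapping above: after fcomi, or after fnstsw+sahf,
   the x87 condition bits end up in ZF/PF/CF -- the same layout an
   unsigned integer comparison produces.  Hence the ordered FP codes map
   to their unsigned counterparts, e.g.

     GT   -> GTU (ja)     GE   -> GEU (jae)
     UNLT -> LTU (jb)     UNLE -> LEU (jbe)
     LTGT -> NE  (jne)    UNEQ -> EQ  (je)

   and the branch or setcc emitted on the resulting flags is the same one
   that would be used after an unsigned integer compare.  */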
19694
19695 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19696
19697 static rtx
19698 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19699 {
19700 enum machine_mode fpcmp_mode, intcmp_mode;
19701 rtx tmp, tmp2;
19702
19703 fpcmp_mode = ix86_fp_compare_mode (code);
19704 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19705
19706 /* Do fcomi/sahf based test when profitable. */
19707 switch (ix86_fp_comparison_strategy (code))
19708 {
19709 case IX86_FPCMP_COMI:
19710 intcmp_mode = fpcmp_mode;
19711 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19712 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19713 tmp);
19714 emit_insn (tmp);
19715 break;
19716
19717 case IX86_FPCMP_SAHF:
19718 intcmp_mode = fpcmp_mode;
19719 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19720 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19721 tmp);
19722
19723 if (!scratch)
19724 scratch = gen_reg_rtx (HImode);
19725 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19726 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19727 break;
19728
19729 case IX86_FPCMP_ARITH:
19730 /* Reg-stack pops clobber the FP status register, so the fnstsw must come first. */
19731 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19732 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19733 if (!scratch)
19734 scratch = gen_reg_rtx (HImode);
19735 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19736
19737 /* In the unordered case, we have to check C2 for NaN's, which
19738 doesn't happen to work out to anything nice combination-wise.
19739 So do some bit twiddling on the value we've got in AH to come
19740 up with an appropriate set of condition codes. */
19741
19742 intcmp_mode = CCNOmode;
19743 switch (code)
19744 {
19745 case GT:
19746 case UNGT:
19747 if (code == GT || !TARGET_IEEE_FP)
19748 {
19749 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19750 code = EQ;
19751 }
19752 else
19753 {
19754 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19755 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19756 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19757 intcmp_mode = CCmode;
19758 code = GEU;
19759 }
19760 break;
19761 case LT:
19762 case UNLT:
19763 if (code == LT && TARGET_IEEE_FP)
19764 {
19765 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19766 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19767 intcmp_mode = CCmode;
19768 code = EQ;
19769 }
19770 else
19771 {
19772 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19773 code = NE;
19774 }
19775 break;
19776 case GE:
19777 case UNGE:
19778 if (code == GE || !TARGET_IEEE_FP)
19779 {
19780 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19781 code = EQ;
19782 }
19783 else
19784 {
19785 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19786 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19787 code = NE;
19788 }
19789 break;
19790 case LE:
19791 case UNLE:
19792 if (code == LE && TARGET_IEEE_FP)
19793 {
19794 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19795 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19796 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19797 intcmp_mode = CCmode;
19798 code = LTU;
19799 }
19800 else
19801 {
19802 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19803 code = NE;
19804 }
19805 break;
19806 case EQ:
19807 case UNEQ:
19808 if (code == EQ && TARGET_IEEE_FP)
19809 {
19810 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19811 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19812 intcmp_mode = CCmode;
19813 code = EQ;
19814 }
19815 else
19816 {
19817 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19818 code = NE;
19819 }
19820 break;
19821 case NE:
19822 case LTGT:
19823 if (code == NE && TARGET_IEEE_FP)
19824 {
19825 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19826 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19827 GEN_INT (0x40)));
19828 code = NE;
19829 }
19830 else
19831 {
19832 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19833 code = EQ;
19834 }
19835 break;
19836
19837 case UNORDERED:
19838 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19839 code = NE;
19840 break;
19841 case ORDERED:
19842 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19843 code = EQ;
19844 break;
19845
19846 default:
19847 gcc_unreachable ();
19848 }
19849 break;
19850
19851 default:
19852 gcc_unreachable();
19853 }
19854
19855 /* Return the test that should be put into the flags user, i.e.
19856 the bcc, scc, or cmov instruction. */
19857 return gen_rtx_fmt_ee (code, VOIDmode,
19858 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19859 const0_rtx);
19860 }
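/* Background for the IX86_FPCMP_ARITH masks used above: after
   fnstsw %ax, bit 0 of %ah is C0, bit 2 is C2 and bit 6 is C3, so the
   constants 0x01, 0x04, 0x40 and 0x45 select C0, C2, C3 and C0|C2|C3
   respectively.  An fcom leaves C3=C2=C0=0 for op0 > op1, sets C0 for
   op0 < op1, sets C3 for equality, and sets all three for an unordered
   result.  As a worked example, the non-IEEE GT test emitted above,
   "test $0x45, %ah" followed by je/sete, succeeds exactly when none of
   C0/C2/C3 is set, i.e. when op0 > op1.  */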
19861
19862 static rtx
19863 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19864 {
19865 rtx ret;
19866
19867 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19868 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19869
19870 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19871 {
19872 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19873 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19874 }
19875 else
19876 ret = ix86_expand_int_compare (code, op0, op1);
19877
19878 return ret;
19879 }
19880
19881 void
19882 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19883 {
19884 enum machine_mode mode = GET_MODE (op0);
19885 rtx tmp;
19886
19887 switch (mode)
19888 {
19889 case SFmode:
19890 case DFmode:
19891 case XFmode:
19892 case QImode:
19893 case HImode:
19894 case SImode:
19895 simple:
19896 tmp = ix86_expand_compare (code, op0, op1);
19897 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19898 gen_rtx_LABEL_REF (VOIDmode, label),
19899 pc_rtx);
19900 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19901 return;
19902
19903 case DImode:
19904 if (TARGET_64BIT)
19905 goto simple;
19906 case TImode:
19907 /* Expand DImode/TImode branch into multiple compare+branch. */
19908 {
19909 rtx lo[2], hi[2], label2;
19910 enum rtx_code code1, code2, code3;
19911 enum machine_mode submode;
19912
19913 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19914 {
19915 tmp = op0, op0 = op1, op1 = tmp;
19916 code = swap_condition (code);
19917 }
19918
19919 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19920 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19921
19922 submode = mode == DImode ? SImode : DImode;
19923
19924 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19925 avoid two branches. This costs one extra insn, so disable when
19926 optimizing for size. */
19927
19928 if ((code == EQ || code == NE)
19929 && (!optimize_insn_for_size_p ()
19930 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19931 {
19932 rtx xor0, xor1;
19933
19934 xor1 = hi[0];
19935 if (hi[1] != const0_rtx)
19936 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19937 NULL_RTX, 0, OPTAB_WIDEN);
19938
19939 xor0 = lo[0];
19940 if (lo[1] != const0_rtx)
19941 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19942 NULL_RTX, 0, OPTAB_WIDEN);
19943
19944 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19945 NULL_RTX, 0, OPTAB_WIDEN);
19946
19947 ix86_expand_branch (code, tmp, const0_rtx, label);
19948 return;
19949 }
19950
19951 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
19952 op1 is a constant and the low word is zero, then we can just
19953 examine the high word. Similarly for a low word of -1 and
19954 less-or-equal or greater-than. */
19955
19956 if (CONST_INT_P (hi[1]))
19957 switch (code)
19958 {
19959 case LT: case LTU: case GE: case GEU:
19960 if (lo[1] == const0_rtx)
19961 {
19962 ix86_expand_branch (code, hi[0], hi[1], label);
19963 return;
19964 }
19965 break;
19966 case LE: case LEU: case GT: case GTU:
19967 if (lo[1] == constm1_rtx)
19968 {
19969 ix86_expand_branch (code, hi[0], hi[1], label);
19970 return;
19971 }
19972 break;
19973 default:
19974 break;
19975 }
19976
19977 /* Otherwise, we need two or three jumps. */
19978
19979 label2 = gen_label_rtx ();
19980
19981 code1 = code;
19982 code2 = swap_condition (code);
19983 code3 = unsigned_condition (code);
19984
19985 switch (code)
19986 {
19987 case LT: case GT: case LTU: case GTU:
19988 break;
19989
19990 case LE: code1 = LT; code2 = GT; break;
19991 case GE: code1 = GT; code2 = LT; break;
19992 case LEU: code1 = LTU; code2 = GTU; break;
19993 case GEU: code1 = GTU; code2 = LTU; break;
19994
19995 case EQ: code1 = UNKNOWN; code2 = NE; break;
19996 case NE: code2 = UNKNOWN; break;
19997
19998 default:
19999 gcc_unreachable ();
20000 }
20001
20002 /*
20003 * a < b =>
20004 * if (hi(a) < hi(b)) goto true;
20005 * if (hi(a) > hi(b)) goto false;
20006 * if (lo(a) < lo(b)) goto true;
20007 * false:
20008 */
20009
20010 if (code1 != UNKNOWN)
20011 ix86_expand_branch (code1, hi[0], hi[1], label);
20012 if (code2 != UNKNOWN)
20013 ix86_expand_branch (code2, hi[0], hi[1], label2);
20014
20015 ix86_expand_branch (code3, lo[0], lo[1], label);
20016
20017 if (code2 != UNKNOWN)
20018 emit_label (label2);
20019 return;
20020 }
20021
20022 default:
20023 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20024 goto simple;
20025 }
20026 }
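/* For illustration, the double-word expansion above turns a -m32 signed
   DImode branch "if (a < b) goto L" into roughly

       cmpl  hi(b), hi(a)
       jl    L          ; high words decide the signed comparison ...
       jg    L2         ; ... unless they are equal,
       cmpl  lo(b), lo(a)
       jb    L          ; then the low words decide, unsigned
     L2:

   while equality tests use the cheaper reduction emitted earlier:
   (hi(a) ^ hi(b)) | (lo(a) ^ lo(b)) == 0 iff a == b.  */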
20027
20028 /* Split branch based on floating point condition. */
20029 void
20030 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20031 rtx target1, rtx target2, rtx tmp)
20032 {
20033 rtx condition;
20034 rtx i;
20035
20036 if (target2 != pc_rtx)
20037 {
20038 rtx tmp = target2;
20039 code = reverse_condition_maybe_unordered (code);
20040 target2 = target1;
20041 target1 = tmp;
20042 }
20043
20044 condition = ix86_expand_fp_compare (code, op1, op2,
20045 tmp);
20046
20047 i = emit_jump_insn (gen_rtx_SET
20048 (VOIDmode, pc_rtx,
20049 gen_rtx_IF_THEN_ELSE (VOIDmode,
20050 condition, target1, target2)));
20051 if (split_branch_probability >= 0)
20052 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20053 }
20054
20055 void
20056 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20057 {
20058 rtx ret;
20059
20060 gcc_assert (GET_MODE (dest) == QImode);
20061
20062 ret = ix86_expand_compare (code, op0, op1);
20063 PUT_MODE (ret, QImode);
20064 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20065 }
20066
20067 /* Expand a comparison setting or clearing the carry flag. Return true when
20068 successful and set *POP to the comparison operation. */
20069 static bool
20070 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20071 {
20072 enum machine_mode mode =
20073 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20074
20075 /* Do not handle double-word compares, which go through a special path. */
20076 if (mode == (TARGET_64BIT ? TImode : DImode))
20077 return false;
20078
20079 if (SCALAR_FLOAT_MODE_P (mode))
20080 {
20081 rtx compare_op, compare_seq;
20082
20083 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20084
20085 /* Shortcut: the following common codes never translate
20086 into carry flag compares. */
20087 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20088 || code == ORDERED || code == UNORDERED)
20089 return false;
20090
20091 /* These comparisons require the zero flag; swap the operands so that they do not. */
20092 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20093 && !TARGET_IEEE_FP)
20094 {
20095 rtx tmp = op0;
20096 op0 = op1;
20097 op1 = tmp;
20098 code = swap_condition (code);
20099 }
20100
20101 /* Try to expand the comparison and verify that we end up with a
20102 carry-flag-based comparison. This fails only when we decide
20103 to expand the comparison using arithmetic, which is not a
20104 common scenario. */
20105 start_sequence ();
20106 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20107 compare_seq = get_insns ();
20108 end_sequence ();
20109
20110 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20111 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20112 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20113 else
20114 code = GET_CODE (compare_op);
20115
20116 if (code != LTU && code != GEU)
20117 return false;
20118
20119 emit_insn (compare_seq);
20120 *pop = compare_op;
20121 return true;
20122 }
20123
20124 if (!INTEGRAL_MODE_P (mode))
20125 return false;
20126
20127 switch (code)
20128 {
20129 case LTU:
20130 case GEU:
20131 break;
20132
20133 /* Convert a==0 into (unsigned)a<1. */
20134 case EQ:
20135 case NE:
20136 if (op1 != const0_rtx)
20137 return false;
20138 op1 = const1_rtx;
20139 code = (code == EQ ? LTU : GEU);
20140 break;
20141
20142 /* Convert a>b into b<a or a>=b+1. */
20143 case GTU:
20144 case LEU:
20145 if (CONST_INT_P (op1))
20146 {
20147 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20148 /* Bail out on overflow. We could still swap the operands, but that
20149 would force loading the constant into a register. */
20150 if (op1 == const0_rtx
20151 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20152 return false;
20153 code = (code == GTU ? GEU : LTU);
20154 }
20155 else
20156 {
20157 rtx tmp = op1;
20158 op1 = op0;
20159 op0 = tmp;
20160 code = (code == GTU ? LTU : GEU);
20161 }
20162 break;
20163
20164 /* Convert a>=0 into (unsigned)a<0x80000000. */
20165 case LT:
20166 case GE:
20167 if (mode == DImode || op1 != const0_rtx)
20168 return false;
20169 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20170 code = (code == LT ? GEU : LTU);
20171 break;
20172 case LE:
20173 case GT:
20174 if (mode == DImode || op1 != constm1_rtx)
20175 return false;
20176 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20177 code = (code == LE ? GEU : LTU);
20178 break;
20179
20180 default:
20181 return false;
20182 }
20183 /* Swapping operands may cause a constant to appear as the first operand. */
20184 if (!nonimmediate_operand (op0, VOIDmode))
20185 {
20186 if (!can_create_pseudo_p ())
20187 return false;
20188 op0 = force_reg (mode, op0);
20189 }
20190 *pop = ix86_expand_compare (code, op0, op1);
20191 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20192 return true;
20193 }
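/* For illustration, some of the rewrites performed above, all of which
   end up as LTU or GEU and can therefore be tested via the carry flag:

     x == 0            ->  (unsigned) x < 1
     x != 0            ->  (unsigned) x >= 1
     x > 5  (unsigned) ->  (unsigned) x >= 6
     x >= 0 (SImode)   ->  (unsigned) x < 0x80000000

   Double-word compares and codes that would also need the zero flag
   bail out and are handled by the generic paths instead.  */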
20194
20195 bool
20196 ix86_expand_int_movcc (rtx operands[])
20197 {
20198 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20199 rtx compare_seq, compare_op;
20200 enum machine_mode mode = GET_MODE (operands[0]);
20201 bool sign_bit_compare_p = false;
20202 rtx op0 = XEXP (operands[1], 0);
20203 rtx op1 = XEXP (operands[1], 1);
20204
20205 if (GET_MODE (op0) == TImode
20206 || (GET_MODE (op0) == DImode
20207 && !TARGET_64BIT))
20208 return false;
20209
20210 start_sequence ();
20211 compare_op = ix86_expand_compare (code, op0, op1);
20212 compare_seq = get_insns ();
20213 end_sequence ();
20214
20215 compare_code = GET_CODE (compare_op);
20216
20217 if ((op1 == const0_rtx && (code == GE || code == LT))
20218 || (op1 == constm1_rtx && (code == GT || code == LE)))
20219 sign_bit_compare_p = true;
20220
20221 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20222 HImode insns, we'd be swallowed in word prefix ops. */
20223
20224 if ((mode != HImode || TARGET_FAST_PREFIX)
20225 && (mode != (TARGET_64BIT ? TImode : DImode))
20226 && CONST_INT_P (operands[2])
20227 && CONST_INT_P (operands[3]))
20228 {
20229 rtx out = operands[0];
20230 HOST_WIDE_INT ct = INTVAL (operands[2]);
20231 HOST_WIDE_INT cf = INTVAL (operands[3]);
20232 HOST_WIDE_INT diff;
20233
20234 diff = ct - cf;
20235 /* Sign bit compares are better done using shifts than by using
20236 sbb. */
20237 if (sign_bit_compare_p
20238 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20239 {
20240 /* Detect overlap between destination and compare sources. */
20241 rtx tmp = out;
20242
20243 if (!sign_bit_compare_p)
20244 {
20245 rtx flags;
20246 bool fpcmp = false;
20247
20248 compare_code = GET_CODE (compare_op);
20249
20250 flags = XEXP (compare_op, 0);
20251
20252 if (GET_MODE (flags) == CCFPmode
20253 || GET_MODE (flags) == CCFPUmode)
20254 {
20255 fpcmp = true;
20256 compare_code
20257 = ix86_fp_compare_code_to_integer (compare_code);
20258 }
20259
20260 /* To simplify the rest of the code, restrict to the GEU case. */
20261 if (compare_code == LTU)
20262 {
20263 HOST_WIDE_INT tmp = ct;
20264 ct = cf;
20265 cf = tmp;
20266 compare_code = reverse_condition (compare_code);
20267 code = reverse_condition (code);
20268 }
20269 else
20270 {
20271 if (fpcmp)
20272 PUT_CODE (compare_op,
20273 reverse_condition_maybe_unordered
20274 (GET_CODE (compare_op)));
20275 else
20276 PUT_CODE (compare_op,
20277 reverse_condition (GET_CODE (compare_op)));
20278 }
20279 diff = ct - cf;
20280
20281 if (reg_overlap_mentioned_p (out, op0)
20282 || reg_overlap_mentioned_p (out, op1))
20283 tmp = gen_reg_rtx (mode);
20284
20285 if (mode == DImode)
20286 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20287 else
20288 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20289 flags, compare_op));
20290 }
20291 else
20292 {
20293 if (code == GT || code == GE)
20294 code = reverse_condition (code);
20295 else
20296 {
20297 HOST_WIDE_INT tmp = ct;
20298 ct = cf;
20299 cf = tmp;
20300 diff = ct - cf;
20301 }
20302 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20303 }
20304
20305 if (diff == 1)
20306 {
20307 /*
20308 * cmpl op0,op1
20309 * sbbl dest,dest
20310 * [addl dest, ct]
20311 *
20312 * Size 5 - 8.
20313 */
20314 if (ct)
20315 tmp = expand_simple_binop (mode, PLUS,
20316 tmp, GEN_INT (ct),
20317 copy_rtx (tmp), 1, OPTAB_DIRECT);
20318 }
20319 else if (cf == -1)
20320 {
20321 /*
20322 * cmpl op0,op1
20323 * sbbl dest,dest
20324 * orl $ct, dest
20325 *
20326 * Size 8.
20327 */
20328 tmp = expand_simple_binop (mode, IOR,
20329 tmp, GEN_INT (ct),
20330 copy_rtx (tmp), 1, OPTAB_DIRECT);
20331 }
20332 else if (diff == -1 && ct)
20333 {
20334 /*
20335 * cmpl op0,op1
20336 * sbbl dest,dest
20337 * notl dest
20338 * [addl dest, cf]
20339 *
20340 * Size 8 - 11.
20341 */
20342 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20343 if (cf)
20344 tmp = expand_simple_binop (mode, PLUS,
20345 copy_rtx (tmp), GEN_INT (cf),
20346 copy_rtx (tmp), 1, OPTAB_DIRECT);
20347 }
20348 else
20349 {
20350 /*
20351 * cmpl op0,op1
20352 * sbbl dest,dest
20353 * [notl dest]
20354 * andl cf - ct, dest
20355 * [addl dest, ct]
20356 *
20357 * Size 8 - 11.
20358 */
20359
20360 if (cf == 0)
20361 {
20362 cf = ct;
20363 ct = 0;
20364 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20365 }
20366
20367 tmp = expand_simple_binop (mode, AND,
20368 copy_rtx (tmp),
20369 gen_int_mode (cf - ct, mode),
20370 copy_rtx (tmp), 1, OPTAB_DIRECT);
20371 if (ct)
20372 tmp = expand_simple_binop (mode, PLUS,
20373 copy_rtx (tmp), GEN_INT (ct),
20374 copy_rtx (tmp), 1, OPTAB_DIRECT);
20375 }
20376
20377 if (!rtx_equal_p (tmp, out))
20378 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20379
20380 return true;
20381 }
20382
20383 if (diff < 0)
20384 {
20385 enum machine_mode cmp_mode = GET_MODE (op0);
20386
20387 HOST_WIDE_INT tmp;
20388 tmp = ct, ct = cf, cf = tmp;
20389 diff = -diff;
20390
20391 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20392 {
20393 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20394
20395 /* We may be reversing an unordered compare to a normal compare, which
20396 is not valid in general (we may convert a non-trapping condition
20397 into a trapping one); however, on i386 we currently emit all
20398 comparisons unordered. */
20399 compare_code = reverse_condition_maybe_unordered (compare_code);
20400 code = reverse_condition_maybe_unordered (code);
20401 }
20402 else
20403 {
20404 compare_code = reverse_condition (compare_code);
20405 code = reverse_condition (code);
20406 }
20407 }
20408
20409 compare_code = UNKNOWN;
20410 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20411 && CONST_INT_P (op1))
20412 {
20413 if (op1 == const0_rtx
20414 && (code == LT || code == GE))
20415 compare_code = code;
20416 else if (op1 == constm1_rtx)
20417 {
20418 if (code == LE)
20419 compare_code = LT;
20420 else if (code == GT)
20421 compare_code = GE;
20422 }
20423 }
20424
20425 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20426 if (compare_code != UNKNOWN
20427 && GET_MODE (op0) == GET_MODE (out)
20428 && (cf == -1 || ct == -1))
20429 {
20430 /* If the lea code below could be used, only optimize
20431 if it results in a 2-insn sequence. */
20432
20433 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20434 || diff == 3 || diff == 5 || diff == 9)
20435 || (compare_code == LT && ct == -1)
20436 || (compare_code == GE && cf == -1))
20437 {
20438 /*
20439 * notl op1 (if necessary)
20440 * sarl $31, op1
20441 * orl cf, op1
20442 */
20443 if (ct != -1)
20444 {
20445 cf = ct;
20446 ct = -1;
20447 code = reverse_condition (code);
20448 }
20449
20450 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20451
20452 out = expand_simple_binop (mode, IOR,
20453 out, GEN_INT (cf),
20454 out, 1, OPTAB_DIRECT);
20455 if (out != operands[0])
20456 emit_move_insn (operands[0], out);
20457
20458 return true;
20459 }
20460 }
20461
20462
20463 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20464 || diff == 3 || diff == 5 || diff == 9)
20465 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20466 && (mode != DImode
20467 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20468 {
20469 /*
20470 * xorl dest,dest
20471 * cmpl op1,op2
20472 * setcc dest
20473 * lea cf(dest*(ct-cf)),dest
20474 *
20475 * Size 14.
20476 *
20477 * This also catches the degenerate setcc-only case.
20478 */
20479
20480 rtx tmp;
20481 int nops;
20482
20483 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20484
20485 nops = 0;
20486 /* On x86_64 the lea instruction operates on Pmode, so we need
20487 to do the arithmetic in the proper mode to match. */
20488 if (diff == 1)
20489 tmp = copy_rtx (out);
20490 else
20491 {
20492 rtx out1;
20493 out1 = copy_rtx (out);
20494 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20495 nops++;
20496 if (diff & 1)
20497 {
20498 tmp = gen_rtx_PLUS (mode, tmp, out1);
20499 nops++;
20500 }
20501 }
20502 if (cf != 0)
20503 {
20504 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20505 nops++;
20506 }
20507 if (!rtx_equal_p (tmp, out))
20508 {
20509 if (nops == 1)
20510 out = force_operand (tmp, copy_rtx (out));
20511 else
20512 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20513 }
20514 if (!rtx_equal_p (out, operands[0]))
20515 emit_move_insn (operands[0], copy_rtx (out));
20516
20517 return true;
20518 }
20519
20520 /*
20521 * General case: Jumpful:
20522 * xorl dest,dest cmpl op1, op2
20523 * cmpl op1, op2 movl ct, dest
20524 * setcc dest jcc 1f
20525 * decl dest movl cf, dest
20526 * andl (cf-ct),dest 1:
20527 * addl ct,dest
20528 *
20529 * Size 20. Size 14.
20530 *
20531 * This is reasonably steep, but branch mispredict costs are
20532 * high on modern CPUs, so consider failing only if optimizing
20533 * for space.
20534 */
20535
20536 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20537 && BRANCH_COST (optimize_insn_for_speed_p (),
20538 false) >= 2)
20539 {
20540 if (cf == 0)
20541 {
20542 enum machine_mode cmp_mode = GET_MODE (op0);
20543
20544 cf = ct;
20545 ct = 0;
20546
20547 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20548 {
20549 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20550
20551 /* We may be reversing an unordered compare to a normal compare,
20552 which is not valid in general (we may convert a non-trapping
20553 condition into a trapping one); however, on i386 we currently
20554 emit all comparisons unordered. */
20555 code = reverse_condition_maybe_unordered (code);
20556 }
20557 else
20558 {
20559 code = reverse_condition (code);
20560 if (compare_code != UNKNOWN)
20561 compare_code = reverse_condition (compare_code);
20562 }
20563 }
20564
20565 if (compare_code != UNKNOWN)
20566 {
20567 /* notl op1 (if needed)
20568 sarl $31, op1
20569 andl (cf-ct), op1
20570 addl ct, op1
20571
20572 For x < 0 (resp. x <= -1) there will be no notl,
20573 so if possible swap the constants to get rid of the
20574 complement.
20575 True/false will be -1/0 while code below (store flag
20576 followed by decrement) is 0/-1, so the constants need
20577 to be exchanged once more. */
20578
20579 if (compare_code == GE || !cf)
20580 {
20581 code = reverse_condition (code);
20582 compare_code = LT;
20583 }
20584 else
20585 {
20586 HOST_WIDE_INT tmp = cf;
20587 cf = ct;
20588 ct = tmp;
20589 }
20590
20591 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20592 }
20593 else
20594 {
20595 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20596
20597 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20598 constm1_rtx,
20599 copy_rtx (out), 1, OPTAB_DIRECT);
20600 }
20601
20602 out = expand_simple_binop (mode, AND, copy_rtx (out),
20603 gen_int_mode (cf - ct, mode),
20604 copy_rtx (out), 1, OPTAB_DIRECT);
20605 if (ct)
20606 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20607 copy_rtx (out), 1, OPTAB_DIRECT);
20608 if (!rtx_equal_p (out, operands[0]))
20609 emit_move_insn (operands[0], copy_rtx (out));
20610
20611 return true;
20612 }
20613 }
20614
20615 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20616 {
20617 /* Try a few things more with specific constants and a variable. */
20618
20619 optab op;
20620 rtx var, orig_out, out, tmp;
20621
20622 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20623 return false;
20624
20625 /* If one of the two operands is an interesting constant, load a 0/-1
20626 mask with the code above and mask the variable in with a logical operation. */
20627
20628 if (CONST_INT_P (operands[2]))
20629 {
20630 var = operands[3];
20631 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20632 operands[3] = constm1_rtx, op = and_optab;
20633 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20634 operands[3] = const0_rtx, op = ior_optab;
20635 else
20636 return false;
20637 }
20638 else if (CONST_INT_P (operands[3]))
20639 {
20640 var = operands[2];
20641 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20642 operands[2] = constm1_rtx, op = and_optab;
20643 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
20644 operands[2] = const0_rtx, op = ior_optab;
20645 else
20646 return false;
20647 }
20648 else
20649 return false;
20650
20651 orig_out = operands[0];
20652 tmp = gen_reg_rtx (mode);
20653 operands[0] = tmp;
20654
20655 /* Recurse to get the constant loaded. */
20656 if (ix86_expand_int_movcc (operands) == 0)
20657 return false;
20658
20659 /* Mask in the interesting variable. */
20660 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20661 OPTAB_WIDEN);
20662 if (!rtx_equal_p (out, orig_out))
20663 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20664
20665 return true;
20666 }
20667
20668 /*
20669 * For comparison with above,
20670 *
20671 * movl cf,dest
20672 * movl ct,tmp
20673 * cmpl op1,op2
20674 * cmovcc tmp,dest
20675 *
20676 * Size 15.
20677 */
20678
20679 if (! nonimmediate_operand (operands[2], mode))
20680 operands[2] = force_reg (mode, operands[2]);
20681 if (! nonimmediate_operand (operands[3], mode))
20682 operands[3] = force_reg (mode, operands[3]);
20683
20684 if (! register_operand (operands[2], VOIDmode)
20685 && (mode == QImode
20686 || ! register_operand (operands[3], VOIDmode)))
20687 operands[2] = force_reg (mode, operands[2]);
20688
20689 if (mode == QImode
20690 && ! register_operand (operands[3], VOIDmode))
20691 operands[3] = force_reg (mode, operands[3]);
20692
20693 emit_insn (compare_seq);
20694 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20695 gen_rtx_IF_THEN_ELSE (mode,
20696 compare_op, operands[2],
20697 operands[3])));
20698 return true;
20699 }
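/* A minimal C sketch of the branch-free "setcc; dec; and; add" sequence
   documented in the comments above.  The helper name and the 32-bit
   unsigned type are illustrative assumptions only; the block is excluded
   from compilation.  */
#if 0
static unsigned int
int_movcc_sketch (int cond, unsigned int ct, unsigned int cf)
{
  unsigned int dest = cond ? 1 : 0;	/* xorl dest,dest; setcc dest  */
  dest -= 1;				/* decl dest: 0 if true, -1 if false  */
  dest &= cf - ct;			/* andl (cf-ct),dest  */
  dest += ct;				/* addl ct,dest  */
  return dest;				/* ct when COND holds, cf otherwise  */
}
#endif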
20700
20701 /* Swap, force into registers, or otherwise massage the two operands
20702 to an sse comparison with a mask result. Thus we differ a bit from
20703 ix86_prepare_fp_compare_args which expects to produce a flags result.
20704
20705 The DEST operand exists to help determine whether to commute commutative
20706 operators. The POP0/POP1 operands are updated in place. The new
20707 comparison code is returned, or UNKNOWN if not implementable. */
20708
20709 static enum rtx_code
20710 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20711 rtx *pop0, rtx *pop1)
20712 {
20713 rtx tmp;
20714
20715 switch (code)
20716 {
20717 case LTGT:
20718 case UNEQ:
20719 /* AVX supports all the needed comparisons. */
20720 if (TARGET_AVX)
20721 break;
20722 /* We have no LTGT as an operator. We could implement it with
20723 NE & ORDERED, but this requires an extra temporary. It's
20724 not clear that it's worth it. */
20725 return UNKNOWN;
20726
20727 case LT:
20728 case LE:
20729 case UNGT:
20730 case UNGE:
20731 /* These are supported directly. */
20732 break;
20733
20734 case EQ:
20735 case NE:
20736 case UNORDERED:
20737 case ORDERED:
20738 /* AVX has 3 operand comparisons, no need to swap anything. */
20739 if (TARGET_AVX)
20740 break;
20741 /* For commutative operators, try to canonicalize the destination
20742 operand to be first in the comparison - this helps reload to
20743 avoid extra moves. */
20744 if (!dest || !rtx_equal_p (dest, *pop1))
20745 break;
20746 /* FALLTHRU */
20747
20748 case GE:
20749 case GT:
20750 case UNLE:
20751 case UNLT:
20752 /* These are not supported directly before AVX, and furthermore
20753 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20754 comparison operands to transform into something that is
20755 supported. */
20756 tmp = *pop0;
20757 *pop0 = *pop1;
20758 *pop1 = tmp;
20759 code = swap_condition (code);
20760 break;
20761
20762 default:
20763 gcc_unreachable ();
20764 }
20765
20766 return code;
20767 }
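/* Background for the swap above: before AVX the cmpps/cmpss family only
   encodes the predicates eq, lt, le, unord, neq, nlt, nle and ord, so a
   comparison such as "a > b" is rewritten as "b < a" (cmpltps with the
   operands exchanged), while e.g. UNGE maps directly onto nlt.  */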
20768
20769 /* Detect conditional moves that exactly match min/max operational
20770 semantics. Note that this is IEEE safe, as long as we don't
20771 interchange the operands.
20772
20773 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20774 and TRUE if the operation is successful and instructions are emitted. */
20775
20776 static bool
20777 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20778 rtx cmp_op1, rtx if_true, rtx if_false)
20779 {
20780 enum machine_mode mode;
20781 bool is_min;
20782 rtx tmp;
20783
20784 if (code == LT)
20785 ;
20786 else if (code == UNGE)
20787 {
20788 tmp = if_true;
20789 if_true = if_false;
20790 if_false = tmp;
20791 }
20792 else
20793 return false;
20794
20795 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20796 is_min = true;
20797 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20798 is_min = false;
20799 else
20800 return false;
20801
20802 mode = GET_MODE (dest);
20803
20804 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20805 but MODE may be a vector mode and thus not appropriate. */
20806 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20807 {
20808 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20809 rtvec v;
20810
20811 if_true = force_reg (mode, if_true);
20812 v = gen_rtvec (2, if_true, if_false);
20813 tmp = gen_rtx_UNSPEC (mode, v, u);
20814 }
20815 else
20816 {
20817 code = is_min ? SMIN : SMAX;
20818 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20819 }
20820
20821 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20822 return true;
20823 }
20824
20825 /* Expand an sse vector comparison. Return the register with the result. */
20826
20827 static rtx
20828 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20829 rtx op_true, rtx op_false)
20830 {
20831 enum machine_mode mode = GET_MODE (dest);
20832 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20833
20834 /* In the general case the result of the comparison can differ from the operands' mode. */
20835 enum machine_mode cmp_mode;
20836
20837 /* In AVX512F the result of comparison is an integer mask. */
20838 bool maskcmp = false;
20839 rtx x;
20840
20841 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20842 {
20843 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20844 gcc_assert (cmp_mode != BLKmode);
20845
20846 maskcmp = true;
20847 }
20848 else
20849 cmp_mode = cmp_ops_mode;
20850
20851
20852 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20853 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20854 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20855
20856 if (optimize
20857 || reg_overlap_mentioned_p (dest, op_true)
20858 || reg_overlap_mentioned_p (dest, op_false))
20859 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20860
20861 /* Compare patterns for int modes are unspec in AVX512F only. */
20862 if (maskcmp && (code == GT || code == EQ))
20863 {
20864 rtx (*gen)(rtx, rtx, rtx);
20865
20866 switch (cmp_ops_mode)
20867 {
20868 case V16SImode:
20869 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20870 break;
20871 case V8DImode:
20872 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20873 break;
20874 default:
20875 gen = NULL;
20876 }
20877
20878 if (gen)
20879 {
20880 emit_insn (gen (dest, cmp_op0, cmp_op1));
20881 return dest;
20882 }
20883 }
20884 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20885
20886 if (cmp_mode != mode && !maskcmp)
20887 {
20888 x = force_reg (cmp_ops_mode, x);
20889 convert_move (dest, x, false);
20890 }
20891 else
20892 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20893
20894 return dest;
20895 }
20896
20897 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20898 operations. This is used for both scalar and vector conditional moves. */
20899
20900 static void
20901 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20902 {
20903 enum machine_mode mode = GET_MODE (dest);
20904 enum machine_mode cmpmode = GET_MODE (cmp);
20905
20906 /* In AVX512F the result of comparison is an integer mask. */
20907 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20908
20909 rtx t2, t3, x;
20910
20911 if (vector_all_ones_operand (op_true, mode)
20912 && rtx_equal_p (op_false, CONST0_RTX (mode))
20913 && !maskcmp)
20914 {
20915 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20916 }
20917 else if (op_false == CONST0_RTX (mode)
20918 && !maskcmp)
20919 {
20920 op_true = force_reg (mode, op_true);
20921 x = gen_rtx_AND (mode, cmp, op_true);
20922 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20923 }
20924 else if (op_true == CONST0_RTX (mode)
20925 && !maskcmp)
20926 {
20927 op_false = force_reg (mode, op_false);
20928 x = gen_rtx_NOT (mode, cmp);
20929 x = gen_rtx_AND (mode, x, op_false);
20930 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20931 }
20932 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20933 && !maskcmp)
20934 {
20935 op_false = force_reg (mode, op_false);
20936 x = gen_rtx_IOR (mode, cmp, op_false);
20937 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20938 }
20939 else if (TARGET_XOP
20940 && !maskcmp)
20941 {
20942 op_true = force_reg (mode, op_true);
20943
20944 if (!nonimmediate_operand (op_false, mode))
20945 op_false = force_reg (mode, op_false);
20946
20947 emit_insn (gen_rtx_SET (mode, dest,
20948 gen_rtx_IF_THEN_ELSE (mode, cmp,
20949 op_true,
20950 op_false)));
20951 }
20952 else
20953 {
20954 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20955 rtx d = dest;
20956
20957 if (!nonimmediate_operand (op_true, mode))
20958 op_true = force_reg (mode, op_true);
20959
20960 op_false = force_reg (mode, op_false);
20961
20962 switch (mode)
20963 {
20964 case V4SFmode:
20965 if (TARGET_SSE4_1)
20966 gen = gen_sse4_1_blendvps;
20967 break;
20968 case V2DFmode:
20969 if (TARGET_SSE4_1)
20970 gen = gen_sse4_1_blendvpd;
20971 break;
20972 case V16QImode:
20973 case V8HImode:
20974 case V4SImode:
20975 case V2DImode:
20976 if (TARGET_SSE4_1)
20977 {
20978 gen = gen_sse4_1_pblendvb;
20979 if (mode != V16QImode)
20980 d = gen_reg_rtx (V16QImode);
20981 op_false = gen_lowpart (V16QImode, op_false);
20982 op_true = gen_lowpart (V16QImode, op_true);
20983 cmp = gen_lowpart (V16QImode, cmp);
20984 }
20985 break;
20986 case V8SFmode:
20987 if (TARGET_AVX)
20988 gen = gen_avx_blendvps256;
20989 break;
20990 case V4DFmode:
20991 if (TARGET_AVX)
20992 gen = gen_avx_blendvpd256;
20993 break;
20994 case V32QImode:
20995 case V16HImode:
20996 case V8SImode:
20997 case V4DImode:
20998 if (TARGET_AVX2)
20999 {
21000 gen = gen_avx2_pblendvb;
21001 if (mode != V32QImode)
21002 d = gen_reg_rtx (V32QImode);
21003 op_false = gen_lowpart (V32QImode, op_false);
21004 op_true = gen_lowpart (V32QImode, op_true);
21005 cmp = gen_lowpart (V32QImode, cmp);
21006 }
21007 break;
21008
21009 case V16SImode:
21010 gen = gen_avx512f_blendmv16si;
21011 break;
21012 case V8DImode:
21013 gen = gen_avx512f_blendmv8di;
21014 break;
21015 case V8DFmode:
21016 gen = gen_avx512f_blendmv8df;
21017 break;
21018 case V16SFmode:
21019 gen = gen_avx512f_blendmv16sf;
21020 break;
21021
21022 default:
21023 break;
21024 }
21025
21026 if (gen != NULL)
21027 {
21028 emit_insn (gen (d, op_false, op_true, cmp));
21029 if (d != dest)
21030 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21031 }
21032 else
21033 {
21034 op_true = force_reg (mode, op_true);
21035
21036 t2 = gen_reg_rtx (mode);
21037 if (optimize)
21038 t3 = gen_reg_rtx (mode);
21039 else
21040 t3 = dest;
21041
21042 x = gen_rtx_AND (mode, op_true, cmp);
21043 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21044
21045 x = gen_rtx_NOT (mode, cmp);
21046 x = gen_rtx_AND (mode, x, op_false);
21047 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21048
21049 x = gen_rtx_IOR (mode, t3, t2);
21050 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21051 }
21052 }
21053 }
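/* A minimal per-element C sketch of the and/andnot/or fallback emitted
   above when no blend instruction is available.  CMP is assumed to be
   all-ones or all-zeros, as the SSE compare instructions guarantee; the
   helper name is illustrative and the block is excluded from
   compilation.  */
#if 0
static unsigned int
sse_movcc_lane_sketch (unsigned int cmp, unsigned int op_true,
		       unsigned int op_false)
{
  unsigned int t2 = cmp & op_true;	/* t2 = cmp & op_true  */
  unsigned int t3 = ~cmp & op_false;	/* t3 = ~cmp & op_false  */
  return t2 | t3;			/* dest = t3 | t2  */
}
#endif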
21054
21055 /* Expand a floating-point conditional move. Return true if successful. */
21056
21057 bool
21058 ix86_expand_fp_movcc (rtx operands[])
21059 {
21060 enum machine_mode mode = GET_MODE (operands[0]);
21061 enum rtx_code code = GET_CODE (operands[1]);
21062 rtx tmp, compare_op;
21063 rtx op0 = XEXP (operands[1], 0);
21064 rtx op1 = XEXP (operands[1], 1);
21065
21066 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21067 {
21068 enum machine_mode cmode;
21069
21070 /* Since we've no cmove for sse registers, don't force bad register
21071 allocation just to gain access to it. Deny movcc when the
21072 comparison mode doesn't match the move mode. */
21073 cmode = GET_MODE (op0);
21074 if (cmode == VOIDmode)
21075 cmode = GET_MODE (op1);
21076 if (cmode != mode)
21077 return false;
21078
21079 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21080 if (code == UNKNOWN)
21081 return false;
21082
21083 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21084 operands[2], operands[3]))
21085 return true;
21086
21087 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21088 operands[2], operands[3]);
21089 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21090 return true;
21091 }
21092
21093 if (GET_MODE (op0) == TImode
21094 || (GET_MODE (op0) == DImode
21095 && !TARGET_64BIT))
21096 return false;
21097
21098 /* The floating point conditional move instructions don't directly
21099 support conditions resulting from a signed integer comparison. */
21100
21101 compare_op = ix86_expand_compare (code, op0, op1);
21102 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21103 {
21104 tmp = gen_reg_rtx (QImode);
21105 ix86_expand_setcc (tmp, code, op0, op1);
21106
21107 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21108 }
21109
21110 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21111 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21112 operands[2], operands[3])));
21113
21114 return true;
21115 }
21116
21117 /* Expand a floating-point vector conditional move; a vcond operation
21118 rather than a movcc operation. */
21119
21120 bool
21121 ix86_expand_fp_vcond (rtx operands[])
21122 {
21123 enum rtx_code code = GET_CODE (operands[3]);
21124 rtx cmp;
21125
21126 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21127 &operands[4], &operands[5]);
21128 if (code == UNKNOWN)
21129 {
21130 rtx temp;
21131 switch (GET_CODE (operands[3]))
21132 {
21133 case LTGT:
21134 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21135 operands[5], operands[0], operands[0]);
21136 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21137 operands[5], operands[1], operands[2]);
21138 code = AND;
21139 break;
21140 case UNEQ:
21141 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21142 operands[5], operands[0], operands[0]);
21143 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21144 operands[5], operands[1], operands[2]);
21145 code = IOR;
21146 break;
21147 default:
21148 gcc_unreachable ();
21149 }
21150 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21151 OPTAB_DIRECT);
21152 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21153 return true;
21154 }
21155
21156 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21157 operands[5], operands[1], operands[2]))
21158 return true;
21159
21160 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21161 operands[1], operands[2]);
21162 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21163 return true;
21164 }
21165
21166 /* Expand a signed/unsigned integral vector conditional move. */
21167
21168 bool
21169 ix86_expand_int_vcond (rtx operands[])
21170 {
21171 enum machine_mode data_mode = GET_MODE (operands[0]);
21172 enum machine_mode mode = GET_MODE (operands[4]);
21173 enum rtx_code code = GET_CODE (operands[3]);
21174 bool negate = false;
21175 rtx x, cop0, cop1;
21176
21177 cop0 = operands[4];
21178 cop1 = operands[5];
21179
21180 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21181 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21182 if ((code == LT || code == GE)
21183 && data_mode == mode
21184 && cop1 == CONST0_RTX (mode)
21185 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21186 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21187 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21188 && (GET_MODE_SIZE (data_mode) == 16
21189 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21190 {
21191 rtx negop = operands[2 - (code == LT)];
21192 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21193 if (negop == CONST1_RTX (data_mode))
21194 {
21195 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21196 operands[0], 1, OPTAB_DIRECT);
21197 if (res != operands[0])
21198 emit_move_insn (operands[0], res);
21199 return true;
21200 }
21201 else if (GET_MODE_INNER (data_mode) != DImode
21202 && vector_all_ones_operand (negop, data_mode))
21203 {
21204 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21205 operands[0], 0, OPTAB_DIRECT);
21206 if (res != operands[0])
21207 emit_move_insn (operands[0], res);
21208 return true;
21209 }
21210 }
21211
21212 if (!nonimmediate_operand (cop1, mode))
21213 cop1 = force_reg (mode, cop1);
21214 if (!general_operand (operands[1], data_mode))
21215 operands[1] = force_reg (data_mode, operands[1]);
21216 if (!general_operand (operands[2], data_mode))
21217 operands[2] = force_reg (data_mode, operands[2]);
21218
21219 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21220 if (TARGET_XOP
21221 && (mode == V16QImode || mode == V8HImode
21222 || mode == V4SImode || mode == V2DImode))
21223 ;
21224 else
21225 {
21226 /* Canonicalize the comparison to EQ, GT, GTU. */
21227 switch (code)
21228 {
21229 case EQ:
21230 case GT:
21231 case GTU:
21232 break;
21233
21234 case NE:
21235 case LE:
21236 case LEU:
21237 code = reverse_condition (code);
21238 negate = true;
21239 break;
21240
21241 case GE:
21242 case GEU:
21243 code = reverse_condition (code);
21244 negate = true;
21245 /* FALLTHRU */
21246
21247 case LT:
21248 case LTU:
21249 code = swap_condition (code);
21250 x = cop0, cop0 = cop1, cop1 = x;
21251 break;
21252
21253 default:
21254 gcc_unreachable ();
21255 }
21256
21257 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21258 if (mode == V2DImode)
21259 {
21260 switch (code)
21261 {
21262 case EQ:
21263 /* SSE4.1 supports EQ. */
21264 if (!TARGET_SSE4_1)
21265 return false;
21266 break;
21267
21268 case GT:
21269 case GTU:
21270 /* SSE4.2 supports GT/GTU. */
21271 if (!TARGET_SSE4_2)
21272 return false;
21273 break;
21274
21275 default:
21276 gcc_unreachable ();
21277 }
21278 }
21279
21280 /* Unsigned parallel compare is not supported by the hardware.
21281 Play some tricks to turn this into a signed comparison
21282 against 0. */
21283 if (code == GTU)
21284 {
21285 cop0 = force_reg (mode, cop0);
21286
21287 switch (mode)
21288 {
21289 case V16SImode:
21290 case V8DImode:
21291 case V8SImode:
21292 case V4DImode:
21293 case V4SImode:
21294 case V2DImode:
21295 {
21296 rtx t1, t2, mask;
21297 rtx (*gen_sub3) (rtx, rtx, rtx);
21298
21299 switch (mode)
21300 {
21301 case V16SImode: gen_sub3 = gen_subv16si3; break;
21302 case V8DImode: gen_sub3 = gen_subv8di3; break;
21303 case V8SImode: gen_sub3 = gen_subv8si3; break;
21304 case V4DImode: gen_sub3 = gen_subv4di3; break;
21305 case V4SImode: gen_sub3 = gen_subv4si3; break;
21306 case V2DImode: gen_sub3 = gen_subv2di3; break;
21307 default:
21308 gcc_unreachable ();
21309 }
21310 /* Subtract (-(INT MAX) - 1) from both operands to make
21311 them signed. */
21312 mask = ix86_build_signbit_mask (mode, true, false);
21313 t1 = gen_reg_rtx (mode);
21314 emit_insn (gen_sub3 (t1, cop0, mask));
21315
21316 t2 = gen_reg_rtx (mode);
21317 emit_insn (gen_sub3 (t2, cop1, mask));
21318
21319 cop0 = t1;
21320 cop1 = t2;
21321 code = GT;
21322 }
21323 break;
21324
21325 case V32QImode:
21326 case V16HImode:
21327 case V16QImode:
21328 case V8HImode:
21329 /* Perform a parallel unsigned saturating subtraction. */
21330 x = gen_reg_rtx (mode);
21331 emit_insn (gen_rtx_SET (VOIDmode, x,
21332 gen_rtx_US_MINUS (mode, cop0, cop1)));
21333
21334 cop0 = x;
21335 cop1 = CONST0_RTX (mode);
21336 code = EQ;
21337 negate = !negate;
21338 break;
21339
21340 default:
21341 gcc_unreachable ();
21342 }
21343 }
21344 }
21345
21346 /* Allow the comparison to be done in one mode, but the movcc to
21347 happen in another mode. */
21348 if (data_mode == mode)
21349 {
21350 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21351 operands[1+negate], operands[2-negate]);
21352 }
21353 else
21354 {
21355 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21356 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21357 operands[1+negate], operands[2-negate]);
21358 if (GET_MODE (x) == mode)
21359 x = gen_lowpart (data_mode, x);
21360 }
21361
21362 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21363 operands[2-negate]);
21364 return true;
21365 }
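/* For illustration, the two GTU tricks used above.  For 32- and 64-bit
   elements, subtracting the sign-bit mask 0x80000000 (equivalently,
   adding INT_MIN modulo 2^32) from both operands turns the unsigned
   comparison into a signed one:

     a >u b  <==>  (a - 0x80000000) >s (b - 0x80000000)

   For 8- and 16-bit elements, a parallel unsigned saturating subtract
   is used instead: (a -us b) == 0 iff a <=u b, so comparing the
   saturated difference against zero and then negating the mask yields
   the GTU result.  */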
21366
21367 static bool
21368 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21369 {
21370 enum machine_mode mode = GET_MODE (op0);
21371 switch (mode)
21372 {
21373 case V16SImode:
21374 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21375 force_reg (V16SImode, mask),
21376 op1));
21377 return true;
21378 case V16SFmode:
21379 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21380 force_reg (V16SImode, mask),
21381 op1));
21382 return true;
21383 case V8DImode:
21384 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21385 force_reg (V8DImode, mask), op1));
21386 return true;
21387 case V8DFmode:
21388 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21389 force_reg (V8DImode, mask), op1));
21390 return true;
21391 default:
21392 return false;
21393 }
21394 }
21395
21396 /* Expand a variable vector permutation. */
21397
21398 void
21399 ix86_expand_vec_perm (rtx operands[])
21400 {
21401 rtx target = operands[0];
21402 rtx op0 = operands[1];
21403 rtx op1 = operands[2];
21404 rtx mask = operands[3];
21405 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21406 enum machine_mode mode = GET_MODE (op0);
21407 enum machine_mode maskmode = GET_MODE (mask);
21408 int w, e, i;
21409 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21410
21411 /* Number of elements in the vector. */
21412 w = GET_MODE_NUNITS (mode);
21413 e = GET_MODE_UNIT_SIZE (mode);
21414 gcc_assert (w <= 64);
21415
21416 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21417 return;
21418
21419 if (TARGET_AVX2)
21420 {
21421 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21422 {
21423 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21424 a constant shuffle operand. With a tiny bit of effort we can
21425 use VPERMD instead. A re-interpretation stall for V4DFmode is
21426 unfortunate but there's no avoiding it.
21427 Similarly, for V16HImode we don't have instructions for variable
21428 shuffling, while for V32QImode we can, after preparing suitable
21429 masks, use vpshufb; vpshufb; vpermq; vpor. */
21430
21431 if (mode == V16HImode)
21432 {
21433 maskmode = mode = V32QImode;
21434 w = 32;
21435 e = 1;
21436 }
21437 else
21438 {
21439 maskmode = mode = V8SImode;
21440 w = 8;
21441 e = 4;
21442 }
21443 t1 = gen_reg_rtx (maskmode);
21444
21445 /* Replicate the low bits of the V4DImode mask into V8SImode:
21446 mask = { A B C D }
21447 t1 = { A A B B C C D D }. */
21448 for (i = 0; i < w / 2; ++i)
21449 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21450 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21451 vt = force_reg (maskmode, vt);
21452 mask = gen_lowpart (maskmode, mask);
21453 if (maskmode == V8SImode)
21454 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21455 else
21456 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21457
21458 /* Multiply the shuffle indices by two. */
21459 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21460 OPTAB_DIRECT);
21461
21462 /* Add one to the odd shuffle indices:
21463 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21464 for (i = 0; i < w / 2; ++i)
21465 {
21466 vec[i * 2] = const0_rtx;
21467 vec[i * 2 + 1] = const1_rtx;
21468 }
21469 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21470 vt = validize_mem (force_const_mem (maskmode, vt));
21471 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21472 OPTAB_DIRECT);
21473
21474 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21475 operands[3] = mask = t1;
21476 target = gen_reg_rtx (mode);
21477 op0 = gen_lowpart (mode, op0);
21478 op1 = gen_lowpart (mode, op1);
21479 }
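/* For illustration, a worked example of the mask rewrite above for a
   V4DImode selector { 3 0 2 1 }: replication gives { 3 3 0 0 2 2 1 1 },
   doubling gives { 6 6 0 0 4 4 2 2 }, and adding { 0 1 0 1 ... } gives
   { 6 7 0 1 4 5 2 3 } -- exactly the V8SImode index vector that makes
   VPERMD fetch both 32-bit halves of each selected 64-bit element.  */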
21480
21481 switch (mode)
21482 {
21483 case V8SImode:
21484 /* The VPERMD and VPERMPS instructions already properly ignore
21485 the high bits of the shuffle elements. No need for us to
21486 perform an AND ourselves. */
21487 if (one_operand_shuffle)
21488 {
21489 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21490 if (target != operands[0])
21491 emit_move_insn (operands[0],
21492 gen_lowpart (GET_MODE (operands[0]), target));
21493 }
21494 else
21495 {
21496 t1 = gen_reg_rtx (V8SImode);
21497 t2 = gen_reg_rtx (V8SImode);
21498 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21499 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21500 goto merge_two;
21501 }
21502 return;
21503
21504 case V8SFmode:
21505 mask = gen_lowpart (V8SImode, mask);
21506 if (one_operand_shuffle)
21507 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21508 else
21509 {
21510 t1 = gen_reg_rtx (V8SFmode);
21511 t2 = gen_reg_rtx (V8SFmode);
21512 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21513 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21514 goto merge_two;
21515 }
21516 return;
21517
21518 case V4SImode:
21519 /* By combining the two 128-bit input vectors into one 256-bit
21520 input vector, we can use VPERMD and VPERMPS for the full
21521 two-operand shuffle. */
21522 t1 = gen_reg_rtx (V8SImode);
21523 t2 = gen_reg_rtx (V8SImode);
21524 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21525 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21526 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21527 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21528 return;
21529
21530 case V4SFmode:
21531 t1 = gen_reg_rtx (V8SFmode);
21532 t2 = gen_reg_rtx (V8SImode);
21533 mask = gen_lowpart (V4SImode, mask);
21534 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21535 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21536 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21537 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21538 return;
21539
21540 case V32QImode:
21541 t1 = gen_reg_rtx (V32QImode);
21542 t2 = gen_reg_rtx (V32QImode);
21543 t3 = gen_reg_rtx (V32QImode);
21544 vt2 = GEN_INT (128);
21545 for (i = 0; i < 32; i++)
21546 vec[i] = vt2;
21547 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21548 vt = force_reg (V32QImode, vt);
21549 for (i = 0; i < 32; i++)
21550 vec[i] = i < 16 ? vt2 : const0_rtx;
21551 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21552 vt2 = force_reg (V32QImode, vt2);
21553 /* From mask create two adjusted masks, which contain the same
21554 bits as mask in the low 7 bits of each vector element.
21555 The first mask will have the most significant bit clear
21556 if it requests element from the same 128-bit lane
21557 and MSB set if it requests element from the other 128-bit lane.
21558 The second mask will have the opposite values of the MSB,
21559 and additionally will have its 128-bit lanes swapped.
21560 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21561 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21562 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21563 stands for other 12 bytes. */
21564 /* The bit that says whether an element comes from the same lane or
21565 from the other lane is bit 4, so shift it up by 3 to the MSB position. */
21566 t5 = gen_reg_rtx (V4DImode);
21567 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21568 GEN_INT (3)));
21569 /* Clear MSB bits from the mask just in case it had them set. */
21570 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21571 /* After this t1 will have MSB set for elements from other lane. */
21572 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21573 /* Clear bits other than MSB. */
21574 emit_insn (gen_andv32qi3 (t1, t1, vt));
21575 /* Or in the lower bits from mask into t3. */
21576 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21577 /* And invert MSB bits in t1, so MSB is set for elements from the same
21578 lane. */
21579 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21580 /* Swap 128-bit lanes in t3. */
21581 t6 = gen_reg_rtx (V4DImode);
21582 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21583 const2_rtx, GEN_INT (3),
21584 const0_rtx, const1_rtx));
21585 /* And or in the lower bits from mask into t1. */
21586 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21587 if (one_operand_shuffle)
21588 {
21589 /* Each of these shuffles will put 0s in places where an
21590 element from the other 128-bit lane is needed; otherwise
21591 it will shuffle in the requested value. */
21592 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21593 gen_lowpart (V32QImode, t6)));
21594 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21595 /* For t3 the 128-bit lanes are swapped again. */
21596 t7 = gen_reg_rtx (V4DImode);
21597 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21598 const2_rtx, GEN_INT (3),
21599 const0_rtx, const1_rtx));
21600 /* And oring both together leads to the result. */
21601 emit_insn (gen_iorv32qi3 (target, t1,
21602 gen_lowpart (V32QImode, t7)));
21603 if (target != operands[0])
21604 emit_move_insn (operands[0],
21605 gen_lowpart (GET_MODE (operands[0]), target));
21606 return;
21607 }
21608
21609 t4 = gen_reg_rtx (V32QImode);
21610 /* Similar to the one_operand_shuffle code above, just
21611 repeated twice, once for each operand. The merge_two:
21612 code below will merge the two results together. */
21613 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21614 gen_lowpart (V32QImode, t6)));
21615 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21616 gen_lowpart (V32QImode, t6)));
21617 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21618 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21619 t7 = gen_reg_rtx (V4DImode);
21620 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21621 const2_rtx, GEN_INT (3),
21622 const0_rtx, const1_rtx));
21623 t8 = gen_reg_rtx (V4DImode);
21624 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21625 const2_rtx, GEN_INT (3),
21626 const0_rtx, const1_rtx));
21627 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21628 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21629 t1 = t4;
21630 t2 = t3;
21631 goto merge_two;
21632
21633 default:
21634 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21635 break;
21636 }
21637 }
21638
21639 if (TARGET_XOP)
21640 {
21641 /* The XOP VPPERM insn supports three inputs. By ignoring the
21642 one_operand_shuffle special case, we avoid creating another
21643 set of constant vectors in memory. */
21644 one_operand_shuffle = false;
21645
21646 /* mask = mask & {2*w-1, ...} */
21647 vt = GEN_INT (2*w - 1);
21648 }
21649 else
21650 {
21651 /* mask = mask & {w-1, ...} */
21652 vt = GEN_INT (w - 1);
21653 }
21654
21655 for (i = 0; i < w; i++)
21656 vec[i] = vt;
21657 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21658 mask = expand_simple_binop (maskmode, AND, mask, vt,
21659 NULL_RTX, 0, OPTAB_DIRECT);
21660
21661 /* For non-QImode operations, convert the word permutation control
21662 into a byte permutation control. */
21663 if (mode != V16QImode)
21664 {
21665 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21666 GEN_INT (exact_log2 (e)),
21667 NULL_RTX, 0, OPTAB_DIRECT);
21668
21669 /* Convert mask to vector of chars. */
21670 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21671
21672 /* Replicate each of the input bytes into byte positions:
21673 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21674 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21675 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21676 for (i = 0; i < 16; ++i)
21677 vec[i] = GEN_INT (i/e * e);
21678 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21679 vt = validize_mem (force_const_mem (V16QImode, vt));
21680 if (TARGET_XOP)
21681 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21682 else
21683 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21684
21685 /* Convert it into the byte positions by doing
21686 mask = mask + {0,1,..,e-1, 0,1,..,e-1, ...}. */
21687 for (i = 0; i < 16; ++i)
21688 vec[i] = GEN_INT (i % e);
21689 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21690 vt = validize_mem (force_const_mem (V16QImode, vt));
21691 emit_insn (gen_addv16qi3 (mask, mask, vt));
21692 }
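   /* Illustrative example of the conversion above: for a V4SImode shuffle,
      e == 4, so a word control of { 1, 0, 3, 2 } is first shifted to
      { 4, 0, 12, 8 }; the pshufb/pperm replication step expands it to the
      byte vector { 4,4,4,4, 0,0,0,0, 12,12,12,12, 8,8,8,8 }; and the final
      addition of { 0,1,2,3, 0,1,2,3, ... } yields
      { 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 }, i.e. the same
      permutation expressed as byte indices.  */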
21693
21694 /* The actual shuffle operations all operate on V16QImode. */
21695 op0 = gen_lowpart (V16QImode, op0);
21696 op1 = gen_lowpart (V16QImode, op1);
21697
21698 if (TARGET_XOP)
21699 {
21700 if (GET_MODE (target) != V16QImode)
21701 target = gen_reg_rtx (V16QImode);
21702 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21703 if (target != operands[0])
21704 emit_move_insn (operands[0],
21705 gen_lowpart (GET_MODE (operands[0]), target));
21706 }
21707 else if (one_operand_shuffle)
21708 {
21709 if (GET_MODE (target) != V16QImode)
21710 target = gen_reg_rtx (V16QImode);
21711 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21712 if (target != operands[0])
21713 emit_move_insn (operands[0],
21714 gen_lowpart (GET_MODE (operands[0]), target));
21715 }
21716 else
21717 {
21718 rtx xops[6];
21719 bool ok;
21720
21721 /* Shuffle the two input vectors independently. */
21722 t1 = gen_reg_rtx (V16QImode);
21723 t2 = gen_reg_rtx (V16QImode);
21724 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21725 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21726
21727 merge_two:
21728 /* Then merge them together. The key is whether any given control
21729 element contained a bit set that indicates the second word. */
21730 mask = operands[3];
21731 vt = GEN_INT (w);
21732 if (maskmode == V2DImode && !TARGET_SSE4_1)
21733 {
21734 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21735 more shuffle to convert the V2DI input mask into a V4SI
21736 input mask. At that point the masking done by
21737 expand_int_vcond will work as desired. */
21738 rtx t3 = gen_reg_rtx (V4SImode);
21739 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21740 const0_rtx, const0_rtx,
21741 const2_rtx, const2_rtx));
21742 mask = t3;
21743 maskmode = V4SImode;
21744 e = w = 4;
21745 }
21746
21747 for (i = 0; i < w; i++)
21748 vec[i] = vt;
21749 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21750 vt = force_reg (maskmode, vt);
21751 mask = expand_simple_binop (maskmode, AND, mask, vt,
21752 NULL_RTX, 0, OPTAB_DIRECT);
21753
21754 if (GET_MODE (target) != mode)
21755 target = gen_reg_rtx (mode);
21756 xops[0] = target;
21757 xops[1] = gen_lowpart (mode, t2);
21758 xops[2] = gen_lowpart (mode, t1);
21759 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21760 xops[4] = mask;
21761 xops[5] = vt;
21762 ok = ix86_expand_int_vcond (xops);
21763 gcc_assert (ok);
21764 if (target != operands[0])
21765 emit_move_insn (operands[0],
21766 gen_lowpart (GET_MODE (operands[0]), target));
21767 }
21768 }
21769
21770 /* Unpack SRC into the next wider integer vector type DEST. UNSIGNED_P is
21771 true if we should do zero extension, else sign extension. HIGH_P is
21772 true if we want the N/2 high elements, else the low elements. */
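/* For example (illustrative): with SRC = { a0, ..., a7 } in V8HImode,
   UNSIGNED_P true and HIGH_P false, DEST receives the V4SImode vector of
   zero-extended { a0, a1, a2, a3 }; with HIGH_P true it receives
   { a4, a5, a6, a7 } instead.  */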
21773
21774 void
21775 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21776 {
21777 enum machine_mode imode = GET_MODE (src);
21778 rtx tmp;
21779
21780 if (TARGET_SSE4_1)
21781 {
21782 rtx (*unpack)(rtx, rtx);
21783 rtx (*extract)(rtx, rtx) = NULL;
21784 enum machine_mode halfmode = BLKmode;
21785
21786 switch (imode)
21787 {
21788 case V32QImode:
21789 if (unsigned_p)
21790 unpack = gen_avx2_zero_extendv16qiv16hi2;
21791 else
21792 unpack = gen_avx2_sign_extendv16qiv16hi2;
21793 halfmode = V16QImode;
21794 extract
21795 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21796 break;
21797 case V32HImode:
21798 if (unsigned_p)
21799 unpack = gen_avx512f_zero_extendv16hiv16si2;
21800 else
21801 unpack = gen_avx512f_sign_extendv16hiv16si2;
21802 halfmode = V16HImode;
21803 extract
21804 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21805 break;
21806 case V16HImode:
21807 if (unsigned_p)
21808 unpack = gen_avx2_zero_extendv8hiv8si2;
21809 else
21810 unpack = gen_avx2_sign_extendv8hiv8si2;
21811 halfmode = V8HImode;
21812 extract
21813 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21814 break;
21815 case V16SImode:
21816 if (unsigned_p)
21817 unpack = gen_avx512f_zero_extendv8siv8di2;
21818 else
21819 unpack = gen_avx512f_sign_extendv8siv8di2;
21820 halfmode = V8SImode;
21821 extract
21822 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21823 break;
21824 case V8SImode:
21825 if (unsigned_p)
21826 unpack = gen_avx2_zero_extendv4siv4di2;
21827 else
21828 unpack = gen_avx2_sign_extendv4siv4di2;
21829 halfmode = V4SImode;
21830 extract
21831 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21832 break;
21833 case V16QImode:
21834 if (unsigned_p)
21835 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21836 else
21837 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21838 break;
21839 case V8HImode:
21840 if (unsigned_p)
21841 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21842 else
21843 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21844 break;
21845 case V4SImode:
21846 if (unsigned_p)
21847 unpack = gen_sse4_1_zero_extendv2siv2di2;
21848 else
21849 unpack = gen_sse4_1_sign_extendv2siv2di2;
21850 break;
21851 default:
21852 gcc_unreachable ();
21853 }
21854
21855 if (GET_MODE_SIZE (imode) >= 32)
21856 {
21857 tmp = gen_reg_rtx (halfmode);
21858 emit_insn (extract (tmp, src));
21859 }
21860 else if (high_p)
21861 {
21862 /* Shift higher 8 bytes to lower 8 bytes. */
21863 tmp = gen_reg_rtx (V1TImode);
21864 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21865 GEN_INT (64)));
21866 tmp = gen_lowpart (imode, tmp);
21867 }
21868 else
21869 tmp = src;
21870
21871 emit_insn (unpack (dest, tmp));
21872 }
21873 else
21874 {
21875 rtx (*unpack)(rtx, rtx, rtx);
21876
21877 switch (imode)
21878 {
21879 case V16QImode:
21880 if (high_p)
21881 unpack = gen_vec_interleave_highv16qi;
21882 else
21883 unpack = gen_vec_interleave_lowv16qi;
21884 break;
21885 case V8HImode:
21886 if (high_p)
21887 unpack = gen_vec_interleave_highv8hi;
21888 else
21889 unpack = gen_vec_interleave_lowv8hi;
21890 break;
21891 case V4SImode:
21892 if (high_p)
21893 unpack = gen_vec_interleave_highv4si;
21894 else
21895 unpack = gen_vec_interleave_lowv4si;
21896 break;
21897 default:
21898 gcc_unreachable ();
21899 }
21900
21901 if (unsigned_p)
21902 tmp = force_reg (imode, CONST0_RTX (imode));
21903 else
21904 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21905 src, pc_rtx, pc_rtx);
21906
21907 rtx tmp2 = gen_reg_rtx (imode);
21908 emit_insn (unpack (tmp2, src, tmp));
21909 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21910 }
21911 }
21912
21913 /* Expand conditional increment or decrement using adc/sbb instructions.
21914 The default case using setcc followed by the conditional move can be
21915 done by generic code. */
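/* For instance (illustrative), an increment such as "x += (a < b)" with an
   unsigned comparison can be expanded here as

       cmp  a, b
       adc  x, 0

   letting the carry flag supply the 0/1 addend instead of a setcc/cmov
   sequence.  */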
21916 bool
21917 ix86_expand_int_addcc (rtx operands[])
21918 {
21919 enum rtx_code code = GET_CODE (operands[1]);
21920 rtx flags;
21921 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21922 rtx compare_op;
21923 rtx val = const0_rtx;
21924 bool fpcmp = false;
21925 enum machine_mode mode;
21926 rtx op0 = XEXP (operands[1], 0);
21927 rtx op1 = XEXP (operands[1], 1);
21928
21929 if (operands[3] != const1_rtx
21930 && operands[3] != constm1_rtx)
21931 return false;
21932 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21933 return false;
21934 code = GET_CODE (compare_op);
21935
21936 flags = XEXP (compare_op, 0);
21937
21938 if (GET_MODE (flags) == CCFPmode
21939 || GET_MODE (flags) == CCFPUmode)
21940 {
21941 fpcmp = true;
21942 code = ix86_fp_compare_code_to_integer (code);
21943 }
21944
21945 if (code != LTU)
21946 {
21947 val = constm1_rtx;
21948 if (fpcmp)
21949 PUT_CODE (compare_op,
21950 reverse_condition_maybe_unordered
21951 (GET_CODE (compare_op)));
21952 else
21953 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21954 }
21955
21956 mode = GET_MODE (operands[0]);
21957
21958 /* Construct either adc or sbb insn. */
21959 if ((code == LTU) == (operands[3] == constm1_rtx))
21960 {
21961 switch (mode)
21962 {
21963 case QImode:
21964 insn = gen_subqi3_carry;
21965 break;
21966 case HImode:
21967 insn = gen_subhi3_carry;
21968 break;
21969 case SImode:
21970 insn = gen_subsi3_carry;
21971 break;
21972 case DImode:
21973 insn = gen_subdi3_carry;
21974 break;
21975 default:
21976 gcc_unreachable ();
21977 }
21978 }
21979 else
21980 {
21981 switch (mode)
21982 {
21983 case QImode:
21984 insn = gen_addqi3_carry;
21985 break;
21986 case HImode:
21987 insn = gen_addhi3_carry;
21988 break;
21989 case SImode:
21990 insn = gen_addsi3_carry;
21991 break;
21992 case DImode:
21993 insn = gen_adddi3_carry;
21994 break;
21995 default:
21996 gcc_unreachable ();
21997 }
21998 }
21999 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22000
22001 return true;
22002 }
22003
22004
22005 /* Split OPERAND into half-mode parts stored in PARTS. Similar to
22006 split_double_mode, but works for floating-point parameters and
22007 non-offsettable memories. For pushes, it returns just stack offsets;
22008 the values will be saved in the right order. At most four parts are generated. */
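/* For instance (illustrative): on a 32-bit target a DFmode operand is split
   into two SImode parts, XFmode into three and TFmode into four, while on a
   64-bit target an XFmode operand becomes a DImode part plus an SImode
   upper part.  */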
22009
22010 static int
22011 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
22012 {
22013 int size;
22014
22015 if (!TARGET_64BIT)
22016 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22017 else
22018 size = (GET_MODE_SIZE (mode) + 4) / 8;
22019
22020 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22021 gcc_assert (size >= 2 && size <= 4);
22022
22023 /* Optimize constant pool reference to immediates. This is used by fp
22024 moves, that force all constants to memory to allow combining. */
22025 if (MEM_P (operand) && MEM_READONLY_P (operand))
22026 {
22027 rtx tmp = maybe_get_pool_constant (operand);
22028 if (tmp)
22029 operand = tmp;
22030 }
22031
22032 if (MEM_P (operand) && !offsettable_memref_p (operand))
22033 {
22034 /* The only non-offsettable memories we handle are pushes. */
22035 int ok = push_operand (operand, VOIDmode);
22036
22037 gcc_assert (ok);
22038
22039 operand = copy_rtx (operand);
22040 PUT_MODE (operand, word_mode);
22041 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22042 return size;
22043 }
22044
22045 if (GET_CODE (operand) == CONST_VECTOR)
22046 {
22047 enum machine_mode imode = int_mode_for_mode (mode);
22048 /* Caution: if we looked through a constant pool memory above,
22049 the operand may actually have a different mode now. That's
22050 ok, since we want to pun this all the way back to an integer. */
22051 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22052 gcc_assert (operand != NULL);
22053 mode = imode;
22054 }
22055
22056 if (!TARGET_64BIT)
22057 {
22058 if (mode == DImode)
22059 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22060 else
22061 {
22062 int i;
22063
22064 if (REG_P (operand))
22065 {
22066 gcc_assert (reload_completed);
22067 for (i = 0; i < size; i++)
22068 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22069 }
22070 else if (offsettable_memref_p (operand))
22071 {
22072 operand = adjust_address (operand, SImode, 0);
22073 parts[0] = operand;
22074 for (i = 1; i < size; i++)
22075 parts[i] = adjust_address (operand, SImode, 4 * i);
22076 }
22077 else if (GET_CODE (operand) == CONST_DOUBLE)
22078 {
22079 REAL_VALUE_TYPE r;
22080 long l[4];
22081
22082 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22083 switch (mode)
22084 {
22085 case TFmode:
22086 real_to_target (l, &r, mode);
22087 parts[3] = gen_int_mode (l[3], SImode);
22088 parts[2] = gen_int_mode (l[2], SImode);
22089 break;
22090 case XFmode:
22091 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22092 long double may not be 80-bit. */
22093 real_to_target (l, &r, mode);
22094 parts[2] = gen_int_mode (l[2], SImode);
22095 break;
22096 case DFmode:
22097 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22098 break;
22099 default:
22100 gcc_unreachable ();
22101 }
22102 parts[1] = gen_int_mode (l[1], SImode);
22103 parts[0] = gen_int_mode (l[0], SImode);
22104 }
22105 else
22106 gcc_unreachable ();
22107 }
22108 }
22109 else
22110 {
22111 if (mode == TImode)
22112 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22113 if (mode == XFmode || mode == TFmode)
22114 {
22115 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22116 if (REG_P (operand))
22117 {
22118 gcc_assert (reload_completed);
22119 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22120 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22121 }
22122 else if (offsettable_memref_p (operand))
22123 {
22124 operand = adjust_address (operand, DImode, 0);
22125 parts[0] = operand;
22126 parts[1] = adjust_address (operand, upper_mode, 8);
22127 }
22128 else if (GET_CODE (operand) == CONST_DOUBLE)
22129 {
22130 REAL_VALUE_TYPE r;
22131 long l[4];
22132
22133 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22134 real_to_target (l, &r, mode);
22135
22136 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22137 if (HOST_BITS_PER_WIDE_INT >= 64)
22138 parts[0]
22139 = gen_int_mode
22140 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22141 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22142 DImode);
22143 else
22144 parts[0] = immed_double_const (l[0], l[1], DImode);
22145
22146 if (upper_mode == SImode)
22147 parts[1] = gen_int_mode (l[2], SImode);
22148 else if (HOST_BITS_PER_WIDE_INT >= 64)
22149 parts[1]
22150 = gen_int_mode
22151 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22152 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22153 DImode);
22154 else
22155 parts[1] = immed_double_const (l[2], l[3], DImode);
22156 }
22157 else
22158 gcc_unreachable ();
22159 }
22160 }
22161
22162 return size;
22163 }
22164
22165 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22166 All required insns are emitted here. Entries 2-5 of OPERANDS are used
22167 to hold the destination parts and entries 6-9 the source parts, in the
22168 correct order. */
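/* Illustrative example: a DImode register-to-register move on a 32-bit
   target is emitted as two SImode moves, and if the first destination
   register overlaps a register still needed by the source, the two moves
   are emitted in reverse order so the source is not clobbered before it is
   read.  */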
22169
22170 void
22171 ix86_split_long_move (rtx operands[])
22172 {
22173 rtx part[2][4];
22174 int nparts, i, j;
22175 int push = 0;
22176 int collisions = 0;
22177 enum machine_mode mode = GET_MODE (operands[0]);
22178 bool collisionparts[4];
22179
22180 /* The DFmode expanders may ask us to move a double.
22181 For a 64-bit target this is a single move. By hiding that fact
22182 here we simplify the i386.md splitters. */
22183 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22184 {
22185 /* Optimize constant pool reference to immediates. This is used by
22186 fp moves, that force all constants to memory to allow combining. */
22187
22188 if (MEM_P (operands[1])
22189 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22190 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22191 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22192 if (push_operand (operands[0], VOIDmode))
22193 {
22194 operands[0] = copy_rtx (operands[0]);
22195 PUT_MODE (operands[0], word_mode);
22196 }
22197 else
22198 operands[0] = gen_lowpart (DImode, operands[0]);
22199 operands[1] = gen_lowpart (DImode, operands[1]);
22200 emit_move_insn (operands[0], operands[1]);
22201 return;
22202 }
22203
22204 /* The only non-offsettable memory we handle is push. */
22205 if (push_operand (operands[0], VOIDmode))
22206 push = 1;
22207 else
22208 gcc_assert (!MEM_P (operands[0])
22209 || offsettable_memref_p (operands[0]));
22210
22211 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22212 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22213
22214 /* When emitting a push, take care with source operands that live on the stack. */
22215 if (push && MEM_P (operands[1])
22216 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22217 {
22218 rtx src_base = XEXP (part[1][nparts - 1], 0);
22219
22220 /* Compensate for the stack decrement by 4. */
22221 if (!TARGET_64BIT && nparts == 3
22222 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22223 src_base = plus_constant (Pmode, src_base, 4);
22224
22225 /* src_base refers to the stack pointer and is
22226 automatically decreased by emitted push. */
22227 for (i = 0; i < nparts; i++)
22228 part[1][i] = change_address (part[1][i],
22229 GET_MODE (part[1][i]), src_base);
22230 }
22231
22232 /* We need to do copy in the right order in case an address register
22233 of the source overlaps the destination. */
22234 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22235 {
22236 rtx tmp;
22237
22238 for (i = 0; i < nparts; i++)
22239 {
22240 collisionparts[i]
22241 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22242 if (collisionparts[i])
22243 collisions++;
22244 }
22245
22246 /* Collision in the middle part can be handled by reordering. */
22247 if (collisions == 1 && nparts == 3 && collisionparts [1])
22248 {
22249 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22250 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22251 }
22252 else if (collisions == 1
22253 && nparts == 4
22254 && (collisionparts [1] || collisionparts [2]))
22255 {
22256 if (collisionparts [1])
22257 {
22258 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22259 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22260 }
22261 else
22262 {
22263 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22264 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22265 }
22266 }
22267
22268 /* If there are more collisions, we can't handle it by reordering.
22269 Do an lea to the last part and use only one colliding move. */
22270 else if (collisions > 1)
22271 {
22272 rtx base;
22273
22274 collisions = 1;
22275
22276 base = part[0][nparts - 1];
22277
22278 /* Handle the case when the last part isn't valid for lea.
22279 Happens in 64-bit mode storing the 12-byte XFmode. */
22280 if (GET_MODE (base) != Pmode)
22281 base = gen_rtx_REG (Pmode, REGNO (base));
22282
22283 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22284 part[1][0] = replace_equiv_address (part[1][0], base);
22285 for (i = 1; i < nparts; i++)
22286 {
22287 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22288 part[1][i] = replace_equiv_address (part[1][i], tmp);
22289 }
22290 }
22291 }
22292
22293 if (push)
22294 {
22295 if (!TARGET_64BIT)
22296 {
22297 if (nparts == 3)
22298 {
22299 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22300 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22301 stack_pointer_rtx, GEN_INT (-4)));
22302 emit_move_insn (part[0][2], part[1][2]);
22303 }
22304 else if (nparts == 4)
22305 {
22306 emit_move_insn (part[0][3], part[1][3]);
22307 emit_move_insn (part[0][2], part[1][2]);
22308 }
22309 }
22310 else
22311 {
22312 /* In 64-bit mode we don't have a 32-bit push available. If this is a
22313 register, that is OK - we will just use the larger counterpart. We also
22314 retype memory - this comes from an attempt to avoid the REX prefix on
22315 moving the second half of a TFmode value. */
22316 if (GET_MODE (part[1][1]) == SImode)
22317 {
22318 switch (GET_CODE (part[1][1]))
22319 {
22320 case MEM:
22321 part[1][1] = adjust_address (part[1][1], DImode, 0);
22322 break;
22323
22324 case REG:
22325 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22326 break;
22327
22328 default:
22329 gcc_unreachable ();
22330 }
22331
22332 if (GET_MODE (part[1][0]) == SImode)
22333 part[1][0] = part[1][1];
22334 }
22335 }
22336 emit_move_insn (part[0][1], part[1][1]);
22337 emit_move_insn (part[0][0], part[1][0]);
22338 return;
22339 }
22340
22341 /* Choose the correct order so as not to overwrite the source before it is copied. */
22342 if ((REG_P (part[0][0])
22343 && REG_P (part[1][1])
22344 && (REGNO (part[0][0]) == REGNO (part[1][1])
22345 || (nparts == 3
22346 && REGNO (part[0][0]) == REGNO (part[1][2]))
22347 || (nparts == 4
22348 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22349 || (collisions > 0
22350 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22351 {
22352 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22353 {
22354 operands[2 + i] = part[0][j];
22355 operands[6 + i] = part[1][j];
22356 }
22357 }
22358 else
22359 {
22360 for (i = 0; i < nparts; i++)
22361 {
22362 operands[2 + i] = part[0][i];
22363 operands[6 + i] = part[1][i];
22364 }
22365 }
22366
22367 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22368 if (optimize_insn_for_size_p ())
22369 {
22370 for (j = 0; j < nparts - 1; j++)
22371 if (CONST_INT_P (operands[6 + j])
22372 && operands[6 + j] != const0_rtx
22373 && REG_P (operands[2 + j]))
22374 for (i = j; i < nparts - 1; i++)
22375 if (CONST_INT_P (operands[7 + i])
22376 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22377 operands[7 + i] = operands[2 + j];
22378 }
22379
22380 for (i = 0; i < nparts; i++)
22381 emit_move_insn (operands[2 + i], operands[6 + i]);
22382
22383 return;
22384 }
22385
22386 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22387 left shift by a constant, either using a single shift or
22388 a sequence of add instructions. */
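/* For example (illustrative): with COUNT == 2, adds no more expensive than
   a constant shift, and not optimizing for size, this emits "add op, op"
   twice; otherwise it emits a single "shl op, 2".  */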
22389
22390 static void
22391 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22392 {
22393 rtx (*insn)(rtx, rtx, rtx);
22394
22395 if (count == 1
22396 || (count * ix86_cost->add <= ix86_cost->shift_const
22397 && !optimize_insn_for_size_p ()))
22398 {
22399 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22400 while (count-- > 0)
22401 emit_insn (insn (operand, operand, operand));
22402 }
22403 else
22404 {
22405 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22406 emit_insn (insn (operand, operand, GEN_INT (count)));
22407 }
22408 }
22409
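/* Split a double-word left shift (MODE is DImode on a 32-bit target or
   TImode on a 64-bit target) into operations on the two word-sized halves.
   A constant count of at least half the width moves the low half into the
   high half (shifting it further if needed) and clears the low half;
   smaller constant counts use SHLD plus a shift of the low half.  Variable
   counts emit SHLD and SHL followed by an adjustment for counts of half the
   width or more, using CMOV when SCRATCH is available.  */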
22410 void
22411 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22412 {
22413 rtx (*gen_ashl3)(rtx, rtx, rtx);
22414 rtx (*gen_shld)(rtx, rtx, rtx);
22415 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22416
22417 rtx low[2], high[2];
22418 int count;
22419
22420 if (CONST_INT_P (operands[2]))
22421 {
22422 split_double_mode (mode, operands, 2, low, high);
22423 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22424
22425 if (count >= half_width)
22426 {
22427 emit_move_insn (high[0], low[1]);
22428 emit_move_insn (low[0], const0_rtx);
22429
22430 if (count > half_width)
22431 ix86_expand_ashl_const (high[0], count - half_width, mode);
22432 }
22433 else
22434 {
22435 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22436
22437 if (!rtx_equal_p (operands[0], operands[1]))
22438 emit_move_insn (operands[0], operands[1]);
22439
22440 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22441 ix86_expand_ashl_const (low[0], count, mode);
22442 }
22443 return;
22444 }
22445
22446 split_double_mode (mode, operands, 1, low, high);
22447
22448 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22449
22450 if (operands[1] == const1_rtx)
22451 {
22452 /* Assuming we've chosen QImode-capable registers, then 1 << N
22453 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22454 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22455 {
22456 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22457
22458 ix86_expand_clear (low[0]);
22459 ix86_expand_clear (high[0]);
22460 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22461
22462 d = gen_lowpart (QImode, low[0]);
22463 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22464 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22465 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22466
22467 d = gen_lowpart (QImode, high[0]);
22468 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22469 s = gen_rtx_NE (QImode, flags, const0_rtx);
22470 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22471 }
22472
22473 /* Otherwise, we can get the same results by manually performing
22474 a bit extract operation on bit 5/6, and then performing the two
22475 shifts. The two methods of getting 0/1 into low/high are exactly
22476 the same size. Avoiding the shift in the bit extract case helps
22477 pentium4 a bit; no one else seems to care much either way. */
22478 else
22479 {
22480 enum machine_mode half_mode;
22481 rtx (*gen_lshr3)(rtx, rtx, rtx);
22482 rtx (*gen_and3)(rtx, rtx, rtx);
22483 rtx (*gen_xor3)(rtx, rtx, rtx);
22484 HOST_WIDE_INT bits;
22485 rtx x;
22486
22487 if (mode == DImode)
22488 {
22489 half_mode = SImode;
22490 gen_lshr3 = gen_lshrsi3;
22491 gen_and3 = gen_andsi3;
22492 gen_xor3 = gen_xorsi3;
22493 bits = 5;
22494 }
22495 else
22496 {
22497 half_mode = DImode;
22498 gen_lshr3 = gen_lshrdi3;
22499 gen_and3 = gen_anddi3;
22500 gen_xor3 = gen_xordi3;
22501 bits = 6;
22502 }
22503
22504 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22505 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22506 else
22507 x = gen_lowpart (half_mode, operands[2]);
22508 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22509
22510 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22511 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22512 emit_move_insn (low[0], high[0]);
22513 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22514 }
22515
22516 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22517 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22518 return;
22519 }
22520
22521 if (operands[1] == constm1_rtx)
22522 {
22523 /* For -1 << N, we can avoid the shld instruction, because we
22524 know that we're shifting 0...31/63 ones into a -1. */
22525 emit_move_insn (low[0], constm1_rtx);
22526 if (optimize_insn_for_size_p ())
22527 emit_move_insn (high[0], low[0]);
22528 else
22529 emit_move_insn (high[0], constm1_rtx);
22530 }
22531 else
22532 {
22533 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22534
22535 if (!rtx_equal_p (operands[0], operands[1]))
22536 emit_move_insn (operands[0], operands[1]);
22537
22538 split_double_mode (mode, operands, 1, low, high);
22539 emit_insn (gen_shld (high[0], low[0], operands[2]));
22540 }
22541
22542 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22543
22544 if (TARGET_CMOVE && scratch)
22545 {
22546 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22547 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22548
22549 ix86_expand_clear (scratch);
22550 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22551 }
22552 else
22553 {
22554 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22555 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22556
22557 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22558 }
22559 }
22560
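/* Split a double-word arithmetic right shift into operations on the two
   word-sized halves.  Constant counts of at least half the width move the
   high half into the low half and sign-replicate the high half with SAR;
   smaller constant counts use SHRD plus SAR.  Variable counts emit SHRD and
   SAR followed by a fix-up, using CMOV when SCRATCH is available.  */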
22561 void
22562 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22563 {
22564 rtx (*gen_ashr3)(rtx, rtx, rtx)
22565 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22566 rtx (*gen_shrd)(rtx, rtx, rtx);
22567 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22568
22569 rtx low[2], high[2];
22570 int count;
22571
22572 if (CONST_INT_P (operands[2]))
22573 {
22574 split_double_mode (mode, operands, 2, low, high);
22575 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22576
22577 if (count == GET_MODE_BITSIZE (mode) - 1)
22578 {
22579 emit_move_insn (high[0], high[1]);
22580 emit_insn (gen_ashr3 (high[0], high[0],
22581 GEN_INT (half_width - 1)));
22582 emit_move_insn (low[0], high[0]);
22583
22584 }
22585 else if (count >= half_width)
22586 {
22587 emit_move_insn (low[0], high[1]);
22588 emit_move_insn (high[0], low[0]);
22589 emit_insn (gen_ashr3 (high[0], high[0],
22590 GEN_INT (half_width - 1)));
22591
22592 if (count > half_width)
22593 emit_insn (gen_ashr3 (low[0], low[0],
22594 GEN_INT (count - half_width)));
22595 }
22596 else
22597 {
22598 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22599
22600 if (!rtx_equal_p (operands[0], operands[1]))
22601 emit_move_insn (operands[0], operands[1]);
22602
22603 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22604 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22605 }
22606 }
22607 else
22608 {
22609 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22610
22611 if (!rtx_equal_p (operands[0], operands[1]))
22612 emit_move_insn (operands[0], operands[1]);
22613
22614 split_double_mode (mode, operands, 1, low, high);
22615
22616 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22617 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22618
22619 if (TARGET_CMOVE && scratch)
22620 {
22621 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22622 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22623
22624 emit_move_insn (scratch, high[0]);
22625 emit_insn (gen_ashr3 (scratch, scratch,
22626 GEN_INT (half_width - 1)));
22627 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22628 scratch));
22629 }
22630 else
22631 {
22632 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22633 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22634
22635 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22636 }
22637 }
22638 }
22639
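/* Split a double-word logical right shift into operations on the two
   word-sized halves.  Constant counts of at least half the width move the
   high half into the low half and clear the high half; smaller constant
   counts use SHRD plus SHR.  Variable counts emit SHRD and SHR followed by
   a fix-up, using CMOV when SCRATCH is available.  */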
22640 void
22641 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22642 {
22643 rtx (*gen_lshr3)(rtx, rtx, rtx)
22644 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22645 rtx (*gen_shrd)(rtx, rtx, rtx);
22646 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22647
22648 rtx low[2], high[2];
22649 int count;
22650
22651 if (CONST_INT_P (operands[2]))
22652 {
22653 split_double_mode (mode, operands, 2, low, high);
22654 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22655
22656 if (count >= half_width)
22657 {
22658 emit_move_insn (low[0], high[1]);
22659 ix86_expand_clear (high[0]);
22660
22661 if (count > half_width)
22662 emit_insn (gen_lshr3 (low[0], low[0],
22663 GEN_INT (count - half_width)));
22664 }
22665 else
22666 {
22667 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22668
22669 if (!rtx_equal_p (operands[0], operands[1]))
22670 emit_move_insn (operands[0], operands[1]);
22671
22672 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22673 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22674 }
22675 }
22676 else
22677 {
22678 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22679
22680 if (!rtx_equal_p (operands[0], operands[1]))
22681 emit_move_insn (operands[0], operands[1]);
22682
22683 split_double_mode (mode, operands, 1, low, high);
22684
22685 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22686 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22687
22688 if (TARGET_CMOVE && scratch)
22689 {
22690 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22691 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22692
22693 ix86_expand_clear (scratch);
22694 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22695 scratch));
22696 }
22697 else
22698 {
22699 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22700 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22701
22702 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22703 }
22704 }
22705 }
22706
22707 /* Predict just emitted jump instruction to be taken with probability PROB. */
22708 static void
22709 predict_jump (int prob)
22710 {
22711 rtx insn = get_last_insn ();
22712 gcc_assert (JUMP_P (insn));
22713 add_int_reg_note (insn, REG_BR_PROB, prob);
22714 }
22715
22716 /* Helper function for the string operations below. Test whether the bit
22717 VALUE is set in VARIABLE; if it is clear, jump to the returned label. */
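/* E.g. (illustrative): ix86_expand_aligntest (count, 4, true) emits a test
   of bit 2 of COUNT and a branch to the returned label when that bit is
   clear, so the caller can conditionally skip a 4-byte move.  */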
22718 static rtx
22719 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22720 {
22721 rtx label = gen_label_rtx ();
22722 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22723 if (GET_MODE (variable) == DImode)
22724 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22725 else
22726 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22727 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22728 1, label);
22729 if (epilogue)
22730 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22731 else
22732 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22733 return label;
22734 }
22735
22736 /* Decrease COUNTREG by VALUE. */
22737 static void
22738 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22739 {
22740 rtx (*gen_add)(rtx, rtx, rtx)
22741 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22742
22743 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22744 }
22745
22746 /* Zero-extend EXP, which may be in SImode, to a Pmode register. */
22747 rtx
22748 ix86_zero_extend_to_Pmode (rtx exp)
22749 {
22750 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22751 }
22752
22753 /* Divide COUNTREG by SCALE. */
22754 static rtx
22755 scale_counter (rtx countreg, int scale)
22756 {
22757 rtx sc;
22758
22759 if (scale == 1)
22760 return countreg;
22761 if (CONST_INT_P (countreg))
22762 return GEN_INT (INTVAL (countreg) / scale);
22763 gcc_assert (REG_P (countreg));
22764
22765 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22766 GEN_INT (exact_log2 (scale)),
22767 NULL, 1, OPTAB_DIRECT);
22768 return sc;
22769 }
22770
22771 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22772 DImode for constant loop counts. */
22773
22774 static enum machine_mode
22775 counter_mode (rtx count_exp)
22776 {
22777 if (GET_MODE (count_exp) != VOIDmode)
22778 return GET_MODE (count_exp);
22779 if (!CONST_INT_P (count_exp))
22780 return Pmode;
22781 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22782 return DImode;
22783 return SImode;
22784 }
22785
22786 /* Copy the address to a Pmode register. This is used for x32 to
22787 truncate DImode TLS address to a SImode register. */
22788
22789 static rtx
22790 ix86_copy_addr_to_reg (rtx addr)
22791 {
22792 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
22793 return copy_addr_to_reg (addr);
22794 else
22795 {
22796 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22797 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22798 }
22799 }
22800
22801 /* When ISSETMEM is FALSE, output a simple loop to copy memory pointed to by
22802 SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times, with the overall
22803 size given by COUNT in bytes. When ISSETMEM is TRUE, output the equivalent
22804 loop that sets the memory to VALUE (assumed to be in MODE).
22805
22806 The size is rounded down to a whole number of the chunk size moved at once.
22807 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
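/* A rough sketch (illustrative) of the code generated for the copy case:

     iter = 0;
     size = count & ~(UNROLL * GET_MODE_SIZE (MODE) - 1);
     do
       {
         copy UNROLL chunks of MODE from SRCPTR + iter to DESTPTR + iter;
         iter += UNROLL * GET_MODE_SIZE (MODE);
       }
     while (iter < size);
     destptr += iter;
     srcptr += iter;

   with the loads/stores replaced by stores of VALUE for the memset case.  */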
22808
22809
22810 static void
22811 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22812 rtx destptr, rtx srcptr, rtx value,
22813 rtx count, enum machine_mode mode, int unroll,
22814 int expected_size, bool issetmem)
22815 {
22816 rtx out_label, top_label, iter, tmp;
22817 enum machine_mode iter_mode = counter_mode (count);
22818 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22819 rtx piece_size = GEN_INT (piece_size_n);
22820 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22821 rtx size;
22822 int i;
22823
22824 top_label = gen_label_rtx ();
22825 out_label = gen_label_rtx ();
22826 iter = gen_reg_rtx (iter_mode);
22827
22828 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22829 NULL, 1, OPTAB_DIRECT);
22830 /* Those two should combine. */
22831 if (piece_size == const1_rtx)
22832 {
22833 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22834 true, out_label);
22835 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22836 }
22837 emit_move_insn (iter, const0_rtx);
22838
22839 emit_label (top_label);
22840
22841 tmp = convert_modes (Pmode, iter_mode, iter, true);
22842
22843 /* This assert could be relaxed - in that case we'll need to compute
22844 the smallest power of two containing PIECE_SIZE_N and pass it to
22845 offset_address. */
22846 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22847 destmem = offset_address (destmem, tmp, piece_size_n);
22848 destmem = adjust_address (destmem, mode, 0);
22849
22850 if (!issetmem)
22851 {
22852 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22853 srcmem = adjust_address (srcmem, mode, 0);
22854
22855 /* When unrolling for chips that reorder memory reads and writes,
22856 we can save registers by using a single temporary.
22857 Also, using 4 temporaries is overkill in 32-bit mode. */
22858 if (!TARGET_64BIT && 0)
22859 {
22860 for (i = 0; i < unroll; i++)
22861 {
22862 if (i)
22863 {
22864 destmem =
22865 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22866 srcmem =
22867 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22868 }
22869 emit_move_insn (destmem, srcmem);
22870 }
22871 }
22872 else
22873 {
22874 rtx tmpreg[4];
22875 gcc_assert (unroll <= 4);
22876 for (i = 0; i < unroll; i++)
22877 {
22878 tmpreg[i] = gen_reg_rtx (mode);
22879 if (i)
22880 {
22881 srcmem =
22882 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22883 }
22884 emit_move_insn (tmpreg[i], srcmem);
22885 }
22886 for (i = 0; i < unroll; i++)
22887 {
22888 if (i)
22889 {
22890 destmem =
22891 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22892 }
22893 emit_move_insn (destmem, tmpreg[i]);
22894 }
22895 }
22896 }
22897 else
22898 for (i = 0; i < unroll; i++)
22899 {
22900 if (i)
22901 destmem =
22902 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22903 emit_move_insn (destmem, value);
22904 }
22905
22906 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22907 true, OPTAB_LIB_WIDEN);
22908 if (tmp != iter)
22909 emit_move_insn (iter, tmp);
22910
22911 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22912 true, top_label);
22913 if (expected_size != -1)
22914 {
22915 expected_size /= GET_MODE_SIZE (mode) * unroll;
22916 if (expected_size == 0)
22917 predict_jump (0);
22918 else if (expected_size > REG_BR_PROB_BASE)
22919 predict_jump (REG_BR_PROB_BASE - 1);
22920 else
22921 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22922 }
22923 else
22924 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22925 iter = ix86_zero_extend_to_Pmode (iter);
22926 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22927 true, OPTAB_LIB_WIDEN);
22928 if (tmp != destptr)
22929 emit_move_insn (destptr, tmp);
22930 if (!issetmem)
22931 {
22932 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22933 true, OPTAB_LIB_WIDEN);
22934 if (tmp != srcptr)
22935 emit_move_insn (srcptr, tmp);
22936 }
22937 emit_label (out_label);
22938 }
22939
22940 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
22941 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22942 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22943 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
22944 ORIG_VALUE is the original value passed to memset to fill the memory with.
22945 Other arguments have the same meaning as for the previous function. */
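/* For instance (illustrative): copying a constant 64 bytes with
   MODE == SImode loads 16 into the count register and issues a single
   "rep movsd"; the corresponding memset of a zero value uses "rep stosd".  */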
22946
22947 static void
22948 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
22949 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
22950 rtx count,
22951 enum machine_mode mode, bool issetmem)
22952 {
22953 rtx destexp;
22954 rtx srcexp;
22955 rtx countreg;
22956 HOST_WIDE_INT rounded_count;
22957
22958 /* If possible, it is shorter to use rep movs.
22959 TODO: Maybe it is better to move this logic to decide_alg. */
22960 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
22961 && (!issetmem || orig_value == const0_rtx))
22962 mode = SImode;
22963
22964 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
22965 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
22966
22967 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
22968 GET_MODE_SIZE (mode)));
22969 if (mode != QImode)
22970 {
22971 destexp = gen_rtx_ASHIFT (Pmode, countreg,
22972 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22973 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
22974 }
22975 else
22976 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
22977 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
22978 {
22979 rounded_count = (INTVAL (count)
22980 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22981 destmem = shallow_copy_rtx (destmem);
22982 set_mem_size (destmem, rounded_count);
22983 }
22984 else if (MEM_SIZE_KNOWN_P (destmem))
22985 clear_mem_size (destmem);
22986
22987 if (issetmem)
22988 {
22989 value = force_reg (mode, gen_lowpart (mode, value));
22990 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
22991 }
22992 else
22993 {
22994 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
22995 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
22996 if (mode != QImode)
22997 {
22998 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
22999 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23000 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23001 }
23002 else
23003 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23004 if (CONST_INT_P (count))
23005 {
23006 rounded_count = (INTVAL (count)
23007 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23008 srcmem = shallow_copy_rtx (srcmem);
23009 set_mem_size (srcmem, rounded_count);
23010 }
23011 else
23012 {
23013 if (MEM_SIZE_KNOWN_P (srcmem))
23014 clear_mem_size (srcmem);
23015 }
23016 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23017 destexp, srcexp));
23018 }
23019 }
23020
23021 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23022 DESTMEM.
23023 SRCMEM is passed by pointer so that it can be updated on return.
23024 The return value is the updated DESTMEM. */
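/* Illustrative example: with SIZE_TO_MOVE == 16 this tries a single 16-byte
   (TImode or equivalent vector mode) load/store pair; if such moves are not
   supported it falls back to word-sized moves, and in all cases DESTPTR and
   SRCPTR are advanced past the copied bytes.  */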
23025 static rtx
23026 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23027 HOST_WIDE_INT size_to_move)
23028 {
23029 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23030 enum insn_code code;
23031 enum machine_mode move_mode;
23032 int piece_size, i;
23033
23034 /* Find the widest mode in which we could perform moves.
23035 Start with the biggest power of 2 not exceeding SIZE_TO_MOVE and halve
23036 it until a move of that size is supported. */
23037 piece_size = 1 << floor_log2 (size_to_move);
23038 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23039 code = optab_handler (mov_optab, move_mode);
23040 while (code == CODE_FOR_nothing && piece_size > 1)
23041 {
23042 piece_size >>= 1;
23043 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23044 code = optab_handler (mov_optab, move_mode);
23045 }
23046
23047 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23048 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23049 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23050 {
23051 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23052 move_mode = mode_for_vector (word_mode, nunits);
23053 code = optab_handler (mov_optab, move_mode);
23054 if (code == CODE_FOR_nothing)
23055 {
23056 move_mode = word_mode;
23057 piece_size = GET_MODE_SIZE (move_mode);
23058 code = optab_handler (mov_optab, move_mode);
23059 }
23060 }
23061 gcc_assert (code != CODE_FOR_nothing);
23062
23063 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23064 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23065
23066 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE of them. */
23067 gcc_assert (size_to_move % piece_size == 0);
23068 adjust = GEN_INT (piece_size);
23069 for (i = 0; i < size_to_move; i += piece_size)
23070 {
23071 /* We move from memory to memory, so we'll need to do it via
23072 a temporary register. */
23073 tempreg = gen_reg_rtx (move_mode);
23074 emit_insn (GEN_FCN (code) (tempreg, src));
23075 emit_insn (GEN_FCN (code) (dst, tempreg));
23076
23077 emit_move_insn (destptr,
23078 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23079 emit_move_insn (srcptr,
23080 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23081
23082 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23083 piece_size);
23084 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23085 piece_size);
23086 }
23087
23088 /* Update DST and SRC rtx. */
23089 *srcmem = src;
23090 return dst;
23091 }
23092
23093 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
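/* With a constant COUNT the epilogue follows the binary representation of
   COUNT % MAX_SIZE: e.g. (illustrative) a remainder of 13 with
   MAX_SIZE == 16 emits one 8-byte, one 4-byte and one 1-byte move.  */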
23094 static void
23095 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23096 rtx destptr, rtx srcptr, rtx count, int max_size)
23097 {
23098 rtx src, dest;
23099 if (CONST_INT_P (count))
23100 {
23101 HOST_WIDE_INT countval = INTVAL (count);
23102 HOST_WIDE_INT epilogue_size = countval % max_size;
23103 int i;
23104
23105 /* For now MAX_SIZE should be a power of 2. This assert could be
23106 relaxed, but it'll require a bit more complicated epilogue
23107 expanding. */
23108 gcc_assert ((max_size & (max_size - 1)) == 0);
23109 for (i = max_size; i >= 1; i >>= 1)
23110 {
23111 if (epilogue_size & i)
23112 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23113 }
23114 return;
23115 }
23116 if (max_size > 8)
23117 {
23118 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23119 count, 1, OPTAB_DIRECT);
23120 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23121 count, QImode, 1, 4, false);
23122 return;
23123 }
23124
23125 /* When there are stringops, we can cheaply increase dest and src pointers.
23126 Otherwise we save code size by maintaining offset (zero is readily
23127 available from preceding rep operation) and using x86 addressing modes.
23128 */
23129 if (TARGET_SINGLE_STRINGOP)
23130 {
23131 if (max_size > 4)
23132 {
23133 rtx label = ix86_expand_aligntest (count, 4, true);
23134 src = change_address (srcmem, SImode, srcptr);
23135 dest = change_address (destmem, SImode, destptr);
23136 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23137 emit_label (label);
23138 LABEL_NUSES (label) = 1;
23139 }
23140 if (max_size > 2)
23141 {
23142 rtx label = ix86_expand_aligntest (count, 2, true);
23143 src = change_address (srcmem, HImode, srcptr);
23144 dest = change_address (destmem, HImode, destptr);
23145 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23146 emit_label (label);
23147 LABEL_NUSES (label) = 1;
23148 }
23149 if (max_size > 1)
23150 {
23151 rtx label = ix86_expand_aligntest (count, 1, true);
23152 src = change_address (srcmem, QImode, srcptr);
23153 dest = change_address (destmem, QImode, destptr);
23154 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23155 emit_label (label);
23156 LABEL_NUSES (label) = 1;
23157 }
23158 }
23159 else
23160 {
23161 rtx offset = force_reg (Pmode, const0_rtx);
23162 rtx tmp;
23163
23164 if (max_size > 4)
23165 {
23166 rtx label = ix86_expand_aligntest (count, 4, true);
23167 src = change_address (srcmem, SImode, srcptr);
23168 dest = change_address (destmem, SImode, destptr);
23169 emit_move_insn (dest, src);
23170 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23171 true, OPTAB_LIB_WIDEN);
23172 if (tmp != offset)
23173 emit_move_insn (offset, tmp);
23174 emit_label (label);
23175 LABEL_NUSES (label) = 1;
23176 }
23177 if (max_size > 2)
23178 {
23179 rtx label = ix86_expand_aligntest (count, 2, true);
23180 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23181 src = change_address (srcmem, HImode, tmp);
23182 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23183 dest = change_address (destmem, HImode, tmp);
23184 emit_move_insn (dest, src);
23185 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23186 true, OPTAB_LIB_WIDEN);
23187 if (tmp != offset)
23188 emit_move_insn (offset, tmp);
23189 emit_label (label);
23190 LABEL_NUSES (label) = 1;
23191 }
23192 if (max_size > 1)
23193 {
23194 rtx label = ix86_expand_aligntest (count, 1, true);
23195 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23196 src = change_address (srcmem, QImode, tmp);
23197 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23198 dest = change_address (destmem, QImode, tmp);
23199 emit_move_insn (dest, src);
23200 emit_label (label);
23201 LABEL_NUSES (label) = 1;
23202 }
23203 }
23204 }
23205
23206 /* This function emits stores to fill SIZE_TO_MOVE bytes starting at DESTMEM
23207 with the value PROMOTED_VAL.
23208 DESTPTR is advanced by the emitted insns as the stores are generated.
23209 The return value is the updated DESTMEM. */
23210 static rtx
23211 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23212 HOST_WIDE_INT size_to_move)
23213 {
23214 rtx dst = destmem, adjust;
23215 enum insn_code code;
23216 enum machine_mode move_mode;
23217 int piece_size, i;
23218
23219 /* Use the mode of PROMOTED_VAL for the stores; if SIZE_TO_MOVE is
23220 smaller than that mode, narrow the mode (and PROMOTED_VAL) to
23221 SIZE_TO_MOVE bytes. */
23222 move_mode = GET_MODE (promoted_val);
23223 if (move_mode == VOIDmode)
23224 move_mode = QImode;
23225 if (size_to_move < GET_MODE_SIZE (move_mode))
23226 {
23227 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23228 promoted_val = gen_lowpart (move_mode, promoted_val);
23229 }
23230 piece_size = GET_MODE_SIZE (move_mode);
23231 code = optab_handler (mov_optab, move_mode);
23232 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23233
23234 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23235
23236 /* Emit stores. We'll need SIZE_TO_MOVE/PIECE_SIZE of them. */
23237 gcc_assert (size_to_move % piece_size == 0);
23238 adjust = GEN_INT (piece_size);
23239 for (i = 0; i < size_to_move; i += piece_size)
23240 {
23241 if (piece_size <= GET_MODE_SIZE (word_mode))
23242 {
23243 emit_insn (gen_strset (destptr, dst, promoted_val));
23244 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23245 piece_size);
23246 continue;
23247 }
23248
23249 emit_insn (GEN_FCN (code) (dst, promoted_val));
23250
23251 emit_move_insn (destptr,
23252 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23253
23254 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23255 piece_size);
23256 }
23257
23258 /* Update DST rtx. */
23259 return dst;
23260 }
23261 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23262 static void
23263 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23264 rtx count, int max_size)
23265 {
23266 count =
23267 expand_simple_binop (counter_mode (count), AND, count,
23268 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23269 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23270 gen_lowpart (QImode, value), count, QImode,
23271 1, max_size / 2, true);
23272 }
23273
23274 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23275 static void
23276 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23277 rtx count, int max_size)
23278 {
23279 rtx dest;
23280
23281 if (CONST_INT_P (count))
23282 {
23283 HOST_WIDE_INT countval = INTVAL (count);
23284 HOST_WIDE_INT epilogue_size = countval % max_size;
23285 int i;
23286
23287 /* For now MAX_SIZE should be a power of 2. This assert could be
23288 relaxed, but it'll require a bit more complicated epilogue
23289 expansion. */
23290 gcc_assert ((max_size & (max_size - 1)) == 0);
23291 for (i = max_size; i >= 1; i >>= 1)
23292 {
23293 if (epilogue_size & i)
23294 {
23295 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23296 destmem = emit_memset (destmem, destptr, vec_value, i);
23297 else
23298 destmem = emit_memset (destmem, destptr, value, i);
23299 }
23300 }
23301 return;
23302 }
23303 if (max_size > 32)
23304 {
23305 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23306 return;
23307 }
23308 if (max_size > 16)
23309 {
23310 rtx label = ix86_expand_aligntest (count, 16, true);
23311 if (TARGET_64BIT)
23312 {
23313 dest = change_address (destmem, DImode, destptr);
23314 emit_insn (gen_strset (destptr, dest, value));
23315 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23316 emit_insn (gen_strset (destptr, dest, value));
23317 }
23318 else
23319 {
23320 dest = change_address (destmem, SImode, destptr);
23321 emit_insn (gen_strset (destptr, dest, value));
23322 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23323 emit_insn (gen_strset (destptr, dest, value));
23324 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23325 emit_insn (gen_strset (destptr, dest, value));
23326 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23327 emit_insn (gen_strset (destptr, dest, value));
23328 }
23329 emit_label (label);
23330 LABEL_NUSES (label) = 1;
23331 }
23332 if (max_size > 8)
23333 {
23334 rtx label = ix86_expand_aligntest (count, 8, true);
23335 if (TARGET_64BIT)
23336 {
23337 dest = change_address (destmem, DImode, destptr);
23338 emit_insn (gen_strset (destptr, dest, value));
23339 }
23340 else
23341 {
23342 dest = change_address (destmem, SImode, destptr);
23343 emit_insn (gen_strset (destptr, dest, value));
23344 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23345 emit_insn (gen_strset (destptr, dest, value));
23346 }
23347 emit_label (label);
23348 LABEL_NUSES (label) = 1;
23349 }
23350 if (max_size > 4)
23351 {
23352 rtx label = ix86_expand_aligntest (count, 4, true);
23353 dest = change_address (destmem, SImode, destptr);
23354 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23355 emit_label (label);
23356 LABEL_NUSES (label) = 1;
23357 }
23358 if (max_size > 2)
23359 {
23360 rtx label = ix86_expand_aligntest (count, 2, true);
23361 dest = change_address (destmem, HImode, destptr);
23362 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23363 emit_label (label);
23364 LABEL_NUSES (label) = 1;
23365 }
23366 if (max_size > 1)
23367 {
23368 rtx label = ix86_expand_aligntest (count, 1, true);
23369 dest = change_address (destmem, QImode, destptr);
23370 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23371 emit_label (label);
23372 LABEL_NUSES (label) = 1;
23373 }
23374 }
23375
23376 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
23377 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23378 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23379 ignored.
23380 Return value is updated DESTMEM. */
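/* As a concrete illustration: with ALIGN == 1 and DESIRED_ALIGNMENT == 8 the
loop below emits conditional 1-, 2- and 4-byte copies (or stores), each one
guarded by a test of the corresponding low bit of DESTPTR, so that DESTPTR
ends up 8-byte aligned on every path. */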
23381 static rtx
23382 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23383 rtx destptr, rtx srcptr, rtx value,
23384 rtx vec_value, rtx count, int align,
23385 int desired_alignment, bool issetmem)
23386 {
23387 int i;
23388 for (i = 1; i < desired_alignment; i <<= 1)
23389 {
23390 if (align <= i)
23391 {
23392 rtx label = ix86_expand_aligntest (destptr, i, false);
23393 if (issetmem)
23394 {
23395 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23396 destmem = emit_memset (destmem, destptr, vec_value, i);
23397 else
23398 destmem = emit_memset (destmem, destptr, value, i);
23399 }
23400 else
23401 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23402 ix86_adjust_counter (count, i);
23403 emit_label (label);
23404 LABEL_NUSES (label) = 1;
23405 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23406 }
23407 }
23408 return destmem;
23409 }
23410
23411 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
23412 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23413 and jump to DONE_LABEL. */
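/* The expansion below first copies (or stores) SIZE bytes at the start of the
block and then SIZE bytes ending at DESTPTR + COUNT; for any COUNT in the
range SIZE..2*SIZE-1 these two possibly overlapping accesses cover the whole
block. */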
23414 static void
23415 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23416 rtx destptr, rtx srcptr,
23417 rtx value, rtx vec_value,
23418 rtx count, int size,
23419 rtx done_label, bool issetmem)
23420 {
23421 rtx label = ix86_expand_aligntest (count, size, false);
23422 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23423 rtx modesize;
23424 int n;
23425
23426 /* If we do not have a vector value to copy, we must reduce the size. */
23427 if (issetmem)
23428 {
23429 if (!vec_value)
23430 {
23431 if (GET_MODE (value) == VOIDmode && size > 8)
23432 mode = Pmode;
23433 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23434 mode = GET_MODE (value);
23435 }
23436 else
23437 mode = GET_MODE (vec_value), value = vec_value;
23438 }
23439 else
23440 {
23441 /* Choose appropriate vector mode. */
23442 if (size >= 32)
23443 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23444 else if (size >= 16)
23445 mode = TARGET_SSE ? V16QImode : DImode;
23446 srcmem = change_address (srcmem, mode, srcptr);
23447 }
23448 destmem = change_address (destmem, mode, destptr);
23449 modesize = GEN_INT (GET_MODE_SIZE (mode));
23450 gcc_assert (GET_MODE_SIZE (mode) <= size);
23451 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23452 {
23453 if (issetmem)
23454 emit_move_insn (destmem, gen_lowpart (mode, value));
23455 else
23456 {
23457 emit_move_insn (destmem, srcmem);
23458 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23459 }
23460 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23461 }
23462
23463 destmem = offset_address (destmem, count, 1);
23464 destmem = offset_address (destmem, GEN_INT (-2 * size),
23465 GET_MODE_SIZE (mode));
23466 if (!issetmem)
23467 {
23468 srcmem = offset_address (srcmem, count, 1);
23469 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23470 GET_MODE_SIZE (mode));
23471 }
23472 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23473 {
23474 if (issetmem)
23475 emit_move_insn (destmem, gen_lowpart (mode, value));
23476 else
23477 {
23478 emit_move_insn (destmem, srcmem);
23479 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23480 }
23481 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23482 }
23483 emit_jump_insn (gen_jump (done_label));
23484 emit_barrier ();
23485
23486 emit_label (label);
23487 LABEL_NUSES (label) = 1;
23488 }
23489
23490 /* Handle small memcpy (up to SIZE, which is supposed to be a small power of 2)
23491 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23492 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way we can
23493 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23494 DONE_LABEL is a label after the whole copying sequence. The label is created
23495 on demand if *DONE_LABEL is NULL.
23496 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
23497 bounds after the initial copies.
23498
23499 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23500 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23501 we will dispatch to a library call for large blocks.
23502
23503 In pseudocode we do:
23504
23505 if (COUNT < SIZE)
23506 {
23507 Assume that SIZE is 4. Bigger sizes are handled analogously
23508 if (COUNT & 4)
23509 {
23510 copy 4 bytes from SRCPTR to DESTPTR
23511 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23512 goto done_label
23513 }
23514 if (!COUNT)
23515 goto done_label;
23516 copy 1 byte from SRCPTR to DESTPTR
23517 if (COUNT & 2)
23518 {
23519 copy 2 bytes from SRCPTR to DESTPTR
23520 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23521 }
23522 }
23523 else
23524 {
23525 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23526 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT - SIZE
23527
23528 OLD_DESTPTR = DESTPTR;
23529 Align DESTPTR up to DESIRED_ALIGN
23530 SRCPTR += DESTPTR - OLD_DESTPTR
23531 COUNT -= DESTPTR - OLD_DESTPTR
23532 if (DYNAMIC_CHECK)
23533 Round COUNT down to multiple of SIZE
23534 << optional caller supplied zero size guard is here >>
23535 << optional caller supplied dynamic check is here >>
23536 << caller supplied main copy loop is here >>
23537 }
23538 done_label:
23539 */
23540 static void
23541 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23542 rtx *destptr, rtx *srcptr,
23543 enum machine_mode mode,
23544 rtx value, rtx vec_value,
23545 rtx *count,
23546 rtx *done_label,
23547 int size,
23548 int desired_align,
23549 int align,
23550 unsigned HOST_WIDE_INT *min_size,
23551 bool dynamic_check,
23552 bool issetmem)
23553 {
23554 rtx loop_label = NULL, label;
23555 int n;
23556 rtx modesize;
23557 int prolog_size = 0;
23558 rtx mode_value;
23559
23560 /* Choose the proper value to copy. */
23561 if (issetmem && VECTOR_MODE_P (mode))
23562 mode_value = vec_value;
23563 else
23564 mode_value = value;
23565 gcc_assert (GET_MODE_SIZE (mode) <= size);
23566
23567 /* See if block is big or small, handle small blocks. */
23568 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23569 {
23570 int size2 = size;
23571 loop_label = gen_label_rtx ();
23572
23573 if (!*done_label)
23574 *done_label = gen_label_rtx ();
23575
23576 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23577 1, loop_label);
23578 size2 >>= 1;
23579
23580 /* Handle sizes > 3. */
23581 for (;size2 > 2; size2 >>= 1)
23582 expand_small_movmem_or_setmem (destmem, srcmem,
23583 *destptr, *srcptr,
23584 value, vec_value,
23585 *count,
23586 size2, *done_label, issetmem);
23587 /* Nothing to copy? Jump to DONE_LABEL if so */
23588 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23589 1, *done_label);
23590
23591 /* Do a byte copy. */
23592 destmem = change_address (destmem, QImode, *destptr);
23593 if (issetmem)
23594 emit_move_insn (destmem, gen_lowpart (QImode, value));
23595 else
23596 {
23597 srcmem = change_address (srcmem, QImode, *srcptr);
23598 emit_move_insn (destmem, srcmem);
23599 }
23600
23601 /* Handle sizes 2 and 3. */
23602 label = ix86_expand_aligntest (*count, 2, false);
23603 destmem = change_address (destmem, HImode, *destptr);
23604 destmem = offset_address (destmem, *count, 1);
23605 destmem = offset_address (destmem, GEN_INT (-2), 2);
23606 if (issetmem)
23607 emit_move_insn (destmem, gen_lowpart (HImode, value));
23608 else
23609 {
23610 srcmem = change_address (srcmem, HImode, *srcptr);
23611 srcmem = offset_address (srcmem, *count, 1);
23612 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23613 emit_move_insn (destmem, srcmem);
23614 }
23615
23616 emit_label (label);
23617 LABEL_NUSES (label) = 1;
23618 emit_jump_insn (gen_jump (*done_label));
23619 emit_barrier ();
23620 }
23621 else
23622 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23623 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23624
23625 /* Start memcpy for COUNT >= SIZE. */
23626 if (loop_label)
23627 {
23628 emit_label (loop_label);
23629 LABEL_NUSES (loop_label) = 1;
23630 }
23631
23632 /* Copy first desired_align bytes. */
23633 if (!issetmem)
23634 srcmem = change_address (srcmem, mode, *srcptr);
23635 destmem = change_address (destmem, mode, *destptr);
23636 modesize = GEN_INT (GET_MODE_SIZE (mode));
23637 for (n = 0; prolog_size < desired_align - align; n++)
23638 {
23639 if (issetmem)
23640 emit_move_insn (destmem, mode_value);
23641 else
23642 {
23643 emit_move_insn (destmem, srcmem);
23644 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23645 }
23646 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23647 prolog_size += GET_MODE_SIZE (mode);
23648 }
23649
23650
23651 /* Copy last SIZE bytes. */
23652 destmem = offset_address (destmem, *count, 1);
23653 destmem = offset_address (destmem,
23654 GEN_INT (-size - prolog_size),
23655 1);
23656 if (issetmem)
23657 emit_move_insn (destmem, mode_value);
23658 else
23659 {
23660 srcmem = offset_address (srcmem, *count, 1);
23661 srcmem = offset_address (srcmem,
23662 GEN_INT (-size - prolog_size),
23663 1);
23664 emit_move_insn (destmem, srcmem);
23665 }
23666 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23667 {
23668 destmem = offset_address (destmem, modesize, 1);
23669 if (issetmem)
23670 emit_move_insn (destmem, mode_value);
23671 else
23672 {
23673 srcmem = offset_address (srcmem, modesize, 1);
23674 emit_move_insn (destmem, srcmem);
23675 }
23676 }
23677
23678 /* Align destination. */
23679 if (desired_align > 1 && desired_align > align)
23680 {
23681 rtx saveddest = *destptr;
23682
23683 gcc_assert (desired_align <= size);
23684 /* Align destptr up, place it to new register. */
23685 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23686 GEN_INT (prolog_size),
23687 NULL_RTX, 1, OPTAB_DIRECT);
23688 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23689 GEN_INT (-desired_align),
23690 *destptr, 1, OPTAB_DIRECT);
23691 /* See how many bytes we skipped. */
23692 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23693 *destptr,
23694 saveddest, 1, OPTAB_DIRECT);
23695 /* Adjust srcptr and count. */
23696 if (!issetmem)
23697 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23698 *srcptr, 1, OPTAB_DIRECT);
23699 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23700 saveddest, *count, 1, OPTAB_DIRECT);
23701 /* We copied at most size + prolog_size. */
23702 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23703 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23704 else
23705 *min_size = 0;
23706
23707 /* Our loops always round down the block size, but for dispatch to a library
23708 call we need the precise value. */
23709 if (dynamic_check)
23710 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23711 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23712 }
23713 else
23714 {
23715 gcc_assert (prolog_size == 0);
23716 /* Decrease count, so we won't end up copying last word twice. */
23717 if (!CONST_INT_P (*count))
23718 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23719 constm1_rtx, *count, 1, OPTAB_DIRECT);
23720 else
23721 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23722 if (*min_size)
23723 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23724 }
23725 }
23726
23727
23728 /* This function is like the previous one, except here we know how many bytes
23729 need to be copied. That allows us to update alignment not only of DST, which
23730 is returned, but also of SRC, which is passed as a pointer for that
23731 reason. */
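/* For example, with ALIGN_BYTES == 7 and DESIRED_ALIGN == 8 the loop below
emits unconditional copies (or stores) of 1, 2 and 4 bytes, one for each bit
set in ALIGN_BYTES, before updating the alignment info on DST. */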
23732 static rtx
23733 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23734 rtx srcreg, rtx value, rtx vec_value,
23735 int desired_align, int align_bytes,
23736 bool issetmem)
23737 {
23738 rtx src = NULL;
23739 rtx orig_dst = dst;
23740 rtx orig_src = NULL;
23741 int piece_size = 1;
23742 int copied_bytes = 0;
23743
23744 if (!issetmem)
23745 {
23746 gcc_assert (srcp != NULL);
23747 src = *srcp;
23748 orig_src = src;
23749 }
23750
23751 for (piece_size = 1;
23752 piece_size <= desired_align && copied_bytes < align_bytes;
23753 piece_size <<= 1)
23754 {
23755 if (align_bytes & piece_size)
23756 {
23757 if (issetmem)
23758 {
23759 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23760 dst = emit_memset (dst, destreg, vec_value, piece_size);
23761 else
23762 dst = emit_memset (dst, destreg, value, piece_size);
23763 }
23764 else
23765 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23766 copied_bytes += piece_size;
23767 }
23768 }
23769 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23770 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23771 if (MEM_SIZE_KNOWN_P (orig_dst))
23772 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23773
23774 if (!issetmem)
23775 {
23776 int src_align_bytes = get_mem_align_offset (src, desired_align
23777 * BITS_PER_UNIT);
23778 if (src_align_bytes >= 0)
23779 src_align_bytes = desired_align - src_align_bytes;
23780 if (src_align_bytes >= 0)
23781 {
23782 unsigned int src_align;
23783 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23784 {
23785 if ((src_align_bytes & (src_align - 1))
23786 == (align_bytes & (src_align - 1)))
23787 break;
23788 }
23789 if (src_align > (unsigned int) desired_align)
23790 src_align = desired_align;
23791 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23792 set_mem_align (src, src_align * BITS_PER_UNIT);
23793 }
23794 if (MEM_SIZE_KNOWN_P (orig_src))
23795 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23796 *srcp = src;
23797 }
23798
23799 return dst;
23800 }
23801
23802 /* Return true if ALG can be used in current context.
23803 Assume we expand memset if MEMSET is true. */
23804 static bool
23805 alg_usable_p (enum stringop_alg alg, bool memset)
23806 {
23807 if (alg == no_stringop)
23808 return false;
23809 if (alg == vector_loop)
23810 return TARGET_SSE || TARGET_AVX;
23811 /* Algorithms using the rep prefix want at least edi and ecx;
23812 additionally, memset wants eax and memcpy wants esi. Don't
23813 consider such algorithms if the user has appropriated those
23814 registers for their own purposes. */
23815 if (alg == rep_prefix_1_byte
23816 || alg == rep_prefix_4_byte
23817 || alg == rep_prefix_8_byte)
23818 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23819 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23820 return true;
23821 }
23822
23823 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
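/* For instance, when optimizing for size, a memcpy whose constant COUNT is a
multiple of 4 is expanded via rep_prefix_4_byte below (falling back to a plain
loop when the rep-prefix registers are fixed), unless the user explicitly
requested a particular algorithm. */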
23824 static enum stringop_alg
23825 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23826 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23827 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23828 {
23829 const struct stringop_algs * algs;
23830 bool optimize_for_speed;
23831 int max = -1;
23832 const struct processor_costs *cost;
23833 int i;
23834 bool any_alg_usable_p = false;
23835
23836 *noalign = false;
23837 *dynamic_check = -1;
23838
23839 /* Even if the string operation call is cold, we still might spend a lot
23840 of time processing large blocks. */
23841 if (optimize_function_for_size_p (cfun)
23842 || (optimize_insn_for_size_p ()
23843 && (max_size < 256
23844 || (expected_size != -1 && expected_size < 256))))
23845 optimize_for_speed = false;
23846 else
23847 optimize_for_speed = true;
23848
23849 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23850 if (memset)
23851 algs = &cost->memset[TARGET_64BIT != 0];
23852 else
23853 algs = &cost->memcpy[TARGET_64BIT != 0];
23854
23855 /* See maximal size for user defined algorithm. */
23856 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23857 {
23858 enum stringop_alg candidate = algs->size[i].alg;
23859 bool usable = alg_usable_p (candidate, memset);
23860 any_alg_usable_p |= usable;
23861
23862 if (candidate != libcall && candidate && usable)
23863 max = algs->size[i].max;
23864 }
23865
23866 /* If the expected size is not known but the max size is small enough
23867 so that the inline version is a win, set the expected size into
23868 the range. */
23869 if (max > 1 && (unsigned HOST_WIDE_INT) max >= max_size
23870 && expected_size == -1)
23871 expected_size = min_size / 2 + max_size / 2;
23872
23873 /* If the user specified the algorithm, honor it if possible. */
23874 if (ix86_stringop_alg != no_stringop
23875 && alg_usable_p (ix86_stringop_alg, memset))
23876 return ix86_stringop_alg;
23877 /* rep; movq or rep; movl is the smallest variant. */
23878 else if (!optimize_for_speed)
23879 {
23880 *noalign = true;
23881 if (!count || (count & 3) || (memset && !zero_memset))
23882 return alg_usable_p (rep_prefix_1_byte, memset)
23883 ? rep_prefix_1_byte : loop_1_byte;
23884 else
23885 return alg_usable_p (rep_prefix_4_byte, memset)
23886 ? rep_prefix_4_byte : loop;
23887 }
23888 /* Very tiny blocks are best handled via the loop; REP is expensive to
23889 set up. */
23890 else if (expected_size != -1 && expected_size < 4)
23891 return loop_1_byte;
23892 else if (expected_size != -1)
23893 {
23894 enum stringop_alg alg = libcall;
23895 bool alg_noalign = false;
23896 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23897 {
23898 /* We get here if the algorithms that were not libcall-based
23899 were rep-prefix based and we are unable to use rep prefixes
23900 based on global register usage. Break out of the loop and
23901 use the heuristic below. */
23902 if (algs->size[i].max == 0)
23903 break;
23904 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23905 {
23906 enum stringop_alg candidate = algs->size[i].alg;
23907
23908 if (candidate != libcall && alg_usable_p (candidate, memset))
23909 {
23910 alg = candidate;
23911 alg_noalign = algs->size[i].noalign;
23912 }
23913 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23914 last non-libcall inline algorithm. */
23915 if (TARGET_INLINE_ALL_STRINGOPS)
23916 {
23917 /* When the current size is best copied by a libcall,
23918 but we are still forced to inline, run the heuristic below
23919 that will pick code for medium sized blocks. */
23920 if (alg != libcall)
23921 {
23922 *noalign = alg_noalign;
23923 return alg;
23924 }
23925 break;
23926 }
23927 else if (alg_usable_p (candidate, memset))
23928 {
23929 *noalign = algs->size[i].noalign;
23930 return candidate;
23931 }
23932 }
23933 }
23934 }
23935 /* When asked to inline the call anyway, try to pick a meaningful choice.
23936 We look for the maximal size of block that is faster to copy by hand and
23937 take blocks of at most that size, guessing that the average size will
23938 be roughly half of the block.
23939
23940 If this turns out to be bad, we might simply specify the preferred
23941 choice in ix86_costs. */
23942 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23943 && (algs->unknown_size == libcall
23944 || !alg_usable_p (algs->unknown_size, memset)))
23945 {
23946 enum stringop_alg alg;
23947
23948 /* If there aren't any usable algorithms, then recursing on
23949 smaller sizes isn't going to find anything. Just return the
23950 simple byte-at-a-time copy loop. */
23951 if (!any_alg_usable_p)
23952 {
23953 /* Pick something reasonable. */
23954 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23955 *dynamic_check = 128;
23956 return loop_1_byte;
23957 }
23958 if (max == -1)
23959 max = 4096;
23960 alg = decide_alg (count, max / 2, min_size, max_size, memset,
23961 zero_memset, dynamic_check, noalign);
23962 gcc_assert (*dynamic_check == -1);
23963 gcc_assert (alg != libcall);
23964 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23965 *dynamic_check = max;
23966 return alg;
23967 }
23968 return (alg_usable_p (algs->unknown_size, memset)
23969 ? algs->unknown_size : libcall);
23970 }
23971
23972 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23973 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
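/* E.g. for the vector_loop algorithm with a V16QImode move mode the desired
alignment computed below is 16 bytes, unless we optimize for size or the
expected block size is tiny. */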
23974 static int
23975 decide_alignment (int align,
23976 enum stringop_alg alg,
23977 int expected_size,
23978 enum machine_mode move_mode)
23979 {
23980 int desired_align = 0;
23981
23982 gcc_assert (alg != no_stringop);
23983
23984 if (alg == libcall)
23985 return 0;
23986 if (move_mode == VOIDmode)
23987 return 0;
23988
23989 desired_align = GET_MODE_SIZE (move_mode);
23990 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
23991 copying a whole cacheline at once. */
23992 if (TARGET_PENTIUMPRO
23993 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
23994 desired_align = 8;
23995
23996 if (optimize_size)
23997 desired_align = 1;
23998 if (desired_align < align)
23999 desired_align = align;
24000 if (expected_size != -1 && expected_size < 4)
24001 desired_align = align;
24002
24003 return desired_align;
24004 }
24005
24006
24007 /* Helper function for memset. For QImode value 0xXY produce
24008 0xXYXYXYXY of the width specified by MODE. This is essentially
24009 a * 0x10101010, but we can do slightly better than
24010 synth_mult by unwinding the sequence by hand on CPUs with
24011 slow multiply. */
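/* For example, for VAL == 0x41 and MODE == DImode the constant path below
computes 0x41 -> 0x4141 -> 0x41414141 -> 0x4141414141414141 by way of
v |= v << 8; v |= v << 16; v |= (v << 16) << 16. */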
24012 static rtx
24013 promote_duplicated_reg (enum machine_mode mode, rtx val)
24014 {
24015 enum machine_mode valmode = GET_MODE (val);
24016 rtx tmp;
24017 int nops = mode == DImode ? 3 : 2;
24018
24019 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24020 if (val == const0_rtx)
24021 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24022 if (CONST_INT_P (val))
24023 {
24024 HOST_WIDE_INT v = INTVAL (val) & 255;
24025
24026 v |= v << 8;
24027 v |= v << 16;
24028 if (mode == DImode)
24029 v |= (v << 16) << 16;
24030 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24031 }
24032
24033 if (valmode == VOIDmode)
24034 valmode = QImode;
24035 if (valmode != QImode)
24036 val = gen_lowpart (QImode, val);
24037 if (mode == QImode)
24038 return val;
24039 if (!TARGET_PARTIAL_REG_STALL)
24040 nops--;
24041 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24042 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24043 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24044 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24045 {
24046 rtx reg = convert_modes (mode, QImode, val, true);
24047 tmp = promote_duplicated_reg (mode, const1_rtx);
24048 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24049 OPTAB_DIRECT);
24050 }
24051 else
24052 {
24053 rtx reg = convert_modes (mode, QImode, val, true);
24054
24055 if (!TARGET_PARTIAL_REG_STALL)
24056 if (mode == SImode)
24057 emit_insn (gen_movsi_insv_1 (reg, reg));
24058 else
24059 emit_insn (gen_movdi_insv_1 (reg, reg));
24060 else
24061 {
24062 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24063 NULL, 1, OPTAB_DIRECT);
24064 reg =
24065 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24066 }
24067 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24068 NULL, 1, OPTAB_DIRECT);
24069 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24070 if (mode == SImode)
24071 return reg;
24072 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24073 NULL, 1, OPTAB_DIRECT);
24074 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24075 return reg;
24076 }
24077 }
24078
24079 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
24080 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
24081 alignment from ALIGN to DESIRED_ALIGN. */
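/* E.g. on a 64-bit target a SIZE_NEEDED of 8 widens the value to DImode,
while SIZE_NEEDED == 4 with no extra alignment requirement only widens it
to SImode. */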
24082 static rtx
24083 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24084 int align)
24085 {
24086 rtx promoted_val;
24087
24088 if (TARGET_64BIT
24089 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24090 promoted_val = promote_duplicated_reg (DImode, val);
24091 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24092 promoted_val = promote_duplicated_reg (SImode, val);
24093 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24094 promoted_val = promote_duplicated_reg (HImode, val);
24095 else
24096 promoted_val = val;
24097
24098 return promoted_val;
24099 }
24100
24101 /* Expand a string move (memcpy) or store (memset) operation. Use i386 string
24102 operations when profitable. The code depends upon architecture, block size
24103 and alignment, but always has one of the following overall structures:
24104
24105 Aligned move sequence:
24106
24107 1) Prologue guard: Conditional that jumps up to epilogues for small
24108 blocks that can be handled by epilogue alone. This is faster
24109 but also needed for correctness, since the prologue assumes the block
24110 is larger than the desired alignment.
24111
24112 Optional dynamic check for size and libcall for large
24113 blocks is emitted here too, with -minline-stringops-dynamically.
24114
24115 2) Prologue: copy first few bytes in order to get destination
24116 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24117 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24118 copied. We emit either a jump tree on power of two sized
24119 blocks, or a byte loop.
24120
24121 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24122 with specified algorithm.
24123
24124 4) Epilogue: code copying tail of the block that is too small to be
24125 handled by main body (or up to size guarded by prologue guard).
24126
24127 Misaligned move sequence
24128
24129 1) misaligned move prologue/epilogue containing:
24130 a) Prologue handling small memory blocks and jumping to done_label
24131 (skipped if blocks are known to be large enough)
24132 b) Single possibly misaligned move copying the first DESIRED_ALIGN-ALIGN
24133 bytes to align the destination
24134 (skipped if alignment is not needed)
24135 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24136
24137 2) Zero size guard dispatching to done_label, if needed
24138
24139 3) dispatch to library call, if needed,
24140
24141 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24142 with specified algorithm. */
24143 bool
24144 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24145 rtx align_exp, rtx expected_align_exp,
24146 rtx expected_size_exp, rtx min_size_exp,
24147 rtx max_size_exp, rtx probable_max_size_exp,
24148 bool issetmem)
24149 {
24150 rtx destreg;
24151 rtx srcreg = NULL;
24152 rtx label = NULL;
24153 rtx tmp;
24154 rtx jump_around_label = NULL;
24155 HOST_WIDE_INT align = 1;
24156 unsigned HOST_WIDE_INT count = 0;
24157 HOST_WIDE_INT expected_size = -1;
24158 int size_needed = 0, epilogue_size_needed;
24159 int desired_align = 0, align_bytes = 0;
24160 enum stringop_alg alg;
24161 rtx promoted_val = NULL;
24162 rtx vec_promoted_val = NULL;
24163 bool force_loopy_epilogue = false;
24164 int dynamic_check;
24165 bool need_zero_guard = false;
24166 bool noalign;
24167 enum machine_mode move_mode = VOIDmode;
24168 int unroll_factor = 1;
24169 /* TODO: Once value ranges are available, fill in proper data. */
24170 unsigned HOST_WIDE_INT min_size = 0;
24171 unsigned HOST_WIDE_INT max_size = -1;
24172 unsigned HOST_WIDE_INT probable_max_size = -1;
24173 bool misaligned_prologue_used = false;
24174
24175 if (CONST_INT_P (align_exp))
24176 align = INTVAL (align_exp);
24177 /* i386 can do misaligned access at a reasonably increased cost. */
24178 if (CONST_INT_P (expected_align_exp)
24179 && INTVAL (expected_align_exp) > align)
24180 align = INTVAL (expected_align_exp);
24181 /* ALIGN is the minimum of destination and source alignment, but we care here
24182 just about destination alignment. */
24183 else if (!issetmem
24184 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24185 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24186
24187 if (CONST_INT_P (count_exp))
24188 {
24189 min_size = max_size = probable_max_size = count = expected_size
24190 = INTVAL (count_exp);
24191 /* When COUNT is 0, there is nothing to do. */
24192 if (!count)
24193 return true;
24194 }
24195 else
24196 {
24197 if (min_size_exp)
24198 min_size = INTVAL (min_size_exp);
24199 if (max_size_exp)
24200 max_size = INTVAL (max_size_exp);
24201 if (probable_max_size_exp)
24202 probable_max_size = INTVAL (probable_max_size_exp);
24203 if (CONST_INT_P (expected_size_exp))
24204 expected_size = INTVAL (expected_size_exp);
24205 }
24206
24207 /* Make sure we don't need to care about overflow later on. */
24208 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24209 return false;
24210
24211 /* Step 0: Decide on preferred algorithm, desired alignment and
24212 size of chunks to be copied by main loop. */
24213 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24214 issetmem,
24215 issetmem && val_exp == const0_rtx,
24216 &dynamic_check, &noalign);
24217 if (alg == libcall)
24218 return false;
24219 gcc_assert (alg != no_stringop);
24220
24221 /* For now the vector version of memset is generated only for memory zeroing, as
24222 creating a promoted vector value is very cheap in this case. */
24223 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24224 alg = unrolled_loop;
24225
24226 if (!count)
24227 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24228 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24229 if (!issetmem)
24230 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24231
24232 unroll_factor = 1;
24233 move_mode = word_mode;
24234 switch (alg)
24235 {
24236 case libcall:
24237 case no_stringop:
24238 case last_alg:
24239 gcc_unreachable ();
24240 case loop_1_byte:
24241 need_zero_guard = true;
24242 move_mode = QImode;
24243 break;
24244 case loop:
24245 need_zero_guard = true;
24246 break;
24247 case unrolled_loop:
24248 need_zero_guard = true;
24249 unroll_factor = (TARGET_64BIT ? 4 : 2);
24250 break;
24251 case vector_loop:
24252 need_zero_guard = true;
24253 unroll_factor = 4;
24254 /* Find the widest supported mode. */
24255 move_mode = word_mode;
24256 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24257 != CODE_FOR_nothing)
24258 move_mode = GET_MODE_WIDER_MODE (move_mode);
24259
24260 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24261 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24262 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24263 {
24264 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24265 move_mode = mode_for_vector (word_mode, nunits);
24266 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24267 move_mode = word_mode;
24268 }
24269 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24270 break;
24271 case rep_prefix_8_byte:
24272 move_mode = DImode;
24273 break;
24274 case rep_prefix_4_byte:
24275 move_mode = SImode;
24276 break;
24277 case rep_prefix_1_byte:
24278 move_mode = QImode;
24279 break;
24280 }
24281 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24282 epilogue_size_needed = size_needed;
24283
24284 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24285 if (!TARGET_ALIGN_STRINGOPS || noalign)
24286 align = desired_align;
24287
24288 /* Step 1: Prologue guard. */
24289
24290 /* Alignment code needs count to be in register. */
24291 if (CONST_INT_P (count_exp) && desired_align > align)
24292 {
24293 if (INTVAL (count_exp) > desired_align
24294 && INTVAL (count_exp) > size_needed)
24295 {
24296 align_bytes
24297 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24298 if (align_bytes <= 0)
24299 align_bytes = 0;
24300 else
24301 align_bytes = desired_align - align_bytes;
24302 }
24303 if (align_bytes == 0)
24304 count_exp = force_reg (counter_mode (count_exp), count_exp);
24305 }
24306 gcc_assert (desired_align >= 1 && align >= 1);
24307
24308 /* Misaligned move sequences handle both prologue and epilogue at once.
24309 Default code generation results in smaller code for large alignments
24310 and also avoids redundant work when sizes are known precisely. */
24311 misaligned_prologue_used
24312 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24313 && MAX (desired_align, epilogue_size_needed) <= 32
24314 && desired_align <= epilogue_size_needed
24315 && ((desired_align > align && !align_bytes)
24316 || (!count && epilogue_size_needed > 1)));
24317
24318 /* Do the cheap promotion to allow better CSE across the
24319 main loop and epilogue (i.e. one load of the big constant in
24320 front of all the code).
24321 For now the misaligned move sequences do not have a fast path
24322 without broadcasting. */
24323 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24324 {
24325 if (alg == vector_loop)
24326 {
24327 gcc_assert (val_exp == const0_rtx);
24328 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24329 promoted_val = promote_duplicated_reg_to_size (val_exp,
24330 GET_MODE_SIZE (word_mode),
24331 desired_align, align);
24332 }
24333 else
24334 {
24335 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24336 desired_align, align);
24337 }
24338 }
24339 /* Misaligned move sequences handle both prologues and epilogues at once.
24340 Default code generation results in smaller code for large alignments and
24341 also avoids redundant work when sizes are known precisely. */
24342 if (misaligned_prologue_used)
24343 {
24344 /* The misaligned move prologue handles small blocks by itself. */
24345 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24346 (dst, src, &destreg, &srcreg,
24347 move_mode, promoted_val, vec_promoted_val,
24348 &count_exp,
24349 &jump_around_label,
24350 desired_align < align
24351 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24352 desired_align, align, &min_size, dynamic_check, issetmem);
24353 if (!issetmem)
24354 src = change_address (src, BLKmode, srcreg);
24355 dst = change_address (dst, BLKmode, destreg);
24356 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24357 epilogue_size_needed = 0;
24358 if (need_zero_guard && !min_size)
24359 {
24360 /* It is possible that we copied enough so the main loop will not
24361 execute. */
24362 gcc_assert (size_needed > 1);
24363 if (jump_around_label == NULL_RTX)
24364 jump_around_label = gen_label_rtx ();
24365 emit_cmp_and_jump_insns (count_exp,
24366 GEN_INT (size_needed),
24367 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24368 if (expected_size == -1
24369 || expected_size < (desired_align - align) / 2 + size_needed)
24370 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24371 else
24372 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24373 }
24374 }
24375 /* Ensure that alignment prologue won't copy past end of block. */
24376 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24377 {
24378 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24379 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24380 Make sure it is power of 2. */
24381 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24382
24383 /* To improve performance of small blocks, we jump around the VAL
24384 promoting code. This means that if the promoted VAL is not constant,
24385 we might not use it in the epilogue and have to use the byte
24386 loop variant. */
24387 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24388 force_loopy_epilogue = true;
24389 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24390 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24391 {
24392 /* If main algorithm works on QImode, no epilogue is needed.
24393 For small sizes just don't align anything. */
24394 if (size_needed == 1)
24395 desired_align = align;
24396 else
24397 goto epilogue;
24398 }
24399 else if (!count
24400 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24401 {
24402 label = gen_label_rtx ();
24403 emit_cmp_and_jump_insns (count_exp,
24404 GEN_INT (epilogue_size_needed),
24405 LTU, 0, counter_mode (count_exp), 1, label);
24406 if (expected_size == -1 || expected_size < epilogue_size_needed)
24407 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24408 else
24409 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24410 }
24411 }
24412
24413 /* Emit code to decide on runtime whether library call or inline should be
24414 used. */
24415 if (dynamic_check != -1)
24416 {
24417 if (!issetmem && CONST_INT_P (count_exp))
24418 {
24419 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24420 {
24421 emit_block_move_via_libcall (dst, src, count_exp, false);
24422 count_exp = const0_rtx;
24423 goto epilogue;
24424 }
24425 }
24426 else
24427 {
24428 rtx hot_label = gen_label_rtx ();
24429 if (jump_around_label == NULL_RTX)
24430 jump_around_label = gen_label_rtx ();
24431 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24432 LEU, 0, counter_mode (count_exp),
24433 1, hot_label);
24434 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24435 if (issetmem)
24436 set_storage_via_libcall (dst, count_exp, val_exp, false);
24437 else
24438 emit_block_move_via_libcall (dst, src, count_exp, false);
24439 emit_jump (jump_around_label);
24440 emit_label (hot_label);
24441 }
24442 }
24443
24444 /* Step 2: Alignment prologue. */
24445 /* Do the expensive promotion once we branched off the small blocks. */
24446 if (issetmem && !promoted_val)
24447 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24448 desired_align, align);
24449
24450 if (desired_align > align && !misaligned_prologue_used)
24451 {
24452 if (align_bytes == 0)
24453 {
24454 /* Except for the first move in the prologue, we no longer know
24455 the constant offset in aliasing info. It doesn't seem worth
24456 the pain to maintain it for the first move, so throw away
24457 the info early. */
24458 dst = change_address (dst, BLKmode, destreg);
24459 if (!issetmem)
24460 src = change_address (src, BLKmode, srcreg);
24461 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24462 promoted_val, vec_promoted_val,
24463 count_exp, align, desired_align,
24464 issetmem);
24465 /* At most desired_align - align bytes are copied. */
24466 if (min_size < (unsigned)(desired_align - align))
24467 min_size = 0;
24468 else
24469 min_size -= desired_align - align;
24470 }
24471 else
24472 {
24473 /* If we know how many bytes need to be stored before dst is
24474 sufficiently aligned, maintain aliasing info accurately. */
24475 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24476 srcreg,
24477 promoted_val,
24478 vec_promoted_val,
24479 desired_align,
24480 align_bytes,
24481 issetmem);
24482
24483 count_exp = plus_constant (counter_mode (count_exp),
24484 count_exp, -align_bytes);
24485 count -= align_bytes;
24486 min_size -= align_bytes;
24487 max_size -= align_bytes;
24488 }
24489 if (need_zero_guard
24490 && !min_size
24491 && (count < (unsigned HOST_WIDE_INT) size_needed
24492 || (align_bytes == 0
24493 && count < ((unsigned HOST_WIDE_INT) size_needed
24494 + desired_align - align))))
24495 {
24496 /* It is possible that we copied enough so the main loop will not
24497 execute. */
24498 gcc_assert (size_needed > 1);
24499 if (label == NULL_RTX)
24500 label = gen_label_rtx ();
24501 emit_cmp_and_jump_insns (count_exp,
24502 GEN_INT (size_needed),
24503 LTU, 0, counter_mode (count_exp), 1, label);
24504 if (expected_size == -1
24505 || expected_size < (desired_align - align) / 2 + size_needed)
24506 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24507 else
24508 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24509 }
24510 }
24511 if (label && size_needed == 1)
24512 {
24513 emit_label (label);
24514 LABEL_NUSES (label) = 1;
24515 label = NULL;
24516 epilogue_size_needed = 1;
24517 if (issetmem)
24518 promoted_val = val_exp;
24519 }
24520 else if (label == NULL_RTX && !misaligned_prologue_used)
24521 epilogue_size_needed = size_needed;
24522
24523 /* Step 3: Main loop. */
24524
24525 switch (alg)
24526 {
24527 case libcall:
24528 case no_stringop:
24529 case last_alg:
24530 gcc_unreachable ();
24531 case loop_1_byte:
24532 case loop:
24533 case unrolled_loop:
24534 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24535 count_exp, move_mode, unroll_factor,
24536 expected_size, issetmem);
24537 break;
24538 case vector_loop:
24539 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24540 vec_promoted_val, count_exp, move_mode,
24541 unroll_factor, expected_size, issetmem);
24542 break;
24543 case rep_prefix_8_byte:
24544 case rep_prefix_4_byte:
24545 case rep_prefix_1_byte:
24546 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24547 val_exp, count_exp, move_mode, issetmem);
24548 break;
24549 }
24550 /* Adjust properly the offset of src and dest memory for aliasing. */
24551 if (CONST_INT_P (count_exp))
24552 {
24553 if (!issetmem)
24554 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24555 (count / size_needed) * size_needed);
24556 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24557 (count / size_needed) * size_needed);
24558 }
24559 else
24560 {
24561 if (!issetmem)
24562 src = change_address (src, BLKmode, srcreg);
24563 dst = change_address (dst, BLKmode, destreg);
24564 }
24565
24566 /* Step 4: Epilogue to copy the remaining bytes. */
24567 epilogue:
24568 if (label)
24569 {
24570 /* When the main loop is done, COUNT_EXP might hold original count,
24571 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
24572 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
24573 bytes. Compensate if needed. */
24574
24575 if (size_needed < epilogue_size_needed)
24576 {
24577 tmp =
24578 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24579 GEN_INT (size_needed - 1), count_exp, 1,
24580 OPTAB_DIRECT);
24581 if (tmp != count_exp)
24582 emit_move_insn (count_exp, tmp);
24583 }
24584 emit_label (label);
24585 LABEL_NUSES (label) = 1;
24586 }
24587
24588 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24589 {
24590 if (force_loopy_epilogue)
24591 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24592 epilogue_size_needed);
24593 else
24594 {
24595 if (issetmem)
24596 expand_setmem_epilogue (dst, destreg, promoted_val,
24597 vec_promoted_val, count_exp,
24598 epilogue_size_needed);
24599 else
24600 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24601 epilogue_size_needed);
24602 }
24603 }
24604 if (jump_around_label)
24605 emit_label (jump_around_label);
24606 return true;
24607 }
24608
24609
24610 /* Expand the appropriate insns for doing strlen if not just doing
24611 repnz; scasb
24612
24613 out = result, initialized with the start address
24614 align_rtx = alignment of the address.
24615 scratch = scratch register, initialized with the start address when
24616 not aligned, otherwise undefined
24617
24618 This is just the body. It needs the initializations mentioned above and
24619 some address computing at the end. These things are done in i386.md. */
24620
24621 static void
24622 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24623 {
24624 int align;
24625 rtx tmp;
24626 rtx align_2_label = NULL_RTX;
24627 rtx align_3_label = NULL_RTX;
24628 rtx align_4_label = gen_label_rtx ();
24629 rtx end_0_label = gen_label_rtx ();
24630 rtx mem;
24631 rtx tmpreg = gen_reg_rtx (SImode);
24632 rtx scratch = gen_reg_rtx (SImode);
24633 rtx cmp;
24634
24635 align = 0;
24636 if (CONST_INT_P (align_rtx))
24637 align = INTVAL (align_rtx);
24638
24639 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24640
24641 /* Is there a known alignment and is it less than 4? */
24642 if (align < 4)
24643 {
24644 rtx scratch1 = gen_reg_rtx (Pmode);
24645 emit_move_insn (scratch1, out);
24646 /* Is there a known alignment and is it not 2? */
24647 if (align != 2)
24648 {
24649 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24650 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24651
24652 /* Leave just the 3 lower bits. */
24653 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24654 NULL_RTX, 0, OPTAB_WIDEN);
24655
24656 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24657 Pmode, 1, align_4_label);
24658 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24659 Pmode, 1, align_2_label);
24660 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24661 Pmode, 1, align_3_label);
24662 }
24663 else
24664 {
24665 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24666 check whether it is aligned to 4 bytes. */
24667
24668 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24669 NULL_RTX, 0, OPTAB_WIDEN);
24670
24671 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24672 Pmode, 1, align_4_label);
24673 }
24674
24675 mem = change_address (src, QImode, out);
24676
24677 /* Now compare the bytes. */
24678
24679 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
24680 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24681 QImode, 1, end_0_label);
24682
24683 /* Increment the address. */
24684 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24685
24686 /* Not needed with an alignment of 2 */
24687 if (align != 2)
24688 {
24689 emit_label (align_2_label);
24690
24691 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24692 end_0_label);
24693
24694 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24695
24696 emit_label (align_3_label);
24697 }
24698
24699 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24700 end_0_label);
24701
24702 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24703 }
24704
24705 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
24706 align this loop; it only makes the program larger and does not help to
24707 speed it up. */
24708 emit_label (align_4_label);
24709
24710 mem = change_address (src, SImode, out);
24711 emit_move_insn (scratch, mem);
24712 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24713
24714 /* This formula yields a nonzero result iff one of the bytes is zero.
24715 This saves three branches inside the loop and many cycles. */
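/* The sequence below computes (x - 0x01010101) & ~x & 0x80808080, which is
nonzero exactly when some byte of x is zero. E.g. for x == 0x11002233:
x - 0x01010101 == 0x0fff2132, ~x == 0xeeffddcc, and the two ANDs leave
0x00800000, the high bit of the zero byte. */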
24716
24717 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24718 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24719 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24720 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24721 gen_int_mode (0x80808080, SImode)));
24722 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24723 align_4_label);
24724
24725 if (TARGET_CMOVE)
24726 {
24727 rtx reg = gen_reg_rtx (SImode);
24728 rtx reg2 = gen_reg_rtx (Pmode);
24729 emit_move_insn (reg, tmpreg);
24730 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24731
24732 /* If zero is not in the first two bytes, move two bytes forward. */
24733 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24734 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24735 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24736 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24737 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24738 reg,
24739 tmpreg)));
24740 /* Emit lea manually to avoid clobbering of flags. */
24741 emit_insn (gen_rtx_SET (SImode, reg2,
24742 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24743
24744 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24745 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24746 emit_insn (gen_rtx_SET (VOIDmode, out,
24747 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24748 reg2,
24749 out)));
24750 }
24751 else
24752 {
24753 rtx end_2_label = gen_label_rtx ();
24754 /* Is zero in the first two bytes? */
24755
24756 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24757 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24758 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24759 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24760 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24761 pc_rtx);
24762 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24763 JUMP_LABEL (tmp) = end_2_label;
24764
24765 /* Not in the first two. Move two bytes forward. */
24766 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24767 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24768
24769 emit_label (end_2_label);
24770
24771 }
24772
24773 /* Avoid branch in fixing the byte. */
24774 tmpreg = gen_lowpart (QImode, tmpreg);
24775 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24776 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24777 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24778 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24779
24780 emit_label (end_0_label);
24781 }
24782
24783 /* Expand strlen. */
24784
24785 bool
24786 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24787 {
24788 rtx addr, scratch1, scratch2, scratch3, scratch4;
24789
24790 /* The generic case of the strlen expander is long. Avoid its
24791 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
24792
24793 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24794 && !TARGET_INLINE_ALL_STRINGOPS
24795 && !optimize_insn_for_size_p ()
24796 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24797 return false;
24798
24799 addr = force_reg (Pmode, XEXP (src, 0));
24800 scratch1 = gen_reg_rtx (Pmode);
24801
24802 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24803 && !optimize_insn_for_size_p ())
24804 {
24805 /* Well it seems that some optimizer does not combine a call like
24806 foo(strlen(bar), strlen(bar));
24807 when the move and the subtraction are done here. It does calculate
24808 the length just once when these instructions are done inside of
24809 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
24810 often used and I use one fewer register for the lifetime of
24811 output_strlen_unroll(), this is better. */
24812
24813 emit_move_insn (out, addr);
24814
24815 ix86_expand_strlensi_unroll_1 (out, src, align);
24816
24817 /* strlensi_unroll_1 returns the address of the zero at the end of
24818 the string, like memchr(), so compute the length by subtracting
24819 the start address. */
24820 emit_insn (ix86_gen_sub3 (out, out, addr));
24821 }
24822 else
24823 {
24824 rtx unspec;
24825
24826 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24827 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24828 return false;
24829
24830 scratch2 = gen_reg_rtx (Pmode);
24831 scratch3 = gen_reg_rtx (Pmode);
24832 scratch4 = force_reg (Pmode, constm1_rtx);
24833
24834 emit_move_insn (scratch3, addr);
24835 eoschar = force_reg (QImode, eoschar);
24836
24837 src = replace_equiv_address_nv (src, scratch3);
24838
24839 /* If .md starts supporting :P, this can be done in .md. */
24840 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24841 scratch4), UNSPEC_SCAS);
24842 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24843 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24844 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
24845 }
24846 return true;
24847 }
24848
24849 /* For a given symbol (function) construct code to compute the address of its PLT
24850 entry in the large x86-64 PIC model. */
24851 static rtx
24852 construct_plt_address (rtx symbol)
24853 {
24854 rtx tmp, unspec;
24855
24856 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24857 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24858 gcc_assert (Pmode == DImode);
24859
24860 tmp = gen_reg_rtx (Pmode);
24861 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
24862
24863 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24864 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24865 return tmp;
24866 }
24867
24868 rtx
24869 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24870 rtx callarg2,
24871 rtx pop, bool sibcall)
24872 {
24873 unsigned int const cregs_size
24874 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24875 rtx vec[3 + cregs_size];
24876 rtx use = NULL, call;
24877 unsigned int vec_len = 0;
24878
24879 if (pop == const0_rtx)
24880 pop = NULL;
24881 gcc_assert (!TARGET_64BIT || !pop);
24882
24883 if (TARGET_MACHO && !TARGET_64BIT)
24884 {
24885 #if TARGET_MACHO
24886 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24887 fnaddr = machopic_indirect_call_target (fnaddr);
24888 #endif
24889 }
24890 else
24891 {
24892 /* Static functions and indirect calls don't need the pic register. */
24893 if (flag_pic
24894 && (!TARGET_64BIT
24895 || (ix86_cmodel == CM_LARGE_PIC
24896 && DEFAULT_ABI != MS_ABI))
24897 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24898 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24899 use_reg (&use, pic_offset_table_rtx);
24900 }
24901
24902 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24903 {
24904 rtx al = gen_rtx_REG (QImode, AX_REG);
24905 emit_move_insn (al, callarg2);
24906 use_reg (&use, al);
24907 }
24908
24909 if (ix86_cmodel == CM_LARGE_PIC
24910 && !TARGET_PECOFF
24911 && MEM_P (fnaddr)
24912 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24913 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24914 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24915 else if (sibcall
24916 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24917 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24918 {
24919 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24920 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24921 }
24922
24923 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24924 if (retval)
24925 call = gen_rtx_SET (VOIDmode, retval, call);
24926 vec[vec_len++] = call;
24927
24928 if (pop)
24929 {
24930 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24931 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24932 vec[vec_len++] = pop;
24933 }
24934
24935 if (TARGET_64BIT_MS_ABI
24936 && (!callarg2 || INTVAL (callarg2) != -2))
24937 {
24938 unsigned i;
24939
24940 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
24941 UNSPEC_MS_TO_SYSV_CALL);
24942
24943 for (i = 0; i < cregs_size; i++)
24944 {
24945 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
24946 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
24947
24948 vec[vec_len++]
24949 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
24950 }
24951 }
24952
24953 if (vec_len > 1)
24954 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
24955 call = emit_call_insn (call);
24956 if (use)
24957 CALL_INSN_FUNCTION_USAGE (call) = use;
24958
24959 return call;
24960 }
24961
24962 /* Output the assembly for a call instruction. */
24963
24964 const char *
24965 ix86_output_call_insn (rtx insn, rtx call_op)
24966 {
24967 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
24968 bool seh_nop_p = false;
24969 const char *xasm;
24970
24971 if (SIBLING_CALL_P (insn))
24972 {
24973 if (direct_p)
24974 xasm = "jmp\t%P0";
24975 /* SEH epilogue detection requires the indirect branch case
24976 to include REX.W. */
24977 else if (TARGET_SEH)
24978 xasm = "rex.W jmp %A0";
24979 else
24980 xasm = "jmp\t%A0";
24981
24982 output_asm_insn (xasm, &call_op);
24983 return "";
24984 }
24985
24986 /* SEH unwinding can require an extra nop to be emitted in several
24987 circumstances. Determine if we have one of those. */
24988 if (TARGET_SEH)
24989 {
24990 rtx i;
24991
24992 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
24993 {
24994 /* If we get to another real insn, we don't need the nop. */
24995 if (INSN_P (i))
24996 break;
24997
24998 /* If we get to the epilogue note, prevent a catch region from
24999 being adjacent to the standard epilogue sequence. If non-
25000 call-exceptions, we'll have done this during epilogue emission. */
25001 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25002 && !flag_non_call_exceptions
25003 && !can_throw_internal (insn))
25004 {
25005 seh_nop_p = true;
25006 break;
25007 }
25008 }
25009
25010 /* If we didn't find a real insn following the call, prevent the
25011 unwinder from looking into the next function. */
25012 if (i == NULL)
25013 seh_nop_p = true;
25014 }
25015
25016 if (direct_p)
25017 xasm = "call\t%P0";
25018 else
25019 xasm = "call\t%A0";
25020
25021 output_asm_insn (xasm, &call_op);
25022
25023 if (seh_nop_p)
25024 return "nop";
25025
25026 return "";
25027 }
25028 \f
25029 /* Clear stack slot assignments remembered from previous functions.
25030 This is called from INIT_EXPANDERS once before RTL is emitted for each
25031 function. */
25032
25033 static struct machine_function *
25034 ix86_init_machine_status (void)
25035 {
25036 struct machine_function *f;
25037
25038 f = ggc_cleared_alloc<machine_function> ();
25039 f->use_fast_prologue_epilogue_nregs = -1;
25040 f->call_abi = ix86_abi;
25041
25042 return f;
25043 }
25044
25045 /* Return a MEM corresponding to a stack slot with mode MODE.
25046 Allocate a new slot if necessary.
25047
25048 The RTL for a function can have several slots available: N is
25049 which slot to use. */
25050
25051 rtx
25052 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25053 {
25054 struct stack_local_entry *s;
25055
25056 gcc_assert (n < MAX_386_STACK_LOCALS);
25057
25058 for (s = ix86_stack_locals; s; s = s->next)
25059 if (s->mode == mode && s->n == n)
25060 return validize_mem (copy_rtx (s->rtl));
25061
25062 s = ggc_alloc<stack_local_entry> ();
25063 s->n = n;
25064 s->mode = mode;
25065 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25066
25067 s->next = ix86_stack_locals;
25068 ix86_stack_locals = s;
25069 return validize_mem (s->rtl);
25070 }
25071
25072 static void
25073 ix86_instantiate_decls (void)
25074 {
25075 struct stack_local_entry *s;
25076
25077 for (s = ix86_stack_locals; s; s = s->next)
25078 if (s->rtl != NULL_RTX)
25079 instantiate_decl_rtl (s->rtl);
25080 }
25081 \f
25082 /* Check whether x86 address PARTS is a pc-relative address. */
25083
25084 static bool
25085 rip_relative_addr_p (struct ix86_address *parts)
25086 {
25087 rtx base, index, disp;
25088
25089 base = parts->base;
25090 index = parts->index;
25091 disp = parts->disp;
25092
25093 if (disp && !base && !index)
25094 {
25095 if (TARGET_64BIT)
25096 {
25097 rtx symbol = disp;
25098
25099 if (GET_CODE (disp) == CONST)
25100 symbol = XEXP (disp, 0);
25101 if (GET_CODE (symbol) == PLUS
25102 && CONST_INT_P (XEXP (symbol, 1)))
25103 symbol = XEXP (symbol, 0);
25104
25105 if (GET_CODE (symbol) == LABEL_REF
25106 || (GET_CODE (symbol) == SYMBOL_REF
25107 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25108 || (GET_CODE (symbol) == UNSPEC
25109 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25110 || XINT (symbol, 1) == UNSPEC_PCREL
25111 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25112 return true;
25113 }
25114 }
25115 return false;
25116 }
25117
25118 /* Calculate the length of the memory address in the instruction encoding.
25119 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25120 or other prefixes. We never generate addr32 prefix for LEA insn. */
25121
25122 int
25123 memory_address_length (rtx addr, bool lea)
25124 {
25125 struct ix86_address parts;
25126 rtx base, index, disp;
25127 int len;
25128 int ok;
25129
25130 if (GET_CODE (addr) == PRE_DEC
25131 || GET_CODE (addr) == POST_INC
25132 || GET_CODE (addr) == PRE_MODIFY
25133 || GET_CODE (addr) == POST_MODIFY)
25134 return 0;
25135
25136 ok = ix86_decompose_address (addr, &parts);
25137 gcc_assert (ok);
25138
25139 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25140
25141 /* If this is not LEA instruction, add the length of addr32 prefix. */
25142 if (TARGET_64BIT && !lea
25143 && (SImode_address_operand (addr, VOIDmode)
25144 || (parts.base && GET_MODE (parts.base) == SImode)
25145 || (parts.index && GET_MODE (parts.index) == SImode)))
25146 len++;
25147
25148 base = parts.base;
25149 index = parts.index;
25150 disp = parts.disp;
25151
25152 if (base && GET_CODE (base) == SUBREG)
25153 base = SUBREG_REG (base);
25154 if (index && GET_CODE (index) == SUBREG)
25155 index = SUBREG_REG (index);
25156
25157 gcc_assert (base == NULL_RTX || REG_P (base));
25158 gcc_assert (index == NULL_RTX || REG_P (index));
25159
25160 /* Rule of thumb:
25161 - esp as the base always wants an index,
25162 - ebp as the base always wants a displacement,
25163 - r12 as the base always wants an index,
25164 - r13 as the base always wants a displacement. */
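/* For example (standard ModRM/SIB encoding): (%eax) needs only the one-byte
   ModRM; (%esp) and (%r12) additionally need a SIB byte because r/m = 100
   selects the SIB form; (%ebp) and (%r13) must be emitted as 0(%ebp)/0(%r13)
   because mod = 00 with that base means disp32 (or RIP-relative in 64-bit
   mode).  (Illustrative.)  */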
25165
25166 /* Register Indirect. */
25167 if (base && !index && !disp)
25168 {
25169 /* esp (for its index) and ebp (for its displacement) need
25170 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25171 code. */
25172 if (base == arg_pointer_rtx
25173 || base == frame_pointer_rtx
25174 || REGNO (base) == SP_REG
25175 || REGNO (base) == BP_REG
25176 || REGNO (base) == R12_REG
25177 || REGNO (base) == R13_REG)
25178 len++;
25179 }
25180
25181 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25182 is not disp32, but disp32(%rip), so for disp32
25183 SIB byte is needed, unless print_operand_address
25184 optimizes it into disp32(%rip) or (%rip) is implied
25185 by UNSPEC. */
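/* E.g. "movl foo(%rip), %eax" encodes the address as just a disp32, while
   an absolute "movl foo, %eax" in 64-bit code needs an additional SIB byte
   (no base, no index) to express a plain disp32.  (Illustrative.)  */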
25186 else if (disp && !base && !index)
25187 {
25188 len += 4;
25189 if (rip_relative_addr_p (&parts))
25190 len++;
25191 }
25192 else
25193 {
25194 /* Find the length of the displacement constant. */
25195 if (disp)
25196 {
25197 if (base && satisfies_constraint_K (disp))
25198 len += 1;
25199 else
25200 len += 4;
25201 }
25202 /* ebp always wants a displacement. Similarly r13. */
25203 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25204 len++;
25205
25206 /* An index requires the two-byte modrm form.... */
25207 if (index
25208 /* ...like esp (or r12), which always wants an index. */
25209 || base == arg_pointer_rtx
25210 || base == frame_pointer_rtx
25211 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25212 len++;
25213 }
25214
25215 return len;
25216 }
25217
25218 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
25219 is set, expect that the insn has an 8-bit immediate alternative. */
25220 int
25221 ix86_attr_length_immediate_default (rtx insn, bool shortform)
25222 {
25223 int len = 0;
25224 int i;
25225 extract_insn_cached (insn);
25226 for (i = recog_data.n_operands - 1; i >= 0; --i)
25227 if (CONSTANT_P (recog_data.operand[i]))
25228 {
25229 enum attr_mode mode = get_attr_mode (insn);
25230
25231 gcc_assert (!len);
25232 if (shortform && CONST_INT_P (recog_data.operand[i]))
25233 {
25234 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25235 switch (mode)
25236 {
25237 case MODE_QI:
25238 len = 1;
25239 continue;
25240 case MODE_HI:
25241 ival = trunc_int_for_mode (ival, HImode);
25242 break;
25243 case MODE_SI:
25244 ival = trunc_int_for_mode (ival, SImode);
25245 break;
25246 default:
25247 break;
25248 }
25249 if (IN_RANGE (ival, -128, 127))
25250 {
25251 len = 1;
25252 continue;
25253 }
25254 }
25255 switch (mode)
25256 {
25257 case MODE_QI:
25258 len = 1;
25259 break;
25260 case MODE_HI:
25261 len = 2;
25262 break;
25263 case MODE_SI:
25264 len = 4;
25265 break;
25266 /* Immediates for DImode instructions are encoded
25267 as 32bit sign extended values. */
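/* E.g. "addq $imm, %rax" carries at most a sign-extended 32-bit immediate;
   the mov-imm64 form ("movabs") is the only instruction with a genuine
   8-byte immediate and is presumably covered by explicit length attributes
   in the move patterns rather than by this default.  */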
25268 case MODE_DI:
25269 len = 4;
25270 break;
25271 default:
25272 fatal_insn ("unknown insn mode", insn);
25273 }
25274 }
25275 return len;
25276 }
25277
25278 /* Compute default value for "length_address" attribute. */
25279 int
25280 ix86_attr_length_address_default (rtx insn)
25281 {
25282 int i;
25283
25284 if (get_attr_type (insn) == TYPE_LEA)
25285 {
25286 rtx set = PATTERN (insn), addr;
25287
25288 if (GET_CODE (set) == PARALLEL)
25289 set = XVECEXP (set, 0, 0);
25290
25291 gcc_assert (GET_CODE (set) == SET);
25292
25293 addr = SET_SRC (set);
25294
25295 return memory_address_length (addr, true);
25296 }
25297
25298 extract_insn_cached (insn);
25299 for (i = recog_data.n_operands - 1; i >= 0; --i)
25300 if (MEM_P (recog_data.operand[i]))
25301 {
25302 constrain_operands_cached (reload_completed);
25303 if (which_alternative != -1)
25304 {
25305 const char *constraints = recog_data.constraints[i];
25306 int alt = which_alternative;
25307
25308 while (*constraints == '=' || *constraints == '+')
25309 constraints++;
25310 while (alt-- > 0)
25311 while (*constraints++ != ',')
25312 ;
25313 /* Skip ignored operands. */
25314 if (*constraints == 'X')
25315 continue;
25316 }
25317 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25318 }
25319 return 0;
25320 }
25321
25322 /* Compute default value for "length_vex" attribute. It includes
25323 2 or 3 byte VEX prefix and 1 opcode byte. */
25324
25325 int
25326 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
25327 {
25328 int i;
25329
25330 /* Only the 0F opcode map can use the 2-byte VEX prefix, and the VEX.W bit
25331 requires the 3-byte VEX prefix. */
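/* E.g. "vaddps %xmm1, %xmm2, %xmm0" fits the 2-byte (C5) prefix, while an
   instruction needing VEX.W, an 0F38/0F3A opcode map, or the X/B
   register-extension bits (say, a memory operand with %r8 as base) needs
   the 3-byte (C4) form.  (Illustrative; the opcode-map case is what the
   has_0f_opcode argument captures.)  */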
25332 if (!has_0f_opcode || has_vex_w)
25333 return 3 + 1;
25334
25335 /* We can always use 2 byte VEX prefix in 32bit. */
25336 if (!TARGET_64BIT)
25337 return 2 + 1;
25338
25339 extract_insn_cached (insn);
25340
25341 for (i = recog_data.n_operands - 1; i >= 0; --i)
25342 if (REG_P (recog_data.operand[i]))
25343 {
25344 /* REX.W bit uses 3 byte VEX prefix. */
25345 if (GET_MODE (recog_data.operand[i]) == DImode
25346 && GENERAL_REG_P (recog_data.operand[i]))
25347 return 3 + 1;
25348 }
25349 else
25350 {
25351 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25352 if (MEM_P (recog_data.operand[i])
25353 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25354 return 3 + 1;
25355 }
25356
25357 return 2 + 1;
25358 }
25359 \f
25360 /* Return the maximum number of instructions a cpu can issue. */
25361
25362 static int
25363 ix86_issue_rate (void)
25364 {
25365 switch (ix86_tune)
25366 {
25367 case PROCESSOR_PENTIUM:
25368 case PROCESSOR_BONNELL:
25369 case PROCESSOR_SILVERMONT:
25370 case PROCESSOR_INTEL:
25371 case PROCESSOR_K6:
25372 case PROCESSOR_BTVER2:
25373 case PROCESSOR_PENTIUM4:
25374 case PROCESSOR_NOCONA:
25375 return 2;
25376
25377 case PROCESSOR_PENTIUMPRO:
25378 case PROCESSOR_ATHLON:
25379 case PROCESSOR_K8:
25380 case PROCESSOR_AMDFAM10:
25381 case PROCESSOR_GENERIC:
25382 case PROCESSOR_BTVER1:
25383 return 3;
25384
25385 case PROCESSOR_BDVER1:
25386 case PROCESSOR_BDVER2:
25387 case PROCESSOR_BDVER3:
25388 case PROCESSOR_BDVER4:
25389 case PROCESSOR_CORE2:
25390 case PROCESSOR_NEHALEM:
25391 case PROCESSOR_SANDYBRIDGE:
25392 case PROCESSOR_HASWELL:
25393 return 4;
25394
25395 default:
25396 return 1;
25397 }
25398 }
25399
25400 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
25401 by DEP_INSN and nothing else set by DEP_INSN. */
25402
25403 static bool
25404 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
25405 {
25406 rtx set, set2;
25407
25408 /* Simplify the test for uninteresting insns. */
25409 if (insn_type != TYPE_SETCC
25410 && insn_type != TYPE_ICMOV
25411 && insn_type != TYPE_FCMOV
25412 && insn_type != TYPE_IBR)
25413 return false;
25414
25415 if ((set = single_set (dep_insn)) != 0)
25416 {
25417 set = SET_DEST (set);
25418 set2 = NULL_RTX;
25419 }
25420 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25421 && XVECLEN (PATTERN (dep_insn), 0) == 2
25422 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25423 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25424 {
25425 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25426 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25427 }
25428 else
25429 return false;
25430
25431 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25432 return false;
25433
25434 /* This test is true if the dependent insn reads the flags but
25435 not any other potentially set register. */
25436 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25437 return false;
25438
25439 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25440 return false;
25441
25442 return true;
25443 }
25444
25445 /* Return true iff USE_INSN has a memory address with operands set by
25446 SET_INSN. */
25447
25448 bool
25449 ix86_agi_dependent (rtx set_insn, rtx use_insn)
25450 {
25451 int i;
25452 extract_insn_cached (use_insn);
25453 for (i = recog_data.n_operands - 1; i >= 0; --i)
25454 if (MEM_P (recog_data.operand[i]))
25455 {
25456 rtx addr = XEXP (recog_data.operand[i], 0);
25457 return modified_in_p (addr, set_insn) != 0;
25458 }
25459 return false;
25460 }
25461
25462 /* Helper function for exact_store_load_dependency.
25463 Return true if addr is found in insn. */
25464 static bool
25465 exact_dependency_1 (rtx addr, rtx insn)
25466 {
25467 enum rtx_code code;
25468 const char *format_ptr;
25469 int i, j;
25470
25471 code = GET_CODE (insn);
25472 switch (code)
25473 {
25474 case MEM:
25475 if (rtx_equal_p (addr, insn))
25476 return true;
25477 break;
25478 case REG:
25479 CASE_CONST_ANY:
25480 case SYMBOL_REF:
25481 case CODE_LABEL:
25482 case PC:
25483 case CC0:
25484 case EXPR_LIST:
25485 return false;
25486 default:
25487 break;
25488 }
25489
25490 format_ptr = GET_RTX_FORMAT (code);
25491 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25492 {
25493 switch (*format_ptr++)
25494 {
25495 case 'e':
25496 if (exact_dependency_1 (addr, XEXP (insn, i)))
25497 return true;
25498 break;
25499 case 'E':
25500 for (j = 0; j < XVECLEN (insn, i); j++)
25501 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25502 return true;
25503 break;
25504 }
25505 }
25506 return false;
25507 }
25508
25509 /* Return true if there is an exact dependency between the store and the load,
25510 i.e. the same memory address is used in both. */
25511 static bool
25512 exact_store_load_dependency (rtx store, rtx load)
25513 {
25514 rtx set1, set2;
25515
25516 set1 = single_set (store);
25517 if (!set1)
25518 return false;
25519 if (!MEM_P (SET_DEST (set1)))
25520 return false;
25521 set2 = single_set (load);
25522 if (!set2)
25523 return false;
25524 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25525 return true;
25526 return false;
25527 }
25528
25529 static int
25530 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
25531 {
25532 enum attr_type insn_type, dep_insn_type;
25533 enum attr_memory memory;
25534 rtx set, set2;
25535 int dep_insn_code_number;
25536
25537 /* Anti and output dependencies have zero cost on all CPUs. */
25538 if (REG_NOTE_KIND (link) != 0)
25539 return 0;
25540
25541 dep_insn_code_number = recog_memoized (dep_insn);
25542
25543 /* If we can't recognize the insns, we can't really do anything. */
25544 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25545 return cost;
25546
25547 insn_type = get_attr_type (insn);
25548 dep_insn_type = get_attr_type (dep_insn);
25549
25550 switch (ix86_tune)
25551 {
25552 case PROCESSOR_PENTIUM:
25553 /* Address Generation Interlock adds a cycle of latency. */
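/* E.g. on the Pentium, "add $4, %esi" immediately followed by
   "mov (%esi), %eax" pays an extra cycle because %esi is used for address
   generation right after being written.  (Illustrative.)  */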
25554 if (insn_type == TYPE_LEA)
25555 {
25556 rtx addr = PATTERN (insn);
25557
25558 if (GET_CODE (addr) == PARALLEL)
25559 addr = XVECEXP (addr, 0, 0);
25560
25561 gcc_assert (GET_CODE (addr) == SET);
25562
25563 addr = SET_SRC (addr);
25564 if (modified_in_p (addr, dep_insn))
25565 cost += 1;
25566 }
25567 else if (ix86_agi_dependent (dep_insn, insn))
25568 cost += 1;
25569
25570 /* ??? Compares pair with jump/setcc. */
25571 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25572 cost = 0;
25573
25574 /* Floating point stores require value to be ready one cycle earlier. */
25575 if (insn_type == TYPE_FMOV
25576 && get_attr_memory (insn) == MEMORY_STORE
25577 && !ix86_agi_dependent (dep_insn, insn))
25578 cost += 1;
25579 break;
25580
25581 case PROCESSOR_PENTIUMPRO:
25582 /* INT->FP conversion is expensive. */
25583 if (get_attr_fp_int_src (dep_insn))
25584 cost += 5;
25585
25586 /* There is one cycle extra latency between an FP op and a store. */
25587 if (insn_type == TYPE_FMOV
25588 && (set = single_set (dep_insn)) != NULL_RTX
25589 && (set2 = single_set (insn)) != NULL_RTX
25590 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25591 && MEM_P (SET_DEST (set2)))
25592 cost += 1;
25593
25594 memory = get_attr_memory (insn);
25595
25596 /* Account for the ability of the reorder buffer to hide load latency by
25597 executing the load in parallel with the previous instruction, when the
25598 previous instruction is not needed to compute the address. */
25599 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25600 && !ix86_agi_dependent (dep_insn, insn))
25601 {
25602 /* Claim moves to take one cycle, as the core can issue one load
25603 at a time and the next load can start a cycle later. */
25604 if (dep_insn_type == TYPE_IMOV
25605 || dep_insn_type == TYPE_FMOV)
25606 cost = 1;
25607 else if (cost > 1)
25608 cost--;
25609 }
25610 break;
25611
25612 case PROCESSOR_K6:
25613 /* The esp dependency is resolved before
25614 the instruction is really finished. */
25615 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25616 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25617 return 1;
25618
25619 /* INT->FP conversion is expensive. */
25620 if (get_attr_fp_int_src (dep_insn))
25621 cost += 5;
25622
25623 memory = get_attr_memory (insn);
25624
25625 /* Account for the ability of the reorder buffer to hide load latency by
25626 executing the load in parallel with the previous instruction, when the
25627 previous instruction is not needed to compute the address. */
25628 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25629 && !ix86_agi_dependent (dep_insn, insn))
25630 {
25631 /* Claim moves to take one cycle, as the core can issue one load
25632 at a time and the next load can start a cycle later. */
25633 if (dep_insn_type == TYPE_IMOV
25634 || dep_insn_type == TYPE_FMOV)
25635 cost = 1;
25636 else if (cost > 2)
25637 cost -= 2;
25638 else
25639 cost = 1;
25640 }
25641 break;
25642
25643 case PROCESSOR_AMDFAM10:
25644 case PROCESSOR_BDVER1:
25645 case PROCESSOR_BDVER2:
25646 case PROCESSOR_BDVER3:
25647 case PROCESSOR_BDVER4:
25648 case PROCESSOR_BTVER1:
25649 case PROCESSOR_BTVER2:
25650 case PROCESSOR_GENERIC:
25651 /* The stack engine allows push and pop instructions to execute in parallel. */
25652 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25653 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25654 return 0;
25655 /* FALLTHRU */
25656
25657 case PROCESSOR_ATHLON:
25658 case PROCESSOR_K8:
25659 memory = get_attr_memory (insn);
25660
25661 /* Account for the ability of the reorder buffer to hide load latency by
25662 executing the load in parallel with the previous instruction, when the
25663 previous instruction is not needed to compute the address. */
25664 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25665 && !ix86_agi_dependent (dep_insn, insn))
25666 {
25667 enum attr_unit unit = get_attr_unit (insn);
25668 int loadcost = 3;
25669
25670 /* Because of the difference between the length of integer and
25671 floating unit pipeline preparation stages, the memory operands
25672 for floating point are cheaper.
25673
25674 ??? For Athlon the difference is most probably 2. */
25675 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25676 loadcost = 3;
25677 else
25678 loadcost = TARGET_ATHLON ? 2 : 0;
25679
25680 if (cost >= loadcost)
25681 cost -= loadcost;
25682 else
25683 cost = 0;
25684 }
25685 break;
25686
25687 case PROCESSOR_CORE2:
25688 case PROCESSOR_NEHALEM:
25689 case PROCESSOR_SANDYBRIDGE:
25690 case PROCESSOR_HASWELL:
25691 /* The stack engine allows push and pop instructions to execute in parallel. */
25692 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25693 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25694 return 0;
25695
25696 memory = get_attr_memory (insn);
25697
25698 /* Account for the ability of the reorder buffer to hide load latency by
25699 executing the load in parallel with the previous instruction, when the
25700 previous instruction is not needed to compute the address. */
25701 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25702 && !ix86_agi_dependent (dep_insn, insn))
25703 {
25704 if (cost >= 4)
25705 cost -= 4;
25706 else
25707 cost = 0;
25708 }
25709 break;
25710
25711 case PROCESSOR_SILVERMONT:
25712 case PROCESSOR_INTEL:
25713 if (!reload_completed)
25714 return cost;
25715
25716 /* Increase cost of integer loads. */
25717 memory = get_attr_memory (dep_insn);
25718 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25719 {
25720 enum attr_unit unit = get_attr_unit (dep_insn);
25721 if (unit == UNIT_INTEGER && cost == 1)
25722 {
25723 if (memory == MEMORY_LOAD)
25724 cost = 3;
25725 else
25726 {
25727 /* Increase cost of ld/st for short int types only
25728 because of store forwarding issue. */
25729 rtx set = single_set (dep_insn);
25730 if (set && (GET_MODE (SET_DEST (set)) == QImode
25731 || GET_MODE (SET_DEST (set)) == HImode))
25732 {
25733 /* Increase cost of store/load insn if exact
25734 dependence exists and it is load insn. */
25735 enum attr_memory insn_memory = get_attr_memory (insn);
25736 if (insn_memory == MEMORY_LOAD
25737 && exact_store_load_dependency (dep_insn, insn))
25738 cost = 3;
25739 }
25740 }
25741 }
25742 }
25743
25744 default:
25745 break;
25746 }
25747
25748 return cost;
25749 }
25750
25751 /* How many alternative schedules to try. This should be as wide as the
25752 scheduling freedom in the DFA, but no wider. Making this value too
25753 large results in extra work for the scheduler. */
25754
25755 static int
25756 ia32_multipass_dfa_lookahead (void)
25757 {
25758 switch (ix86_tune)
25759 {
25760 case PROCESSOR_PENTIUM:
25761 return 2;
25762
25763 case PROCESSOR_PENTIUMPRO:
25764 case PROCESSOR_K6:
25765 return 1;
25766
25767 case PROCESSOR_BDVER1:
25768 case PROCESSOR_BDVER2:
25769 case PROCESSOR_BDVER3:
25770 case PROCESSOR_BDVER4:
25771 /* We use lookahead value 4 for BD both before and after reload
25772 schedules. The plan is to also use value 8 for -O3. */
25773 return 4;
25774
25775 case PROCESSOR_CORE2:
25776 case PROCESSOR_NEHALEM:
25777 case PROCESSOR_SANDYBRIDGE:
25778 case PROCESSOR_HASWELL:
25779 case PROCESSOR_BONNELL:
25780 case PROCESSOR_SILVERMONT:
25781 case PROCESSOR_INTEL:
25782 /* Generally, we want haifa-sched:max_issue() to look ahead as far
25783 as the number of instructions that can be executed in a cycle, i.e.,
25784 issue_rate. I wonder why tuning for many CPUs does not do this. */
25785 if (reload_completed)
25786 return ix86_issue_rate ();
25787 /* Don't use lookahead for pre-reload schedule to save compile time. */
25788 return 0;
25789
25790 default:
25791 return 0;
25792 }
25793 }
25794
25795 /* Return true if target platform supports macro-fusion. */
25796
25797 static bool
25798 ix86_macro_fusion_p ()
25799 {
25800 return TARGET_FUSE_CMP_AND_BRANCH;
25801 }
25802
25803 /* Check whether the current microarchitecture supports macro fusion
25804 for insn pair "CONDGEN + CONDJMP". Refer to
25805 "Intel Architectures Optimization Reference Manual". */
25806
25807 static bool
25808 ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
25809 {
25810 rtx src, dest;
25811 rtx single_set = single_set (condgen);
25812 enum rtx_code ccode;
25813 rtx compare_set = NULL_RTX, test_if, cond;
25814 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25815
25816 if (get_attr_type (condgen) != TYPE_TEST
25817 && get_attr_type (condgen) != TYPE_ICMP
25818 && get_attr_type (condgen) != TYPE_INCDEC
25819 && get_attr_type (condgen) != TYPE_ALU)
25820 return false;
25821
25822 if (single_set == NULL_RTX
25823 && !TARGET_FUSE_ALU_AND_BRANCH)
25824 return false;
25825
25826 if (single_set != NULL_RTX)
25827 compare_set = single_set;
25828 else
25829 {
25830 int i;
25831 rtx pat = PATTERN (condgen);
25832 for (i = 0; i < XVECLEN (pat, 0); i++)
25833 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25834 {
25835 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25836 if (GET_CODE (set_src) == COMPARE)
25837 compare_set = XVECEXP (pat, 0, i);
25838 else
25839 alu_set = XVECEXP (pat, 0, i);
25840 }
25841 }
25842 if (compare_set == NULL_RTX)
25843 return false;
25844 src = SET_SRC (compare_set);
25845 if (GET_CODE (src) != COMPARE)
25846 return false;
25847
25848 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25849 supported. */
25850 if ((MEM_P (XEXP (src, 0))
25851 && CONST_INT_P (XEXP (src, 1)))
25852 || (MEM_P (XEXP (src, 1))
25853 && CONST_INT_P (XEXP (src, 0))))
25854 return false;
25855
25856 /* No fusion for RIP-relative address. */
25857 if (MEM_P (XEXP (src, 0)))
25858 addr = XEXP (XEXP (src, 0), 0);
25859 else if (MEM_P (XEXP (src, 1)))
25860 addr = XEXP (XEXP (src, 1), 0);
25861
25862 if (addr)
    {
25863 ix86_address parts;
25864 int ok = ix86_decompose_address (addr, &parts);
25865 gcc_assert (ok);
25866
25867 if (rip_relative_addr_p (&parts))
25868 return false;
25869 }
25870
25871 test_if = SET_SRC (pc_set (condjmp));
25872 cond = XEXP (test_if, 0);
25873 ccode = GET_CODE (cond);
25874 /* Check whether the conditional jump uses the Sign or Overflow flags. */
25875 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25876 && (ccode == GE
25877 || ccode == GT
25878 || ccode == LE
25879 || ccode == LT))
25880 return false;
25881
25882 /* Return true for TYPE_TEST and TYPE_ICMP. */
25883 if (get_attr_type (condgen) == TYPE_TEST
25884 || get_attr_type (condgen) == TYPE_ICMP)
25885 return true;
25886
25887 /* What remains is the macro-fusion case of an ALU op followed by a jump. */
25888 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25889 return false;
25890
25891 /* No fusion for alu op with memory destination operand. */
25892 dest = SET_DEST (alu_set);
25893 if (MEM_P (dest))
25894 return false;
25895
25896 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25897 supported. */
25898 if (get_attr_type (condgen) == TYPE_INCDEC
25899 && (ccode == GEU
25900 || ccode == GTU
25901 || ccode == LEU
25902 || ccode == LTU))
25903 return false;
25904
25905 return true;
25906 }
25907
25908 /* Try to reorder the ready list to take advantage of Atom's pipelined IMUL
25909 execution. It is applied if
25910 (1) an IMUL instruction is at the top of the list;
25911 (2) there is exactly one producer of an independent IMUL instruction in
25912 the ready list.
25913 Return the index of the IMUL producer if it was found, and -1 otherwise. */
25914 static int
25915 do_reorder_for_imul (rtx *ready, int n_ready)
25916 {
25917 rtx insn, set, insn1, insn2;
25918 sd_iterator_def sd_it;
25919 dep_t dep;
25920 int index = -1;
25921 int i;
25922
25923 if (!TARGET_BONNELL)
25924 return index;
25925
25926 /* Check that IMUL instruction is on the top of ready list. */
25927 insn = ready[n_ready - 1];
25928 set = single_set (insn);
25929 if (!set)
25930 return index;
25931 if (!(GET_CODE (SET_SRC (set)) == MULT
25932 && GET_MODE (SET_SRC (set)) == SImode))
25933 return index;
25934
25935 /* Search for producer of independent IMUL instruction. */
25936 for (i = n_ready - 2; i >= 0; i--)
25937 {
25938 insn = ready[i];
25939 if (!NONDEBUG_INSN_P (insn))
25940 continue;
25941 /* Skip IMUL instruction. */
25942 insn2 = PATTERN (insn);
25943 if (GET_CODE (insn2) == PARALLEL)
25944 insn2 = XVECEXP (insn2, 0, 0);
25945 if (GET_CODE (insn2) == SET
25946 && GET_CODE (SET_SRC (insn2)) == MULT
25947 && GET_MODE (SET_SRC (insn2)) == SImode)
25948 continue;
25949
25950 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
25951 {
25952 rtx con;
25953 con = DEP_CON (dep);
25954 if (!NONDEBUG_INSN_P (con))
25955 continue;
25956 insn1 = PATTERN (con);
25957 if (GET_CODE (insn1) == PARALLEL)
25958 insn1 = XVECEXP (insn1, 0, 0);
25959
25960 if (GET_CODE (insn1) == SET
25961 && GET_CODE (SET_SRC (insn1)) == MULT
25962 && GET_MODE (SET_SRC (insn1)) == SImode)
25963 {
25964 sd_iterator_def sd_it1;
25965 dep_t dep1;
25966 /* Check that this insn is the only producer of the IMUL. */
25967 index = i;
25968 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
25969 {
25970 rtx pro;
25971 pro = DEP_PRO (dep1);
25972 if (!NONDEBUG_INSN_P (pro))
25973 continue;
25974 if (pro != insn)
25975 index = -1;
25976 }
25977 if (index >= 0)
25978 break;
25979 }
25980 }
25981 if (index >= 0)
25982 break;
25983 }
25984 return index;
25985 }
25986
25987 /* Try to find the best candidate at the top of the ready list when two insns
25988 have the same priority - the candidate is best if its dependees were
25989 scheduled earlier. Applied for Silvermont-class tuning only.
25990 Return true if the top 2 insns must be interchanged. */
25991 static bool
25992 swap_top_of_ready_list (rtx *ready, int n_ready)
25993 {
25994 rtx top = ready[n_ready - 1];
25995 rtx next = ready[n_ready - 2];
25996 rtx set;
25997 sd_iterator_def sd_it;
25998 dep_t dep;
25999 int clock1 = -1;
26000 int clock2 = -1;
26001 #define INSN_TICK(INSN) (HID (INSN)->tick)
26002
26003 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26004 return false;
26005
26006 if (!NONDEBUG_INSN_P (top))
26007 return false;
26008 if (!NONJUMP_INSN_P (top))
26009 return false;
26010 if (!NONDEBUG_INSN_P (next))
26011 return false;
26012 if (!NONJUMP_INSN_P (next))
26013 return false;
26014 set = single_set (top);
26015 if (!set)
26016 return false;
26017 set = single_set (next);
26018 if (!set)
26019 return false;
26020
26021 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26022 {
26023 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26024 return false;
26025 /* Determine the winner more precisely. */
26026 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26027 {
26028 rtx pro;
26029 pro = DEP_PRO (dep);
26030 if (!NONDEBUG_INSN_P (pro))
26031 continue;
26032 if (INSN_TICK (pro) > clock1)
26033 clock1 = INSN_TICK (pro);
26034 }
26035 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26036 {
26037 rtx pro;
26038 pro = DEP_PRO (dep);
26039 if (!NONDEBUG_INSN_P (pro))
26040 continue;
26041 if (INSN_TICK (pro) > clock2)
26042 clock2 = INSN_TICK (pro);
26043 }
26044
26045 if (clock1 == clock2)
26046 {
26047 /* Determine winner - load must win. */
26048 enum attr_memory memory1, memory2;
26049 memory1 = get_attr_memory (top);
26050 memory2 = get_attr_memory (next);
26051 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26052 return true;
26053 }
26054 return (bool) (clock2 < clock1);
26055 }
26056 return false;
26057 #undef INSN_TICK
26058 }
26059
26060 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26061 Return issue rate. */
26062 static int
26063 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
26064 int clock_var)
26065 {
26066 int issue_rate = -1;
26067 int n_ready = *pn_ready;
26068 int i;
26069 rtx insn;
26070 int index = -1;
26071
26072 /* Set up issue rate. */
26073 issue_rate = ix86_issue_rate ();
26074
26075 /* Do reordering for BONNELL/SILVERMONT only. */
26076 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26077 return issue_rate;
26078
26079 /* Nothing to do if ready list contains only 1 instruction. */
26080 if (n_ready <= 1)
26081 return issue_rate;
26082
26083 /* Do reordering for the post-reload scheduler only. */
26084 if (!reload_completed)
26085 return issue_rate;
26086
26087 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26088 {
26089 if (sched_verbose > 1)
26090 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26091 INSN_UID (ready[index]));
26092
26093 /* Put IMUL producer (ready[index]) at the top of ready list. */
26094 insn = ready[index];
26095 for (i = index; i < n_ready - 1; i++)
26096 ready[i] = ready[i + 1];
26097 ready[n_ready - 1] = insn;
26098 return issue_rate;
26099 }
26100 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26101 {
26102 if (sched_verbose > 1)
26103 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26104 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26105 /* Swap 2 top elements of ready list. */
26106 insn = ready[n_ready - 1];
26107 ready[n_ready - 1] = ready[n_ready - 2];
26108 ready[n_ready - 2] = insn;
26109 }
26110 return issue_rate;
26111 }
26112
26113 static bool
26114 ix86_class_likely_spilled_p (reg_class_t);
26115
26116 /* Return true if the lhs of INSN is a HW function argument register, and set
26117 *is_spilled to true if it is a likely-spilled HW register. */
26118 static bool
26119 insn_is_function_arg (rtx insn, bool* is_spilled)
26120 {
26121 rtx dst;
26122
26123 if (!NONDEBUG_INSN_P (insn))
26124 return false;
26125 /* Call instructions are not movable; ignore them. */
26126 if (CALL_P (insn))
26127 return false;
26128 insn = PATTERN (insn);
26129 if (GET_CODE (insn) == PARALLEL)
26130 insn = XVECEXP (insn, 0, 0);
26131 if (GET_CODE (insn) != SET)
26132 return false;
26133 dst = SET_DEST (insn);
26134 if (REG_P (dst) && HARD_REGISTER_P (dst)
26135 && ix86_function_arg_regno_p (REGNO (dst)))
26136 {
26137 /* Is it likely spilled HW register? */
26138 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26139 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26140 *is_spilled = true;
26141 return true;
26142 }
26143 return false;
26144 }
26145
26146 /* Add output dependencies for a chain of adjacent function arguments, but only
26147 if there is a move to a likely-spilled HW register. Return the first argument
26148 if at least one dependence was added, or NULL otherwise. */
26149 static rtx
26150 add_parameter_dependencies (rtx call, rtx head)
26151 {
26152 rtx insn;
26153 rtx last = call;
26154 rtx first_arg = NULL;
26155 bool is_spilled = false;
26156
26157 head = PREV_INSN (head);
26158
26159 /* Find the argument-passing instruction nearest to the call. */
26160 while (true)
26161 {
26162 last = PREV_INSN (last);
26163 if (last == head)
26164 return NULL;
26165 if (!NONDEBUG_INSN_P (last))
26166 continue;
26167 if (insn_is_function_arg (last, &is_spilled))
26168 break;
26169 return NULL;
26170 }
26171
26172 first_arg = last;
26173 while (true)
26174 {
26175 insn = PREV_INSN (last);
26176 if (!INSN_P (insn))
26177 break;
26178 if (insn == head)
26179 break;
26180 if (!NONDEBUG_INSN_P (insn))
26181 {
26182 last = insn;
26183 continue;
26184 }
26185 if (insn_is_function_arg (insn, &is_spilled))
26186 {
26187 /* Add an output dependence between two function arguments if the chain
26188 of output arguments contains likely-spilled HW registers. */
26189 if (is_spilled)
26190 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26191 first_arg = last = insn;
26192 }
26193 else
26194 break;
26195 }
26196 if (!is_spilled)
26197 return NULL;
26198 return first_arg;
26199 }
26200
26201 /* Add output or anti dependency from insn to first_arg to restrict its code
26202 motion. */
26203 static void
26204 avoid_func_arg_motion (rtx first_arg, rtx insn)
26205 {
26206 rtx set;
26207 rtx tmp;
26208
26209 set = single_set (insn);
26210 if (!set)
26211 return;
26212 tmp = SET_DEST (set);
26213 if (REG_P (tmp))
26214 {
26215 /* Add output dependency to the first function argument. */
26216 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26217 return;
26218 }
26219 /* Add anti dependency. */
26220 add_dependence (first_arg, insn, REG_DEP_ANTI);
26221 }
26222
26223 /* Avoid cross-block motion of a function argument by adding a dependency
26224 from the first non-jump instruction in BB. */
26225 static void
26226 add_dependee_for_func_arg (rtx arg, basic_block bb)
26227 {
26228 rtx insn = BB_END (bb);
26229
26230 while (insn)
26231 {
26232 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26233 {
26234 rtx set = single_set (insn);
26235 if (set)
26236 {
26237 avoid_func_arg_motion (arg, insn);
26238 return;
26239 }
26240 }
26241 if (insn == BB_HEAD (bb))
26242 return;
26243 insn = PREV_INSN (insn);
26244 }
26245 }
26246
26247 /* Hook for pre-reload schedule - avoid motion of function arguments
26248 passed in likely spilled HW registers. */
26249 static void
26250 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
26251 {
26252 rtx insn;
26253 rtx first_arg = NULL;
26254 if (reload_completed)
26255 return;
26256 while (head != tail && DEBUG_INSN_P (head))
26257 head = NEXT_INSN (head);
26258 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26259 if (INSN_P (insn) && CALL_P (insn))
26260 {
26261 first_arg = add_parameter_dependencies (insn, head);
26262 if (first_arg)
26263 {
26264 /* Add a dependee for the first argument to predecessors, but only if the
26265 region contains more than one block. */
26266 basic_block bb = BLOCK_FOR_INSN (insn);
26267 int rgn = CONTAINING_RGN (bb->index);
26268 int nr_blks = RGN_NR_BLOCKS (rgn);
26269 /* Skip trivial regions and region head blocks that can have
26270 predecessors outside of region. */
26271 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26272 {
26273 edge e;
26274 edge_iterator ei;
26275
26276 /* Regions are SCCs with the exception of selective
26277 scheduling with pipelining of outer blocks enabled.
26278 So also check that immediate predecessors of a non-head
26279 block are in the same region. */
26280 FOR_EACH_EDGE (e, ei, bb->preds)
26281 {
26282 /* Avoid creating loop-carried dependencies by using
26283 the topological ordering in the region. */
26284 if (rgn == CONTAINING_RGN (e->src->index)
26285 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26286 add_dependee_for_func_arg (first_arg, e->src);
26287 }
26288 }
26289 insn = first_arg;
26290 if (insn == head)
26291 break;
26292 }
26293 }
26294 else if (first_arg)
26295 avoid_func_arg_motion (first_arg, insn);
26296 }
26297
26298 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26299 HW registers to maximum, to schedule them as soon as possible. These are
26300 moves from function argument registers at the top of the function entry
26301 and moves from function return value registers after call. */
26302 static int
26303 ix86_adjust_priority (rtx insn, int priority)
26304 {
26305 rtx set;
26306
26307 if (reload_completed)
26308 return priority;
26309
26310 if (!NONDEBUG_INSN_P (insn))
26311 return priority;
26312
26313 set = single_set (insn);
26314 if (set)
26315 {
26316 rtx tmp = SET_SRC (set);
26317 if (REG_P (tmp)
26318 && HARD_REGISTER_P (tmp)
26319 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26320 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26321 return current_sched_info->sched_max_insns_priority;
26322 }
26323
26324 return priority;
26325 }
26326
26327 /* Model decoder of Core 2/i7.
26328 The hooks below, for multipass scheduling (see haifa-sched.c:max_issue),
26329 track the instruction fetch block boundaries and make sure that long
26330 (9+ bytes) instructions are assigned to D0. */
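/* For example, once 10 bytes of the current 16-byte fetch block have been
   consumed in a cycle, a 7-byte insn is filtered out of the ready set, and
   an insn longer than 8 bytes is only considered as the first (D0) issue of
   the cycle.  (Illustrative restatement of the filtering logic below.)  */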
26331
26332 /* Maximum length of an insn that can be handled by
26333 a secondary decoder unit. '8' for Core 2/i7. */
26334 static int core2i7_secondary_decoder_max_insn_size;
26335
26336 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26337 '16' for Core 2/i7. */
26338 static int core2i7_ifetch_block_size;
26339
26340 /* Maximum number of instructions decoder can handle per cycle.
26341 '6' for Core 2/i7. */
26342 static int core2i7_ifetch_block_max_insns;
26343
26344 typedef struct ix86_first_cycle_multipass_data_ *
26345 ix86_first_cycle_multipass_data_t;
26346 typedef const struct ix86_first_cycle_multipass_data_ *
26347 const_ix86_first_cycle_multipass_data_t;
26348
26349 /* A variable to store target state across calls to max_issue within
26350 one cycle. */
26351 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26352 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26353
26354 /* Initialize DATA. */
26355 static void
26356 core2i7_first_cycle_multipass_init (void *_data)
26357 {
26358 ix86_first_cycle_multipass_data_t data
26359 = (ix86_first_cycle_multipass_data_t) _data;
26360
26361 data->ifetch_block_len = 0;
26362 data->ifetch_block_n_insns = 0;
26363 data->ready_try_change = NULL;
26364 data->ready_try_change_size = 0;
26365 }
26366
26367 /* Advancing the cycle; reset ifetch block counts. */
26368 static void
26369 core2i7_dfa_post_advance_cycle (void)
26370 {
26371 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26372
26373 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26374
26375 data->ifetch_block_len = 0;
26376 data->ifetch_block_n_insns = 0;
26377 }
26378
26379 static int min_insn_size (rtx);
26380
26381 /* Filter out insns from ready_try that the core will not be able to issue
26382 on current cycle due to decoder. */
26383 static void
26384 core2i7_first_cycle_multipass_filter_ready_try
26385 (const_ix86_first_cycle_multipass_data_t data,
26386 char *ready_try, int n_ready, bool first_cycle_insn_p)
26387 {
26388 while (n_ready--)
26389 {
26390 rtx insn;
26391 int insn_size;
26392
26393 if (ready_try[n_ready])
26394 continue;
26395
26396 insn = get_ready_element (n_ready);
26397 insn_size = min_insn_size (insn);
26398
26399 if (/* If this insn is too long for a secondary decoder ... */
26400 (!first_cycle_insn_p
26401 && insn_size > core2i7_secondary_decoder_max_insn_size)
26402 /* ... or it would not fit into the ifetch block ... */
26403 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26404 /* ... or the decoder is full already ... */
26405 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26406 /* ... mask the insn out. */
26407 {
26408 ready_try[n_ready] = 1;
26409
26410 if (data->ready_try_change)
26411 bitmap_set_bit (data->ready_try_change, n_ready);
26412 }
26413 }
26414 }
26415
26416 /* Prepare for a new round of multipass lookahead scheduling. */
26417 static void
26418 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
26419 bool first_cycle_insn_p)
26420 {
26421 ix86_first_cycle_multipass_data_t data
26422 = (ix86_first_cycle_multipass_data_t) _data;
26423 const_ix86_first_cycle_multipass_data_t prev_data
26424 = ix86_first_cycle_multipass_data;
26425
26426 /* Restore the state from the end of the previous round. */
26427 data->ifetch_block_len = prev_data->ifetch_block_len;
26428 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26429
26430 /* Filter instructions that cannot be issued on current cycle due to
26431 decoder restrictions. */
26432 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26433 first_cycle_insn_p);
26434 }
26435
26436 /* INSN is being issued in current solution. Account for its impact on
26437 the decoder model. */
26438 static void
26439 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
26440 rtx insn, const void *_prev_data)
26441 {
26442 ix86_first_cycle_multipass_data_t data
26443 = (ix86_first_cycle_multipass_data_t) _data;
26444 const_ix86_first_cycle_multipass_data_t prev_data
26445 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26446
26447 int insn_size = min_insn_size (insn);
26448
26449 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26450 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26451 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26452 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26453
26454 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26455 if (!data->ready_try_change)
26456 {
26457 data->ready_try_change = sbitmap_alloc (n_ready);
26458 data->ready_try_change_size = n_ready;
26459 }
26460 else if (data->ready_try_change_size < n_ready)
26461 {
26462 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26463 n_ready, 0);
26464 data->ready_try_change_size = n_ready;
26465 }
26466 bitmap_clear (data->ready_try_change);
26467
26468 /* Filter out insns from ready_try that the core will not be able to issue
26469 on current cycle due to decoder. */
26470 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26471 false);
26472 }
26473
26474 /* Revert the effect on ready_try. */
26475 static void
26476 core2i7_first_cycle_multipass_backtrack (const void *_data,
26477 char *ready_try,
26478 int n_ready ATTRIBUTE_UNUSED)
26479 {
26480 const_ix86_first_cycle_multipass_data_t data
26481 = (const_ix86_first_cycle_multipass_data_t) _data;
26482 unsigned int i = 0;
26483 sbitmap_iterator sbi;
26484
26485 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26486 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26487 {
26488 ready_try[i] = 0;
26489 }
26490 }
26491
26492 /* Save the result of multipass lookahead scheduling for the next round. */
26493 static void
26494 core2i7_first_cycle_multipass_end (const void *_data)
26495 {
26496 const_ix86_first_cycle_multipass_data_t data
26497 = (const_ix86_first_cycle_multipass_data_t) _data;
26498 ix86_first_cycle_multipass_data_t next_data
26499 = ix86_first_cycle_multipass_data;
26500
26501 if (data != NULL)
26502 {
26503 next_data->ifetch_block_len = data->ifetch_block_len;
26504 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26505 }
26506 }
26507
26508 /* Deallocate target data. */
26509 static void
26510 core2i7_first_cycle_multipass_fini (void *_data)
26511 {
26512 ix86_first_cycle_multipass_data_t data
26513 = (ix86_first_cycle_multipass_data_t) _data;
26514
26515 if (data->ready_try_change)
26516 {
26517 sbitmap_free (data->ready_try_change);
26518 data->ready_try_change = NULL;
26519 data->ready_try_change_size = 0;
26520 }
26521 }
26522
26523 /* Prepare for scheduling pass. */
26524 static void
26525 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
26526 int verbose ATTRIBUTE_UNUSED,
26527 int max_uid ATTRIBUTE_UNUSED)
26528 {
26529 /* Install scheduling hooks for current CPU. Some of these hooks are used
26530 in time-critical parts of the scheduler, so we only set them up when
26531 they are actually used. */
26532 switch (ix86_tune)
26533 {
26534 case PROCESSOR_CORE2:
26535 case PROCESSOR_NEHALEM:
26536 case PROCESSOR_SANDYBRIDGE:
26537 case PROCESSOR_HASWELL:
26538 /* Do not perform multipass scheduling for pre-reload schedule
26539 to save compile time. */
26540 if (reload_completed)
26541 {
26542 targetm.sched.dfa_post_advance_cycle
26543 = core2i7_dfa_post_advance_cycle;
26544 targetm.sched.first_cycle_multipass_init
26545 = core2i7_first_cycle_multipass_init;
26546 targetm.sched.first_cycle_multipass_begin
26547 = core2i7_first_cycle_multipass_begin;
26548 targetm.sched.first_cycle_multipass_issue
26549 = core2i7_first_cycle_multipass_issue;
26550 targetm.sched.first_cycle_multipass_backtrack
26551 = core2i7_first_cycle_multipass_backtrack;
26552 targetm.sched.first_cycle_multipass_end
26553 = core2i7_first_cycle_multipass_end;
26554 targetm.sched.first_cycle_multipass_fini
26555 = core2i7_first_cycle_multipass_fini;
26556
26557 /* Set decoder parameters. */
26558 core2i7_secondary_decoder_max_insn_size = 8;
26559 core2i7_ifetch_block_size = 16;
26560 core2i7_ifetch_block_max_insns = 6;
26561 break;
26562 }
26563 /* ... Fall through ... */
26564 default:
26565 targetm.sched.dfa_post_advance_cycle = NULL;
26566 targetm.sched.first_cycle_multipass_init = NULL;
26567 targetm.sched.first_cycle_multipass_begin = NULL;
26568 targetm.sched.first_cycle_multipass_issue = NULL;
26569 targetm.sched.first_cycle_multipass_backtrack = NULL;
26570 targetm.sched.first_cycle_multipass_end = NULL;
26571 targetm.sched.first_cycle_multipass_fini = NULL;
26572 break;
26573 }
26574 }
26575
26576 \f
26577 /* Compute the alignment given to a constant that is being placed in memory.
26578 EXP is the constant and ALIGN is the alignment that the object would
26579 ordinarily have.
26580 The value of this function is used instead of that alignment to align
26581 the object. */
26582
26583 int
26584 ix86_constant_alignment (tree exp, int align)
26585 {
26586 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26587 || TREE_CODE (exp) == INTEGER_CST)
26588 {
26589 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26590 return 64;
26591 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26592 return 128;
26593 }
26594 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26595 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26596 return BITS_PER_WORD;
26597
26598 return align;
26599 }
26600
26601 /* Compute the alignment for a static variable.
26602 TYPE is the data type, and ALIGN is the alignment that
26603 the object would ordinarily have. The value of this function is used
26604 instead of that alignment to align the object. */
26605
26606 int
26607 ix86_data_alignment (tree type, int align, bool opt)
26608 {
26609 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26610 for symbols from other compilation units or symbols that don't need
26611 to bind locally. In order to preserve some ABI compatibility with
26612 those compilers, ensure we don't decrease alignment from what we
26613 used to assume. */
26614
26615 int max_align_compat
26616 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26617
26618 /* A data structure equal to or greater than the size of a cache line
26619 (64 bytes on the Pentium 4 and other recent Intel processors, including
26620 processors based on the Intel Core microarchitecture) should be aligned
26621 so that its base address is a multiple of the cache line size. */
26622
26623 int max_align
26624 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
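/* With the usual 64-byte prefetch block this caps the bonus at 512-bit,
   i.e. cache-line, alignment (assuming prefetch_block is given in bytes).  */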
26625
26626 if (max_align < BITS_PER_WORD)
26627 max_align = BITS_PER_WORD;
26628
26629 if (opt
26630 && AGGREGATE_TYPE_P (type)
26631 && TYPE_SIZE (type)
26632 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26633 {
26634 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
26635 && align < max_align_compat)
26636 align = max_align_compat;
26637 if (wi::geu_p (TYPE_SIZE (type), max_align)
26638 && align < max_align)
26639 align = max_align;
26640 }
26641
26642 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
26643 to 16byte boundary. */
26644 if (TARGET_64BIT)
26645 {
26646 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26647 && TYPE_SIZE (type)
26648 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26649 && wi::geu_p (TYPE_SIZE (type), 128)
26650 && align < 128)
26651 return 128;
26652 }
26653
26654 if (!opt)
26655 return align;
26656
26657 if (TREE_CODE (type) == ARRAY_TYPE)
26658 {
26659 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26660 return 64;
26661 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26662 return 128;
26663 }
26664 else if (TREE_CODE (type) == COMPLEX_TYPE)
26665 {
26666
26667 if (TYPE_MODE (type) == DCmode && align < 64)
26668 return 64;
26669 if ((TYPE_MODE (type) == XCmode
26670 || TYPE_MODE (type) == TCmode) && align < 128)
26671 return 128;
26672 }
26673 else if ((TREE_CODE (type) == RECORD_TYPE
26674 || TREE_CODE (type) == UNION_TYPE
26675 || TREE_CODE (type) == QUAL_UNION_TYPE)
26676 && TYPE_FIELDS (type))
26677 {
26678 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26679 return 64;
26680 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26681 return 128;
26682 }
26683 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26684 || TREE_CODE (type) == INTEGER_TYPE)
26685 {
26686 if (TYPE_MODE (type) == DFmode && align < 64)
26687 return 64;
26688 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26689 return 128;
26690 }
26691
26692 return align;
26693 }
26694
26695 /* Compute the alignment for a local variable or a stack slot. EXP is
26696 the data type or decl itself, MODE is the widest mode available and
26697 ALIGN is the alignment that the object would ordinarily have. The
26698 value of this macro is used instead of that alignment to align the
26699 object. */
26700
26701 unsigned int
26702 ix86_local_alignment (tree exp, enum machine_mode mode,
26703 unsigned int align)
26704 {
26705 tree type, decl;
26706
26707 if (exp && DECL_P (exp))
26708 {
26709 type = TREE_TYPE (exp);
26710 decl = exp;
26711 }
26712 else
26713 {
26714 type = exp;
26715 decl = NULL;
26716 }
26717
26718 /* Don't do dynamic stack realignment for long long objects with
26719 -mpreferred-stack-boundary=2. */
26720 if (!TARGET_64BIT
26721 && align == 64
26722 && ix86_preferred_stack_boundary < 64
26723 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26724 && (!type || !TYPE_USER_ALIGN (type))
26725 && (!decl || !DECL_USER_ALIGN (decl)))
26726 align = 32;
26727
26728 /* If TYPE is NULL, we are allocating a stack slot for caller-save
26729 register in MODE. We will return the largest alignment of XF
26730 and DF. */
26731 if (!type)
26732 {
26733 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26734 align = GET_MODE_ALIGNMENT (DFmode);
26735 return align;
26736 }
26737
26738 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
26739 to 16byte boundary. Exact wording is:
26740
26741 An array uses the same alignment as its elements, except that a local or
26742 global array variable of length at least 16 bytes or
26743 a C99 variable-length array variable always has alignment of at least 16 bytes.
26744
26745 This was added to allow use of aligned SSE instructions on arrays. The
26746 rule is meant for static storage (where the compiler cannot do the analysis
26747 by itself). We follow it for automatic variables only when convenient.
26748 We fully control everything in the function being compiled, and functions
26749 from other units cannot rely on the alignment.
26750
26751 Exclude va_list type. It is the common case of local array where
26752 we can not benefit from the alignment.
26753
26754 TODO: Probably one should optimize for size only when var is not escaping. */
26755 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26756 && TARGET_SSE)
26757 {
26758 if (AGGREGATE_TYPE_P (type)
26759 && (va_list_type_node == NULL_TREE
26760 || (TYPE_MAIN_VARIANT (type)
26761 != TYPE_MAIN_VARIANT (va_list_type_node)))
26762 && TYPE_SIZE (type)
26763 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26764 && wi::geu_p (TYPE_SIZE (type), 16)
26765 && align < 128)
26766 return 128;
26767 }
26768 if (TREE_CODE (type) == ARRAY_TYPE)
26769 {
26770 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26771 return 64;
26772 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26773 return 128;
26774 }
26775 else if (TREE_CODE (type) == COMPLEX_TYPE)
26776 {
26777 if (TYPE_MODE (type) == DCmode && align < 64)
26778 return 64;
26779 if ((TYPE_MODE (type) == XCmode
26780 || TYPE_MODE (type) == TCmode) && align < 128)
26781 return 128;
26782 }
26783 else if ((TREE_CODE (type) == RECORD_TYPE
26784 || TREE_CODE (type) == UNION_TYPE
26785 || TREE_CODE (type) == QUAL_UNION_TYPE)
26786 && TYPE_FIELDS (type))
26787 {
26788 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26789 return 64;
26790 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26791 return 128;
26792 }
26793 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26794 || TREE_CODE (type) == INTEGER_TYPE)
26795 {
26796
26797 if (TYPE_MODE (type) == DFmode && align < 64)
26798 return 64;
26799 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26800 return 128;
26801 }
26802 return align;
26803 }
26804
26805 /* Compute the minimum required alignment for dynamic stack realignment
26806 purposes for a local variable, parameter or a stack slot. EXP is
26807 the data type or decl itself, MODE is its mode and ALIGN is the
26808 alignment that the object would ordinarily have. */
26809
26810 unsigned int
26811 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26812 unsigned int align)
26813 {
26814 tree type, decl;
26815
26816 if (exp && DECL_P (exp))
26817 {
26818 type = TREE_TYPE (exp);
26819 decl = exp;
26820 }
26821 else
26822 {
26823 type = exp;
26824 decl = NULL;
26825 }
26826
26827 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26828 return align;
26829
26830 /* Don't do dynamic stack realignment for long long objects with
26831 -mpreferred-stack-boundary=2. */
26832 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26833 && (!type || !TYPE_USER_ALIGN (type))
26834 && (!decl || !DECL_USER_ALIGN (decl)))
26835 return 32;
26836
26837 return align;
26838 }
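/* For example, with -m32 -mpreferred-stack-boundary=2 a local "long long"
   (DImode) without a user-specified alignment only requires 32-bit
   alignment here, so it alone does not force dynamic stack realignment. */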
26839 \f
26840 /* Find a location for the static chain incoming to a nested function.
26841 This is a register, unless all free registers are used by arguments. */
26842
26843 static rtx
26844 ix86_static_chain (const_tree fndecl, bool incoming_p)
26845 {
26846 unsigned regno;
26847
26848 if (!DECL_STATIC_CHAIN (fndecl))
26849 return NULL;
26850
26851 if (TARGET_64BIT)
26852 {
26853 /* We always use R10 in 64-bit mode. */
26854 regno = R10_REG;
26855 }
26856 else
26857 {
26858 tree fntype;
26859 unsigned int ccvt;
26860
26861 /* By default in 32-bit mode we use ECX to pass the static chain. */
26862 regno = CX_REG;
26863
26864 fntype = TREE_TYPE (fndecl);
26865 ccvt = ix86_get_callcvt (fntype);
26866 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26867 {
26868 /* Fastcall functions use ecx/edx for arguments, which leaves
26869 us with EAX for the static chain.
26870 Thiscall functions use ecx for arguments, which also
26871 leaves us with EAX for the static chain. */
26872 regno = AX_REG;
26873 }
26874 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26875 {
26876 /* Thiscall functions use ecx for arguments, which leaves
26877 us with EAX and EDX free for the static chain.
26878 We use EAX for ABI compatibility. */
26879 regno = AX_REG;
26880 }
26881 else if (ix86_function_regparm (fntype, fndecl) == 3)
26882 {
26883 /* For regparm 3, we have no free call-clobbered registers in
26884 which to store the static chain. In order to implement this,
26885 we have the trampoline push the static chain to the stack.
26886 However, we can't push a value below the return address when
26887 we call the nested function directly, so we have to use an
26888 alternate entry point. For this we use ESI, and have the
26889 alternate entry point push ESI, so that things appear the
26890 same once we're executing the nested function. */
26891 if (incoming_p)
26892 {
26893 if (fndecl == current_function_decl)
26894 ix86_static_chain_on_stack = true;
26895 return gen_frame_mem (SImode,
26896 plus_constant (Pmode,
26897 arg_pointer_rtx, -8));
26898 }
26899 regno = SI_REG;
26900 }
26901 }
26902
26903 return gen_rtx_REG (Pmode, regno);
26904 }
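/* In summary: the static chain lives in R10 for 64-bit code and in ECX for
   the default 32-bit conventions; fastcall and thiscall functions use EAX
   instead; and for regparm(3) functions the trampoline pushes the chain onto
   the stack (the incoming value is read back from a stack slot), with ESI
   used via the alternate entry point described above. */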
26905
26906 /* Emit RTL insns to initialize the variable parts of a trampoline.
26907 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26908 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26909 to be passed to the target function. */
26910
26911 static void
26912 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26913 {
26914 rtx mem, fnaddr;
26915 int opcode;
26916 int offset = 0;
26917
26918 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26919
26920 if (TARGET_64BIT)
26921 {
26922 int size;
26923
26924 /* Load the function address into r11. Try to load the address using
26925 the shorter movl instead of movabs. We may want to support
26926 movq for kernel mode, but the kernel does not use trampolines at
26927 the moment. FNADDR is a 32-bit address and may not be in
26928 DImode when ptr_mode == SImode. Always use movl in this
26929 case. */
26930 if (ptr_mode == SImode
26931 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26932 {
26933 fnaddr = copy_addr_to_reg (fnaddr);
26934
26935 mem = adjust_address (m_tramp, HImode, offset);
26936 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26937
26938 mem = adjust_address (m_tramp, SImode, offset + 2);
26939 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
26940 offset += 6;
26941 }
26942 else
26943 {
26944 mem = adjust_address (m_tramp, HImode, offset);
26945 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
26946
26947 mem = adjust_address (m_tramp, DImode, offset + 2);
26948 emit_move_insn (mem, fnaddr);
26949 offset += 10;
26950 }
26951
26952 /* Load the static chain into r10 using movabs. Use the shorter movl
26953 instead of movabs when ptr_mode == SImode. */
26954 if (ptr_mode == SImode)
26955 {
26956 opcode = 0xba41;
26957 size = 6;
26958 }
26959 else
26960 {
26961 opcode = 0xba49;
26962 size = 10;
26963 }
26964
26965 mem = adjust_address (m_tramp, HImode, offset);
26966 emit_move_insn (mem, gen_int_mode (opcode, HImode));
26967
26968 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
26969 emit_move_insn (mem, chain_value);
26970 offset += size;
26971
26972 /* Jump to r11; the last (unused) byte is a nop, only there to
26973 pad the write out to a single 32-bit store. */
26974 mem = adjust_address (m_tramp, SImode, offset);
26975 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
26976 offset += 4;
26977 }
26978 else
26979 {
26980 rtx disp, chain;
26981
26982 /* Depending on the static chain location, either load a register
26983 with a constant, or push the constant to the stack. All of the
26984 instructions are the same size. */
26985 chain = ix86_static_chain (fndecl, true);
26986 if (REG_P (chain))
26987 {
26988 switch (REGNO (chain))
26989 {
26990 case AX_REG:
26991 opcode = 0xb8; break;
26992 case CX_REG:
26993 opcode = 0xb9; break;
26994 default:
26995 gcc_unreachable ();
26996 }
26997 }
26998 else
26999 opcode = 0x68;
27000
27001 mem = adjust_address (m_tramp, QImode, offset);
27002 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27003
27004 mem = adjust_address (m_tramp, SImode, offset + 1);
27005 emit_move_insn (mem, chain_value);
27006 offset += 5;
27007
27008 mem = adjust_address (m_tramp, QImode, offset);
27009 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27010
27011 mem = adjust_address (m_tramp, SImode, offset + 1);
27012
27013 /* Compute the offset from the end of the jmp to the target function.
27014 When the trampoline stores the static chain on the
27015 stack, we need to skip the first insn, which pushes the
27016 (call-saved) register static chain; this push is 1 byte. */
27017 offset += 5;
27018 disp = expand_binop (SImode, sub_optab, fnaddr,
27019 plus_constant (Pmode, XEXP (m_tramp, 0),
27020 offset - (MEM_P (chain) ? 1 : 0)),
27021 NULL_RTX, 1, OPTAB_DIRECT);
27022 emit_move_insn (mem, disp);
27023 }
27024
27025 gcc_assert (offset <= TRAMPOLINE_SIZE);
27026
27027 #ifdef HAVE_ENABLE_EXECUTE_STACK
27028 #ifdef CHECK_EXECUTE_STACK_ENABLED
27029 if (CHECK_EXECUTE_STACK_ENABLED)
27030 #endif
27031 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27032 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27033 #endif
27034 }
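/* Rough byte-level sketch of the trampolines emitted above (the
   emit_move_insn sequence is authoritative):
   64-bit, movabs form:
     49 bb <fnaddr.8>   movabs $fnaddr, %r11
     49 ba <chain.8>    movabs $chain,  %r10
     49 ff e3 90        jmp *%r11; nop  (the nop pads the final 32-bit store)
   32-bit form:
     b9 <chain.4>       mov $chain, %ecx   (b8 for %eax, 68 for a push)
     e9 <rel32.4>       jmp fnaddr (PC-relative)  */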
27035 \f
27036 /* The following file contains several enumerations and data structures
27037 built from the definitions in i386-builtin-types.def. */
27038
27039 #include "i386-builtin-types.inc"
27040
27041 /* Table for the ix86 builtin non-function types. */
27042 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27043
27044 /* Retrieve an element from the above table, building some of
27045 the types lazily. */
27046
27047 static tree
27048 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27049 {
27050 unsigned int index;
27051 tree type, itype;
27052
27053 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27054
27055 type = ix86_builtin_type_tab[(int) tcode];
27056 if (type != NULL)
27057 return type;
27058
27059 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27060 if (tcode <= IX86_BT_LAST_VECT)
27061 {
27062 enum machine_mode mode;
27063
27064 index = tcode - IX86_BT_LAST_PRIM - 1;
27065 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27066 mode = ix86_builtin_type_vect_mode[index];
27067
27068 type = build_vector_type_for_mode (itype, mode);
27069 }
27070 else
27071 {
27072 int quals;
27073
27074 index = tcode - IX86_BT_LAST_VECT - 1;
27075 if (tcode <= IX86_BT_LAST_PTR)
27076 quals = TYPE_UNQUALIFIED;
27077 else
27078 quals = TYPE_QUAL_CONST;
27079
27080 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27081 if (quals != TYPE_UNQUALIFIED)
27082 itype = build_qualified_type (itype, quals);
27083
27084 type = build_pointer_type (itype);
27085 }
27086
27087 ix86_builtin_type_tab[(int) tcode] = type;
27088 return type;
27089 }
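/* For instance, the first request for a vector type code builds its tree
   with build_vector_type_for_mode from the element type and mode recorded
   in ix86_builtin_type_vect_base/_mode; every later request for the same
   code is served from ix86_builtin_type_tab. */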
27090
27091 /* Table for the ix86 builtin function types. */
27092 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27093
27094 /* Retrieve an element from the above table, building some of
27095 the types lazily. */
27096
27097 static tree
27098 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27099 {
27100 tree type;
27101
27102 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27103
27104 type = ix86_builtin_func_type_tab[(int) tcode];
27105 if (type != NULL)
27106 return type;
27107
27108 if (tcode <= IX86_BT_LAST_FUNC)
27109 {
27110 unsigned start = ix86_builtin_func_start[(int) tcode];
27111 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27112 tree rtype, atype, args = void_list_node;
27113 unsigned i;
27114
27115 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27116 for (i = after - 1; i > start; --i)
27117 {
27118 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27119 args = tree_cons (NULL, atype, args);
27120 }
27121
27122 type = build_function_type (rtype, args);
27123 }
27124 else
27125 {
27126 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27127 enum ix86_builtin_func_type icode;
27128
27129 icode = ix86_builtin_func_alias_base[index];
27130 type = ix86_get_builtin_func_type (icode);
27131 }
27132
27133 ix86_builtin_func_type_tab[(int) tcode] = type;
27134 return type;
27135 }
27136
27137
27138 /* Codes for all the SSE/MMX builtins. */
27139 enum ix86_builtins
27140 {
27141 IX86_BUILTIN_ADDPS,
27142 IX86_BUILTIN_ADDSS,
27143 IX86_BUILTIN_DIVPS,
27144 IX86_BUILTIN_DIVSS,
27145 IX86_BUILTIN_MULPS,
27146 IX86_BUILTIN_MULSS,
27147 IX86_BUILTIN_SUBPS,
27148 IX86_BUILTIN_SUBSS,
27149
27150 IX86_BUILTIN_CMPEQPS,
27151 IX86_BUILTIN_CMPLTPS,
27152 IX86_BUILTIN_CMPLEPS,
27153 IX86_BUILTIN_CMPGTPS,
27154 IX86_BUILTIN_CMPGEPS,
27155 IX86_BUILTIN_CMPNEQPS,
27156 IX86_BUILTIN_CMPNLTPS,
27157 IX86_BUILTIN_CMPNLEPS,
27158 IX86_BUILTIN_CMPNGTPS,
27159 IX86_BUILTIN_CMPNGEPS,
27160 IX86_BUILTIN_CMPORDPS,
27161 IX86_BUILTIN_CMPUNORDPS,
27162 IX86_BUILTIN_CMPEQSS,
27163 IX86_BUILTIN_CMPLTSS,
27164 IX86_BUILTIN_CMPLESS,
27165 IX86_BUILTIN_CMPNEQSS,
27166 IX86_BUILTIN_CMPNLTSS,
27167 IX86_BUILTIN_CMPNLESS,
27168 IX86_BUILTIN_CMPORDSS,
27169 IX86_BUILTIN_CMPUNORDSS,
27170
27171 IX86_BUILTIN_COMIEQSS,
27172 IX86_BUILTIN_COMILTSS,
27173 IX86_BUILTIN_COMILESS,
27174 IX86_BUILTIN_COMIGTSS,
27175 IX86_BUILTIN_COMIGESS,
27176 IX86_BUILTIN_COMINEQSS,
27177 IX86_BUILTIN_UCOMIEQSS,
27178 IX86_BUILTIN_UCOMILTSS,
27179 IX86_BUILTIN_UCOMILESS,
27180 IX86_BUILTIN_UCOMIGTSS,
27181 IX86_BUILTIN_UCOMIGESS,
27182 IX86_BUILTIN_UCOMINEQSS,
27183
27184 IX86_BUILTIN_CVTPI2PS,
27185 IX86_BUILTIN_CVTPS2PI,
27186 IX86_BUILTIN_CVTSI2SS,
27187 IX86_BUILTIN_CVTSI642SS,
27188 IX86_BUILTIN_CVTSS2SI,
27189 IX86_BUILTIN_CVTSS2SI64,
27190 IX86_BUILTIN_CVTTPS2PI,
27191 IX86_BUILTIN_CVTTSS2SI,
27192 IX86_BUILTIN_CVTTSS2SI64,
27193
27194 IX86_BUILTIN_MAXPS,
27195 IX86_BUILTIN_MAXSS,
27196 IX86_BUILTIN_MINPS,
27197 IX86_BUILTIN_MINSS,
27198
27199 IX86_BUILTIN_LOADUPS,
27200 IX86_BUILTIN_STOREUPS,
27201 IX86_BUILTIN_MOVSS,
27202
27203 IX86_BUILTIN_MOVHLPS,
27204 IX86_BUILTIN_MOVLHPS,
27205 IX86_BUILTIN_LOADHPS,
27206 IX86_BUILTIN_LOADLPS,
27207 IX86_BUILTIN_STOREHPS,
27208 IX86_BUILTIN_STORELPS,
27209
27210 IX86_BUILTIN_MASKMOVQ,
27211 IX86_BUILTIN_MOVMSKPS,
27212 IX86_BUILTIN_PMOVMSKB,
27213
27214 IX86_BUILTIN_MOVNTPS,
27215 IX86_BUILTIN_MOVNTQ,
27216
27217 IX86_BUILTIN_LOADDQU,
27218 IX86_BUILTIN_STOREDQU,
27219
27220 IX86_BUILTIN_PACKSSWB,
27221 IX86_BUILTIN_PACKSSDW,
27222 IX86_BUILTIN_PACKUSWB,
27223
27224 IX86_BUILTIN_PADDB,
27225 IX86_BUILTIN_PADDW,
27226 IX86_BUILTIN_PADDD,
27227 IX86_BUILTIN_PADDQ,
27228 IX86_BUILTIN_PADDSB,
27229 IX86_BUILTIN_PADDSW,
27230 IX86_BUILTIN_PADDUSB,
27231 IX86_BUILTIN_PADDUSW,
27232 IX86_BUILTIN_PSUBB,
27233 IX86_BUILTIN_PSUBW,
27234 IX86_BUILTIN_PSUBD,
27235 IX86_BUILTIN_PSUBQ,
27236 IX86_BUILTIN_PSUBSB,
27237 IX86_BUILTIN_PSUBSW,
27238 IX86_BUILTIN_PSUBUSB,
27239 IX86_BUILTIN_PSUBUSW,
27240
27241 IX86_BUILTIN_PAND,
27242 IX86_BUILTIN_PANDN,
27243 IX86_BUILTIN_POR,
27244 IX86_BUILTIN_PXOR,
27245
27246 IX86_BUILTIN_PAVGB,
27247 IX86_BUILTIN_PAVGW,
27248
27249 IX86_BUILTIN_PCMPEQB,
27250 IX86_BUILTIN_PCMPEQW,
27251 IX86_BUILTIN_PCMPEQD,
27252 IX86_BUILTIN_PCMPGTB,
27253 IX86_BUILTIN_PCMPGTW,
27254 IX86_BUILTIN_PCMPGTD,
27255
27256 IX86_BUILTIN_PMADDWD,
27257
27258 IX86_BUILTIN_PMAXSW,
27259 IX86_BUILTIN_PMAXUB,
27260 IX86_BUILTIN_PMINSW,
27261 IX86_BUILTIN_PMINUB,
27262
27263 IX86_BUILTIN_PMULHUW,
27264 IX86_BUILTIN_PMULHW,
27265 IX86_BUILTIN_PMULLW,
27266
27267 IX86_BUILTIN_PSADBW,
27268 IX86_BUILTIN_PSHUFW,
27269
27270 IX86_BUILTIN_PSLLW,
27271 IX86_BUILTIN_PSLLD,
27272 IX86_BUILTIN_PSLLQ,
27273 IX86_BUILTIN_PSRAW,
27274 IX86_BUILTIN_PSRAD,
27275 IX86_BUILTIN_PSRLW,
27276 IX86_BUILTIN_PSRLD,
27277 IX86_BUILTIN_PSRLQ,
27278 IX86_BUILTIN_PSLLWI,
27279 IX86_BUILTIN_PSLLDI,
27280 IX86_BUILTIN_PSLLQI,
27281 IX86_BUILTIN_PSRAWI,
27282 IX86_BUILTIN_PSRADI,
27283 IX86_BUILTIN_PSRLWI,
27284 IX86_BUILTIN_PSRLDI,
27285 IX86_BUILTIN_PSRLQI,
27286
27287 IX86_BUILTIN_PUNPCKHBW,
27288 IX86_BUILTIN_PUNPCKHWD,
27289 IX86_BUILTIN_PUNPCKHDQ,
27290 IX86_BUILTIN_PUNPCKLBW,
27291 IX86_BUILTIN_PUNPCKLWD,
27292 IX86_BUILTIN_PUNPCKLDQ,
27293
27294 IX86_BUILTIN_SHUFPS,
27295
27296 IX86_BUILTIN_RCPPS,
27297 IX86_BUILTIN_RCPSS,
27298 IX86_BUILTIN_RSQRTPS,
27299 IX86_BUILTIN_RSQRTPS_NR,
27300 IX86_BUILTIN_RSQRTSS,
27301 IX86_BUILTIN_RSQRTF,
27302 IX86_BUILTIN_SQRTPS,
27303 IX86_BUILTIN_SQRTPS_NR,
27304 IX86_BUILTIN_SQRTSS,
27305
27306 IX86_BUILTIN_UNPCKHPS,
27307 IX86_BUILTIN_UNPCKLPS,
27308
27309 IX86_BUILTIN_ANDPS,
27310 IX86_BUILTIN_ANDNPS,
27311 IX86_BUILTIN_ORPS,
27312 IX86_BUILTIN_XORPS,
27313
27314 IX86_BUILTIN_EMMS,
27315 IX86_BUILTIN_LDMXCSR,
27316 IX86_BUILTIN_STMXCSR,
27317 IX86_BUILTIN_SFENCE,
27318
27319 IX86_BUILTIN_FXSAVE,
27320 IX86_BUILTIN_FXRSTOR,
27321 IX86_BUILTIN_FXSAVE64,
27322 IX86_BUILTIN_FXRSTOR64,
27323
27324 IX86_BUILTIN_XSAVE,
27325 IX86_BUILTIN_XRSTOR,
27326 IX86_BUILTIN_XSAVE64,
27327 IX86_BUILTIN_XRSTOR64,
27328
27329 IX86_BUILTIN_XSAVEOPT,
27330 IX86_BUILTIN_XSAVEOPT64,
27331
27332 IX86_BUILTIN_XSAVEC,
27333 IX86_BUILTIN_XSAVEC64,
27334
27335 IX86_BUILTIN_XSAVES,
27336 IX86_BUILTIN_XRSTORS,
27337 IX86_BUILTIN_XSAVES64,
27338 IX86_BUILTIN_XRSTORS64,
27339
27340 /* 3DNow! Original */
27341 IX86_BUILTIN_FEMMS,
27342 IX86_BUILTIN_PAVGUSB,
27343 IX86_BUILTIN_PF2ID,
27344 IX86_BUILTIN_PFACC,
27345 IX86_BUILTIN_PFADD,
27346 IX86_BUILTIN_PFCMPEQ,
27347 IX86_BUILTIN_PFCMPGE,
27348 IX86_BUILTIN_PFCMPGT,
27349 IX86_BUILTIN_PFMAX,
27350 IX86_BUILTIN_PFMIN,
27351 IX86_BUILTIN_PFMUL,
27352 IX86_BUILTIN_PFRCP,
27353 IX86_BUILTIN_PFRCPIT1,
27354 IX86_BUILTIN_PFRCPIT2,
27355 IX86_BUILTIN_PFRSQIT1,
27356 IX86_BUILTIN_PFRSQRT,
27357 IX86_BUILTIN_PFSUB,
27358 IX86_BUILTIN_PFSUBR,
27359 IX86_BUILTIN_PI2FD,
27360 IX86_BUILTIN_PMULHRW,
27361
27362 /* 3DNow! Athlon Extensions */
27363 IX86_BUILTIN_PF2IW,
27364 IX86_BUILTIN_PFNACC,
27365 IX86_BUILTIN_PFPNACC,
27366 IX86_BUILTIN_PI2FW,
27367 IX86_BUILTIN_PSWAPDSI,
27368 IX86_BUILTIN_PSWAPDSF,
27369
27370 /* SSE2 */
27371 IX86_BUILTIN_ADDPD,
27372 IX86_BUILTIN_ADDSD,
27373 IX86_BUILTIN_DIVPD,
27374 IX86_BUILTIN_DIVSD,
27375 IX86_BUILTIN_MULPD,
27376 IX86_BUILTIN_MULSD,
27377 IX86_BUILTIN_SUBPD,
27378 IX86_BUILTIN_SUBSD,
27379
27380 IX86_BUILTIN_CMPEQPD,
27381 IX86_BUILTIN_CMPLTPD,
27382 IX86_BUILTIN_CMPLEPD,
27383 IX86_BUILTIN_CMPGTPD,
27384 IX86_BUILTIN_CMPGEPD,
27385 IX86_BUILTIN_CMPNEQPD,
27386 IX86_BUILTIN_CMPNLTPD,
27387 IX86_BUILTIN_CMPNLEPD,
27388 IX86_BUILTIN_CMPNGTPD,
27389 IX86_BUILTIN_CMPNGEPD,
27390 IX86_BUILTIN_CMPORDPD,
27391 IX86_BUILTIN_CMPUNORDPD,
27392 IX86_BUILTIN_CMPEQSD,
27393 IX86_BUILTIN_CMPLTSD,
27394 IX86_BUILTIN_CMPLESD,
27395 IX86_BUILTIN_CMPNEQSD,
27396 IX86_BUILTIN_CMPNLTSD,
27397 IX86_BUILTIN_CMPNLESD,
27398 IX86_BUILTIN_CMPORDSD,
27399 IX86_BUILTIN_CMPUNORDSD,
27400
27401 IX86_BUILTIN_COMIEQSD,
27402 IX86_BUILTIN_COMILTSD,
27403 IX86_BUILTIN_COMILESD,
27404 IX86_BUILTIN_COMIGTSD,
27405 IX86_BUILTIN_COMIGESD,
27406 IX86_BUILTIN_COMINEQSD,
27407 IX86_BUILTIN_UCOMIEQSD,
27408 IX86_BUILTIN_UCOMILTSD,
27409 IX86_BUILTIN_UCOMILESD,
27410 IX86_BUILTIN_UCOMIGTSD,
27411 IX86_BUILTIN_UCOMIGESD,
27412 IX86_BUILTIN_UCOMINEQSD,
27413
27414 IX86_BUILTIN_MAXPD,
27415 IX86_BUILTIN_MAXSD,
27416 IX86_BUILTIN_MINPD,
27417 IX86_BUILTIN_MINSD,
27418
27419 IX86_BUILTIN_ANDPD,
27420 IX86_BUILTIN_ANDNPD,
27421 IX86_BUILTIN_ORPD,
27422 IX86_BUILTIN_XORPD,
27423
27424 IX86_BUILTIN_SQRTPD,
27425 IX86_BUILTIN_SQRTSD,
27426
27427 IX86_BUILTIN_UNPCKHPD,
27428 IX86_BUILTIN_UNPCKLPD,
27429
27430 IX86_BUILTIN_SHUFPD,
27431
27432 IX86_BUILTIN_LOADUPD,
27433 IX86_BUILTIN_STOREUPD,
27434 IX86_BUILTIN_MOVSD,
27435
27436 IX86_BUILTIN_LOADHPD,
27437 IX86_BUILTIN_LOADLPD,
27438
27439 IX86_BUILTIN_CVTDQ2PD,
27440 IX86_BUILTIN_CVTDQ2PS,
27441
27442 IX86_BUILTIN_CVTPD2DQ,
27443 IX86_BUILTIN_CVTPD2PI,
27444 IX86_BUILTIN_CVTPD2PS,
27445 IX86_BUILTIN_CVTTPD2DQ,
27446 IX86_BUILTIN_CVTTPD2PI,
27447
27448 IX86_BUILTIN_CVTPI2PD,
27449 IX86_BUILTIN_CVTSI2SD,
27450 IX86_BUILTIN_CVTSI642SD,
27451
27452 IX86_BUILTIN_CVTSD2SI,
27453 IX86_BUILTIN_CVTSD2SI64,
27454 IX86_BUILTIN_CVTSD2SS,
27455 IX86_BUILTIN_CVTSS2SD,
27456 IX86_BUILTIN_CVTTSD2SI,
27457 IX86_BUILTIN_CVTTSD2SI64,
27458
27459 IX86_BUILTIN_CVTPS2DQ,
27460 IX86_BUILTIN_CVTPS2PD,
27461 IX86_BUILTIN_CVTTPS2DQ,
27462
27463 IX86_BUILTIN_MOVNTI,
27464 IX86_BUILTIN_MOVNTI64,
27465 IX86_BUILTIN_MOVNTPD,
27466 IX86_BUILTIN_MOVNTDQ,
27467
27468 IX86_BUILTIN_MOVQ128,
27469
27470 /* SSE2 MMX */
27471 IX86_BUILTIN_MASKMOVDQU,
27472 IX86_BUILTIN_MOVMSKPD,
27473 IX86_BUILTIN_PMOVMSKB128,
27474
27475 IX86_BUILTIN_PACKSSWB128,
27476 IX86_BUILTIN_PACKSSDW128,
27477 IX86_BUILTIN_PACKUSWB128,
27478
27479 IX86_BUILTIN_PADDB128,
27480 IX86_BUILTIN_PADDW128,
27481 IX86_BUILTIN_PADDD128,
27482 IX86_BUILTIN_PADDQ128,
27483 IX86_BUILTIN_PADDSB128,
27484 IX86_BUILTIN_PADDSW128,
27485 IX86_BUILTIN_PADDUSB128,
27486 IX86_BUILTIN_PADDUSW128,
27487 IX86_BUILTIN_PSUBB128,
27488 IX86_BUILTIN_PSUBW128,
27489 IX86_BUILTIN_PSUBD128,
27490 IX86_BUILTIN_PSUBQ128,
27491 IX86_BUILTIN_PSUBSB128,
27492 IX86_BUILTIN_PSUBSW128,
27493 IX86_BUILTIN_PSUBUSB128,
27494 IX86_BUILTIN_PSUBUSW128,
27495
27496 IX86_BUILTIN_PAND128,
27497 IX86_BUILTIN_PANDN128,
27498 IX86_BUILTIN_POR128,
27499 IX86_BUILTIN_PXOR128,
27500
27501 IX86_BUILTIN_PAVGB128,
27502 IX86_BUILTIN_PAVGW128,
27503
27504 IX86_BUILTIN_PCMPEQB128,
27505 IX86_BUILTIN_PCMPEQW128,
27506 IX86_BUILTIN_PCMPEQD128,
27507 IX86_BUILTIN_PCMPGTB128,
27508 IX86_BUILTIN_PCMPGTW128,
27509 IX86_BUILTIN_PCMPGTD128,
27510
27511 IX86_BUILTIN_PMADDWD128,
27512
27513 IX86_BUILTIN_PMAXSW128,
27514 IX86_BUILTIN_PMAXUB128,
27515 IX86_BUILTIN_PMINSW128,
27516 IX86_BUILTIN_PMINUB128,
27517
27518 IX86_BUILTIN_PMULUDQ,
27519 IX86_BUILTIN_PMULUDQ128,
27520 IX86_BUILTIN_PMULHUW128,
27521 IX86_BUILTIN_PMULHW128,
27522 IX86_BUILTIN_PMULLW128,
27523
27524 IX86_BUILTIN_PSADBW128,
27525 IX86_BUILTIN_PSHUFHW,
27526 IX86_BUILTIN_PSHUFLW,
27527 IX86_BUILTIN_PSHUFD,
27528
27529 IX86_BUILTIN_PSLLDQI128,
27530 IX86_BUILTIN_PSLLWI128,
27531 IX86_BUILTIN_PSLLDI128,
27532 IX86_BUILTIN_PSLLQI128,
27533 IX86_BUILTIN_PSRAWI128,
27534 IX86_BUILTIN_PSRADI128,
27535 IX86_BUILTIN_PSRLDQI128,
27536 IX86_BUILTIN_PSRLWI128,
27537 IX86_BUILTIN_PSRLDI128,
27538 IX86_BUILTIN_PSRLQI128,
27539
27540 IX86_BUILTIN_PSLLDQ128,
27541 IX86_BUILTIN_PSLLW128,
27542 IX86_BUILTIN_PSLLD128,
27543 IX86_BUILTIN_PSLLQ128,
27544 IX86_BUILTIN_PSRAW128,
27545 IX86_BUILTIN_PSRAD128,
27546 IX86_BUILTIN_PSRLW128,
27547 IX86_BUILTIN_PSRLD128,
27548 IX86_BUILTIN_PSRLQ128,
27549
27550 IX86_BUILTIN_PUNPCKHBW128,
27551 IX86_BUILTIN_PUNPCKHWD128,
27552 IX86_BUILTIN_PUNPCKHDQ128,
27553 IX86_BUILTIN_PUNPCKHQDQ128,
27554 IX86_BUILTIN_PUNPCKLBW128,
27555 IX86_BUILTIN_PUNPCKLWD128,
27556 IX86_BUILTIN_PUNPCKLDQ128,
27557 IX86_BUILTIN_PUNPCKLQDQ128,
27558
27559 IX86_BUILTIN_CLFLUSH,
27560 IX86_BUILTIN_MFENCE,
27561 IX86_BUILTIN_LFENCE,
27562 IX86_BUILTIN_PAUSE,
27563
27564 IX86_BUILTIN_FNSTENV,
27565 IX86_BUILTIN_FLDENV,
27566 IX86_BUILTIN_FNSTSW,
27567 IX86_BUILTIN_FNCLEX,
27568
27569 IX86_BUILTIN_BSRSI,
27570 IX86_BUILTIN_BSRDI,
27571 IX86_BUILTIN_RDPMC,
27572 IX86_BUILTIN_RDTSC,
27573 IX86_BUILTIN_RDTSCP,
27574 IX86_BUILTIN_ROLQI,
27575 IX86_BUILTIN_ROLHI,
27576 IX86_BUILTIN_RORQI,
27577 IX86_BUILTIN_RORHI,
27578
27579 /* SSE3. */
27580 IX86_BUILTIN_ADDSUBPS,
27581 IX86_BUILTIN_HADDPS,
27582 IX86_BUILTIN_HSUBPS,
27583 IX86_BUILTIN_MOVSHDUP,
27584 IX86_BUILTIN_MOVSLDUP,
27585 IX86_BUILTIN_ADDSUBPD,
27586 IX86_BUILTIN_HADDPD,
27587 IX86_BUILTIN_HSUBPD,
27588 IX86_BUILTIN_LDDQU,
27589
27590 IX86_BUILTIN_MONITOR,
27591 IX86_BUILTIN_MWAIT,
27592
27593 /* SSSE3. */
27594 IX86_BUILTIN_PHADDW,
27595 IX86_BUILTIN_PHADDD,
27596 IX86_BUILTIN_PHADDSW,
27597 IX86_BUILTIN_PHSUBW,
27598 IX86_BUILTIN_PHSUBD,
27599 IX86_BUILTIN_PHSUBSW,
27600 IX86_BUILTIN_PMADDUBSW,
27601 IX86_BUILTIN_PMULHRSW,
27602 IX86_BUILTIN_PSHUFB,
27603 IX86_BUILTIN_PSIGNB,
27604 IX86_BUILTIN_PSIGNW,
27605 IX86_BUILTIN_PSIGND,
27606 IX86_BUILTIN_PALIGNR,
27607 IX86_BUILTIN_PABSB,
27608 IX86_BUILTIN_PABSW,
27609 IX86_BUILTIN_PABSD,
27610
27611 IX86_BUILTIN_PHADDW128,
27612 IX86_BUILTIN_PHADDD128,
27613 IX86_BUILTIN_PHADDSW128,
27614 IX86_BUILTIN_PHSUBW128,
27615 IX86_BUILTIN_PHSUBD128,
27616 IX86_BUILTIN_PHSUBSW128,
27617 IX86_BUILTIN_PMADDUBSW128,
27618 IX86_BUILTIN_PMULHRSW128,
27619 IX86_BUILTIN_PSHUFB128,
27620 IX86_BUILTIN_PSIGNB128,
27621 IX86_BUILTIN_PSIGNW128,
27622 IX86_BUILTIN_PSIGND128,
27623 IX86_BUILTIN_PALIGNR128,
27624 IX86_BUILTIN_PABSB128,
27625 IX86_BUILTIN_PABSW128,
27626 IX86_BUILTIN_PABSD128,
27627
27628 /* AMDFAM10 - SSE4A New Instructions. */
27629 IX86_BUILTIN_MOVNTSD,
27630 IX86_BUILTIN_MOVNTSS,
27631 IX86_BUILTIN_EXTRQI,
27632 IX86_BUILTIN_EXTRQ,
27633 IX86_BUILTIN_INSERTQI,
27634 IX86_BUILTIN_INSERTQ,
27635
27636 /* SSE4.1. */
27637 IX86_BUILTIN_BLENDPD,
27638 IX86_BUILTIN_BLENDPS,
27639 IX86_BUILTIN_BLENDVPD,
27640 IX86_BUILTIN_BLENDVPS,
27641 IX86_BUILTIN_PBLENDVB128,
27642 IX86_BUILTIN_PBLENDW128,
27643
27644 IX86_BUILTIN_DPPD,
27645 IX86_BUILTIN_DPPS,
27646
27647 IX86_BUILTIN_INSERTPS128,
27648
27649 IX86_BUILTIN_MOVNTDQA,
27650 IX86_BUILTIN_MPSADBW128,
27651 IX86_BUILTIN_PACKUSDW128,
27652 IX86_BUILTIN_PCMPEQQ,
27653 IX86_BUILTIN_PHMINPOSUW128,
27654
27655 IX86_BUILTIN_PMAXSB128,
27656 IX86_BUILTIN_PMAXSD128,
27657 IX86_BUILTIN_PMAXUD128,
27658 IX86_BUILTIN_PMAXUW128,
27659
27660 IX86_BUILTIN_PMINSB128,
27661 IX86_BUILTIN_PMINSD128,
27662 IX86_BUILTIN_PMINUD128,
27663 IX86_BUILTIN_PMINUW128,
27664
27665 IX86_BUILTIN_PMOVSXBW128,
27666 IX86_BUILTIN_PMOVSXBD128,
27667 IX86_BUILTIN_PMOVSXBQ128,
27668 IX86_BUILTIN_PMOVSXWD128,
27669 IX86_BUILTIN_PMOVSXWQ128,
27670 IX86_BUILTIN_PMOVSXDQ128,
27671
27672 IX86_BUILTIN_PMOVZXBW128,
27673 IX86_BUILTIN_PMOVZXBD128,
27674 IX86_BUILTIN_PMOVZXBQ128,
27675 IX86_BUILTIN_PMOVZXWD128,
27676 IX86_BUILTIN_PMOVZXWQ128,
27677 IX86_BUILTIN_PMOVZXDQ128,
27678
27679 IX86_BUILTIN_PMULDQ128,
27680 IX86_BUILTIN_PMULLD128,
27681
27682 IX86_BUILTIN_ROUNDSD,
27683 IX86_BUILTIN_ROUNDSS,
27684
27685 IX86_BUILTIN_ROUNDPD,
27686 IX86_BUILTIN_ROUNDPS,
27687
27688 IX86_BUILTIN_FLOORPD,
27689 IX86_BUILTIN_CEILPD,
27690 IX86_BUILTIN_TRUNCPD,
27691 IX86_BUILTIN_RINTPD,
27692 IX86_BUILTIN_ROUNDPD_AZ,
27693
27694 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27695 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27696 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27697
27698 IX86_BUILTIN_FLOORPS,
27699 IX86_BUILTIN_CEILPS,
27700 IX86_BUILTIN_TRUNCPS,
27701 IX86_BUILTIN_RINTPS,
27702 IX86_BUILTIN_ROUNDPS_AZ,
27703
27704 IX86_BUILTIN_FLOORPS_SFIX,
27705 IX86_BUILTIN_CEILPS_SFIX,
27706 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27707
27708 IX86_BUILTIN_PTESTZ,
27709 IX86_BUILTIN_PTESTC,
27710 IX86_BUILTIN_PTESTNZC,
27711
27712 IX86_BUILTIN_VEC_INIT_V2SI,
27713 IX86_BUILTIN_VEC_INIT_V4HI,
27714 IX86_BUILTIN_VEC_INIT_V8QI,
27715 IX86_BUILTIN_VEC_EXT_V2DF,
27716 IX86_BUILTIN_VEC_EXT_V2DI,
27717 IX86_BUILTIN_VEC_EXT_V4SF,
27718 IX86_BUILTIN_VEC_EXT_V4SI,
27719 IX86_BUILTIN_VEC_EXT_V8HI,
27720 IX86_BUILTIN_VEC_EXT_V2SI,
27721 IX86_BUILTIN_VEC_EXT_V4HI,
27722 IX86_BUILTIN_VEC_EXT_V16QI,
27723 IX86_BUILTIN_VEC_SET_V2DI,
27724 IX86_BUILTIN_VEC_SET_V4SF,
27725 IX86_BUILTIN_VEC_SET_V4SI,
27726 IX86_BUILTIN_VEC_SET_V8HI,
27727 IX86_BUILTIN_VEC_SET_V4HI,
27728 IX86_BUILTIN_VEC_SET_V16QI,
27729
27730 IX86_BUILTIN_VEC_PACK_SFIX,
27731 IX86_BUILTIN_VEC_PACK_SFIX256,
27732
27733 /* SSE4.2. */
27734 IX86_BUILTIN_CRC32QI,
27735 IX86_BUILTIN_CRC32HI,
27736 IX86_BUILTIN_CRC32SI,
27737 IX86_BUILTIN_CRC32DI,
27738
27739 IX86_BUILTIN_PCMPESTRI128,
27740 IX86_BUILTIN_PCMPESTRM128,
27741 IX86_BUILTIN_PCMPESTRA128,
27742 IX86_BUILTIN_PCMPESTRC128,
27743 IX86_BUILTIN_PCMPESTRO128,
27744 IX86_BUILTIN_PCMPESTRS128,
27745 IX86_BUILTIN_PCMPESTRZ128,
27746 IX86_BUILTIN_PCMPISTRI128,
27747 IX86_BUILTIN_PCMPISTRM128,
27748 IX86_BUILTIN_PCMPISTRA128,
27749 IX86_BUILTIN_PCMPISTRC128,
27750 IX86_BUILTIN_PCMPISTRO128,
27751 IX86_BUILTIN_PCMPISTRS128,
27752 IX86_BUILTIN_PCMPISTRZ128,
27753
27754 IX86_BUILTIN_PCMPGTQ,
27755
27756 /* AES instructions */
27757 IX86_BUILTIN_AESENC128,
27758 IX86_BUILTIN_AESENCLAST128,
27759 IX86_BUILTIN_AESDEC128,
27760 IX86_BUILTIN_AESDECLAST128,
27761 IX86_BUILTIN_AESIMC128,
27762 IX86_BUILTIN_AESKEYGENASSIST128,
27763
27764 /* PCLMUL instruction */
27765 IX86_BUILTIN_PCLMULQDQ128,
27766
27767 /* AVX */
27768 IX86_BUILTIN_ADDPD256,
27769 IX86_BUILTIN_ADDPS256,
27770 IX86_BUILTIN_ADDSUBPD256,
27771 IX86_BUILTIN_ADDSUBPS256,
27772 IX86_BUILTIN_ANDPD256,
27773 IX86_BUILTIN_ANDPS256,
27774 IX86_BUILTIN_ANDNPD256,
27775 IX86_BUILTIN_ANDNPS256,
27776 IX86_BUILTIN_BLENDPD256,
27777 IX86_BUILTIN_BLENDPS256,
27778 IX86_BUILTIN_BLENDVPD256,
27779 IX86_BUILTIN_BLENDVPS256,
27780 IX86_BUILTIN_DIVPD256,
27781 IX86_BUILTIN_DIVPS256,
27782 IX86_BUILTIN_DPPS256,
27783 IX86_BUILTIN_HADDPD256,
27784 IX86_BUILTIN_HADDPS256,
27785 IX86_BUILTIN_HSUBPD256,
27786 IX86_BUILTIN_HSUBPS256,
27787 IX86_BUILTIN_MAXPD256,
27788 IX86_BUILTIN_MAXPS256,
27789 IX86_BUILTIN_MINPD256,
27790 IX86_BUILTIN_MINPS256,
27791 IX86_BUILTIN_MULPD256,
27792 IX86_BUILTIN_MULPS256,
27793 IX86_BUILTIN_ORPD256,
27794 IX86_BUILTIN_ORPS256,
27795 IX86_BUILTIN_SHUFPD256,
27796 IX86_BUILTIN_SHUFPS256,
27797 IX86_BUILTIN_SUBPD256,
27798 IX86_BUILTIN_SUBPS256,
27799 IX86_BUILTIN_XORPD256,
27800 IX86_BUILTIN_XORPS256,
27801 IX86_BUILTIN_CMPSD,
27802 IX86_BUILTIN_CMPSS,
27803 IX86_BUILTIN_CMPPD,
27804 IX86_BUILTIN_CMPPS,
27805 IX86_BUILTIN_CMPPD256,
27806 IX86_BUILTIN_CMPPS256,
27807 IX86_BUILTIN_CVTDQ2PD256,
27808 IX86_BUILTIN_CVTDQ2PS256,
27809 IX86_BUILTIN_CVTPD2PS256,
27810 IX86_BUILTIN_CVTPS2DQ256,
27811 IX86_BUILTIN_CVTPS2PD256,
27812 IX86_BUILTIN_CVTTPD2DQ256,
27813 IX86_BUILTIN_CVTPD2DQ256,
27814 IX86_BUILTIN_CVTTPS2DQ256,
27815 IX86_BUILTIN_EXTRACTF128PD256,
27816 IX86_BUILTIN_EXTRACTF128PS256,
27817 IX86_BUILTIN_EXTRACTF128SI256,
27818 IX86_BUILTIN_VZEROALL,
27819 IX86_BUILTIN_VZEROUPPER,
27820 IX86_BUILTIN_VPERMILVARPD,
27821 IX86_BUILTIN_VPERMILVARPS,
27822 IX86_BUILTIN_VPERMILVARPD256,
27823 IX86_BUILTIN_VPERMILVARPS256,
27824 IX86_BUILTIN_VPERMILPD,
27825 IX86_BUILTIN_VPERMILPS,
27826 IX86_BUILTIN_VPERMILPD256,
27827 IX86_BUILTIN_VPERMILPS256,
27828 IX86_BUILTIN_VPERMIL2PD,
27829 IX86_BUILTIN_VPERMIL2PS,
27830 IX86_BUILTIN_VPERMIL2PD256,
27831 IX86_BUILTIN_VPERMIL2PS256,
27832 IX86_BUILTIN_VPERM2F128PD256,
27833 IX86_BUILTIN_VPERM2F128PS256,
27834 IX86_BUILTIN_VPERM2F128SI256,
27835 IX86_BUILTIN_VBROADCASTSS,
27836 IX86_BUILTIN_VBROADCASTSD256,
27837 IX86_BUILTIN_VBROADCASTSS256,
27838 IX86_BUILTIN_VBROADCASTPD256,
27839 IX86_BUILTIN_VBROADCASTPS256,
27840 IX86_BUILTIN_VINSERTF128PD256,
27841 IX86_BUILTIN_VINSERTF128PS256,
27842 IX86_BUILTIN_VINSERTF128SI256,
27843 IX86_BUILTIN_LOADUPD256,
27844 IX86_BUILTIN_LOADUPS256,
27845 IX86_BUILTIN_STOREUPD256,
27846 IX86_BUILTIN_STOREUPS256,
27847 IX86_BUILTIN_LDDQU256,
27848 IX86_BUILTIN_MOVNTDQ256,
27849 IX86_BUILTIN_MOVNTPD256,
27850 IX86_BUILTIN_MOVNTPS256,
27851 IX86_BUILTIN_LOADDQU256,
27852 IX86_BUILTIN_STOREDQU256,
27853 IX86_BUILTIN_MASKLOADPD,
27854 IX86_BUILTIN_MASKLOADPS,
27855 IX86_BUILTIN_MASKSTOREPD,
27856 IX86_BUILTIN_MASKSTOREPS,
27857 IX86_BUILTIN_MASKLOADPD256,
27858 IX86_BUILTIN_MASKLOADPS256,
27859 IX86_BUILTIN_MASKSTOREPD256,
27860 IX86_BUILTIN_MASKSTOREPS256,
27861 IX86_BUILTIN_MOVSHDUP256,
27862 IX86_BUILTIN_MOVSLDUP256,
27863 IX86_BUILTIN_MOVDDUP256,
27864
27865 IX86_BUILTIN_SQRTPD256,
27866 IX86_BUILTIN_SQRTPS256,
27867 IX86_BUILTIN_SQRTPS_NR256,
27868 IX86_BUILTIN_RSQRTPS256,
27869 IX86_BUILTIN_RSQRTPS_NR256,
27870
27871 IX86_BUILTIN_RCPPS256,
27872
27873 IX86_BUILTIN_ROUNDPD256,
27874 IX86_BUILTIN_ROUNDPS256,
27875
27876 IX86_BUILTIN_FLOORPD256,
27877 IX86_BUILTIN_CEILPD256,
27878 IX86_BUILTIN_TRUNCPD256,
27879 IX86_BUILTIN_RINTPD256,
27880 IX86_BUILTIN_ROUNDPD_AZ256,
27881
27882 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27883 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27884 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27885
27886 IX86_BUILTIN_FLOORPS256,
27887 IX86_BUILTIN_CEILPS256,
27888 IX86_BUILTIN_TRUNCPS256,
27889 IX86_BUILTIN_RINTPS256,
27890 IX86_BUILTIN_ROUNDPS_AZ256,
27891
27892 IX86_BUILTIN_FLOORPS_SFIX256,
27893 IX86_BUILTIN_CEILPS_SFIX256,
27894 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27895
27896 IX86_BUILTIN_UNPCKHPD256,
27897 IX86_BUILTIN_UNPCKLPD256,
27898 IX86_BUILTIN_UNPCKHPS256,
27899 IX86_BUILTIN_UNPCKLPS256,
27900
27901 IX86_BUILTIN_SI256_SI,
27902 IX86_BUILTIN_PS256_PS,
27903 IX86_BUILTIN_PD256_PD,
27904 IX86_BUILTIN_SI_SI256,
27905 IX86_BUILTIN_PS_PS256,
27906 IX86_BUILTIN_PD_PD256,
27907
27908 IX86_BUILTIN_VTESTZPD,
27909 IX86_BUILTIN_VTESTCPD,
27910 IX86_BUILTIN_VTESTNZCPD,
27911 IX86_BUILTIN_VTESTZPS,
27912 IX86_BUILTIN_VTESTCPS,
27913 IX86_BUILTIN_VTESTNZCPS,
27914 IX86_BUILTIN_VTESTZPD256,
27915 IX86_BUILTIN_VTESTCPD256,
27916 IX86_BUILTIN_VTESTNZCPD256,
27917 IX86_BUILTIN_VTESTZPS256,
27918 IX86_BUILTIN_VTESTCPS256,
27919 IX86_BUILTIN_VTESTNZCPS256,
27920 IX86_BUILTIN_PTESTZ256,
27921 IX86_BUILTIN_PTESTC256,
27922 IX86_BUILTIN_PTESTNZC256,
27923
27924 IX86_BUILTIN_MOVMSKPD256,
27925 IX86_BUILTIN_MOVMSKPS256,
27926
27927 /* AVX2 */
27928 IX86_BUILTIN_MPSADBW256,
27929 IX86_BUILTIN_PABSB256,
27930 IX86_BUILTIN_PABSW256,
27931 IX86_BUILTIN_PABSD256,
27932 IX86_BUILTIN_PACKSSDW256,
27933 IX86_BUILTIN_PACKSSWB256,
27934 IX86_BUILTIN_PACKUSDW256,
27935 IX86_BUILTIN_PACKUSWB256,
27936 IX86_BUILTIN_PADDB256,
27937 IX86_BUILTIN_PADDW256,
27938 IX86_BUILTIN_PADDD256,
27939 IX86_BUILTIN_PADDQ256,
27940 IX86_BUILTIN_PADDSB256,
27941 IX86_BUILTIN_PADDSW256,
27942 IX86_BUILTIN_PADDUSB256,
27943 IX86_BUILTIN_PADDUSW256,
27944 IX86_BUILTIN_PALIGNR256,
27945 IX86_BUILTIN_AND256I,
27946 IX86_BUILTIN_ANDNOT256I,
27947 IX86_BUILTIN_PAVGB256,
27948 IX86_BUILTIN_PAVGW256,
27949 IX86_BUILTIN_PBLENDVB256,
27950 IX86_BUILTIN_PBLENDVW256,
27951 IX86_BUILTIN_PCMPEQB256,
27952 IX86_BUILTIN_PCMPEQW256,
27953 IX86_BUILTIN_PCMPEQD256,
27954 IX86_BUILTIN_PCMPEQQ256,
27955 IX86_BUILTIN_PCMPGTB256,
27956 IX86_BUILTIN_PCMPGTW256,
27957 IX86_BUILTIN_PCMPGTD256,
27958 IX86_BUILTIN_PCMPGTQ256,
27959 IX86_BUILTIN_PHADDW256,
27960 IX86_BUILTIN_PHADDD256,
27961 IX86_BUILTIN_PHADDSW256,
27962 IX86_BUILTIN_PHSUBW256,
27963 IX86_BUILTIN_PHSUBD256,
27964 IX86_BUILTIN_PHSUBSW256,
27965 IX86_BUILTIN_PMADDUBSW256,
27966 IX86_BUILTIN_PMADDWD256,
27967 IX86_BUILTIN_PMAXSB256,
27968 IX86_BUILTIN_PMAXSW256,
27969 IX86_BUILTIN_PMAXSD256,
27970 IX86_BUILTIN_PMAXUB256,
27971 IX86_BUILTIN_PMAXUW256,
27972 IX86_BUILTIN_PMAXUD256,
27973 IX86_BUILTIN_PMINSB256,
27974 IX86_BUILTIN_PMINSW256,
27975 IX86_BUILTIN_PMINSD256,
27976 IX86_BUILTIN_PMINUB256,
27977 IX86_BUILTIN_PMINUW256,
27978 IX86_BUILTIN_PMINUD256,
27979 IX86_BUILTIN_PMOVMSKB256,
27980 IX86_BUILTIN_PMOVSXBW256,
27981 IX86_BUILTIN_PMOVSXBD256,
27982 IX86_BUILTIN_PMOVSXBQ256,
27983 IX86_BUILTIN_PMOVSXWD256,
27984 IX86_BUILTIN_PMOVSXWQ256,
27985 IX86_BUILTIN_PMOVSXDQ256,
27986 IX86_BUILTIN_PMOVZXBW256,
27987 IX86_BUILTIN_PMOVZXBD256,
27988 IX86_BUILTIN_PMOVZXBQ256,
27989 IX86_BUILTIN_PMOVZXWD256,
27990 IX86_BUILTIN_PMOVZXWQ256,
27991 IX86_BUILTIN_PMOVZXDQ256,
27992 IX86_BUILTIN_PMULDQ256,
27993 IX86_BUILTIN_PMULHRSW256,
27994 IX86_BUILTIN_PMULHUW256,
27995 IX86_BUILTIN_PMULHW256,
27996 IX86_BUILTIN_PMULLW256,
27997 IX86_BUILTIN_PMULLD256,
27998 IX86_BUILTIN_PMULUDQ256,
27999 IX86_BUILTIN_POR256,
28000 IX86_BUILTIN_PSADBW256,
28001 IX86_BUILTIN_PSHUFB256,
28002 IX86_BUILTIN_PSHUFD256,
28003 IX86_BUILTIN_PSHUFHW256,
28004 IX86_BUILTIN_PSHUFLW256,
28005 IX86_BUILTIN_PSIGNB256,
28006 IX86_BUILTIN_PSIGNW256,
28007 IX86_BUILTIN_PSIGND256,
28008 IX86_BUILTIN_PSLLDQI256,
28009 IX86_BUILTIN_PSLLWI256,
28010 IX86_BUILTIN_PSLLW256,
28011 IX86_BUILTIN_PSLLDI256,
28012 IX86_BUILTIN_PSLLD256,
28013 IX86_BUILTIN_PSLLQI256,
28014 IX86_BUILTIN_PSLLQ256,
28015 IX86_BUILTIN_PSRAWI256,
28016 IX86_BUILTIN_PSRAW256,
28017 IX86_BUILTIN_PSRADI256,
28018 IX86_BUILTIN_PSRAD256,
28019 IX86_BUILTIN_PSRLDQI256,
28020 IX86_BUILTIN_PSRLWI256,
28021 IX86_BUILTIN_PSRLW256,
28022 IX86_BUILTIN_PSRLDI256,
28023 IX86_BUILTIN_PSRLD256,
28024 IX86_BUILTIN_PSRLQI256,
28025 IX86_BUILTIN_PSRLQ256,
28026 IX86_BUILTIN_PSUBB256,
28027 IX86_BUILTIN_PSUBW256,
28028 IX86_BUILTIN_PSUBD256,
28029 IX86_BUILTIN_PSUBQ256,
28030 IX86_BUILTIN_PSUBSB256,
28031 IX86_BUILTIN_PSUBSW256,
28032 IX86_BUILTIN_PSUBUSB256,
28033 IX86_BUILTIN_PSUBUSW256,
28034 IX86_BUILTIN_PUNPCKHBW256,
28035 IX86_BUILTIN_PUNPCKHWD256,
28036 IX86_BUILTIN_PUNPCKHDQ256,
28037 IX86_BUILTIN_PUNPCKHQDQ256,
28038 IX86_BUILTIN_PUNPCKLBW256,
28039 IX86_BUILTIN_PUNPCKLWD256,
28040 IX86_BUILTIN_PUNPCKLDQ256,
28041 IX86_BUILTIN_PUNPCKLQDQ256,
28042 IX86_BUILTIN_PXOR256,
28043 IX86_BUILTIN_MOVNTDQA256,
28044 IX86_BUILTIN_VBROADCASTSS_PS,
28045 IX86_BUILTIN_VBROADCASTSS_PS256,
28046 IX86_BUILTIN_VBROADCASTSD_PD256,
28047 IX86_BUILTIN_VBROADCASTSI256,
28048 IX86_BUILTIN_PBLENDD256,
28049 IX86_BUILTIN_PBLENDD128,
28050 IX86_BUILTIN_PBROADCASTB256,
28051 IX86_BUILTIN_PBROADCASTW256,
28052 IX86_BUILTIN_PBROADCASTD256,
28053 IX86_BUILTIN_PBROADCASTQ256,
28054 IX86_BUILTIN_PBROADCASTB128,
28055 IX86_BUILTIN_PBROADCASTW128,
28056 IX86_BUILTIN_PBROADCASTD128,
28057 IX86_BUILTIN_PBROADCASTQ128,
28058 IX86_BUILTIN_VPERMVARSI256,
28059 IX86_BUILTIN_VPERMDF256,
28060 IX86_BUILTIN_VPERMVARSF256,
28061 IX86_BUILTIN_VPERMDI256,
28062 IX86_BUILTIN_VPERMTI256,
28063 IX86_BUILTIN_VEXTRACT128I256,
28064 IX86_BUILTIN_VINSERT128I256,
28065 IX86_BUILTIN_MASKLOADD,
28066 IX86_BUILTIN_MASKLOADQ,
28067 IX86_BUILTIN_MASKLOADD256,
28068 IX86_BUILTIN_MASKLOADQ256,
28069 IX86_BUILTIN_MASKSTORED,
28070 IX86_BUILTIN_MASKSTOREQ,
28071 IX86_BUILTIN_MASKSTORED256,
28072 IX86_BUILTIN_MASKSTOREQ256,
28073 IX86_BUILTIN_PSLLVV4DI,
28074 IX86_BUILTIN_PSLLVV2DI,
28075 IX86_BUILTIN_PSLLVV8SI,
28076 IX86_BUILTIN_PSLLVV4SI,
28077 IX86_BUILTIN_PSRAVV8SI,
28078 IX86_BUILTIN_PSRAVV4SI,
28079 IX86_BUILTIN_PSRLVV4DI,
28080 IX86_BUILTIN_PSRLVV2DI,
28081 IX86_BUILTIN_PSRLVV8SI,
28082 IX86_BUILTIN_PSRLVV4SI,
28083
28084 IX86_BUILTIN_GATHERSIV2DF,
28085 IX86_BUILTIN_GATHERSIV4DF,
28086 IX86_BUILTIN_GATHERDIV2DF,
28087 IX86_BUILTIN_GATHERDIV4DF,
28088 IX86_BUILTIN_GATHERSIV4SF,
28089 IX86_BUILTIN_GATHERSIV8SF,
28090 IX86_BUILTIN_GATHERDIV4SF,
28091 IX86_BUILTIN_GATHERDIV8SF,
28092 IX86_BUILTIN_GATHERSIV2DI,
28093 IX86_BUILTIN_GATHERSIV4DI,
28094 IX86_BUILTIN_GATHERDIV2DI,
28095 IX86_BUILTIN_GATHERDIV4DI,
28096 IX86_BUILTIN_GATHERSIV4SI,
28097 IX86_BUILTIN_GATHERSIV8SI,
28098 IX86_BUILTIN_GATHERDIV4SI,
28099 IX86_BUILTIN_GATHERDIV8SI,
28100
28101 /* AVX512F */
28102 IX86_BUILTIN_ADDPD512,
28103 IX86_BUILTIN_ADDPS512,
28104 IX86_BUILTIN_ADDSD_ROUND,
28105 IX86_BUILTIN_ADDSS_ROUND,
28106 IX86_BUILTIN_ALIGND512,
28107 IX86_BUILTIN_ALIGNQ512,
28108 IX86_BUILTIN_BLENDMD512,
28109 IX86_BUILTIN_BLENDMPD512,
28110 IX86_BUILTIN_BLENDMPS512,
28111 IX86_BUILTIN_BLENDMQ512,
28112 IX86_BUILTIN_BROADCASTF32X4_512,
28113 IX86_BUILTIN_BROADCASTF64X4_512,
28114 IX86_BUILTIN_BROADCASTI32X4_512,
28115 IX86_BUILTIN_BROADCASTI64X4_512,
28116 IX86_BUILTIN_BROADCASTSD512,
28117 IX86_BUILTIN_BROADCASTSS512,
28118 IX86_BUILTIN_CMPD512,
28119 IX86_BUILTIN_CMPPD512,
28120 IX86_BUILTIN_CMPPS512,
28121 IX86_BUILTIN_CMPQ512,
28122 IX86_BUILTIN_CMPSD_MASK,
28123 IX86_BUILTIN_CMPSS_MASK,
28124 IX86_BUILTIN_COMIDF,
28125 IX86_BUILTIN_COMISF,
28126 IX86_BUILTIN_COMPRESSPD512,
28127 IX86_BUILTIN_COMPRESSPDSTORE512,
28128 IX86_BUILTIN_COMPRESSPS512,
28129 IX86_BUILTIN_COMPRESSPSSTORE512,
28130 IX86_BUILTIN_CVTDQ2PD512,
28131 IX86_BUILTIN_CVTDQ2PS512,
28132 IX86_BUILTIN_CVTPD2DQ512,
28133 IX86_BUILTIN_CVTPD2PS512,
28134 IX86_BUILTIN_CVTPD2UDQ512,
28135 IX86_BUILTIN_CVTPH2PS512,
28136 IX86_BUILTIN_CVTPS2DQ512,
28137 IX86_BUILTIN_CVTPS2PD512,
28138 IX86_BUILTIN_CVTPS2PH512,
28139 IX86_BUILTIN_CVTPS2UDQ512,
28140 IX86_BUILTIN_CVTSD2SS_ROUND,
28141 IX86_BUILTIN_CVTSI2SD64,
28142 IX86_BUILTIN_CVTSI2SS32,
28143 IX86_BUILTIN_CVTSI2SS64,
28144 IX86_BUILTIN_CVTSS2SD_ROUND,
28145 IX86_BUILTIN_CVTTPD2DQ512,
28146 IX86_BUILTIN_CVTTPD2UDQ512,
28147 IX86_BUILTIN_CVTTPS2DQ512,
28148 IX86_BUILTIN_CVTTPS2UDQ512,
28149 IX86_BUILTIN_CVTUDQ2PD512,
28150 IX86_BUILTIN_CVTUDQ2PS512,
28151 IX86_BUILTIN_CVTUSI2SD32,
28152 IX86_BUILTIN_CVTUSI2SD64,
28153 IX86_BUILTIN_CVTUSI2SS32,
28154 IX86_BUILTIN_CVTUSI2SS64,
28155 IX86_BUILTIN_DIVPD512,
28156 IX86_BUILTIN_DIVPS512,
28157 IX86_BUILTIN_DIVSD_ROUND,
28158 IX86_BUILTIN_DIVSS_ROUND,
28159 IX86_BUILTIN_EXPANDPD512,
28160 IX86_BUILTIN_EXPANDPD512Z,
28161 IX86_BUILTIN_EXPANDPDLOAD512,
28162 IX86_BUILTIN_EXPANDPDLOAD512Z,
28163 IX86_BUILTIN_EXPANDPS512,
28164 IX86_BUILTIN_EXPANDPS512Z,
28165 IX86_BUILTIN_EXPANDPSLOAD512,
28166 IX86_BUILTIN_EXPANDPSLOAD512Z,
28167 IX86_BUILTIN_EXTRACTF32X4,
28168 IX86_BUILTIN_EXTRACTF64X4,
28169 IX86_BUILTIN_EXTRACTI32X4,
28170 IX86_BUILTIN_EXTRACTI64X4,
28171 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28172 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28173 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28174 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28175 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28176 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28177 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28178 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28179 IX86_BUILTIN_GETEXPPD512,
28180 IX86_BUILTIN_GETEXPPS512,
28181 IX86_BUILTIN_GETEXPSD128,
28182 IX86_BUILTIN_GETEXPSS128,
28183 IX86_BUILTIN_GETMANTPD512,
28184 IX86_BUILTIN_GETMANTPS512,
28185 IX86_BUILTIN_GETMANTSD128,
28186 IX86_BUILTIN_GETMANTSS128,
28187 IX86_BUILTIN_INSERTF32X4,
28188 IX86_BUILTIN_INSERTF64X4,
28189 IX86_BUILTIN_INSERTI32X4,
28190 IX86_BUILTIN_INSERTI64X4,
28191 IX86_BUILTIN_LOADAPD512,
28192 IX86_BUILTIN_LOADAPS512,
28193 IX86_BUILTIN_LOADDQUDI512,
28194 IX86_BUILTIN_LOADDQUSI512,
28195 IX86_BUILTIN_LOADUPD512,
28196 IX86_BUILTIN_LOADUPS512,
28197 IX86_BUILTIN_MAXPD512,
28198 IX86_BUILTIN_MAXPS512,
28199 IX86_BUILTIN_MAXSD_ROUND,
28200 IX86_BUILTIN_MAXSS_ROUND,
28201 IX86_BUILTIN_MINPD512,
28202 IX86_BUILTIN_MINPS512,
28203 IX86_BUILTIN_MINSD_ROUND,
28204 IX86_BUILTIN_MINSS_ROUND,
28205 IX86_BUILTIN_MOVAPD512,
28206 IX86_BUILTIN_MOVAPS512,
28207 IX86_BUILTIN_MOVDDUP512,
28208 IX86_BUILTIN_MOVDQA32LOAD512,
28209 IX86_BUILTIN_MOVDQA32STORE512,
28210 IX86_BUILTIN_MOVDQA32_512,
28211 IX86_BUILTIN_MOVDQA64LOAD512,
28212 IX86_BUILTIN_MOVDQA64STORE512,
28213 IX86_BUILTIN_MOVDQA64_512,
28214 IX86_BUILTIN_MOVNTDQ512,
28215 IX86_BUILTIN_MOVNTDQA512,
28216 IX86_BUILTIN_MOVNTPD512,
28217 IX86_BUILTIN_MOVNTPS512,
28218 IX86_BUILTIN_MOVSHDUP512,
28219 IX86_BUILTIN_MOVSLDUP512,
28220 IX86_BUILTIN_MULPD512,
28221 IX86_BUILTIN_MULPS512,
28222 IX86_BUILTIN_MULSD_ROUND,
28223 IX86_BUILTIN_MULSS_ROUND,
28224 IX86_BUILTIN_PABSD512,
28225 IX86_BUILTIN_PABSQ512,
28226 IX86_BUILTIN_PADDD512,
28227 IX86_BUILTIN_PADDQ512,
28228 IX86_BUILTIN_PANDD512,
28229 IX86_BUILTIN_PANDND512,
28230 IX86_BUILTIN_PANDNQ512,
28231 IX86_BUILTIN_PANDQ512,
28232 IX86_BUILTIN_PBROADCASTD512,
28233 IX86_BUILTIN_PBROADCASTD512_GPR,
28234 IX86_BUILTIN_PBROADCASTMB512,
28235 IX86_BUILTIN_PBROADCASTMW512,
28236 IX86_BUILTIN_PBROADCASTQ512,
28237 IX86_BUILTIN_PBROADCASTQ512_GPR,
28238 IX86_BUILTIN_PBROADCASTQ512_MEM,
28239 IX86_BUILTIN_PCMPEQD512_MASK,
28240 IX86_BUILTIN_PCMPEQQ512_MASK,
28241 IX86_BUILTIN_PCMPGTD512_MASK,
28242 IX86_BUILTIN_PCMPGTQ512_MASK,
28243 IX86_BUILTIN_PCOMPRESSD512,
28244 IX86_BUILTIN_PCOMPRESSDSTORE512,
28245 IX86_BUILTIN_PCOMPRESSQ512,
28246 IX86_BUILTIN_PCOMPRESSQSTORE512,
28247 IX86_BUILTIN_PEXPANDD512,
28248 IX86_BUILTIN_PEXPANDD512Z,
28249 IX86_BUILTIN_PEXPANDDLOAD512,
28250 IX86_BUILTIN_PEXPANDDLOAD512Z,
28251 IX86_BUILTIN_PEXPANDQ512,
28252 IX86_BUILTIN_PEXPANDQ512Z,
28253 IX86_BUILTIN_PEXPANDQLOAD512,
28254 IX86_BUILTIN_PEXPANDQLOAD512Z,
28255 IX86_BUILTIN_PMAXSD512,
28256 IX86_BUILTIN_PMAXSQ512,
28257 IX86_BUILTIN_PMAXUD512,
28258 IX86_BUILTIN_PMAXUQ512,
28259 IX86_BUILTIN_PMINSD512,
28260 IX86_BUILTIN_PMINSQ512,
28261 IX86_BUILTIN_PMINUD512,
28262 IX86_BUILTIN_PMINUQ512,
28263 IX86_BUILTIN_PMOVDB512,
28264 IX86_BUILTIN_PMOVDB512_MEM,
28265 IX86_BUILTIN_PMOVDW512,
28266 IX86_BUILTIN_PMOVDW512_MEM,
28267 IX86_BUILTIN_PMOVQB512,
28268 IX86_BUILTIN_PMOVQB512_MEM,
28269 IX86_BUILTIN_PMOVQD512,
28270 IX86_BUILTIN_PMOVQD512_MEM,
28271 IX86_BUILTIN_PMOVQW512,
28272 IX86_BUILTIN_PMOVQW512_MEM,
28273 IX86_BUILTIN_PMOVSDB512,
28274 IX86_BUILTIN_PMOVSDB512_MEM,
28275 IX86_BUILTIN_PMOVSDW512,
28276 IX86_BUILTIN_PMOVSDW512_MEM,
28277 IX86_BUILTIN_PMOVSQB512,
28278 IX86_BUILTIN_PMOVSQB512_MEM,
28279 IX86_BUILTIN_PMOVSQD512,
28280 IX86_BUILTIN_PMOVSQD512_MEM,
28281 IX86_BUILTIN_PMOVSQW512,
28282 IX86_BUILTIN_PMOVSQW512_MEM,
28283 IX86_BUILTIN_PMOVSXBD512,
28284 IX86_BUILTIN_PMOVSXBQ512,
28285 IX86_BUILTIN_PMOVSXDQ512,
28286 IX86_BUILTIN_PMOVSXWD512,
28287 IX86_BUILTIN_PMOVSXWQ512,
28288 IX86_BUILTIN_PMOVUSDB512,
28289 IX86_BUILTIN_PMOVUSDB512_MEM,
28290 IX86_BUILTIN_PMOVUSDW512,
28291 IX86_BUILTIN_PMOVUSDW512_MEM,
28292 IX86_BUILTIN_PMOVUSQB512,
28293 IX86_BUILTIN_PMOVUSQB512_MEM,
28294 IX86_BUILTIN_PMOVUSQD512,
28295 IX86_BUILTIN_PMOVUSQD512_MEM,
28296 IX86_BUILTIN_PMOVUSQW512,
28297 IX86_BUILTIN_PMOVUSQW512_MEM,
28298 IX86_BUILTIN_PMOVZXBD512,
28299 IX86_BUILTIN_PMOVZXBQ512,
28300 IX86_BUILTIN_PMOVZXDQ512,
28301 IX86_BUILTIN_PMOVZXWD512,
28302 IX86_BUILTIN_PMOVZXWQ512,
28303 IX86_BUILTIN_PMULDQ512,
28304 IX86_BUILTIN_PMULLD512,
28305 IX86_BUILTIN_PMULUDQ512,
28306 IX86_BUILTIN_PORD512,
28307 IX86_BUILTIN_PORQ512,
28308 IX86_BUILTIN_PROLD512,
28309 IX86_BUILTIN_PROLQ512,
28310 IX86_BUILTIN_PROLVD512,
28311 IX86_BUILTIN_PROLVQ512,
28312 IX86_BUILTIN_PRORD512,
28313 IX86_BUILTIN_PRORQ512,
28314 IX86_BUILTIN_PRORVD512,
28315 IX86_BUILTIN_PRORVQ512,
28316 IX86_BUILTIN_PSHUFD512,
28317 IX86_BUILTIN_PSLLD512,
28318 IX86_BUILTIN_PSLLDI512,
28319 IX86_BUILTIN_PSLLQ512,
28320 IX86_BUILTIN_PSLLQI512,
28321 IX86_BUILTIN_PSLLVV16SI,
28322 IX86_BUILTIN_PSLLVV8DI,
28323 IX86_BUILTIN_PSRAD512,
28324 IX86_BUILTIN_PSRADI512,
28325 IX86_BUILTIN_PSRAQ512,
28326 IX86_BUILTIN_PSRAQI512,
28327 IX86_BUILTIN_PSRAVV16SI,
28328 IX86_BUILTIN_PSRAVV8DI,
28329 IX86_BUILTIN_PSRLD512,
28330 IX86_BUILTIN_PSRLDI512,
28331 IX86_BUILTIN_PSRLQ512,
28332 IX86_BUILTIN_PSRLQI512,
28333 IX86_BUILTIN_PSRLVV16SI,
28334 IX86_BUILTIN_PSRLVV8DI,
28335 IX86_BUILTIN_PSUBD512,
28336 IX86_BUILTIN_PSUBQ512,
28337 IX86_BUILTIN_PTESTMD512,
28338 IX86_BUILTIN_PTESTMQ512,
28339 IX86_BUILTIN_PTESTNMD512,
28340 IX86_BUILTIN_PTESTNMQ512,
28341 IX86_BUILTIN_PUNPCKHDQ512,
28342 IX86_BUILTIN_PUNPCKHQDQ512,
28343 IX86_BUILTIN_PUNPCKLDQ512,
28344 IX86_BUILTIN_PUNPCKLQDQ512,
28345 IX86_BUILTIN_PXORD512,
28346 IX86_BUILTIN_PXORQ512,
28347 IX86_BUILTIN_RCP14PD512,
28348 IX86_BUILTIN_RCP14PS512,
28349 IX86_BUILTIN_RCP14SD,
28350 IX86_BUILTIN_RCP14SS,
28351 IX86_BUILTIN_RNDSCALEPD,
28352 IX86_BUILTIN_RNDSCALEPS,
28353 IX86_BUILTIN_RNDSCALESD,
28354 IX86_BUILTIN_RNDSCALESS,
28355 IX86_BUILTIN_RSQRT14PD512,
28356 IX86_BUILTIN_RSQRT14PS512,
28357 IX86_BUILTIN_RSQRT14SD,
28358 IX86_BUILTIN_RSQRT14SS,
28359 IX86_BUILTIN_SCALEFPD512,
28360 IX86_BUILTIN_SCALEFPS512,
28361 IX86_BUILTIN_SCALEFSD,
28362 IX86_BUILTIN_SCALEFSS,
28363 IX86_BUILTIN_SHUFPD512,
28364 IX86_BUILTIN_SHUFPS512,
28365 IX86_BUILTIN_SHUF_F32x4,
28366 IX86_BUILTIN_SHUF_F64x2,
28367 IX86_BUILTIN_SHUF_I32x4,
28368 IX86_BUILTIN_SHUF_I64x2,
28369 IX86_BUILTIN_SQRTPD512,
28370 IX86_BUILTIN_SQRTPD512_MASK,
28371 IX86_BUILTIN_SQRTPS512_MASK,
28372 IX86_BUILTIN_SQRTPS_NR512,
28373 IX86_BUILTIN_SQRTSD_ROUND,
28374 IX86_BUILTIN_SQRTSS_ROUND,
28375 IX86_BUILTIN_STOREAPD512,
28376 IX86_BUILTIN_STOREAPS512,
28377 IX86_BUILTIN_STOREDQUDI512,
28378 IX86_BUILTIN_STOREDQUSI512,
28379 IX86_BUILTIN_STOREUPD512,
28380 IX86_BUILTIN_STOREUPS512,
28381 IX86_BUILTIN_SUBPD512,
28382 IX86_BUILTIN_SUBPS512,
28383 IX86_BUILTIN_SUBSD_ROUND,
28384 IX86_BUILTIN_SUBSS_ROUND,
28385 IX86_BUILTIN_UCMPD512,
28386 IX86_BUILTIN_UCMPQ512,
28387 IX86_BUILTIN_UNPCKHPD512,
28388 IX86_BUILTIN_UNPCKHPS512,
28389 IX86_BUILTIN_UNPCKLPD512,
28390 IX86_BUILTIN_UNPCKLPS512,
28391 IX86_BUILTIN_VCVTSD2SI32,
28392 IX86_BUILTIN_VCVTSD2SI64,
28393 IX86_BUILTIN_VCVTSD2USI32,
28394 IX86_BUILTIN_VCVTSD2USI64,
28395 IX86_BUILTIN_VCVTSS2SI32,
28396 IX86_BUILTIN_VCVTSS2SI64,
28397 IX86_BUILTIN_VCVTSS2USI32,
28398 IX86_BUILTIN_VCVTSS2USI64,
28399 IX86_BUILTIN_VCVTTSD2SI32,
28400 IX86_BUILTIN_VCVTTSD2SI64,
28401 IX86_BUILTIN_VCVTTSD2USI32,
28402 IX86_BUILTIN_VCVTTSD2USI64,
28403 IX86_BUILTIN_VCVTTSS2SI32,
28404 IX86_BUILTIN_VCVTTSS2SI64,
28405 IX86_BUILTIN_VCVTTSS2USI32,
28406 IX86_BUILTIN_VCVTTSS2USI64,
28407 IX86_BUILTIN_VFMADDPD512_MASK,
28408 IX86_BUILTIN_VFMADDPD512_MASK3,
28409 IX86_BUILTIN_VFMADDPD512_MASKZ,
28410 IX86_BUILTIN_VFMADDPS512_MASK,
28411 IX86_BUILTIN_VFMADDPS512_MASK3,
28412 IX86_BUILTIN_VFMADDPS512_MASKZ,
28413 IX86_BUILTIN_VFMADDSD3_ROUND,
28414 IX86_BUILTIN_VFMADDSS3_ROUND,
28415 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28416 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28417 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28418 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28419 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28420 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28421 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28422 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28423 IX86_BUILTIN_VFMSUBPD512_MASK3,
28424 IX86_BUILTIN_VFMSUBPS512_MASK3,
28425 IX86_BUILTIN_VFMSUBSD3_MASK3,
28426 IX86_BUILTIN_VFMSUBSS3_MASK3,
28427 IX86_BUILTIN_VFNMADDPD512_MASK,
28428 IX86_BUILTIN_VFNMADDPS512_MASK,
28429 IX86_BUILTIN_VFNMSUBPD512_MASK,
28430 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28431 IX86_BUILTIN_VFNMSUBPS512_MASK,
28432 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28433 IX86_BUILTIN_VPCLZCNTD512,
28434 IX86_BUILTIN_VPCLZCNTQ512,
28435 IX86_BUILTIN_VPCONFLICTD512,
28436 IX86_BUILTIN_VPCONFLICTQ512,
28437 IX86_BUILTIN_VPERMDF512,
28438 IX86_BUILTIN_VPERMDI512,
28439 IX86_BUILTIN_VPERMI2VARD512,
28440 IX86_BUILTIN_VPERMI2VARPD512,
28441 IX86_BUILTIN_VPERMI2VARPS512,
28442 IX86_BUILTIN_VPERMI2VARQ512,
28443 IX86_BUILTIN_VPERMILPD512,
28444 IX86_BUILTIN_VPERMILPS512,
28445 IX86_BUILTIN_VPERMILVARPD512,
28446 IX86_BUILTIN_VPERMILVARPS512,
28447 IX86_BUILTIN_VPERMT2VARD512,
28448 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28449 IX86_BUILTIN_VPERMT2VARPD512,
28450 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28451 IX86_BUILTIN_VPERMT2VARPS512,
28452 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28453 IX86_BUILTIN_VPERMT2VARQ512,
28454 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28455 IX86_BUILTIN_VPERMVARDF512,
28456 IX86_BUILTIN_VPERMVARDI512,
28457 IX86_BUILTIN_VPERMVARSF512,
28458 IX86_BUILTIN_VPERMVARSI512,
28459 IX86_BUILTIN_VTERNLOGD512_MASK,
28460 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28461 IX86_BUILTIN_VTERNLOGQ512_MASK,
28462 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28463
28464 /* Mask arithmetic operations */
28465 IX86_BUILTIN_KAND16,
28466 IX86_BUILTIN_KANDN16,
28467 IX86_BUILTIN_KNOT16,
28468 IX86_BUILTIN_KOR16,
28469 IX86_BUILTIN_KORTESTC16,
28470 IX86_BUILTIN_KORTESTZ16,
28471 IX86_BUILTIN_KUNPCKBW,
28472 IX86_BUILTIN_KXNOR16,
28473 IX86_BUILTIN_KXOR16,
28474 IX86_BUILTIN_KMOV16,
28475
28476 /* Alternate 4- and 8-element gather/scatter for the vectorizer,
28477 where all operands are 32-byte or 64-byte wide respectively. */
28478 IX86_BUILTIN_GATHERALTSIV4DF,
28479 IX86_BUILTIN_GATHERALTDIV8SF,
28480 IX86_BUILTIN_GATHERALTSIV4DI,
28481 IX86_BUILTIN_GATHERALTDIV8SI,
28482 IX86_BUILTIN_GATHER3ALTDIV16SF,
28483 IX86_BUILTIN_GATHER3ALTDIV16SI,
28484 IX86_BUILTIN_GATHER3ALTSIV8DF,
28485 IX86_BUILTIN_GATHER3ALTSIV8DI,
28486 IX86_BUILTIN_GATHER3DIV16SF,
28487 IX86_BUILTIN_GATHER3DIV16SI,
28488 IX86_BUILTIN_GATHER3DIV8DF,
28489 IX86_BUILTIN_GATHER3DIV8DI,
28490 IX86_BUILTIN_GATHER3SIV16SF,
28491 IX86_BUILTIN_GATHER3SIV16SI,
28492 IX86_BUILTIN_GATHER3SIV8DF,
28493 IX86_BUILTIN_GATHER3SIV8DI,
28494 IX86_BUILTIN_SCATTERDIV16SF,
28495 IX86_BUILTIN_SCATTERDIV16SI,
28496 IX86_BUILTIN_SCATTERDIV8DF,
28497 IX86_BUILTIN_SCATTERDIV8DI,
28498 IX86_BUILTIN_SCATTERSIV16SF,
28499 IX86_BUILTIN_SCATTERSIV16SI,
28500 IX86_BUILTIN_SCATTERSIV8DF,
28501 IX86_BUILTIN_SCATTERSIV8DI,
28502
28503 /* AVX512PF */
28504 IX86_BUILTIN_GATHERPFQPD,
28505 IX86_BUILTIN_GATHERPFDPS,
28506 IX86_BUILTIN_GATHERPFDPD,
28507 IX86_BUILTIN_GATHERPFQPS,
28508 IX86_BUILTIN_SCATTERPFDPD,
28509 IX86_BUILTIN_SCATTERPFDPS,
28510 IX86_BUILTIN_SCATTERPFQPD,
28511 IX86_BUILTIN_SCATTERPFQPS,
28512
28513 /* AVX-512ER */
28514 IX86_BUILTIN_EXP2PD_MASK,
28515 IX86_BUILTIN_EXP2PS_MASK,
28516 IX86_BUILTIN_EXP2PS,
28517 IX86_BUILTIN_RCP28PD,
28518 IX86_BUILTIN_RCP28PS,
28519 IX86_BUILTIN_RCP28SD,
28520 IX86_BUILTIN_RCP28SS,
28521 IX86_BUILTIN_RSQRT28PD,
28522 IX86_BUILTIN_RSQRT28PS,
28523 IX86_BUILTIN_RSQRT28SD,
28524 IX86_BUILTIN_RSQRT28SS,
28525
28526 /* SHA builtins. */
28527 IX86_BUILTIN_SHA1MSG1,
28528 IX86_BUILTIN_SHA1MSG2,
28529 IX86_BUILTIN_SHA1NEXTE,
28530 IX86_BUILTIN_SHA1RNDS4,
28531 IX86_BUILTIN_SHA256MSG1,
28532 IX86_BUILTIN_SHA256MSG2,
28533 IX86_BUILTIN_SHA256RNDS2,
28534
28535 /* CLFLUSHOPT instructions. */
28536 IX86_BUILTIN_CLFLUSHOPT,
28537
28538 /* TFmode support builtins. */
28539 IX86_BUILTIN_INFQ,
28540 IX86_BUILTIN_HUGE_VALQ,
28541 IX86_BUILTIN_FABSQ,
28542 IX86_BUILTIN_COPYSIGNQ,
28543
28544 /* Vectorizer support builtins. */
28545 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28546 IX86_BUILTIN_CPYSGNPS,
28547 IX86_BUILTIN_CPYSGNPD,
28548 IX86_BUILTIN_CPYSGNPS256,
28549 IX86_BUILTIN_CPYSGNPS512,
28550 IX86_BUILTIN_CPYSGNPD256,
28551 IX86_BUILTIN_CPYSGNPD512,
28552 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28553 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28554
28555
28556 /* FMA4 instructions. */
28557 IX86_BUILTIN_VFMADDSS,
28558 IX86_BUILTIN_VFMADDSD,
28559 IX86_BUILTIN_VFMADDPS,
28560 IX86_BUILTIN_VFMADDPD,
28561 IX86_BUILTIN_VFMADDPS256,
28562 IX86_BUILTIN_VFMADDPD256,
28563 IX86_BUILTIN_VFMADDSUBPS,
28564 IX86_BUILTIN_VFMADDSUBPD,
28565 IX86_BUILTIN_VFMADDSUBPS256,
28566 IX86_BUILTIN_VFMADDSUBPD256,
28567
28568 /* FMA3 instructions. */
28569 IX86_BUILTIN_VFMADDSS3,
28570 IX86_BUILTIN_VFMADDSD3,
28571
28572 /* XOP instructions. */
28573 IX86_BUILTIN_VPCMOV,
28574 IX86_BUILTIN_VPCMOV_V2DI,
28575 IX86_BUILTIN_VPCMOV_V4SI,
28576 IX86_BUILTIN_VPCMOV_V8HI,
28577 IX86_BUILTIN_VPCMOV_V16QI,
28578 IX86_BUILTIN_VPCMOV_V4SF,
28579 IX86_BUILTIN_VPCMOV_V2DF,
28580 IX86_BUILTIN_VPCMOV256,
28581 IX86_BUILTIN_VPCMOV_V4DI256,
28582 IX86_BUILTIN_VPCMOV_V8SI256,
28583 IX86_BUILTIN_VPCMOV_V16HI256,
28584 IX86_BUILTIN_VPCMOV_V32QI256,
28585 IX86_BUILTIN_VPCMOV_V8SF256,
28586 IX86_BUILTIN_VPCMOV_V4DF256,
28587
28588 IX86_BUILTIN_VPPERM,
28589
28590 IX86_BUILTIN_VPMACSSWW,
28591 IX86_BUILTIN_VPMACSWW,
28592 IX86_BUILTIN_VPMACSSWD,
28593 IX86_BUILTIN_VPMACSWD,
28594 IX86_BUILTIN_VPMACSSDD,
28595 IX86_BUILTIN_VPMACSDD,
28596 IX86_BUILTIN_VPMACSSDQL,
28597 IX86_BUILTIN_VPMACSSDQH,
28598 IX86_BUILTIN_VPMACSDQL,
28599 IX86_BUILTIN_VPMACSDQH,
28600 IX86_BUILTIN_VPMADCSSWD,
28601 IX86_BUILTIN_VPMADCSWD,
28602
28603 IX86_BUILTIN_VPHADDBW,
28604 IX86_BUILTIN_VPHADDBD,
28605 IX86_BUILTIN_VPHADDBQ,
28606 IX86_BUILTIN_VPHADDWD,
28607 IX86_BUILTIN_VPHADDWQ,
28608 IX86_BUILTIN_VPHADDDQ,
28609 IX86_BUILTIN_VPHADDUBW,
28610 IX86_BUILTIN_VPHADDUBD,
28611 IX86_BUILTIN_VPHADDUBQ,
28612 IX86_BUILTIN_VPHADDUWD,
28613 IX86_BUILTIN_VPHADDUWQ,
28614 IX86_BUILTIN_VPHADDUDQ,
28615 IX86_BUILTIN_VPHSUBBW,
28616 IX86_BUILTIN_VPHSUBWD,
28617 IX86_BUILTIN_VPHSUBDQ,
28618
28619 IX86_BUILTIN_VPROTB,
28620 IX86_BUILTIN_VPROTW,
28621 IX86_BUILTIN_VPROTD,
28622 IX86_BUILTIN_VPROTQ,
28623 IX86_BUILTIN_VPROTB_IMM,
28624 IX86_BUILTIN_VPROTW_IMM,
28625 IX86_BUILTIN_VPROTD_IMM,
28626 IX86_BUILTIN_VPROTQ_IMM,
28627
28628 IX86_BUILTIN_VPSHLB,
28629 IX86_BUILTIN_VPSHLW,
28630 IX86_BUILTIN_VPSHLD,
28631 IX86_BUILTIN_VPSHLQ,
28632 IX86_BUILTIN_VPSHAB,
28633 IX86_BUILTIN_VPSHAW,
28634 IX86_BUILTIN_VPSHAD,
28635 IX86_BUILTIN_VPSHAQ,
28636
28637 IX86_BUILTIN_VFRCZSS,
28638 IX86_BUILTIN_VFRCZSD,
28639 IX86_BUILTIN_VFRCZPS,
28640 IX86_BUILTIN_VFRCZPD,
28641 IX86_BUILTIN_VFRCZPS256,
28642 IX86_BUILTIN_VFRCZPD256,
28643
28644 IX86_BUILTIN_VPCOMEQUB,
28645 IX86_BUILTIN_VPCOMNEUB,
28646 IX86_BUILTIN_VPCOMLTUB,
28647 IX86_BUILTIN_VPCOMLEUB,
28648 IX86_BUILTIN_VPCOMGTUB,
28649 IX86_BUILTIN_VPCOMGEUB,
28650 IX86_BUILTIN_VPCOMFALSEUB,
28651 IX86_BUILTIN_VPCOMTRUEUB,
28652
28653 IX86_BUILTIN_VPCOMEQUW,
28654 IX86_BUILTIN_VPCOMNEUW,
28655 IX86_BUILTIN_VPCOMLTUW,
28656 IX86_BUILTIN_VPCOMLEUW,
28657 IX86_BUILTIN_VPCOMGTUW,
28658 IX86_BUILTIN_VPCOMGEUW,
28659 IX86_BUILTIN_VPCOMFALSEUW,
28660 IX86_BUILTIN_VPCOMTRUEUW,
28661
28662 IX86_BUILTIN_VPCOMEQUD,
28663 IX86_BUILTIN_VPCOMNEUD,
28664 IX86_BUILTIN_VPCOMLTUD,
28665 IX86_BUILTIN_VPCOMLEUD,
28666 IX86_BUILTIN_VPCOMGTUD,
28667 IX86_BUILTIN_VPCOMGEUD,
28668 IX86_BUILTIN_VPCOMFALSEUD,
28669 IX86_BUILTIN_VPCOMTRUEUD,
28670
28671 IX86_BUILTIN_VPCOMEQUQ,
28672 IX86_BUILTIN_VPCOMNEUQ,
28673 IX86_BUILTIN_VPCOMLTUQ,
28674 IX86_BUILTIN_VPCOMLEUQ,
28675 IX86_BUILTIN_VPCOMGTUQ,
28676 IX86_BUILTIN_VPCOMGEUQ,
28677 IX86_BUILTIN_VPCOMFALSEUQ,
28678 IX86_BUILTIN_VPCOMTRUEUQ,
28679
28680 IX86_BUILTIN_VPCOMEQB,
28681 IX86_BUILTIN_VPCOMNEB,
28682 IX86_BUILTIN_VPCOMLTB,
28683 IX86_BUILTIN_VPCOMLEB,
28684 IX86_BUILTIN_VPCOMGTB,
28685 IX86_BUILTIN_VPCOMGEB,
28686 IX86_BUILTIN_VPCOMFALSEB,
28687 IX86_BUILTIN_VPCOMTRUEB,
28688
28689 IX86_BUILTIN_VPCOMEQW,
28690 IX86_BUILTIN_VPCOMNEW,
28691 IX86_BUILTIN_VPCOMLTW,
28692 IX86_BUILTIN_VPCOMLEW,
28693 IX86_BUILTIN_VPCOMGTW,
28694 IX86_BUILTIN_VPCOMGEW,
28695 IX86_BUILTIN_VPCOMFALSEW,
28696 IX86_BUILTIN_VPCOMTRUEW,
28697
28698 IX86_BUILTIN_VPCOMEQD,
28699 IX86_BUILTIN_VPCOMNED,
28700 IX86_BUILTIN_VPCOMLTD,
28701 IX86_BUILTIN_VPCOMLED,
28702 IX86_BUILTIN_VPCOMGTD,
28703 IX86_BUILTIN_VPCOMGED,
28704 IX86_BUILTIN_VPCOMFALSED,
28705 IX86_BUILTIN_VPCOMTRUED,
28706
28707 IX86_BUILTIN_VPCOMEQQ,
28708 IX86_BUILTIN_VPCOMNEQ,
28709 IX86_BUILTIN_VPCOMLTQ,
28710 IX86_BUILTIN_VPCOMLEQ,
28711 IX86_BUILTIN_VPCOMGTQ,
28712 IX86_BUILTIN_VPCOMGEQ,
28713 IX86_BUILTIN_VPCOMFALSEQ,
28714 IX86_BUILTIN_VPCOMTRUEQ,
28715
28716 /* LWP instructions. */
28717 IX86_BUILTIN_LLWPCB,
28718 IX86_BUILTIN_SLWPCB,
28719 IX86_BUILTIN_LWPVAL32,
28720 IX86_BUILTIN_LWPVAL64,
28721 IX86_BUILTIN_LWPINS32,
28722 IX86_BUILTIN_LWPINS64,
28723
28724 IX86_BUILTIN_CLZS,
28725
28726 /* RTM */
28727 IX86_BUILTIN_XBEGIN,
28728 IX86_BUILTIN_XEND,
28729 IX86_BUILTIN_XABORT,
28730 IX86_BUILTIN_XTEST,
28731
28732 /* BMI instructions. */
28733 IX86_BUILTIN_BEXTR32,
28734 IX86_BUILTIN_BEXTR64,
28735 IX86_BUILTIN_CTZS,
28736
28737 /* TBM instructions. */
28738 IX86_BUILTIN_BEXTRI32,
28739 IX86_BUILTIN_BEXTRI64,
28740
28741 /* BMI2 instructions. */
28742 IX86_BUILTIN_BZHI32,
28743 IX86_BUILTIN_BZHI64,
28744 IX86_BUILTIN_PDEP32,
28745 IX86_BUILTIN_PDEP64,
28746 IX86_BUILTIN_PEXT32,
28747 IX86_BUILTIN_PEXT64,
28748
28749 /* ADX instructions. */
28750 IX86_BUILTIN_ADDCARRYX32,
28751 IX86_BUILTIN_ADDCARRYX64,
28752
28753 /* FSGSBASE instructions. */
28754 IX86_BUILTIN_RDFSBASE32,
28755 IX86_BUILTIN_RDFSBASE64,
28756 IX86_BUILTIN_RDGSBASE32,
28757 IX86_BUILTIN_RDGSBASE64,
28758 IX86_BUILTIN_WRFSBASE32,
28759 IX86_BUILTIN_WRFSBASE64,
28760 IX86_BUILTIN_WRGSBASE32,
28761 IX86_BUILTIN_WRGSBASE64,
28762
28763 /* RDRND instructions. */
28764 IX86_BUILTIN_RDRAND16_STEP,
28765 IX86_BUILTIN_RDRAND32_STEP,
28766 IX86_BUILTIN_RDRAND64_STEP,
28767
28768 /* RDSEED instructions. */
28769 IX86_BUILTIN_RDSEED16_STEP,
28770 IX86_BUILTIN_RDSEED32_STEP,
28771 IX86_BUILTIN_RDSEED64_STEP,
28772
28773 /* F16C instructions. */
28774 IX86_BUILTIN_CVTPH2PS,
28775 IX86_BUILTIN_CVTPH2PS256,
28776 IX86_BUILTIN_CVTPS2PH,
28777 IX86_BUILTIN_CVTPS2PH256,
28778
28779 /* CFString built-in for darwin */
28780 IX86_BUILTIN_CFSTRING,
28781
28782 /* Builtins to get CPU type and supported features. */
28783 IX86_BUILTIN_CPU_INIT,
28784 IX86_BUILTIN_CPU_IS,
28785 IX86_BUILTIN_CPU_SUPPORTS,
28786
28787 /* Read/write FLAGS register built-ins. */
28788 IX86_BUILTIN_READ_FLAGS,
28789 IX86_BUILTIN_WRITE_FLAGS,
28790
28791 IX86_BUILTIN_MAX
28792 };
28793
28794 /* Table for the ix86 builtin decls. */
28795 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28796
28797 /* Table of all of the builtin functions that are possible with different ISAs
28798 but are waiting to be built until a function is declared to use that
28799 ISA. */
28800 struct builtin_isa {
28801 const char *name; /* function name */
28802 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28803 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28804 bool const_p; /* true if the declaration is constant */
28805 bool set_and_not_built_p; /* true if the builtin was recorded but its decl is still deferred */
28806 };
28807
28808 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
28809
28810
28811 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
28812 of isa_flags to use in the ix86_builtins_isa array.  Store the
28813 function decl in the ix86_builtins array.  Return the function decl, or
28814 NULL_TREE if the builtin was not added.
28815
28816 If the front end has a special hook for builtin functions, delay adding
28817 builtin functions that aren't in the current ISA until the ISA is changed
28818 with function specific optimization.  Doing so can save about 300K for the
28819 default compiler.  When the builtin is expanded, check at that time whether
28820 it is valid.
28821
28822 If the front end doesn't have a special hook, record all builtins, even
28823 those that aren't in the current ISA, in case the user uses function
28824 specific options for a different ISA, so that we don't get scope errors
28825 if a builtin is added in the middle of a function scope. */
28826
28827 static inline tree
28828 def_builtin (HOST_WIDE_INT mask, const char *name,
28829 enum ix86_builtin_func_type tcode,
28830 enum ix86_builtins code)
28831 {
28832 tree decl = NULL_TREE;
28833
28834 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28835 {
28836 ix86_builtins_isa[(int) code].isa = mask;
28837
28838 mask &= ~OPTION_MASK_ISA_64BIT;
28839 if (mask == 0
28840 || (mask & ix86_isa_flags) != 0
28841 || (lang_hooks.builtin_function
28842 == lang_hooks.builtin_function_ext_scope))
28843
28844 {
28845 tree type = ix86_get_builtin_func_type (tcode);
28846 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28847 NULL, NULL_TREE);
28848 ix86_builtins[(int) code] = decl;
28849 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28850 }
28851 else
28852 {
28853 ix86_builtins[(int) code] = NULL_TREE;
28854 ix86_builtins_isa[(int) code].tcode = tcode;
28855 ix86_builtins_isa[(int) code].name = name;
28856 ix86_builtins_isa[(int) code].const_p = false;
28857 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28858 }
28859 }
28860
28861 return decl;
28862 }
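
/* As a rough illustration only (the real registration happens in the loops
   over the bdesc_* tables later in this file), the RTM entry from
   bdesc_special_args below would reach this function as:

     def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xbegin",
                  UNSIGNED_FTYPE_VOID, IX86_BUILTIN_XBEGIN);

   When the builtin's ISA is not enabled on the command line, the entry may
   only be recorded in ix86_builtins_isa here and the decl built later by
   ix86_add_new_builtins.  */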
28863
28864 /* Like def_builtin, but also marks the function decl "const". */
28865
28866 static inline tree
28867 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28868 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28869 {
28870 tree decl = def_builtin (mask, name, tcode, code);
28871 if (decl)
28872 TREE_READONLY (decl) = 1;
28873 else
28874 ix86_builtins_isa[(int) code].const_p = true;
28875
28876 return decl;
28877 }
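
/* Note: TREE_READONLY on a FUNCTION_DECL is essentially the tree-level
   form of __attribute__ ((const)), so builtins registered through
   def_builtin_const may be CSEd or hoisted by the optimizers; const_p
   records the same request for decls whose creation was deferred.  */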
28878
28879 /* Add any new builtin functions for a given ISA that may not have been
28880 declared yet.  Deferring them like this saves a bit of space compared to
28881 adding every declaration to the tree up front, whether or not it is used. */
28882
28883 static void
28884 ix86_add_new_builtins (HOST_WIDE_INT isa)
28885 {
28886 int i;
28887
28888 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
28889 {
28890 if ((ix86_builtins_isa[i].isa & isa) != 0
28891 && ix86_builtins_isa[i].set_and_not_built_p)
28892 {
28893 tree decl, type;
28894
28895 /* Don't define the builtin again. */
28896 ix86_builtins_isa[i].set_and_not_built_p = false;
28897
28898 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28899 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28900 type, i, BUILT_IN_MD, NULL,
28901 NULL_TREE);
28902
28903 ix86_builtins[i] = decl;
28904 if (ix86_builtins_isa[i].const_p)
28905 TREE_READONLY (decl) = 1;
28906 }
28907 }
28908 }
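
/* Illustrative example (the actual call sites are in the target
   attribute/pragma handling elsewhere in this file): a function such as

     __attribute__ ((target ("avx2"))) void f (void);

   compiled without -mavx2 switches the ISA for f; the deferred AVX2
   builtin decls recorded by def_builtin are then built here.  */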
28909
28910 /* Bits for builtin_description.flag. */
28911
28912 /* Set when we don't support the comparison natively, and should
28913 swap the operands of the comparison in order to support it. */
28914 #define BUILTIN_DESC_SWAP_OPERANDS 1
28915
28916 struct builtin_description
28917 {
28918 const HOST_WIDE_INT mask;
28919 const enum insn_code icode;
28920 const char *const name;
28921 const enum ix86_builtins code;
28922 const enum rtx_code comparison;
28923 const int flag;
28924 };
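
/* Roughly speaking, the bdesc_* tables below are walked later in this file
   to register each name through def_builtin/def_builtin_const, and again
   at expansion time, where icode, comparison and flag determine the RTL
   that gets generated.  */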
28925
28926 static const struct builtin_description bdesc_comi[] =
28927 {
28928 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28929 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28930 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28931 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28932 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28933 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28934 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
28935 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
28936 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
28937 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
28938 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
28939 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
28940 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
28941 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
28942 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
28943 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
28944 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
28945 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
28946 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
28947 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
28948 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
28949 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
28950 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
28951 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
28952 };
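
/* These correspond to the user-level intrinsics in xmmintrin.h and
   emmintrin.h; e.g. _mm_comieq_ss is implemented there as a call to
   __builtin_ia32_comieq.  (Illustrative cross-reference only.)  */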
28953
28954 static const struct builtin_description bdesc_pcmpestr[] =
28955 {
28956 /* SSE4.2 */
28957 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
28958 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
28959 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
28960 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
28961 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
28962 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
28963 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
28964 };
28965
28966 static const struct builtin_description bdesc_pcmpistr[] =
28967 {
28968 /* SSE4.2 */
28969 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
28970 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
28971 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
28972 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
28973 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
28974 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
28975 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
28976 };
28977
28978 /* Special builtins with variable number of arguments. */
28979 static const struct builtin_description bdesc_special_args[] =
28980 {
28981 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
28982 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
28983 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
28984
28985 /* 80387 (for use internally for atomic compound assignment). */
28986 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
28987 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
28988 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) VOID_FTYPE_PUSHORT },
28989 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
28990
28991 /* MMX */
28992 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28993
28994 /* 3DNow! */
28995 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28996
28997 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
28998 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
28999 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
29000 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29001 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29002 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29003 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29004 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29005 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29006
29007 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29008 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29009 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29010 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29011 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29012 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29013 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29014 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29015
29016 /* SSE */
29017 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29018 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29019 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29020
29021 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29022 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29023 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29024 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29025
29026 /* SSE or 3DNow!A */
29027 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29028 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
29029
29030 /* SSE2 */
29031 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29032 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29033 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29034 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
29035 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29036 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
29037 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
29038 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
29039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
29040 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29041
29042 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29043 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29044
29045 /* SSE3 */
29046 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29047
29048 /* SSE4.1 */
29049 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
29050
29051 /* SSE4A */
29052 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29053 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29054
29055 /* AVX */
29056 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
29057 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
29058
29059 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29060 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29061 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29062 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29063 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29064
29065 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29066 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29067 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29068 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29069 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29070 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29071 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29072
29073 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29074 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29075 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29076
29077 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29078 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29079 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29080 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29081 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29082 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29083 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29084 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29085
29086 /* AVX2 */
29087 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29088 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29089 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29090 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29091 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29092 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29093 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29094 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29095 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29096
29097 /* AVX512F */
29098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29145
29146 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29147 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29148 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29149 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29150 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29151 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29152
29153 /* FSGSBASE */
29154 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29155 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29156 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29157 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29158 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29159 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29160 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29161 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29162
29163 /* RTM */
29164 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29165 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29166 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29167 };
29168
29169 /* Builtins with variable number of arguments. */
29170 static const struct builtin_description bdesc_args[] =
29171 {
29172 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29173 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29174 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29175 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29176 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29177 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29178 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29179
29180 /* MMX */
29181 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29182 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29183 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29184 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29185 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29186 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29187
29188 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29189 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29190 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29191 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29192 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29193 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29194 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29195 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29196
29197 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29198 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29199
29200 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29201 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29202 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29203 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29204
29205 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29206 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29207 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29208 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29209 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29210 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29211
29212 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29213 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29214 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29215 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29216 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29217 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29218
29219 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29220 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29221 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29222
29223 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29224
29225 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29226 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29227 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29228 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29229 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29230 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29231
29232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29233 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29234 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29235 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29236 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29237 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29238
29239 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29240 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29241 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29242 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29243
29244 /* 3DNow! */
29245 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29246 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29247 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29248 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29249
29250 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29251 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29252 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29253 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29254 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29255 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29256 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29257 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29258 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29259 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29260 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29261 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29262 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29263 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29264 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29265
29266 /* 3DNow!A */
29267 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29268 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29269 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29270 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29271 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29272 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29273
29274 /* SSE */
29275 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29276 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29277 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29278 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29279 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29280 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29281 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29282 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29283 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29284 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29285 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29286 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29287
29288 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29289
29290 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29291 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29292 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29293 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29294 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29295 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29296 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29297 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29298
29299 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29300 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29301 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29302 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29303 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29304 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29308 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29309 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29310 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29312 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29313 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29314 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29315 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29316 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29317 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29318 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29319
29320 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29321 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29322 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29323 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29324
29325 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29326 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29327 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29328 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29329
29330 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29331
29332 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29333 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29334 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29335 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29336 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29337
29338 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29339 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29340 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29341
29342 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29343
29344 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29345 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29346 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29347
29348 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29349 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29350
29351 /* SSE MMX or 3DNow!A */
29352 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29353 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29354 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29355
29356 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29357 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29358 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29359 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29360
29361 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29362 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29363
29364 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29365
29366 /* SSE2 */
29367 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29368
29369 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29370 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29371 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29372 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29373 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29374
29375 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29376 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29377 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29378 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29379 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29380
29381 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29382
29383 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29384 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29385 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29386 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29387
29388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29389 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29390 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29391
29392 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29393 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29394 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29395 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29396 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29397 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29398 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29399 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29400
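/* The SSE2 compare builtins below all funnel through the maskcmp and
   vmmaskcmp patterns; the rtx code in the fifth column selects the
   condition, and a prototype ending in _SWAP tells the expander to
   exchange the two vector operands.  That is how cmpgt/cmpge
   (e.g. _mm_cmpgt_pd) are synthesized from the LT/LE forms without
   needing separate insn patterns.  */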
29401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29421
29422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29423 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29426
29427 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29429 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29430 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29431
29432 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29433
29434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29435 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29436 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29437
29438 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29439
29440 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29441 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29442 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29443 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29444 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29445 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29446 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29447 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29448
29449 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29450 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29451 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29452 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29453 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29454 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29455 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29456 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29457
29458 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29459 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29460
29461 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29462 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29463 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29464 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29465
29466 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29467 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29468
29469 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29470 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29471 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29472 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29473 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29474 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29475
29476 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29477 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29478 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29480
29481 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29482 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29483 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29484 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29485 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29486 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29487 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29488 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29489
29490 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29491 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29492 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29493
29494 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29496
29497 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29498 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29499
29500 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29501
29502 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29503 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29504 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29505 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29506
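/* Shift entries: a prototype ending in _COUNT marks the last operand as a
   shift count, supplied either as a scalar (the *i immediate forms) or in
   the low part of a vector register (the non-immediate forms).  The
   pslldqi/psrldqi rows use _INT_CONVERT because the whole-register byte
   shifts are implemented on V1TI, so the V2DI operands have to be
   reinterpreted in the pattern's mode by the expander.  */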
29507 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29508 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29509 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29510 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29511 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29512 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29513 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29514
29515 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29516 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29517 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29518 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29519 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29520 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29521 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29522
29523 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29524 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29525 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29526 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29527
29528 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29529 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29530 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29531
29532 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29533
29534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29535
29536 /* SSE2 MMX */
29537 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29538 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29539
29540 /* SSE3 */
29541 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29542 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29543
29544 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29545 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29546 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29547 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29548 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29549 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29550
29551 /* SSSE3 */
29552 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29553 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29554 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29555 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29556 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29557 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29558
29559 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29560 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29561 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29562 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29563 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29564 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29565 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29566 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29567 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29568 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29569 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29570 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29571 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29572 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29573 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29574 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29575 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29576 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29577 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29578 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29579 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29580 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29581 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29582 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29583
29584 /* SSSE3. */
29585 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29586 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29587
29588 /* SSE4.1 */
29589 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29590 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29591 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29592 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29593 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29594 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29595 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29596 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29597 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29598 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29599
29600 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29601 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29602 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29603 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29604 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29605 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29606 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29607 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29608 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29609 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29610 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29611 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29612 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29613
29614 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29615 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29616 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29617 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29618 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29619 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29620 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29621 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29622 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29623 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29624 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29625 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29626
29627 /* SSE4.1 */
29628 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29629 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29630 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29631 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29632
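/* floor/ceil/trunc/rint reuse the roundpd/roundps patterns above.  Instead
   of a comparison code, the fifth column carries a ROUND_* constant that
   the expander emits as the rounding-control immediate; ROUND_MXCSR means
   "round according to the current MXCSR mode", which is what rint needs.  */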
29633 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29634 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29635 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29636 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
29637
29638 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29639 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29640
29641 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29642 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29643
29644 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29645 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29646 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29647 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29648
29649 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29650 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29651
29652 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29653 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29654
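/* The three ptest builtins share a single PTEST pattern and differ only in
   the stored flag condition: EQ tests ZF (ptestz), LTU tests CF (ptestc),
   and GTU requires both clear (ptestnzc).  The *_PTEST prototypes make the
   expander emit the flag-setting insn followed by a setcc of that
   condition into the integer result.  */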
29655 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29656 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29657 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29658
29659 /* SSE4.2 */
29660 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29661 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29662 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29663 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29664 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29665
29666 /* SSE4A */
29667 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29668 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29669 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29670 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29671
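/* The AES and PCLMUL rows below have a null name: the user-visible
   builtins are registered elsewhere with def_builtin_const under their own
   OPTION_MASK_ISA_AES / OPTION_MASK_ISA_PCLMUL flags, and this table only
   supplies the insn code and prototype used at expansion time; the first
   column therefore shows just the SSE2 baseline.  */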
29672 /* AES */
29673 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29674 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29675
29676 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29677 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29678 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29679 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29680
29681 /* PCLMUL */
29682 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
29683
29684 /* AVX */
29685 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29686 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29687 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29688 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29689 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29690 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29691 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29692 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29693 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29696 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29699 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29700 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29701 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29702 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29703 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29704 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29705 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29706 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29707 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29708 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29709 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29710 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29711
29712 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29716
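/* The AVX block below mixes immediate-controlled operations (blend, dpps,
   shufpd/shufps, vperm*, vinsertf128/vextractf128): their trailing _INT
   operand is the encoding immediate.  Note that the AVX cmp builtins take
   the full 5-bit predicate as that immediate, unlike the per-condition
   SSE/SSE2 cmp entries earlier in this table.  */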
29717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29733 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29734 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29738 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29740 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29751
29752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29754 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29755
29756 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29758 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29759 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29760 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29761
29762 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29763
29764 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29765 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29766
29767 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29768 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29769 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29770 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29771
29772 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29773 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29774
29775 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29776 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29777
29778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29780 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29781 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29782
29783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29784 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29785
29786 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29787 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29788
29789 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29790 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29792 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29793
29794 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29797 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29798 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29799 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
29800
29801 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29802 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29803 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29804 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29805 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29806 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29807 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29808 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29809 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29810 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29811 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29812 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29813 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29814 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29815 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29816
29817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29818 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
29819
29820 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29821 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29822
29823 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29824
29825 /* AVX2 */
29826 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
29827 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
29828 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
29829 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
29830 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29831 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29832 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29833 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29834 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29835 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29836 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29837 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29838 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29839 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29840 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29841 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29842 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
29843 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29844 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29845 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29846 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29847 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
29848 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
29849 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29850 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29851 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29852 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29853 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29854 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29855 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29856 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29857 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29858 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29859 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29860 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29861 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29862 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29863 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29864 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
29865 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29866 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29867 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29868 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29869 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29870 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29871 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29872 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29873 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29874 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29875 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29876 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29877 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
29878 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29879 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29880 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29881 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29882 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29890 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29892 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29893 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29894 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29895 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29896 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29897 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29898 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29899 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29900 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
29901 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29902 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29903 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29904 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29905 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29906 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29907 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29908 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29909 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29910 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29911 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29912 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29913 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29914 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29915 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29916 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29917 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29918 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29919 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29920 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29921 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29922 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29923 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29924 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29925 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29926 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29927 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29928 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29929 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29930 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29931 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29932 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29933 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29934 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29935 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29936 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29937 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29938 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29939 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29940 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29941 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29942 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29943 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29944 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29945 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
29946 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29947 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
29948 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
29949 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29950 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29951 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29952 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29953 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29954 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29955 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29956 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29957 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29958 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
29959 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
29960 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
29961 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
29962 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29963 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29964 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29965 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29966 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29967 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29968 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29969 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29970 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29971 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
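/* Editorial note (not in the original source): in the AVX2 shift and
   palignr entries above, the _COUNT suffix on the ftype appears to mark
   a trailing shift-count operand (either an immediate/SImode value, as
   in V16HI_FTYPE_V16HI_SI_COUNT, or an XMM vector, as in
   V16HI_FTYPE_V16HI_V8HI_COUNT), while _CONVERT appears to mark entries
   such as __builtin_ia32_palignr256 whose V4DI arguments are
   reinterpreted in the V2TI mode of the underlying insn pattern.  The
   special handling is presumably done in ix86_expand_args_builtin; this
   is an assumption, not verified here.  */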
29972
29973 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29974
29975 /* BMI */
29976 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29977 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29978 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29979
29980 /* TBM */
29981 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29982 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29983
29984 /* F16C */
29985 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
29986 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
29987 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
29988 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
29989
29990 /* BMI2 */
29991 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29992 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29993 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29994 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29995 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29996 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29997
29998 /* AVX512F */
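/* Editorial note (not in the original source): most AVX512F entries
   below follow the _mask naming convention, and their ftype appends two
   operands to the plain operation: a pass-through vector supplying the
   elements kept where the mask bit is clear, and the write-mask itself
   (HImode for 16-element operations, QImode for 8-element ones).  For
   example, V16SI_FTYPE_V16SI_V16SI_HI for __builtin_ia32_pabsd512_mask
   is presumably (source, pass-through, __mmask16).  The _maskz variants
   appear to zero the unselected elements instead.  This summary is an
   editorial assumption, not taken from the original comments.  */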
29999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
30007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
30009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
30010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
30017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
30019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
30024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
30025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
30026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
30027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
30028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
30029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
30030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
30031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
30048 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
30049 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
30050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
30051 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30052 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30160 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30161 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30162 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30163 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30190
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30195 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30199
30200 /* Mask arithmetic operations */
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
30211
30212 /* SHA */
30213 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30214 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30215 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30216 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30218 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30220 };
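/* Illustrative sketch, not part of i386.c: one way the mask-arithmetic
   builtins listed in the table above reach user code.  The intrinsic
   wrappers live in avx512fintrin.h and are available via <immintrin.h>
   with -mavx512f; _mm512_kand is implemented on top of
   __builtin_ia32_kandhi.  */
#if 0
#include <immintrin.h>

/* Combine two comparison masks with a KANDW mask operation.  */
__mmask16
both_equal (__m512i a, __m512i b, __m512i c)
{
  __mmask16 ab = _mm512_cmpeq_epi32_mask (a, b);
  __mmask16 ac = _mm512_cmpeq_epi32_mask (a, c);
  return _mm512_kand (ab, ac);
}
#endif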
30221
30222 /* Builtins with rounding support. */
30223 static const struct builtin_description bdesc_round_args[] =
30224 {
30225 /* AVX512F */
30226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30245 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30247 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30254 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30256 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30306 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30308 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30310 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30312 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30314 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30316 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30318 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30320 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30327 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30345
30346 /* AVX512ER */
30347 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30348 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30349 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30350 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30351 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30352 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30353 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30354 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30355 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30356 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30357 };
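/* Illustrative sketch, not part of i386.c: how a bdesc_round_args entry is
   typically reached from user code.  _mm512_add_round_pd (declared in
   avx512fintrin.h) wraps __builtin_ia32_addpd512_mask, the first entry of
   the table above; its final argument is the embedded rounding/SAE
   immediate rather than a value read from MXCSR.  */
#if 0
#include <immintrin.h>

__m512d
add_round_nearest (__m512d a, __m512d b)
{
  /* Round to nearest with exceptions suppressed, selected per
     instruction through the immediate operand.  */
  return _mm512_add_round_pd (a, b,
			      _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
#endif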
30358
30359 /* FMA4 and XOP. */
30360 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30361 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30362 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30363 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30364 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30365 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30366 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30367 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30368 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30369 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30370 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30371 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30372 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30373 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30374 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30375 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30376 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30377 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30378 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30379 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30380 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30381 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30382 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30383 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30384 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30385 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30386 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30387 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30388 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30389 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30390 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30391 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30392 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30393 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30394 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30395 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30396 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30397 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30398 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30399 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30400 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30401 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30402 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30403 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30404 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30405 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30406 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30407 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30408 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30409 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30410 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30411 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
30412
30413 static const struct builtin_description bdesc_multi_arg[] =
30414 {
30415 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30416 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30417 UNKNOWN, (int)MULTI_ARG_3_SF },
30418 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30419 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30420 UNKNOWN, (int)MULTI_ARG_3_DF },
30421
30422 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30423 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30424 UNKNOWN, (int)MULTI_ARG_3_SF },
30425 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30426 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30427 UNKNOWN, (int)MULTI_ARG_3_DF },
30428
30429 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30430 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30431 UNKNOWN, (int)MULTI_ARG_3_SF },
30432 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30433 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30434 UNKNOWN, (int)MULTI_ARG_3_DF },
30435 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30436 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30437 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30438 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30439 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30440 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30441
30442 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30443 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30444 UNKNOWN, (int)MULTI_ARG_3_SF },
30445 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30446 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30447 UNKNOWN, (int)MULTI_ARG_3_DF },
30448 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30449 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30450 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30451 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30452 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30453 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30454
30455 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30456 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30457 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
30460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30461 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30462
30463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30465 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30468 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30470
30471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30472
30473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30485
30486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30502
30503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30509
30510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30525
30526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30533
30534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30541
30542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30549
30550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30557
30558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30565
30566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30573
30574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30581
30582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30589
30590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30598
30599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30607
30608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30612
30613 };
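/* Illustrative sketch, not part of i386.c: a few of the FMA4/XOP builtins
   from bdesc_multi_arg above as they appear through their wrappers in
   fma4intrin.h and xopintrin.h (pulled in by <x86intrin.h> when compiling
   with -mfma4 -mxop).  */
#if 0
#include <x86intrin.h>

__m128
scalar_fma (__m128 a, __m128 b, __m128 c)
{
  /* __builtin_ia32_vfmaddss: a*b + c on the low element only.  */
  return _mm_macc_ss (a, b, c);
}

__m128i
rotate_and_compare (__m128i x, __m128i y)
{
  __m128i r = _mm_roti_epi32 (x, 5);   /* __builtin_ia32_vprotdi */
  return _mm_comlt_epi32 (r, y);       /* __builtin_ia32_vpcomltd */
}
#endif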
30614 \f
30615 /* TM vector builtins. */
30616
30617 /* Reuse the existing x86-specific `struct builtin_description' because
30618 we're lazy.  Add casts to make them fit. */
30619 static const struct builtin_description bdesc_tm[] =
30620 {
30621 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30622 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30623 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30624 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30625 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30626 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30627 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30628
30629 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30630 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30631 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30632 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30633 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30634 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30635 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30636
30637 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30638 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30639 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30640 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30641 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30642 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30643 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30644
30645 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30646 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30647 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30648 };
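/* Illustrative sketch, not part of i386.c: the _ITM_* entries above are not
   called by users directly; the trans-mem lowering emits them for vector
   accesses inside transactions, roughly as in this example when compiled
   with -fgnu-tm and a suitable vector ISA flag such as -mavx.  */
#if 0
float dst[8], src[8];

void
copy_in_transaction (void)
{
  __transaction_atomic
    {
      /* If this loop is vectorized, the 256-bit loads and stores may be
	 routed through _ITM_RM256 / _ITM_WM256 (via the tm_load/tm_store
	 helpers below) instead of plain vector moves.  */
      for (int i = 0; i < 8; i++)
	dst[i] = src[i];
    }
}
#endif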
30649
30650 /* TM callbacks. */
30651
30652 /* Return the builtin decl needed to load a vector of TYPE. */
30653
30654 static tree
30655 ix86_builtin_tm_load (tree type)
30656 {
30657 if (TREE_CODE (type) == VECTOR_TYPE)
30658 {
30659 switch (tree_to_uhwi (TYPE_SIZE (type)))
30660 {
30661 case 64:
30662 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30663 case 128:
30664 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30665 case 256:
30666 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30667 }
30668 }
30669 return NULL_TREE;
30670 }
30671
30672 /* Return the builtin decl needed to store a vector of TYPE. */
30673
30674 static tree
30675 ix86_builtin_tm_store (tree type)
30676 {
30677 if (TREE_CODE (type) == VECTOR_TYPE)
30678 {
30679 switch (tree_to_uhwi (TYPE_SIZE (type)))
30680 {
30681 case 64:
30682 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30683 case 128:
30684 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30685 case 256:
30686 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30687 }
30688 }
30689 return NULL_TREE;
30690 }
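
/* Editorial example (hypothetical user code, not part of this file):
   with -fgnu-tm, a 16-byte vector store inside an atomic transaction,

     typedef float v4sf __attribute__ ((vector_size (16)));
     void store (v4sf *p, v4sf x)
     {
       __transaction_atomic { *p = x; }
     }

   is expected to be lowered through the builtin_tm_store hook above and
   end up as a call to the _ITM_WM128 entry registered in bdesc_tm (on an
   SSE-capable target).  The names v4sf and store are placeholders.  */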
30691 \f
30692 /* Initialize the transactional memory vector load/store builtins. */
30693
30694 static void
30695 ix86_init_tm_builtins (void)
30696 {
30697 enum ix86_builtin_func_type ftype;
30698 const struct builtin_description *d;
30699 size_t i;
30700 tree decl;
30701 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30702 tree attrs_log, attrs_type_log;
30703
30704 if (!flag_tm)
30705 return;
30706
30707 /* If there are no builtins defined, we must be compiling in a
30708 language without trans-mem support. */
30709 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30710 return;
30711
30712 /* Use whatever attributes a normal TM load has. */
30713 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30714 attrs_load = DECL_ATTRIBUTES (decl);
30715 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30716 /* Use whatever attributes a normal TM store has. */
30717 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30718 attrs_store = DECL_ATTRIBUTES (decl);
30719 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30720 /* Use whatever attributes a normal TM log has. */
30721 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30722 attrs_log = DECL_ATTRIBUTES (decl);
30723 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30724
30725 for (i = 0, d = bdesc_tm;
30726 i < ARRAY_SIZE (bdesc_tm);
30727 i++, d++)
30728 {
30729 if ((d->mask & ix86_isa_flags) != 0
30730 || (lang_hooks.builtin_function
30731 == lang_hooks.builtin_function_ext_scope))
30732 {
30733 tree type, attrs, attrs_type;
30734 enum built_in_function code = (enum built_in_function) d->code;
30735
30736 ftype = (enum ix86_builtin_func_type) d->flag;
30737 type = ix86_get_builtin_func_type (ftype);
30738
30739 if (BUILTIN_TM_LOAD_P (code))
30740 {
30741 attrs = attrs_load;
30742 attrs_type = attrs_type_load;
30743 }
30744 else if (BUILTIN_TM_STORE_P (code))
30745 {
30746 attrs = attrs_store;
30747 attrs_type = attrs_type_store;
30748 }
30749 else
30750 {
30751 attrs = attrs_log;
30752 attrs_type = attrs_type_log;
30753 }
30754 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30755 /* The builtin without the prefix for
30756 calling it directly. */
30757 d->name + strlen ("__builtin_"),
30758 attrs);
30759 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30760 set the TYPE_ATTRIBUTES. */
30761 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30762
30763 set_builtin_decl (code, decl, false);
30764 }
30765 }
30766 }
30767
30768 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
30769 in the current target ISA, so that the user can compile particular modules
30770 with target-specific options that differ from the command-line
30771 options. */
30772 static void
30773 ix86_init_mmx_sse_builtins (void)
30774 {
30775 const struct builtin_description * d;
30776 enum ix86_builtin_func_type ftype;
30777 size_t i;
30778
30779 /* Add all special builtins with variable number of operands. */
30780 for (i = 0, d = bdesc_special_args;
30781 i < ARRAY_SIZE (bdesc_special_args);
30782 i++, d++)
30783 {
30784 if (d->name == 0)
30785 continue;
30786
30787 ftype = (enum ix86_builtin_func_type) d->flag;
30788 def_builtin (d->mask, d->name, ftype, d->code);
30789 }
30790
30791 /* Add all builtins with variable number of operands. */
30792 for (i = 0, d = bdesc_args;
30793 i < ARRAY_SIZE (bdesc_args);
30794 i++, d++)
30795 {
30796 if (d->name == 0)
30797 continue;
30798
30799 ftype = (enum ix86_builtin_func_type) d->flag;
30800 def_builtin_const (d->mask, d->name, ftype, d->code);
30801 }
30802
30803 /* Add all builtins with rounding. */
30804 for (i = 0, d = bdesc_round_args;
30805 i < ARRAY_SIZE (bdesc_round_args);
30806 i++, d++)
30807 {
30808 if (d->name == 0)
30809 continue;
30810
30811 ftype = (enum ix86_builtin_func_type) d->flag;
30812 def_builtin_const (d->mask, d->name, ftype, d->code);
30813 }
30814
30815 /* pcmpestr[im] insns. */
30816 for (i = 0, d = bdesc_pcmpestr;
30817 i < ARRAY_SIZE (bdesc_pcmpestr);
30818 i++, d++)
30819 {
30820 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30821 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30822 else
30823 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30824 def_builtin_const (d->mask, d->name, ftype, d->code);
30825 }
30826
30827 /* pcmpistr[im] insns. */
30828 for (i = 0, d = bdesc_pcmpistr;
30829 i < ARRAY_SIZE (bdesc_pcmpistr);
30830 i++, d++)
30831 {
30832 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30833 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30834 else
30835 ftype = INT_FTYPE_V16QI_V16QI_INT;
30836 def_builtin_const (d->mask, d->name, ftype, d->code);
30837 }
30838
30839 /* comi/ucomi insns. */
30840 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30841 {
30842 if (d->mask == OPTION_MASK_ISA_SSE2)
30843 ftype = INT_FTYPE_V2DF_V2DF;
30844 else
30845 ftype = INT_FTYPE_V4SF_V4SF;
30846 def_builtin_const (d->mask, d->name, ftype, d->code);
30847 }
30848
30849 /* SSE */
30850 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30851 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30852 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30853 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30854
30855 /* SSE or 3DNow!A */
30856 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30857 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30858 IX86_BUILTIN_MASKMOVQ);
30859
30860 /* SSE2 */
30861 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30862 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30863
30864 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30865 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30866 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30867 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30868
30869 /* SSE3. */
30870 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30871 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30872 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30873 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30874
30875 /* AES */
30876 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30877 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30878 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30879 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30880 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30881 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30882 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30883 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30884 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30885 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30886 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30887 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30888
30889 /* PCLMUL */
30890 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30891 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30892
30893 /* RDRND */
30894 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30895 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30896 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30897 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30898 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30899 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30900 IX86_BUILTIN_RDRAND64_STEP);
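
/* Editorial note (hypothetical user code): the *_step builtins above wrap
   the RDRAND instruction; the int return value mirrors the carry flag, so
   a caller is expected to retry until it succeeds, e.g.

     unsigned int r;
     while (!__builtin_ia32_rdrand32_step (&r))
       continue;   (CF was clear, no random value delivered yet)

   This is a sketch for illustration only.  */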
30901
30902 /* AVX2 */
30903 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30904 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30905 IX86_BUILTIN_GATHERSIV2DF);
30906
30907 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30908 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30909 IX86_BUILTIN_GATHERSIV4DF);
30910
30911 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30912 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30913 IX86_BUILTIN_GATHERDIV2DF);
30914
30915 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30916 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30917 IX86_BUILTIN_GATHERDIV4DF);
30918
30919 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30920 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30921 IX86_BUILTIN_GATHERSIV4SF);
30922
30923 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30924 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30925 IX86_BUILTIN_GATHERSIV8SF);
30926
30927 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30928 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30929 IX86_BUILTIN_GATHERDIV4SF);
30930
30931 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30932 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30933 IX86_BUILTIN_GATHERDIV8SF);
30934
30935 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30936 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30937 IX86_BUILTIN_GATHERSIV2DI);
30938
30939 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30940 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30941 IX86_BUILTIN_GATHERSIV4DI);
30942
30943 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30944 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30945 IX86_BUILTIN_GATHERDIV2DI);
30946
30947 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30948 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30949 IX86_BUILTIN_GATHERDIV4DI);
30950
30951 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30952 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30953 IX86_BUILTIN_GATHERSIV4SI);
30954
30955 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30956 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30957 IX86_BUILTIN_GATHERSIV8SI);
30958
30959 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30960 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30961 IX86_BUILTIN_GATHERDIV4SI);
30962
30963 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30964 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30965 IX86_BUILTIN_GATHERDIV8SI);
30966
30967 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
30968 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30969 IX86_BUILTIN_GATHERALTSIV4DF);
30970
30971 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
30972 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30973 IX86_BUILTIN_GATHERALTDIV8SF);
30974
30975 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
30976 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30977 IX86_BUILTIN_GATHERALTSIV4DI);
30978
30979 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
30980 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30981 IX86_BUILTIN_GATHERALTDIV8SI);
30982
30983 /* AVX512F */
30984 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
30985 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
30986 IX86_BUILTIN_GATHER3SIV16SF);
30987
30988 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
30989 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
30990 IX86_BUILTIN_GATHER3SIV8DF);
30991
30992 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
30993 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
30994 IX86_BUILTIN_GATHER3DIV16SF);
30995
30996 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
30997 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
30998 IX86_BUILTIN_GATHER3DIV8DF);
30999
31000 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31001 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
31002 IX86_BUILTIN_GATHER3SIV16SI);
31003
31004 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31005 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
31006 IX86_BUILTIN_GATHER3SIV8DI);
31007
31008 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31009 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
31010 IX86_BUILTIN_GATHER3DIV16SI);
31011
31012 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31013 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
31014 IX86_BUILTIN_GATHER3DIV8DI);
31015
31016 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
31017 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31018 IX86_BUILTIN_GATHER3ALTSIV8DF);
31019
31020 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
31021 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31022 IX86_BUILTIN_GATHER3ALTDIV16SF);
31023
31024 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
31025 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31026 IX86_BUILTIN_GATHER3ALTSIV8DI);
31027
31028 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
31029 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31030 IX86_BUILTIN_GATHER3ALTDIV16SI);
31031
31032 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31033 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
31034 IX86_BUILTIN_SCATTERSIV16SF);
31035
31036 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31037 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
31038 IX86_BUILTIN_SCATTERSIV8DF);
31039
31040 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31041 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
31042 IX86_BUILTIN_SCATTERDIV16SF);
31043
31044 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31045 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
31046 IX86_BUILTIN_SCATTERDIV8DF);
31047
31048 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31049 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
31050 IX86_BUILTIN_SCATTERSIV16SI);
31051
31052 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31053 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
31054 IX86_BUILTIN_SCATTERSIV8DI);
31055
31056 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31057 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
31058 IX86_BUILTIN_SCATTERDIV16SI);
31059
31060 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31061 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
31062 IX86_BUILTIN_SCATTERDIV8DI);
31063
31064 /* AVX512PF */
31065 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31066 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31067 IX86_BUILTIN_GATHERPFDPD);
31068 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31069 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31070 IX86_BUILTIN_GATHERPFDPS);
31071 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31072 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31073 IX86_BUILTIN_GATHERPFQPD);
31074 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31075 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31076 IX86_BUILTIN_GATHERPFQPS);
31077 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31078 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31079 IX86_BUILTIN_SCATTERPFDPD);
31080 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31081 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31082 IX86_BUILTIN_SCATTERPFDPS);
31083 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31084 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31085 IX86_BUILTIN_SCATTERPFQPD);
31086 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31087 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31088 IX86_BUILTIN_SCATTERPFQPS);
31089
31090 /* SHA */
31091 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31092 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31093 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31094 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31095 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31096 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31097 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31098 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31099 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31100 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31101 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31102 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31103 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31104 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31105
31106 /* RTM. */
31107 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31108 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31109
31110 /* MMX access to the vec_init patterns. */
31111 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31112 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31113
31114 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31115 V4HI_FTYPE_HI_HI_HI_HI,
31116 IX86_BUILTIN_VEC_INIT_V4HI);
31117
31118 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31119 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31120 IX86_BUILTIN_VEC_INIT_V8QI);
31121
31122 /* Access to the vec_extract patterns. */
31123 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31124 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31125 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31126 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31127 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31128 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31129 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31130 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31131 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31132 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31133
31134 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31135 "__builtin_ia32_vec_ext_v4hi",
31136 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31137
31138 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31139 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31140
31141 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31142 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31143
31144 /* Access to the vec_set patterns. */
31145 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31146 "__builtin_ia32_vec_set_v2di",
31147 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31148
31149 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31150 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31151
31152 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31153 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31154
31155 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31156 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31157
31158 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31159 "__builtin_ia32_vec_set_v4hi",
31160 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31161
31162 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31163 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31164
31165 /* RDSEED */
31166 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31167 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31168 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31169 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31170 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31171 "__builtin_ia32_rdseed_di_step",
31172 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31173
31174 /* ADCX */
31175 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31176 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31177 def_builtin (OPTION_MASK_ISA_64BIT,
31178 "__builtin_ia32_addcarryx_u64",
31179 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31180 IX86_BUILTIN_ADDCARRYX64);
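
/* Editorial example (hypothetical user code): __builtin_ia32_addcarryx_u32
   is an add-with-carry step; a two-word addition might chain it as

     unsigned int lo, hi;
     unsigned char c;
     c = __builtin_ia32_addcarryx_u32 (0, a0, b0, &lo);
     c = __builtin_ia32_addcarryx_u32 (c, a1, b1, &hi);

   where a0, a1, b0 and b1 are placeholder operands.  */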
31181
31182 /* Read/write FLAGS. */
31183 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31184 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31185 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31186 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31187 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31188 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31189 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31190 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
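
/* Editorial example (hypothetical user code, 32-bit target): the EFLAGS
   builtins above give raw access to the flags register, e.g.

     unsigned int flags = __builtin_ia32_readeflags_u32 ();
     __builtin_ia32_writeeflags_u32 (flags | 0x1);   (set CF; sketch only)

   On 64-bit targets the _u64 variants are registered instead.  */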
31191
31192 /* CLFLUSHOPT. */
31193 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
31194 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
31195
31196 /* Add FMA4 multi-arg argument instructions */
31197 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31198 {
31199 if (d->name == 0)
31200 continue;
31201
31202 ftype = (enum ix86_builtin_func_type) d->flag;
31203 def_builtin_const (d->mask, d->name, ftype, d->code);
31204 }
31205 }
31206
31207 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31208 to return a pointer to VERSION_DECL if the outcome of the expression
31209 formed by PREDICATE_CHAIN is true. This function will be called during
31210 version dispatch to decide which function version to execute. It returns
31211 the basic block at the end, to which more conditions can be added. */
31212
31213 static basic_block
31214 add_condition_to_bb (tree function_decl, tree version_decl,
31215 tree predicate_chain, basic_block new_bb)
31216 {
31217 gimple return_stmt;
31218 tree convert_expr, result_var;
31219 gimple convert_stmt;
31220 gimple call_cond_stmt;
31221 gimple if_else_stmt;
31222
31223 basic_block bb1, bb2, bb3;
31224 edge e12, e23;
31225
31226 tree cond_var, and_expr_var = NULL_TREE;
31227 gimple_seq gseq;
31228
31229 tree predicate_decl, predicate_arg;
31230
31231 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31232
31233 gcc_assert (new_bb != NULL);
31234 gseq = bb_seq (new_bb);
31235
31236
31237 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31238 build_fold_addr_expr (version_decl));
31239 result_var = create_tmp_var (ptr_type_node, NULL);
31240 convert_stmt = gimple_build_assign (result_var, convert_expr);
31241 return_stmt = gimple_build_return (result_var);
31242
31243 if (predicate_chain == NULL_TREE)
31244 {
31245 gimple_seq_add_stmt (&gseq, convert_stmt);
31246 gimple_seq_add_stmt (&gseq, return_stmt);
31247 set_bb_seq (new_bb, gseq);
31248 gimple_set_bb (convert_stmt, new_bb);
31249 gimple_set_bb (return_stmt, new_bb);
31250 pop_cfun ();
31251 return new_bb;
31252 }
31253
31254 while (predicate_chain != NULL)
31255 {
31256 cond_var = create_tmp_var (integer_type_node, NULL);
31257 predicate_decl = TREE_PURPOSE (predicate_chain);
31258 predicate_arg = TREE_VALUE (predicate_chain);
31259 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31260 gimple_call_set_lhs (call_cond_stmt, cond_var);
31261
31262 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31263 gimple_set_bb (call_cond_stmt, new_bb);
31264 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31265
31266 predicate_chain = TREE_CHAIN (predicate_chain);
31267
31268 if (and_expr_var == NULL)
31269 and_expr_var = cond_var;
31270 else
31271 {
31272 gimple assign_stmt;
31273 /* Use MIN_EXPR to check whether any of the integers is zero:
31274 and_expr_var = min_expr <cond_var, and_expr_var>. */
31275 assign_stmt = gimple_build_assign (and_expr_var,
31276 build2 (MIN_EXPR, integer_type_node,
31277 cond_var, and_expr_var));
31278
31279 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31280 gimple_set_bb (assign_stmt, new_bb);
31281 gimple_seq_add_stmt (&gseq, assign_stmt);
31282 }
31283 }
31284
31285 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31286 integer_zero_node,
31287 NULL_TREE, NULL_TREE);
31288 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31289 gimple_set_bb (if_else_stmt, new_bb);
31290 gimple_seq_add_stmt (&gseq, if_else_stmt);
31291
31292 gimple_seq_add_stmt (&gseq, convert_stmt);
31293 gimple_seq_add_stmt (&gseq, return_stmt);
31294 set_bb_seq (new_bb, gseq);
31295
31296 bb1 = new_bb;
31297 e12 = split_block (bb1, if_else_stmt);
31298 bb2 = e12->dest;
31299 e12->flags &= ~EDGE_FALLTHRU;
31300 e12->flags |= EDGE_TRUE_VALUE;
31301
31302 e23 = split_block (bb2, return_stmt);
31303
31304 gimple_set_bb (convert_stmt, bb2);
31305 gimple_set_bb (return_stmt, bb2);
31306
31307 bb3 = e23->dest;
31308 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31309
31310 remove_edge (e23);
31311 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31312
31313 pop_cfun ();
31314
31315 return bb3;
31316 }
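
/* Editorial note: a fully built resolver behaves like the following
   hypothetical C rendering, with one guarded return added per call to
   add_condition_to_bb:

     void *foo_resolver (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_is ("corei7") && __builtin_cpu_supports ("avx"))
         return (void *) foo_avx;
       if (__builtin_cpu_supports ("sse4.2"))
         return (void *) foo_sse42;
       return (void *) foo_default;
     }

   The foo_* names are placeholders; the actual predicates come from the
   predicate chains built by get_builtin_code_for_version below.  */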
31317
31318 /* This parses the attribute arguments to target in DECL and determines
31319 the right builtin to use to match the platform specification.
31320 It returns the priority value for this version decl. If PREDICATE_LIST
31321 is not NULL, it stores the list of cpu features that need to be checked
31322 before dispatching this function. */
31323
31324 static unsigned int
31325 get_builtin_code_for_version (tree decl, tree *predicate_list)
31326 {
31327 tree attrs;
31328 struct cl_target_option cur_target;
31329 tree target_node;
31330 struct cl_target_option *new_target;
31331 const char *arg_str = NULL;
31332 const char *attrs_str = NULL;
31333 char *tok_str = NULL;
31334 char *token;
31335
31336 /* Priority of i386 features, greater value is higher priority. This is
31337 used to decide the order in which function dispatch must happen. For
31338 instance, a version specialized for SSE4.2 should be checked for dispatch
31339 before a version for SSE3, as SSE4.2 implies SSE3. */
31340 enum feature_priority
31341 {
31342 P_ZERO = 0,
31343 P_MMX,
31344 P_SSE,
31345 P_SSE2,
31346 P_SSE3,
31347 P_SSSE3,
31348 P_PROC_SSSE3,
31349 P_SSE4_A,
31350 P_PROC_SSE4_A,
31351 P_SSE4_1,
31352 P_SSE4_2,
31353 P_PROC_SSE4_2,
31354 P_POPCNT,
31355 P_AVX,
31356 P_PROC_AVX,
31357 P_FMA4,
31358 P_XOP,
31359 P_PROC_XOP,
31360 P_FMA,
31361 P_PROC_FMA,
31362 P_AVX2,
31363 P_PROC_AVX2
31364 };
31365
31366 enum feature_priority priority = P_ZERO;
31367
31368 /* These are the target attribute strings for which a dispatcher is
31369 available, from fold_builtin_cpu. */
31370
31371 static struct _feature_list
31372 {
31373 const char *const name;
31374 const enum feature_priority priority;
31375 }
31376 const feature_list[] =
31377 {
31378 {"mmx", P_MMX},
31379 {"sse", P_SSE},
31380 {"sse2", P_SSE2},
31381 {"sse3", P_SSE3},
31382 {"sse4a", P_SSE4_A},
31383 {"ssse3", P_SSSE3},
31384 {"sse4.1", P_SSE4_1},
31385 {"sse4.2", P_SSE4_2},
31386 {"popcnt", P_POPCNT},
31387 {"avx", P_AVX},
31388 {"fma4", P_FMA4},
31389 {"xop", P_XOP},
31390 {"fma", P_FMA},
31391 {"avx2", P_AVX2}
31392 };
31393
31394
31395 static unsigned int NUM_FEATURES
31396 = sizeof (feature_list) / sizeof (struct _feature_list);
31397
31398 unsigned int i;
31399
31400 tree predicate_chain = NULL_TREE;
31401 tree predicate_decl, predicate_arg;
31402
31403 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31404 gcc_assert (attrs != NULL);
31405
31406 attrs = TREE_VALUE (TREE_VALUE (attrs));
31407
31408 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31409 attrs_str = TREE_STRING_POINTER (attrs);
31410
31411 /* Return priority zero for default function. */
31412 if (strcmp (attrs_str, "default") == 0)
31413 return 0;
31414
31415 /* Handle arch= if specified. For priority, set it to be 1 more than
31416 the best instruction set the processor can handle. For instance, if
31417 there is a version for atom and a version for ssse3 (the highest ISA
31418 priority for atom), the atom version must be checked for dispatch
31419 before the ssse3 version. */
31420 if (strstr (attrs_str, "arch=") != NULL)
31421 {
31422 cl_target_option_save (&cur_target, &global_options);
31423 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31424 &global_options_set);
31425
31426 gcc_assert (target_node);
31427 new_target = TREE_TARGET_OPTION (target_node);
31428 gcc_assert (new_target);
31429
31430 if (new_target->arch_specified && new_target->arch > 0)
31431 {
31432 switch (new_target->arch)
31433 {
31434 case PROCESSOR_CORE2:
31435 arg_str = "core2";
31436 priority = P_PROC_SSSE3;
31437 break;
31438 case PROCESSOR_NEHALEM:
31439 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31440 arg_str = "westmere";
31441 else
31442 /* We translate "arch=corei7" and "arch=nehalem" to
31443 "corei7" so that it will be mapped to M_INTEL_COREI7
31444 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31445 arg_str = "corei7";
31446 priority = P_PROC_SSE4_2;
31447 break;
31448 case PROCESSOR_SANDYBRIDGE:
31449 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31450 arg_str = "ivybridge";
31451 else
31452 arg_str = "sandybridge";
31453 priority = P_PROC_AVX;
31454 break;
31455 case PROCESSOR_HASWELL:
31456 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31457 arg_str = "broadwell";
31458 else
31459 arg_str = "haswell";
31460 priority = P_PROC_AVX2;
31461 break;
31462 case PROCESSOR_BONNELL:
31463 arg_str = "bonnell";
31464 priority = P_PROC_SSSE3;
31465 break;
31466 case PROCESSOR_SILVERMONT:
31467 arg_str = "silvermont";
31468 priority = P_PROC_SSE4_2;
31469 break;
31470 case PROCESSOR_AMDFAM10:
31471 arg_str = "amdfam10h";
31472 priority = P_PROC_SSE4_A;
31473 break;
31474 case PROCESSOR_BTVER1:
31475 arg_str = "btver1";
31476 priority = P_PROC_SSE4_A;
31477 break;
31478 case PROCESSOR_BTVER2:
31479 arg_str = "btver2";
31480 priority = P_PROC_AVX;
31481 break;
31482 case PROCESSOR_BDVER1:
31483 arg_str = "bdver1";
31484 priority = P_PROC_XOP;
31485 break;
31486 case PROCESSOR_BDVER2:
31487 arg_str = "bdver2";
31488 priority = P_PROC_FMA;
31489 break;
31490 case PROCESSOR_BDVER3:
31491 arg_str = "bdver3";
31492 priority = P_PROC_FMA;
31493 break;
31494 case PROCESSOR_BDVER4:
31495 arg_str = "bdver4";
31496 priority = P_PROC_AVX2;
31497 break;
31498 }
31499 }
31500
31501 cl_target_option_restore (&global_options, &cur_target);
31502
31503 if (predicate_list && arg_str == NULL)
31504 {
31505 error_at (DECL_SOURCE_LOCATION (decl),
31506 "No dispatcher found for the versioning attributes");
31507 return 0;
31508 }
31509
31510 if (predicate_list)
31511 {
31512 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31513 /* For a C string literal the length includes the trailing NULL. */
31514 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31515 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31516 predicate_chain);
31517 }
31518 }
31519
31520 /* Process feature name. */
31521 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31522 strcpy (tok_str, attrs_str);
31523 token = strtok (tok_str, ",");
31524 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31525
31526 while (token != NULL)
31527 {
31528 /* Do not process "arch=" */
31529 if (strncmp (token, "arch=", 5) == 0)
31530 {
31531 token = strtok (NULL, ",");
31532 continue;
31533 }
31534 for (i = 0; i < NUM_FEATURES; ++i)
31535 {
31536 if (strcmp (token, feature_list[i].name) == 0)
31537 {
31538 if (predicate_list)
31539 {
31540 predicate_arg = build_string_literal (
31541 strlen (feature_list[i].name) + 1,
31542 feature_list[i].name);
31543 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31544 predicate_chain);
31545 }
31546 /* Find the maximum priority feature. */
31547 if (feature_list[i].priority > priority)
31548 priority = feature_list[i].priority;
31549
31550 break;
31551 }
31552 }
31553 if (predicate_list && i == NUM_FEATURES)
31554 {
31555 error_at (DECL_SOURCE_LOCATION (decl),
31556 "No dispatcher found for %s", token);
31557 return 0;
31558 }
31559 token = strtok (NULL, ",");
31560 }
31561 free (tok_str);
31562
31563 if (predicate_list && predicate_chain == NULL_TREE)
31564 {
31565 error_at (DECL_SOURCE_LOCATION (decl),
31566 "No dispatcher found for the versioning attributes : %s",
31567 attrs_str);
31568 return 0;
31569 }
31570 else if (predicate_list)
31571 {
31572 predicate_chain = nreverse (predicate_chain);
31573 *predicate_list = predicate_chain;
31574 }
31575
31576 return priority;
31577 }
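
/* Editorial examples: a hypothetical version declared with
   __attribute__ ((target ("arch=core2"))) yields the single predicate
   __builtin_cpu_is ("core2") and priority P_PROC_SSSE3, while one
   declared with __attribute__ ((target ("avx2"))) yields
   __builtin_cpu_supports ("avx2") and priority P_AVX2.  */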
31578
31579 /* This compares the priority of target features in function DECL1
31580 and DECL2. It returns positive value if DECL1 is higher priority,
31581 negative value if DECL2 is higher priority and 0 if they are the
31582 same. */
31583
31584 static int
31585 ix86_compare_version_priority (tree decl1, tree decl2)
31586 {
31587 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31588 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31589
31590 return (int)priority1 - (int)priority2;
31591 }
31592
31593 /* V1 and V2 point to function versions with different priorities
31594 based on the target ISA. This function compares their priorities. */
31595
31596 static int
31597 feature_compare (const void *v1, const void *v2)
31598 {
31599 typedef struct _function_version_info
31600 {
31601 tree version_decl;
31602 tree predicate_chain;
31603 unsigned int dispatch_priority;
31604 } function_version_info;
31605
31606 const function_version_info c1 = *(const function_version_info *)v1;
31607 const function_version_info c2 = *(const function_version_info *)v2;
31608 return (c2.dispatch_priority - c1.dispatch_priority);
31609 }
31610
31611 /* This function generates the dispatch function for
31612 multi-versioned functions. DISPATCH_DECL is the function which will
31613 contain the dispatch logic. FNDECLS are the function choices for
31614 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
31615 in DISPATCH_DECL in which the dispatch code is generated. */
31616
31617 static int
31618 dispatch_function_versions (tree dispatch_decl,
31619 void *fndecls_p,
31620 basic_block *empty_bb)
31621 {
31622 tree default_decl;
31623 gimple ifunc_cpu_init_stmt;
31624 gimple_seq gseq;
31625 int ix;
31626 tree ele;
31627 vec<tree> *fndecls;
31628 unsigned int num_versions = 0;
31629 unsigned int actual_versions = 0;
31630 unsigned int i;
31631
31632 struct _function_version_info
31633 {
31634 tree version_decl;
31635 tree predicate_chain;
31636 unsigned int dispatch_priority;
31637 }*function_version_info;
31638
31639 gcc_assert (dispatch_decl != NULL
31640 && fndecls_p != NULL
31641 && empty_bb != NULL);
31642
31643 /* fndecls_p is actually a vector. */
31644 fndecls = static_cast<vec<tree> *> (fndecls_p);
31645
31646 /* At least one more version other than the default. */
31647 num_versions = fndecls->length ();
31648 gcc_assert (num_versions >= 2);
31649
31650 function_version_info = (struct _function_version_info *)
31651 XNEWVEC (struct _function_version_info, (num_versions - 1));
31652
31653 /* The first version in the vector is the default decl. */
31654 default_decl = (*fndecls)[0];
31655
31656 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31657
31658 gseq = bb_seq (*empty_bb);
31659 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31660 constructors, so explicitly call __builtin_cpu_init here. */
31661 ifunc_cpu_init_stmt = gimple_build_call_vec (
31662 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31663 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31664 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31665 set_bb_seq (*empty_bb, gseq);
31666
31667 pop_cfun ();
31668
31669
31670 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31671 {
31672 tree version_decl = ele;
31673 tree predicate_chain = NULL_TREE;
31674 unsigned int priority;
31675 /* Get attribute string, parse it and find the right predicate decl.
31676 The predicate function could be a lengthy combination of many
31677 features, like arch-type and various isa-variants. */
31678 priority = get_builtin_code_for_version (version_decl,
31679 &predicate_chain);
31680
31681 if (predicate_chain == NULL_TREE)
31682 continue;
31683
31684 function_version_info [actual_versions].version_decl = version_decl;
31685 function_version_info [actual_versions].predicate_chain
31686 = predicate_chain;
31687 function_version_info [actual_versions].dispatch_priority = priority;
31688 actual_versions++;
31689 }
31690
31691 /* Sort the versions according to descending order of dispatch priority. The
31692 priority is based on the ISA. This is not a perfect solution. There
31693 could still be ambiguity. If more than one function version is suitable
31694 to execute, which one should be dispatched? In future, allow the user
31695 to specify a dispatch priority next to the version. */
31696 qsort (function_version_info, actual_versions,
31697 sizeof (struct _function_version_info), feature_compare);
31698
31699 for (i = 0; i < actual_versions; ++i)
31700 *empty_bb = add_condition_to_bb (dispatch_decl,
31701 function_version_info[i].version_decl,
31702 function_version_info[i].predicate_chain,
31703 *empty_bb);
31704
31705 /* Dispatch the default version at the end. */
31706 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31707 NULL, *empty_bb);
31708
31709 free (function_version_info);
31710 return 0;
31711 }
31712
31713 /* Comparator function used by qsort to sort the attribute-specification
31714 strings of the "target" attribute. */
31715
31716 static int
31717 attr_strcmp (const void *v1, const void *v2)
31718 {
31719 const char *c1 = *(char *const*)v1;
31720 const char *c2 = *(char *const*)v2;
31721 return strcmp (c1, c2);
31722 }
31723
31724 /* ARGLIST is the argument to target attribute. This function tokenizes
31725 the comma separated arguments, sorts them and returns a string which
31726 is a unique identifier for the comma separated arguments. It also
31727 replaces non-identifier characters "=,-" with "_". */
31728
31729 static char *
31730 sorted_attr_string (tree arglist)
31731 {
31732 tree arg;
31733 size_t str_len_sum = 0;
31734 char **args = NULL;
31735 char *attr_str, *ret_str;
31736 char *attr = NULL;
31737 unsigned int argnum = 1;
31738 unsigned int i;
31739
31740 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31741 {
31742 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31743 size_t len = strlen (str);
31744 str_len_sum += len + 1;
31745 if (arg != arglist)
31746 argnum++;
31747 for (i = 0; i < strlen (str); i++)
31748 if (str[i] == ',')
31749 argnum++;
31750 }
31751
31752 attr_str = XNEWVEC (char, str_len_sum);
31753 str_len_sum = 0;
31754 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31755 {
31756 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31757 size_t len = strlen (str);
31758 memcpy (attr_str + str_len_sum, str, len);
31759 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31760 str_len_sum += len + 1;
31761 }
31762
31763 /* Replace the non-identifier characters '=' and '-' with '_'. */
31764 for (i = 0; i < strlen (attr_str); i++)
31765 if (attr_str[i] == '=' || attr_str[i] == '-')
31766 attr_str[i] = '_';
31767
31768 if (argnum == 1)
31769 return attr_str;
31770
31771 args = XNEWVEC (char *, argnum);
31772
31773 i = 0;
31774 attr = strtok (attr_str, ",");
31775 while (attr != NULL)
31776 {
31777 args[i] = attr;
31778 i++;
31779 attr = strtok (NULL, ",");
31780 }
31781
31782 qsort (args, argnum, sizeof (char *), attr_strcmp);
31783
31784 ret_str = XNEWVEC (char, str_len_sum);
31785 str_len_sum = 0;
31786 for (i = 0; i < argnum; i++)
31787 {
31788 size_t len = strlen (args[i]);
31789 memcpy (ret_str + str_len_sum, args[i], len);
31790 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31791 str_len_sum += len + 1;
31792 }
31793
31794 XDELETEVEC (args);
31795 XDELETEVEC (attr_str);
31796 return ret_str;
31797 }
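
/* Editorial example: for a hypothetical attribute
   target ("sse4.2,arch=core2") this returns "arch_core2_sse4.2":
   '=' and '-' become '_', the comma-separated tokens are sorted, and the
   sorted tokens are joined with '_'.  */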
31798
31799 /* This function changes the assembler name for functions that are
31800 versions. If DECL is a function version and has a "target"
31801 attribute, it appends the attribute string to its assembler name. */
31802
31803 static tree
31804 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31805 {
31806 tree version_attr;
31807 const char *orig_name, *version_string;
31808 char *attr_str, *assembler_name;
31809
31810 if (DECL_DECLARED_INLINE_P (decl)
31811 && lookup_attribute ("gnu_inline",
31812 DECL_ATTRIBUTES (decl)))
31813 error_at (DECL_SOURCE_LOCATION (decl),
31814 "Function versions cannot be marked as gnu_inline,"
31815 " bodies have to be generated");
31816
31817 if (DECL_VIRTUAL_P (decl)
31818 || DECL_VINDEX (decl))
31819 sorry ("Virtual function multiversioning not supported");
31820
31821 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31822
31823 /* The target attribute string cannot be NULL. */
31824 gcc_assert (version_attr != NULL_TREE);
31825
31826 orig_name = IDENTIFIER_POINTER (id);
31827 version_string
31828 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31829
31830 if (strcmp (version_string, "default") == 0)
31831 return id;
31832
31833 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31834 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31835
31836 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31837
31838 /* Allow assembler name to be modified if already set. */
31839 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31840 SET_DECL_RTL (decl, NULL);
31841
31842 tree ret = get_identifier (assembler_name);
31843 XDELETEVEC (attr_str);
31844 XDELETEVEC (assembler_name);
31845 return ret;
31846 }
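
/* Editorial example: a hypothetical C++ version declared as

     __attribute__ ((target ("sse4.2,arch=core2"))) int foo ();

   would get the assembler name "_Z3foov.arch_core2_sse4.2", while the
   "default" version keeps its unmodified mangled name.  */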
31847
31848 /* This function returns true if FN1 and FN2 are versions of the same function,
31849 that is, the target strings of the function decls are different. This assumes
31850 that FN1 and FN2 have the same signature. */
31851
31852 static bool
31853 ix86_function_versions (tree fn1, tree fn2)
31854 {
31855 tree attr1, attr2;
31856 char *target1, *target2;
31857 bool result;
31858
31859 if (TREE_CODE (fn1) != FUNCTION_DECL
31860 || TREE_CODE (fn2) != FUNCTION_DECL)
31861 return false;
31862
31863 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31864 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31865
31866 /* At least one function decl should have the target attribute specified. */
31867 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31868 return false;
31869
31870 /* Diagnose missing target attribute if one of the decls is already
31871 multi-versioned. */
31872 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31873 {
31874 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31875 {
31876 if (attr2 != NULL_TREE)
31877 {
31878 tree tem = fn1;
31879 fn1 = fn2;
31880 fn2 = tem;
31881 attr1 = attr2;
31882 }
31883 error_at (DECL_SOURCE_LOCATION (fn2),
31884 "missing %<target%> attribute for multi-versioned %D",
31885 fn2);
31886 inform (DECL_SOURCE_LOCATION (fn1),
31887 "previous declaration of %D", fn1);
31888 /* Prevent diagnosing of the same error multiple times. */
31889 DECL_ATTRIBUTES (fn2)
31890 = tree_cons (get_identifier ("target"),
31891 copy_node (TREE_VALUE (attr1)),
31892 DECL_ATTRIBUTES (fn2));
31893 }
31894 return false;
31895 }
31896
31897 target1 = sorted_attr_string (TREE_VALUE (attr1));
31898 target2 = sorted_attr_string (TREE_VALUE (attr2));
31899
31900 /* The sorted target strings must be different for fn1 and fn2
31901 to be versions. */
31902 if (strcmp (target1, target2) == 0)
31903 result = false;
31904 else
31905 result = true;
31906
31907 XDELETEVEC (target1);
31908 XDELETEVEC (target2);
31909
31910 return result;
31911 }
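
/* Editorial example: for the two hypothetical declarations

     __attribute__ ((target ("default"))) int foo ();
     __attribute__ ((target ("avx")))     int foo ();

   the sorted target strings "default" and "avx" differ, so this hook
   treats the declarations as versions of the same function.  */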
31912
31913 static tree
31914 ix86_mangle_decl_assembler_name (tree decl, tree id)
31915 {
31916 /* For function version, add the target suffix to the assembler name. */
31917 if (TREE_CODE (decl) == FUNCTION_DECL
31918 && DECL_FUNCTION_VERSIONED (decl))
31919 id = ix86_mangle_function_version_assembler_name (decl, id);
31920 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31921 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31922 #endif
31923
31924 return id;
31925 }
31926
31927 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
31928 is true, append the full path name of the source file. */
31929
31930 static char *
31931 make_name (tree decl, const char *suffix, bool make_unique)
31932 {
31933 char *global_var_name;
31934 int name_len;
31935 const char *name;
31936 const char *unique_name = NULL;
31937
31938 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31939
31940 /* Get a unique name that can be used globally without any chances
31941 of collision at link time. */
31942 if (make_unique)
31943 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
31944
31945 name_len = strlen (name) + strlen (suffix) + 2;
31946
31947 if (make_unique)
31948 name_len += strlen (unique_name) + 1;
31949 global_var_name = XNEWVEC (char, name_len);
31950
31951 /* Use '.' to concatenate names as it is demangler friendly. */
31952 if (make_unique)
31953 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
31954 suffix);
31955 else
31956 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
31957
31958 return global_var_name;
31959 }
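
/* Editorial example: for a public function foo,
   make_name (decl, "resolver", false) produces "foo.resolver"; with
   MAKE_UNIQUE set, the identifier from get_file_function_name is spliced
   in between, roughly "foo.<per-file-id>.resolver" (the exact middle
   component is target and file dependent).  */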
31960
31961 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31962
31963 /* Make a dispatcher declaration for the multi-versioned function DECL.
31964 Calls to DECL function will be replaced with calls to the dispatcher
31965 by the front-end. Return the decl created. */
31966
31967 static tree
31968 make_dispatcher_decl (const tree decl)
31969 {
31970 tree func_decl;
31971 char *func_name;
31972 tree fn_type, func_type;
31973 bool is_uniq = false;
31974
31975 if (TREE_PUBLIC (decl) == 0)
31976 is_uniq = true;
31977
31978 func_name = make_name (decl, "ifunc", is_uniq);
31979
31980 fn_type = TREE_TYPE (decl);
31981 func_type = build_function_type (TREE_TYPE (fn_type),
31982 TYPE_ARG_TYPES (fn_type));
31983
31984 func_decl = build_fn_decl (func_name, func_type);
31985 XDELETEVEC (func_name);
31986 TREE_USED (func_decl) = 1;
31987 DECL_CONTEXT (func_decl) = NULL_TREE;
31988 DECL_INITIAL (func_decl) = error_mark_node;
31989 DECL_ARTIFICIAL (func_decl) = 1;
31990 /* Mark this function as external; the resolver will flip it again if
31991 it gets generated. */
31992 DECL_EXTERNAL (func_decl) = 1;
31993 /* IFUNCs have to be externally visible. */
31994 TREE_PUBLIC (func_decl) = 1;
31995
31996 return func_decl;
31997 }
31998
31999 #endif
32000
32001 /* Returns true if DECL is multi-versioned and is the default function,
32002 that is, it is not tagged with a target-specific optimization. */
32003
32004 static bool
32005 is_function_default_version (const tree decl)
32006 {
32007 if (TREE_CODE (decl) != FUNCTION_DECL
32008 || !DECL_FUNCTION_VERSIONED (decl))
32009 return false;
32010 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32011 gcc_assert (attr);
32012 attr = TREE_VALUE (TREE_VALUE (attr));
32013 return (TREE_CODE (attr) == STRING_CST
32014 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
32015 }
32016
32017 /* Make a dispatcher declaration for the multi-versioned function DECL.
32018 Calls to DECL function will be replaced with calls to the dispatcher
32019 by the front-end. Returns the decl of the dispatcher function. */
32020
32021 static tree
32022 ix86_get_function_versions_dispatcher (void *decl)
32023 {
32024 tree fn = (tree) decl;
32025 struct cgraph_node *node = NULL;
32026 struct cgraph_node *default_node = NULL;
32027 struct cgraph_function_version_info *node_v = NULL;
32028 struct cgraph_function_version_info *first_v = NULL;
32029
32030 tree dispatch_decl = NULL;
32031
32032 struct cgraph_function_version_info *default_version_info = NULL;
32033
32034 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32035
32036 node = cgraph_get_node (fn);
32037 gcc_assert (node != NULL);
32038
32039 node_v = get_cgraph_node_version (node);
32040 gcc_assert (node_v != NULL);
32041
32042 if (node_v->dispatcher_resolver != NULL)
32043 return node_v->dispatcher_resolver;
32044
32045 /* Find the default version and make it the first node. */
32046 first_v = node_v;
32047 /* Go to the beginning of the chain. */
32048 while (first_v->prev != NULL)
32049 first_v = first_v->prev;
32050 default_version_info = first_v;
32051 while (default_version_info != NULL)
32052 {
32053 if (is_function_default_version
32054 (default_version_info->this_node->decl))
32055 break;
32056 default_version_info = default_version_info->next;
32057 }
32058
32059 /* If there is no default node, just return NULL. */
32060 if (default_version_info == NULL)
32061 return NULL;
32062
32063 /* Make default info the first node. */
32064 if (first_v != default_version_info)
32065 {
32066 default_version_info->prev->next = default_version_info->next;
32067 if (default_version_info->next)
32068 default_version_info->next->prev = default_version_info->prev;
32069 first_v->prev = default_version_info;
32070 default_version_info->next = first_v;
32071 default_version_info->prev = NULL;
32072 }
32073
32074 default_node = default_version_info->this_node;
32075
32076 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32077 if (targetm.has_ifunc_p ())
32078 {
32079 struct cgraph_function_version_info *it_v = NULL;
32080 struct cgraph_node *dispatcher_node = NULL;
32081 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32082
32083 /* Right now, the dispatching is done via ifunc. */
32084 dispatch_decl = make_dispatcher_decl (default_node->decl);
32085
32086 dispatcher_node = cgraph_get_create_node (dispatch_decl);
32087 gcc_assert (dispatcher_node != NULL);
32088 dispatcher_node->dispatcher_function = 1;
32089 dispatcher_version_info
32090 = insert_new_cgraph_node_version (dispatcher_node);
32091 dispatcher_version_info->next = default_version_info;
32092 dispatcher_node->definition = 1;
32093
32094 /* Set the dispatcher for all the versions. */
32095 it_v = default_version_info;
32096 while (it_v != NULL)
32097 {
32098 it_v->dispatcher_resolver = dispatch_decl;
32099 it_v = it_v->next;
32100 }
32101 }
32102 else
32103 #endif
32104 {
32105 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32106 "multiversioning needs ifunc which is not supported "
32107 "on this target");
32108 }
32109
32110 return dispatch_decl;
32111 }
32112
32113 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32114 it to CHAIN. */
32115
32116 static tree
32117 make_attribute (const char *name, const char *arg_name, tree chain)
32118 {
32119 tree attr_name;
32120 tree attr_arg_name;
32121 tree attr_args;
32122 tree attr;
32123
32124 attr_name = get_identifier (name);
32125 attr_arg_name = build_string (strlen (arg_name), arg_name);
32126 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32127 attr = tree_cons (attr_name, attr_args, chain);
32128 return attr;
32129 }
32130
32131 /* Make the resolver function decl to dispatch the versions of
32132 a multi-versioned function, DEFAULT_DECL. Create an
32133 empty basic block in the resolver and store the pointer in
32134 EMPTY_BB. Return the decl of the resolver function. */
32135
32136 static tree
32137 make_resolver_func (const tree default_decl,
32138 const tree dispatch_decl,
32139 basic_block *empty_bb)
32140 {
32141 char *resolver_name;
32142 tree decl, type, decl_name, t;
32143 bool is_uniq = false;
32144
32145 /* IFUNCs have to be globally visible. So, if the default_decl is
32146 not, then the name of the IFUNC should be made unique. */
32147 if (TREE_PUBLIC (default_decl) == 0)
32148 is_uniq = true;
32149
32150 /* Append the filename to the resolver function if the versions are
32151 not externally visible. This is because the resolver function has
32152 to be externally visible for the loader to find it. So, appending
32153 the filename will prevent conflicts with a resolver function from
32154 another module which is based on the same version name. */
32155 resolver_name = make_name (default_decl, "resolver", is_uniq);
32156
32157 /* The resolver function should return a (void *). */
32158 type = build_function_type_list (ptr_type_node, NULL_TREE);
32159
32160 decl = build_fn_decl (resolver_name, type);
32161 decl_name = get_identifier (resolver_name);
32162 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32163
32164 DECL_NAME (decl) = decl_name;
32165 TREE_USED (decl) = 1;
32166 DECL_ARTIFICIAL (decl) = 1;
32167 DECL_IGNORED_P (decl) = 0;
32168 /* IFUNC resolvers have to be externally visible. */
32169 TREE_PUBLIC (decl) = 1;
32170 DECL_UNINLINABLE (decl) = 1;
32171
32172 /* Resolver is not external, body is generated. */
32173 DECL_EXTERNAL (decl) = 0;
32174 DECL_EXTERNAL (dispatch_decl) = 0;
32175
32176 DECL_CONTEXT (decl) = NULL_TREE;
32177 DECL_INITIAL (decl) = make_node (BLOCK);
32178 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32179
32180 if (DECL_COMDAT_GROUP (default_decl)
32181 || TREE_PUBLIC (default_decl))
32182 {
32183 /* In this case, each translation unit with a call to this
32184 versioned function will put out a resolver. Ensure it
32185 is comdat to keep just one copy. */
32186 DECL_COMDAT (decl) = 1;
32187 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32188 }
32189 /* Build result decl and add to function_decl. */
32190 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32191 DECL_ARTIFICIAL (t) = 1;
32192 DECL_IGNORED_P (t) = 1;
32193 DECL_RESULT (decl) = t;
32194
32195 gimplify_function_tree (decl);
32196 push_cfun (DECL_STRUCT_FUNCTION (decl));
32197 *empty_bb = init_lowered_empty_function (decl, false);
32198
32199 cgraph_add_new_function (decl, true);
32200 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
32201
32202 pop_cfun ();
32203
32204 gcc_assert (dispatch_decl != NULL);
32205 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32206 DECL_ATTRIBUTES (dispatch_decl)
32207 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32208
32209 /* Create the alias for dispatch to resolver here. */
32210 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32211 cgraph_same_body_alias (NULL, dispatch_decl, decl);
32212 XDELETEVEC (resolver_name);
32213 return decl;
32214 }
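
/* Editorial note: the net effect of make_resolver_func is comparable to
   the hypothetical declaration pair

     void *foo.resolver (void) { ... dispatch body ... }
     int foo.ifunc () __attribute__ ((ifunc ("foo.resolver")));

   which is not legal C as written (because of the dots in the names) and
   is shown only to illustrate the ifunc/resolver pairing set up here.  */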
32215
32216 /* Generate the dispatching code body to dispatch multi-versioned function
32217 DECL. The target hook is called to process the "target" attributes and
32218 provide the code to dispatch the right function at run-time. NODE points
32219 to the dispatcher decl whose body will be created. */
32220
32221 static tree
32222 ix86_generate_version_dispatcher_body (void *node_p)
32223 {
32224 tree resolver_decl;
32225 basic_block empty_bb;
32226 tree default_ver_decl;
32227 struct cgraph_node *versn;
32228 struct cgraph_node *node;
32229
32230 struct cgraph_function_version_info *node_version_info = NULL;
32231 struct cgraph_function_version_info *versn_info = NULL;
32232
32233 node = (cgraph_node *)node_p;
32234
32235 node_version_info = get_cgraph_node_version (node);
32236 gcc_assert (node->dispatcher_function
32237 && node_version_info != NULL);
32238
32239 if (node_version_info->dispatcher_resolver)
32240 return node_version_info->dispatcher_resolver;
32241
32242 /* The first version in the chain corresponds to the default version. */
32243 default_ver_decl = node_version_info->next->this_node->decl;
32244
32245 /* node is going to be an alias, so remove the finalized bit. */
32246 node->definition = false;
32247
32248 resolver_decl = make_resolver_func (default_ver_decl,
32249 node->decl, &empty_bb);
32250
32251 node_version_info->dispatcher_resolver = resolver_decl;
32252
32253 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32254
32255 auto_vec<tree, 2> fn_ver_vec;
32256
32257 for (versn_info = node_version_info->next; versn_info;
32258 versn_info = versn_info->next)
32259 {
32260 versn = versn_info->this_node;
32261 /* Check for virtual functions here again, as by this time it should
32262 have been determined if this function needs a vtable index or
32263 not. This happens for methods in derived classes that override
32264 virtual methods in base classes but are not explicitly marked as
32265 virtual. */
32266 if (DECL_VINDEX (versn->decl))
32267 sorry ("Virtual function multiversioning not supported");
32268
32269 fn_ver_vec.safe_push (versn->decl);
32270 }
32271
32272 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32273 rebuild_cgraph_edges ();
32274 pop_cfun ();
32275 return resolver_decl;
32276 }
32277 /* This builds the processor_model struct type defined in
32278 libgcc/config/i386/cpuinfo.c. */
32279
32280 static tree
32281 build_processor_model_struct (void)
32282 {
32283 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32284 "__cpu_features"};
32285 tree field = NULL_TREE, field_chain = NULL_TREE;
32286 int i;
32287 tree type = make_node (RECORD_TYPE);
32288
32289 /* The first 3 fields are unsigned int. */
32290 for (i = 0; i < 3; ++i)
32291 {
32292 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32293 get_identifier (field_name[i]), unsigned_type_node);
32294 if (field_chain != NULL_TREE)
32295 DECL_CHAIN (field) = field_chain;
32296 field_chain = field;
32297 }
32298
32299 /* The last field is an array of unsigned integers of size one. */
32300 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32301 get_identifier (field_name[3]),
32302 build_array_type (unsigned_type_node,
32303 build_index_type (size_one_node)));
32304 if (field_chain != NULL_TREE)
32305 DECL_CHAIN (field) = field_chain;
32306 field_chain = field;
32307
32308 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32309 return type;
32310 }
32311
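/* For reference, the type built above is meant to match the layout used
   by libgcc; a sketch of the corresponding C declaration (assuming
   libgcc/config/i386/cpuinfo.c keeps this layout) is:

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */
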
32312 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32313
32314 static tree
32315 make_var_decl (tree type, const char *name)
32316 {
32317 tree new_decl;
32318
32319 new_decl = build_decl (UNKNOWN_LOCATION,
32320 VAR_DECL,
32321 get_identifier (name),
32322 type);
32323
32324 DECL_EXTERNAL (new_decl) = 1;
32325 TREE_STATIC (new_decl) = 1;
32326 TREE_PUBLIC (new_decl) = 1;
32327 DECL_INITIAL (new_decl) = 0;
32328 DECL_ARTIFICIAL (new_decl) = 0;
32329 DECL_PRESERVE_P (new_decl) = 1;
32330
32331 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32332 assemble_variable (new_decl, 0, 0, 0);
32333
32334 return new_decl;
32335 }
32336
32337 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32338 into an integer defined in libgcc/config/i386/cpuinfo.c */
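
/* For example (a sketch of the resulting folds, not verbatim dumps),
   __builtin_cpu_is ("intel") becomes roughly

     (int) (__cpu_model.__cpu_vendor == M_INTEL)

   and __builtin_cpu_supports ("avx2") becomes roughly

     (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))

   using the enum values defined below.  */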
32339
32340 static tree
32341 fold_builtin_cpu (tree fndecl, tree *args)
32342 {
32343 unsigned int i;
32344 enum ix86_builtins fn_code = (enum ix86_builtins)
32345 DECL_FUNCTION_CODE (fndecl);
32346 tree param_string_cst = NULL;
32347
32348 /* This is the order of the feature bits stored in __cpu_features[0] in cpuinfo.c. */
32349 enum processor_features
32350 {
32351 F_CMOV = 0,
32352 F_MMX,
32353 F_POPCNT,
32354 F_SSE,
32355 F_SSE2,
32356 F_SSE3,
32357 F_SSSE3,
32358 F_SSE4_1,
32359 F_SSE4_2,
32360 F_AVX,
32361 F_AVX2,
32362 F_SSE4_A,
32363 F_FMA4,
32364 F_XOP,
32365 F_FMA,
32366 F_MAX
32367 };
32368
32369 /* These are the values for vendor types, CPU types and CPU subtypes
32370 in cpuinfo.c. CPU types and subtypes have the corresponding start
32371 value subtracted from them before they are compared below. */
32372 enum processor_model
32373 {
32374 M_INTEL = 1,
32375 M_AMD,
32376 M_CPU_TYPE_START,
32377 M_INTEL_BONNELL,
32378 M_INTEL_CORE2,
32379 M_INTEL_COREI7,
32380 M_AMDFAM10H,
32381 M_AMDFAM15H,
32382 M_INTEL_SILVERMONT,
32383 M_AMD_BTVER1,
32384 M_AMD_BTVER2,
32385 M_CPU_SUBTYPE_START,
32386 M_INTEL_COREI7_NEHALEM,
32387 M_INTEL_COREI7_WESTMERE,
32388 M_INTEL_COREI7_SANDYBRIDGE,
32389 M_AMDFAM10H_BARCELONA,
32390 M_AMDFAM10H_SHANGHAI,
32391 M_AMDFAM10H_ISTANBUL,
32392 M_AMDFAM15H_BDVER1,
32393 M_AMDFAM15H_BDVER2,
32394 M_AMDFAM15H_BDVER3,
32395 M_AMDFAM15H_BDVER4,
32396 M_INTEL_COREI7_IVYBRIDGE,
32397 M_INTEL_COREI7_HASWELL
32398 };
32399
32400 static struct _arch_names_table
32401 {
32402 const char *const name;
32403 const enum processor_model model;
32404 }
32405 const arch_names_table[] =
32406 {
32407 {"amd", M_AMD},
32408 {"intel", M_INTEL},
32409 {"atom", M_INTEL_BONNELL},
32410 {"slm", M_INTEL_SILVERMONT},
32411 {"core2", M_INTEL_CORE2},
32412 {"corei7", M_INTEL_COREI7},
32413 {"nehalem", M_INTEL_COREI7_NEHALEM},
32414 {"westmere", M_INTEL_COREI7_WESTMERE},
32415 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32416 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32417 {"haswell", M_INTEL_COREI7_HASWELL},
32418 {"bonnell", M_INTEL_BONNELL},
32419 {"silvermont", M_INTEL_SILVERMONT},
32420 {"amdfam10h", M_AMDFAM10H},
32421 {"barcelona", M_AMDFAM10H_BARCELONA},
32422 {"shanghai", M_AMDFAM10H_SHANGHAI},
32423 {"istanbul", M_AMDFAM10H_ISTANBUL},
32424 {"btver1", M_AMD_BTVER1},
32425 {"amdfam15h", M_AMDFAM15H},
32426 {"bdver1", M_AMDFAM15H_BDVER1},
32427 {"bdver2", M_AMDFAM15H_BDVER2},
32428 {"bdver3", M_AMDFAM15H_BDVER3},
32429 {"bdver4", M_AMDFAM15H_BDVER4},
32430 {"btver2", M_AMD_BTVER2},
32431 };
32432
32433 static struct _isa_names_table
32434 {
32435 const char *const name;
32436 const enum processor_features feature;
32437 }
32438 const isa_names_table[] =
32439 {
32440 {"cmov", F_CMOV},
32441 {"mmx", F_MMX},
32442 {"popcnt", F_POPCNT},
32443 {"sse", F_SSE},
32444 {"sse2", F_SSE2},
32445 {"sse3", F_SSE3},
32446 {"ssse3", F_SSSE3},
32447 {"sse4a", F_SSE4_A},
32448 {"sse4.1", F_SSE4_1},
32449 {"sse4.2", F_SSE4_2},
32450 {"avx", F_AVX},
32451 {"fma4", F_FMA4},
32452 {"xop", F_XOP},
32453 {"fma", F_FMA},
32454 {"avx2", F_AVX2}
32455 };
32456
32457 tree __processor_model_type = build_processor_model_struct ();
32458 tree __cpu_model_var = make_var_decl (__processor_model_type,
32459 "__cpu_model");
32460
32461
32462 varpool_add_new_variable (__cpu_model_var);
32463
32464 gcc_assert ((args != NULL) && (*args != NULL));
32465
32466 param_string_cst = *args;
32467 while (param_string_cst
32468 && TREE_CODE (param_string_cst) != STRING_CST)
32469 {
32470 /* *args must be an expr that can contain other EXPRs leading to a
32471 STRING_CST. */
32472 if (!EXPR_P (param_string_cst))
32473 {
32474 error ("Parameter to builtin must be a string constant or literal");
32475 return integer_zero_node;
32476 }
32477 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32478 }
32479
32480 gcc_assert (param_string_cst);
32481
32482 if (fn_code == IX86_BUILTIN_CPU_IS)
32483 {
32484 tree ref;
32485 tree field;
32486 tree final;
32487
32488 unsigned int field_val = 0;
32489 unsigned int NUM_ARCH_NAMES
32490 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32491
32492 for (i = 0; i < NUM_ARCH_NAMES; i++)
32493 if (strcmp (arch_names_table[i].name,
32494 TREE_STRING_POINTER (param_string_cst)) == 0)
32495 break;
32496
32497 if (i == NUM_ARCH_NAMES)
32498 {
32499 error ("Parameter to builtin not valid: %s",
32500 TREE_STRING_POINTER (param_string_cst));
32501 return integer_zero_node;
32502 }
32503
32504 field = TYPE_FIELDS (__processor_model_type);
32505 field_val = arch_names_table[i].model;
32506
32507 /* CPU types are stored in the next field. */
32508 if (field_val > M_CPU_TYPE_START
32509 && field_val < M_CPU_SUBTYPE_START)
32510 {
32511 field = DECL_CHAIN (field);
32512 field_val -= M_CPU_TYPE_START;
32513 }
32514
32515 /* CPU subtypes are stored two fields further on, in __cpu_subtype. */
32516 if (field_val > M_CPU_SUBTYPE_START)
32517 {
32518 field = DECL_CHAIN (DECL_CHAIN (field));
32519 field_val -= M_CPU_SUBTYPE_START;
32520 }
32521
32522 /* Get the appropriate field in __cpu_model. */
32523 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32524 field, NULL_TREE);
32525
32526 /* Check the value. */
32527 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32528 build_int_cstu (unsigned_type_node, field_val));
32529 return build1 (CONVERT_EXPR, integer_type_node, final);
32530 }
32531 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32532 {
32533 tree ref;
32534 tree array_elt;
32535 tree field;
32536 tree final;
32537
32538 unsigned int field_val = 0;
32539 unsigned int NUM_ISA_NAMES
32540 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32541
32542 for (i = 0; i < NUM_ISA_NAMES; i++)
32543 if (strcmp (isa_names_table[i].name,
32544 TREE_STRING_POINTER (param_string_cst)) == 0)
32545 break;
32546
32547 if (i == NUM_ISA_NAMES)
32548 {
32549 error ("Parameter to builtin not valid: %s",
32550 TREE_STRING_POINTER (param_string_cst));
32551 return integer_zero_node;
32552 }
32553
32554 field = TYPE_FIELDS (__processor_model_type);
32555 /* Get the last field, which is __cpu_features. */
32556 while (DECL_CHAIN (field))
32557 field = DECL_CHAIN (field);
32558
32559 /* Get the appropriate field: __cpu_model.__cpu_features */
32560 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32561 field, NULL_TREE);
32562
32563 /* Access the 0th element of __cpu_features array. */
32564 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32565 integer_zero_node, NULL_TREE, NULL_TREE);
32566
32567 field_val = (1 << isa_names_table[i].feature);
32568 /* Return __cpu_model.__cpu_features[0] & field_val */
32569 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32570 build_int_cstu (unsigned_type_node, field_val));
32571 return build1 (CONVERT_EXPR, integer_type_node, final);
32572 }
32573 gcc_unreachable ();
32574 }
32575
32576 static tree
32577 ix86_fold_builtin (tree fndecl, int n_args,
32578 tree *args, bool ignore ATTRIBUTE_UNUSED)
32579 {
32580 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32581 {
32582 enum ix86_builtins fn_code = (enum ix86_builtins)
32583 DECL_FUNCTION_CODE (fndecl);
32584 if (fn_code == IX86_BUILTIN_CPU_IS
32585 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32586 {
32587 gcc_assert (n_args == 1);
32588 return fold_builtin_cpu (fndecl, args);
32589 }
32590 }
32591
32592 #ifdef SUBTARGET_FOLD_BUILTIN
32593 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32594 #endif
32595
32596 return NULL_TREE;
32597 }
32598
32599 /* Make builtins to detect cpu type and features supported. NAME is
32600 the builtin name, CODE is the builtin code, FTYPE is the function type
32601 of the builtin, and IS_CONST says whether to mark it TREE_READONLY. */
32602
32603 static void
32604 make_cpu_type_builtin (const char* name, int code,
32605 enum ix86_builtin_func_type ftype, bool is_const)
32606 {
32607 tree decl;
32608 tree type;
32609
32610 type = ix86_get_builtin_func_type (ftype);
32611 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32612 NULL, NULL_TREE);
32613 gcc_assert (decl != NULL_TREE);
32614 ix86_builtins[(int) code] = decl;
32615 TREE_READONLY (decl) = is_const;
32616 }
32617
32618 /* Make builtins to get CPU type and features supported. The created
32619 builtins are:
32620
32621 __builtin_cpu_init (), to detect cpu type and features,
32622 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32623 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32624 */
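
/* A usage sketch (user code, not part of this file):

     if (__builtin_cpu_supports ("sse4.2"))
       ...
     else if (__builtin_cpu_is ("bdver2"))
       ...

   Both builtins read __cpu_model; GCC's documentation recommends calling
   __builtin_cpu_init () first when they are used in code that may run
   before the libgcc constructor that initializes it.  */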
32625
32626 static void
32627 ix86_init_platform_type_builtins (void)
32628 {
32629 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32630 INT_FTYPE_VOID, false);
32631 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32632 INT_FTYPE_PCCHAR, true);
32633 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32634 INT_FTYPE_PCCHAR, true);
32635 }
32636
32637 /* Internal method for ix86_init_builtins. */
32638
32639 static void
32640 ix86_init_builtins_va_builtins_abi (void)
32641 {
32642 tree ms_va_ref, sysv_va_ref;
32643 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32644 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32645 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32646 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32647
32648 if (!TARGET_64BIT)
32649 return;
32650 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32651 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32652 ms_va_ref = build_reference_type (ms_va_list_type_node);
32653 sysv_va_ref =
32654 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32655
32656 fnvoid_va_end_ms =
32657 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32658 fnvoid_va_start_ms =
32659 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32660 fnvoid_va_end_sysv =
32661 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32662 fnvoid_va_start_sysv =
32663 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32664 NULL_TREE);
32665 fnvoid_va_copy_ms =
32666 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32667 NULL_TREE);
32668 fnvoid_va_copy_sysv =
32669 build_function_type_list (void_type_node, sysv_va_ref,
32670 sysv_va_ref, NULL_TREE);
32671
32672 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32673 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32674 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32675 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32676 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32677 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32678 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32679 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32680 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32681 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32682 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32683 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32684 }
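
/* A usage sketch (user code on x86-64, assuming the documented
   __builtin_ms_va_list type):

     __attribute__ ((ms_abi)) int
     first_vararg (int n, ...)
     {
       __builtin_ms_va_list ap;
       int v;

       __builtin_ms_va_start (ap, n);
       v = __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return v;
     }  */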
32685
32686 static void
32687 ix86_init_builtin_types (void)
32688 {
32689 tree float128_type_node, float80_type_node;
32690
32691 /* The __float80 type. */
32692 float80_type_node = long_double_type_node;
32693 if (TYPE_MODE (float80_type_node) != XFmode)
32694 {
32695 /* long double does not have XFmode here; make a distinct 80-bit type for __float80. */
32696 float80_type_node = make_node (REAL_TYPE);
32697
32698 TYPE_PRECISION (float80_type_node) = 80;
32699 layout_type (float80_type_node);
32700 }
32701 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32702
32703 /* The __float128 type. */
32704 float128_type_node = make_node (REAL_TYPE);
32705 TYPE_PRECISION (float128_type_node) = 128;
32706 layout_type (float128_type_node);
32707 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32708
32709 /* This macro is built by i386-builtin-types.awk. */
32710 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32711 }
32712
32713 static void
32714 ix86_init_builtins (void)
32715 {
32716 tree t;
32717
32718 ix86_init_builtin_types ();
32719
32720 /* Builtins to get CPU type and features. */
32721 ix86_init_platform_type_builtins ();
32722
32723 /* TFmode support builtins. */
32724 def_builtin_const (0, "__builtin_infq",
32725 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32726 def_builtin_const (0, "__builtin_huge_valq",
32727 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32728
32729 /* We will expand them to a normal call if SSE isn't available since
32730 they are used by libgcc. */
32731 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32732 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32733 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32734 TREE_READONLY (t) = 1;
32735 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32736
32737 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32738 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32739 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32740 TREE_READONLY (t) = 1;
32741 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32742
32743 ix86_init_tm_builtins ();
32744 ix86_init_mmx_sse_builtins ();
32745
32746 if (TARGET_LP64)
32747 ix86_init_builtins_va_builtins_abi ();
32748
32749 #ifdef SUBTARGET_INIT_BUILTINS
32750 SUBTARGET_INIT_BUILTINS;
32751 #endif
32752 }
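
/* A usage sketch for the TFmode builtins defined above (user code,
   assuming a target where __float128 is available):

     __float128 x = __builtin_infq ();
     __float128 y = __builtin_fabsq (x);
     __float128 z = __builtin_copysignq (y, x);  */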
32753
32754 /* Return the ix86 builtin for CODE. */
32755
32756 static tree
32757 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
32758 {
32759 if (code >= IX86_BUILTIN_MAX)
32760 return error_mark_node;
32761
32762 return ix86_builtins[code];
32763 }
32764
32765 /* Errors in the source file can cause expand_expr to return const0_rtx
32766 where we expect a vector. To avoid crashing, use one of the vector
32767 clear instructions. */
32768 static rtx
32769 safe_vector_operand (rtx x, enum machine_mode mode)
32770 {
32771 if (x == const0_rtx)
32772 x = CONST0_RTX (mode);
32773 return x;
32774 }
32775
32776 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32777
32778 static rtx
32779 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32780 {
32781 rtx pat;
32782 tree arg0 = CALL_EXPR_ARG (exp, 0);
32783 tree arg1 = CALL_EXPR_ARG (exp, 1);
32784 rtx op0 = expand_normal (arg0);
32785 rtx op1 = expand_normal (arg1);
32786 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32787 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32788 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32789
32790 if (VECTOR_MODE_P (mode0))
32791 op0 = safe_vector_operand (op0, mode0);
32792 if (VECTOR_MODE_P (mode1))
32793 op1 = safe_vector_operand (op1, mode1);
32794
32795 if (optimize || !target
32796 || GET_MODE (target) != tmode
32797 || !insn_data[icode].operand[0].predicate (target, tmode))
32798 target = gen_reg_rtx (tmode);
32799
32800 if (GET_MODE (op1) == SImode && mode1 == TImode)
32801 {
32802 rtx x = gen_reg_rtx (V4SImode);
32803 emit_insn (gen_sse2_loadd (x, op1));
32804 op1 = gen_lowpart (TImode, x);
32805 }
32806
32807 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32808 op0 = copy_to_mode_reg (mode0, op0);
32809 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32810 op1 = copy_to_mode_reg (mode1, op1);
32811
32812 pat = GEN_FCN (icode) (target, op0, op1);
32813 if (! pat)
32814 return 0;
32815
32816 emit_insn (pat);
32817
32818 return target;
32819 }
32820
32821 /* Subroutine of ix86_expand_builtin to take care of 1-4 argument insns. */
32822
32823 static rtx
32824 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32825 enum ix86_builtin_func_type m_type,
32826 enum rtx_code sub_code)
32827 {
32828 rtx pat;
32829 int i;
32830 int nargs;
32831 bool comparison_p = false;
32832 bool tf_p = false;
32833 bool last_arg_constant = false;
32834 int num_memory = 0;
32835 struct {
32836 rtx op;
32837 enum machine_mode mode;
32838 } args[4];
32839
32840 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32841
32842 switch (m_type)
32843 {
32844 case MULTI_ARG_4_DF2_DI_I:
32845 case MULTI_ARG_4_DF2_DI_I1:
32846 case MULTI_ARG_4_SF2_SI_I:
32847 case MULTI_ARG_4_SF2_SI_I1:
32848 nargs = 4;
32849 last_arg_constant = true;
32850 break;
32851
32852 case MULTI_ARG_3_SF:
32853 case MULTI_ARG_3_DF:
32854 case MULTI_ARG_3_SF2:
32855 case MULTI_ARG_3_DF2:
32856 case MULTI_ARG_3_DI:
32857 case MULTI_ARG_3_SI:
32858 case MULTI_ARG_3_SI_DI:
32859 case MULTI_ARG_3_HI:
32860 case MULTI_ARG_3_HI_SI:
32861 case MULTI_ARG_3_QI:
32862 case MULTI_ARG_3_DI2:
32863 case MULTI_ARG_3_SI2:
32864 case MULTI_ARG_3_HI2:
32865 case MULTI_ARG_3_QI2:
32866 nargs = 3;
32867 break;
32868
32869 case MULTI_ARG_2_SF:
32870 case MULTI_ARG_2_DF:
32871 case MULTI_ARG_2_DI:
32872 case MULTI_ARG_2_SI:
32873 case MULTI_ARG_2_HI:
32874 case MULTI_ARG_2_QI:
32875 nargs = 2;
32876 break;
32877
32878 case MULTI_ARG_2_DI_IMM:
32879 case MULTI_ARG_2_SI_IMM:
32880 case MULTI_ARG_2_HI_IMM:
32881 case MULTI_ARG_2_QI_IMM:
32882 nargs = 2;
32883 last_arg_constant = true;
32884 break;
32885
32886 case MULTI_ARG_1_SF:
32887 case MULTI_ARG_1_DF:
32888 case MULTI_ARG_1_SF2:
32889 case MULTI_ARG_1_DF2:
32890 case MULTI_ARG_1_DI:
32891 case MULTI_ARG_1_SI:
32892 case MULTI_ARG_1_HI:
32893 case MULTI_ARG_1_QI:
32894 case MULTI_ARG_1_SI_DI:
32895 case MULTI_ARG_1_HI_DI:
32896 case MULTI_ARG_1_HI_SI:
32897 case MULTI_ARG_1_QI_DI:
32898 case MULTI_ARG_1_QI_SI:
32899 case MULTI_ARG_1_QI_HI:
32900 nargs = 1;
32901 break;
32902
32903 case MULTI_ARG_2_DI_CMP:
32904 case MULTI_ARG_2_SI_CMP:
32905 case MULTI_ARG_2_HI_CMP:
32906 case MULTI_ARG_2_QI_CMP:
32907 nargs = 2;
32908 comparison_p = true;
32909 break;
32910
32911 case MULTI_ARG_2_SF_TF:
32912 case MULTI_ARG_2_DF_TF:
32913 case MULTI_ARG_2_DI_TF:
32914 case MULTI_ARG_2_SI_TF:
32915 case MULTI_ARG_2_HI_TF:
32916 case MULTI_ARG_2_QI_TF:
32917 nargs = 2;
32918 tf_p = true;
32919 break;
32920
32921 default:
32922 gcc_unreachable ();
32923 }
32924
32925 if (optimize || !target
32926 || GET_MODE (target) != tmode
32927 || !insn_data[icode].operand[0].predicate (target, tmode))
32928 target = gen_reg_rtx (tmode);
32929
32930 gcc_assert (nargs <= 4);
32931
32932 for (i = 0; i < nargs; i++)
32933 {
32934 tree arg = CALL_EXPR_ARG (exp, i);
32935 rtx op = expand_normal (arg);
32936 int adjust = (comparison_p) ? 1 : 0;
32937 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32938
32939 if (last_arg_constant && i == nargs - 1)
32940 {
32941 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32942 {
32943 enum insn_code new_icode = icode;
32944 switch (icode)
32945 {
32946 case CODE_FOR_xop_vpermil2v2df3:
32947 case CODE_FOR_xop_vpermil2v4sf3:
32948 case CODE_FOR_xop_vpermil2v4df3:
32949 case CODE_FOR_xop_vpermil2v8sf3:
32950 error ("the last argument must be a 2-bit immediate");
32951 return gen_reg_rtx (tmode);
32952 case CODE_FOR_xop_rotlv2di3:
32953 new_icode = CODE_FOR_rotlv2di3;
32954 goto xop_rotl;
32955 case CODE_FOR_xop_rotlv4si3:
32956 new_icode = CODE_FOR_rotlv4si3;
32957 goto xop_rotl;
32958 case CODE_FOR_xop_rotlv8hi3:
32959 new_icode = CODE_FOR_rotlv8hi3;
32960 goto xop_rotl;
32961 case CODE_FOR_xop_rotlv16qi3:
32962 new_icode = CODE_FOR_rotlv16qi3;
32963 xop_rotl:
32964 if (CONST_INT_P (op))
32965 {
32966 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
32967 op = GEN_INT (INTVAL (op) & mask);
32968 gcc_checking_assert
32969 (insn_data[icode].operand[i + 1].predicate (op, mode));
32970 }
32971 else
32972 {
32973 gcc_checking_assert
32974 (nargs == 2
32975 && insn_data[new_icode].operand[0].mode == tmode
32976 && insn_data[new_icode].operand[1].mode == tmode
32977 && insn_data[new_icode].operand[2].mode == mode
32978 && insn_data[new_icode].operand[0].predicate
32979 == insn_data[icode].operand[0].predicate
32980 && insn_data[new_icode].operand[1].predicate
32981 == insn_data[icode].operand[1].predicate);
32982 icode = new_icode;
32983 goto non_constant;
32984 }
32985 break;
32986 default:
32987 gcc_unreachable ();
32988 }
32989 }
32990 }
32991 else
32992 {
32993 non_constant:
32994 if (VECTOR_MODE_P (mode))
32995 op = safe_vector_operand (op, mode);
32996
32997 /* If we aren't optimizing, only allow one memory operand to be
32998 generated. */
32999 if (memory_operand (op, mode))
33000 num_memory++;
33001
33002 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
33003
33004 if (optimize
33005 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
33006 || num_memory > 1)
33007 op = force_reg (mode, op);
33008 }
33009
33010 args[i].op = op;
33011 args[i].mode = mode;
33012 }
33013
33014 switch (nargs)
33015 {
33016 case 1:
33017 pat = GEN_FCN (icode) (target, args[0].op);
33018 break;
33019
33020 case 2:
33021 if (tf_p)
33022 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
33023 GEN_INT ((int)sub_code));
33024 else if (! comparison_p)
33025 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
33026 else
33027 {
33028 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
33029 args[0].op,
33030 args[1].op);
33031
33032 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
33033 }
33034 break;
33035
33036 case 3:
33037 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
33038 break;
33039
33040 case 4:
33041 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
33042 break;
33043
33044 default:
33045 gcc_unreachable ();
33046 }
33047
33048 if (! pat)
33049 return 0;
33050
33051 emit_insn (pat);
33052 return target;
33053 }
33054
33055 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
33056 insns with vec_merge. */
33057
33058 static rtx
33059 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
33060 rtx target)
33061 {
33062 rtx pat;
33063 tree arg0 = CALL_EXPR_ARG (exp, 0);
33064 rtx op1, op0 = expand_normal (arg0);
33065 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33066 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33067
33068 if (optimize || !target
33069 || GET_MODE (target) != tmode
33070 || !insn_data[icode].operand[0].predicate (target, tmode))
33071 target = gen_reg_rtx (tmode);
33072
33073 if (VECTOR_MODE_P (mode0))
33074 op0 = safe_vector_operand (op0, mode0);
33075
33076 if ((optimize && !register_operand (op0, mode0))
33077 || !insn_data[icode].operand[1].predicate (op0, mode0))
33078 op0 = copy_to_mode_reg (mode0, op0);
33079
33080 op1 = op0;
33081 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33082 op1 = copy_to_mode_reg (mode0, op1);
33083
33084 pat = GEN_FCN (icode) (target, op0, op1);
33085 if (! pat)
33086 return 0;
33087 emit_insn (pat);
33088 return target;
33089 }
33090
33091 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33092
33093 static rtx
33094 ix86_expand_sse_compare (const struct builtin_description *d,
33095 tree exp, rtx target, bool swap)
33096 {
33097 rtx pat;
33098 tree arg0 = CALL_EXPR_ARG (exp, 0);
33099 tree arg1 = CALL_EXPR_ARG (exp, 1);
33100 rtx op0 = expand_normal (arg0);
33101 rtx op1 = expand_normal (arg1);
33102 rtx op2;
33103 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33104 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33105 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33106 enum rtx_code comparison = d->comparison;
33107
33108 if (VECTOR_MODE_P (mode0))
33109 op0 = safe_vector_operand (op0, mode0);
33110 if (VECTOR_MODE_P (mode1))
33111 op1 = safe_vector_operand (op1, mode1);
33112
33113 /* Swap operands if we have a comparison that isn't available in
33114 hardware. */
33115 if (swap)
33116 {
33117 rtx tmp = gen_reg_rtx (mode1);
33118 emit_move_insn (tmp, op1);
33119 op1 = op0;
33120 op0 = tmp;
33121 }
33122
33123 if (optimize || !target
33124 || GET_MODE (target) != tmode
33125 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33126 target = gen_reg_rtx (tmode);
33127
33128 if ((optimize && !register_operand (op0, mode0))
33129 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33130 op0 = copy_to_mode_reg (mode0, op0);
33131 if ((optimize && !register_operand (op1, mode1))
33132 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33133 op1 = copy_to_mode_reg (mode1, op1);
33134
33135 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33136 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33137 if (! pat)
33138 return 0;
33139 emit_insn (pat);
33140 return target;
33141 }
33142
33143 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33144
33145 static rtx
33146 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33147 rtx target)
33148 {
33149 rtx pat;
33150 tree arg0 = CALL_EXPR_ARG (exp, 0);
33151 tree arg1 = CALL_EXPR_ARG (exp, 1);
33152 rtx op0 = expand_normal (arg0);
33153 rtx op1 = expand_normal (arg1);
33154 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33155 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33156 enum rtx_code comparison = d->comparison;
33157
33158 if (VECTOR_MODE_P (mode0))
33159 op0 = safe_vector_operand (op0, mode0);
33160 if (VECTOR_MODE_P (mode1))
33161 op1 = safe_vector_operand (op1, mode1);
33162
33163 /* Swap operands if we have a comparison that isn't available in
33164 hardware. */
33165 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33166 {
33167 rtx tmp = op1;
33168 op1 = op0;
33169 op0 = tmp;
33170 }
33171
33172 target = gen_reg_rtx (SImode);
33173 emit_move_insn (target, const0_rtx);
33174 target = gen_rtx_SUBREG (QImode, target, 0);
33175
33176 if ((optimize && !register_operand (op0, mode0))
33177 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33178 op0 = copy_to_mode_reg (mode0, op0);
33179 if ((optimize && !register_operand (op1, mode1))
33180 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33181 op1 = copy_to_mode_reg (mode1, op1);
33182
33183 pat = GEN_FCN (d->icode) (op0, op1);
33184 if (! pat)
33185 return 0;
33186 emit_insn (pat);
33187 emit_insn (gen_rtx_SET (VOIDmode,
33188 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33189 gen_rtx_fmt_ee (comparison, QImode,
33190 SET_DEST (pat),
33191 const0_rtx)));
33192
33193 return SUBREG_REG (target);
33194 }
33195
33196 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33197
33198 static rtx
33199 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33200 rtx target)
33201 {
33202 rtx pat;
33203 tree arg0 = CALL_EXPR_ARG (exp, 0);
33204 rtx op1, op0 = expand_normal (arg0);
33205 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33206 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33207
33208 if (optimize || target == 0
33209 || GET_MODE (target) != tmode
33210 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33211 target = gen_reg_rtx (tmode);
33212
33213 if (VECTOR_MODE_P (mode0))
33214 op0 = safe_vector_operand (op0, mode0);
33215
33216 if ((optimize && !register_operand (op0, mode0))
33217 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33218 op0 = copy_to_mode_reg (mode0, op0);
33219
33220 op1 = GEN_INT (d->comparison);
33221
33222 pat = GEN_FCN (d->icode) (target, op0, op1);
33223 if (! pat)
33224 return 0;
33225 emit_insn (pat);
33226 return target;
33227 }
33228
33229 static rtx
33230 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33231 tree exp, rtx target)
33232 {
33233 rtx pat;
33234 tree arg0 = CALL_EXPR_ARG (exp, 0);
33235 tree arg1 = CALL_EXPR_ARG (exp, 1);
33236 rtx op0 = expand_normal (arg0);
33237 rtx op1 = expand_normal (arg1);
33238 rtx op2;
33239 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33240 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33241 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33242
33243 if (optimize || target == 0
33244 || GET_MODE (target) != tmode
33245 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33246 target = gen_reg_rtx (tmode);
33247
33248 op0 = safe_vector_operand (op0, mode0);
33249 op1 = safe_vector_operand (op1, mode1);
33250
33251 if ((optimize && !register_operand (op0, mode0))
33252 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33253 op0 = copy_to_mode_reg (mode0, op0);
33254 if ((optimize && !register_operand (op1, mode1))
33255 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33256 op1 = copy_to_mode_reg (mode1, op1);
33257
33258 op2 = GEN_INT (d->comparison);
33259
33260 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33261 if (! pat)
33262 return 0;
33263 emit_insn (pat);
33264 return target;
33265 }
33266
33267 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33268
33269 static rtx
33270 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33271 rtx target)
33272 {
33273 rtx pat;
33274 tree arg0 = CALL_EXPR_ARG (exp, 0);
33275 tree arg1 = CALL_EXPR_ARG (exp, 1);
33276 rtx op0 = expand_normal (arg0);
33277 rtx op1 = expand_normal (arg1);
33278 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33279 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33280 enum rtx_code comparison = d->comparison;
33281
33282 if (VECTOR_MODE_P (mode0))
33283 op0 = safe_vector_operand (op0, mode0);
33284 if (VECTOR_MODE_P (mode1))
33285 op1 = safe_vector_operand (op1, mode1);
33286
33287 target = gen_reg_rtx (SImode);
33288 emit_move_insn (target, const0_rtx);
33289 target = gen_rtx_SUBREG (QImode, target, 0);
33290
33291 if ((optimize && !register_operand (op0, mode0))
33292 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33293 op0 = copy_to_mode_reg (mode0, op0);
33294 if ((optimize && !register_operand (op1, mode1))
33295 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33296 op1 = copy_to_mode_reg (mode1, op1);
33297
33298 pat = GEN_FCN (d->icode) (op0, op1);
33299 if (! pat)
33300 return 0;
33301 emit_insn (pat);
33302 emit_insn (gen_rtx_SET (VOIDmode,
33303 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33304 gen_rtx_fmt_ee (comparison, QImode,
33305 SET_DEST (pat),
33306 const0_rtx)));
33307
33308 return SUBREG_REG (target);
33309 }
33310
33311 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33312
33313 static rtx
33314 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33315 tree exp, rtx target)
33316 {
33317 rtx pat;
33318 tree arg0 = CALL_EXPR_ARG (exp, 0);
33319 tree arg1 = CALL_EXPR_ARG (exp, 1);
33320 tree arg2 = CALL_EXPR_ARG (exp, 2);
33321 tree arg3 = CALL_EXPR_ARG (exp, 3);
33322 tree arg4 = CALL_EXPR_ARG (exp, 4);
33323 rtx scratch0, scratch1;
33324 rtx op0 = expand_normal (arg0);
33325 rtx op1 = expand_normal (arg1);
33326 rtx op2 = expand_normal (arg2);
33327 rtx op3 = expand_normal (arg3);
33328 rtx op4 = expand_normal (arg4);
33329 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33330
33331 tmode0 = insn_data[d->icode].operand[0].mode;
33332 tmode1 = insn_data[d->icode].operand[1].mode;
33333 modev2 = insn_data[d->icode].operand[2].mode;
33334 modei3 = insn_data[d->icode].operand[3].mode;
33335 modev4 = insn_data[d->icode].operand[4].mode;
33336 modei5 = insn_data[d->icode].operand[5].mode;
33337 modeimm = insn_data[d->icode].operand[6].mode;
33338
33339 if (VECTOR_MODE_P (modev2))
33340 op0 = safe_vector_operand (op0, modev2);
33341 if (VECTOR_MODE_P (modev4))
33342 op2 = safe_vector_operand (op2, modev4);
33343
33344 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33345 op0 = copy_to_mode_reg (modev2, op0);
33346 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33347 op1 = copy_to_mode_reg (modei3, op1);
33348 if ((optimize && !register_operand (op2, modev4))
33349 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33350 op2 = copy_to_mode_reg (modev4, op2);
33351 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33352 op3 = copy_to_mode_reg (modei5, op3);
33353
33354 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33355 {
33356 error ("the fifth argument must be an 8-bit immediate");
33357 return const0_rtx;
33358 }
33359
33360 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33361 {
33362 if (optimize || !target
33363 || GET_MODE (target) != tmode0
33364 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33365 target = gen_reg_rtx (tmode0);
33366
33367 scratch1 = gen_reg_rtx (tmode1);
33368
33369 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33370 }
33371 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33372 {
33373 if (optimize || !target
33374 || GET_MODE (target) != tmode1
33375 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33376 target = gen_reg_rtx (tmode1);
33377
33378 scratch0 = gen_reg_rtx (tmode0);
33379
33380 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33381 }
33382 else
33383 {
33384 gcc_assert (d->flag);
33385
33386 scratch0 = gen_reg_rtx (tmode0);
33387 scratch1 = gen_reg_rtx (tmode1);
33388
33389 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33390 }
33391
33392 if (! pat)
33393 return 0;
33394
33395 emit_insn (pat);
33396
33397 if (d->flag)
33398 {
33399 target = gen_reg_rtx (SImode);
33400 emit_move_insn (target, const0_rtx);
33401 target = gen_rtx_SUBREG (QImode, target, 0);
33402
33403 emit_insn
33404 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33405 gen_rtx_fmt_ee (EQ, QImode,
33406 gen_rtx_REG ((enum machine_mode) d->flag,
33407 FLAGS_REG),
33408 const0_rtx)));
33409 return SUBREG_REG (target);
33410 }
33411 else
33412 return target;
33413 }
33414
33415
33416 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33417
33418 static rtx
33419 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33420 tree exp, rtx target)
33421 {
33422 rtx pat;
33423 tree arg0 = CALL_EXPR_ARG (exp, 0);
33424 tree arg1 = CALL_EXPR_ARG (exp, 1);
33425 tree arg2 = CALL_EXPR_ARG (exp, 2);
33426 rtx scratch0, scratch1;
33427 rtx op0 = expand_normal (arg0);
33428 rtx op1 = expand_normal (arg1);
33429 rtx op2 = expand_normal (arg2);
33430 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33431
33432 tmode0 = insn_data[d->icode].operand[0].mode;
33433 tmode1 = insn_data[d->icode].operand[1].mode;
33434 modev2 = insn_data[d->icode].operand[2].mode;
33435 modev3 = insn_data[d->icode].operand[3].mode;
33436 modeimm = insn_data[d->icode].operand[4].mode;
33437
33438 if (VECTOR_MODE_P (modev2))
33439 op0 = safe_vector_operand (op0, modev2);
33440 if (VECTOR_MODE_P (modev3))
33441 op1 = safe_vector_operand (op1, modev3);
33442
33443 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33444 op0 = copy_to_mode_reg (modev2, op0);
33445 if ((optimize && !register_operand (op1, modev3))
33446 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33447 op1 = copy_to_mode_reg (modev3, op1);
33448
33449 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33450 {
33451 error ("the third argument must be an 8-bit immediate");
33452 return const0_rtx;
33453 }
33454
33455 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33456 {
33457 if (optimize || !target
33458 || GET_MODE (target) != tmode0
33459 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33460 target = gen_reg_rtx (tmode0);
33461
33462 scratch1 = gen_reg_rtx (tmode1);
33463
33464 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33465 }
33466 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33467 {
33468 if (optimize || !target
33469 || GET_MODE (target) != tmode1
33470 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33471 target = gen_reg_rtx (tmode1);
33472
33473 scratch0 = gen_reg_rtx (tmode0);
33474
33475 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33476 }
33477 else
33478 {
33479 gcc_assert (d->flag);
33480
33481 scratch0 = gen_reg_rtx (tmode0);
33482 scratch1 = gen_reg_rtx (tmode1);
33483
33484 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33485 }
33486
33487 if (! pat)
33488 return 0;
33489
33490 emit_insn (pat);
33491
33492 if (d->flag)
33493 {
33494 target = gen_reg_rtx (SImode);
33495 emit_move_insn (target, const0_rtx);
33496 target = gen_rtx_SUBREG (QImode, target, 0);
33497
33498 emit_insn
33499 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33500 gen_rtx_fmt_ee (EQ, QImode,
33501 gen_rtx_REG ((enum machine_mode) d->flag,
33502 FLAGS_REG),
33503 const0_rtx)));
33504 return SUBREG_REG (target);
33505 }
33506 else
33507 return target;
33508 }
33509
33510 /* Subroutine of ix86_expand_builtin to take care of insns with
33511 a variable number of operands. */
33512
33513 static rtx
33514 ix86_expand_args_builtin (const struct builtin_description *d,
33515 tree exp, rtx target)
33516 {
33517 rtx pat, real_target;
33518 unsigned int i, nargs;
33519 unsigned int nargs_constant = 0;
33520 unsigned int mask_pos = 0;
33521 int num_memory = 0;
33522 struct
33523 {
33524 rtx op;
33525 enum machine_mode mode;
33526 } args[6];
33527 bool last_arg_count = false;
33528 enum insn_code icode = d->icode;
33529 const struct insn_data_d *insn_p = &insn_data[icode];
33530 enum machine_mode tmode = insn_p->operand[0].mode;
33531 enum machine_mode rmode = VOIDmode;
33532 bool swap = false;
33533 enum rtx_code comparison = d->comparison;
33534
33535 switch ((enum ix86_builtin_func_type) d->flag)
33536 {
33537 case V2DF_FTYPE_V2DF_ROUND:
33538 case V4DF_FTYPE_V4DF_ROUND:
33539 case V4SF_FTYPE_V4SF_ROUND:
33540 case V8SF_FTYPE_V8SF_ROUND:
33541 case V4SI_FTYPE_V4SF_ROUND:
33542 case V8SI_FTYPE_V8SF_ROUND:
33543 return ix86_expand_sse_round (d, exp, target);
33544 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33545 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33546 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33547 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33548 case INT_FTYPE_V8SF_V8SF_PTEST:
33549 case INT_FTYPE_V4DI_V4DI_PTEST:
33550 case INT_FTYPE_V4DF_V4DF_PTEST:
33551 case INT_FTYPE_V4SF_V4SF_PTEST:
33552 case INT_FTYPE_V2DI_V2DI_PTEST:
33553 case INT_FTYPE_V2DF_V2DF_PTEST:
33554 return ix86_expand_sse_ptest (d, exp, target);
33555 case FLOAT128_FTYPE_FLOAT128:
33556 case FLOAT_FTYPE_FLOAT:
33557 case INT_FTYPE_INT:
33558 case UINT64_FTYPE_INT:
33559 case UINT16_FTYPE_UINT16:
33560 case INT64_FTYPE_INT64:
33561 case INT64_FTYPE_V4SF:
33562 case INT64_FTYPE_V2DF:
33563 case INT_FTYPE_V16QI:
33564 case INT_FTYPE_V8QI:
33565 case INT_FTYPE_V8SF:
33566 case INT_FTYPE_V4DF:
33567 case INT_FTYPE_V4SF:
33568 case INT_FTYPE_V2DF:
33569 case INT_FTYPE_V32QI:
33570 case V16QI_FTYPE_V16QI:
33571 case V8SI_FTYPE_V8SF:
33572 case V8SI_FTYPE_V4SI:
33573 case V8HI_FTYPE_V8HI:
33574 case V8HI_FTYPE_V16QI:
33575 case V8QI_FTYPE_V8QI:
33576 case V8SF_FTYPE_V8SF:
33577 case V8SF_FTYPE_V8SI:
33578 case V8SF_FTYPE_V4SF:
33579 case V8SF_FTYPE_V8HI:
33580 case V4SI_FTYPE_V4SI:
33581 case V4SI_FTYPE_V16QI:
33582 case V4SI_FTYPE_V4SF:
33583 case V4SI_FTYPE_V8SI:
33584 case V4SI_FTYPE_V8HI:
33585 case V4SI_FTYPE_V4DF:
33586 case V4SI_FTYPE_V2DF:
33587 case V4HI_FTYPE_V4HI:
33588 case V4DF_FTYPE_V4DF:
33589 case V4DF_FTYPE_V4SI:
33590 case V4DF_FTYPE_V4SF:
33591 case V4DF_FTYPE_V2DF:
33592 case V4SF_FTYPE_V4SF:
33593 case V4SF_FTYPE_V4SI:
33594 case V4SF_FTYPE_V8SF:
33595 case V4SF_FTYPE_V4DF:
33596 case V4SF_FTYPE_V8HI:
33597 case V4SF_FTYPE_V2DF:
33598 case V2DI_FTYPE_V2DI:
33599 case V2DI_FTYPE_V16QI:
33600 case V2DI_FTYPE_V8HI:
33601 case V2DI_FTYPE_V4SI:
33602 case V2DF_FTYPE_V2DF:
33603 case V2DF_FTYPE_V4SI:
33604 case V2DF_FTYPE_V4DF:
33605 case V2DF_FTYPE_V4SF:
33606 case V2DF_FTYPE_V2SI:
33607 case V2SI_FTYPE_V2SI:
33608 case V2SI_FTYPE_V4SF:
33609 case V2SI_FTYPE_V2SF:
33610 case V2SI_FTYPE_V2DF:
33611 case V2SF_FTYPE_V2SF:
33612 case V2SF_FTYPE_V2SI:
33613 case V32QI_FTYPE_V32QI:
33614 case V32QI_FTYPE_V16QI:
33615 case V16HI_FTYPE_V16HI:
33616 case V16HI_FTYPE_V8HI:
33617 case V8SI_FTYPE_V8SI:
33618 case V16HI_FTYPE_V16QI:
33619 case V8SI_FTYPE_V16QI:
33620 case V4DI_FTYPE_V16QI:
33621 case V8SI_FTYPE_V8HI:
33622 case V4DI_FTYPE_V8HI:
33623 case V4DI_FTYPE_V4SI:
33624 case V4DI_FTYPE_V2DI:
33625 case HI_FTYPE_HI:
33626 case UINT_FTYPE_V2DF:
33627 case UINT_FTYPE_V4SF:
33628 case UINT64_FTYPE_V2DF:
33629 case UINT64_FTYPE_V4SF:
33630 case V16QI_FTYPE_V8DI:
33631 case V16HI_FTYPE_V16SI:
33632 case V16SI_FTYPE_HI:
33633 case V16SI_FTYPE_V16SI:
33634 case V16SI_FTYPE_INT:
33635 case V16SF_FTYPE_FLOAT:
33636 case V16SF_FTYPE_V4SF:
33637 case V16SF_FTYPE_V16SF:
33638 case V8HI_FTYPE_V8DI:
33639 case V8UHI_FTYPE_V8UHI:
33640 case V8SI_FTYPE_V8DI:
33641 case V8USI_FTYPE_V8USI:
33642 case V8SF_FTYPE_V8DF:
33643 case V8DI_FTYPE_QI:
33644 case V8DI_FTYPE_INT64:
33645 case V8DI_FTYPE_V4DI:
33646 case V8DI_FTYPE_V8DI:
33647 case V8DF_FTYPE_DOUBLE:
33648 case V8DF_FTYPE_V4DF:
33649 case V8DF_FTYPE_V8DF:
33650 case V8DF_FTYPE_V8SI:
33651 nargs = 1;
33652 break;
33653 case V4SF_FTYPE_V4SF_VEC_MERGE:
33654 case V2DF_FTYPE_V2DF_VEC_MERGE:
33655 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33656 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33657 case V16QI_FTYPE_V16QI_V16QI:
33658 case V16QI_FTYPE_V8HI_V8HI:
33659 case V16SI_FTYPE_V16SI_V16SI:
33660 case V16SF_FTYPE_V16SF_V16SF:
33661 case V16SF_FTYPE_V16SF_V16SI:
33662 case V8QI_FTYPE_V8QI_V8QI:
33663 case V8QI_FTYPE_V4HI_V4HI:
33664 case V8HI_FTYPE_V8HI_V8HI:
33665 case V8HI_FTYPE_V16QI_V16QI:
33666 case V8HI_FTYPE_V4SI_V4SI:
33667 case V8SF_FTYPE_V8SF_V8SF:
33668 case V8SF_FTYPE_V8SF_V8SI:
33669 case V8DI_FTYPE_V8DI_V8DI:
33670 case V8DF_FTYPE_V8DF_V8DF:
33671 case V8DF_FTYPE_V8DF_V8DI:
33672 case V4SI_FTYPE_V4SI_V4SI:
33673 case V4SI_FTYPE_V8HI_V8HI:
33674 case V4SI_FTYPE_V4SF_V4SF:
33675 case V4SI_FTYPE_V2DF_V2DF:
33676 case V4HI_FTYPE_V4HI_V4HI:
33677 case V4HI_FTYPE_V8QI_V8QI:
33678 case V4HI_FTYPE_V2SI_V2SI:
33679 case V4DF_FTYPE_V4DF_V4DF:
33680 case V4DF_FTYPE_V4DF_V4DI:
33681 case V4SF_FTYPE_V4SF_V4SF:
33682 case V4SF_FTYPE_V4SF_V4SI:
33683 case V4SF_FTYPE_V4SF_V2SI:
33684 case V4SF_FTYPE_V4SF_V2DF:
33685 case V4SF_FTYPE_V4SF_UINT:
33686 case V4SF_FTYPE_V4SF_UINT64:
33687 case V4SF_FTYPE_V4SF_DI:
33688 case V4SF_FTYPE_V4SF_SI:
33689 case V2DI_FTYPE_V2DI_V2DI:
33690 case V2DI_FTYPE_V16QI_V16QI:
33691 case V2DI_FTYPE_V4SI_V4SI:
33692 case V2UDI_FTYPE_V4USI_V4USI:
33693 case V2DI_FTYPE_V2DI_V16QI:
33694 case V2DI_FTYPE_V2DF_V2DF:
33695 case V2SI_FTYPE_V2SI_V2SI:
33696 case V2SI_FTYPE_V4HI_V4HI:
33697 case V2SI_FTYPE_V2SF_V2SF:
33698 case V2DF_FTYPE_V2DF_V2DF:
33699 case V2DF_FTYPE_V2DF_V4SF:
33700 case V2DF_FTYPE_V2DF_V2DI:
33701 case V2DF_FTYPE_V2DF_DI:
33702 case V2DF_FTYPE_V2DF_SI:
33703 case V2DF_FTYPE_V2DF_UINT:
33704 case V2DF_FTYPE_V2DF_UINT64:
33705 case V2SF_FTYPE_V2SF_V2SF:
33706 case V1DI_FTYPE_V1DI_V1DI:
33707 case V1DI_FTYPE_V8QI_V8QI:
33708 case V1DI_FTYPE_V2SI_V2SI:
33709 case V32QI_FTYPE_V16HI_V16HI:
33710 case V16HI_FTYPE_V8SI_V8SI:
33711 case V32QI_FTYPE_V32QI_V32QI:
33712 case V16HI_FTYPE_V32QI_V32QI:
33713 case V16HI_FTYPE_V16HI_V16HI:
33714 case V8SI_FTYPE_V4DF_V4DF:
33715 case V8SI_FTYPE_V8SI_V8SI:
33716 case V8SI_FTYPE_V16HI_V16HI:
33717 case V4DI_FTYPE_V4DI_V4DI:
33718 case V4DI_FTYPE_V8SI_V8SI:
33719 case V4UDI_FTYPE_V8USI_V8USI:
33720 case QI_FTYPE_V8DI_V8DI:
33721 case HI_FTYPE_V16SI_V16SI:
33722 if (comparison == UNKNOWN)
33723 return ix86_expand_binop_builtin (icode, exp, target);
33724 nargs = 2;
33725 break;
33726 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33727 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33728 gcc_assert (comparison != UNKNOWN);
33729 nargs = 2;
33730 swap = true;
33731 break;
33732 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33733 case V16HI_FTYPE_V16HI_SI_COUNT:
33734 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33735 case V8SI_FTYPE_V8SI_SI_COUNT:
33736 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33737 case V4DI_FTYPE_V4DI_INT_COUNT:
33738 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33739 case V8HI_FTYPE_V8HI_SI_COUNT:
33740 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33741 case V4SI_FTYPE_V4SI_SI_COUNT:
33742 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33743 case V4HI_FTYPE_V4HI_SI_COUNT:
33744 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33745 case V2DI_FTYPE_V2DI_SI_COUNT:
33746 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33747 case V2SI_FTYPE_V2SI_SI_COUNT:
33748 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33749 case V1DI_FTYPE_V1DI_SI_COUNT:
33750 nargs = 2;
33751 last_arg_count = true;
33752 break;
33753 case UINT64_FTYPE_UINT64_UINT64:
33754 case UINT_FTYPE_UINT_UINT:
33755 case UINT_FTYPE_UINT_USHORT:
33756 case UINT_FTYPE_UINT_UCHAR:
33757 case UINT16_FTYPE_UINT16_INT:
33758 case UINT8_FTYPE_UINT8_INT:
33759 case HI_FTYPE_HI_HI:
33760 case V16SI_FTYPE_V8DF_V8DF:
33761 nargs = 2;
33762 break;
33763 case V2DI_FTYPE_V2DI_INT_CONVERT:
33764 nargs = 2;
33765 rmode = V1TImode;
33766 nargs_constant = 1;
33767 break;
33768 case V4DI_FTYPE_V4DI_INT_CONVERT:
33769 nargs = 2;
33770 rmode = V2TImode;
33771 nargs_constant = 1;
33772 break;
33773 case V8HI_FTYPE_V8HI_INT:
33774 case V8HI_FTYPE_V8SF_INT:
33775 case V16HI_FTYPE_V16SF_INT:
33776 case V8HI_FTYPE_V4SF_INT:
33777 case V8SF_FTYPE_V8SF_INT:
33778 case V4SF_FTYPE_V16SF_INT:
33779 case V16SF_FTYPE_V16SF_INT:
33780 case V4SI_FTYPE_V4SI_INT:
33781 case V4SI_FTYPE_V8SI_INT:
33782 case V4HI_FTYPE_V4HI_INT:
33783 case V4DF_FTYPE_V4DF_INT:
33784 case V4DF_FTYPE_V8DF_INT:
33785 case V4SF_FTYPE_V4SF_INT:
33786 case V4SF_FTYPE_V8SF_INT:
33787 case V2DI_FTYPE_V2DI_INT:
33788 case V2DF_FTYPE_V2DF_INT:
33789 case V2DF_FTYPE_V4DF_INT:
33790 case V16HI_FTYPE_V16HI_INT:
33791 case V8SI_FTYPE_V8SI_INT:
33792 case V16SI_FTYPE_V16SI_INT:
33793 case V4SI_FTYPE_V16SI_INT:
33794 case V4DI_FTYPE_V4DI_INT:
33795 case V2DI_FTYPE_V4DI_INT:
33796 case V4DI_FTYPE_V8DI_INT:
33797 case HI_FTYPE_HI_INT:
33798 nargs = 2;
33799 nargs_constant = 1;
33800 break;
33801 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33802 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33803 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33804 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33805 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33806 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33807 case HI_FTYPE_V16SI_V16SI_HI:
33808 case QI_FTYPE_V8DI_V8DI_QI:
33809 case V16HI_FTYPE_V16SI_V16HI_HI:
33810 case V16QI_FTYPE_V16SI_V16QI_HI:
33811 case V16QI_FTYPE_V8DI_V16QI_QI:
33812 case V16SF_FTYPE_V16SF_V16SF_HI:
33813 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33814 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33815 case V16SF_FTYPE_V16SI_V16SF_HI:
33816 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33817 case V16SF_FTYPE_V4SF_V16SF_HI:
33818 case V16SI_FTYPE_SI_V16SI_HI:
33819 case V16SI_FTYPE_V16HI_V16SI_HI:
33820 case V16SI_FTYPE_V16QI_V16SI_HI:
33821 case V16SI_FTYPE_V16SF_V16SI_HI:
33822 case V16SI_FTYPE_V16SI_V16SI_HI:
33823 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33824 case V16SI_FTYPE_V4SI_V16SI_HI:
33825 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33826 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33827 case V8DF_FTYPE_V2DF_V8DF_QI:
33828 case V8DF_FTYPE_V4DF_V8DF_QI:
33829 case V8DF_FTYPE_V8DF_V8DF_QI:
33830 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33831 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33832 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33833 case V8DF_FTYPE_V8SF_V8DF_QI:
33834 case V8DF_FTYPE_V8SI_V8DF_QI:
33835 case V8DI_FTYPE_DI_V8DI_QI:
33836 case V8DI_FTYPE_V16QI_V8DI_QI:
33837 case V8DI_FTYPE_V2DI_V8DI_QI:
33838 case V8DI_FTYPE_V4DI_V8DI_QI:
33839 case V8DI_FTYPE_V8DI_V8DI_QI:
33840 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33841 case V8DI_FTYPE_V8HI_V8DI_QI:
33842 case V8DI_FTYPE_V8SI_V8DI_QI:
33843 case V8HI_FTYPE_V8DI_V8HI_QI:
33844 case V8SF_FTYPE_V8DF_V8SF_QI:
33845 case V8SI_FTYPE_V8DF_V8SI_QI:
33846 case V8SI_FTYPE_V8DI_V8SI_QI:
33847 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33848 nargs = 3;
33849 break;
33850 case V32QI_FTYPE_V32QI_V32QI_INT:
33851 case V16HI_FTYPE_V16HI_V16HI_INT:
33852 case V16QI_FTYPE_V16QI_V16QI_INT:
33853 case V4DI_FTYPE_V4DI_V4DI_INT:
33854 case V8HI_FTYPE_V8HI_V8HI_INT:
33855 case V8SI_FTYPE_V8SI_V8SI_INT:
33856 case V8SI_FTYPE_V8SI_V4SI_INT:
33857 case V8SF_FTYPE_V8SF_V8SF_INT:
33858 case V8SF_FTYPE_V8SF_V4SF_INT:
33859 case V4SI_FTYPE_V4SI_V4SI_INT:
33860 case V4DF_FTYPE_V4DF_V4DF_INT:
33861 case V16SF_FTYPE_V16SF_V16SF_INT:
33862 case V16SF_FTYPE_V16SF_V4SF_INT:
33863 case V16SI_FTYPE_V16SI_V4SI_INT:
33864 case V4DF_FTYPE_V4DF_V2DF_INT:
33865 case V4SF_FTYPE_V4SF_V4SF_INT:
33866 case V2DI_FTYPE_V2DI_V2DI_INT:
33867 case V4DI_FTYPE_V4DI_V2DI_INT:
33868 case V2DF_FTYPE_V2DF_V2DF_INT:
33869 case QI_FTYPE_V8DI_V8DI_INT:
33870 case QI_FTYPE_V8DF_V8DF_INT:
33871 case QI_FTYPE_V2DF_V2DF_INT:
33872 case QI_FTYPE_V4SF_V4SF_INT:
33873 case HI_FTYPE_V16SI_V16SI_INT:
33874 case HI_FTYPE_V16SF_V16SF_INT:
33875 nargs = 3;
33876 nargs_constant = 1;
33877 break;
33878 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33879 nargs = 3;
33880 rmode = V4DImode;
33881 nargs_constant = 1;
33882 break;
33883 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33884 nargs = 3;
33885 rmode = V2DImode;
33886 nargs_constant = 1;
33887 break;
33888 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33889 nargs = 3;
33890 rmode = DImode;
33891 nargs_constant = 1;
33892 break;
33893 case V2DI_FTYPE_V2DI_UINT_UINT:
33894 nargs = 3;
33895 nargs_constant = 2;
33896 break;
33897 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33898 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33899 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33900 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33901 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33902 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33903 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33904 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33905 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33906 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33907 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33908 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33909 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33910 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33911 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33912 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33913 nargs = 4;
33914 break;
33915 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33916 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33917 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33918 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33919 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33920 nargs = 4;
33921 nargs_constant = 1;
33922 break;
33923 case QI_FTYPE_V2DF_V2DF_INT_QI:
33924 case QI_FTYPE_V4SF_V4SF_INT_QI:
33925 nargs = 4;
33926 mask_pos = 1;
33927 nargs_constant = 1;
33928 break;
33929 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33930 nargs = 4;
33931 nargs_constant = 2;
33932 break;
33933 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33934 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33935 nargs = 4;
33936 break;
33937 case QI_FTYPE_V8DI_V8DI_INT_QI:
33938 case HI_FTYPE_V16SI_V16SI_INT_HI:
33939 case QI_FTYPE_V8DF_V8DF_INT_QI:
33940 case HI_FTYPE_V16SF_V16SF_INT_HI:
33941 mask_pos = 1;
33942 nargs = 4;
33943 nargs_constant = 1;
33944 break;
33945 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
33946 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
33947 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
33948 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
33949 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
33950 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
33951 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
33952 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
33953 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
33954 nargs = 4;
33955 mask_pos = 2;
33956 nargs_constant = 1;
33957 break;
33958 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
33959 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
33960 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
33961 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
33962 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
33963 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
33964 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
33965 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
33966 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
33967 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
33968 nargs = 5;
33969 mask_pos = 2;
33970 nargs_constant = 1;
33971 break;
33972 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
33973 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
33974 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
33975 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
33976 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
33977 nargs = 5;
33978 mask_pos = 1;
33979 nargs_constant = 1;
33980 break;
33981
33982 default:
33983 gcc_unreachable ();
33984 }
33985
33986 gcc_assert (nargs <= ARRAY_SIZE (args));
33987
33988 if (comparison != UNKNOWN)
33989 {
33990 gcc_assert (nargs == 2);
33991 return ix86_expand_sse_compare (d, exp, target, swap);
33992 }
33993
33994 if (rmode == VOIDmode || rmode == tmode)
33995 {
33996 if (optimize
33997 || target == 0
33998 || GET_MODE (target) != tmode
33999 || !insn_p->operand[0].predicate (target, tmode))
34000 target = gen_reg_rtx (tmode);
34001 real_target = target;
34002 }
34003 else
34004 {
34005 real_target = gen_reg_rtx (tmode);
34006 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
34007 }
34008
34009 for (i = 0; i < nargs; i++)
34010 {
34011 tree arg = CALL_EXPR_ARG (exp, i);
34012 rtx op = expand_normal (arg);
34013 enum machine_mode mode = insn_p->operand[i + 1].mode;
34014 bool match = insn_p->operand[i + 1].predicate (op, mode);
34015
34016 if (last_arg_count && (i + 1) == nargs)
34017 {
34018 /* SIMD shift insns take either an 8-bit immediate or a
34019 register as the count. But the builtin functions take an int
34020 as the count. If the count doesn't match, we put it in a register. */
34021 if (!match)
34022 {
34023 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
34024 if (!insn_p->operand[i + 1].predicate (op, mode))
34025 op = copy_to_reg (op);
34026 }
34027 }
34028 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34029 (!mask_pos && (nargs - i) <= nargs_constant))
34030 {
34031 if (!match)
34032 switch (icode)
34033 {
34034 case CODE_FOR_avx2_inserti128:
34035 case CODE_FOR_avx2_extracti128:
34036 error ("the last argument must be an 1-bit immediate");
34037 return const0_rtx;
34038
34039 case CODE_FOR_avx512f_cmpv8di3_mask:
34040 case CODE_FOR_avx512f_cmpv16si3_mask:
34041 case CODE_FOR_avx512f_ucmpv8di3_mask:
34042 case CODE_FOR_avx512f_ucmpv16si3_mask:
34043 error ("the last argument must be a 3-bit immediate");
34044 return const0_rtx;
34045
34046 case CODE_FOR_sse4_1_roundsd:
34047 case CODE_FOR_sse4_1_roundss:
34048
34049 case CODE_FOR_sse4_1_roundpd:
34050 case CODE_FOR_sse4_1_roundps:
34051 case CODE_FOR_avx_roundpd256:
34052 case CODE_FOR_avx_roundps256:
34053
34054 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34055 case CODE_FOR_sse4_1_roundps_sfix:
34056 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34057 case CODE_FOR_avx_roundps_sfix256:
34058
34059 case CODE_FOR_sse4_1_blendps:
34060 case CODE_FOR_avx_blendpd256:
34061 case CODE_FOR_avx_vpermilv4df:
34062 case CODE_FOR_avx512f_getmantv8df_mask:
34063 case CODE_FOR_avx512f_getmantv16sf_mask:
34064 error ("the last argument must be a 4-bit immediate");
34065 return const0_rtx;
34066
34067 case CODE_FOR_sha1rnds4:
34068 case CODE_FOR_sse4_1_blendpd:
34069 case CODE_FOR_avx_vpermilv2df:
34070 case CODE_FOR_xop_vpermil2v2df3:
34071 case CODE_FOR_xop_vpermil2v4sf3:
34072 case CODE_FOR_xop_vpermil2v4df3:
34073 case CODE_FOR_xop_vpermil2v8sf3:
34074 case CODE_FOR_avx512f_vinsertf32x4_mask:
34075 case CODE_FOR_avx512f_vinserti32x4_mask:
34076 case CODE_FOR_avx512f_vextractf32x4_mask:
34077 case CODE_FOR_avx512f_vextracti32x4_mask:
34078 error ("the last argument must be a 2-bit immediate");
34079 return const0_rtx;
34080
34081 case CODE_FOR_avx_vextractf128v4df:
34082 case CODE_FOR_avx_vextractf128v8sf:
34083 case CODE_FOR_avx_vextractf128v8si:
34084 case CODE_FOR_avx_vinsertf128v4df:
34085 case CODE_FOR_avx_vinsertf128v8sf:
34086 case CODE_FOR_avx_vinsertf128v8si:
34087 case CODE_FOR_avx512f_vinsertf64x4_mask:
34088 case CODE_FOR_avx512f_vinserti64x4_mask:
34089 case CODE_FOR_avx512f_vextractf64x4_mask:
34090 case CODE_FOR_avx512f_vextracti64x4_mask:
34091 error ("the last argument must be a 1-bit immediate");
34092 return const0_rtx;
34093
34094 case CODE_FOR_avx_vmcmpv2df3:
34095 case CODE_FOR_avx_vmcmpv4sf3:
34096 case CODE_FOR_avx_cmpv2df3:
34097 case CODE_FOR_avx_cmpv4sf3:
34098 case CODE_FOR_avx_cmpv4df3:
34099 case CODE_FOR_avx_cmpv8sf3:
34100 case CODE_FOR_avx512f_cmpv8df3_mask:
34101 case CODE_FOR_avx512f_cmpv16sf3_mask:
34102 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34103 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34104 error ("the last argument must be a 5-bit immediate");
34105 return const0_rtx;
34106
34107 default:
34108 switch (nargs_constant)
34109 {
34110 case 2:
34111 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34112 (!mask_pos && (nargs - i) == nargs_constant))
34113 {
34114 error ("the next to last argument must be an 8-bit immediate");
34115 break;
34116 }
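/* FALLTHRU */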
34117 case 1:
34118 error ("the last argument must be an 8-bit immediate");
34119 break;
34120 default:
34121 gcc_unreachable ();
34122 }
34123 return const0_rtx;
34124 }
34125 }
34126 else
34127 {
34128 if (VECTOR_MODE_P (mode))
34129 op = safe_vector_operand (op, mode);
34130
34131 /* If we aren't optimizing, only allow one memory operand to
34132 be generated. */
34133 if (memory_operand (op, mode))
34134 num_memory++;
34135
34136 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34137 {
34138 if (optimize || !match || num_memory > 1)
34139 op = copy_to_mode_reg (mode, op);
34140 }
34141 else
34142 {
34143 op = copy_to_reg (op);
34144 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34145 }
34146 }
34147
34148 args[i].op = op;
34149 args[i].mode = mode;
34150 }
34151
34152 switch (nargs)
34153 {
34154 case 1:
34155 pat = GEN_FCN (icode) (real_target, args[0].op);
34156 break;
34157 case 2:
34158 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34159 break;
34160 case 3:
34161 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34162 args[2].op);
34163 break;
34164 case 4:
34165 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34166 args[2].op, args[3].op);
34167 break;
34168 case 5:
34169 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34170 args[2].op, args[3].op, args[4].op);
break;
34171 case 6:
34172 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34173 args[2].op, args[3].op, args[4].op,
34174 args[5].op);
34175 break;
34176 default:
34177 gcc_unreachable ();
34178 }
34179
34180 if (! pat)
34181 return 0;
34182
34183 emit_insn (pat);
34184 return target;
34185 }
34186
34187 /* Transform a pattern of the following layout:
34188 (parallel [
34189 (set (A B))
34190 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34191 ])
34192 into:
34193 (set (A B))
34194
34195 Or:
34196 (parallel [ A B
34197 ...
34198 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34199 ...
34200 ])
34201 into:
34202 (parallel [ A B ... ]) */
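/* Erasing the rounding clause this way lets a pattern that was built
with the redundant NO_ROUND operand match the ordinary,
non-embedded-rounding insn. */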
34203
34204 static rtx
34205 ix86_erase_embedded_rounding (rtx pat)
34206 {
34207 if (GET_CODE (pat) == INSN)
34208 pat = PATTERN (pat);
34209
34210 gcc_assert (GET_CODE (pat) == PARALLEL);
34211
34212 if (XVECLEN (pat, 0) == 2)
34213 {
34214 rtx p0 = XVECEXP (pat, 0, 0);
34215 rtx p1 = XVECEXP (pat, 0, 1);
34216
34217 gcc_assert (GET_CODE (p0) == SET
34218 && GET_CODE (p1) == UNSPEC
34219 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34220
34221 return p0;
34222 }
34223 else
34224 {
34225 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34226 int i = 0;
34227 int j = 0;
34228
34229 for (; i < XVECLEN (pat, 0); ++i)
34230 {
34231 rtx elem = XVECEXP (pat, 0, i);
34232 if (GET_CODE (elem) != UNSPEC
34233 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34234 res [j++] = elem;
34235 }
34236
34237 /* No more than 1 occurrence was removed. */
34238 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34239
34240 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34241 }
34242 }
34243
34244 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34245 with rounding. */
34246 static rtx
34247 ix86_expand_sse_comi_round (const struct builtin_description *d,
34248 tree exp, rtx target)
34249 {
34250 rtx pat, set_dst;
34251 tree arg0 = CALL_EXPR_ARG (exp, 0);
34252 tree arg1 = CALL_EXPR_ARG (exp, 1);
34253 tree arg2 = CALL_EXPR_ARG (exp, 2);
34254 tree arg3 = CALL_EXPR_ARG (exp, 3);
34255 rtx op0 = expand_normal (arg0);
34256 rtx op1 = expand_normal (arg1);
34257 rtx op2 = expand_normal (arg2);
34258 rtx op3 = expand_normal (arg3);
34259 enum insn_code icode = d->icode;
34260 const struct insn_data_d *insn_p = &insn_data[icode];
34261 enum machine_mode mode0 = insn_p->operand[0].mode;
34262 enum machine_mode mode1 = insn_p->operand[1].mode;
34263 enum rtx_code comparison = UNEQ;
34264 bool need_ucomi = false;
34265
34266 /* See avxintrin.h for values. */
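/* The table below is indexed by the _CMP_* predicate value from
avxintrin.h: index 0 is _CMP_EQ_OQ, 1 is _CMP_LT_OS, 2 is
_CMP_LE_OS, 3 is _CMP_UNORD_Q, ..., 31 is _CMP_TRUE_US. */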
34267 enum rtx_code comi_comparisons[32] =
34268 {
34269 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34270 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34271 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34272 };
34273 bool need_ucomi_values[32] =
34274 {
34275 true, false, false, true, true, false, false, true,
34276 true, false, false, true, true, false, false, true,
34277 false, true, true, false, false, true, true, false,
34278 false, true, true, false, false, true, true, false
34279 };
34280
34281 if (!CONST_INT_P (op2))
34282 {
34283 error ("the third argument must be a comparison constant");
34284 return const0_rtx;
34285 }
34286 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34287 {
34288 error ("incorrect comparison mode");
34289 return const0_rtx;
34290 }
34291
34292 if (!insn_p->operand[2].predicate (op3, SImode))
34293 {
34294 error ("incorrect rounding operand");
34295 return const0_rtx;
34296 }
34297
34298 comparison = comi_comparisons[INTVAL (op2)];
34299 need_ucomi = need_ucomi_values[INTVAL (op2)];
34300
34301 if (VECTOR_MODE_P (mode0))
34302 op0 = safe_vector_operand (op0, mode0);
34303 if (VECTOR_MODE_P (mode1))
34304 op1 = safe_vector_operand (op1, mode1);
34305
34306 target = gen_reg_rtx (SImode);
34307 emit_move_insn (target, const0_rtx);
34308 target = gen_rtx_SUBREG (QImode, target, 0);
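/* TARGET now refers to the low byte of an SImode pseudo that was just
zeroed; the comparison result is written into that byte via
STRICT_LOW_PART below, and the containing SImode register is
returned so the result is already zero-extended. */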
34309
34310 if ((optimize && !register_operand (op0, mode0))
34311 || !insn_p->operand[0].predicate (op0, mode0))
34312 op0 = copy_to_mode_reg (mode0, op0);
34313 if ((optimize && !register_operand (op1, mode1))
34314 || !insn_p->operand[1].predicate (op1, mode1))
34315 op1 = copy_to_mode_reg (mode1, op1);
34316
34317 if (need_ucomi)
34318 icode = icode == CODE_FOR_sse_comi_round
34319 ? CODE_FOR_sse_ucomi_round
34320 : CODE_FOR_sse2_ucomi_round;
34321
34322 pat = GEN_FCN (icode) (op0, op1, op3);
34323 if (! pat)
34324 return 0;
34325
34326 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34327 if (INTVAL (op3) == NO_ROUND)
34328 {
34329 pat = ix86_erase_embedded_rounding (pat);
34330 if (! pat)
34331 return 0;
34332
34333 set_dst = SET_DEST (pat);
34334 }
34335 else
34336 {
34337 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34338 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34339 }
34340
34341 emit_insn (pat);
34342 emit_insn (gen_rtx_SET (VOIDmode,
34343 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34344 gen_rtx_fmt_ee (comparison, QImode,
34345 set_dst,
34346 const0_rtx)));
34347
34348 return SUBREG_REG (target);
34349 }
34350
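/* Subroutine of ix86_expand_builtin to take care of insns with
embedded rounding; the trailing int argument selects the rounding
mode. */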
34351 static rtx
34352 ix86_expand_round_builtin (const struct builtin_description *d,
34353 tree exp, rtx target)
34354 {
34355 rtx pat;
34356 unsigned int i, nargs;
34357 struct
34358 {
34359 rtx op;
34360 enum machine_mode mode;
34361 } args[6];
34362 enum insn_code icode = d->icode;
34363 const struct insn_data_d *insn_p = &insn_data[icode];
34364 enum machine_mode tmode = insn_p->operand[0].mode;
34365 unsigned int nargs_constant = 0;
34366 unsigned int redundant_embed_rnd = 0;
34367
34368 switch ((enum ix86_builtin_func_type) d->flag)
34369 {
34370 case UINT64_FTYPE_V2DF_INT:
34371 case UINT64_FTYPE_V4SF_INT:
34372 case UINT_FTYPE_V2DF_INT:
34373 case UINT_FTYPE_V4SF_INT:
34374 case INT64_FTYPE_V2DF_INT:
34375 case INT64_FTYPE_V4SF_INT:
34376 case INT_FTYPE_V2DF_INT:
34377 case INT_FTYPE_V4SF_INT:
34378 nargs = 2;
34379 break;
34380 case V4SF_FTYPE_V4SF_UINT_INT:
34381 case V4SF_FTYPE_V4SF_UINT64_INT:
34382 case V2DF_FTYPE_V2DF_UINT64_INT:
34383 case V4SF_FTYPE_V4SF_INT_INT:
34384 case V4SF_FTYPE_V4SF_INT64_INT:
34385 case V2DF_FTYPE_V2DF_INT64_INT:
34386 case V4SF_FTYPE_V4SF_V4SF_INT:
34387 case V2DF_FTYPE_V2DF_V2DF_INT:
34388 case V4SF_FTYPE_V4SF_V2DF_INT:
34389 case V2DF_FTYPE_V2DF_V4SF_INT:
34390 nargs = 3;
34391 break;
34392 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34393 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34394 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34395 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34396 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34397 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34398 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34399 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34400 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34401 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34402 nargs = 4;
34403 break;
34404 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34405 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34406 nargs_constant = 2;
34407 nargs = 4;
34408 break;
34409 case INT_FTYPE_V4SF_V4SF_INT_INT:
34410 case INT_FTYPE_V2DF_V2DF_INT_INT:
34411 return ix86_expand_sse_comi_round (d, exp, target);
34412 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34413 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34414 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34415 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34416 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34417 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34418 nargs = 5;
34419 break;
34420 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34421 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34422 nargs_constant = 4;
34423 nargs = 5;
34424 break;
34425 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34426 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34427 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34428 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34429 nargs_constant = 3;
34430 nargs = 5;
34431 break;
34432 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34433 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34434 nargs = 6;
34435 nargs_constant = 4;
34436 break;
34437 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34438 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34439 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34440 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34441 nargs = 6;
34442 nargs_constant = 3;
34443 break;
34444 default:
34445 gcc_unreachable ();
34446 }
34447 gcc_assert (nargs <= ARRAY_SIZE (args));
34448
34449 if (optimize
34450 || target == 0
34451 || GET_MODE (target) != tmode
34452 || !insn_p->operand[0].predicate (target, tmode))
34453 target = gen_reg_rtx (tmode);
34454
34455 for (i = 0; i < nargs; i++)
34456 {
34457 tree arg = CALL_EXPR_ARG (exp, i);
34458 rtx op = expand_normal (arg);
34459 enum machine_mode mode = insn_p->operand[i + 1].mode;
34460 bool match = insn_p->operand[i + 1].predicate (op, mode);
34461
34462 if (i == nargs - nargs_constant)
34463 {
34464 if (!match)
34465 {
34466 switch (icode)
34467 {
34468 case CODE_FOR_avx512f_getmantv8df_mask_round:
34469 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34470 case CODE_FOR_avx512f_getmantv2df_round:
34471 case CODE_FOR_avx512f_getmantv4sf_round:
34472 error ("the immediate argument must be a 4-bit immediate");
34473 return const0_rtx;
34474 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34475 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34476 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34477 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34478 error ("the immediate argument must be a 5-bit immediate");
34479 return const0_rtx;
34480 default:
34481 error ("the immediate argument must be an 8-bit immediate");
34482 return const0_rtx;
34483 }
34484 }
34485 }
34486 else if (i == nargs - 1)
34487 {
34488 if (!insn_p->operand[nargs].predicate (op, SImode))
34489 {
34490 error ("incorrect rounding operand");
34491 return const0_rtx;
34492 }
34493
34494 /* If there is no rounding, use the normal version of the pattern. */
34495 if (INTVAL (op) == NO_ROUND)
34496 redundant_embed_rnd = 1;
34497 }
34498 else
34499 {
34500 if (VECTOR_MODE_P (mode))
34501 op = safe_vector_operand (op, mode);
34502
34503 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34504 {
34505 if (optimize || !match)
34506 op = copy_to_mode_reg (mode, op);
34507 }
34508 else
34509 {
34510 op = copy_to_reg (op);
34511 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34512 }
34513 }
34514
34515 args[i].op = op;
34516 args[i].mode = mode;
34517 }
34518
34519 switch (nargs)
34520 {
34521 case 1:
34522 pat = GEN_FCN (icode) (target, args[0].op);
34523 break;
34524 case 2:
34525 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34526 break;
34527 case 3:
34528 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34529 args[2].op);
34530 break;
34531 case 4:
34532 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34533 args[2].op, args[3].op);
34534 break;
34535 case 5:
34536 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34537 args[2].op, args[3].op, args[4].op);
break;
34538 case 6:
34539 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34540 args[2].op, args[3].op, args[4].op,
34541 args[5].op);
34542 break;
34543 default:
34544 gcc_unreachable ();
34545 }
34546
34547 if (!pat)
34548 return 0;
34549
34550 if (redundant_embed_rnd)
34551 pat = ix86_erase_embedded_rounding (pat);
34552
34553 emit_insn (pat);
34554 return target;
34555 }
34556
34557 /* Subroutine of ix86_expand_builtin to take care of special insns
34558 with variable number of operands. */
34559
34560 static rtx
34561 ix86_expand_special_args_builtin (const struct builtin_description *d,
34562 tree exp, rtx target)
34563 {
34564 tree arg;
34565 rtx pat, op;
34566 unsigned int i, nargs, arg_adjust, memory;
34567 bool aligned_mem = false;
34568 struct
34569 {
34570 rtx op;
34571 enum machine_mode mode;
34572 } args[3];
34573 enum insn_code icode = d->icode;
34574 bool last_arg_constant = false;
34575 const struct insn_data_d *insn_p = &insn_data[icode];
34576 enum machine_mode tmode = insn_p->operand[0].mode;
34577 enum { load, store } klass;
34578
34579 switch ((enum ix86_builtin_func_type) d->flag)
34580 {
34581 case VOID_FTYPE_VOID:
34582 emit_insn (GEN_FCN (icode) (target));
34583 return 0;
34584 case VOID_FTYPE_UINT64:
34585 case VOID_FTYPE_UNSIGNED:
34586 nargs = 0;
34587 klass = store;
34588 memory = 0;
34589 break;
34590
34591 case INT_FTYPE_VOID:
34592 case UINT64_FTYPE_VOID:
34593 case UNSIGNED_FTYPE_VOID:
34594 nargs = 0;
34595 klass = load;
34596 memory = 0;
34597 break;
34598 case UINT64_FTYPE_PUNSIGNED:
34599 case V2DI_FTYPE_PV2DI:
34600 case V4DI_FTYPE_PV4DI:
34601 case V32QI_FTYPE_PCCHAR:
34602 case V16QI_FTYPE_PCCHAR:
34603 case V8SF_FTYPE_PCV4SF:
34604 case V8SF_FTYPE_PCFLOAT:
34605 case V4SF_FTYPE_PCFLOAT:
34606 case V4DF_FTYPE_PCV2DF:
34607 case V4DF_FTYPE_PCDOUBLE:
34608 case V2DF_FTYPE_PCDOUBLE:
34609 case VOID_FTYPE_PVOID:
34610 case V16SI_FTYPE_PV4SI:
34611 case V16SF_FTYPE_PV4SF:
34612 case V8DI_FTYPE_PV4DI:
34613 case V8DI_FTYPE_PV8DI:
34614 case V8DF_FTYPE_PV4DF:
34615 nargs = 1;
34616 klass = load;
34617 memory = 0;
34618 switch (icode)
34619 {
34620 case CODE_FOR_sse4_1_movntdqa:
34621 case CODE_FOR_avx2_movntdqa:
34622 case CODE_FOR_avx512f_movntdqa:
34623 aligned_mem = true;
34624 break;
34625 default:
34626 break;
34627 }
34628 break;
34629 case VOID_FTYPE_PV2SF_V4SF:
34630 case VOID_FTYPE_PV8DI_V8DI:
34631 case VOID_FTYPE_PV4DI_V4DI:
34632 case VOID_FTYPE_PV2DI_V2DI:
34633 case VOID_FTYPE_PCHAR_V32QI:
34634 case VOID_FTYPE_PCHAR_V16QI:
34635 case VOID_FTYPE_PFLOAT_V16SF:
34636 case VOID_FTYPE_PFLOAT_V8SF:
34637 case VOID_FTYPE_PFLOAT_V4SF:
34638 case VOID_FTYPE_PDOUBLE_V8DF:
34639 case VOID_FTYPE_PDOUBLE_V4DF:
34640 case VOID_FTYPE_PDOUBLE_V2DF:
34641 case VOID_FTYPE_PLONGLONG_LONGLONG:
34642 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34643 case VOID_FTYPE_PINT_INT:
34644 nargs = 1;
34645 klass = store;
34646 /* Reserve memory operand for target. */
34647 memory = ARRAY_SIZE (args);
34648 switch (icode)
34649 {
34650 /* These builtins and instructions require the memory
34651 to be properly aligned. */
34652 case CODE_FOR_avx_movntv4di:
34653 case CODE_FOR_sse2_movntv2di:
34654 case CODE_FOR_avx_movntv8sf:
34655 case CODE_FOR_sse_movntv4sf:
34656 case CODE_FOR_sse4a_vmmovntv4sf:
34657 case CODE_FOR_avx_movntv4df:
34658 case CODE_FOR_sse2_movntv2df:
34659 case CODE_FOR_sse4a_vmmovntv2df:
34660 case CODE_FOR_sse2_movntidi:
34661 case CODE_FOR_sse_movntq:
34662 case CODE_FOR_sse2_movntisi:
34663 case CODE_FOR_avx512f_movntv16sf:
34664 case CODE_FOR_avx512f_movntv8df:
34665 case CODE_FOR_avx512f_movntv8di:
34666 aligned_mem = true;
34667 break;
34668 default:
34669 break;
34670 }
34671 break;
34672 case V4SF_FTYPE_V4SF_PCV2SF:
34673 case V2DF_FTYPE_V2DF_PCDOUBLE:
34674 nargs = 2;
34675 klass = load;
34676 memory = 1;
34677 break;
34678 case V8SF_FTYPE_PCV8SF_V8SI:
34679 case V4DF_FTYPE_PCV4DF_V4DI:
34680 case V4SF_FTYPE_PCV4SF_V4SI:
34681 case V2DF_FTYPE_PCV2DF_V2DI:
34682 case V8SI_FTYPE_PCV8SI_V8SI:
34683 case V4DI_FTYPE_PCV4DI_V4DI:
34684 case V4SI_FTYPE_PCV4SI_V4SI:
34685 case V2DI_FTYPE_PCV2DI_V2DI:
34686 nargs = 2;
34687 klass = load;
34688 memory = 0;
34689 break;
34690 case VOID_FTYPE_PV8DF_V8DF_QI:
34691 case VOID_FTYPE_PV16SF_V16SF_HI:
34692 case VOID_FTYPE_PV8DI_V8DI_QI:
34693 case VOID_FTYPE_PV16SI_V16SI_HI:
34694 switch (icode)
34695 {
34696 /* These builtins and instructions require the memory
34697 to be properly aligned. */
34698 case CODE_FOR_avx512f_storev16sf_mask:
34699 case CODE_FOR_avx512f_storev16si_mask:
34700 case CODE_FOR_avx512f_storev8df_mask:
34701 case CODE_FOR_avx512f_storev8di_mask:
34702 aligned_mem = true;
34703 break;
34704 default:
34705 break;
34706 }
34707 /* FALLTHRU */
34708 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34709 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34710 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34711 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34712 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34713 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34714 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34715 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34716 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34717 case VOID_FTYPE_PFLOAT_V4SF_QI:
34718 case VOID_FTYPE_PV8SI_V8DI_QI:
34719 case VOID_FTYPE_PV8HI_V8DI_QI:
34720 case VOID_FTYPE_PV16HI_V16SI_HI:
34721 case VOID_FTYPE_PV16QI_V8DI_QI:
34722 case VOID_FTYPE_PV16QI_V16SI_HI:
34723 nargs = 2;
34724 klass = store;
34725 /* Reserve memory operand for target. */
34726 memory = ARRAY_SIZE (args);
34727 break;
34728 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34729 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34730 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34731 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34732 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34733 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34734 nargs = 3;
34735 klass = load;
34736 memory = 0;
34737 switch (icode)
34738 {
34739 /* These builtins and instructions require the memory
34740 to be properly aligned. */
34741 case CODE_FOR_avx512f_loadv16sf_mask:
34742 case CODE_FOR_avx512f_loadv16si_mask:
34743 case CODE_FOR_avx512f_loadv8df_mask:
34744 case CODE_FOR_avx512f_loadv8di_mask:
34745 aligned_mem = true;
34746 break;
34747 default:
34748 break;
34749 }
34750 break;
34751 case VOID_FTYPE_UINT_UINT_UINT:
34752 case VOID_FTYPE_UINT64_UINT_UINT:
34753 case UCHAR_FTYPE_UINT_UINT_UINT:
34754 case UCHAR_FTYPE_UINT64_UINT_UINT:
34755 nargs = 3;
34756 klass = load;
34757 memory = ARRAY_SIZE (args);
34758 last_arg_constant = true;
34759 break;
34760 default:
34761 gcc_unreachable ();
34762 }
34763
34764 gcc_assert (nargs <= ARRAY_SIZE (args));
34765
34766 if (klass == store)
34767 {
34768 arg = CALL_EXPR_ARG (exp, 0);
34769 op = expand_normal (arg);
34770 gcc_assert (target == 0);
34771 if (memory)
34772 {
34773 op = ix86_zero_extend_to_Pmode (op);
34774 target = gen_rtx_MEM (tmode, op);
34775 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34776 on it. Try to improve it using get_pointer_alignment,
34777 and if the special builtin is one that requires strict
34778 mode alignment, also from its GET_MODE_ALIGNMENT.
34779 Failure to do so could lead to ix86_legitimate_combined_insn
34780 rejecting all changes to such insns. */
34781 unsigned int align = get_pointer_alignment (arg);
34782 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34783 align = GET_MODE_ALIGNMENT (tmode);
34784 if (MEM_ALIGN (target) < align)
34785 set_mem_align (target, align);
34786 }
34787 else
34788 target = force_reg (tmode, op);
34789 arg_adjust = 1;
34790 }
34791 else
34792 {
34793 arg_adjust = 0;
34794 if (optimize
34795 || target == 0
34796 || !register_operand (target, tmode)
34797 || GET_MODE (target) != tmode)
34798 target = gen_reg_rtx (tmode);
34799 }
34800
34801 for (i = 0; i < nargs; i++)
34802 {
34803 enum machine_mode mode = insn_p->operand[i + 1].mode;
34804 bool match;
34805
34806 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34807 op = expand_normal (arg);
34808 match = insn_p->operand[i + 1].predicate (op, mode);
34809
34810 if (last_arg_constant && (i + 1) == nargs)
34811 {
34812 if (!match)
34813 {
34814 if (icode == CODE_FOR_lwp_lwpvalsi3
34815 || icode == CODE_FOR_lwp_lwpinssi3
34816 || icode == CODE_FOR_lwp_lwpvaldi3
34817 || icode == CODE_FOR_lwp_lwpinsdi3)
34818 error ("the last argument must be a 32-bit immediate");
34819 else
34820 error ("the last argument must be an 8-bit immediate");
34821 return const0_rtx;
34822 }
34823 }
34824 else
34825 {
34826 if (i == memory)
34827 {
34828 /* This must be the memory operand. */
34829 op = ix86_zero_extend_to_Pmode (op);
34830 op = gen_rtx_MEM (mode, op);
34831 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34832 on it. Try to improve it using get_pointer_alignment,
34833 and if the special builtin is one that requires strict
34834 mode alignment, also from its GET_MODE_ALIGNMENT.
34835 Failure to do so could lead to ix86_legitimate_combined_insn
34836 rejecting all changes to such insns. */
34837 unsigned int align = get_pointer_alignment (arg);
34838 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34839 align = GET_MODE_ALIGNMENT (mode);
34840 if (MEM_ALIGN (op) < align)
34841 set_mem_align (op, align);
34842 }
34843 else
34844 {
34845 /* This must be a register. */
34846 if (VECTOR_MODE_P (mode))
34847 op = safe_vector_operand (op, mode);
34848
34849 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34850 op = copy_to_mode_reg (mode, op);
34851 else
34852 {
34853 op = copy_to_reg (op);
34854 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34855 }
34856 }
34857 }
34858
34859 args[i].op = op;
34860 args[i].mode = mode;
34861 }
34862
34863 switch (nargs)
34864 {
34865 case 0:
34866 pat = GEN_FCN (icode) (target);
34867 break;
34868 case 1:
34869 pat = GEN_FCN (icode) (target, args[0].op);
34870 break;
34871 case 2:
34872 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34873 break;
34874 case 3:
34875 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34876 break;
34877 default:
34878 gcc_unreachable ();
34879 }
34880
34881 if (! pat)
34882 return 0;
34883 emit_insn (pat);
34884 return klass == store ? 0 : target;
34885 }
34886
34887 /* Return the integer constant in ARG. Constrain it to be in the range
34888 of the subparts of VEC_TYPE; issue an error if not. */
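/* For example, for a V4SF argument MAX is 3, so a selector of 5
(say, a hypothetical __builtin_ia32_vec_ext_v4sf (x, 5)) is
diagnosed here. */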
34889
34890 static int
34891 get_element_number (tree vec_type, tree arg)
34892 {
34893 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34894
34895 if (!tree_fits_uhwi_p (arg)
34896 || (elt = tree_to_uhwi (arg), elt > max))
34897 {
34898 error ("selector must be an integer constant in the range 0..%wi", max);
34899 return 0;
34900 }
34901
34902 return elt;
34903 }
34904
34905 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34906 ix86_expand_vector_init. We DO have language-level syntax for this, in
34907 the form of (type){ init-list }. Except that since we can't place emms
34908 instructions from inside the compiler, we can't allow the use of MMX
34909 registers unless the user explicitly asks for it. So we do *not* define
34910 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34911 we have builtins invoked by mmintrin.h that give us license to emit
34912 these sorts of instructions. */
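/* For example, mmintrin.h implements _mm_setr_pi32 on top of
__builtin_ia32_vec_init_v2si rather than a (type){...} initializer,
keeping MMX register usage under the compiler's control. (Assumed
mapping; see mmintrin.h.) */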
34913
34914 static rtx
34915 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34916 {
34917 enum machine_mode tmode = TYPE_MODE (type);
34918 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
34919 int i, n_elt = GET_MODE_NUNITS (tmode);
34920 rtvec v = rtvec_alloc (n_elt);
34921
34922 gcc_assert (VECTOR_MODE_P (tmode));
34923 gcc_assert (call_expr_nargs (exp) == n_elt);
34924
34925 for (i = 0; i < n_elt; ++i)
34926 {
34927 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
34928 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
34929 }
34930
34931 if (!target || !register_operand (target, tmode))
34932 target = gen_reg_rtx (tmode);
34933
34934 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
34935 return target;
34936 }
34937
34938 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34939 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
34940 had a language-level syntax for referencing vector elements. */
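/* For example, _mm_extract_epi16 expands through
__builtin_ia32_vec_ext_v8hi and is routed here. (Assumed mapping;
see emmintrin.h.) */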
34941
34942 static rtx
34943 ix86_expand_vec_ext_builtin (tree exp, rtx target)
34944 {
34945 enum machine_mode tmode, mode0;
34946 tree arg0, arg1;
34947 int elt;
34948 rtx op0;
34949
34950 arg0 = CALL_EXPR_ARG (exp, 0);
34951 arg1 = CALL_EXPR_ARG (exp, 1);
34952
34953 op0 = expand_normal (arg0);
34954 elt = get_element_number (TREE_TYPE (arg0), arg1);
34955
34956 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34957 mode0 = TYPE_MODE (TREE_TYPE (arg0));
34958 gcc_assert (VECTOR_MODE_P (mode0));
34959
34960 op0 = force_reg (mode0, op0);
34961
34962 if (optimize || !target || !register_operand (target, tmode))
34963 target = gen_reg_rtx (tmode);
34964
34965 ix86_expand_vector_extract (true, target, op0, elt);
34966
34967 return target;
34968 }
34969
34970 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34971 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
34972 a language-level syntax for referencing vector elements. */
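/* For example, _mm_insert_epi16 expands through
__builtin_ia32_vec_set_v8hi; the source vector is copied below so
the builtin's own argument is never modified. (Assumed mapping;
see emmintrin.h.) */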
34973
34974 static rtx
34975 ix86_expand_vec_set_builtin (tree exp)
34976 {
34977 enum machine_mode tmode, mode1;
34978 tree arg0, arg1, arg2;
34979 int elt;
34980 rtx op0, op1, target;
34981
34982 arg0 = CALL_EXPR_ARG (exp, 0);
34983 arg1 = CALL_EXPR_ARG (exp, 1);
34984 arg2 = CALL_EXPR_ARG (exp, 2);
34985
34986 tmode = TYPE_MODE (TREE_TYPE (arg0));
34987 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34988 gcc_assert (VECTOR_MODE_P (tmode));
34989
34990 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
34991 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
34992 elt = get_element_number (TREE_TYPE (arg0), arg2);
34993
34994 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
34995 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
34996
34997 op0 = force_reg (tmode, op0);
34998 op1 = force_reg (mode1, op1);
34999
35000 /* OP0 is the source of these builtin functions and shouldn't be
35001 modified. Create a copy, use it and return it as target. */
35002 target = gen_reg_rtx (tmode);
35003 emit_move_insn (target, op0);
35004 ix86_expand_vector_set (true, target, op1, elt);
35005
35006 return target;
35007 }
35008
35009 /* Expand an expression EXP that calls a built-in function,
35010 with result going to TARGET if that's convenient
35011 (and in mode MODE if that's convenient).
35012 SUBTARGET may be used as the target for computing one of EXP's operands.
35013 IGNORE is nonzero if the value is to be ignored. */
35014
35015 static rtx
35016 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
35017 enum machine_mode mode, int ignore)
35018 {
35019 const struct builtin_description *d;
35020 size_t i;
35021 enum insn_code icode;
35022 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
35023 tree arg0, arg1, arg2, arg3, arg4;
35024 rtx op0, op1, op2, op3, op4, pat, insn;
35025 enum machine_mode mode0, mode1, mode2, mode3, mode4;
35026 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
35027
35028 /* For CPU builtins that can be folded, fold first and expand the fold. */
35029 switch (fcode)
35030 {
35031 case IX86_BUILTIN_CPU_INIT:
35032 {
35033 /* Make it call __cpu_indicator_init in libgcc. */
35034 tree call_expr, fndecl, type;
35035 type = build_function_type_list (integer_type_node, NULL_TREE);
35036 fndecl = build_fn_decl ("__cpu_indicator_init", type);
35037 call_expr = build_call_expr (fndecl, 0);
35038 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
35039 }
35040 case IX86_BUILTIN_CPU_IS:
35041 case IX86_BUILTIN_CPU_SUPPORTS:
35042 {
35043 tree arg0 = CALL_EXPR_ARG (exp, 0);
35044 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35045 gcc_assert (fold_expr != NULL_TREE);
35046 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35047 }
35048 }
35049
35050 /* Determine whether the builtin function is available under the current ISA.
35051 Originally the builtin was not created if it wasn't applicable to the
35052 current ISA based on the command line switches. With function specific
35053 options, we need to check in the context of the function making the call
35054 whether it is supported. */
35055 if (ix86_builtins_isa[fcode].isa
35056 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
35057 {
35058 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
35059 NULL, (enum fpmath_unit) 0, false);
35060
35061 if (!opts)
35062 error ("%qE needs unknown isa option", fndecl);
35063 else
35064 {
35065 gcc_assert (opts != NULL);
35066 error ("%qE needs isa option %s", fndecl, opts);
35067 free (opts);
35068 }
35069 return const0_rtx;
35070 }
35071
35072 switch (fcode)
35073 {
35074 case IX86_BUILTIN_MASKMOVQ:
35075 case IX86_BUILTIN_MASKMOVDQU:
35076 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35077 ? CODE_FOR_mmx_maskmovq
35078 : CODE_FOR_sse2_maskmovdqu);
35079 /* Note the arg order is different from the operand order. */
35080 arg1 = CALL_EXPR_ARG (exp, 0);
35081 arg2 = CALL_EXPR_ARG (exp, 1);
35082 arg0 = CALL_EXPR_ARG (exp, 2);
35083 op0 = expand_normal (arg0);
35084 op1 = expand_normal (arg1);
35085 op2 = expand_normal (arg2);
35086 mode0 = insn_data[icode].operand[0].mode;
35087 mode1 = insn_data[icode].operand[1].mode;
35088 mode2 = insn_data[icode].operand[2].mode;
35089
35090 op0 = ix86_zero_extend_to_Pmode (op0);
35091 op0 = gen_rtx_MEM (mode1, op0);
35092
35093 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35094 op0 = copy_to_mode_reg (mode0, op0);
35095 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35096 op1 = copy_to_mode_reg (mode1, op1);
35097 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35098 op2 = copy_to_mode_reg (mode2, op2);
35099 pat = GEN_FCN (icode) (op0, op1, op2);
35100 if (! pat)
35101 return 0;
35102 emit_insn (pat);
35103 return 0;
35104
35105 case IX86_BUILTIN_LDMXCSR:
35106 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35107 target = assign_386_stack_local (SImode, SLOT_TEMP);
35108 emit_move_insn (target, op0);
35109 emit_insn (gen_sse_ldmxcsr (target));
35110 return 0;
35111
35112 case IX86_BUILTIN_STMXCSR:
35113 target = assign_386_stack_local (SImode, SLOT_TEMP);
35114 emit_insn (gen_sse_stmxcsr (target));
35115 return copy_to_mode_reg (SImode, target);
35116
35117 case IX86_BUILTIN_CLFLUSH:
35118 arg0 = CALL_EXPR_ARG (exp, 0);
35119 op0 = expand_normal (arg0);
35120 icode = CODE_FOR_sse2_clflush;
35121 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35122 op0 = ix86_zero_extend_to_Pmode (op0);
35123
35124 emit_insn (gen_sse2_clflush (op0));
35125 return 0;
35126
35127 case IX86_BUILTIN_CLFLUSHOPT:
35128 arg0 = CALL_EXPR_ARG (exp, 0);
35129 op0 = expand_normal (arg0);
35130 icode = CODE_FOR_clflushopt;
35131 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35132 op0 = ix86_zero_extend_to_Pmode (op0);
35133
35134 emit_insn (gen_clflushopt (op0));
35135 return 0;
35136
35137 case IX86_BUILTIN_MONITOR:
35138 arg0 = CALL_EXPR_ARG (exp, 0);
35139 arg1 = CALL_EXPR_ARG (exp, 1);
35140 arg2 = CALL_EXPR_ARG (exp, 2);
35141 op0 = expand_normal (arg0);
35142 op1 = expand_normal (arg1);
35143 op2 = expand_normal (arg2);
35144 if (!REG_P (op0))
35145 op0 = ix86_zero_extend_to_Pmode (op0);
35146 if (!REG_P (op1))
35147 op1 = copy_to_mode_reg (SImode, op1);
35148 if (!REG_P (op2))
35149 op2 = copy_to_mode_reg (SImode, op2);
35150 emit_insn (ix86_gen_monitor (op0, op1, op2));
35151 return 0;
35152
35153 case IX86_BUILTIN_MWAIT:
35154 arg0 = CALL_EXPR_ARG (exp, 0);
35155 arg1 = CALL_EXPR_ARG (exp, 1);
35156 op0 = expand_normal (arg0);
35157 op1 = expand_normal (arg1);
35158 if (!REG_P (op0))
35159 op0 = copy_to_mode_reg (SImode, op0);
35160 if (!REG_P (op1))
35161 op1 = copy_to_mode_reg (SImode, op1);
35162 emit_insn (gen_sse3_mwait (op0, op1));
35163 return 0;
35164
35165 case IX86_BUILTIN_VEC_INIT_V2SI:
35166 case IX86_BUILTIN_VEC_INIT_V4HI:
35167 case IX86_BUILTIN_VEC_INIT_V8QI:
35168 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35169
35170 case IX86_BUILTIN_VEC_EXT_V2DF:
35171 case IX86_BUILTIN_VEC_EXT_V2DI:
35172 case IX86_BUILTIN_VEC_EXT_V4SF:
35173 case IX86_BUILTIN_VEC_EXT_V4SI:
35174 case IX86_BUILTIN_VEC_EXT_V8HI:
35175 case IX86_BUILTIN_VEC_EXT_V2SI:
35176 case IX86_BUILTIN_VEC_EXT_V4HI:
35177 case IX86_BUILTIN_VEC_EXT_V16QI:
35178 return ix86_expand_vec_ext_builtin (exp, target);
35179
35180 case IX86_BUILTIN_VEC_SET_V2DI:
35181 case IX86_BUILTIN_VEC_SET_V4SF:
35182 case IX86_BUILTIN_VEC_SET_V4SI:
35183 case IX86_BUILTIN_VEC_SET_V8HI:
35184 case IX86_BUILTIN_VEC_SET_V4HI:
35185 case IX86_BUILTIN_VEC_SET_V16QI:
35186 return ix86_expand_vec_set_builtin (exp);
35187
35188 case IX86_BUILTIN_INFQ:
35189 case IX86_BUILTIN_HUGE_VALQ:
35190 {
35191 REAL_VALUE_TYPE inf;
35192 rtx tmp;
35193
35194 real_inf (&inf);
35195 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35196
35197 tmp = validize_mem (force_const_mem (mode, tmp));
35198
35199 if (target == 0)
35200 target = gen_reg_rtx (mode);
35201
35202 emit_move_insn (target, tmp);
35203 return target;
35204 }
35205
35206 case IX86_BUILTIN_RDPMC:
35207 case IX86_BUILTIN_RDTSC:
35208 case IX86_BUILTIN_RDTSCP:
35209
35210 op0 = gen_reg_rtx (DImode);
35211 op1 = gen_reg_rtx (DImode);
35212
35213 if (fcode == IX86_BUILTIN_RDPMC)
35214 {
35215 arg0 = CALL_EXPR_ARG (exp, 0);
35216 op2 = expand_normal (arg0);
35217 if (!register_operand (op2, SImode))
35218 op2 = copy_to_mode_reg (SImode, op2);
35219
35220 insn = (TARGET_64BIT
35221 ? gen_rdpmc_rex64 (op0, op1, op2)
35222 : gen_rdpmc (op0, op2));
35223 emit_insn (insn);
35224 }
35225 else if (fcode == IX86_BUILTIN_RDTSC)
35226 {
35227 insn = (TARGET_64BIT
35228 ? gen_rdtsc_rex64 (op0, op1)
35229 : gen_rdtsc (op0));
35230 emit_insn (insn);
35231 }
35232 else
35233 {
35234 op2 = gen_reg_rtx (SImode);
35235
35236 insn = (TARGET_64BIT
35237 ? gen_rdtscp_rex64 (op0, op1, op2)
35238 : gen_rdtscp (op0, op2));
35239 emit_insn (insn);
35240
35241 arg0 = CALL_EXPR_ARG (exp, 0);
35242 op4 = expand_normal (arg0);
35243 if (!address_operand (op4, VOIDmode))
35244 {
35245 op4 = convert_memory_address (Pmode, op4);
35246 op4 = copy_addr_to_reg (op4);
35247 }
35248 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35249 }
35250
35251 if (target == 0)
35252 {
35253 /* mode is VOIDmode if __builtin_rd* has been called
35254 without lhs. */
35255 if (mode == VOIDmode)
35256 return target;
35257 target = gen_reg_rtx (mode);
35258 }
35259
35260 if (TARGET_64BIT)
35261 {
35262 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35263 op1, 1, OPTAB_DIRECT);
35264 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35265 op0, 1, OPTAB_DIRECT);
35266 }
35267
35268 emit_move_insn (target, op0);
35269 return target;
35270
35271 case IX86_BUILTIN_FXSAVE:
35272 case IX86_BUILTIN_FXRSTOR:
35273 case IX86_BUILTIN_FXSAVE64:
35274 case IX86_BUILTIN_FXRSTOR64:
35275 case IX86_BUILTIN_FNSTENV:
35276 case IX86_BUILTIN_FLDENV:
35277 case IX86_BUILTIN_FNSTSW:
35278 mode0 = BLKmode;
35279 switch (fcode)
35280 {
35281 case IX86_BUILTIN_FXSAVE:
35282 icode = CODE_FOR_fxsave;
35283 break;
35284 case IX86_BUILTIN_FXRSTOR:
35285 icode = CODE_FOR_fxrstor;
35286 break;
35287 case IX86_BUILTIN_FXSAVE64:
35288 icode = CODE_FOR_fxsave64;
35289 break;
35290 case IX86_BUILTIN_FXRSTOR64:
35291 icode = CODE_FOR_fxrstor64;
35292 break;
35293 case IX86_BUILTIN_FNSTENV:
35294 icode = CODE_FOR_fnstenv;
35295 break;
35296 case IX86_BUILTIN_FLDENV:
35297 icode = CODE_FOR_fldenv;
35298 break;
35299 case IX86_BUILTIN_FNSTSW:
35300 icode = CODE_FOR_fnstsw;
35301 mode0 = HImode;
35302 break;
35303 default:
35304 gcc_unreachable ();
35305 }
35306
35307 arg0 = CALL_EXPR_ARG (exp, 0);
35308 op0 = expand_normal (arg0);
35309
35310 if (!address_operand (op0, VOIDmode))
35311 {
35312 op0 = convert_memory_address (Pmode, op0);
35313 op0 = copy_addr_to_reg (op0);
35314 }
35315 op0 = gen_rtx_MEM (mode0, op0);
35316
35317 pat = GEN_FCN (icode) (op0);
35318 if (pat)
35319 emit_insn (pat);
35320 return 0;
35321
35322 case IX86_BUILTIN_XSAVE:
35323 case IX86_BUILTIN_XRSTOR:
35324 case IX86_BUILTIN_XSAVE64:
35325 case IX86_BUILTIN_XRSTOR64:
35326 case IX86_BUILTIN_XSAVEOPT:
35327 case IX86_BUILTIN_XSAVEOPT64:
35328 case IX86_BUILTIN_XSAVES:
35329 case IX86_BUILTIN_XRSTORS:
35330 case IX86_BUILTIN_XSAVES64:
35331 case IX86_BUILTIN_XRSTORS64:
35332 case IX86_BUILTIN_XSAVEC:
35333 case IX86_BUILTIN_XSAVEC64:
35334 arg0 = CALL_EXPR_ARG (exp, 0);
35335 arg1 = CALL_EXPR_ARG (exp, 1);
35336 op0 = expand_normal (arg0);
35337 op1 = expand_normal (arg1);
35338
35339 if (!address_operand (op0, VOIDmode))
35340 {
35341 op0 = convert_memory_address (Pmode, op0);
35342 op0 = copy_addr_to_reg (op0);
35343 }
35344 op0 = gen_rtx_MEM (BLKmode, op0);
35345
35346 op1 = force_reg (DImode, op1);
35347
35348 if (TARGET_64BIT)
35349 {
35350 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35351 NULL, 1, OPTAB_DIRECT);
35352 switch (fcode)
35353 {
35354 case IX86_BUILTIN_XSAVE:
35355 icode = CODE_FOR_xsave_rex64;
35356 break;
35357 case IX86_BUILTIN_XRSTOR:
35358 icode = CODE_FOR_xrstor_rex64;
35359 break;
35360 case IX86_BUILTIN_XSAVE64:
35361 icode = CODE_FOR_xsave64;
35362 break;
35363 case IX86_BUILTIN_XRSTOR64:
35364 icode = CODE_FOR_xrstor64;
35365 break;
35366 case IX86_BUILTIN_XSAVEOPT:
35367 icode = CODE_FOR_xsaveopt_rex64;
35368 break;
35369 case IX86_BUILTIN_XSAVEOPT64:
35370 icode = CODE_FOR_xsaveopt64;
35371 break;
35372 case IX86_BUILTIN_XSAVES:
35373 icode = CODE_FOR_xsaves_rex64;
35374 break;
35375 case IX86_BUILTIN_XRSTORS:
35376 icode = CODE_FOR_xrstors_rex64;
35377 break;
35378 case IX86_BUILTIN_XSAVES64:
35379 icode = CODE_FOR_xsaves64;
35380 break;
35381 case IX86_BUILTIN_XRSTORS64:
35382 icode = CODE_FOR_xrstors64;
35383 break;
35384 case IX86_BUILTIN_XSAVEC:
35385 icode = CODE_FOR_xsavec_rex64;
35386 break;
35387 case IX86_BUILTIN_XSAVEC64:
35388 icode = CODE_FOR_xsavec64;
35389 break;
35390 default:
35391 gcc_unreachable ();
35392 }
35393
35394 op2 = gen_lowpart (SImode, op2);
35395 op1 = gen_lowpart (SImode, op1);
35396 pat = GEN_FCN (icode) (op0, op1, op2);
35397 }
35398 else
35399 {
35400 switch (fcode)
35401 {
35402 case IX86_BUILTIN_XSAVE:
35403 icode = CODE_FOR_xsave;
35404 break;
35405 case IX86_BUILTIN_XRSTOR:
35406 icode = CODE_FOR_xrstor;
35407 break;
35408 case IX86_BUILTIN_XSAVEOPT:
35409 icode = CODE_FOR_xsaveopt;
35410 break;
35411 case IX86_BUILTIN_XSAVES:
35412 icode = CODE_FOR_xsaves;
35413 break;
35414 case IX86_BUILTIN_XRSTORS:
35415 icode = CODE_FOR_xrstors;
35416 break;
35417 case IX86_BUILTIN_XSAVEC:
35418 icode = CODE_FOR_xsavec;
35419 break;
35420 default:
35421 gcc_unreachable ();
35422 }
35423 pat = GEN_FCN (icode) (op0, op1);
35424 }
35425
35426 if (pat)
35427 emit_insn (pat);
35428 return 0;
35429
35430 case IX86_BUILTIN_LLWPCB:
35431 arg0 = CALL_EXPR_ARG (exp, 0);
35432 op0 = expand_normal (arg0);
35433 icode = CODE_FOR_lwp_llwpcb;
35434 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35435 op0 = ix86_zero_extend_to_Pmode (op0);
35436 emit_insn (gen_lwp_llwpcb (op0));
35437 return 0;
35438
35439 case IX86_BUILTIN_SLWPCB:
35440 icode = CODE_FOR_lwp_slwpcb;
35441 if (!target
35442 || !insn_data[icode].operand[0].predicate (target, Pmode))
35443 target = gen_reg_rtx (Pmode);
35444 emit_insn (gen_lwp_slwpcb (target));
35445 return target;
35446
35447 case IX86_BUILTIN_BEXTRI32:
35448 case IX86_BUILTIN_BEXTRI64:
35449 arg0 = CALL_EXPR_ARG (exp, 0);
35450 arg1 = CALL_EXPR_ARG (exp, 1);
35451 op0 = expand_normal (arg0);
35452 op1 = expand_normal (arg1);
35453 icode = (fcode == IX86_BUILTIN_BEXTRI32
35454 ? CODE_FOR_tbm_bextri_si
35455 : CODE_FOR_tbm_bextri_di);
35456 if (!CONST_INT_P (op1))
35457 {
35458 error ("last argument must be an immediate");
35459 return const0_rtx;
35460 }
35461 else
35462 {
35463 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35464 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35465 op1 = GEN_INT (length);
35466 op2 = GEN_INT (lsb_index);
35467 pat = GEN_FCN (icode) (target, op0, op1, op2);
35468 if (pat)
35469 emit_insn (pat);
35470 return target;
35471 }
35472
35473 case IX86_BUILTIN_RDRAND16_STEP:
35474 icode = CODE_FOR_rdrandhi_1;
35475 mode0 = HImode;
35476 goto rdrand_step;
35477
35478 case IX86_BUILTIN_RDRAND32_STEP:
35479 icode = CODE_FOR_rdrandsi_1;
35480 mode0 = SImode;
35481 goto rdrand_step;
35482
35483 case IX86_BUILTIN_RDRAND64_STEP:
35484 icode = CODE_FOR_rdranddi_1;
35485 mode0 = DImode;
35486
35487 rdrand_step:
35488 op0 = gen_reg_rtx (mode0);
35489 emit_insn (GEN_FCN (icode) (op0));
35490
35491 arg0 = CALL_EXPR_ARG (exp, 0);
35492 op1 = expand_normal (arg0);
35493 if (!address_operand (op1, VOIDmode))
35494 {
35495 op1 = convert_memory_address (Pmode, op1);
35496 op1 = copy_addr_to_reg (op1);
35497 }
35498 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35499
35500 op1 = gen_reg_rtx (SImode);
35501 emit_move_insn (op1, CONST1_RTX (SImode));
35502
35503 /* Emit SImode conditional move. */
35504 if (mode0 == HImode)
35505 {
35506 op2 = gen_reg_rtx (SImode);
35507 emit_insn (gen_zero_extendhisi2 (op2, op0));
35508 }
35509 else if (mode0 == SImode)
35510 op2 = op0;
35511 else
35512 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35513
35514 if (target == 0
35515 || !register_operand (target, SImode))
35516 target = gen_reg_rtx (SImode);
35517
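/* rdrand sets CF on success. GEU on CCCmode is true when the carry
is clear, so the IF_THEN_ELSE below yields OP2 (the destination
value, which the hardware leaves as zero on failure) when the
instruction failed, and OP1 (constant 1) when it succeeded. */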
35518 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35519 const0_rtx);
35520 emit_insn (gen_rtx_SET (VOIDmode, target,
35521 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35522 return target;
35523
35524 case IX86_BUILTIN_RDSEED16_STEP:
35525 icode = CODE_FOR_rdseedhi_1;
35526 mode0 = HImode;
35527 goto rdseed_step;
35528
35529 case IX86_BUILTIN_RDSEED32_STEP:
35530 icode = CODE_FOR_rdseedsi_1;
35531 mode0 = SImode;
35532 goto rdseed_step;
35533
35534 case IX86_BUILTIN_RDSEED64_STEP:
35535 icode = CODE_FOR_rdseeddi_1;
35536 mode0 = DImode;
35537
35538 rdseed_step:
35539 op0 = gen_reg_rtx (mode0);
35540 emit_insn (GEN_FCN (icode) (op0));
35541
35542 arg0 = CALL_EXPR_ARG (exp, 0);
35543 op1 = expand_normal (arg0);
35544 if (!address_operand (op1, VOIDmode))
35545 {
35546 op1 = convert_memory_address (Pmode, op1);
35547 op1 = copy_addr_to_reg (op1);
35548 }
35549 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35550
35551 op2 = gen_reg_rtx (QImode);
35552
35553 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35554 const0_rtx);
35555 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35556
35557 if (target == 0
35558 || !register_operand (target, SImode))
35559 target = gen_reg_rtx (SImode);
35560
35561 emit_insn (gen_zero_extendqisi2 (target, op2));
35562 return target;
35563
35564 case IX86_BUILTIN_ADDCARRYX32:
35565 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35566 mode0 = SImode;
35567 goto addcarryx;
35568
35569 case IX86_BUILTIN_ADDCARRYX64:
35570 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35571 mode0 = DImode;
35572
35573 addcarryx:
35574 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35575 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35576 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35577 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35578
35579 op0 = gen_reg_rtx (QImode);
35580
35581 /* Generate CF from input operand. */
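/* Adding -1 (0xff) to the QImode carry-in sets CF exactly when the
carry-in is nonzero, reproducing the incoming carry in the flags
register for the add-with-carry below. */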
35582 op1 = expand_normal (arg0);
35583 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35584 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
35585
35586 /* Generate an ADCX (or ADC) instruction to compute X+Y+CF. */
35587 op2 = expand_normal (arg1);
35588 op3 = expand_normal (arg2);
35589
35590 if (!REG_P (op2))
35591 op2 = copy_to_mode_reg (mode0, op2);
35592 if (!REG_P (op3))
35593 op3 = copy_to_mode_reg (mode0, op3);
35594
35595 op0 = gen_reg_rtx (mode0);
35596
35597 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35598 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35599 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35600
35601 /* Store the result. */
35602 op4 = expand_normal (arg3);
35603 if (!address_operand (op4, VOIDmode))
35604 {
35605 op4 = convert_memory_address (Pmode, op4);
35606 op4 = copy_addr_to_reg (op4);
35607 }
35608 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35609
35610 /* Return current CF value. */
35611 if (target == 0)
35612 target = gen_reg_rtx (QImode);
35613
35614 PUT_MODE (pat, QImode);
35615 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35616 return target;
35617
35618 case IX86_BUILTIN_READ_FLAGS:
35619 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35620
35621 if (optimize
35622 || target == NULL_RTX
35623 || !nonimmediate_operand (target, word_mode)
35624 || GET_MODE (target) != word_mode)
35625 target = gen_reg_rtx (word_mode);
35626
35627 emit_insn (gen_pop (target));
35628 return target;
35629
35630 case IX86_BUILTIN_WRITE_FLAGS:
35631
35632 arg0 = CALL_EXPR_ARG (exp, 0);
35633 op0 = expand_normal (arg0);
35634 if (!general_no_elim_operand (op0, word_mode))
35635 op0 = copy_to_mode_reg (word_mode, op0);
35636
35637 emit_insn (gen_push (op0));
35638 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35639 return 0;
35640
35641 case IX86_BUILTIN_KORTESTC16:
35642 icode = CODE_FOR_kortestchi;
35643 mode0 = HImode;
35644 mode1 = CCCmode;
35645 goto kortest;
35646
35647 case IX86_BUILTIN_KORTESTZ16:
35648 icode = CODE_FOR_kortestzhi;
35649 mode0 = HImode;
35650 mode1 = CCZmode;
35651
35652 kortest:
35653 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35654 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35655 op0 = expand_normal (arg0);
35656 op1 = expand_normal (arg1);
35657
35658 op0 = copy_to_reg (op0);
35659 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35660 op1 = copy_to_reg (op1);
35661 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35662
35663 target = gen_reg_rtx (QImode);
35664 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
35665
35666 /* Emit kortest. */
35667 emit_insn (GEN_FCN (icode) (op0, op1));
35668 /* And use setcc to return result from flags. */
35669 ix86_expand_setcc (target, EQ,
35670 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35671 return target;
35672
35673 case IX86_BUILTIN_GATHERSIV2DF:
35674 icode = CODE_FOR_avx2_gathersiv2df;
35675 goto gather_gen;
35676 case IX86_BUILTIN_GATHERSIV4DF:
35677 icode = CODE_FOR_avx2_gathersiv4df;
35678 goto gather_gen;
35679 case IX86_BUILTIN_GATHERDIV2DF:
35680 icode = CODE_FOR_avx2_gatherdiv2df;
35681 goto gather_gen;
35682 case IX86_BUILTIN_GATHERDIV4DF:
35683 icode = CODE_FOR_avx2_gatherdiv4df;
35684 goto gather_gen;
35685 case IX86_BUILTIN_GATHERSIV4SF:
35686 icode = CODE_FOR_avx2_gathersiv4sf;
35687 goto gather_gen;
35688 case IX86_BUILTIN_GATHERSIV8SF:
35689 icode = CODE_FOR_avx2_gathersiv8sf;
35690 goto gather_gen;
35691 case IX86_BUILTIN_GATHERDIV4SF:
35692 icode = CODE_FOR_avx2_gatherdiv4sf;
35693 goto gather_gen;
35694 case IX86_BUILTIN_GATHERDIV8SF:
35695 icode = CODE_FOR_avx2_gatherdiv8sf;
35696 goto gather_gen;
35697 case IX86_BUILTIN_GATHERSIV2DI:
35698 icode = CODE_FOR_avx2_gathersiv2di;
35699 goto gather_gen;
35700 case IX86_BUILTIN_GATHERSIV4DI:
35701 icode = CODE_FOR_avx2_gathersiv4di;
35702 goto gather_gen;
35703 case IX86_BUILTIN_GATHERDIV2DI:
35704 icode = CODE_FOR_avx2_gatherdiv2di;
35705 goto gather_gen;
35706 case IX86_BUILTIN_GATHERDIV4DI:
35707 icode = CODE_FOR_avx2_gatherdiv4di;
35708 goto gather_gen;
35709 case IX86_BUILTIN_GATHERSIV4SI:
35710 icode = CODE_FOR_avx2_gathersiv4si;
35711 goto gather_gen;
35712 case IX86_BUILTIN_GATHERSIV8SI:
35713 icode = CODE_FOR_avx2_gathersiv8si;
35714 goto gather_gen;
35715 case IX86_BUILTIN_GATHERDIV4SI:
35716 icode = CODE_FOR_avx2_gatherdiv4si;
35717 goto gather_gen;
35718 case IX86_BUILTIN_GATHERDIV8SI:
35719 icode = CODE_FOR_avx2_gatherdiv8si;
35720 goto gather_gen;
35721 case IX86_BUILTIN_GATHERALTSIV4DF:
35722 icode = CODE_FOR_avx2_gathersiv4df;
35723 goto gather_gen;
35724 case IX86_BUILTIN_GATHERALTDIV8SF:
35725 icode = CODE_FOR_avx2_gatherdiv8sf;
35726 goto gather_gen;
35727 case IX86_BUILTIN_GATHERALTSIV4DI:
35728 icode = CODE_FOR_avx2_gathersiv4di;
35729 goto gather_gen;
35730 case IX86_BUILTIN_GATHERALTDIV8SI:
35731 icode = CODE_FOR_avx2_gatherdiv8si;
35732 goto gather_gen;
35733 case IX86_BUILTIN_GATHER3SIV16SF:
35734 icode = CODE_FOR_avx512f_gathersiv16sf;
35735 goto gather_gen;
35736 case IX86_BUILTIN_GATHER3SIV8DF:
35737 icode = CODE_FOR_avx512f_gathersiv8df;
35738 goto gather_gen;
35739 case IX86_BUILTIN_GATHER3DIV16SF:
35740 icode = CODE_FOR_avx512f_gatherdiv16sf;
35741 goto gather_gen;
35742 case IX86_BUILTIN_GATHER3DIV8DF:
35743 icode = CODE_FOR_avx512f_gatherdiv8df;
35744 goto gather_gen;
35745 case IX86_BUILTIN_GATHER3SIV16SI:
35746 icode = CODE_FOR_avx512f_gathersiv16si;
35747 goto gather_gen;
35748 case IX86_BUILTIN_GATHER3SIV8DI:
35749 icode = CODE_FOR_avx512f_gathersiv8di;
35750 goto gather_gen;
35751 case IX86_BUILTIN_GATHER3DIV16SI:
35752 icode = CODE_FOR_avx512f_gatherdiv16si;
35753 goto gather_gen;
35754 case IX86_BUILTIN_GATHER3DIV8DI:
35755 icode = CODE_FOR_avx512f_gatherdiv8di;
35756 goto gather_gen;
35757 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35758 icode = CODE_FOR_avx512f_gathersiv8df;
35759 goto gather_gen;
35760 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35761 icode = CODE_FOR_avx512f_gatherdiv16sf;
35762 goto gather_gen;
35763 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35764 icode = CODE_FOR_avx512f_gathersiv8di;
35765 goto gather_gen;
35766 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35767 icode = CODE_FOR_avx512f_gatherdiv16si;
35768 goto gather_gen;
35769 case IX86_BUILTIN_SCATTERSIV16SF:
35770 icode = CODE_FOR_avx512f_scattersiv16sf;
35771 goto scatter_gen;
35772 case IX86_BUILTIN_SCATTERSIV8DF:
35773 icode = CODE_FOR_avx512f_scattersiv8df;
35774 goto scatter_gen;
35775 case IX86_BUILTIN_SCATTERDIV16SF:
35776 icode = CODE_FOR_avx512f_scatterdiv16sf;
35777 goto scatter_gen;
35778 case IX86_BUILTIN_SCATTERDIV8DF:
35779 icode = CODE_FOR_avx512f_scatterdiv8df;
35780 goto scatter_gen;
35781 case IX86_BUILTIN_SCATTERSIV16SI:
35782 icode = CODE_FOR_avx512f_scattersiv16si;
35783 goto scatter_gen;
35784 case IX86_BUILTIN_SCATTERSIV8DI:
35785 icode = CODE_FOR_avx512f_scattersiv8di;
35786 goto scatter_gen;
35787 case IX86_BUILTIN_SCATTERDIV16SI:
35788 icode = CODE_FOR_avx512f_scatterdiv16si;
35789 goto scatter_gen;
35790 case IX86_BUILTIN_SCATTERDIV8DI:
35791 icode = CODE_FOR_avx512f_scatterdiv8di;
35792 goto scatter_gen;
35793
35794 case IX86_BUILTIN_GATHERPFDPD:
35795 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
35796 goto vec_prefetch_gen;
35797 case IX86_BUILTIN_GATHERPFDPS:
35798 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
35799 goto vec_prefetch_gen;
35800 case IX86_BUILTIN_GATHERPFQPD:
35801 icode = CODE_FOR_avx512pf_gatherpfv8didf;
35802 goto vec_prefetch_gen;
35803 case IX86_BUILTIN_GATHERPFQPS:
35804 icode = CODE_FOR_avx512pf_gatherpfv8disf;
35805 goto vec_prefetch_gen;
35806 case IX86_BUILTIN_SCATTERPFDPD:
35807 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
35808 goto vec_prefetch_gen;
35809 case IX86_BUILTIN_SCATTERPFDPS:
35810 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
35811 goto vec_prefetch_gen;
35812 case IX86_BUILTIN_SCATTERPFQPD:
35813 icode = CODE_FOR_avx512pf_scatterpfv8didf;
35814 goto vec_prefetch_gen;
35815 case IX86_BUILTIN_SCATTERPFQPS:
35816 icode = CODE_FOR_avx512pf_scatterpfv8disf;
35817 goto vec_prefetch_gen;
35818
35819 gather_gen:
35820 rtx half;
35821 rtx (*gen) (rtx, rtx);
35822
35823 arg0 = CALL_EXPR_ARG (exp, 0);
35824 arg1 = CALL_EXPR_ARG (exp, 1);
35825 arg2 = CALL_EXPR_ARG (exp, 2);
35826 arg3 = CALL_EXPR_ARG (exp, 3);
35827 arg4 = CALL_EXPR_ARG (exp, 4);
35828 op0 = expand_normal (arg0);
35829 op1 = expand_normal (arg1);
35830 op2 = expand_normal (arg2);
35831 op3 = expand_normal (arg3);
35832 op4 = expand_normal (arg4);
35833 /* Note the arg order is different from the operand order. */
35834 mode0 = insn_data[icode].operand[1].mode;
35835 mode2 = insn_data[icode].operand[3].mode;
35836 mode3 = insn_data[icode].operand[4].mode;
35837 mode4 = insn_data[icode].operand[5].mode;
35838
35839 if (target == NULL_RTX
35840 || GET_MODE (target) != insn_data[icode].operand[0].mode
35841 || !insn_data[icode].operand[0].predicate (target,
35842 GET_MODE (target)))
35843 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35844 else
35845 subtarget = target;
35846
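/* The *ALT* gather variants pair vectors of different widths: the
SIV forms only need the low half of the index (OP2), and the DIV
forms only the low halves of the source and mask (OP0 and OP3), so
extract those halves before the operand predicates are checked. */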
35847 switch (fcode)
35848 {
35849 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35850 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35851 half = gen_reg_rtx (V8SImode);
35852 if (!nonimmediate_operand (op2, V16SImode))
35853 op2 = copy_to_mode_reg (V16SImode, op2);
35854 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35855 op2 = half;
35856 break;
35857 case IX86_BUILTIN_GATHERALTSIV4DF:
35858 case IX86_BUILTIN_GATHERALTSIV4DI:
35859 half = gen_reg_rtx (V4SImode);
35860 if (!nonimmediate_operand (op2, V8SImode))
35861 op2 = copy_to_mode_reg (V8SImode, op2);
35862 emit_insn (gen_vec_extract_lo_v8si (half, op2));
35863 op2 = half;
35864 break;
35865 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35866 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35867 half = gen_reg_rtx (mode0);
35868 if (mode0 == V8SFmode)
35869 gen = gen_vec_extract_lo_v16sf;
35870 else
35871 gen = gen_vec_extract_lo_v16si;
35872 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35873 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35874 emit_insn (gen (half, op0));
35875 op0 = half;
35876 if (GET_MODE (op3) != VOIDmode)
35877 {
35878 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35879 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35880 emit_insn (gen (half, op3));
35881 op3 = half;
35882 }
35883 break;
35884 case IX86_BUILTIN_GATHERALTDIV8SF:
35885 case IX86_BUILTIN_GATHERALTDIV8SI:
35886 half = gen_reg_rtx (mode0);
35887 if (mode0 == V4SFmode)
35888 gen = gen_vec_extract_lo_v8sf;
35889 else
35890 gen = gen_vec_extract_lo_v8si;
35891 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35892 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35893 emit_insn (gen (half, op0));
35894 op0 = half;
35895 if (GET_MODE (op3) != VOIDmode)
35896 {
35897 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35898 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35899 emit_insn (gen (half, op3));
35900 op3 = half;
35901 }
35902 break;
35903 default:
35904 break;
35905 }
35906
35907 /* Force the memory operand to use only a base register here. We
35908 don't want to do this for the memory operands of other builtin
35909 functions. */
35910 op1 = ix86_zero_extend_to_Pmode (op1);
35911
35912 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35913 op0 = copy_to_mode_reg (mode0, op0);
35914 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
35915 op1 = copy_to_mode_reg (Pmode, op1);
35916 if (!insn_data[icode].operand[3].predicate (op2, mode2))
35917 op2 = copy_to_mode_reg (mode2, op2);
35918 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
35919 {
35920 if (!insn_data[icode].operand[4].predicate (op3, mode3))
35921 op3 = copy_to_mode_reg (mode3, op3);
35922 }
35923 else
35924 {
35925 op3 = copy_to_reg (op3);
35926 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
35927 }
35928 if (!insn_data[icode].operand[5].predicate (op4, mode4))
35929 {
35930 error ("the last argument must be scale 1, 2, 4, 8");
35931 return const0_rtx;
35932 }
35933
35934 /* Optimize. If mask is known to have all high bits set,
35935 replace op0 with pc_rtx to signal that the instruction
35936 overwrites the whole destination and doesn't use its
35937 previous contents. */
35938 if (optimize)
35939 {
35940 if (TREE_CODE (arg3) == INTEGER_CST)
35941 {
35942 if (integer_all_onesp (arg3))
35943 op0 = pc_rtx;
35944 }
35945 else if (TREE_CODE (arg3) == VECTOR_CST)
35946 {
35947 unsigned int negative = 0;
35948 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
35949 {
35950 tree cst = VECTOR_CST_ELT (arg3, i);
35951 if (TREE_CODE (cst) == INTEGER_CST
35952 && tree_int_cst_sign_bit (cst))
35953 negative++;
35954 else if (TREE_CODE (cst) == REAL_CST
35955 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
35956 negative++;
35957 }
35958 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
35959 op0 = pc_rtx;
35960 }
35961 else if (TREE_CODE (arg3) == SSA_NAME
35962 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
35963 {
35964 /* Recognize also when mask is like:
35965 __v2df src = _mm_setzero_pd ();
35966 __v2df mask = _mm_cmpeq_pd (src, src);
35967 or
35968 __v8sf src = _mm256_setzero_ps ();
35969 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
35970 as that is a cheaper way to load all ones into
35971 a register than having to load a constant from
35972 memory. */
35973 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
35974 if (is_gimple_call (def_stmt))
35975 {
35976 tree fndecl = gimple_call_fndecl (def_stmt);
35977 if (fndecl
35978 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35979 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
35980 {
35981 case IX86_BUILTIN_CMPPD:
35982 case IX86_BUILTIN_CMPPS:
35983 case IX86_BUILTIN_CMPPD256:
35984 case IX86_BUILTIN_CMPPS256:
35985 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
35986 break;
35987 /* FALLTHRU */
35988 case IX86_BUILTIN_CMPEQPD:
35989 case IX86_BUILTIN_CMPEQPS:
35990 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
35991 && initializer_zerop (gimple_call_arg (def_stmt,
35992 1)))
35993 op0 = pc_rtx;
35994 break;
35995 default:
35996 break;
35997 }
35998 }
35999 }
36000 }
36001
36002 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
36003 if (! pat)
36004 return const0_rtx;
36005 emit_insn (pat);
36006
36007 switch (fcode)
36008 {
36009 case IX86_BUILTIN_GATHER3DIV16SF:
36010 if (target == NULL_RTX)
36011 target = gen_reg_rtx (V8SFmode);
36012 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
36013 break;
36014 case IX86_BUILTIN_GATHER3DIV16SI:
36015 if (target == NULL_RTX)
36016 target = gen_reg_rtx (V8SImode);
36017 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
36018 break;
36019 case IX86_BUILTIN_GATHERDIV8SF:
36020 if (target == NULL_RTX)
36021 target = gen_reg_rtx (V4SFmode);
36022 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
36023 break;
36024 case IX86_BUILTIN_GATHERDIV8SI:
36025 if (target == NULL_RTX)
36026 target = gen_reg_rtx (V4SImode);
36027 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
36028 break;
36029 default:
36030 target = subtarget;
36031 break;
36032 }
36033 return target;
36034
36035 scatter_gen:
36036 arg0 = CALL_EXPR_ARG (exp, 0);
36037 arg1 = CALL_EXPR_ARG (exp, 1);
36038 arg2 = CALL_EXPR_ARG (exp, 2);
36039 arg3 = CALL_EXPR_ARG (exp, 3);
36040 arg4 = CALL_EXPR_ARG (exp, 4);
36041 op0 = expand_normal (arg0);
36042 op1 = expand_normal (arg1);
36043 op2 = expand_normal (arg2);
36044 op3 = expand_normal (arg3);
36045 op4 = expand_normal (arg4);
36046 mode1 = insn_data[icode].operand[1].mode;
36047 mode2 = insn_data[icode].operand[2].mode;
36048 mode3 = insn_data[icode].operand[3].mode;
36049 mode4 = insn_data[icode].operand[4].mode;
36050
36051 /* Force memory operand only with base register here. But we
36052 don't want to do it on memory operand for other builtin
36053 functions. */
36054 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
36055
36056 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36057 op0 = copy_to_mode_reg (Pmode, op0);
36058
36059 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
36060 {
36061 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36062 op1 = copy_to_mode_reg (mode1, op1);
36063 }
36064 else
36065 {
36066 op1 = copy_to_reg (op1);
36067 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
36068 }
36069
36070 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36071 op2 = copy_to_mode_reg (mode2, op2);
36072
36073 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36074 op3 = copy_to_mode_reg (mode3, op3);
36075
36076 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36077 {
36078 error ("the last argument must be scale 1, 2, 4, 8");
36079 return const0_rtx;
36080 }
36081
36082 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36083 if (! pat)
36084 return const0_rtx;
36085
36086 emit_insn (pat);
36087 return 0;
36088
36089 vec_prefetch_gen:
36090 arg0 = CALL_EXPR_ARG (exp, 0);
36091 arg1 = CALL_EXPR_ARG (exp, 1);
36092 arg2 = CALL_EXPR_ARG (exp, 2);
36093 arg3 = CALL_EXPR_ARG (exp, 3);
36094 arg4 = CALL_EXPR_ARG (exp, 4);
36095 op0 = expand_normal (arg0);
36096 op1 = expand_normal (arg1);
36097 op2 = expand_normal (arg2);
36098 op3 = expand_normal (arg3);
36099 op4 = expand_normal (arg4);
36100 mode0 = insn_data[icode].operand[0].mode;
36101 mode1 = insn_data[icode].operand[1].mode;
36102 mode3 = insn_data[icode].operand[3].mode;
36103 mode4 = insn_data[icode].operand[4].mode;
36104
36105 if (GET_MODE (op0) == mode0
36106 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
36107 {
36108 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36109 op0 = copy_to_mode_reg (mode0, op0);
36110 }
36111 else if (op0 != constm1_rtx)
36112 {
36113 op0 = copy_to_reg (op0);
36114 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36115 }
36116
36117 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36118 op1 = copy_to_mode_reg (mode1, op1);
36119
36120 /* Force memory operand only with base register here. But we
36121 don't want to do it on memory operand for other builtin
36122 functions. */
36123 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36124
36125 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36126 op2 = copy_to_mode_reg (Pmode, op2);
36127
36128 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36129 {
36130 error ("the forth argument must be scale 1, 2, 4, 8");
36131 return const0_rtx;
36132 }
36133
36134 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36135 {
36136 error ("incorrect hint operand");
36137 return const0_rtx;
36138 }
36139
36140 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36141 if (! pat)
36142 return const0_rtx;
36143
36144 emit_insn (pat);
36145
36146 return 0;
36147
36148 case IX86_BUILTIN_XABORT:
36149 icode = CODE_FOR_xabort;
36150 arg0 = CALL_EXPR_ARG (exp, 0);
36151 op0 = expand_normal (arg0);
36152 mode0 = insn_data[icode].operand[0].mode;
36153 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36154 {
36155 error ("the xabort's argument must be an 8-bit immediate");
36156 return const0_rtx;
36157 }
36158 emit_insn (gen_xabort (op0));
36159 return 0;
36160
36161 default:
36162 break;
36163 }
36164
36165 for (i = 0, d = bdesc_special_args;
36166 i < ARRAY_SIZE (bdesc_special_args);
36167 i++, d++)
36168 if (d->code == fcode)
36169 return ix86_expand_special_args_builtin (d, exp, target);
36170
36171 for (i = 0, d = bdesc_args;
36172 i < ARRAY_SIZE (bdesc_args);
36173 i++, d++)
36174 if (d->code == fcode)
36175 switch (fcode)
36176 {
36177 case IX86_BUILTIN_FABSQ:
36178 case IX86_BUILTIN_COPYSIGNQ:
36179 if (!TARGET_SSE)
36180 /* Emit a normal call if SSE isn't available. */
36181 return expand_call (exp, target, ignore);
36182 default:
36183 return ix86_expand_args_builtin (d, exp, target);
36184 }
36185
36186 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36187 if (d->code == fcode)
36188 return ix86_expand_sse_comi (d, exp, target);
36189
36190 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36191 if (d->code == fcode)
36192 return ix86_expand_round_builtin (d, exp, target);
36193
36194 for (i = 0, d = bdesc_pcmpestr;
36195 i < ARRAY_SIZE (bdesc_pcmpestr);
36196 i++, d++)
36197 if (d->code == fcode)
36198 return ix86_expand_sse_pcmpestr (d, exp, target);
36199
36200 for (i = 0, d = bdesc_pcmpistr;
36201 i < ARRAY_SIZE (bdesc_pcmpistr);
36202 i++, d++)
36203 if (d->code == fcode)
36204 return ix86_expand_sse_pcmpistr (d, exp, target);
36205
36206 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36207 if (d->code == fcode)
36208 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36209 (enum ix86_builtin_func_type)
36210 d->flag, d->comparison);
36211
36212 gcc_unreachable ();
36213 }
36214
36215 /* This returns the target-specific builtin with code CODE if
36216 current_function_decl has visibility on this builtin, which is checked
36217 using isa flags. Returns NULL_TREE otherwise. */
36218
36219 static tree ix86_get_builtin (enum ix86_builtins code)
36220 {
36221 struct cl_target_option *opts;
36222 tree target_tree = NULL_TREE;
36223
36224 /* Determine the isa flags of current_function_decl. */
36225
36226 if (current_function_decl)
36227 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36228
36229 if (target_tree == NULL)
36230 target_tree = target_option_default_node;
36231
36232 opts = TREE_TARGET_OPTION (target_tree);
36233
36234 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36235 return ix86_builtin_decl (code, true);
36236 else
36237 return NULL_TREE;
36238 }
36239
36240 /* Returns a function decl for a vectorized version of the builtin function
36241 with builtin function code FN and the result vector type TYPE, or NULL_TREE
36242 if it is not available. */
36243
36244 static tree
36245 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36246 tree type_in)
36247 {
36248 enum machine_mode in_mode, out_mode;
36249 int in_n, out_n;
36250 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36251
36252 if (TREE_CODE (type_out) != VECTOR_TYPE
36253 || TREE_CODE (type_in) != VECTOR_TYPE
36254 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36255 return NULL_TREE;
36256
36257 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36258 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36259 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36260 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36261
36262 switch (fn)
36263 {
36264 case BUILT_IN_SQRT:
36265 if (out_mode == DFmode && in_mode == DFmode)
36266 {
36267 if (out_n == 2 && in_n == 2)
36268 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36269 else if (out_n == 4 && in_n == 4)
36270 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36271 else if (out_n == 8 && in_n == 8)
36272 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36273 }
36274 break;
36275
36276 case BUILT_IN_EXP2F:
36277 if (out_mode == SFmode && in_mode == SFmode)
36278 {
36279 if (out_n == 16 && in_n == 16)
36280 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36281 }
36282 break;
36283
36284 case BUILT_IN_SQRTF:
36285 if (out_mode == SFmode && in_mode == SFmode)
36286 {
36287 if (out_n == 4 && in_n == 4)
36288 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36289 else if (out_n == 8 && in_n == 8)
36290 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36291 else if (out_n == 16 && in_n == 16)
36292 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36293 }
36294 break;
36295
36296 case BUILT_IN_IFLOOR:
36297 case BUILT_IN_LFLOOR:
36298 case BUILT_IN_LLFLOOR:
36299 /* The round insn does not trap on denormals. */
36300 if (flag_trapping_math || !TARGET_ROUND)
36301 break;
36302
36303 if (out_mode == SImode && in_mode == DFmode)
36304 {
36305 if (out_n == 4 && in_n == 2)
36306 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36307 else if (out_n == 8 && in_n == 4)
36308 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36309 else if (out_n == 16 && in_n == 8)
36310 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36311 }
36312 break;
36313
36314 case BUILT_IN_IFLOORF:
36315 case BUILT_IN_LFLOORF:
36316 case BUILT_IN_LLFLOORF:
36317 /* The round insn does not trap on denormals. */
36318 if (flag_trapping_math || !TARGET_ROUND)
36319 break;
36320
36321 if (out_mode == SImode && in_mode == SFmode)
36322 {
36323 if (out_n == 4 && in_n == 4)
36324 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36325 else if (out_n == 8 && in_n == 8)
36326 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36327 }
36328 break;
36329
36330 case BUILT_IN_ICEIL:
36331 case BUILT_IN_LCEIL:
36332 case BUILT_IN_LLCEIL:
36333 /* The round insn does not trap on denormals. */
36334 if (flag_trapping_math || !TARGET_ROUND)
36335 break;
36336
36337 if (out_mode == SImode && in_mode == DFmode)
36338 {
36339 if (out_n == 4 && in_n == 2)
36340 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36341 else if (out_n == 8 && in_n == 4)
36342 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36343 else if (out_n == 16 && in_n == 8)
36344 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36345 }
36346 break;
36347
36348 case BUILT_IN_ICEILF:
36349 case BUILT_IN_LCEILF:
36350 case BUILT_IN_LLCEILF:
36351 /* The round insn does not trap on denormals. */
36352 if (flag_trapping_math || !TARGET_ROUND)
36353 break;
36354
36355 if (out_mode == SImode && in_mode == SFmode)
36356 {
36357 if (out_n == 4 && in_n == 4)
36358 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36359 else if (out_n == 8 && in_n == 8)
36360 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36361 }
36362 break;
36363
36364 case BUILT_IN_IRINT:
36365 case BUILT_IN_LRINT:
36366 case BUILT_IN_LLRINT:
36367 if (out_mode == SImode && in_mode == DFmode)
36368 {
36369 if (out_n == 4 && in_n == 2)
36370 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36371 else if (out_n == 8 && in_n == 4)
36372 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36373 }
36374 break;
36375
36376 case BUILT_IN_IRINTF:
36377 case BUILT_IN_LRINTF:
36378 case BUILT_IN_LLRINTF:
36379 if (out_mode == SImode && in_mode == SFmode)
36380 {
36381 if (out_n == 4 && in_n == 4)
36382 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36383 else if (out_n == 8 && in_n == 8)
36384 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36385 }
36386 break;
36387
36388 case BUILT_IN_IROUND:
36389 case BUILT_IN_LROUND:
36390 case BUILT_IN_LLROUND:
36391 /* The round insn does not trap on denormals. */
36392 if (flag_trapping_math || !TARGET_ROUND)
36393 break;
36394
36395 if (out_mode == SImode && in_mode == DFmode)
36396 {
36397 if (out_n == 4 && in_n == 2)
36398 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36399 else if (out_n == 8 && in_n == 4)
36400 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36401 else if (out_n == 16 && in_n == 8)
36402 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36403 }
36404 break;
36405
36406 case BUILT_IN_IROUNDF:
36407 case BUILT_IN_LROUNDF:
36408 case BUILT_IN_LLROUNDF:
36409 /* The round insn does not trap on denormals. */
36410 if (flag_trapping_math || !TARGET_ROUND)
36411 break;
36412
36413 if (out_mode == SImode && in_mode == SFmode)
36414 {
36415 if (out_n == 4 && in_n == 4)
36416 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36417 else if (out_n == 8 && in_n == 8)
36418 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36419 }
36420 break;
36421
36422 case BUILT_IN_COPYSIGN:
36423 if (out_mode == DFmode && in_mode == DFmode)
36424 {
36425 if (out_n == 2 && in_n == 2)
36426 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36427 else if (out_n == 4 && in_n == 4)
36428 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36429 else if (out_n == 8 && in_n == 8)
36430 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36431 }
36432 break;
36433
36434 case BUILT_IN_COPYSIGNF:
36435 if (out_mode == SFmode && in_mode == SFmode)
36436 {
36437 if (out_n == 4 && in_n == 4)
36438 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36439 else if (out_n == 8 && in_n == 8)
36440 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36441 else if (out_n == 16 && in_n == 16)
36442 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36443 }
36444 break;
36445
36446 case BUILT_IN_FLOOR:
36447 /* The round insn does not trap on denormals. */
36448 if (flag_trapping_math || !TARGET_ROUND)
36449 break;
36450
36451 if (out_mode == DFmode && in_mode == DFmode)
36452 {
36453 if (out_n == 2 && in_n == 2)
36454 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36455 else if (out_n == 4 && in_n == 4)
36456 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36457 }
36458 break;
36459
36460 case BUILT_IN_FLOORF:
36461 /* The round insn does not trap on denormals. */
36462 if (flag_trapping_math || !TARGET_ROUND)
36463 break;
36464
36465 if (out_mode == SFmode && in_mode == SFmode)
36466 {
36467 if (out_n == 4 && in_n == 4)
36468 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36469 else if (out_n == 8 && in_n == 8)
36470 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36471 }
36472 break;
36473
36474 case BUILT_IN_CEIL:
36475 /* The round insn does not trap on denormals. */
36476 if (flag_trapping_math || !TARGET_ROUND)
36477 break;
36478
36479 if (out_mode == DFmode && in_mode == DFmode)
36480 {
36481 if (out_n == 2 && in_n == 2)
36482 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36483 else if (out_n == 4 && in_n == 4)
36484 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36485 }
36486 break;
36487
36488 case BUILT_IN_CEILF:
36489 /* The round insn does not trap on denormals. */
36490 if (flag_trapping_math || !TARGET_ROUND)
36491 break;
36492
36493 if (out_mode == SFmode && in_mode == SFmode)
36494 {
36495 if (out_n == 4 && in_n == 4)
36496 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36497 else if (out_n == 8 && in_n == 8)
36498 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36499 }
36500 break;
36501
36502 case BUILT_IN_TRUNC:
36503 /* The round insn does not trap on denormals. */
36504 if (flag_trapping_math || !TARGET_ROUND)
36505 break;
36506
36507 if (out_mode == DFmode && in_mode == DFmode)
36508 {
36509 if (out_n == 2 && in_n == 2)
36510 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36511 else if (out_n == 4 && in_n == 4)
36512 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36513 }
36514 break;
36515
36516 case BUILT_IN_TRUNCF:
36517 /* The round insn does not trap on denormals. */
36518 if (flag_trapping_math || !TARGET_ROUND)
36519 break;
36520
36521 if (out_mode == SFmode && in_mode == SFmode)
36522 {
36523 if (out_n == 4 && in_n == 4)
36524 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36525 else if (out_n == 8 && in_n == 8)
36526 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36527 }
36528 break;
36529
36530 case BUILT_IN_RINT:
36531 /* The round insn does not trap on denormals. */
36532 if (flag_trapping_math || !TARGET_ROUND)
36533 break;
36534
36535 if (out_mode == DFmode && in_mode == DFmode)
36536 {
36537 if (out_n == 2 && in_n == 2)
36538 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36539 else if (out_n == 4 && in_n == 4)
36540 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36541 }
36542 break;
36543
36544 case BUILT_IN_RINTF:
36545 /* The round insn does not trap on denormals. */
36546 if (flag_trapping_math || !TARGET_ROUND)
36547 break;
36548
36549 if (out_mode == SFmode && in_mode == SFmode)
36550 {
36551 if (out_n == 4 && in_n == 4)
36552 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36553 else if (out_n == 8 && in_n == 8)
36554 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36555 }
36556 break;
36557
36558 case BUILT_IN_ROUND:
36559 /* The round insn does not trap on denormals. */
36560 if (flag_trapping_math || !TARGET_ROUND)
36561 break;
36562
36563 if (out_mode == DFmode && in_mode == DFmode)
36564 {
36565 if (out_n == 2 && in_n == 2)
36566 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36567 else if (out_n == 4 && in_n == 4)
36568 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36569 }
36570 break;
36571
36572 case BUILT_IN_ROUNDF:
36573 /* The round insn does not trap on denormals. */
36574 if (flag_trapping_math || !TARGET_ROUND)
36575 break;
36576
36577 if (out_mode == SFmode && in_mode == SFmode)
36578 {
36579 if (out_n == 4 && in_n == 4)
36580 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36581 else if (out_n == 8 && in_n == 8)
36582 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36583 }
36584 break;
36585
36586 case BUILT_IN_FMA:
36587 if (out_mode == DFmode && in_mode == DFmode)
36588 {
36589 if (out_n == 2 && in_n == 2)
36590 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36591 if (out_n == 4 && in_n == 4)
36592 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36593 }
36594 break;
36595
36596 case BUILT_IN_FMAF:
36597 if (out_mode == SFmode && in_mode == SFmode)
36598 {
36599 if (out_n == 4 && in_n == 4)
36600 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36601 if (out_n == 8 && in_n == 8)
36602 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36603 }
36604 break;
36605
36606 default:
36607 break;
36608 }
36609
36610 /* Dispatch to a handler for a vectorization library. */
36611 if (ix86_veclib_handler)
36612 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36613 type_in);
36614
36615 return NULL_TREE;
36616 }
36617
36618 /* Handler for an SVML-style interface to
36619 a library with vectorized intrinsics. */
36620
36621 static tree
36622 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36623 {
36624 char name[20];
36625 tree fntype, new_fndecl, args;
36626 unsigned arity;
36627 const char *bname;
36628 enum machine_mode el_mode, in_mode;
36629 int n, in_n;
36630
36631 /* The SVML is suitable for unsafe math only. */
36632 if (!flag_unsafe_math_optimizations)
36633 return NULL_TREE;
36634
36635 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36636 n = TYPE_VECTOR_SUBPARTS (type_out);
36637 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36638 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36639 if (el_mode != in_mode
36640 || n != in_n)
36641 return NULL_TREE;
36642
36643 switch (fn)
36644 {
36645 case BUILT_IN_EXP:
36646 case BUILT_IN_LOG:
36647 case BUILT_IN_LOG10:
36648 case BUILT_IN_POW:
36649 case BUILT_IN_TANH:
36650 case BUILT_IN_TAN:
36651 case BUILT_IN_ATAN:
36652 case BUILT_IN_ATAN2:
36653 case BUILT_IN_ATANH:
36654 case BUILT_IN_CBRT:
36655 case BUILT_IN_SINH:
36656 case BUILT_IN_SIN:
36657 case BUILT_IN_ASINH:
36658 case BUILT_IN_ASIN:
36659 case BUILT_IN_COSH:
36660 case BUILT_IN_COS:
36661 case BUILT_IN_ACOSH:
36662 case BUILT_IN_ACOS:
36663 if (el_mode != DFmode || n != 2)
36664 return NULL_TREE;
36665 break;
36666
36667 case BUILT_IN_EXPF:
36668 case BUILT_IN_LOGF:
36669 case BUILT_IN_LOG10F:
36670 case BUILT_IN_POWF:
36671 case BUILT_IN_TANHF:
36672 case BUILT_IN_TANF:
36673 case BUILT_IN_ATANF:
36674 case BUILT_IN_ATAN2F:
36675 case BUILT_IN_ATANHF:
36676 case BUILT_IN_CBRTF:
36677 case BUILT_IN_SINHF:
36678 case BUILT_IN_SINF:
36679 case BUILT_IN_ASINHF:
36680 case BUILT_IN_ASINF:
36681 case BUILT_IN_COSHF:
36682 case BUILT_IN_COSF:
36683 case BUILT_IN_ACOSHF:
36684 case BUILT_IN_ACOSF:
36685 if (el_mode != SFmode || n != 4)
36686 return NULL_TREE;
36687 break;
36688
36689 default:
36690 return NULL_TREE;
36691 }
36692
36693 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36694
36695 if (fn == BUILT_IN_LOGF)
36696 strcpy (name, "vmlsLn4");
36697 else if (fn == BUILT_IN_LOG)
36698 strcpy (name, "vmldLn2");
36699 else if (n == 4)
36700 {
36701 sprintf (name, "vmls%s", bname+10);
36702 name[strlen (name)-1] = '4';
36703 }
36704 else
36705 sprintf (name, "vmld%s2", bname+10);
36706
36707 /* Convert to uppercase. */
36708 name[4] &= ~0x20;
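/* For example, with this mangling BUILT_IN_SINF ("__builtin_sinf", so
bname+10 is "sinf") becomes "vmlsSin4", and BUILT_IN_SIN ("__builtin_sin")
becomes "vmldSin2"; clearing bit 5 of name[4] upper-cases the first letter
of the math function name.  */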
36709
36710 arity = 0;
36711 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36712 args;
36713 args = TREE_CHAIN (args))
36714 arity++;
36715
36716 if (arity == 1)
36717 fntype = build_function_type_list (type_out, type_in, NULL);
36718 else
36719 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36720
36721 /* Build a function declaration for the vectorized function. */
36722 new_fndecl = build_decl (BUILTINS_LOCATION,
36723 FUNCTION_DECL, get_identifier (name), fntype);
36724 TREE_PUBLIC (new_fndecl) = 1;
36725 DECL_EXTERNAL (new_fndecl) = 1;
36726 DECL_IS_NOVOPS (new_fndecl) = 1;
36727 TREE_READONLY (new_fndecl) = 1;
36728
36729 return new_fndecl;
36730 }
36731
36732 /* Handler for an ACML-style interface to
36733 a library with vectorized intrinsics. */
36734
36735 static tree
36736 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36737 {
36738 char name[20] = "__vr.._";
36739 tree fntype, new_fndecl, args;
36740 unsigned arity;
36741 const char *bname;
36742 enum machine_mode el_mode, in_mode;
36743 int n, in_n;
36744
36745 /* The ACML is 64-bit only and suitable for unsafe math only, as
36746 it does not correctly support parts of IEEE (such as denormals)
36747 with the required precision. */
36748 if (!TARGET_64BIT
36749 || !flag_unsafe_math_optimizations)
36750 return NULL_TREE;
36751
36752 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36753 n = TYPE_VECTOR_SUBPARTS (type_out);
36754 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36755 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36756 if (el_mode != in_mode
36757 || n != in_n)
36758 return NULL_TREE;
36759
36760 switch (fn)
36761 {
36762 case BUILT_IN_SIN:
36763 case BUILT_IN_COS:
36764 case BUILT_IN_EXP:
36765 case BUILT_IN_LOG:
36766 case BUILT_IN_LOG2:
36767 case BUILT_IN_LOG10:
36768 name[4] = 'd';
36769 name[5] = '2';
36770 if (el_mode != DFmode
36771 || n != 2)
36772 return NULL_TREE;
36773 break;
36774
36775 case BUILT_IN_SINF:
36776 case BUILT_IN_COSF:
36777 case BUILT_IN_EXPF:
36778 case BUILT_IN_POWF:
36779 case BUILT_IN_LOGF:
36780 case BUILT_IN_LOG2F:
36781 case BUILT_IN_LOG10F:
36782 name[4] = 's';
36783 name[5] = '4';
36784 if (el_mode != SFmode
36785 || n != 4)
36786 return NULL_TREE;
36787 break;
36788
36789 default:
36790 return NULL_TREE;
36791 }
36792
36793 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36794 sprintf (name + 7, "%s", bname+10);
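/* For example, BUILT_IN_SIN ("__builtin_sin") yields "__vrd2_sin" and
BUILT_IN_SINF ("__builtin_sinf") yields "__vrs4_sinf".  */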
36795
36796 arity = 0;
36797 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36798 args;
36799 args = TREE_CHAIN (args))
36800 arity++;
36801
36802 if (arity == 1)
36803 fntype = build_function_type_list (type_out, type_in, NULL);
36804 else
36805 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36806
36807 /* Build a function declaration for the vectorized function. */
36808 new_fndecl = build_decl (BUILTINS_LOCATION,
36809 FUNCTION_DECL, get_identifier (name), fntype);
36810 TREE_PUBLIC (new_fndecl) = 1;
36811 DECL_EXTERNAL (new_fndecl) = 1;
36812 DECL_IS_NOVOPS (new_fndecl) = 1;
36813 TREE_READONLY (new_fndecl) = 1;
36814
36815 return new_fndecl;
36816 }
36817
36818 /* Returns a decl of a function that implements gather load with
36819 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
36820 Return NULL_TREE if it is not available. */
36821
36822 static tree
36823 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36824 const_tree index_type, int scale)
36825 {
36826 bool si;
36827 enum ix86_builtins code;
36828
36829 if (! TARGET_AVX2)
36830 return NULL_TREE;
36831
36832 if ((TREE_CODE (index_type) != INTEGER_TYPE
36833 && !POINTER_TYPE_P (index_type))
36834 || (TYPE_MODE (index_type) != SImode
36835 && TYPE_MODE (index_type) != DImode))
36836 return NULL_TREE;
36837
36838 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36839 return NULL_TREE;
36840
36841 /* v*gather* insn sign extends index to pointer mode. */
36842 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36843 && TYPE_UNSIGNED (index_type))
36844 return NULL_TREE;
36845
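/* The scale must be 1, 2, 4 or 8, i.e. a positive power of two no
larger than 8.  */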
36846 if (scale <= 0
36847 || scale > 8
36848 || (scale & (scale - 1)) != 0)
36849 return NULL_TREE;
36850
36851 si = TYPE_MODE (index_type) == SImode;
36852 switch (TYPE_MODE (mem_vectype))
36853 {
36854 case V2DFmode:
36855 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36856 break;
36857 case V4DFmode:
36858 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
36859 break;
36860 case V2DImode:
36861 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
36862 break;
36863 case V4DImode:
36864 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
36865 break;
36866 case V4SFmode:
36867 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
36868 break;
36869 case V8SFmode:
36870 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
36871 break;
36872 case V4SImode:
36873 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
36874 break;
36875 case V8SImode:
36876 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
36877 break;
36878 case V8DFmode:
36879 if (TARGET_AVX512F)
36880 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
36881 else
36882 return NULL_TREE;
36883 break;
36884 case V8DImode:
36885 if (TARGET_AVX512F)
36886 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
36887 else
36888 return NULL_TREE;
36889 break;
36890 case V16SFmode:
36891 if (TARGET_AVX512F)
36892 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
36893 else
36894 return NULL_TREE;
36895 break;
36896 case V16SImode:
36897 if (TARGET_AVX512F)
36898 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
36899 else
36900 return NULL_TREE;
36901 break;
36902 default:
36903 return NULL_TREE;
36904 }
36905
36906 return ix86_get_builtin (code);
36907 }
36908
36909 /* Returns a decl for a target-specific builtin that implements the
36910 reciprocal of the function, or NULL_TREE if not available. */
36911
36912 static tree
36913 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
36914 bool sqrt ATTRIBUTE_UNUSED)
36915 {
36916 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
36917 && flag_finite_math_only && !flag_trapping_math
36918 && flag_unsafe_math_optimizations))
36919 return NULL_TREE;
36920
36921 if (md_fn)
36922 /* Machine dependent builtins. */
36923 switch (fn)
36924 {
36925 /* Vectorized version of sqrt to rsqrt conversion. */
36926 case IX86_BUILTIN_SQRTPS_NR:
36927 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
36928
36929 case IX86_BUILTIN_SQRTPS_NR256:
36930 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
36931
36932 default:
36933 return NULL_TREE;
36934 }
36935 else
36936 /* Normal builtins. */
36937 switch (fn)
36938 {
36939 /* Sqrt to rsqrt conversion. */
36940 case BUILT_IN_SQRTF:
36941 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
36942
36943 default:
36944 return NULL_TREE;
36945 }
36946 }
36947 \f
36948 /* Helper for avx_vpermilps256_operand et al. This is also used by
36949 the expansion functions to turn the parallel back into a mask.
36950 The return value is 0 for no match and the imm8+1 for a match. */
36951
36952 int
36953 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
36954 {
36955 unsigned i, nelt = GET_MODE_NUNITS (mode);
36956 unsigned mask = 0;
36957 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
36958
36959 if (XVECLEN (par, 0) != (int) nelt)
36960 return 0;
36961
36962 /* Validate that all of the elements are constants, and not totally
36963 out of range. Copy the data into an integral array to make the
36964 subsequent checks easier. */
36965 for (i = 0; i < nelt; ++i)
36966 {
36967 rtx er = XVECEXP (par, 0, i);
36968 unsigned HOST_WIDE_INT ei;
36969
36970 if (!CONST_INT_P (er))
36971 return 0;
36972 ei = INTVAL (er);
36973 if (ei >= nelt)
36974 return 0;
36975 ipar[i] = ei;
36976 }
36977
36978 switch (mode)
36979 {
36980 case V8DFmode:
36981 /* In the 512-bit DFmode case, we can only move elements within
36982 a 128-bit lane. First fill the second part of the mask,
36983 then fallthru. */
36984 for (i = 4; i < 6; ++i)
36985 {
36986 if (ipar[i] < 4 || ipar[i] >= 6)
36987 return 0;
36988 mask |= (ipar[i] - 4) << i;
36989 }
36990 for (i = 6; i < 8; ++i)
36991 {
36992 if (ipar[i] < 6)
36993 return 0;
36994 mask |= (ipar[i] - 6) << i;
36995 }
36996 /* FALLTHRU */
36997
36998 case V4DFmode:
36999 /* In the 256-bit DFmode case, we can only move elements within
37000 a 128-bit lane. */
37001 for (i = 0; i < 2; ++i)
37002 {
37003 if (ipar[i] >= 2)
37004 return 0;
37005 mask |= ipar[i] << i;
37006 }
37007 for (i = 2; i < 4; ++i)
37008 {
37009 if (ipar[i] < 2)
37010 return 0;
37011 mask |= (ipar[i] - 2) << i;
37012 }
37013 break;
37014
37015 case V16SFmode:
37016 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
37017 must mirror the permutation in the lower 256 bits. */
37018 for (i = 0; i < 8; ++i)
37019 if (ipar[i] + 8 != ipar[i + 8])
37020 return 0;
37021 /* FALLTHRU */
37022
37023 case V8SFmode:
37024 /* In the 256-bit SFmode case, we have full freedom of
37025 movement within the low 128-bit lane, but the high 128-bit
37026 lane must mirror the exact same pattern. */
37027 for (i = 0; i < 4; ++i)
37028 if (ipar[i] + 4 != ipar[i + 4])
37029 return 0;
37030 nelt = 4;
37031 /* FALLTHRU */
37032
37033 case V2DFmode:
37034 case V4SFmode:
37035 /* In the 128-bit case, we've full freedom in the placement of
37036 the elements from the source operand. */
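/* For example, the V4SFmode parallel (2 3 0 1) encodes here as
imm8 0x4e, and the function returns 0x4f.  */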
37037 for (i = 0; i < nelt; ++i)
37038 mask |= ipar[i] << (i * (nelt / 2));
37039 break;
37040
37041 default:
37042 gcc_unreachable ();
37043 }
37044
37045 /* Make sure success has a non-zero value by adding one. */
37046 return mask + 1;
37047 }
37048
37049 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
37050 the expansion functions to turn the parallel back into a mask.
37051 The return value is 0 for no match and the imm8+1 for a match. */
37052
37053 int
37054 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
37055 {
37056 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
37057 unsigned mask = 0;
37058 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
37059
37060 if (XVECLEN (par, 0) != (int) nelt)
37061 return 0;
37062
37063 /* Validate that all of the elements are constants, and not totally
37064 out of range. Copy the data into an integral array to make the
37065 subsequent checks easier. */
37066 for (i = 0; i < nelt; ++i)
37067 {
37068 rtx er = XVECEXP (par, 0, i);
37069 unsigned HOST_WIDE_INT ei;
37070
37071 if (!CONST_INT_P (er))
37072 return 0;
37073 ei = INTVAL (er);
37074 if (ei >= 2 * nelt)
37075 return 0;
37076 ipar[i] = ei;
37077 }
37078
37079 /* Validate that each half of the permute selects consecutive elements. */
37080 for (i = 0; i < nelt2 - 1; ++i)
37081 if (ipar[i] + 1 != ipar[i + 1])
37082 return 0;
37083 for (i = nelt2; i < nelt - 1; ++i)
37084 if (ipar[i] + 1 != ipar[i + 1])
37085 return 0;
37086
37087 /* Reconstruct the mask. */
37088 for (i = 0; i < 2; ++i)
37089 {
37090 unsigned e = ipar[i * nelt2];
37091 if (e % nelt2)
37092 return 0;
37093 e /= nelt2;
37094 mask |= e << (i * 4);
37095 }
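/* For example, the V8SFmode parallel (8 9 10 11 0 1 2 3), i.e. elements
8..11 of the concatenated operands followed by elements 0..3,
reconstructs to mask 0x02, so the function returns 3.  */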
37096
37097 /* Make sure success has a non-zero value by adding one. */
37098 return mask + 1;
37099 }
37100 \f
37101 /* Return a register priority for hard reg REGNO. */
37102 static int
37103 ix86_register_priority (int hard_regno)
37104 {
37105 /* As a base register, ebp and r13 always want a displacement, and
37106 r12 always wants an index. So discourage their use in an
37107 address. */
37108 if (hard_regno == R12_REG || hard_regno == R13_REG)
37109 return 0;
37110 if (hard_regno == BP_REG)
37111 return 1;
37112 /* New x86-64 int registers result in bigger code size. Discourage
37113 them. */
37114 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37115 return 2;
37116 /* New x86-64 SSE registers result in bigger code size. Discourage
37117 them. */
37118 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37119 return 2;
37120 /* Usage of AX register results in smaller code. Prefer it. */
37121 if (hard_regno == 0)
37122 return 4;
37123 return 3;
37124 }
37125
37126 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37127
37128 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37129 QImode must go into class Q_REGS.
37130 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37131 movdf to do mem-to-mem moves through integer regs. */
37132
37133 static reg_class_t
37134 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37135 {
37136 enum machine_mode mode = GET_MODE (x);
37137
37138 /* We're only allowed to return a subclass of CLASS. Many of the
37139 following checks fail for NO_REGS, so eliminate that early. */
37140 if (regclass == NO_REGS)
37141 return NO_REGS;
37142
37143 /* All classes can load zeros. */
37144 if (x == CONST0_RTX (mode))
37145 return regclass;
37146
37147 /* Force constants into memory if we are loading a (nonzero) constant into
37148 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37149 instructions to load from a constant. */
37150 if (CONSTANT_P (x)
37151 && (MAYBE_MMX_CLASS_P (regclass)
37152 || MAYBE_SSE_CLASS_P (regclass)
37153 || MAYBE_MASK_CLASS_P (regclass)))
37154 return NO_REGS;
37155
37156 /* Prefer SSE regs only, if we can use them for math. */
37157 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37158 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37159
37160 /* Floating-point constants need more complex checks. */
37161 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37162 {
37163 /* General regs can load everything. */
37164 if (reg_class_subset_p (regclass, GENERAL_REGS))
37165 return regclass;
37166
37167 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37168 zero above. We only want to wind up preferring 80387 registers if
37169 we plan on doing computation with them. */
37170 if (TARGET_80387
37171 && standard_80387_constant_p (x) > 0)
37172 {
37173 /* Limit class to non-sse. */
37174 if (regclass == FLOAT_SSE_REGS)
37175 return FLOAT_REGS;
37176 if (regclass == FP_TOP_SSE_REGS)
37177 return FP_TOP_REG;
37178 if (regclass == FP_SECOND_SSE_REGS)
37179 return FP_SECOND_REG;
37180 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37181 return regclass;
37182 }
37183
37184 return NO_REGS;
37185 }
37186
37187 /* Generally when we see PLUS here, it's the function invariant
37188 (plus soft-fp const_int). Which can only be computed into general
37189 regs. */
37190 if (GET_CODE (x) == PLUS)
37191 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37192
37193 /* QImode constants are easy to load, but non-constant QImode data
37194 must go into Q_REGS. */
37195 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37196 {
37197 if (reg_class_subset_p (regclass, Q_REGS))
37198 return regclass;
37199 if (reg_class_subset_p (Q_REGS, regclass))
37200 return Q_REGS;
37201 return NO_REGS;
37202 }
37203
37204 return regclass;
37205 }
37206
37207 /* Discourage putting floating-point values in SSE registers unless
37208 SSE math is being used, and likewise for the 387 registers. */
37209 static reg_class_t
37210 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37211 {
37212 enum machine_mode mode = GET_MODE (x);
37213
37214 /* Restrict the output reload class to the register bank that we are doing
37215 math on. If we would like not to return a subset of CLASS, reject this
37216 alternative: if reload cannot do this, it will still use its choice. */
37217 mode = GET_MODE (x);
37218 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37219 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37220
37221 if (X87_FLOAT_MODE_P (mode))
37222 {
37223 if (regclass == FP_TOP_SSE_REGS)
37224 return FP_TOP_REG;
37225 else if (regclass == FP_SECOND_SSE_REGS)
37226 return FP_SECOND_REG;
37227 else
37228 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37229 }
37230
37231 return regclass;
37232 }
37233
37234 static reg_class_t
37235 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37236 enum machine_mode mode, secondary_reload_info *sri)
37237 {
37238 /* Double-word spills from general registers to non-offsettable memory
37239 references (zero-extended addresses) require special handling. */
37240 if (TARGET_64BIT
37241 && MEM_P (x)
37242 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37243 && INTEGER_CLASS_P (rclass)
37244 && !offsettable_memref_p (x))
37245 {
37246 sri->icode = (in_p
37247 ? CODE_FOR_reload_noff_load
37248 : CODE_FOR_reload_noff_store);
37249 /* Add the cost of moving address to a temporary. */
37250 sri->extra_cost = 1;
37251
37252 return NO_REGS;
37253 }
37254
37255 /* QImode spills from non-QI registers require
37256 intermediate register on 32bit targets. */
37257 if (mode == QImode
37258 && (MAYBE_MASK_CLASS_P (rclass)
37259 || (!TARGET_64BIT && !in_p
37260 && INTEGER_CLASS_P (rclass)
37261 && MAYBE_NON_Q_CLASS_P (rclass))))
37262 {
37263 int regno;
37264
37265 if (REG_P (x))
37266 regno = REGNO (x);
37267 else
37268 regno = -1;
37269
37270 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37271 regno = true_regnum (x);
37272
37273 /* Return Q_REGS if the operand is in memory. */
37274 if (regno == -1)
37275 return Q_REGS;
37276 }
37277
37278 /* This condition handles corner case where an expression involving
37279 pointers gets vectorized. We're trying to use the address of a
37280 stack slot as a vector initializer.
37281
37282 (set (reg:V2DI 74 [ vect_cst_.2 ])
37283 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37284
37285 Eventually frame gets turned into sp+offset like this:
37286
37287 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37288 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37289 (const_int 392 [0x188]))))
37290
37291 That later gets turned into:
37292
37293 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37294 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37295 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37296
37297 We'll have the following reload recorded:
37298
37299 Reload 0: reload_in (DI) =
37300 (plus:DI (reg/f:DI 7 sp)
37301 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37302 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37303 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37304 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37305 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37306 reload_reg_rtx: (reg:V2DI 22 xmm1)
37307
37308 Which isn't going to work since SSE instructions can't handle scalar
37309 additions. Returning GENERAL_REGS forces the addition into integer
37310 register and reload can handle subsequent reloads without problems. */
37311
37312 if (in_p && GET_CODE (x) == PLUS
37313 && SSE_CLASS_P (rclass)
37314 && SCALAR_INT_MODE_P (mode))
37315 return GENERAL_REGS;
37316
37317 return NO_REGS;
37318 }
37319
37320 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37321
37322 static bool
37323 ix86_class_likely_spilled_p (reg_class_t rclass)
37324 {
37325 switch (rclass)
37326 {
37327 case AREG:
37328 case DREG:
37329 case CREG:
37330 case BREG:
37331 case AD_REGS:
37332 case SIREG:
37333 case DIREG:
37334 case SSE_FIRST_REG:
37335 case FP_TOP_REG:
37336 case FP_SECOND_REG:
37337 return true;
37338
37339 default:
37340 break;
37341 }
37342
37343 return false;
37344 }
37345
37346 /* If we are copying between general and FP registers, we need a memory
37347 location. The same is true for SSE and MMX registers.
37348
37349 To optimize register_move_cost performance, allow inline variant.
37350
37351 The macro can't work reliably when one of the CLASSES is a class containing
37352 registers from multiple units (SSE, MMX, integer). We avoid this by never
37353 combining those units in a single alternative in the machine description.
37354 Ensure that this constraint holds to avoid unexpected surprises.
37355
37356 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37357 enforce these sanity checks. */
37358
37359 static inline bool
37360 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37361 enum machine_mode mode, int strict)
37362 {
37363 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37364 return false;
37365 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37366 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37367 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37368 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37369 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37370 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37371 {
37372 gcc_assert (!strict || lra_in_progress);
37373 return true;
37374 }
37375
37376 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37377 return true;
37378
37379 /* ??? This is a lie. We do have moves between mmx/general, and for
37380 mmx/sse2. But by saying we need secondary memory we discourage the
37381 register allocator from using the mmx registers unless needed. */
37382 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37383 return true;
37384
37385 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37386 {
37387 /* SSE1 doesn't have any direct moves from other classes. */
37388 if (!TARGET_SSE2)
37389 return true;
37390
37391 /* If the target says that inter-unit moves are more expensive
37392 than moving through memory, then don't generate them. */
37393 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37394 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37395 return true;
37396
37397 /* Between SSE and general, we have moves no larger than word size. */
37398 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37399 return true;
37400 }
37401
37402 return false;
37403 }
37404
37405 bool
37406 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37407 enum machine_mode mode, int strict)
37408 {
37409 return inline_secondary_memory_needed (class1, class2, mode, strict);
37410 }
37411
37412 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37413
37414 On the 80386, this is the size of MODE in words,
37415 except in the FP regs, where a single reg is always enough. */
37416
37417 static unsigned char
37418 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37419 {
37420 if (MAYBE_INTEGER_CLASS_P (rclass))
37421 {
37422 if (mode == XFmode)
37423 return (TARGET_64BIT ? 2 : 3);
37424 else if (mode == XCmode)
37425 return (TARGET_64BIT ? 4 : 6);
37426 else
37427 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37428 }
37429 else
37430 {
37431 if (COMPLEX_MODE_P (mode))
37432 return 2;
37433 else
37434 return 1;
37435 }
37436 }
37437
37438 /* Return true if the registers in CLASS cannot represent the change from
37439 modes FROM to TO. */
37440
37441 bool
37442 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37443 enum reg_class regclass)
37444 {
37445 if (from == to)
37446 return false;
37447
37448 /* x87 registers can't do subreg at all, as all values are reformatted
37449 to extended precision. */
37450 if (MAYBE_FLOAT_CLASS_P (regclass))
37451 return true;
37452
37453 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37454 {
37455 /* Vector registers do not support QI or HImode loads. If we don't
37456 disallow a change to these modes, reload will assume it's ok to
37457 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37458 the vec_dupv4hi pattern. */
37459 if (GET_MODE_SIZE (from) < 4)
37460 return true;
37461
37462 /* Vector registers do not support subreg with nonzero offsets, which
37463 are otherwise valid for integer registers. Since we can't see
37464 whether we have a nonzero offset from here, prohibit all
37465 nonparadoxical subregs changing size. */
37466 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
37467 return true;
37468 }
37469
37470 return false;
37471 }
37472
37473 /* Return the cost of moving data of mode M between a
37474 register and memory. A value of 2 is the default; this cost is
37475 relative to those in `REGISTER_MOVE_COST'.
37476
37477 This function is used extensively by register_move_cost that is used to
37478 build tables at startup. Make it inline in this case.
37479 When IN is 2, return maximum of in and out move cost.
37480
37481 If moving between registers and memory is more expensive than
37482 between two registers, you should define this macro to express the
37483 relative cost.
37484
37485 Also model the increased cost of moving QImode values held in
37486 registers outside the Q_REGS classes.
37487 */
37488 static inline int
37489 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37490 int in)
37491 {
37492 int cost;
37493 if (FLOAT_CLASS_P (regclass))
37494 {
37495 int index;
37496 switch (mode)
37497 {
37498 case SFmode:
37499 index = 0;
37500 break;
37501 case DFmode:
37502 index = 1;
37503 break;
37504 case XFmode:
37505 index = 2;
37506 break;
37507 default:
37508 return 100;
37509 }
37510 if (in == 2)
37511 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37512 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37513 }
37514 if (SSE_CLASS_P (regclass))
37515 {
37516 int index;
37517 switch (GET_MODE_SIZE (mode))
37518 {
37519 case 4:
37520 index = 0;
37521 break;
37522 case 8:
37523 index = 1;
37524 break;
37525 case 16:
37526 index = 2;
37527 break;
37528 default:
37529 return 100;
37530 }
37531 if (in == 2)
37532 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37533 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37534 }
37535 if (MMX_CLASS_P (regclass))
37536 {
37537 int index;
37538 switch (GET_MODE_SIZE (mode))
37539 {
37540 case 4:
37541 index = 0;
37542 break;
37543 case 8:
37544 index = 1;
37545 break;
37546 default:
37547 return 100;
37548 }
37549 if (in == 2)
37550 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37551 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37552 }
37553 switch (GET_MODE_SIZE (mode))
37554 {
37555 case 1:
37556 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37557 {
37558 if (!in)
37559 return ix86_cost->int_store[0];
37560 if (TARGET_PARTIAL_REG_DEPENDENCY
37561 && optimize_function_for_speed_p (cfun))
37562 cost = ix86_cost->movzbl_load;
37563 else
37564 cost = ix86_cost->int_load[0];
37565 if (in == 2)
37566 return MAX (cost, ix86_cost->int_store[0]);
37567 return cost;
37568 }
37569 else
37570 {
37571 if (in == 2)
37572 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37573 if (in)
37574 return ix86_cost->movzbl_load;
37575 else
37576 return ix86_cost->int_store[0] + 4;
37577 }
37578 break;
37579 case 2:
37580 if (in == 2)
37581 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37582 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37583 default:
37584 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
37585 if (mode == TFmode)
37586 mode = XFmode;
37587 if (in == 2)
37588 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37589 else if (in)
37590 cost = ix86_cost->int_load[2];
37591 else
37592 cost = ix86_cost->int_store[2];
37593 return (cost * (((int) GET_MODE_SIZE (mode)
37594 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37595 }
37596 }
37597
37598 static int
37599 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37600 bool in)
37601 {
37602 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37603 }
37604
37605
37606 /* Return the cost of moving data from a register in class CLASS1 to
37607 one in class CLASS2.
37608
37609 It is not required that the cost always equal 2 when FROM is the same as TO;
37610 on some machines it is expensive to move between registers if they are not
37611 general registers. */
37612
37613 static int
37614 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37615 reg_class_t class2_i)
37616 {
37617 enum reg_class class1 = (enum reg_class) class1_i;
37618 enum reg_class class2 = (enum reg_class) class2_i;
37619
37620 /* In case we require secondary memory, compute cost of the store followed
37621 by load. In order to avoid bad register allocation choices, we need
37622 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37623
37624 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37625 {
37626 int cost = 1;
37627
37628 cost += inline_memory_move_cost (mode, class1, 2);
37629 cost += inline_memory_move_cost (mode, class2, 2);
37630
37631 /* In case of copying from general_purpose_register we may emit multiple
37632 stores followed by single load causing memory size mismatch stall.
37633 Count this as arbitrarily high cost of 20. */
37634 if (targetm.class_max_nregs (class1, mode)
37635 > targetm.class_max_nregs (class2, mode))
37636 cost += 20;
37637
37638 /* In the case of FP/MMX moves, the registers actually overlap, and we
37639 have to switch modes in order to treat them differently. */
37640 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37641 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37642 cost += 20;
37643
37644 return cost;
37645 }
37646
37647 /* Moves between SSE/MMX and integer unit are expensive. */
37648 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37649 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37650
37651 /* ??? By keeping returned value relatively high, we limit the number
37652 of moves between integer and MMX/SSE registers for all targets.
37653 Additionally, a high value prevents a problem with ix86_modes_tieable_p(),
37654 where integer modes in MMX/SSE registers are not tieable
37655 because of missing QImode and HImode moves to, from or between
37656 MMX/SSE registers. */
37657 return MAX (8, ix86_cost->mmxsse_to_integer);
37658
37659 if (MAYBE_FLOAT_CLASS_P (class1))
37660 return ix86_cost->fp_move;
37661 if (MAYBE_SSE_CLASS_P (class1))
37662 return ix86_cost->sse_move;
37663 if (MAYBE_MMX_CLASS_P (class1))
37664 return ix86_cost->mmx_move;
37665 return 2;
37666 }
37667
37668 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37669 MODE. */
37670
37671 bool
37672 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37673 {
37674 /* Flags and only flags can only hold CCmode values. */
37675 if (CC_REGNO_P (regno))
37676 return GET_MODE_CLASS (mode) == MODE_CC;
37677 if (GET_MODE_CLASS (mode) == MODE_CC
37678 || GET_MODE_CLASS (mode) == MODE_RANDOM
37679 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37680 return false;
37681 if (STACK_REGNO_P (regno))
37682 return VALID_FP_MODE_P (mode);
37683 if (MASK_REGNO_P (regno))
37684 return VALID_MASK_REG_MODE (mode);
37685 if (SSE_REGNO_P (regno))
37686 {
37687 /* We implement the move patterns for all vector modes into and
37688 out of SSE registers, even when no operation instructions
37689 are available. */
37690
37691 /* For AVX-512 we allow, regardless of regno:
37692 - XI mode
37693 - any of 512-bit wide vector mode
37694 - any scalar mode. */
37695 if (TARGET_AVX512F
37696 && (mode == XImode
37697 || VALID_AVX512F_REG_MODE (mode)
37698 || VALID_AVX512F_SCALAR_MODE (mode)))
37699 return true;
37700
37701 /* xmm16-xmm31 are only available for AVX-512. */
37702 if (EXT_REX_SSE_REGNO_P (regno))
37703 return false;
37704
37705 /* OImode and AVX modes are available only when AVX is enabled. */
37706 return ((TARGET_AVX
37707 && VALID_AVX256_REG_OR_OI_MODE (mode))
37708 || VALID_SSE_REG_MODE (mode)
37709 || VALID_SSE2_REG_MODE (mode)
37710 || VALID_MMX_REG_MODE (mode)
37711 || VALID_MMX_REG_MODE_3DNOW (mode));
37712 }
37713 if (MMX_REGNO_P (regno))
37714 {
37715 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37716 so if the register is available at all, then we can move data of
37717 the given mode into or out of it. */
37718 return (VALID_MMX_REG_MODE (mode)
37719 || VALID_MMX_REG_MODE_3DNOW (mode));
37720 }
37721
37722 if (mode == QImode)
37723 {
37724 /* Take care for QImode values - they can be in non-QI regs,
37725 but then they do cause partial register stalls. */
37726 if (ANY_QI_REGNO_P (regno))
37727 return true;
37728 if (!TARGET_PARTIAL_REG_STALL)
37729 return true;
37730 /* LRA checks if the hard register is OK for the given mode.
37731 QImode values can live in non-QI regs, so we allow all
37732 registers here. */
37733 if (lra_in_progress)
37734 return true;
37735 return !can_create_pseudo_p ();
37736 }
37737 /* We handle both integer and floats in the general purpose registers. */
37738 else if (VALID_INT_MODE_P (mode))
37739 return true;
37740 else if (VALID_FP_MODE_P (mode))
37741 return true;
37742 else if (VALID_DFP_MODE_P (mode))
37743 return true;
37744 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
37745 on to use that value in smaller contexts, this can easily force a
37746 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37747 supporting DImode, allow it. */
37748 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37749 return true;
37750
37751 return false;
37752 }
37753
37754 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37755 tieable integer mode. */
37756
37757 static bool
37758 ix86_tieable_integer_mode_p (enum machine_mode mode)
37759 {
37760 switch (mode)
37761 {
37762 case HImode:
37763 case SImode:
37764 return true;
37765
37766 case QImode:
37767 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37768
37769 case DImode:
37770 return TARGET_64BIT;
37771
37772 default:
37773 return false;
37774 }
37775 }
37776
37777 /* Return true if MODE1 is accessible in a register that can hold MODE2
37778 without copying. That is, all register classes that can hold MODE2
37779 can also hold MODE1. */
37780
37781 bool
37782 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37783 {
37784 if (mode1 == mode2)
37785 return true;
37786
37787 if (ix86_tieable_integer_mode_p (mode1)
37788 && ix86_tieable_integer_mode_p (mode2))
37789 return true;
37790
37791 /* MODE2 being XFmode implies fp stack or general regs, which means we
37792 can tie any smaller floating point modes to it. Note that we do not
37793 tie this with TFmode. */
37794 if (mode2 == XFmode)
37795 return mode1 == SFmode || mode1 == DFmode;
37796
37797 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37798 that we can tie it with SFmode. */
37799 if (mode2 == DFmode)
37800 return mode1 == SFmode;
37801
37802 /* If MODE2 is only appropriate for an SSE register, then tie with
37803 any other mode acceptable to SSE registers. */
37804 if (GET_MODE_SIZE (mode2) == 32
37805 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37806 return (GET_MODE_SIZE (mode1) == 32
37807 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37808 if (GET_MODE_SIZE (mode2) == 16
37809 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37810 return (GET_MODE_SIZE (mode1) == 16
37811 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37812
37813 /* If MODE2 is appropriate for an MMX register, then tie
37814 with any other mode acceptable to MMX registers. */
37815 if (GET_MODE_SIZE (mode2) == 8
37816 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37817 return (GET_MODE_SIZE (mode1) == 8
37818 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37819
37820 return false;
37821 }
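/* Worked examples (illustrative, not in the original source):
   ix86_modes_tieable_p (SFmode, DFmode) is true -- every class that can hold
   DFmode (x87 stack, general or SSE regs) can also hold SFmode;
   ix86_modes_tieable_p (SFmode, XFmode) is likewise true, while XFmode is
   deliberately never tied with TFmode;
   on an SSE2 target, two 16-byte vector modes such as V4SFmode and V2DImode
   tie with each other because both are acceptable to the SSE registers.  */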
37822
37823 /* Return the cost of moving between two registers of mode MODE. */
37824
37825 static int
37826 ix86_set_reg_reg_cost (enum machine_mode mode)
37827 {
37828 unsigned int units = UNITS_PER_WORD;
37829
37830 switch (GET_MODE_CLASS (mode))
37831 {
37832 default:
37833 break;
37834
37835 case MODE_CC:
37836 units = GET_MODE_SIZE (CCmode);
37837 break;
37838
37839 case MODE_FLOAT:
37840 if ((TARGET_SSE && mode == TFmode)
37841 || (TARGET_80387 && mode == XFmode)
37842 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37843 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37844 units = GET_MODE_SIZE (mode);
37845 break;
37846
37847 case MODE_COMPLEX_FLOAT:
37848 if ((TARGET_SSE && mode == TCmode)
37849 || (TARGET_80387 && mode == XCmode)
37850 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37851 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
37852 units = GET_MODE_SIZE (mode);
37853 break;
37854
37855 case MODE_VECTOR_INT:
37856 case MODE_VECTOR_FLOAT:
37857 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
37858 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37859 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37860 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37861 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
37862 units = GET_MODE_SIZE (mode);
37863 }
37864
37865 /* Return the cost of moving between two registers of mode MODE,
37866 assuming that the move will be in pieces of at most UNITS bytes. */
37867 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
37868 }
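/* Example (illustrative only): on -m32 a DImode register-register set keeps
   UNITS at UNITS_PER_WORD == 4, so the cost is COSTS_N_INSNS ((8 + 3) / 4)
   == COSTS_N_INSNS (2), i.e. two word-sized moves; a V4SFmode copy with SSE
   enabled sets UNITS to 16 and therefore costs only COSTS_N_INSNS (1).  */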
37869
37870 /* Compute a (partial) cost for rtx X. Return true if the complete
37871 cost has been computed, and false if subexpressions should be
37872 scanned. In either case, *TOTAL contains the cost result. */
37873
37874 static bool
37875 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
37876 bool speed)
37877 {
37878 rtx mask;
37879 enum rtx_code code = (enum rtx_code) code_i;
37880 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
37881 enum machine_mode mode = GET_MODE (x);
37882 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
37883
37884 switch (code)
37885 {
37886 case SET:
37887 if (register_operand (SET_DEST (x), VOIDmode)
37888 && reg_or_0_operand (SET_SRC (x), VOIDmode))
37889 {
37890 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
37891 return true;
37892 }
37893 return false;
37894
37895 case CONST_INT:
37896 case CONST:
37897 case LABEL_REF:
37898 case SYMBOL_REF:
37899 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
37900 *total = 3;
37901 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
37902 *total = 2;
37903 else if (flag_pic && SYMBOLIC_CONST (x)
37904 && (!TARGET_64BIT
37905 || (GET_CODE (x) != LABEL_REF
37906 && (GET_CODE (x) != SYMBOL_REF
37907 || !SYMBOL_REF_LOCAL_P (x)))))
37908 *total = 1;
37909 else
37910 *total = 0;
37911 return true;
37912
37913 case CONST_DOUBLE:
37914 if (mode == VOIDmode)
37915 {
37916 *total = 0;
37917 return true;
37918 }
37919 switch (standard_80387_constant_p (x))
37920 {
37921 case 1: /* 0.0 */
37922 *total = 1;
37923 return true;
37924 default: /* Other constants */
37925 *total = 2;
37926 return true;
37927 case 0:
37928 case -1:
37929 break;
37930 }
37931 if (SSE_FLOAT_MODE_P (mode))
37932 {
37933 case CONST_VECTOR:
37934 switch (standard_sse_constant_p (x))
37935 {
37936 case 0:
37937 break;
37938 case 1: /* 0: xor eliminates false dependency */
37939 *total = 0;
37940 return true;
37941 default: /* -1: cmp contains false dependency */
37942 *total = 1;
37943 return true;
37944 }
37945 }
37946 /* Fall back to (MEM (SYMBOL_REF)), since that's where
37947 it'll probably end up. Add a penalty for size. */
37948 *total = (COSTS_N_INSNS (1)
37949 + (flag_pic != 0 && !TARGET_64BIT)
37950 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
37951 return true;
37952
37953 case ZERO_EXTEND:
37954 /* The zero extension is often completely free on x86_64, so make
37955 it as cheap as possible. */
37956 if (TARGET_64BIT && mode == DImode
37957 && GET_MODE (XEXP (x, 0)) == SImode)
37958 *total = 1;
37959 else if (TARGET_ZERO_EXTEND_WITH_AND)
37960 *total = cost->add;
37961 else
37962 *total = cost->movzx;
37963 return false;
37964
37965 case SIGN_EXTEND:
37966 *total = cost->movsx;
37967 return false;
37968
37969 case ASHIFT:
37970 if (SCALAR_INT_MODE_P (mode)
37971 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
37972 && CONST_INT_P (XEXP (x, 1)))
37973 {
37974 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
37975 if (value == 1)
37976 {
37977 *total = cost->add;
37978 return false;
37979 }
37980 if ((value == 2 || value == 3)
37981 && cost->lea <= cost->shift_const)
37982 {
37983 *total = cost->lea;
37984 return false;
37985 }
37986 }
37987 /* FALLTHRU */
37988
37989 case ROTATE:
37990 case ASHIFTRT:
37991 case LSHIFTRT:
37992 case ROTATERT:
37993 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
37994 {
37995 /* ??? Should be SSE vector operation cost. */
37996 /* At least for published AMD latencies, this really is the same
37997 as the latency for a simple fpu operation like fabs. */
37998 /* V*QImode is emulated with 1-11 insns. */
37999 if (mode == V16QImode || mode == V32QImode)
38000 {
38001 int count = 11;
38002 if (TARGET_XOP && mode == V16QImode)
38003 {
38004 /* For XOP we use vpshab, which requires a broadcast of the
38005 value to the variable shift insn. For constants this
38006 means a V16QImode constant in memory; even when we can perform the
38007 shift with one insn, set the cost so as to prefer paddb. */
38008 if (CONSTANT_P (XEXP (x, 1)))
38009 {
38010 *total = (cost->fabs
38011 + rtx_cost (XEXP (x, 0), code, 0, speed)
38012 + (speed ? 2 : COSTS_N_BYTES (16)));
38013 return true;
38014 }
38015 count = 3;
38016 }
38017 else if (TARGET_SSSE3)
38018 count = 7;
38019 *total = cost->fabs * count;
38020 }
38021 else
38022 *total = cost->fabs;
38023 }
38024 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38025 {
38026 if (CONST_INT_P (XEXP (x, 1)))
38027 {
38028 if (INTVAL (XEXP (x, 1)) > 32)
38029 *total = cost->shift_const + COSTS_N_INSNS (2);
38030 else
38031 *total = cost->shift_const * 2;
38032 }
38033 else
38034 {
38035 if (GET_CODE (XEXP (x, 1)) == AND)
38036 *total = cost->shift_var * 2;
38037 else
38038 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38039 }
38040 }
38041 else
38042 {
38043 if (CONST_INT_P (XEXP (x, 1)))
38044 *total = cost->shift_const;
38045 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38046 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38047 {
38048 /* Return the cost after shift-and truncation. */
38049 *total = cost->shift_var;
38050 return true;
38051 }
38052 else
38053 *total = cost->shift_var;
38054 }
38055 return false;
38056
38057 case FMA:
38058 {
38059 rtx sub;
38060
38061 gcc_assert (FLOAT_MODE_P (mode));
38062 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38063
38064 /* ??? SSE scalar/vector cost should be used here. */
38065 /* ??? Bald assumption that fma has the same cost as fmul. */
38066 *total = cost->fmul;
38067 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38068
38069 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
38070 sub = XEXP (x, 0);
38071 if (GET_CODE (sub) == NEG)
38072 sub = XEXP (sub, 0);
38073 *total += rtx_cost (sub, FMA, 0, speed);
38074
38075 sub = XEXP (x, 2);
38076 if (GET_CODE (sub) == NEG)
38077 sub = XEXP (sub, 0);
38078 *total += rtx_cost (sub, FMA, 2, speed);
38079 return true;
38080 }
38081
38082 case MULT:
38083 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38084 {
38085 /* ??? SSE scalar cost should be used here. */
38086 *total = cost->fmul;
38087 return false;
38088 }
38089 else if (X87_FLOAT_MODE_P (mode))
38090 {
38091 *total = cost->fmul;
38092 return false;
38093 }
38094 else if (FLOAT_MODE_P (mode))
38095 {
38096 /* ??? SSE vector cost should be used here. */
38097 *total = cost->fmul;
38098 return false;
38099 }
38100 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38101 {
38102 /* V*QImode is emulated with 7-13 insns. */
38103 if (mode == V16QImode || mode == V32QImode)
38104 {
38105 int extra = 11;
38106 if (TARGET_XOP && mode == V16QImode)
38107 extra = 5;
38108 else if (TARGET_SSSE3)
38109 extra = 6;
38110 *total = cost->fmul * 2 + cost->fabs * extra;
38111 }
38112 /* V*DImode is emulated with 5-8 insns. */
38113 else if (mode == V2DImode || mode == V4DImode)
38114 {
38115 if (TARGET_XOP && mode == V2DImode)
38116 *total = cost->fmul * 2 + cost->fabs * 3;
38117 else
38118 *total = cost->fmul * 3 + cost->fabs * 5;
38119 }
38120 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38121 insns, including two PMULUDQ. */
38122 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38123 *total = cost->fmul * 2 + cost->fabs * 5;
38124 else
38125 *total = cost->fmul;
38126 return false;
38127 }
38128 else
38129 {
38130 rtx op0 = XEXP (x, 0);
38131 rtx op1 = XEXP (x, 1);
38132 int nbits;
38133 if (CONST_INT_P (XEXP (x, 1)))
38134 {
38135 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38136 for (nbits = 0; value != 0; value &= value - 1)
38137 nbits++;
38138 }
38139 else
38140 /* This is arbitrary. */
38141 nbits = 7;
38142
38143 /* Compute costs correctly for widening multiplication. */
38144 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38145 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38146 == GET_MODE_SIZE (mode))
38147 {
38148 int is_mulwiden = 0;
38149 enum machine_mode inner_mode = GET_MODE (op0);
38150
38151 if (GET_CODE (op0) == GET_CODE (op1))
38152 is_mulwiden = 1, op1 = XEXP (op1, 0);
38153 else if (CONST_INT_P (op1))
38154 {
38155 if (GET_CODE (op0) == SIGN_EXTEND)
38156 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38157 == INTVAL (op1);
38158 else
38159 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38160 }
38161
38162 if (is_mulwiden)
38163 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38164 }
38165
38166 *total = (cost->mult_init[MODE_INDEX (mode)]
38167 + nbits * cost->mult_bit
38168 + rtx_cost (op0, outer_code, opno, speed)
38169 + rtx_cost (op1, outer_code, opno, speed));
38170
38171 return true;
38172 }
38173
38174 case DIV:
38175 case UDIV:
38176 case MOD:
38177 case UMOD:
38178 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38179 /* ??? SSE cost should be used here. */
38180 *total = cost->fdiv;
38181 else if (X87_FLOAT_MODE_P (mode))
38182 *total = cost->fdiv;
38183 else if (FLOAT_MODE_P (mode))
38184 /* ??? SSE vector cost should be used here. */
38185 *total = cost->fdiv;
38186 else
38187 *total = cost->divide[MODE_INDEX (mode)];
38188 return false;
38189
38190 case PLUS:
38191 if (GET_MODE_CLASS (mode) == MODE_INT
38192 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38193 {
38194 if (GET_CODE (XEXP (x, 0)) == PLUS
38195 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38196 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38197 && CONSTANT_P (XEXP (x, 1)))
38198 {
38199 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38200 if (val == 2 || val == 4 || val == 8)
38201 {
38202 *total = cost->lea;
38203 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38204 outer_code, opno, speed);
38205 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38206 outer_code, opno, speed);
38207 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38208 return true;
38209 }
38210 }
38211 else if (GET_CODE (XEXP (x, 0)) == MULT
38212 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38213 {
38214 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38215 if (val == 2 || val == 4 || val == 8)
38216 {
38217 *total = cost->lea;
38218 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38219 outer_code, opno, speed);
38220 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38221 return true;
38222 }
38223 }
38224 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38225 {
38226 *total = cost->lea;
38227 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38228 outer_code, opno, speed);
38229 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38230 outer_code, opno, speed);
38231 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38232 return true;
38233 }
38234 }
38235 /* FALLTHRU */
38236
38237 case MINUS:
38238 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38239 {
38240 /* ??? SSE cost should be used here. */
38241 *total = cost->fadd;
38242 return false;
38243 }
38244 else if (X87_FLOAT_MODE_P (mode))
38245 {
38246 *total = cost->fadd;
38247 return false;
38248 }
38249 else if (FLOAT_MODE_P (mode))
38250 {
38251 /* ??? SSE vector cost should be used here. */
38252 *total = cost->fadd;
38253 return false;
38254 }
38255 /* FALLTHRU */
38256
38257 case AND:
38258 case IOR:
38259 case XOR:
38260 if (GET_MODE_CLASS (mode) == MODE_INT
38261 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38262 {
38263 *total = (cost->add * 2
38264 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38265 << (GET_MODE (XEXP (x, 0)) != DImode))
38266 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38267 << (GET_MODE (XEXP (x, 1)) != DImode)));
38268 return true;
38269 }
38270 /* FALLTHRU */
38271
38272 case NEG:
38273 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38274 {
38275 /* ??? SSE cost should be used here. */
38276 *total = cost->fchs;
38277 return false;
38278 }
38279 else if (X87_FLOAT_MODE_P (mode))
38280 {
38281 *total = cost->fchs;
38282 return false;
38283 }
38284 else if (FLOAT_MODE_P (mode))
38285 {
38286 /* ??? SSE vector cost should be used here. */
38287 *total = cost->fchs;
38288 return false;
38289 }
38290 /* FALLTHRU */
38291
38292 case NOT:
38293 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38294 {
38295 /* ??? Should be SSE vector operation cost. */
38296 /* At least for published AMD latencies, this really is the same
38297 as the latency for a simple fpu operation like fabs. */
38298 *total = cost->fabs;
38299 }
38300 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38301 *total = cost->add * 2;
38302 else
38303 *total = cost->add;
38304 return false;
38305
38306 case COMPARE:
38307 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38308 && XEXP (XEXP (x, 0), 1) == const1_rtx
38309 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38310 && XEXP (x, 1) == const0_rtx)
38311 {
38312 /* This kind of construct is implemented using test[bwl].
38313 Treat it as if we had an AND. */
38314 *total = (cost->add
38315 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38316 + rtx_cost (const1_rtx, outer_code, opno, speed));
38317 return true;
38318 }
38319 return false;
38320
38321 case FLOAT_EXTEND:
38322 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38323 *total = 0;
38324 return false;
38325
38326 case ABS:
38327 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38328 /* ??? SSE cost should be used here. */
38329 *total = cost->fabs;
38330 else if (X87_FLOAT_MODE_P (mode))
38331 *total = cost->fabs;
38332 else if (FLOAT_MODE_P (mode))
38333 /* ??? SSE vector cost should be used here. */
38334 *total = cost->fabs;
38335 return false;
38336
38337 case SQRT:
38338 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38339 /* ??? SSE cost should be used here. */
38340 *total = cost->fsqrt;
38341 else if (X87_FLOAT_MODE_P (mode))
38342 *total = cost->fsqrt;
38343 else if (FLOAT_MODE_P (mode))
38344 /* ??? SSE vector cost should be used here. */
38345 *total = cost->fsqrt;
38346 return false;
38347
38348 case UNSPEC:
38349 if (XINT (x, 1) == UNSPEC_TP)
38350 *total = 0;
38351 return false;
38352
38353 case VEC_SELECT:
38354 case VEC_CONCAT:
38355 case VEC_DUPLICATE:
38356 /* ??? Assume all of these vector manipulation patterns are
38357 recognizable. In which case they all pretty much have the
38358 same cost. */
38359 *total = cost->fabs;
38360 return true;
38361 case VEC_MERGE:
38362 mask = XEXP (x, 2);
38363 /* This is a masked instruction; assume the same cost
38364 as the non-masked variant. */
38365 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38366 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38367 else
38368 *total = cost->fabs;
38369 return true;
38370
38371 default:
38372 return false;
38373 }
38374 }
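/* Example of the MULT costing above (added for illustration): for a multiply
   by the constant 10 (binary 1010), the Kernighan loop "value &= value - 1"
   clears one set bit per iteration, so nbits == 2 and the cost becomes
   cost->mult_init[MODE_INDEX (mode)] + 2 * cost->mult_bit plus the costs of
   the two operands; a multiply by a non-constant falls back to nbits == 7.  */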
38375
38376 #if TARGET_MACHO
38377
38378 static int current_machopic_label_num;
38379
38380 /* Given a symbol name and its associated stub, write out the
38381 definition of the stub. */
38382
38383 void
38384 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38385 {
38386 unsigned int length;
38387 char *binder_name, *symbol_name, lazy_ptr_name[32];
38388 int label = ++current_machopic_label_num;
38389
38390 /* For 64-bit we shouldn't get here. */
38391 gcc_assert (!TARGET_64BIT);
38392
38393 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38394 symb = targetm.strip_name_encoding (symb);
38395
38396 length = strlen (stub);
38397 binder_name = XALLOCAVEC (char, length + 32);
38398 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38399
38400 length = strlen (symb);
38401 symbol_name = XALLOCAVEC (char, length + 32);
38402 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38403
38404 sprintf (lazy_ptr_name, "L%d$lz", label);
38405
38406 if (MACHOPIC_ATT_STUB)
38407 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38408 else if (MACHOPIC_PURE)
38409 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38410 else
38411 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38412
38413 fprintf (file, "%s:\n", stub);
38414 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38415
38416 if (MACHOPIC_ATT_STUB)
38417 {
38418 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38419 }
38420 else if (MACHOPIC_PURE)
38421 {
38422 /* PIC stub. */
38423 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38424 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38425 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38426 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38427 label, lazy_ptr_name, label);
38428 fprintf (file, "\tjmp\t*%%ecx\n");
38429 }
38430 else
38431 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38432
38433 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38434 it needs no stub-binding-helper. */
38435 if (MACHOPIC_ATT_STUB)
38436 return;
38437
38438 fprintf (file, "%s:\n", binder_name);
38439
38440 if (MACHOPIC_PURE)
38441 {
38442 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38443 fprintf (file, "\tpushl\t%%ecx\n");
38444 }
38445 else
38446 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38447
38448 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38449
38450 /* N.B. Keep the correspondence of these
38451 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38452 old-pic/new-pic/non-pic stubs; altering this will break
38453 compatibility with existing dylibs. */
38454 if (MACHOPIC_PURE)
38455 {
38456 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38457 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38458 }
38459 else
38460 /* 16-byte -mdynamic-no-pic stub. */
38461 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
38462
38463 fprintf (file, "%s:\n", lazy_ptr_name);
38464 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38465 fprintf (file, ASM_LONG "%s\n", binder_name);
38466 }
38467 #endif /* TARGET_MACHO */
38468
38469 /* Order the registers for register allocator. */
38470
38471 void
38472 x86_order_regs_for_local_alloc (void)
38473 {
38474 int pos = 0;
38475 int i;
38476
38477 /* First allocate the local general purpose registers. */
38478 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38479 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38480 reg_alloc_order [pos++] = i;
38481
38482 /* Global general purpose registers. */
38483 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38484 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38485 reg_alloc_order [pos++] = i;
38486
38487 /* x87 registers come first in case we are doing FP math
38488 using them. */
38489 if (!TARGET_SSE_MATH)
38490 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38491 reg_alloc_order [pos++] = i;
38492
38493 /* SSE registers. */
38494 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38495 reg_alloc_order [pos++] = i;
38496 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38497 reg_alloc_order [pos++] = i;
38498
38499 /* Extended REX SSE registers. */
38500 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38501 reg_alloc_order [pos++] = i;
38502
38503 /* Mask register. */
38504 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38505 reg_alloc_order [pos++] = i;
38506
38507 /* x87 registers. */
38508 if (TARGET_SSE_MATH)
38509 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38510 reg_alloc_order [pos++] = i;
38511
38512 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38513 reg_alloc_order [pos++] = i;
38514
38515 /* Initialize the rest of the array, as we do not allocate some registers
38516 at all. */
38517 while (pos < FIRST_PSEUDO_REGISTER)
38518 reg_alloc_order [pos++] = 0;
38519 }
38520
38521 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38522 in struct attribute_spec.handler. */
38523 static tree
38524 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38525 tree args,
38526 int flags ATTRIBUTE_UNUSED,
38527 bool *no_add_attrs)
38528 {
38529 if (TREE_CODE (*node) != FUNCTION_TYPE
38530 && TREE_CODE (*node) != METHOD_TYPE
38531 && TREE_CODE (*node) != FIELD_DECL
38532 && TREE_CODE (*node) != TYPE_DECL)
38533 {
38534 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38535 name);
38536 *no_add_attrs = true;
38537 return NULL_TREE;
38538 }
38539 if (TARGET_64BIT)
38540 {
38541 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38542 name);
38543 *no_add_attrs = true;
38544 return NULL_TREE;
38545 }
38546 if (is_attribute_p ("callee_pop_aggregate_return", name))
38547 {
38548 tree cst;
38549
38550 cst = TREE_VALUE (args);
38551 if (TREE_CODE (cst) != INTEGER_CST)
38552 {
38553 warning (OPT_Wattributes,
38554 "%qE attribute requires an integer constant argument",
38555 name);
38556 *no_add_attrs = true;
38557 }
38558 else if (compare_tree_int (cst, 0) != 0
38559 && compare_tree_int (cst, 1) != 0)
38560 {
38561 warning (OPT_Wattributes,
38562 "argument to %qE attribute is neither zero, nor one",
38563 name);
38564 *no_add_attrs = true;
38565 }
38566
38567 return NULL_TREE;
38568 }
38569
38570 return NULL_TREE;
38571 }
38572
38573 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
38574 struct attribute_spec.handler. */
38575 static tree
38576 ix86_handle_abi_attribute (tree *node, tree name,
38577 tree args ATTRIBUTE_UNUSED,
38578 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38579 {
38580 if (TREE_CODE (*node) != FUNCTION_TYPE
38581 && TREE_CODE (*node) != METHOD_TYPE
38582 && TREE_CODE (*node) != FIELD_DECL
38583 && TREE_CODE (*node) != TYPE_DECL)
38584 {
38585 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38586 name);
38587 *no_add_attrs = true;
38588 return NULL_TREE;
38589 }
38590
38591 /* Can combine regparm with all attributes but fastcall. */
38592 if (is_attribute_p ("ms_abi", name))
38593 {
38594 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38595 {
38596 error ("ms_abi and sysv_abi attributes are not compatible");
38597 }
38598
38599 return NULL_TREE;
38600 }
38601 else if (is_attribute_p ("sysv_abi", name))
38602 {
38603 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38604 {
38605 error ("ms_abi and sysv_abi attributes are not compatible");
38606 }
38607
38608 return NULL_TREE;
38609 }
38610
38611 return NULL_TREE;
38612 }
38613
38614 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38615 struct attribute_spec.handler. */
38616 static tree
38617 ix86_handle_struct_attribute (tree *node, tree name,
38618 tree args ATTRIBUTE_UNUSED,
38619 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38620 {
38621 tree *type = NULL;
38622 if (DECL_P (*node))
38623 {
38624 if (TREE_CODE (*node) == TYPE_DECL)
38625 type = &TREE_TYPE (*node);
38626 }
38627 else
38628 type = node;
38629
38630 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38631 {
38632 warning (OPT_Wattributes, "%qE attribute ignored",
38633 name);
38634 *no_add_attrs = true;
38635 }
38636
38637 else if ((is_attribute_p ("ms_struct", name)
38638 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38639 || ((is_attribute_p ("gcc_struct", name)
38640 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38641 {
38642 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38643 name);
38644 *no_add_attrs = true;
38645 }
38646
38647 return NULL_TREE;
38648 }
38649
38650 static tree
38651 ix86_handle_fndecl_attribute (tree *node, tree name,
38652 tree args ATTRIBUTE_UNUSED,
38653 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38654 {
38655 if (TREE_CODE (*node) != FUNCTION_DECL)
38656 {
38657 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38658 name);
38659 *no_add_attrs = true;
38660 }
38661 return NULL_TREE;
38662 }
38663
38664 static bool
38665 ix86_ms_bitfield_layout_p (const_tree record_type)
38666 {
38667 return ((TARGET_MS_BITFIELD_LAYOUT
38668 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38669 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38670 }
38671
38672 /* Returns an expression indicating where the this parameter is
38673 located on entry to the FUNCTION. */
38674
38675 static rtx
38676 x86_this_parameter (tree function)
38677 {
38678 tree type = TREE_TYPE (function);
38679 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38680 int nregs;
38681
38682 if (TARGET_64BIT)
38683 {
38684 const int *parm_regs;
38685
38686 if (ix86_function_type_abi (type) == MS_ABI)
38687 parm_regs = x86_64_ms_abi_int_parameter_registers;
38688 else
38689 parm_regs = x86_64_int_parameter_registers;
38690 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38691 }
38692
38693 nregs = ix86_function_regparm (type, function);
38694
38695 if (nregs > 0 && !stdarg_p (type))
38696 {
38697 int regno;
38698 unsigned int ccvt = ix86_get_callcvt (type);
38699
38700 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38701 regno = aggr ? DX_REG : CX_REG;
38702 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38703 {
38704 regno = CX_REG;
38705 if (aggr)
38706 return gen_rtx_MEM (SImode,
38707 plus_constant (Pmode, stack_pointer_rtx, 4));
38708 }
38709 else
38710 {
38711 regno = AX_REG;
38712 if (aggr)
38713 {
38714 regno = DX_REG;
38715 if (nregs == 1)
38716 return gen_rtx_MEM (SImode,
38717 plus_constant (Pmode,
38718 stack_pointer_rtx, 4));
38719 }
38720 }
38721 return gen_rtx_REG (SImode, regno);
38722 }
38723
38724 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38725 aggr ? 8 : 4));
38726 }
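/* Illustrative results (not part of the original source): for the 64-bit
   SysV ABI the this pointer arrives in %rdi, or in %rsi when the function
   returns an aggregate in memory (parm_regs[aggr]); for a 32-bit fastcall
   method it arrives in %ecx, or %edx for an aggregate return; otherwise it
   is read from the stack at 4(%esp) or 8(%esp).  */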
38727
38728 /* Determine whether x86_output_mi_thunk can succeed. */
38729
38730 static bool
38731 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
38732 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
38733 HOST_WIDE_INT vcall_offset, const_tree function)
38734 {
38735 /* 64-bit can handle anything. */
38736 if (TARGET_64BIT)
38737 return true;
38738
38739 /* For 32-bit, everything's fine if we have one free register. */
38740 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38741 return true;
38742
38743 /* Need a free register for vcall_offset. */
38744 if (vcall_offset)
38745 return false;
38746
38747 /* Need a free register for GOT references. */
38748 if (flag_pic && !targetm.binds_local_p (function))
38749 return false;
38750
38751 /* Otherwise ok. */
38752 return true;
38753 }
38754
38755 /* Output the assembler code for a thunk function. THUNK_DECL is the
38756 declaration for the thunk function itself, FUNCTION is the decl for
38757 the target function. DELTA is an immediate constant offset to be
38758 added to THIS. If VCALL_OFFSET is nonzero, the word at
38759 *(*this + vcall_offset) should be added to THIS. */
38760
38761 static void
38762 x86_output_mi_thunk (FILE *file,
38763 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
38764 HOST_WIDE_INT vcall_offset, tree function)
38765 {
38766 rtx this_param = x86_this_parameter (function);
38767 rtx this_reg, tmp, fnaddr;
38768 unsigned int tmp_regno;
38769
38770 if (TARGET_64BIT)
38771 tmp_regno = R10_REG;
38772 else
38773 {
38774 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38775 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38776 tmp_regno = AX_REG;
38777 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38778 tmp_regno = DX_REG;
38779 else
38780 tmp_regno = CX_REG;
38781 }
38782
38783 emit_note (NOTE_INSN_PROLOGUE_END);
38784
38785 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38786 pull it in now and let DELTA benefit. */
38787 if (REG_P (this_param))
38788 this_reg = this_param;
38789 else if (vcall_offset)
38790 {
38791 /* Put the this parameter into %eax. */
38792 this_reg = gen_rtx_REG (Pmode, AX_REG);
38793 emit_move_insn (this_reg, this_param);
38794 }
38795 else
38796 this_reg = NULL_RTX;
38797
38798 /* Adjust the this parameter by a fixed constant. */
38799 if (delta)
38800 {
38801 rtx delta_rtx = GEN_INT (delta);
38802 rtx delta_dst = this_reg ? this_reg : this_param;
38803
38804 if (TARGET_64BIT)
38805 {
38806 if (!x86_64_general_operand (delta_rtx, Pmode))
38807 {
38808 tmp = gen_rtx_REG (Pmode, tmp_regno);
38809 emit_move_insn (tmp, delta_rtx);
38810 delta_rtx = tmp;
38811 }
38812 }
38813
38814 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38815 }
38816
38817 /* Adjust the this parameter by a value stored in the vtable. */
38818 if (vcall_offset)
38819 {
38820 rtx vcall_addr, vcall_mem, this_mem;
38821
38822 tmp = gen_rtx_REG (Pmode, tmp_regno);
38823
38824 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38825 if (Pmode != ptr_mode)
38826 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38827 emit_move_insn (tmp, this_mem);
38828
38829 /* Adjust the this parameter. */
38830 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38831 if (TARGET_64BIT
38832 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38833 {
38834 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38835 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38836 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38837 }
38838
38839 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38840 if (Pmode != ptr_mode)
38841 emit_insn (gen_addsi_1_zext (this_reg,
38842 gen_rtx_REG (ptr_mode,
38843 REGNO (this_reg)),
38844 vcall_mem));
38845 else
38846 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38847 }
38848
38849 /* If necessary, drop THIS back to its stack slot. */
38850 if (this_reg && this_reg != this_param)
38851 emit_move_insn (this_param, this_reg);
38852
38853 fnaddr = XEXP (DECL_RTL (function), 0);
38854 if (TARGET_64BIT)
38855 {
38856 if (!flag_pic || targetm.binds_local_p (function)
38857 || TARGET_PECOFF)
38858 ;
38859 else
38860 {
38861 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
38862 tmp = gen_rtx_CONST (Pmode, tmp);
38863 fnaddr = gen_const_mem (Pmode, tmp);
38864 }
38865 }
38866 else
38867 {
38868 if (!flag_pic || targetm.binds_local_p (function))
38869 ;
38870 #if TARGET_MACHO
38871 else if (TARGET_MACHO)
38872 {
38873 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
38874 fnaddr = XEXP (fnaddr, 0);
38875 }
38876 #endif /* TARGET_MACHO */
38877 else
38878 {
38879 tmp = gen_rtx_REG (Pmode, CX_REG);
38880 output_set_got (tmp, NULL_RTX);
38881
38882 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
38883 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
38884 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
38885 fnaddr = gen_const_mem (Pmode, fnaddr);
38886 }
38887 }
38888
38889 /* Our sibling call patterns do not allow memories, because we have no
38890 predicate that can distinguish between frame and non-frame memory.
38891 For our purposes here, we can get away with (ab)using a jump pattern,
38892 because we're going to do no optimization. */
38893 if (MEM_P (fnaddr))
38894 emit_jump_insn (gen_indirect_jump (fnaddr));
38895 else
38896 {
38897 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
38898 fnaddr = legitimize_pic_address (fnaddr,
38899 gen_rtx_REG (Pmode, tmp_regno));
38900
38901 if (!sibcall_insn_operand (fnaddr, word_mode))
38902 {
38903 tmp = gen_rtx_REG (word_mode, tmp_regno);
38904 if (GET_MODE (fnaddr) != word_mode)
38905 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
38906 emit_move_insn (tmp, fnaddr);
38907 fnaddr = tmp;
38908 }
38909
38910 tmp = gen_rtx_MEM (QImode, fnaddr);
38911 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
38912 tmp = emit_call_insn (tmp);
38913 SIBLING_CALL_P (tmp) = 1;
38914 }
38915 emit_barrier ();
38916
38917 /* Emit just enough of rest_of_compilation to get the insns emitted.
38918 Note that use_thunk calls assemble_start_function et al. */
38919 tmp = get_insns ();
38920 shorten_branches (tmp);
38921 final_start_function (tmp, file, 1);
38922 final (tmp, file, 1);
38923 final_end_function ();
38924 }
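/* Example of the code produced (a rough sketch, not taken from the source):
   on ia32, a non-PIC thunk with delta == -8 and no vcall_offset adjusts the
   this pointer directly in its stack slot and then tail-jumps, roughly:
         addl    $-8, 4(%esp)
         jmp     target_function
   With a vcall_offset the this pointer is first loaded into %eax so the
   vtable slot can be read and added before the jump.  */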
38925
38926 static void
38927 x86_file_start (void)
38928 {
38929 default_file_start ();
38930 if (TARGET_16BIT)
38931 fputs ("\t.code16gcc\n", asm_out_file);
38932 #if TARGET_MACHO
38933 darwin_file_start ();
38934 #endif
38935 if (X86_FILE_START_VERSION_DIRECTIVE)
38936 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
38937 if (X86_FILE_START_FLTUSED)
38938 fputs ("\t.global\t__fltused\n", asm_out_file);
38939 if (ix86_asm_dialect == ASM_INTEL)
38940 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
38941 }
38942
38943 int
38944 x86_field_alignment (tree field, int computed)
38945 {
38946 enum machine_mode mode;
38947 tree type = TREE_TYPE (field);
38948
38949 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
38950 return computed;
38951 mode = TYPE_MODE (strip_array_types (type));
38952 if (mode == DFmode || mode == DCmode
38953 || GET_MODE_CLASS (mode) == MODE_INT
38954 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
38955 return MIN (32, computed);
38956 return computed;
38957 }
38958
38959 /* Output assembler code to FILE to increment profiler label # LABELNO
38960 for profiling a function entry. */
38961 void
38962 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
38963 {
38964 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
38965 : MCOUNT_NAME);
38966
38967 if (TARGET_64BIT)
38968 {
38969 #ifndef NO_PROFILE_COUNTERS
38970 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
38971 #endif
38972
38973 if (!TARGET_PECOFF && flag_pic)
38974 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
38975 else
38976 fprintf (file, "\tcall\t%s\n", mcount_name);
38977 }
38978 else if (flag_pic)
38979 {
38980 #ifndef NO_PROFILE_COUNTERS
38981 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
38982 LPREFIX, labelno);
38983 #endif
38984 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
38985 }
38986 else
38987 {
38988 #ifndef NO_PROFILE_COUNTERS
38989 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
38990 LPREFIX, labelno);
38991 #endif
38992 fprintf (file, "\tcall\t%s\n", mcount_name);
38993 }
38994 }
38995
38996 /* We don't have exact information about the insn sizes, but we may assume
38997 quite safely that we are informed about all 1-byte insns and memory
38998 address sizes. This is enough to eliminate unnecessary padding in
38999 99% of cases. */
39000
39001 static int
39002 min_insn_size (rtx insn)
39003 {
39004 int l = 0, len;
39005
39006 if (!INSN_P (insn) || !active_insn_p (insn))
39007 return 0;
39008
39009 /* Discard alignments we've emitted and jump instructions. */
39010 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
39011 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
39012 return 0;
39013
39014 /* Important case - calls are always 5 bytes.
39015 It is common to have many calls in a row. */
39016 if (CALL_P (insn)
39017 && symbolic_reference_mentioned_p (PATTERN (insn))
39018 && !SIBLING_CALL_P (insn))
39019 return 5;
39020 len = get_attr_length (insn);
39021 if (len <= 1)
39022 return 1;
39023
39024 /* For normal instructions we rely on get_attr_length being exact,
39025 with a few exceptions. */
39026 if (!JUMP_P (insn))
39027 {
39028 enum attr_type type = get_attr_type (insn);
39029
39030 switch (type)
39031 {
39032 case TYPE_MULTI:
39033 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39034 || asm_noperands (PATTERN (insn)) >= 0)
39035 return 0;
39036 break;
39037 case TYPE_OTHER:
39038 case TYPE_FCMP:
39039 break;
39040 default:
39041 /* Otherwise trust get_attr_length. */
39042 return len;
39043 }
39044
39045 l = get_attr_length_address (insn);
39046 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39047 l = 4;
39048 }
39049 if (l)
39050 return 1+l;
39051 else
39052 return 2;
39053 }
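/* Examples (illustrative): a call to a named function is counted as 5 bytes;
   UNSPECV_ALIGN placeholders and inline asm statements are counted as 0; other
   hard-to-estimate instructions are assumed to take 1 byte plus their known
   address size, or 2 bytes when nothing better is known.  */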
39054
39055 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39056
39057 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
39058 window. */
39059
39060 static void
39061 ix86_avoid_jump_mispredicts (void)
39062 {
39063 rtx insn, start = get_insns ();
39064 int nbytes = 0, njumps = 0;
39065 int isjump = 0;
39066
39067 /* Look for all minimal intervals of instructions containing 4 jumps.
39068 The intervals are bounded by START and INSN. NBYTES is the total
39069 size of instructions in the interval including INSN and not including
39070 START.  When NBYTES is smaller than 16, it is possible
39071 that the end of START and the end of INSN fall in the same 16-byte window.
39072
39073 The smallest offset at which INSN can start in that window is the case
39074 where START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
39075 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
39076
39077 Don't consider an asm goto as a jump: while it can contain a jump, it doesn't
39078 have to, control transfer to the label(s) can be performed through other
39079 means, and we also estimate the minimum length of all asm stmts as 0. */
39080 for (insn = start; insn; insn = NEXT_INSN (insn))
39081 {
39082 int min_size;
39083
39084 if (LABEL_P (insn))
39085 {
39086 int align = label_to_alignment (insn);
39087 int max_skip = label_to_max_skip (insn);
39088
39089 if (max_skip > 15)
39090 max_skip = 15;
39091 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39092 already in the current 16 byte page, because otherwise
39093 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39094 bytes to reach 16 byte boundary. */
39095 if (align <= 0
39096 || (align <= 3 && max_skip != (1 << align) - 1))
39097 max_skip = 0;
39098 if (dump_file)
39099 fprintf (dump_file, "Label %i with max_skip %i\n",
39100 INSN_UID (insn), max_skip);
39101 if (max_skip)
39102 {
39103 while (nbytes + max_skip >= 16)
39104 {
39105 start = NEXT_INSN (start);
39106 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39107 || CALL_P (start))
39108 njumps--, isjump = 1;
39109 else
39110 isjump = 0;
39111 nbytes -= min_insn_size (start);
39112 }
39113 }
39114 continue;
39115 }
39116
39117 min_size = min_insn_size (insn);
39118 nbytes += min_size;
39119 if (dump_file)
39120 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39121 INSN_UID (insn), min_size);
39122 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39123 || CALL_P (insn))
39124 njumps++;
39125 else
39126 continue;
39127
39128 while (njumps > 3)
39129 {
39130 start = NEXT_INSN (start);
39131 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39132 || CALL_P (start))
39133 njumps--, isjump = 1;
39134 else
39135 isjump = 0;
39136 nbytes -= min_insn_size (start);
39137 }
39138 gcc_assert (njumps >= 0);
39139 if (dump_file)
39140 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39141 INSN_UID (start), INSN_UID (insn), nbytes);
39142
39143 if (njumps == 3 && isjump && nbytes < 16)
39144 {
39145 int padsize = 15 - nbytes + min_insn_size (insn);
39146
39147 if (dump_file)
39148 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39149 INSN_UID (insn), padsize);
39150 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39151 }
39152 }
39153 }
39154 #endif
39155
39156 /* AMD Athlon works faster
39157 when RET is not the destination of a conditional jump and is not directly
39158 preceded by another jump instruction. We avoid the penalty by inserting a
39159 NOP just before RET instructions in such cases. */
39160 static void
39161 ix86_pad_returns (void)
39162 {
39163 edge e;
39164 edge_iterator ei;
39165
39166 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39167 {
39168 basic_block bb = e->src;
39169 rtx ret = BB_END (bb);
39170 rtx prev;
39171 bool replace = false;
39172
39173 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39174 || optimize_bb_for_size_p (bb))
39175 continue;
39176 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39177 if (active_insn_p (prev) || LABEL_P (prev))
39178 break;
39179 if (prev && LABEL_P (prev))
39180 {
39181 edge e;
39182 edge_iterator ei;
39183
39184 FOR_EACH_EDGE (e, ei, bb->preds)
39185 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39186 && !(e->flags & EDGE_FALLTHRU))
39187 {
39188 replace = true;
39189 break;
39190 }
39191 }
39192 if (!replace)
39193 {
39194 prev = prev_active_insn (ret);
39195 if (prev
39196 && ((JUMP_P (prev) && any_condjump_p (prev))
39197 || CALL_P (prev)))
39198 replace = true;
39199 /* Empty functions get a branch mispredict even when
39200 the jump destination is not visible to us. */
39201 if (!prev && !optimize_function_for_size_p (cfun))
39202 replace = true;
39203 }
39204 if (replace)
39205 {
39206 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39207 delete_insn (ret);
39208 }
39209 }
39210 }
39211
39212 /* Count the minimum number of instructions in BB. Return 4 if the
39213 number of instructions >= 4. */
39214
39215 static int
39216 ix86_count_insn_bb (basic_block bb)
39217 {
39218 rtx insn;
39219 int insn_count = 0;
39220
39221 /* Count number of instructions in this block. Return 4 if the number
39222 of instructions >= 4. */
39223 FOR_BB_INSNS (bb, insn)
39224 {
39225 /* This only happens in exit blocks. */
39226 if (JUMP_P (insn)
39227 && ANY_RETURN_P (PATTERN (insn)))
39228 break;
39229
39230 if (NONDEBUG_INSN_P (insn)
39231 && GET_CODE (PATTERN (insn)) != USE
39232 && GET_CODE (PATTERN (insn)) != CLOBBER)
39233 {
39234 insn_count++;
39235 if (insn_count >= 4)
39236 return insn_count;
39237 }
39238 }
39239
39240 return insn_count;
39241 }
39242
39243
39244 /* Count the minimum number of instructions in code path in BB.
39245 Return 4 if the number of instructions >= 4. */
39246
39247 static int
39248 ix86_count_insn (basic_block bb)
39249 {
39250 edge e;
39251 edge_iterator ei;
39252 int min_prev_count;
39253
39254 /* Only bother counting instructions along paths with no
39255 more than 2 basic blocks between entry and exit. Given
39256 that BB has an edge to exit, determine if a predecessor
39257 of BB has an edge from entry. If so, compute the number
39258 of instructions in the predecessor block. If there
39259 happen to be multiple such blocks, compute the minimum. */
39260 min_prev_count = 4;
39261 FOR_EACH_EDGE (e, ei, bb->preds)
39262 {
39263 edge prev_e;
39264 edge_iterator prev_ei;
39265
39266 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39267 {
39268 min_prev_count = 0;
39269 break;
39270 }
39271 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39272 {
39273 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39274 {
39275 int count = ix86_count_insn_bb (e->src);
39276 if (count < min_prev_count)
39277 min_prev_count = count;
39278 break;
39279 }
39280 }
39281 }
39282
39283 if (min_prev_count < 4)
39284 min_prev_count += ix86_count_insn_bb (bb);
39285
39286 return min_prev_count;
39287 }
39288
39289 /* Pad short function to 4 instructions. */
39290
39291 static void
39292 ix86_pad_short_function (void)
39293 {
39294 edge e;
39295 edge_iterator ei;
39296
39297 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39298 {
39299 rtx ret = BB_END (e->src);
39300 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39301 {
39302 int insn_count = ix86_count_insn (e->src);
39303
39304 /* Pad short function. */
39305 if (insn_count < 4)
39306 {
39307 rtx insn = ret;
39308
39309 /* Find epilogue. */
39310 while (insn
39311 && (!NOTE_P (insn)
39312 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39313 insn = PREV_INSN (insn);
39314
39315 if (!insn)
39316 insn = ret;
39317
39318 /* Two NOPs count as one instruction. */
39319 insn_count = 2 * (4 - insn_count);
39320 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39321 }
39322 }
39323 }
39324 }
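/* Worked example (illustrative): a function containing only two real insns
   before its return gets insn_count = 2 * (4 - 2) == 4 NOPs emitted just
   before the NOTE_INSN_EPILOGUE_BEG note, since two NOPs count as one
   instruction here.  */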
39325
39326 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39327 the epilogue, the Windows system unwinder will apply epilogue logic and
39328 produce incorrect offsets. This can be avoided by adding a nop between
39329 the last insn that can throw and the first insn of the epilogue. */
39330
39331 static void
39332 ix86_seh_fixup_eh_fallthru (void)
39333 {
39334 edge e;
39335 edge_iterator ei;
39336
39337 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39338 {
39339 rtx insn, next;
39340
39341 /* Find the beginning of the epilogue. */
39342 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39343 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39344 break;
39345 if (insn == NULL)
39346 continue;
39347
39348 /* We only care about preceding insns that can throw. */
39349 insn = prev_active_insn (insn);
39350 if (insn == NULL || !can_throw_internal (insn))
39351 continue;
39352
39353 /* Do not separate calls from their debug information. */
39354 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39355 if (NOTE_P (next)
39356 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39357 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39358 insn = next;
39359 else
39360 break;
39361
39362 emit_insn_after (gen_nops (const1_rtx), insn);
39363 }
39364 }
39365
39366 /* Implement machine specific optimizations. We implement padding of returns
39367 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
39368 static void
39369 ix86_reorg (void)
39370 {
39371 /* We are freeing block_for_insn in the toplev to keep compatibility
39372 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39373 compute_bb_for_insn ();
39374
39375 if (TARGET_SEH && current_function_has_exception_handlers ())
39376 ix86_seh_fixup_eh_fallthru ();
39377
39378 if (optimize && optimize_function_for_speed_p (cfun))
39379 {
39380 if (TARGET_PAD_SHORT_FUNCTION)
39381 ix86_pad_short_function ();
39382 else if (TARGET_PAD_RETURNS)
39383 ix86_pad_returns ();
39384 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39385 if (TARGET_FOUR_JUMP_LIMIT)
39386 ix86_avoid_jump_mispredicts ();
39387 #endif
39388 }
39389 }
39390
39391 /* Return nonzero when QImode register that must be represented via REX prefix
39392 is used. */
39393 bool
39394 x86_extended_QIreg_mentioned_p (rtx insn)
39395 {
39396 int i;
39397 extract_insn_cached (insn);
39398 for (i = 0; i < recog_data.n_operands; i++)
39399 if (GENERAL_REG_P (recog_data.operand[i])
39400 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39401 return true;
39402 return false;
39403 }
39404
39405 /* Return nonzero when P points to register encoded via REX prefix.
39406 Called via for_each_rtx. */
39407 static int
39408 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
39409 {
39410 unsigned int regno;
39411 if (!REG_P (*p))
39412 return 0;
39413 regno = REGNO (*p);
39414 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39415 }
39416
39417 /* Return true when INSN mentions register that must be encoded using REX
39418 prefix. */
39419 bool
39420 x86_extended_reg_mentioned_p (rtx insn)
39421 {
39422 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39423 extended_reg_mentioned_1, NULL);
39424 }
39425
39426 /* If profitable, negate (without causing overflow) integer constant
39427 of mode MODE at location LOC. Return true in this case. */
39428 bool
39429 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39430 {
39431 HOST_WIDE_INT val;
39432
39433 if (!CONST_INT_P (*loc))
39434 return false;
39435
39436 switch (mode)
39437 {
39438 case DImode:
39439 /* DImode x86_64 constants must fit in 32 bits. */
39440 gcc_assert (x86_64_immediate_operand (*loc, mode));
39441
39442 mode = SImode;
39443 break;
39444
39445 case SImode:
39446 case HImode:
39447 case QImode:
39448 break;
39449
39450 default:
39451 gcc_unreachable ();
39452 }
39453
39454 /* Avoid overflows. */
39455 if (mode_signbit_p (mode, *loc))
39456 return false;
39457
39458 val = INTVAL (*loc);
39459
39460 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
39461 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
39462 if ((val < 0 && val != -128)
39463 || val == 128)
39464 {
39465 *loc = GEN_INT (-val);
39466 return true;
39467 }
39468
39469 return false;
39470 }
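/* Examples (illustrative, not from the source), with mode == SImode:
     *loc == -4   is rewritten to 4 and true is returned, so an
                  "addl $-4,%eax" can instead be emitted as "subl $4,%eax";
     *loc == 128  is rewritten to -128 (true), because -128 still fits in a
                  sign-extended 8-bit immediate while 128 does not;
     *loc == -128 is left alone (false) for the same encoding reason.  */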
39471
39472 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39473 optabs would emit if we didn't have TFmode patterns. */
39474
39475 void
39476 x86_emit_floatuns (rtx operands[2])
39477 {
39478 rtx neglab, donelab, i0, i1, f0, in, out;
39479 enum machine_mode mode, inmode;
39480
39481 inmode = GET_MODE (operands[1]);
39482 gcc_assert (inmode == SImode || inmode == DImode);
39483
39484 out = operands[0];
39485 in = force_reg (inmode, operands[1]);
39486 mode = GET_MODE (out);
39487 neglab = gen_label_rtx ();
39488 donelab = gen_label_rtx ();
39489 f0 = gen_reg_rtx (mode);
39490
39491 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39492
39493 expand_float (out, in, 0);
39494
39495 emit_jump_insn (gen_jump (donelab));
39496 emit_barrier ();
39497
39498 emit_label (neglab);
39499
39500 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39501 1, OPTAB_DIRECT);
39502 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39503 1, OPTAB_DIRECT);
39504 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39505
39506 expand_float (f0, i0, 0);
39507
39508 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39509
39510 emit_label (donelab);
39511 }
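/* Explanatory note on the negative path above (added for clarity): when the
   unsigned input has its sign bit set, it cannot be converted directly with a
   signed conversion, so the value is halved while preserving the low bit,
   i0 = (in >> 1) | (in & 1), converted to FP, and then doubled via f0 + f0;
   the OR keeps the rounding contribution of the discarded low bit correct.  */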
39512 \f
39513 /* AVX512F does support 64-byte integer vector operations,
39514 thus the longest vector we are faced with is V64QImode. */
39515 #define MAX_VECT_LEN 64
39516
39517 struct expand_vec_perm_d
39518 {
39519 rtx target, op0, op1;
39520 unsigned char perm[MAX_VECT_LEN];
39521 enum machine_mode vmode;
39522 unsigned char nelt;
39523 bool one_operand_p;
39524 bool testing_p;
39525 };
39526
39527 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39528 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39529 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39530
39531 /* Get a vector mode of the same size as the original but with elements
39532 twice as wide. This is only guaranteed to apply to integral vectors. */
39533
39534 static inline enum machine_mode
39535 get_mode_wider_vector (enum machine_mode o)
39536 {
39537 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39538 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39539 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39540 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39541 return n;
39542 }
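/* Example (illustrative): get_mode_wider_vector (V16QImode) yields V8HImode --
   the same 16-byte size, half as many elements, each twice as wide -- which
   is exactly what the asserts above verify and what the widening broadcast
   code below relies on.  */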
39543
39544 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39545 fill target with val via vec_duplicate. */
39546
39547 static bool
39548 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39549 {
39550 bool ok;
39551 rtx insn, dup;
39552
39553 /* First attempt to recognize VAL as-is. */
39554 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39555 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39556 if (recog_memoized (insn) < 0)
39557 {
39558 rtx seq;
39559 /* If that fails, force VAL into a register. */
39560
39561 start_sequence ();
39562 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39563 seq = get_insns ();
39564 end_sequence ();
39565 if (seq)
39566 emit_insn_before (seq, insn);
39567
39568 ok = recog_memoized (insn) >= 0;
39569 gcc_assert (ok);
39570 }
39571 return true;
39572 }
39573
39574 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39575 with all elements equal to VAR. Return true if successful. */
39576
39577 static bool
39578 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39579 rtx target, rtx val)
39580 {
39581 bool ok;
39582
39583 switch (mode)
39584 {
39585 case V2SImode:
39586 case V2SFmode:
39587 if (!mmx_ok)
39588 return false;
39589 /* FALLTHRU */
39590
39591 case V4DFmode:
39592 case V4DImode:
39593 case V8SFmode:
39594 case V8SImode:
39595 case V2DFmode:
39596 case V2DImode:
39597 case V4SFmode:
39598 case V4SImode:
39599 case V16SImode:
39600 case V8DImode:
39601 case V16SFmode:
39602 case V8DFmode:
39603 return ix86_vector_duplicate_value (mode, target, val);
39604
39605 case V4HImode:
39606 if (!mmx_ok)
39607 return false;
39608 if (TARGET_SSE || TARGET_3DNOW_A)
39609 {
39610 rtx x;
39611
39612 val = gen_lowpart (SImode, val);
39613 x = gen_rtx_TRUNCATE (HImode, val);
39614 x = gen_rtx_VEC_DUPLICATE (mode, x);
39615 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39616 return true;
39617 }
39618 goto widen;
39619
39620 case V8QImode:
39621 if (!mmx_ok)
39622 return false;
39623 goto widen;
39624
39625 case V8HImode:
39626 if (TARGET_SSE2)
39627 {
39628 struct expand_vec_perm_d dperm;
39629 rtx tmp1, tmp2;
39630
39631 permute:
39632 memset (&dperm, 0, sizeof (dperm));
39633 dperm.target = target;
39634 dperm.vmode = mode;
39635 dperm.nelt = GET_MODE_NUNITS (mode);
39636 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39637 dperm.one_operand_p = true;
39638
39639 /* Extend to SImode using a paradoxical SUBREG. */
39640 tmp1 = gen_reg_rtx (SImode);
39641 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39642
39643 /* Insert the SImode value as low element of a V4SImode vector. */
39644 tmp2 = gen_reg_rtx (V4SImode);
39645 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39646 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39647
39648 ok = (expand_vec_perm_1 (&dperm)
39649 || expand_vec_perm_broadcast_1 (&dperm));
39650 gcc_assert (ok);
39651 return ok;
39652 }
39653 goto widen;
39654
39655 case V16QImode:
39656 if (TARGET_SSE2)
39657 goto permute;
39658 goto widen;
39659
39660 widen:
39661 /* Replicate the value once into the next wider mode and recurse. */
39662 {
39663 enum machine_mode smode, wsmode, wvmode;
39664 rtx x;
39665
39666 smode = GET_MODE_INNER (mode);
39667 wvmode = get_mode_wider_vector (mode);
39668 wsmode = GET_MODE_INNER (wvmode);
39669
39670 val = convert_modes (wsmode, smode, val, true);
39671 x = expand_simple_binop (wsmode, ASHIFT, val,
39672 GEN_INT (GET_MODE_BITSIZE (smode)),
39673 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39674 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39675
39676 x = gen_reg_rtx (wvmode);
39677 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39678 gcc_assert (ok);
39679 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39680 return ok;
39681 }
39682
39683 case V16HImode:
39684 case V32QImode:
39685 {
39686 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39687 rtx x = gen_reg_rtx (hvmode);
39688
39689 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39690 gcc_assert (ok);
39691
39692 x = gen_rtx_VEC_CONCAT (mode, x, x);
39693 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39694 }
39695 return true;
39696
39697 default:
39698 return false;
39699 }
39700 }
39701
39702 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39703 whose ONE_VAR element is VAR, and other elements are zero. Return true
39704 if successful. */
39705
39706 static bool
39707 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39708 rtx target, rtx var, int one_var)
39709 {
39710 enum machine_mode vsimode;
39711 rtx new_target;
39712 rtx x, tmp;
39713 bool use_vector_set = false;
39714
39715 switch (mode)
39716 {
39717 case V2DImode:
39718 /* For SSE4.1, we normally use vector set. But if the second
39719 element is zero and inter-unit moves are OK, we use movq
39720 instead. */
39721 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39722 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39723 && one_var == 0));
39724 break;
39725 case V16QImode:
39726 case V4SImode:
39727 case V4SFmode:
39728 use_vector_set = TARGET_SSE4_1;
39729 break;
39730 case V8HImode:
39731 use_vector_set = TARGET_SSE2;
39732 break;
39733 case V4HImode:
39734 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39735 break;
39736 case V32QImode:
39737 case V16HImode:
39738 case V8SImode:
39739 case V8SFmode:
39740 case V4DFmode:
39741 use_vector_set = TARGET_AVX;
39742 break;
39743 case V4DImode:
39744 /* Use ix86_expand_vector_set in 64bit mode only. */
39745 use_vector_set = TARGET_AVX && TARGET_64BIT;
39746 break;
39747 default:
39748 break;
39749 }
39750
39751 if (use_vector_set)
39752 {
39753 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39754 var = force_reg (GET_MODE_INNER (mode), var);
39755 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39756 return true;
39757 }
39758
39759 switch (mode)
39760 {
39761 case V2SFmode:
39762 case V2SImode:
39763 if (!mmx_ok)
39764 return false;
39765 /* FALLTHRU */
39766
39767 case V2DFmode:
39768 case V2DImode:
39769 if (one_var != 0)
39770 return false;
39771 var = force_reg (GET_MODE_INNER (mode), var);
39772 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39773 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39774 return true;
39775
39776 case V4SFmode:
39777 case V4SImode:
39778 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39779 new_target = gen_reg_rtx (mode);
39780 else
39781 new_target = target;
39782 var = force_reg (GET_MODE_INNER (mode), var);
39783 x = gen_rtx_VEC_DUPLICATE (mode, var);
39784 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39785 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39786 if (one_var != 0)
39787 {
39788 /* We need to shuffle the value to the correct position, so
39789 create a new pseudo to store the intermediate result. */
39790
39791 /* With SSE2, we can use the integer shuffle insns. */
39792 if (mode != V4SFmode && TARGET_SSE2)
39793 {
39794 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39795 const1_rtx,
39796 GEN_INT (one_var == 1 ? 0 : 1),
39797 GEN_INT (one_var == 2 ? 0 : 1),
39798 GEN_INT (one_var == 3 ? 0 : 1)));
39799 if (target != new_target)
39800 emit_move_insn (target, new_target);
39801 return true;
39802 }
39803
39804 /* Otherwise convert the intermediate result to V4SFmode and
39805 use the SSE1 shuffle instructions. */
39806 if (mode != V4SFmode)
39807 {
39808 tmp = gen_reg_rtx (V4SFmode);
39809 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39810 }
39811 else
39812 tmp = new_target;
39813
39814 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39815 const1_rtx,
39816 GEN_INT (one_var == 1 ? 0 : 1),
39817 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39818 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39819
39820 if (mode != V4SFmode)
39821 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39822 else if (tmp != target)
39823 emit_move_insn (target, tmp);
39824 }
39825 else if (target != new_target)
39826 emit_move_insn (target, new_target);
39827 return true;
39828
39829 case V8HImode:
39830 case V16QImode:
39831 vsimode = V4SImode;
39832 goto widen;
39833 case V4HImode:
39834 case V8QImode:
39835 if (!mmx_ok)
39836 return false;
39837 vsimode = V2SImode;
39838 goto widen;
39839 widen:
39840 if (one_var != 0)
39841 return false;
39842
39843 /* Zero extend the variable element to SImode and recurse. */
39844 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
39845
39846 x = gen_reg_rtx (vsimode);
39847 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
39848 var, one_var))
39849 gcc_unreachable ();
39850
39851 emit_move_insn (target, gen_lowpart (mode, x));
39852 return true;
39853
39854 default:
39855 return false;
39856 }
39857 }
39858
39859 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39860 consisting of the values in VALS. It is known that all elements
39861 except ONE_VAR are constants. Return true if successful. */
39862
39863 static bool
39864 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
39865 rtx target, rtx vals, int one_var)
39866 {
39867 rtx var = XVECEXP (vals, 0, one_var);
39868 enum machine_mode wmode;
39869 rtx const_vec, x;
39870
39871 const_vec = copy_rtx (vals);
39872 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
39873 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
39874
39875 switch (mode)
39876 {
39877 case V2DFmode:
39878 case V2DImode:
39879 case V2SFmode:
39880 case V2SImode:
39881 /* For the two element vectors, it's just as easy to use
39882 the general case. */
39883 return false;
39884
39885 case V4DImode:
39886 /* Use ix86_expand_vector_set in 64bit mode only. */
39887 if (!TARGET_64BIT)
39888 return false;
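/* FALLTHRU */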
39889 case V4DFmode:
39890 case V8SFmode:
39891 case V8SImode:
39892 case V16HImode:
39893 case V32QImode:
39894 case V4SFmode:
39895 case V4SImode:
39896 case V8HImode:
39897 case V4HImode:
39898 break;
39899
39900 case V16QImode:
39901 if (TARGET_SSE4_1)
39902 break;
39903 wmode = V8HImode;
39904 goto widen;
39905 case V8QImode:
39906 wmode = V4HImode;
39907 goto widen;
39908 widen:
39909 /* There's no way to set one QImode entry easily. Combine
39910 the variable value with its adjacent constant value, and
39911 promote to an HImode set. */
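/* Concretely: if ONE_VAR is odd the variable byte occupies the high half, so the pair becomes (var << 8) | (constant & 0xff); otherwise it becomes var | (constant << 8). */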
39912 x = XVECEXP (vals, 0, one_var ^ 1);
39913 if (one_var & 1)
39914 {
39915 var = convert_modes (HImode, QImode, var, true);
39916 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
39917 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39918 x = GEN_INT (INTVAL (x) & 0xff);
39919 }
39920 else
39921 {
39922 var = convert_modes (HImode, QImode, var, true);
39923 x = gen_int_mode (INTVAL (x) << 8, HImode);
39924 }
39925 if (x != const0_rtx)
39926 var = expand_simple_binop (HImode, IOR, var, x, var,
39927 1, OPTAB_LIB_WIDEN);
39928
39929 x = gen_reg_rtx (wmode);
39930 emit_move_insn (x, gen_lowpart (wmode, const_vec));
39931 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
39932
39933 emit_move_insn (target, gen_lowpart (mode, x));
39934 return true;
39935
39936 default:
39937 return false;
39938 }
39939
39940 emit_move_insn (target, const_vec);
39941 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39942 return true;
39943 }
39944
39945 /* A subroutine of ix86_expand_vector_init_general. Use vector
39946 concatenate to handle the most general case: all values variable,
39947 and none identical. */
39948
39949 static void
39950 ix86_expand_vector_init_concat (enum machine_mode mode,
39951 rtx target, rtx *ops, int n)
39952 {
39953 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
39954 rtx first[16], second[8], third[4];
39955 rtvec v;
39956 int i, j;
39957
39958 switch (n)
39959 {
39960 case 2:
39961 switch (mode)
39962 {
39963 case V16SImode:
39964 cmode = V8SImode;
39965 break;
39966 case V16SFmode:
39967 cmode = V8SFmode;
39968 break;
39969 case V8DImode:
39970 cmode = V4DImode;
39971 break;
39972 case V8DFmode:
39973 cmode = V4DFmode;
39974 break;
39975 case V8SImode:
39976 cmode = V4SImode;
39977 break;
39978 case V8SFmode:
39979 cmode = V4SFmode;
39980 break;
39981 case V4DImode:
39982 cmode = V2DImode;
39983 break;
39984 case V4DFmode:
39985 cmode = V2DFmode;
39986 break;
39987 case V4SImode:
39988 cmode = V2SImode;
39989 break;
39990 case V4SFmode:
39991 cmode = V2SFmode;
39992 break;
39993 case V2DImode:
39994 cmode = DImode;
39995 break;
39996 case V2SImode:
39997 cmode = SImode;
39998 break;
39999 case V2DFmode:
40000 cmode = DFmode;
40001 break;
40002 case V2SFmode:
40003 cmode = SFmode;
40004 break;
40005 default:
40006 gcc_unreachable ();
40007 }
40008
40009 if (!register_operand (ops[1], cmode))
40010 ops[1] = force_reg (cmode, ops[1]);
40011 if (!register_operand (ops[0], cmode))
40012 ops[0] = force_reg (cmode, ops[0]);
40013 emit_insn (gen_rtx_SET (VOIDmode, target,
40014 gen_rtx_VEC_CONCAT (mode, ops[0],
40015 ops[1])));
40016 break;
40017
40018 case 4:
40019 switch (mode)
40020 {
40021 case V4DImode:
40022 cmode = V2DImode;
40023 break;
40024 case V4DFmode:
40025 cmode = V2DFmode;
40026 break;
40027 case V4SImode:
40028 cmode = V2SImode;
40029 break;
40030 case V4SFmode:
40031 cmode = V2SFmode;
40032 break;
40033 default:
40034 gcc_unreachable ();
40035 }
40036 goto half;
40037
40038 case 8:
40039 switch (mode)
40040 {
40041 case V8DImode:
40042 cmode = V2DImode;
40043 hmode = V4DImode;
40044 break;
40045 case V8DFmode:
40046 cmode = V2DFmode;
40047 hmode = V4DFmode;
40048 break;
40049 case V8SImode:
40050 cmode = V2SImode;
40051 hmode = V4SImode;
40052 break;
40053 case V8SFmode:
40054 cmode = V2SFmode;
40055 hmode = V4SFmode;
40056 break;
40057 default:
40058 gcc_unreachable ();
40059 }
40060 goto half;
40061
40062 case 16:
40063 switch (mode)
40064 {
40065 case V16SImode:
40066 cmode = V2SImode;
40067 hmode = V4SImode;
40068 gmode = V8SImode;
40069 break;
40070 case V16SFmode:
40071 cmode = V2SFmode;
40072 hmode = V4SFmode;
40073 gmode = V8SFmode;
40074 break;
40075 default:
40076 gcc_unreachable ();
40077 }
40078 goto half;
40079
40080 half:
40081 /* FIXME: We process inputs backward to help RA. PR 36222. */
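/* Concatenate adjacent input pairs into CMODE registers, then keep pairing the intermediate results (through HMODE and, for the 512-bit modes, GMODE) until a single MODE vector remains. */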
40082 i = n - 1;
40083 j = (n >> 1) - 1;
40084 for (; i > 0; i -= 2, j--)
40085 {
40086 first[j] = gen_reg_rtx (cmode);
40087 v = gen_rtvec (2, ops[i - 1], ops[i]);
40088 ix86_expand_vector_init (false, first[j],
40089 gen_rtx_PARALLEL (cmode, v));
40090 }
40091
40092 n >>= 1;
40093 if (n > 4)
40094 {
40095 gcc_assert (hmode != VOIDmode);
40096 gcc_assert (gmode != VOIDmode);
40097 for (i = j = 0; i < n; i += 2, j++)
40098 {
40099 second[j] = gen_reg_rtx (hmode);
40100 ix86_expand_vector_init_concat (hmode, second [j],
40101 &first [i], 2);
40102 }
40103 n >>= 1;
40104 for (i = j = 0; i < n; i += 2, j++)
40105 {
40106 third[j] = gen_reg_rtx (gmode);
40107 ix86_expand_vector_init_concat (gmode, third[j],
40108 &second[i], 2);
40109 }
40110 n >>= 1;
40111 ix86_expand_vector_init_concat (mode, target, third, n);
40112 }
40113 else if (n > 2)
40114 {
40115 gcc_assert (hmode != VOIDmode);
40116 for (i = j = 0; i < n; i += 2, j++)
40117 {
40118 second[j] = gen_reg_rtx (hmode);
40119 ix86_expand_vector_init_concat (hmode, second [j],
40120 &first [i], 2);
40121 }
40122 n >>= 1;
40123 ix86_expand_vector_init_concat (mode, target, second, n);
40124 }
40125 else
40126 ix86_expand_vector_init_concat (mode, target, first, n);
40127 break;
40128
40129 default:
40130 gcc_unreachable ();
40131 }
40132 }
40133
40134 /* A subroutine of ix86_expand_vector_init_general. Use vector
40135 interleave to handle the most general case: all values variable,
40136 and none identical. */
40137
40138 static void
40139 ix86_expand_vector_init_interleave (enum machine_mode mode,
40140 rtx target, rtx *ops, int n)
40141 {
40142 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40143 int i, j;
40144 rtx op0, op1;
40145 rtx (*gen_load_even) (rtx, rtx, rtx);
40146 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40147 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40148
40149 switch (mode)
40150 {
40151 case V8HImode:
40152 gen_load_even = gen_vec_setv8hi;
40153 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40154 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40155 inner_mode = HImode;
40156 first_imode = V4SImode;
40157 second_imode = V2DImode;
40158 third_imode = VOIDmode;
40159 break;
40160 case V16QImode:
40161 gen_load_even = gen_vec_setv16qi;
40162 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40163 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40164 inner_mode = QImode;
40165 first_imode = V8HImode;
40166 second_imode = V4SImode;
40167 third_imode = V2DImode;
40168 break;
40169 default:
40170 gcc_unreachable ();
40171 }
40172
40173 for (i = 0; i < n; i++)
40174 {
40175 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40176 op0 = gen_reg_rtx (SImode);
40177 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40178
40179 /* Insert the SImode value as low element of V4SImode vector. */
40180 op1 = gen_reg_rtx (V4SImode);
40181 op0 = gen_rtx_VEC_MERGE (V4SImode,
40182 gen_rtx_VEC_DUPLICATE (V4SImode,
40183 op0),
40184 CONST0_RTX (V4SImode),
40185 const1_rtx);
40186 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40187
40188 /* Cast the V4SImode vector back to a vector in the original mode. */
40189 op0 = gen_reg_rtx (mode);
40190 emit_move_insn (op0, gen_lowpart (mode, op1));
40191
40192 /* Load even elements into the second position. */
40193 emit_insn (gen_load_even (op0,
40194 force_reg (inner_mode,
40195 ops [i + i + 1]),
40196 const1_rtx));
40197
40198 /* Cast vector to FIRST_IMODE vector. */
40199 ops[i] = gen_reg_rtx (first_imode);
40200 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40201 }
40202
40203 /* Interleave low FIRST_IMODE vectors. */
40204 for (i = j = 0; i < n; i += 2, j++)
40205 {
40206 op0 = gen_reg_rtx (first_imode);
40207 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40208
40209 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40210 ops[j] = gen_reg_rtx (second_imode);
40211 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40212 }
40213
40214 /* Interleave low SECOND_IMODE vectors. */
40215 switch (second_imode)
40216 {
40217 case V4SImode:
40218 for (i = j = 0; i < n / 2; i += 2, j++)
40219 {
40220 op0 = gen_reg_rtx (second_imode);
40221 emit_insn (gen_interleave_second_low (op0, ops[i],
40222 ops[i + 1]));
40223
40224 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40225 vector. */
40226 ops[j] = gen_reg_rtx (third_imode);
40227 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40228 }
40229 second_imode = V2DImode;
40230 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40231 /* FALLTHRU */
40232
40233 case V2DImode:
40234 op0 = gen_reg_rtx (second_imode);
40235 emit_insn (gen_interleave_second_low (op0, ops[0],
40236 ops[1]));
40237
40238 /* Cast the SECOND_IMODE vector back to a vector in the original
40239 mode. */
40240 emit_insn (gen_rtx_SET (VOIDmode, target,
40241 gen_lowpart (mode, op0)));
40242 break;
40243
40244 default:
40245 gcc_unreachable ();
40246 }
40247 }
40248
40249 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40250 all values variable, and none identical. */
40251
40252 static void
40253 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40254 rtx target, rtx vals)
40255 {
40256 rtx ops[64], op0, op1;
40257 enum machine_mode half_mode = VOIDmode;
40258 int n, i;
40259
40260 switch (mode)
40261 {
40262 case V2SFmode:
40263 case V2SImode:
40264 if (!mmx_ok && !TARGET_SSE)
40265 break;
40266 /* FALLTHRU */
40267
40268 case V16SImode:
40269 case V16SFmode:
40270 case V8DFmode:
40271 case V8DImode:
40272 case V8SFmode:
40273 case V8SImode:
40274 case V4DFmode:
40275 case V4DImode:
40276 case V4SFmode:
40277 case V4SImode:
40278 case V2DFmode:
40279 case V2DImode:
40280 n = GET_MODE_NUNITS (mode);
40281 for (i = 0; i < n; i++)
40282 ops[i] = XVECEXP (vals, 0, i);
40283 ix86_expand_vector_init_concat (mode, target, ops, n);
40284 return;
40285
40286 case V32QImode:
40287 half_mode = V16QImode;
40288 goto half;
40289
40290 case V16HImode:
40291 half_mode = V8HImode;
40292 goto half;
40293
40294 half:
40295 n = GET_MODE_NUNITS (mode);
40296 for (i = 0; i < n; i++)
40297 ops[i] = XVECEXP (vals, 0, i);
40298 op0 = gen_reg_rtx (half_mode);
40299 op1 = gen_reg_rtx (half_mode);
40300 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40301 n >> 2);
40302 ix86_expand_vector_init_interleave (half_mode, op1,
40303 &ops [n >> 1], n >> 2);
40304 emit_insn (gen_rtx_SET (VOIDmode, target,
40305 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40306 return;
40307
40308 case V16QImode:
40309 if (!TARGET_SSE4_1)
40310 break;
40311 /* FALLTHRU */
40312
40313 case V8HImode:
40314 if (!TARGET_SSE2)
40315 break;
40316
40317 /* Don't use ix86_expand_vector_init_interleave if we can't
40318 move from GPR to SSE register directly. */
40319 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40320 break;
40321
40322 n = GET_MODE_NUNITS (mode);
40323 for (i = 0; i < n; i++)
40324 ops[i] = XVECEXP (vals, 0, i);
40325 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40326 return;
40327
40328 case V4HImode:
40329 case V8QImode:
40330 break;
40331
40332 default:
40333 gcc_unreachable ();
40334 }
40335
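/* Fallback: pack the vector elements into word_mode integers using shifts and IORs, then assemble the target vector from those words. */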
40336 {
40337 int i, j, n_elts, n_words, n_elt_per_word;
40338 enum machine_mode inner_mode;
40339 rtx words[4], shift;
40340
40341 inner_mode = GET_MODE_INNER (mode);
40342 n_elts = GET_MODE_NUNITS (mode);
40343 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40344 n_elt_per_word = n_elts / n_words;
40345 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40346
40347 for (i = 0; i < n_words; ++i)
40348 {
40349 rtx word = NULL_RTX;
40350
40351 for (j = 0; j < n_elt_per_word; ++j)
40352 {
40353 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40354 elt = convert_modes (word_mode, inner_mode, elt, true);
40355
40356 if (j == 0)
40357 word = elt;
40358 else
40359 {
40360 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40361 word, 1, OPTAB_LIB_WIDEN);
40362 word = expand_simple_binop (word_mode, IOR, word, elt,
40363 word, 1, OPTAB_LIB_WIDEN);
40364 }
40365 }
40366
40367 words[i] = word;
40368 }
40369
40370 if (n_words == 1)
40371 emit_move_insn (target, gen_lowpart (mode, words[0]));
40372 else if (n_words == 2)
40373 {
40374 rtx tmp = gen_reg_rtx (mode);
40375 emit_clobber (tmp);
40376 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40377 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40378 emit_move_insn (target, tmp);
40379 }
40380 else if (n_words == 4)
40381 {
40382 rtx tmp = gen_reg_rtx (V4SImode);
40383 gcc_assert (word_mode == SImode);
40384 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40385 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40386 emit_move_insn (target, gen_lowpart (mode, tmp));
40387 }
40388 else
40389 gcc_unreachable ();
40390 }
40391 }
40392
40393 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40394 instructions unless MMX_OK is true. */
40395
40396 void
40397 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40398 {
40399 enum machine_mode mode = GET_MODE (target);
40400 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40401 int n_elts = GET_MODE_NUNITS (mode);
40402 int n_var = 0, one_var = -1;
40403 bool all_same = true, all_const_zero = true;
40404 int i;
40405 rtx x;
40406
40407 for (i = 0; i < n_elts; ++i)
40408 {
40409 x = XVECEXP (vals, 0, i);
40410 if (!(CONST_INT_P (x)
40411 || GET_CODE (x) == CONST_DOUBLE
40412 || GET_CODE (x) == CONST_FIXED))
40413 n_var++, one_var = i;
40414 else if (x != CONST0_RTX (inner_mode))
40415 all_const_zero = false;
40416 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40417 all_same = false;
40418 }
40419
40420 /* Constants are best loaded from the constant pool. */
40421 if (n_var == 0)
40422 {
40423 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40424 return;
40425 }
40426
40427 /* If all values are identical, broadcast the value. */
40428 if (all_same
40429 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40430 XVECEXP (vals, 0, 0)))
40431 return;
40432
40433 /* Values where only one field is non-constant are best loaded from
40434 the pool and overwritten via move later. */
40435 if (n_var == 1)
40436 {
40437 if (all_const_zero
40438 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40439 XVECEXP (vals, 0, one_var),
40440 one_var))
40441 return;
40442
40443 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40444 return;
40445 }
40446
40447 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40448 }
40449
40450 void
40451 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40452 {
40453 enum machine_mode mode = GET_MODE (target);
40454 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40455 enum machine_mode half_mode;
40456 bool use_vec_merge = false;
40457 rtx tmp;
40458 static rtx (*gen_extract[6][2]) (rtx, rtx)
40459 = {
40460 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40461 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40462 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40463 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40464 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40465 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40466 };
40467 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40468 = {
40469 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40470 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40471 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40472 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40473 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40474 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40475 };
40476 int i, j, n;
40477
40478 switch (mode)
40479 {
40480 case V2SFmode:
40481 case V2SImode:
40482 if (mmx_ok)
40483 {
40484 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40485 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40486 if (elt == 0)
40487 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40488 else
40489 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40490 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40491 return;
40492 }
40493 break;
40494
40495 case V2DImode:
40496 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40497 if (use_vec_merge)
40498 break;
40499
40500 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40501 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40502 if (elt == 0)
40503 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40504 else
40505 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40506 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40507 return;
40508
40509 case V2DFmode:
40510 {
40511 rtx op0, op1;
40512
40513 /* For the two element vectors, we implement a VEC_CONCAT with
40514 the extraction of the other element. */
40515
40516 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40517 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40518
40519 if (elt == 0)
40520 op0 = val, op1 = tmp;
40521 else
40522 op0 = tmp, op1 = val;
40523
40524 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40525 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40526 }
40527 return;
40528
40529 case V4SFmode:
40530 use_vec_merge = TARGET_SSE4_1;
40531 if (use_vec_merge)
40532 break;
40533
40534 switch (elt)
40535 {
40536 case 0:
40537 use_vec_merge = true;
40538 break;
40539
40540 case 1:
40541 /* tmp = target = A B C D */
40542 tmp = copy_to_reg (target);
40543 /* target = A A B B */
40544 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40545 /* target = X A B B */
40546 ix86_expand_vector_set (false, target, val, 0);
40547 /* target = A X C D */
40548 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40549 const1_rtx, const0_rtx,
40550 GEN_INT (2+4), GEN_INT (3+4)));
40551 return;
40552
40553 case 2:
40554 /* tmp = target = A B C D */
40555 tmp = copy_to_reg (target);
40556 /* tmp = X B C D */
40557 ix86_expand_vector_set (false, tmp, val, 0);
40558 /* target = A B X D */
40559 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40560 const0_rtx, const1_rtx,
40561 GEN_INT (0+4), GEN_INT (3+4)));
40562 return;
40563
40564 case 3:
40565 /* tmp = target = A B C D */
40566 tmp = copy_to_reg (target);
40567 /* tmp = X B C D */
40568 ix86_expand_vector_set (false, tmp, val, 0);
40569 /* target = A B C X */
40570 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40571 const0_rtx, const1_rtx,
40572 GEN_INT (2+4), GEN_INT (0+4)));
40573 return;
40574
40575 default:
40576 gcc_unreachable ();
40577 }
40578 break;
40579
40580 case V4SImode:
40581 use_vec_merge = TARGET_SSE4_1;
40582 if (use_vec_merge)
40583 break;
40584
40585 /* Element 0 handled by vec_merge below. */
40586 if (elt == 0)
40587 {
40588 use_vec_merge = true;
40589 break;
40590 }
40591
40592 if (TARGET_SSE2)
40593 {
40594 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40595 store into element 0, then shuffle them back. */
40596
40597 rtx order[4];
40598
40599 order[0] = GEN_INT (elt);
40600 order[1] = const1_rtx;
40601 order[2] = const2_rtx;
40602 order[3] = GEN_INT (3);
40603 order[elt] = const0_rtx;
40604
40605 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40606 order[1], order[2], order[3]));
40607
40608 ix86_expand_vector_set (false, target, val, 0);
40609
40610 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40611 order[1], order[2], order[3]));
40612 }
40613 else
40614 {
40615 /* For SSE1, we have to reuse the V4SF code. */
40616 rtx t = gen_reg_rtx (V4SFmode);
40617 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40618 emit_move_insn (target, gen_lowpart (mode, t));
40619 }
40620 return;
40621
40622 case V8HImode:
40623 use_vec_merge = TARGET_SSE2;
40624 break;
40625 case V4HImode:
40626 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40627 break;
40628
40629 case V16QImode:
40630 use_vec_merge = TARGET_SSE4_1;
40631 break;
40632
40633 case V8QImode:
40634 break;
40635
40636 case V32QImode:
40637 half_mode = V16QImode;
40638 j = 0;
40639 n = 16;
40640 goto half;
40641
40642 case V16HImode:
40643 half_mode = V8HImode;
40644 j = 1;
40645 n = 8;
40646 goto half;
40647
40648 case V8SImode:
40649 half_mode = V4SImode;
40650 j = 2;
40651 n = 4;
40652 goto half;
40653
40654 case V4DImode:
40655 half_mode = V2DImode;
40656 j = 3;
40657 n = 2;
40658 goto half;
40659
40660 case V8SFmode:
40661 half_mode = V4SFmode;
40662 j = 4;
40663 n = 4;
40664 goto half;
40665
40666 case V4DFmode:
40667 half_mode = V2DFmode;
40668 j = 5;
40669 n = 2;
40670 goto half;
40671
40672 half:
40673 /* Compute offset. */
40674 i = elt / n;
40675 elt %= n;
40676
40677 gcc_assert (i <= 1);
40678
40679 /* Extract the half. */
40680 tmp = gen_reg_rtx (half_mode);
40681 emit_insn (gen_extract[j][i] (tmp, target));
40682
40683 /* Put val in tmp at elt. */
40684 ix86_expand_vector_set (false, tmp, val, elt);
40685
40686 /* Put it back. */
40687 emit_insn (gen_insert[j][i] (target, target, tmp));
40688 return;
40689
40690 default:
40691 break;
40692 }
40693
40694 if (use_vec_merge)
40695 {
40696 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40697 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40698 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40699 }
40700 else
40701 {
40702 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40703
40704 emit_move_insn (mem, target);
40705
40706 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40707 emit_move_insn (tmp, val);
40708
40709 emit_move_insn (target, mem);
40710 }
40711 }
40712
40713 void
40714 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40715 {
40716 enum machine_mode mode = GET_MODE (vec);
40717 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40718 bool use_vec_extr = false;
40719 rtx tmp;
40720
40721 switch (mode)
40722 {
40723 case V2SImode:
40724 case V2SFmode:
40725 if (!mmx_ok)
40726 break;
40727 /* FALLTHRU */
40728
40729 case V2DFmode:
40730 case V2DImode:
40731 use_vec_extr = true;
40732 break;
40733
40734 case V4SFmode:
40735 use_vec_extr = TARGET_SSE4_1;
40736 if (use_vec_extr)
40737 break;
40738
40739 switch (elt)
40740 {
40741 case 0:
40742 tmp = vec;
40743 break;
40744
40745 case 1:
40746 case 3:
40747 tmp = gen_reg_rtx (mode);
40748 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40749 GEN_INT (elt), GEN_INT (elt),
40750 GEN_INT (elt+4), GEN_INT (elt+4)));
40751 break;
40752
40753 case 2:
40754 tmp = gen_reg_rtx (mode);
40755 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40756 break;
40757
40758 default:
40759 gcc_unreachable ();
40760 }
40761 vec = tmp;
40762 use_vec_extr = true;
40763 elt = 0;
40764 break;
40765
40766 case V4SImode:
40767 use_vec_extr = TARGET_SSE4_1;
40768 if (use_vec_extr)
40769 break;
40770
40771 if (TARGET_SSE2)
40772 {
40773 switch (elt)
40774 {
40775 case 0:
40776 tmp = vec;
40777 break;
40778
40779 case 1:
40780 case 3:
40781 tmp = gen_reg_rtx (mode);
40782 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40783 GEN_INT (elt), GEN_INT (elt),
40784 GEN_INT (elt), GEN_INT (elt)));
40785 break;
40786
40787 case 2:
40788 tmp = gen_reg_rtx (mode);
40789 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40790 break;
40791
40792 default:
40793 gcc_unreachable ();
40794 }
40795 vec = tmp;
40796 use_vec_extr = true;
40797 elt = 0;
40798 }
40799 else
40800 {
40801 /* For SSE1, we have to reuse the V4SF code. */
40802 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40803 gen_lowpart (V4SFmode, vec), elt);
40804 return;
40805 }
40806 break;
40807
40808 case V8HImode:
40809 use_vec_extr = TARGET_SSE2;
40810 break;
40811 case V4HImode:
40812 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40813 break;
40814
40815 case V16QImode:
40816 use_vec_extr = TARGET_SSE4_1;
40817 break;
40818
40819 case V8SFmode:
40820 if (TARGET_AVX)
40821 {
40822 tmp = gen_reg_rtx (V4SFmode);
40823 if (elt < 4)
40824 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
40825 else
40826 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
40827 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40828 return;
40829 }
40830 break;
40831
40832 case V4DFmode:
40833 if (TARGET_AVX)
40834 {
40835 tmp = gen_reg_rtx (V2DFmode);
40836 if (elt < 2)
40837 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
40838 else
40839 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
40840 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40841 return;
40842 }
40843 break;
40844
40845 case V32QImode:
40846 if (TARGET_AVX)
40847 {
40848 tmp = gen_reg_rtx (V16QImode);
40849 if (elt < 16)
40850 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
40851 else
40852 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
40853 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40854 return;
40855 }
40856 break;
40857
40858 case V16HImode:
40859 if (TARGET_AVX)
40860 {
40861 tmp = gen_reg_rtx (V8HImode);
40862 if (elt < 8)
40863 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
40864 else
40865 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
40866 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40867 return;
40868 }
40869 break;
40870
40871 case V8SImode:
40872 if (TARGET_AVX)
40873 {
40874 tmp = gen_reg_rtx (V4SImode);
40875 if (elt < 4)
40876 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
40877 else
40878 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
40879 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40880 return;
40881 }
40882 break;
40883
40884 case V4DImode:
40885 if (TARGET_AVX)
40886 {
40887 tmp = gen_reg_rtx (V2DImode);
40888 if (elt < 2)
40889 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
40890 else
40891 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
40892 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40893 return;
40894 }
40895 break;
40896
40897 case V16SFmode:
40898 tmp = gen_reg_rtx (V8SFmode);
40899 if (elt < 8)
40900 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
40901 else
40902 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
40903 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40904 return;
40905
40906 case V8DFmode:
40907 tmp = gen_reg_rtx (V4DFmode);
40908 if (elt < 4)
40909 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
40910 else
40911 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
40912 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40913 return;
40914
40915 case V16SImode:
40916 tmp = gen_reg_rtx (V8SImode);
40917 if (elt < 8)
40918 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
40919 else
40920 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
40921 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40922 return;
40923
40924 case V8DImode:
40925 tmp = gen_reg_rtx (V4DImode);
40926 if (elt < 4)
40927 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
40928 else
40929 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
40930 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40931 return;
40932
40933 case V8QImode:
40934 /* ??? Could extract the appropriate HImode element and shift. */
40935 default:
40936 break;
40937 }
40938
40939 if (use_vec_extr)
40940 {
40941 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
40942 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
40943
40944 /* Let the rtl optimizers know about the zero extension performed. */
40945 if (inner_mode == QImode || inner_mode == HImode)
40946 {
40947 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
40948 target = gen_lowpart (SImode, target);
40949 }
40950
40951 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40952 }
40953 else
40954 {
40955 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40956
40957 emit_move_insn (mem, vec);
40958
40959 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40960 emit_move_insn (target, tmp);
40961 }
40962 }
40963
40964 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
40965 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
40966 The upper bits of DEST are undefined, though they shouldn't cause
40967 exceptions (some bits from src or all zeros are ok). */
40968
40969 static void
40970 emit_reduc_half (rtx dest, rtx src, int i)
40971 {
40972 rtx tem, d = dest;
40973 switch (GET_MODE (src))
40974 {
40975 case V4SFmode:
40976 if (i == 128)
40977 tem = gen_sse_movhlps (dest, src, src);
40978 else
40979 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
40980 GEN_INT (1 + 4), GEN_INT (1 + 4));
40981 break;
40982 case V2DFmode:
40983 tem = gen_vec_interleave_highv2df (dest, src, src);
40984 break;
40985 case V16QImode:
40986 case V8HImode:
40987 case V4SImode:
40988 case V2DImode:
40989 d = gen_reg_rtx (V1TImode);
40990 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
40991 GEN_INT (i / 2));
40992 break;
40993 case V8SFmode:
40994 if (i == 256)
40995 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
40996 else
40997 tem = gen_avx_shufps256 (dest, src, src,
40998 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
40999 break;
41000 case V4DFmode:
41001 if (i == 256)
41002 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
41003 else
41004 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
41005 break;
41006 case V32QImode:
41007 case V16HImode:
41008 case V8SImode:
41009 case V4DImode:
41010 if (i == 256)
41011 {
41012 if (GET_MODE (dest) != V4DImode)
41013 d = gen_reg_rtx (V4DImode);
41014 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
41015 gen_lowpart (V4DImode, src),
41016 const1_rtx);
41017 }
41018 else
41019 {
41020 d = gen_reg_rtx (V2TImode);
41021 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41022 GEN_INT (i / 2));
41023 }
41024 break;
41025 case V16SImode:
41026 case V16SFmode:
41027 case V8DImode:
41028 case V8DFmode:
41029 if (i > 128)
41030 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41031 gen_lowpart (V16SImode, src),
41032 gen_lowpart (V16SImode, src),
41033 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41034 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41035 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41036 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41037 GEN_INT (0xC), GEN_INT (0xD),
41038 GEN_INT (0xE), GEN_INT (0xF),
41039 GEN_INT (0x10), GEN_INT (0x11),
41040 GEN_INT (0x12), GEN_INT (0x13),
41041 GEN_INT (0x14), GEN_INT (0x15),
41042 GEN_INT (0x16), GEN_INT (0x17));
41043 else
41044 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41045 gen_lowpart (V16SImode, src),
41046 GEN_INT (i == 128 ? 0x2 : 0x1),
41047 GEN_INT (0x3),
41048 GEN_INT (0x3),
41049 GEN_INT (0x3),
41050 GEN_INT (i == 128 ? 0x6 : 0x5),
41051 GEN_INT (0x7),
41052 GEN_INT (0x7),
41053 GEN_INT (0x7),
41054 GEN_INT (i == 128 ? 0xA : 0x9),
41055 GEN_INT (0xB),
41056 GEN_INT (0xB),
41057 GEN_INT (0xB),
41058 GEN_INT (i == 128 ? 0xE : 0xD),
41059 GEN_INT (0xF),
41060 GEN_INT (0xF),
41061 GEN_INT (0xF));
41062 break;
41063 default:
41064 gcc_unreachable ();
41065 }
41066 emit_insn (tem);
41067 if (d != dest)
41068 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41069 }
41070
41071 /* Expand a vector reduction. FN is the binary pattern to reduce;
41072 DEST is the destination; IN is the input vector. */
41073
41074 void
41075 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41076 {
41077 rtx half, dst, vec = in;
41078 enum machine_mode mode = GET_MODE (in);
41079 int i;
41080
41081 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
41082 if (TARGET_SSE4_1
41083 && mode == V8HImode
41084 && fn == gen_uminv8hi3)
41085 {
41086 emit_insn (gen_sse4_1_phminposuw (dest, in));
41087 return;
41088 }
41089
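/* Fold the vector in half repeatedly: each iteration moves the upper half of the active part into HALF and combines it with VEC using FN, halving the active width until a single element remains. */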
41090 for (i = GET_MODE_BITSIZE (mode);
41091 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41092 i >>= 1)
41093 {
41094 half = gen_reg_rtx (mode);
41095 emit_reduc_half (half, vec, i);
41096 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41097 dst = dest;
41098 else
41099 dst = gen_reg_rtx (mode);
41100 emit_insn (fn (dst, half, vec));
41101 vec = dst;
41102 }
41103 }
41104 \f
41105 /* Target hook for scalar_mode_supported_p. */
41106 static bool
41107 ix86_scalar_mode_supported_p (enum machine_mode mode)
41108 {
41109 if (DECIMAL_FLOAT_MODE_P (mode))
41110 return default_decimal_float_supported_p ();
41111 else if (mode == TFmode)
41112 return true;
41113 else
41114 return default_scalar_mode_supported_p (mode);
41115 }
41116
41117 /* Implements target hook vector_mode_supported_p. */
41118 static bool
41119 ix86_vector_mode_supported_p (enum machine_mode mode)
41120 {
41121 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41122 return true;
41123 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41124 return true;
41125 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41126 return true;
41127 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41128 return true;
41129 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41130 return true;
41131 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41132 return true;
41133 return false;
41134 }
41135
41136 /* Target hook for c_mode_for_suffix. */
41137 static enum machine_mode
41138 ix86_c_mode_for_suffix (char suffix)
41139 {
41140 if (suffix == 'q')
41141 return TFmode;
41142 if (suffix == 'w')
41143 return XFmode;
41144
41145 return VOIDmode;
41146 }
41147
41148 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41149
41150 We do this in the new i386 backend to maintain source compatibility
41151 with the old cc0-based compiler. */
41152
41153 static tree
41154 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
41155 tree inputs ATTRIBUTE_UNUSED,
41156 tree clobbers)
41157 {
41158 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41159 clobbers);
41160 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41161 clobbers);
41162 return clobbers;
41163 }
41164
41165 /* Implements target vector targetm.asm.encode_section_info. */
41166
41167 static void ATTRIBUTE_UNUSED
41168 ix86_encode_section_info (tree decl, rtx rtl, int first)
41169 {
41170 default_encode_section_info (decl, rtl, first);
41171
41172 if (TREE_CODE (decl) == VAR_DECL
41173 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41174 && ix86_in_large_data_p (decl))
41175 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41176 }
41177
41178 /* Worker function for REVERSE_CONDITION. */
41179
41180 enum rtx_code
41181 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41182 {
41183 return (mode != CCFPmode && mode != CCFPUmode
41184 ? reverse_condition (code)
41185 : reverse_condition_maybe_unordered (code));
41186 }
41187
41188 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41189 to OPERANDS[0]. */
41190
41191 const char *
41192 output_387_reg_move (rtx insn, rtx *operands)
41193 {
41194 if (REG_P (operands[0]))
41195 {
41196 if (REG_P (operands[1])
41197 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41198 {
41199 if (REGNO (operands[0]) == FIRST_STACK_REG)
41200 return output_387_ffreep (operands, 0);
41201 return "fstp\t%y0";
41202 }
41203 if (STACK_TOP_P (operands[0]))
41204 return "fld%Z1\t%y1";
41205 return "fst\t%y0";
41206 }
41207 else if (MEM_P (operands[0]))
41208 {
41209 gcc_assert (REG_P (operands[1]));
41210 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41211 return "fstp%Z0\t%y0";
41212 else
41213 {
41214 /* There is no non-popping store to memory for XFmode.
41215 So if we need one, follow the store with a load. */
41216 if (GET_MODE (operands[0]) == XFmode)
41217 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41218 else
41219 return "fst%Z0\t%y0";
41220 }
41221 }
41222 else
41223 gcc_unreachable ();
41224 }
41225
41226 /* Output code to perform a conditional jump to LABEL if the C2 flag in
41227 the FP status register is set. */
41228
41229 void
41230 ix86_emit_fp_unordered_jump (rtx label)
41231 {
41232 rtx reg = gen_reg_rtx (HImode);
41233 rtx temp;
41234
41235 emit_insn (gen_x86_fnstsw_1 (reg));
41236
41237 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41238 {
41239 emit_insn (gen_x86_sahf_1 (reg));
41240
41241 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41242 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41243 }
41244 else
41245 {
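/* No SAHF: test the high byte of the saved status word directly; bit 0x04 there is C2 (bit 10 of the x87 status word). */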
41246 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41247
41248 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41249 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41250 }
41251
41252 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41253 gen_rtx_LABEL_REF (VOIDmode, label),
41254 pc_rtx);
41255 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41256
41257 emit_jump_insn (temp);
41258 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41259 }
41260
41261 /* Output code to perform a log1p XFmode calculation. */
41262
41263 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41264 {
41265 rtx label1 = gen_label_rtx ();
41266 rtx label2 = gen_label_rtx ();
41267
41268 rtx tmp = gen_reg_rtx (XFmode);
41269 rtx tmp2 = gen_reg_rtx (XFmode);
41270 rtx test;
41271
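/* fyl2xp1 is only specified for |x| < 1 - sqrt(2)/2 (about 0.2928932), so compare |op1| against that threshold and fall back to fyl2x on 1.0 + op1 when it is exceeded. */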
41272 emit_insn (gen_absxf2 (tmp, op1));
41273 test = gen_rtx_GE (VOIDmode, tmp,
41274 CONST_DOUBLE_FROM_REAL_VALUE (
41275 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41276 XFmode));
41277 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41278
41279 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41280 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41281 emit_jump (label2);
41282
41283 emit_label (label1);
41284 emit_move_insn (tmp, CONST1_RTX (XFmode));
41285 emit_insn (gen_addxf3 (tmp, op1, tmp));
41286 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41287 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41288
41289 emit_label (label2);
41290 }
41291
41292 /* Emit x87 code to round OP1 to the nearest integer, halfway cases away from zero, and store the result in OP0. */
41293 void ix86_emit_i387_round (rtx op0, rtx op1)
41294 {
41295 enum machine_mode inmode = GET_MODE (op1);
41296 enum machine_mode outmode = GET_MODE (op0);
41297 rtx e1, e2, res, tmp, tmp1, half;
41298 rtx scratch = gen_reg_rtx (HImode);
41299 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41300 rtx jump_label = gen_label_rtx ();
41301 rtx insn;
41302 rtx (*gen_abs) (rtx, rtx);
41303 rtx (*gen_neg) (rtx, rtx);
41304
41305 switch (inmode)
41306 {
41307 case SFmode:
41308 gen_abs = gen_abssf2;
41309 break;
41310 case DFmode:
41311 gen_abs = gen_absdf2;
41312 break;
41313 case XFmode:
41314 gen_abs = gen_absxf2;
41315 break;
41316 default:
41317 gcc_unreachable ();
41318 }
41319
41320 switch (outmode)
41321 {
41322 case SFmode:
41323 gen_neg = gen_negsf2;
41324 break;
41325 case DFmode:
41326 gen_neg = gen_negdf2;
41327 break;
41328 case XFmode:
41329 gen_neg = gen_negxf2;
41330 break;
41331 case HImode:
41332 gen_neg = gen_neghi2;
41333 break;
41334 case SImode:
41335 gen_neg = gen_negsi2;
41336 break;
41337 case DImode:
41338 gen_neg = gen_negdi2;
41339 break;
41340 default:
41341 gcc_unreachable ();
41342 }
41343
41344 e1 = gen_reg_rtx (inmode);
41345 e2 = gen_reg_rtx (inmode);
41346 res = gen_reg_rtx (outmode);
41347
41348 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41349
41350 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
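/* For example, round(-2.3): fabs gives 2.3, adding 0.5 gives 2.8, floor gives 2.0, and the sign recorded by fxam negates the result to -2.0. */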
41351
41352 /* scratch = fxam(op1) */
41353 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41354 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41355 UNSPEC_FXAM)));
41356 /* e1 = fabs(op1) */
41357 emit_insn (gen_abs (e1, op1));
41358
41359 /* e2 = e1 + 0.5 */
41360 half = force_reg (inmode, half);
41361 emit_insn (gen_rtx_SET (VOIDmode, e2,
41362 gen_rtx_PLUS (inmode, e1, half)));
41363
41364 /* res = floor(e2) */
41365 if (inmode != XFmode)
41366 {
41367 tmp1 = gen_reg_rtx (XFmode);
41368
41369 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41370 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41371 }
41372 else
41373 tmp1 = e2;
41374
41375 switch (outmode)
41376 {
41377 case SFmode:
41378 case DFmode:
41379 {
41380 rtx tmp0 = gen_reg_rtx (XFmode);
41381
41382 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41383
41384 emit_insn (gen_rtx_SET (VOIDmode, res,
41385 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41386 UNSPEC_TRUNC_NOOP)));
41387 }
41388 break;
41389 case XFmode:
41390 emit_insn (gen_frndintxf2_floor (res, tmp1));
41391 break;
41392 case HImode:
41393 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41394 break;
41395 case SImode:
41396 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41397 break;
41398 case DImode:
41399 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41400 break;
41401 default:
41402 gcc_unreachable ();
41403 }
41404
41405 /* flags = signbit(a) */
41406 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41407
41408 /* if (flags) then res = -res */
41409 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41410 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41411 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41412 pc_rtx);
41413 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41414 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41415 JUMP_LABEL (insn) = jump_label;
41416
41417 emit_insn (gen_neg (res, res));
41418
41419 emit_label (jump_label);
41420 LABEL_NUSES (jump_label) = 1;
41421
41422 emit_move_insn (op0, res);
41423 }
41424
41425 /* Output code to perform a Newton-Raphson approximation of a single precision
41426 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41427
41428 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41429 {
41430 rtx x0, x1, e0, e1;
41431
41432 x0 = gen_reg_rtx (mode);
41433 e0 = gen_reg_rtx (mode);
41434 e1 = gen_reg_rtx (mode);
41435 x1 = gen_reg_rtx (mode);
41436
41437 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
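/* This is one Newton-Raphson refinement of the reciprocal estimate: x1 = x0 * (2 - b * x0) = (x0 + x0) - (b * x0 * x0), computed below as e1 - e0. */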
41438
41439 b = force_reg (mode, b);
41440
41441 /* x0 = rcp(b) estimate */
41442 if (mode == V16SFmode || mode == V8DFmode)
41443 emit_insn (gen_rtx_SET (VOIDmode, x0,
41444 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41445 UNSPEC_RCP14)));
41446 else
41447 emit_insn (gen_rtx_SET (VOIDmode, x0,
41448 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41449 UNSPEC_RCP)));
41450
41451 /* e0 = x0 * b */
41452 emit_insn (gen_rtx_SET (VOIDmode, e0,
41453 gen_rtx_MULT (mode, x0, b)));
41454
41455 /* e0 = x0 * e0 */
41456 emit_insn (gen_rtx_SET (VOIDmode, e0,
41457 gen_rtx_MULT (mode, x0, e0)));
41458
41459 /* e1 = x0 + x0 */
41460 emit_insn (gen_rtx_SET (VOIDmode, e1,
41461 gen_rtx_PLUS (mode, x0, x0)));
41462
41463 /* x1 = e1 - e0 */
41464 emit_insn (gen_rtx_SET (VOIDmode, x1,
41465 gen_rtx_MINUS (mode, e1, e0)));
41466
41467 /* res = a * x1 */
41468 emit_insn (gen_rtx_SET (VOIDmode, res,
41469 gen_rtx_MULT (mode, a, x1)));
41470 }
41471
41472 /* Output code to perform a Newton-Raphson approximation of a
41473 single precision floating point [reciprocal] square root. */
41474
41475 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41476 bool recip)
41477 {
41478 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41479 REAL_VALUE_TYPE r;
41480 int unspec;
41481
41482 x0 = gen_reg_rtx (mode);
41483 e0 = gen_reg_rtx (mode);
41484 e1 = gen_reg_rtx (mode);
41485 e2 = gen_reg_rtx (mode);
41486 e3 = gen_reg_rtx (mode);
41487
41488 real_from_integer (&r, VOIDmode, -3, SIGNED);
41489 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41490
41491 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41492 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41493 unspec = UNSPEC_RSQRT;
41494
41495 if (VECTOR_MODE_P (mode))
41496 {
41497 mthree = ix86_build_const_vector (mode, true, mthree);
41498 mhalf = ix86_build_const_vector (mode, true, mhalf);
41499 /* There is no 512-bit rsqrt. There is, however, rsqrt14. */
41500 if (GET_MODE_SIZE (mode) == 64)
41501 unspec = UNSPEC_RSQRT14;
41502 }
41503
41504 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41505 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
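/* Both forms are one Newton-Raphson step on f(x) = 1/x**2 - a, i.e. x1 = x0 * (3 - a * x0 * x0) / 2; multiplying by a converts the refined rsqrt into sqrt. */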
41506
41507 a = force_reg (mode, a);
41508
41509 /* x0 = rsqrt(a) estimate */
41510 emit_insn (gen_rtx_SET (VOIDmode, x0,
41511 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41512 unspec)));
41513
41514 /* If a == 0.0, mask out the infinite rsqrt estimate so sqrt(0.0) does not produce a NaN. */
41515 if (!recip)
41516 {
41517 rtx zero, mask;
41518
41519 zero = gen_reg_rtx (mode);
41520 mask = gen_reg_rtx (mode);
41521
41522 zero = force_reg (mode, CONST0_RTX(mode));
41523
41524 /* Handle masked compare. */
41525 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41526 {
41527 mask = gen_reg_rtx (HImode);
41528 /* Imm value 0x4 corresponds to not-equal comparison. */
41529 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41530 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41531 }
41532 else
41533 {
41534 emit_insn (gen_rtx_SET (VOIDmode, mask,
41535 gen_rtx_NE (mode, zero, a)));
41536
41537 emit_insn (gen_rtx_SET (VOIDmode, x0,
41538 gen_rtx_AND (mode, x0, mask)));
41539 }
41540 }
41541
41542 /* e0 = x0 * a */
41543 emit_insn (gen_rtx_SET (VOIDmode, e0,
41544 gen_rtx_MULT (mode, x0, a)));
41545 /* e1 = e0 * x0 */
41546 emit_insn (gen_rtx_SET (VOIDmode, e1,
41547 gen_rtx_MULT (mode, e0, x0)));
41548
41549 /* e2 = e1 - 3.0 (added below as mthree = -3.0). */
41550 mthree = force_reg (mode, mthree);
41551 emit_insn (gen_rtx_SET (VOIDmode, e2,
41552 gen_rtx_PLUS (mode, e1, mthree)));
41553
41554 mhalf = force_reg (mode, mhalf);
41555 if (recip)
41556 /* e3 = -.5 * x0 */
41557 emit_insn (gen_rtx_SET (VOIDmode, e3,
41558 gen_rtx_MULT (mode, x0, mhalf)));
41559 else
41560 /* e3 = -.5 * e0 */
41561 emit_insn (gen_rtx_SET (VOIDmode, e3,
41562 gen_rtx_MULT (mode, e0, mhalf)));
41563 /* ret = e2 * e3 */
41564 emit_insn (gen_rtx_SET (VOIDmode, res,
41565 gen_rtx_MULT (mode, e2, e3)));
41566 }
41567
41568 #ifdef TARGET_SOLARIS
41569 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41570
41571 static void
41572 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41573 tree decl)
41574 {
41575 /* With Binutils 2.15, the "@unwind" marker must be specified on
41576 every occurrence of the ".eh_frame" section, not just the first
41577 one. */
41578 if (TARGET_64BIT
41579 && strcmp (name, ".eh_frame") == 0)
41580 {
41581 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41582 flags & SECTION_WRITE ? "aw" : "a");
41583 return;
41584 }
41585
41586 #ifndef USE_GAS
41587 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41588 {
41589 solaris_elf_asm_comdat_section (name, flags, decl);
41590 return;
41591 }
41592 #endif
41593
41594 default_elf_asm_named_section (name, flags, decl);
41595 }
41596 #endif /* TARGET_SOLARIS */
41597
41598 /* Return the mangling of TYPE if it is an extended fundamental type. */
41599
41600 static const char *
41601 ix86_mangle_type (const_tree type)
41602 {
41603 type = TYPE_MAIN_VARIANT (type);
41604
41605 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41606 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41607 return NULL;
41608
41609 switch (TYPE_MODE (type))
41610 {
41611 case TFmode:
41612 /* __float128 is "g". */
41613 return "g";
41614 case XFmode:
41615 /* "long double" or __float80 is "e". */
41616 return "e";
41617 default:
41618 return NULL;
41619 }
41620 }
41621
41622 /* For 32-bit code we can save PIC register setup by using
41623 __stack_chk_fail_local hidden function instead of calling
41624 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
41625 register, so it is better to call __stack_chk_fail directly. */
41626
41627 static tree ATTRIBUTE_UNUSED
41628 ix86_stack_protect_fail (void)
41629 {
41630 return TARGET_64BIT
41631 ? default_external_stack_protect_fail ()
41632 : default_hidden_stack_protect_fail ();
41633 }
41634
41635 /* Select a format to encode pointers in exception handling data. CODE
41636 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41637 true if the symbol may be affected by dynamic relocations.
41638
41639 ??? All x86 object file formats are capable of representing this.
41640 After all, the relocation needed is the same as for the call insn.
41641 Whether or not a particular assembler allows us to enter such, I
41642 guess we'll have to see. */
41643 int
41644 asm_preferred_eh_data_format (int code, int global)
41645 {
41646 if (flag_pic)
41647 {
41648 int type = DW_EH_PE_sdata8;
41649 if (!TARGET_64BIT
41650 || ix86_cmodel == CM_SMALL_PIC
41651 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41652 type = DW_EH_PE_sdata4;
41653 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41654 }
41655 if (ix86_cmodel == CM_SMALL
41656 || (ix86_cmodel == CM_MEDIUM && code))
41657 return DW_EH_PE_udata4;
41658 return DW_EH_PE_absptr;
41659 }
41660 \f
41661 /* Expand copysign from SIGN to the positive value ABS_VALUE
41662 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
41663 the sign-bit. */
41664 static void
41665 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41666 {
41667 enum machine_mode mode = GET_MODE (sign);
41668 rtx sgn = gen_reg_rtx (mode);
41669 if (mask == NULL_RTX)
41670 {
41671 enum machine_mode vmode;
41672
41673 if (mode == SFmode)
41674 vmode = V4SFmode;
41675 else if (mode == DFmode)
41676 vmode = V2DFmode;
41677 else
41678 vmode = mode;
41679
41680 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41681 if (!VECTOR_MODE_P (mode))
41682 {
41683 /* We need to generate a scalar mode mask in this case. */
41684 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41685 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41686 mask = gen_reg_rtx (mode);
41687 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41688 }
41689 }
41690 else
41691 mask = gen_rtx_NOT (mode, mask);
41692 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41693 gen_rtx_AND (mode, mask, sign)));
41694 emit_insn (gen_rtx_SET (VOIDmode, result,
41695 gen_rtx_IOR (mode, abs_value, sgn)));
41696 }
41697
41698 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41699 mask for masking out the sign-bit is stored in *SMASK, if that is
41700 non-null. */
41701 static rtx
41702 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41703 {
41704 enum machine_mode vmode, mode = GET_MODE (op0);
41705 rtx xa, mask;
41706
41707 xa = gen_reg_rtx (mode);
41708 if (mode == SFmode)
41709 vmode = V4SFmode;
41710 else if (mode == DFmode)
41711 vmode = V2DFmode;
41712 else
41713 vmode = mode;
41714 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41715 if (!VECTOR_MODE_P (mode))
41716 {
41717 /* We need to generate a scalar mode mask in this case. */
41718 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41719 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41720 mask = gen_reg_rtx (mode);
41721 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41722 }
41723 emit_insn (gen_rtx_SET (VOIDmode, xa,
41724 gen_rtx_AND (mode, op0, mask)));
41725
41726 if (smask)
41727 *smask = mask;
41728
41729 return xa;
41730 }
41731
41732 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41733 swapping the operands if SWAP_OPERANDS is true. The expanded
41734 code is a forward jump to a newly created label in case the
41735 comparison is true. The generated label rtx is returned. */
41736 static rtx
41737 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41738 bool swap_operands)
41739 {
41740 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41741 rtx label, tmp;
41742
41743 if (swap_operands)
41744 {
41745 tmp = op0;
41746 op0 = op1;
41747 op1 = tmp;
41748 }
41749
41750 label = gen_label_rtx ();
41751 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41752 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41753 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41754 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41755 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41756 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41757 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41758 JUMP_LABEL (tmp) = label;
41759
41760 return label;
41761 }
41762
41763 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41764 using comparison code CODE. Operands are swapped for the comparison if
41765 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41766 static rtx
41767 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41768 bool swap_operands)
41769 {
41770 rtx (*insn)(rtx, rtx, rtx, rtx);
41771 enum machine_mode mode = GET_MODE (op0);
41772 rtx mask = gen_reg_rtx (mode);
41773
41774 if (swap_operands)
41775 {
41776 rtx tmp = op0;
41777 op0 = op1;
41778 op1 = tmp;
41779 }
41780
41781 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
41782
41783 emit_insn (insn (mask, op0, op1,
41784 gen_rtx_fmt_ee (code, mode, op0, op1)));
41785 return mask;
41786 }
41787
41788 /* Generate and return a rtx of mode MODE for 2**n where n is the number
41789 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
41790 static rtx
41791 ix86_gen_TWO52 (enum machine_mode mode)
41792 {
41793 REAL_VALUE_TYPE TWO52r;
41794 rtx TWO52;
41795
41796 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
41797 TWO52 = const_double_from_real_value (TWO52r, mode);
41798 TWO52 = force_reg (mode, TWO52);
41799
41800 return TWO52;
41801 }
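/* This 2**52 (resp. 2**23 for SFmode) constant drives the add-and-subtract
   rounding trick used by the expanders below: for 0 <= x < 2**52 the sum
   x + 2**52 has no fraction bits left in DFmode, so the FPU rounds it to an
   integer in the current (round-to-nearest) mode, and subtracting 2**52
   again leaves exactly that integer.  E.g. 3.7 + 2**52 - 2**52 == 4.0.
   Magnitudes >= 2**52 are already integers, which is why the expanders
   skip the computation for them.  */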
41802
41803 /* Expand SSE sequence for computing lround from OP1 storing
41804 into OP0. */
41805 void
41806 ix86_expand_lround (rtx op0, rtx op1)
41807 {
41808 /* C code for the stuff we're doing below:
41809 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
41810 return (long)tmp;
41811 */
41812 enum machine_mode mode = GET_MODE (op1);
41813 const struct real_format *fmt;
41814 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41815 rtx adj;
41816
41817 /* load nextafter (0.5, 0.0) */
41818 fmt = REAL_MODE_FORMAT (mode);
41819 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41820 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
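  /* pred_half is the largest representable value below 0.5.  Using it
     instead of 0.5 itself avoids misrounding the predecessor of 0.5:
     e.g. 0.49999999999999994 + 0.5 would round up to 1.0 in DFmode,
     whereas adding pred_half keeps the sum below 1.0, so the final
     truncation correctly yields 0.  */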
41821
41822 /* adj = copysign (0.5, op1) */
41823 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
41824 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
41825
41826 /* adj = op1 + adj */
41827 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
41828
41829 /* op0 = (imode)adj */
41830 expand_fix (op0, adj, 0);
41831 }
41832
41833 /* Expand SSE2 sequence for computing lfloor or lceil (depending on
41834 DO_FLOOR) from OP1, storing into OP0. */
41835 void
41836 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
41837 {
41838 /* C code for the stuff we're doing below (for do_floor):
41839 xi = (long)op1;
41840 xi -= (double)xi > op1 ? 1 : 0;
41841 return xi;
41842 */
41843 enum machine_mode fmode = GET_MODE (op1);
41844 enum machine_mode imode = GET_MODE (op0);
41845 rtx ireg, freg, label, tmp;
41846
41847 /* reg = (long)op1 */
41848 ireg = gen_reg_rtx (imode);
41849 expand_fix (ireg, op1, 0);
41850
41851 /* freg = (double)reg */
41852 freg = gen_reg_rtx (fmode);
41853 expand_float (freg, ireg, 0);
41854
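  /* A single UNLE compare-and-jump handles both directions: for floor the
     comparison is freg <= op1 (skip the decrement when the truncated value
     did not overshoot), while for ceil the operands are swapped so the jump
     is taken when op1 <= freg and the increment is skipped.  */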
41855 /* ireg = (freg > op1) ? ireg - 1 : ireg */
41856 label = ix86_expand_sse_compare_and_jump (UNLE,
41857 freg, op1, !do_floor);
41858 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
41859 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
41860 emit_move_insn (ireg, tmp);
41861
41862 emit_label (label);
41863 LABEL_NUSES (label) = 1;
41864
41865 emit_move_insn (op0, ireg);
41866 }
41867
41868 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
41869 result in OPERAND0. */
41870 void
41871 ix86_expand_rint (rtx operand0, rtx operand1)
41872 {
41873 /* C code for the stuff we're doing below:
41874 xa = fabs (operand1);
41875 if (!isless (xa, 2**52))
41876 return operand1;
41877 xa = xa + 2**52 - 2**52;
41878 return copysign (xa, operand1);
41879 */
41880 enum machine_mode mode = GET_MODE (operand0);
41881 rtx res, xa, label, TWO52, mask;
41882
41883 res = gen_reg_rtx (mode);
41884 emit_move_insn (res, operand1);
41885
41886 /* xa = abs (operand1) */
41887 xa = ix86_expand_sse_fabs (res, &mask);
41888
41889 /* if (!isless (xa, TWO52)) goto label; */
41890 TWO52 = ix86_gen_TWO52 (mode);
41891 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41892
41893 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41894 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41895
41896 ix86_sse_copysign_to_positive (res, xa, res, mask);
41897
41898 emit_label (label);
41899 LABEL_NUSES (label) = 1;
41900
41901 emit_move_insn (operand0, res);
41902 }
41903
41904 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41905 into OPERAND0. */
41906 void
41907 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
41908 {
41909 /* C code for the stuff we expand below.
41910 double xa = fabs (x), x2;
41911 if (!isless (xa, TWO52))
41912 return x;
41913 xa = xa + TWO52 - TWO52;
41914 x2 = copysign (xa, x);
41915 Compensate. Floor:
41916 if (x2 > x)
41917 x2 -= 1;
41918 Compensate. Ceil:
41919 if (x2 < x)
41920 x2 -= -1;
41921 return x2;
41922 */
41923 enum machine_mode mode = GET_MODE (operand0);
41924 rtx xa, TWO52, tmp, label, one, res, mask;
41925
41926 TWO52 = ix86_gen_TWO52 (mode);
41927
41928 /* Temporary for holding the result, initialized to the input
41929 operand to ease control flow. */
41930 res = gen_reg_rtx (mode);
41931 emit_move_insn (res, operand1);
41932
41933 /* xa = abs (operand1) */
41934 xa = ix86_expand_sse_fabs (res, &mask);
41935
41936 /* if (!isless (xa, TWO52)) goto label; */
41937 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41938
41939 /* xa = xa + TWO52 - TWO52; */
41940 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41941 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41942
41943 /* xa = copysign (xa, operand1) */
41944 ix86_sse_copysign_to_positive (xa, xa, res, mask);
41945
41946 /* generate 1.0 or -1.0 */
41947 one = force_reg (mode,
41948 const_double_from_real_value (do_floor
41949 ? dconst1 : dconstm1, mode));
41950
41951 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41952 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41953 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41954 gen_rtx_AND (mode, one, tmp)));
41955 /* We always need to subtract here to preserve signed zero. */
41956 tmp = expand_simple_binop (mode, MINUS,
41957 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41958 emit_move_insn (res, tmp);
41959
41960 emit_label (label);
41961 LABEL_NUSES (label) = 1;
41962
41963 emit_move_insn (operand0, res);
41964 }
41965
41966 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41967 into OPERAND0. */
41968 void
41969 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
41970 {
41971 /* C code for the stuff we expand below.
41972 double xa = fabs (x), x2;
41973 if (!isless (xa, TWO52))
41974 return x;
41975 x2 = (double)(long)x;
41976 Compensate. Floor:
41977 if (x2 > x)
41978 x2 -= 1;
41979 Compensate. Ceil:
41980 if (x2 < x)
41981 x2 += 1;
41982 if (HONOR_SIGNED_ZEROS (mode))
41983 return copysign (x2, x);
41984 return x2;
41985 */
41986 enum machine_mode mode = GET_MODE (operand0);
41987 rtx xa, xi, TWO52, tmp, label, one, res, mask;
41988
41989 TWO52 = ix86_gen_TWO52 (mode);
41990
41991 /* Temporary for holding the result, initialized to the input
41992 operand to ease control flow. */
41993 res = gen_reg_rtx (mode);
41994 emit_move_insn (res, operand1);
41995
41996 /* xa = abs (operand1) */
41997 xa = ix86_expand_sse_fabs (res, &mask);
41998
41999 /* if (!isless (xa, TWO52)) goto label; */
42000 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42001
42002 /* xa = (double)(long)x */
42003 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42004 expand_fix (xi, res, 0);
42005 expand_float (xa, xi, 0);
42006
42007 /* generate 1.0 */
42008 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42009
42010 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42011 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42012 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42013 gen_rtx_AND (mode, one, tmp)));
42014 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
42015 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42016 emit_move_insn (res, tmp);
42017
42018 if (HONOR_SIGNED_ZEROS (mode))
42019 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42020
42021 emit_label (label);
42022 LABEL_NUSES (label) = 1;
42023
42024 emit_move_insn (operand0, res);
42025 }
42026
42027 /* Expand SSE sequence for computing round from OPERAND1 storing
42028 into OPERAND0; a sequence that works without relying on DImode
42029 truncation via cvttsd2siq, which is only available on 64-bit targets. */
42030 void
42031 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42032 {
42033 /* C code for the stuff we expand below.
42034 double xa = fabs (x), xa2, x2;
42035 if (!isless (xa, TWO52))
42036 return x;
42037 Using the absolute value and copying back sign makes
42038 -0.0 -> -0.0 correct.
42039 xa2 = xa + TWO52 - TWO52;
42040 Compensate.
42041 dxa = xa2 - xa;
42042 if (dxa <= -0.5)
42043 xa2 += 1;
42044 else if (dxa > 0.5)
42045 xa2 -= 1;
42046 x2 = copysign (xa2, x);
42047 return x2;
42048 */
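  /* Worked example under round-to-nearest-even: for x = 2.5 the
     add/subtract trick gives xa2 = 2.0, so dxa = -0.5 and the second
     compensation bumps the result to 3.0 (round-half-away-from-zero);
     for x = 3.5 it gives xa2 = 4.0, dxa = 0.5, and no compensation is
     needed.  */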
42049 enum machine_mode mode = GET_MODE (operand0);
42050 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
42051
42052 TWO52 = ix86_gen_TWO52 (mode);
42053
42054 /* Temporary for holding the result, initialized to the input
42055 operand to ease control flow. */
42056 res = gen_reg_rtx (mode);
42057 emit_move_insn (res, operand1);
42058
42059 /* xa = abs (operand1) */
42060 xa = ix86_expand_sse_fabs (res, &mask);
42061
42062 /* if (!isless (xa, TWO52)) goto label; */
42063 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42064
42065 /* xa2 = xa + TWO52 - TWO52; */
42066 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42067 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42068
42069 /* dxa = xa2 - xa; */
42070 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42071
42072 /* generate 0.5, 1.0 and -0.5 */
42073 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42074 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42075 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42076 0, OPTAB_DIRECT);
42077
42078 /* Compensate. */
42079 tmp = gen_reg_rtx (mode);
42080 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42081 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42082 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42083 gen_rtx_AND (mode, one, tmp)));
42084 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42085 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42086 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42087 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42088 gen_rtx_AND (mode, one, tmp)));
42089 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42090
42091 /* res = copysign (xa2, operand1) */
42092 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42093
42094 emit_label (label);
42095 LABEL_NUSES (label) = 1;
42096
42097 emit_move_insn (operand0, res);
42098 }
42099
42100 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42101 into OPERAND0. */
42102 void
42103 ix86_expand_trunc (rtx operand0, rtx operand1)
42104 {
42105 /* C code for SSE variant we expand below.
42106 double xa = fabs (x), x2;
42107 if (!isless (xa, TWO52))
42108 return x;
42109 x2 = (double)(long)x;
42110 if (HONOR_SIGNED_ZEROS (mode))
42111 return copysign (x2, x);
42112 return x2;
42113 */
42114 enum machine_mode mode = GET_MODE (operand0);
42115 rtx xa, xi, TWO52, label, res, mask;
42116
42117 TWO52 = ix86_gen_TWO52 (mode);
42118
42119 /* Temporary for holding the result, initialized to the input
42120 operand to ease control flow. */
42121 res = gen_reg_rtx (mode);
42122 emit_move_insn (res, operand1);
42123
42124 /* xa = abs (operand1) */
42125 xa = ix86_expand_sse_fabs (res, &mask);
42126
42127 /* if (!isless (xa, TWO52)) goto label; */
42128 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42129
42130 /* x = (double)(long)x */
42131 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42132 expand_fix (xi, res, 0);
42133 expand_float (res, xi, 0);
42134
42135 if (HONOR_SIGNED_ZEROS (mode))
42136 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42137
42138 emit_label (label);
42139 LABEL_NUSES (label) = 1;
42140
42141 emit_move_insn (operand0, res);
42142 }
42143
42144 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42145 into OPERAND0, without relying on DImode truncation (cf. ix86_expand_rounddf_32). */
42146 void
42147 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42148 {
42149 enum machine_mode mode = GET_MODE (operand0);
42150 rtx xa, mask, TWO52, label, one, res, smask, tmp;
42151
42152 /* C code for SSE variant we expand below.
42153 double xa = fabs (x), xa2, x2;
42154 if (!isless (xa, TWO52))
42155 return x;
42156 xa2 = xa + TWO52 - TWO52;
42157 Compensate:
42158 if (xa2 > xa)
42159 xa2 -= 1.0;
42160 x2 = copysign (xa2, x);
42161 return x2;
42162 */
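  /* E.g. for x = 2.7 the add/subtract rounds xa to 3.0, the comparison
     3.0 > 2.7 fires, and subtracting 1.0 gives the truncated value 2.0.  */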
42163
42164 TWO52 = ix86_gen_TWO52 (mode);
42165
42166 /* Temporary for holding the result, initialized to the input
42167 operand to ease control flow. */
42168 res = gen_reg_rtx (mode);
42169 emit_move_insn (res, operand1);
42170
42171 /* xa = abs (operand1) */
42172 xa = ix86_expand_sse_fabs (res, &smask);
42173
42174 /* if (!isless (xa, TWO52)) goto label; */
42175 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42176
42177 /* res = xa + TWO52 - TWO52; */
42178 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42179 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42180 emit_move_insn (res, tmp);
42181
42182 /* generate 1.0 */
42183 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42184
42185 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42186 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42187 emit_insn (gen_rtx_SET (VOIDmode, mask,
42188 gen_rtx_AND (mode, mask, one)));
42189 tmp = expand_simple_binop (mode, MINUS,
42190 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42191 emit_move_insn (res, tmp);
42192
42193 /* res = copysign (res, operand1) */
42194 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42195
42196 emit_label (label);
42197 LABEL_NUSES (label) = 1;
42198
42199 emit_move_insn (operand0, res);
42200 }
42201
42202 /* Expand SSE sequence for computing round from OPERAND1 storing
42203 into OPERAND0. */
42204 void
42205 ix86_expand_round (rtx operand0, rtx operand1)
42206 {
42207 /* C code for the stuff we're doing below:
42208 double xa = fabs (x);
42209 if (!isless (xa, TWO52))
42210 return x;
42211 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42212 return copysign (xa, x);
42213 */
42214 enum machine_mode mode = GET_MODE (operand0);
42215 rtx res, TWO52, xa, label, xi, half, mask;
42216 const struct real_format *fmt;
42217 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42218
42219 /* Temporary for holding the result, initialized to the input
42220 operand to ease control flow. */
42221 res = gen_reg_rtx (mode);
42222 emit_move_insn (res, operand1);
42223
42224 TWO52 = ix86_gen_TWO52 (mode);
42225 xa = ix86_expand_sse_fabs (res, &mask);
42226 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42227
42228 /* load nextafter (0.5, 0.0) */
42229 fmt = REAL_MODE_FORMAT (mode);
42230 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42231 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42232
42233 /* xa = xa + 0.5 */
42234 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42235 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42236
42237 /* xa = (double)(int64_t)xa */
42238 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42239 expand_fix (xi, xa, 0);
42240 expand_float (xa, xi, 0);
42241
42242 /* res = copysign (xa, operand1) */
42243 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42244
42245 emit_label (label);
42246 LABEL_NUSES (label) = 1;
42247
42248 emit_move_insn (operand0, res);
42249 }
42250
42251 /* Expand SSE sequence for computing round
42252 from OP1 storing into OP0 using sse4 round insn. */
42253 void
42254 ix86_expand_round_sse4 (rtx op0, rtx op1)
42255 {
42256 enum machine_mode mode = GET_MODE (op0);
42257 rtx e1, e2, res, half;
42258 const struct real_format *fmt;
42259 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42260 rtx (*gen_copysign) (rtx, rtx, rtx);
42261 rtx (*gen_round) (rtx, rtx, rtx);
42262
42263 switch (mode)
42264 {
42265 case SFmode:
42266 gen_copysign = gen_copysignsf3;
42267 gen_round = gen_sse4_1_roundsf2;
42268 break;
42269 case DFmode:
42270 gen_copysign = gen_copysigndf3;
42271 gen_round = gen_sse4_1_rounddf2;
42272 break;
42273 default:
42274 gcc_unreachable ();
42275 }
42276
42277 /* round (a) = trunc (a + copysign (0.5, a)) */
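  /* The SSE4.1 round insn is used only for the final truncation
     (ROUND_TRUNC); rounding to nearest directly would give
     round-half-to-even, whereas round () must round halfway cases away
     from zero, hence the explicit copysign-and-add sequence.  */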
42278
42279 /* load nextafter (0.5, 0.0) */
42280 fmt = REAL_MODE_FORMAT (mode);
42281 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42282 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42283 half = const_double_from_real_value (pred_half, mode);
42284
42285 /* e1 = copysign (0.5, op1) */
42286 e1 = gen_reg_rtx (mode);
42287 emit_insn (gen_copysign (e1, half, op1));
42288
42289 /* e2 = op1 + e1 */
42290 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42291
42292 /* res = trunc (e2) */
42293 res = gen_reg_rtx (mode);
42294 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42295
42296 emit_move_insn (op0, res);
42297 }
42298 \f
42299
42300 /* Table of valid machine attributes. */
42301 static const struct attribute_spec ix86_attribute_table[] =
42302 {
42303 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42304 affects_type_identity } */
42305 /* Stdcall attribute says callee is responsible for popping arguments
42306 if they are not variable. */
42307 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42308 true },
42309 /* Fastcall attribute says callee is responsible for popping arguments
42310 if they are not variable. */
42311 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42312 true },
42313 /* Thiscall attribute says callee is responsible for popping arguments
42314 if they are not variable. */
42315 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42316 true },
42317 /* Cdecl attribute says the callee is a normal C declaration */
42318 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42319 true },
42320 /* Regparm attribute specifies how many integer arguments are to be
42321 passed in registers. */
42322 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42323 true },
42324 /* Sseregparm attribute says we are using x86_64 calling conventions
42325 for FP arguments. */
42326 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42327 true },
42328 /* The transactional memory builtins are implicitly regparm or fastcall
42329 depending on the ABI. Override the generic do-nothing attribute that
42330 these builtins were declared with. */
42331 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42332 true },
42333 /* force_align_arg_pointer says this function realigns the stack at entry. */
42334 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42335 false, true, true, ix86_handle_cconv_attribute, false },
42336 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42337 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42338 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42339 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42340 false },
42341 #endif
42342 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42343 false },
42344 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42345 false },
42346 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42347 SUBTARGET_ATTRIBUTE_TABLE,
42348 #endif
42349 /* ms_abi and sysv_abi calling convention function attributes. */
42350 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42351 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42352 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42353 false },
42354 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42355 ix86_handle_callee_pop_aggregate_return, true },
42356 /* End element. */
42357 { NULL, 0, 0, false, false, false, NULL, false }
42358 };
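/* Illustrative (hypothetical user code) uses of the entries above:
     int __attribute__((fastcall)) f (int a, int b);       args in ecx/edx
     int __attribute__((regparm (3))) g (int, int, int);   args in eax/edx/ecx
     struct __attribute__((ms_struct)) S { char c; int i; };
   The handlers referenced in the table validate such uses and emit
   diagnostics for invalid combinations.  */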
42359
42360 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42361 static int
42362 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42363 tree vectype,
42364 int misalign ATTRIBUTE_UNUSED)
42365 {
42366 unsigned elements;
42367
42368 switch (type_of_cost)
42369 {
42370 case scalar_stmt:
42371 return ix86_cost->scalar_stmt_cost;
42372
42373 case scalar_load:
42374 return ix86_cost->scalar_load_cost;
42375
42376 case scalar_store:
42377 return ix86_cost->scalar_store_cost;
42378
42379 case vector_stmt:
42380 return ix86_cost->vec_stmt_cost;
42381
42382 case vector_load:
42383 return ix86_cost->vec_align_load_cost;
42384
42385 case vector_store:
42386 return ix86_cost->vec_store_cost;
42387
42388 case vec_to_scalar:
42389 return ix86_cost->vec_to_scalar_cost;
42390
42391 case scalar_to_vec:
42392 return ix86_cost->scalar_to_vec_cost;
42393
42394 case unaligned_load:
42395 case unaligned_store:
42396 return ix86_cost->vec_unalign_load_cost;
42397
42398 case cond_branch_taken:
42399 return ix86_cost->cond_taken_branch_cost;
42400
42401 case cond_branch_not_taken:
42402 return ix86_cost->cond_not_taken_branch_cost;
42403
42404 case vec_perm:
42405 case vec_promote_demote:
42406 return ix86_cost->vec_stmt_cost;
42407
42408 case vec_construct:
42409 elements = TYPE_VECTOR_SUBPARTS (vectype);
42410 return elements / 2 + 1;
42411
42412 default:
42413 gcc_unreachable ();
42414 }
42415 }
42416
42417 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42418 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42419 insn every time. */
42420
42421 static GTY(()) rtx vselect_insn;
42422
42423 /* Initialize vselect_insn. */
42424
42425 static void
42426 init_vselect_insn (void)
42427 {
42428 unsigned i;
42429 rtx x;
42430
42431 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42432 for (i = 0; i < MAX_VECT_LEN; ++i)
42433 XVECEXP (x, 0, i) = const0_rtx;
42434 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42435 const0_rtx), x);
42436 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42437 start_sequence ();
42438 vselect_insn = emit_insn (x);
42439 end_sequence ();
42440 }
42441
42442 /* Construct (set target (vec_select op0 (parallel perm))) and
42443 return true if that's a valid instruction in the active ISA. */
42444
42445 static bool
42446 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42447 unsigned nelt, bool testing_p)
42448 {
42449 unsigned int i;
42450 rtx x, save_vconcat;
42451 int icode;
42452
42453 if (vselect_insn == NULL_RTX)
42454 init_vselect_insn ();
42455
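  /* Reuse the single cached insn: rewrite its PARALLEL with the requested
     permutation indices, splice in the real operand, destination and modes,
     and ask recog_memoized whether any enabled pattern accepts the result.
     Everything is put back afterwards so the cached insn stays generic.  */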
42456 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42457 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42458 for (i = 0; i < nelt; ++i)
42459 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42460 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42461 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42462 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42463 SET_DEST (PATTERN (vselect_insn)) = target;
42464 icode = recog_memoized (vselect_insn);
42465
42466 if (icode >= 0 && !testing_p)
42467 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42468
42469 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42470 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42471 INSN_CODE (vselect_insn) = -1;
42472
42473 return icode >= 0;
42474 }
42475
42476 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42477
42478 static bool
42479 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42480 const unsigned char *perm, unsigned nelt,
42481 bool testing_p)
42482 {
42483 enum machine_mode v2mode;
42484 rtx x;
42485 bool ok;
42486
42487 if (vselect_insn == NULL_RTX)
42488 init_vselect_insn ();
42489
42490 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42491 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42492 PUT_MODE (x, v2mode);
42493 XEXP (x, 0) = op0;
42494 XEXP (x, 1) = op1;
42495 ok = expand_vselect (target, x, perm, nelt, testing_p);
42496 XEXP (x, 0) = const0_rtx;
42497 XEXP (x, 1) = const0_rtx;
42498 return ok;
42499 }
42500
42501 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42502 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42503
42504 static bool
42505 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42506 {
42507 enum machine_mode vmode = d->vmode;
42508 unsigned i, mask, nelt = d->nelt;
42509 rtx target, op0, op1, x;
42510 rtx rperm[32], vperm;
42511
42512 if (d->one_operand_p)
42513 return false;
42514 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42515 ;
42516 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42517 ;
42518 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42519 ;
42520 else
42521 return false;
42522
42523 /* This is a blend, not a permute. Elements must stay in their
42524 respective lanes. */
42525 for (i = 0; i < nelt; ++i)
42526 {
42527 unsigned e = d->perm[i];
42528 if (!(e == i || e == i + nelt))
42529 return false;
42530 }
42531
42532 if (d->testing_p)
42533 return true;
42534
42535 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42536 decision should be extracted elsewhere, so that we only try that
42537 sequence once all budget==3 options have been tried. */
42538 target = d->target;
42539 op0 = d->op0;
42540 op1 = d->op1;
42541 mask = 0;
42542
42543 switch (vmode)
42544 {
42545 case V4DFmode:
42546 case V8SFmode:
42547 case V2DFmode:
42548 case V4SFmode:
42549 case V8HImode:
42550 case V8SImode:
42551 for (i = 0; i < nelt; ++i)
42552 mask |= (d->perm[i] >= nelt) << i;
42553 break;
42554
42555 case V2DImode:
42556 for (i = 0; i < 2; ++i)
42557 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
42558 vmode = V8HImode;
42559 goto do_subreg;
42560
42561 case V4SImode:
42562 for (i = 0; i < 4; ++i)
42563 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42564 vmode = V8HImode;
42565 goto do_subreg;
42566
42567 case V16QImode:
42568 /* See if bytes move in pairs so we can use pblendw with
42569 an immediate argument, rather than pblendvb with a vector
42570 argument. */
42571 for (i = 0; i < 16; i += 2)
42572 if (d->perm[i] + 1 != d->perm[i + 1])
42573 {
42574 use_pblendvb:
42575 for (i = 0; i < nelt; ++i)
42576 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42577
42578 finish_pblendvb:
42579 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42580 vperm = force_reg (vmode, vperm);
42581
42582 if (GET_MODE_SIZE (vmode) == 16)
42583 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42584 else
42585 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42586 if (target != d->target)
42587 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42588 return true;
42589 }
42590
42591 for (i = 0; i < 8; ++i)
42592 mask |= (d->perm[i * 2] >= 16) << i;
42593 vmode = V8HImode;
42594 /* FALLTHRU */
42595
42596 do_subreg:
42597 target = gen_reg_rtx (vmode);
42598 op0 = gen_lowpart (vmode, op0);
42599 op1 = gen_lowpart (vmode, op1);
42600 break;
42601
42602 case V32QImode:
42603 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42604 for (i = 0; i < 32; i += 2)
42605 if (d->perm[i] + 1 != d->perm[i + 1])
42606 goto use_pblendvb;
42607 /* See if bytes move in quadruplets. If yes, vpblendd
42608 with immediate can be used. */
42609 for (i = 0; i < 32; i += 4)
42610 if (d->perm[i] + 2 != d->perm[i + 2])
42611 break;
42612 if (i < 32)
42613 {
42614 /* See if bytes move the same in both lanes. If yes,
42615 vpblendw with immediate can be used. */
42616 for (i = 0; i < 16; i += 2)
42617 if (d->perm[i] + 16 != d->perm[i + 16])
42618 goto use_pblendvb;
42619
42620 /* Use vpblendw. */
42621 for (i = 0; i < 16; ++i)
42622 mask |= (d->perm[i * 2] >= 32) << i;
42623 vmode = V16HImode;
42624 goto do_subreg;
42625 }
42626
42627 /* Use vpblendd. */
42628 for (i = 0; i < 8; ++i)
42629 mask |= (d->perm[i * 4] >= 32) << i;
42630 vmode = V8SImode;
42631 goto do_subreg;
42632
42633 case V16HImode:
42634 /* See if words move in pairs. If yes, vpblendd can be used. */
42635 for (i = 0; i < 16; i += 2)
42636 if (d->perm[i] + 1 != d->perm[i + 1])
42637 break;
42638 if (i < 16)
42639 {
42640 /* See if words move the same in both lanes. If not,
42641 vpblendvb must be used. */
42642 for (i = 0; i < 8; i++)
42643 if (d->perm[i] + 8 != d->perm[i + 8])
42644 {
42645 /* Use vpblendvb. */
42646 for (i = 0; i < 32; ++i)
42647 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42648
42649 vmode = V32QImode;
42650 nelt = 32;
42651 target = gen_reg_rtx (vmode);
42652 op0 = gen_lowpart (vmode, op0);
42653 op1 = gen_lowpart (vmode, op1);
42654 goto finish_pblendvb;
42655 }
42656
42657 /* Use vpblendw. */
42658 for (i = 0; i < 16; ++i)
42659 mask |= (d->perm[i] >= 16) << i;
42660 break;
42661 }
42662
42663 /* Use vpblendd. */
42664 for (i = 0; i < 8; ++i)
42665 mask |= (d->perm[i * 2] >= 16) << i;
42666 vmode = V8SImode;
42667 goto do_subreg;
42668
42669 case V4DImode:
42670 /* Use vpblendd. */
42671 for (i = 0; i < 4; ++i)
42672 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42673 vmode = V8SImode;
42674 goto do_subreg;
42675
42676 default:
42677 gcc_unreachable ();
42678 }
42679
42680 /* This matches five different patterns, depending on the mode. */
42681 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42682 x = gen_rtx_SET (VOIDmode, target, x);
42683 emit_insn (x);
42684 if (target != d->target)
42685 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42686
42687 return true;
42688 }
42689
42690 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42691 in terms of the variable form of vpermilps.
42692
42693 Note that we will have already failed the immediate input vpermilps,
42694 which requires that the high and low part shuffle be identical; the
42695 variable form doesn't require that. */
42696
42697 static bool
42698 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42699 {
42700 rtx rperm[8], vperm;
42701 unsigned i;
42702
42703 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42704 return false;
42705
42706 /* We can only permute within the 128-bit lane. */
42707 for (i = 0; i < 8; ++i)
42708 {
42709 unsigned e = d->perm[i];
42710 if (i < 4 ? e >= 4 : e < 4)
42711 return false;
42712 }
42713
42714 if (d->testing_p)
42715 return true;
42716
42717 for (i = 0; i < 8; ++i)
42718 {
42719 unsigned e = d->perm[i];
42720
42721 /* Within each 128-bit lane, the elements of op0 are numbered
42722 from 0 and the elements of op1 are numbered from 4. */
42723 if (e >= 8 + 4)
42724 e -= 8;
42725 else if (e >= 4)
42726 e -= 4;
42727
42728 rperm[i] = GEN_INT (e);
42729 }
42730
42731 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42732 vperm = force_reg (V8SImode, vperm);
42733 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42734
42735 return true;
42736 }
42737
42738 /* Return true if permutation D can be performed as VMODE permutation
42739 instead. */
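/* For instance, a V32QImode permutation can be re-expressed in V8SImode
   only if the bytes move in aligned groups of four (chunk == 4 below):
   every group must start on a multiple of four and stay consecutive.  */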
42740
42741 static bool
42742 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42743 {
42744 unsigned int i, j, chunk;
42745
42746 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42747 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42748 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42749 return false;
42750
42751 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42752 return true;
42753
42754 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42755 for (i = 0; i < d->nelt; i += chunk)
42756 if (d->perm[i] & (chunk - 1))
42757 return false;
42758 else
42759 for (j = 1; j < chunk; ++j)
42760 if (d->perm[i] + j != d->perm[i + j])
42761 return false;
42762
42763 return true;
42764 }
42765
42766 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42767 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
42768
42769 static bool
42770 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
42771 {
42772 unsigned i, nelt, eltsz, mask;
42773 unsigned char perm[32];
42774 enum machine_mode vmode = V16QImode;
42775 rtx rperm[32], vperm, target, op0, op1;
42776
42777 nelt = d->nelt;
42778
42779 if (!d->one_operand_p)
42780 {
42781 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
42782 {
42783 if (TARGET_AVX2
42784 && valid_perm_using_mode_p (V2TImode, d))
42785 {
42786 if (d->testing_p)
42787 return true;
42788
42789 /* Use vperm2i128 insn. The pattern uses
42790 V4DImode instead of V2TImode. */
42791 target = d->target;
42792 if (d->vmode != V4DImode)
42793 target = gen_reg_rtx (V4DImode);
42794 op0 = gen_lowpart (V4DImode, d->op0);
42795 op1 = gen_lowpart (V4DImode, d->op1);
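	      /* vperm2i128's immediate selects one 128-bit chunk per result
		 lane from the concatenation op0:op1 (0 = op0 low, 1 = op0
		 high, 2 = op1 low, 3 = op1 high); bits 0-1 pick the low
		 result lane and bits 4-5 the high one, which is what the
		 division by nelt/2 below computes for each half.  */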
42796 rperm[0]
42797 = GEN_INT ((d->perm[0] / (nelt / 2))
42798 | ((d->perm[nelt / 2] / (nelt / 2)) << 4));
42799 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
42800 if (target != d->target)
42801 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42802 return true;
42803 }
42804 return false;
42805 }
42806 }
42807 else
42808 {
42809 if (GET_MODE_SIZE (d->vmode) == 16)
42810 {
42811 if (!TARGET_SSSE3)
42812 return false;
42813 }
42814 else if (GET_MODE_SIZE (d->vmode) == 32)
42815 {
42816 if (!TARGET_AVX2)
42817 return false;
42818
42819 /* V4DImode should already be handled through
42820 expand_vselect by the vpermq instruction. */
42821 gcc_assert (d->vmode != V4DImode);
42822
42823 vmode = V32QImode;
42824 if (d->vmode == V8SImode
42825 || d->vmode == V16HImode
42826 || d->vmode == V32QImode)
42827 {
42828 /* First see if vpermq can be used for
42829 V8SImode/V16HImode/V32QImode. */
42830 if (valid_perm_using_mode_p (V4DImode, d))
42831 {
42832 for (i = 0; i < 4; i++)
42833 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
42834 if (d->testing_p)
42835 return true;
42836 target = gen_reg_rtx (V4DImode);
42837 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
42838 perm, 4, false))
42839 {
42840 emit_move_insn (d->target,
42841 gen_lowpart (d->vmode, target));
42842 return true;
42843 }
42844 return false;
42845 }
42846
42847 /* Next see if vpermd can be used. */
42848 if (valid_perm_using_mode_p (V8SImode, d))
42849 vmode = V8SImode;
42850 }
42851 /* Or if vpermps can be used. */
42852 else if (d->vmode == V8SFmode)
42853 vmode = V8SImode;
42854
42855 if (vmode == V32QImode)
42856 {
42857 /* vpshufb only works intra-lane; it is not
42858 possible to shuffle bytes between the lanes. */
42859 for (i = 0; i < nelt; ++i)
42860 if ((d->perm[i] ^ i) & (nelt / 2))
42861 return false;
42862 }
42863 }
42864 else
42865 return false;
42866 }
42867
42868 if (d->testing_p)
42869 return true;
42870
42871 if (vmode == V8SImode)
42872 for (i = 0; i < 8; ++i)
42873 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
42874 else
42875 {
42876 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
42877 if (!d->one_operand_p)
42878 mask = 2 * nelt - 1;
42879 else if (vmode == V16QImode)
42880 mask = nelt - 1;
42881 else
42882 mask = nelt / 2 - 1;
42883
42884 for (i = 0; i < nelt; ++i)
42885 {
42886 unsigned j, e = d->perm[i] & mask;
42887 for (j = 0; j < eltsz; ++j)
42888 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
42889 }
42890 }
42891
42892 vperm = gen_rtx_CONST_VECTOR (vmode,
42893 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
42894 vperm = force_reg (vmode, vperm);
42895
42896 target = d->target;
42897 if (d->vmode != vmode)
42898 target = gen_reg_rtx (vmode);
42899 op0 = gen_lowpart (vmode, d->op0);
42900 if (d->one_operand_p)
42901 {
42902 if (vmode == V16QImode)
42903 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
42904 else if (vmode == V32QImode)
42905 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
42906 else if (vmode == V8SFmode)
42907 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
42908 else
42909 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
42910 }
42911 else
42912 {
42913 op1 = gen_lowpart (vmode, d->op1);
42914 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
42915 }
42916 if (target != d->target)
42917 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42918
42919 return true;
42920 }
42921
42922 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
42923 in a single instruction. */
42924
42925 static bool
42926 expand_vec_perm_1 (struct expand_vec_perm_d *d)
42927 {
42928 unsigned i, nelt = d->nelt;
42929 unsigned char perm2[MAX_VECT_LEN];
42930
42931 /* Check plain VEC_SELECT first, because AVX has instructions that could
42932 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
42933 input where SEL+CONCAT may not. */
42934 if (d->one_operand_p)
42935 {
42936 int mask = nelt - 1;
42937 bool identity_perm = true;
42938 bool broadcast_perm = true;
42939
42940 for (i = 0; i < nelt; i++)
42941 {
42942 perm2[i] = d->perm[i] & mask;
42943 if (perm2[i] != i)
42944 identity_perm = false;
42945 if (perm2[i])
42946 broadcast_perm = false;
42947 }
42948
42949 if (identity_perm)
42950 {
42951 if (!d->testing_p)
42952 emit_move_insn (d->target, d->op0);
42953 return true;
42954 }
42955 else if (broadcast_perm && TARGET_AVX2)
42956 {
42957 /* Use vpbroadcast{b,w,d}. */
42958 rtx (*gen) (rtx, rtx) = NULL;
42959 switch (d->vmode)
42960 {
42961 case V32QImode:
42962 gen = gen_avx2_pbroadcastv32qi_1;
42963 break;
42964 case V16HImode:
42965 gen = gen_avx2_pbroadcastv16hi_1;
42966 break;
42967 case V8SImode:
42968 gen = gen_avx2_pbroadcastv8si_1;
42969 break;
42970 case V16QImode:
42971 gen = gen_avx2_pbroadcastv16qi;
42972 break;
42973 case V8HImode:
42974 gen = gen_avx2_pbroadcastv8hi;
42975 break;
42976 case V8SFmode:
42977 gen = gen_avx2_vec_dupv8sf_1;
42978 break;
42979 /* For other modes prefer other shuffles this function creates. */
42980 default: break;
42981 }
42982 if (gen != NULL)
42983 {
42984 if (!d->testing_p)
42985 emit_insn (gen (d->target, d->op0));
42986 return true;
42987 }
42988 }
42989
42990 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
42991 return true;
42992
42993 /* There are plenty of patterns in sse.md that are written for
42994 SEL+CONCAT and are not replicated for a single op. Perhaps
42995 that should be changed, to avoid the nastiness here. */
42996
42997 /* Recognize interleave style patterns, which means incrementing
42998 every other permutation operand. */
42999 for (i = 0; i < nelt; i += 2)
43000 {
43001 perm2[i] = d->perm[i] & mask;
43002 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
43003 }
43004 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43005 d->testing_p))
43006 return true;
43007
43008 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
43009 if (nelt >= 4)
43010 {
43011 for (i = 0; i < nelt; i += 4)
43012 {
43013 perm2[i + 0] = d->perm[i + 0] & mask;
43014 perm2[i + 1] = d->perm[i + 1] & mask;
43015 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
43016 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
43017 }
43018
43019 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43020 d->testing_p))
43021 return true;
43022 }
43023 }
43024
43025 /* Finally, try the fully general two operand permute. */
43026 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43027 d->testing_p))
43028 return true;
43029
43030 /* Recognize interleave style patterns with reversed operands. */
43031 if (!d->one_operand_p)
43032 {
43033 for (i = 0; i < nelt; ++i)
43034 {
43035 unsigned e = d->perm[i];
43036 if (e >= nelt)
43037 e -= nelt;
43038 else
43039 e += nelt;
43040 perm2[i] = e;
43041 }
43042
43043 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43044 d->testing_p))
43045 return true;
43046 }
43047
43048 /* Try the SSE4.1 blend variable merge instructions. */
43049 if (expand_vec_perm_blend (d))
43050 return true;
43051
43052 /* Try one of the AVX vpermil variable permutations. */
43053 if (expand_vec_perm_vpermil (d))
43054 return true;
43055
43056 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43057 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43058 if (expand_vec_perm_pshufb (d))
43059 return true;
43060
43061 /* Try the AVX512F vpermi2 instructions. */
43062 rtx vec[64];
43063 enum machine_mode mode = d->vmode;
43064 if (mode == V8DFmode)
43065 mode = V8DImode;
43066 else if (mode == V16SFmode)
43067 mode = V16SImode;
43068 for (i = 0; i < nelt; ++i)
43069 vec[i] = GEN_INT (d->perm[i]);
43070 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
43071 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
43072 return true;
43073
43074 return false;
43075 }
43076
43077 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43078 in terms of a pair of pshuflw + pshufhw instructions. */
43079
43080 static bool
43081 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43082 {
43083 unsigned char perm2[MAX_VECT_LEN];
43084 unsigned i;
43085 bool ok;
43086
43087 if (d->vmode != V8HImode || !d->one_operand_p)
43088 return false;
43089
43090 /* The two permutations only operate in 64-bit lanes. */
43091 for (i = 0; i < 4; ++i)
43092 if (d->perm[i] >= 4)
43093 return false;
43094 for (i = 4; i < 8; ++i)
43095 if (d->perm[i] < 4)
43096 return false;
43097
43098 if (d->testing_p)
43099 return true;
43100
43101 /* Emit the pshuflw. */
43102 memcpy (perm2, d->perm, 4);
43103 for (i = 4; i < 8; ++i)
43104 perm2[i] = i;
43105 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43106 gcc_assert (ok);
43107
43108 /* Emit the pshufhw. */
43109 memcpy (perm2 + 4, d->perm + 4, 4);
43110 for (i = 0; i < 4; ++i)
43111 perm2[i] = i;
43112 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43113 gcc_assert (ok);
43114
43115 return true;
43116 }
43117
43118 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43119 the permutation using the SSSE3 palignr instruction. This succeeds
43120 when all of the elements in PERM fit within one vector and we merely
43121 need to shift them down so that a single vector permutation has a
43122 chance to succeed. */
43123
43124 static bool
43125 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43126 {
43127 unsigned i, nelt = d->nelt;
43128 unsigned min, max;
43129 bool in_order, ok;
43130 rtx shift, target;
43131 struct expand_vec_perm_d dcopy;
43132
43133 /* Even with AVX, palignr only operates on 128-bit vectors. */
43134 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43135 return false;
43136
43137 min = nelt, max = 0;
43138 for (i = 0; i < nelt; ++i)
43139 {
43140 unsigned e = d->perm[i];
43141 if (e < min)
43142 min = e;
43143 if (e > max)
43144 max = e;
43145 }
43146 if (min == 0 || max - min >= nelt)
43147 return false;
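  /* At this point all referenced elements fit in one nelt-wide window
     starting at MIN; e.g. for nelt == 8 and perm = {3,4,5,6,7,8,9,10}
     a palignr by 3 elements slides that window into a single vector,
     leaving an ordinary one-operand permutation to finish the job.  */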
43148
43149 /* Given that we have SSSE3, we know we'll be able to implement the
43150 single operand permutation after the palignr with pshufb. */
43151 if (d->testing_p)
43152 return true;
43153
43154 dcopy = *d;
43155 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43156 target = gen_reg_rtx (TImode);
43157 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43158 gen_lowpart (TImode, d->op0), shift));
43159
43160 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43161 dcopy.one_operand_p = true;
43162
43163 in_order = true;
43164 for (i = 0; i < nelt; ++i)
43165 {
43166 unsigned e = dcopy.perm[i] - min;
43167 if (e != i)
43168 in_order = false;
43169 dcopy.perm[i] = e;
43170 }
43171
43172 /* Test for the degenerate case where the alignment by itself
43173 produces the desired permutation. */
43174 if (in_order)
43175 {
43176 emit_move_insn (d->target, dcopy.op0);
43177 return true;
43178 }
43179
43180 ok = expand_vec_perm_1 (&dcopy);
43181 gcc_assert (ok);
43182
43183 return ok;
43184 }
43185
43186 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43187
43188 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43189 a two vector permutation into a single vector permutation by using
43190 an interleave operation to merge the vectors. */
43191
43192 static bool
43193 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43194 {
43195 struct expand_vec_perm_d dremap, dfinal;
43196 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43197 unsigned HOST_WIDE_INT contents;
43198 unsigned char remap[2 * MAX_VECT_LEN];
43199 rtx seq;
43200 bool ok, same_halves = false;
43201
43202 if (GET_MODE_SIZE (d->vmode) == 16)
43203 {
43204 if (d->one_operand_p)
43205 return false;
43206 }
43207 else if (GET_MODE_SIZE (d->vmode) == 32)
43208 {
43209 if (!TARGET_AVX)
43210 return false;
43211 /* For 32-byte modes allow even the d->one_operand_p case:
43212 the lack of cross-lane shuffling in some instructions
43213 might prevent a single-insn shuffle. */
43214 dfinal = *d;
43215 dfinal.testing_p = true;
43216 /* If expand_vec_perm_interleave3 can expand this into
43217 a 3-insn sequence, give up and let it be expanded that
43218 way. While that is one insn longer, it doesn't need a
43219 memory operand, and in the common case where both the
43220 interleave-low and interleave-high permutations with the
43221 same operands are adjacent, it needs only 4 insns for
43222 both after CSE. */
43223 if (expand_vec_perm_interleave3 (&dfinal))
43224 return false;
43225 }
43226 else
43227 return false;
43228
43229 /* Examine from whence the elements come. */
43230 contents = 0;
43231 for (i = 0; i < nelt; ++i)
43232 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43233
43234 memset (remap, 0xff, sizeof (remap));
43235 dremap = *d;
43236
43237 if (GET_MODE_SIZE (d->vmode) == 16)
43238 {
43239 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43240
43241 /* Split the two input vectors into 4 halves. */
43242 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43243 h2 = h1 << nelt2;
43244 h3 = h2 << nelt2;
43245 h4 = h3 << nelt2;
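      /* E.g. for V8HImode (nelt == 8, nelt2 == 4): h1 == 0x000f covers the
	 low half of op0, h2 == 0x00f0 its high half, h3 == 0x0f00 the low
	 half of op1 and h4 == 0xf000 its high half; CONTENTS has one bit per
	 referenced source element, so the tests below just ask which halves
	 the permutation actually reads.  */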
43246
43247 /* If all the elements come from the low halves, use interleave low;
43248 similarly for interleave high. If the elements come from mis-matched
43249 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
43250 if ((contents & (h1 | h3)) == contents)
43251 {
43252 /* punpckl* */
43253 for (i = 0; i < nelt2; ++i)
43254 {
43255 remap[i] = i * 2;
43256 remap[i + nelt] = i * 2 + 1;
43257 dremap.perm[i * 2] = i;
43258 dremap.perm[i * 2 + 1] = i + nelt;
43259 }
43260 if (!TARGET_SSE2 && d->vmode == V4SImode)
43261 dremap.vmode = V4SFmode;
43262 }
43263 else if ((contents & (h2 | h4)) == contents)
43264 {
43265 /* punpckh* */
43266 for (i = 0; i < nelt2; ++i)
43267 {
43268 remap[i + nelt2] = i * 2;
43269 remap[i + nelt + nelt2] = i * 2 + 1;
43270 dremap.perm[i * 2] = i + nelt2;
43271 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43272 }
43273 if (!TARGET_SSE2 && d->vmode == V4SImode)
43274 dremap.vmode = V4SFmode;
43275 }
43276 else if ((contents & (h1 | h4)) == contents)
43277 {
43278 /* shufps */
43279 for (i = 0; i < nelt2; ++i)
43280 {
43281 remap[i] = i;
43282 remap[i + nelt + nelt2] = i + nelt2;
43283 dremap.perm[i] = i;
43284 dremap.perm[i + nelt2] = i + nelt + nelt2;
43285 }
43286 if (nelt != 4)
43287 {
43288 /* shufpd */
43289 dremap.vmode = V2DImode;
43290 dremap.nelt = 2;
43291 dremap.perm[0] = 0;
43292 dremap.perm[1] = 3;
43293 }
43294 }
43295 else if ((contents & (h2 | h3)) == contents)
43296 {
43297 /* shufps */
43298 for (i = 0; i < nelt2; ++i)
43299 {
43300 remap[i + nelt2] = i;
43301 remap[i + nelt] = i + nelt2;
43302 dremap.perm[i] = i + nelt2;
43303 dremap.perm[i + nelt2] = i + nelt;
43304 }
43305 if (nelt != 4)
43306 {
43307 /* shufpd */
43308 dremap.vmode = V2DImode;
43309 dremap.nelt = 2;
43310 dremap.perm[0] = 1;
43311 dremap.perm[1] = 2;
43312 }
43313 }
43314 else
43315 return false;
43316 }
43317 else
43318 {
43319 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43320 unsigned HOST_WIDE_INT q[8];
43321 unsigned int nonzero_halves[4];
43322
43323 /* Split the two input vectors into 8 quarters. */
43324 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43325 for (i = 1; i < 8; ++i)
43326 q[i] = q[0] << (nelt4 * i);
43327 for (i = 0; i < 4; ++i)
43328 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43329 {
43330 nonzero_halves[nzcnt] = i;
43331 ++nzcnt;
43332 }
43333
43334 if (nzcnt == 1)
43335 {
43336 gcc_assert (d->one_operand_p);
43337 nonzero_halves[1] = nonzero_halves[0];
43338 same_halves = true;
43339 }
43340 else if (d->one_operand_p)
43341 {
43342 gcc_assert (nonzero_halves[0] == 0);
43343 gcc_assert (nonzero_halves[1] == 1);
43344 }
43345
43346 if (nzcnt <= 2)
43347 {
43348 if (d->perm[0] / nelt2 == nonzero_halves[1])
43349 {
43350 /* Attempt to increase the likelihood that dfinal
43351 shuffle will be intra-lane. */
43352 char tmph = nonzero_halves[0];
43353 nonzero_halves[0] = nonzero_halves[1];
43354 nonzero_halves[1] = tmph;
43355 }
43356
43357 /* vperm2f128 or vperm2i128. */
43358 for (i = 0; i < nelt2; ++i)
43359 {
43360 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43361 remap[i + nonzero_halves[0] * nelt2] = i;
43362 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43363 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43364 }
43365
43366 if (d->vmode != V8SFmode
43367 && d->vmode != V4DFmode
43368 && d->vmode != V8SImode)
43369 {
43370 dremap.vmode = V8SImode;
43371 dremap.nelt = 8;
43372 for (i = 0; i < 4; ++i)
43373 {
43374 dremap.perm[i] = i + nonzero_halves[0] * 4;
43375 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43376 }
43377 }
43378 }
43379 else if (d->one_operand_p)
43380 return false;
43381 else if (TARGET_AVX2
43382 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43383 {
43384 /* vpunpckl* */
43385 for (i = 0; i < nelt4; ++i)
43386 {
43387 remap[i] = i * 2;
43388 remap[i + nelt] = i * 2 + 1;
43389 remap[i + nelt2] = i * 2 + nelt2;
43390 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43391 dremap.perm[i * 2] = i;
43392 dremap.perm[i * 2 + 1] = i + nelt;
43393 dremap.perm[i * 2 + nelt2] = i + nelt2;
43394 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43395 }
43396 }
43397 else if (TARGET_AVX2
43398 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43399 {
43400 /* vpunpckh* */
43401 for (i = 0; i < nelt4; ++i)
43402 {
43403 remap[i + nelt4] = i * 2;
43404 remap[i + nelt + nelt4] = i * 2 + 1;
43405 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43406 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43407 dremap.perm[i * 2] = i + nelt4;
43408 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43409 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43410 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43411 }
43412 }
43413 else
43414 return false;
43415 }
43416
43417 /* Use the remapping array set up above to move the elements from their
43418 swizzled locations into their final destinations. */
43419 dfinal = *d;
43420 for (i = 0; i < nelt; ++i)
43421 {
43422 unsigned e = remap[d->perm[i]];
43423 gcc_assert (e < nelt);
43424 /* If same_halves is true, both halves of the remapped vector are the
43425 same. Avoid cross-lane accesses if possible. */
43426 if (same_halves && i >= nelt2)
43427 {
43428 gcc_assert (e < nelt2);
43429 dfinal.perm[i] = e + nelt2;
43430 }
43431 else
43432 dfinal.perm[i] = e;
43433 }
43434 if (!d->testing_p)
43435 {
43436 dremap.target = gen_reg_rtx (dremap.vmode);
43437 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43438 }
43439 dfinal.op1 = dfinal.op0;
43440 dfinal.one_operand_p = true;
43441
43442 /* Test if the final remap can be done with a single insn. For V4SFmode or
43443 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43444 start_sequence ();
43445 ok = expand_vec_perm_1 (&dfinal);
43446 seq = get_insns ();
43447 end_sequence ();
43448
43449 if (!ok)
43450 return false;
43451
43452 if (d->testing_p)
43453 return true;
43454
43455 if (dremap.vmode != dfinal.vmode)
43456 {
43457 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43458 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43459 }
43460
43461 ok = expand_vec_perm_1 (&dremap);
43462 gcc_assert (ok);
43463
43464 emit_insn (seq);
43465 return true;
43466 }
43467
43468 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43469 a single vector cross-lane permutation into vpermq followed
43470 by any of the single insn permutations. */
43471
43472 static bool
43473 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43474 {
43475 struct expand_vec_perm_d dremap, dfinal;
43476 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43477 unsigned contents[2];
43478 bool ok;
43479
43480 if (!(TARGET_AVX2
43481 && (d->vmode == V32QImode || d->vmode == V16HImode)
43482 && d->one_operand_p))
43483 return false;
43484
43485 contents[0] = 0;
43486 contents[1] = 0;
43487 for (i = 0; i < nelt2; ++i)
43488 {
43489 contents[0] |= 1u << (d->perm[i] / nelt4);
43490 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43491 }
43492
43493 for (i = 0; i < 2; ++i)
43494 {
43495 unsigned int cnt = 0;
43496 for (j = 0; j < 4; ++j)
43497 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43498 return false;
43499 }
43500
43501 if (d->testing_p)
43502 return true;
43503
43504 dremap = *d;
43505 dremap.vmode = V4DImode;
43506 dremap.nelt = 4;
43507 dremap.target = gen_reg_rtx (V4DImode);
43508 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43509 dremap.op1 = dremap.op0;
43510 dremap.one_operand_p = true;
43511 for (i = 0; i < 2; ++i)
43512 {
43513 unsigned int cnt = 0;
43514 for (j = 0; j < 4; ++j)
43515 if ((contents[i] & (1u << j)) != 0)
43516 dremap.perm[2 * i + cnt++] = j;
43517 for (; cnt < 2; ++cnt)
43518 dremap.perm[2 * i + cnt] = 0;
43519 }
43520
43521 dfinal = *d;
43522 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43523 dfinal.op1 = dfinal.op0;
43524 dfinal.one_operand_p = true;
43525 for (i = 0, j = 0; i < nelt; ++i)
43526 {
43527 if (i == nelt2)
43528 j = 2;
43529 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43530 if ((d->perm[i] / nelt4) == dremap.perm[j])
43531 ;
43532 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43533 dfinal.perm[i] |= nelt4;
43534 else
43535 gcc_unreachable ();
43536 }
43537
43538 ok = expand_vec_perm_1 (&dremap);
43539 gcc_assert (ok);
43540
43541 ok = expand_vec_perm_1 (&dfinal);
43542 gcc_assert (ok);
43543
43544 return true;
43545 }
43546
43547 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
43548 a vector permutation using two instructions, vperm2f128 resp.
43549 vperm2i128 followed by any single in-lane permutation. */
43550
43551 static bool
43552 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43553 {
43554 struct expand_vec_perm_d dfirst, dsecond;
43555 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43556 bool ok;
43557
43558 if (!TARGET_AVX
43559 || GET_MODE_SIZE (d->vmode) != 32
43560 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43561 return false;
43562
43563 dsecond = *d;
43564 dsecond.one_operand_p = false;
43565 dsecond.testing_p = true;
43566
43567 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43568 immediate. For perm < 16 the second permutation uses
43569 d->op0 as first operand, for perm >= 16 it uses d->op1
43570 as first operand. The second operand is the result of
43571 vperm2[fi]128. */
43572 for (perm = 0; perm < 32; perm++)
43573 {
43574 /* Ignore permutations which do not move anything cross-lane. */
43575 if (perm < 16)
43576 {
43577 /* The second shuffle for e.g. V4DFmode has
43578 0123 and ABCD operands.
43579 Ignore AB23, as 23 is already in the second lane
43580 of the first operand. */
43581 if ((perm & 0xc) == (1 << 2)) continue;
43582 /* And 01CD, as 01 is in the first lane of the first
43583 operand. */
43584 if ((perm & 3) == 0) continue;
43585 /* And 4567, as then the vperm2[fi]128 doesn't change
43586 anything on the original 4567 second operand. */
43587 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43588 }
43589 else
43590 {
43591 /* The second shuffle for e.g. V4DFmode has
43592 4567 and ABCD operands.
43593 Ignore AB67, as 67 is already in the second lane
43594 of the first operand. */
43595 if ((perm & 0xc) == (3 << 2)) continue;
43596 /* And 45CD, as 45 is in the first lane of the first
43597 operand. */
43598 if ((perm & 3) == 2) continue;
43599 /* And 0123, as then the vperm2[fi]128 doesn't change
43600 anything on the original 0123 first operand. */
43601 if ((perm & 0xf) == (1 << 2)) continue;
43602 }
43603
43604 for (i = 0; i < nelt; i++)
43605 {
43606 j = d->perm[i] / nelt2;
43607 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43608 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43609 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43610 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43611 else
43612 break;
43613 }
43614
43615 if (i == nelt)
43616 {
43617 start_sequence ();
43618 ok = expand_vec_perm_1 (&dsecond);
43619 end_sequence ();
43620 }
43621 else
43622 ok = false;
43623
43624 if (ok)
43625 {
43626 if (d->testing_p)
43627 return true;
43628
43629 /* Found a usable second shuffle. dfirst will be
43630 vperm2f128 on d->op0 and d->op1. */
43631 dsecond.testing_p = false;
43632 dfirst = *d;
43633 dfirst.target = gen_reg_rtx (d->vmode);
43634 for (i = 0; i < nelt; i++)
43635 dfirst.perm[i] = (i & (nelt2 - 1))
43636 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43637
43638 ok = expand_vec_perm_1 (&dfirst);
43639 gcc_assert (ok);
43640
43641 /* And dsecond is some single insn shuffle, taking
43642 d->op0 and result of vperm2f128 (if perm < 16) or
43643 d->op1 and result of vperm2f128 (otherwise). */
43644 dsecond.op1 = dfirst.target;
43645 if (perm >= 16)
43646 dsecond.op0 = dfirst.op1;
43647
43648 ok = expand_vec_perm_1 (&dsecond);
43649 gcc_assert (ok);
43650
43651 return true;
43652 }
43653
43654 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
43655 if (d->one_operand_p)
43656 return false;
43657 }
43658
43659 return false;
43660 }
43661
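/* Illustrative sketch, not part of the expander above: a plain C model of
   how the vperm2[fi]128 immediate selects 128-bit lanes for V4DFmode,
   assuming the documented encoding (bits 0-1 pick the low result lane from
   { op0.lo, op0.hi, op1.lo, op1.hi }, bits 4-5 pick the high result lane,
   and bits 3 and 7 zero the low and high result lane, respectively).  */
static void
vperm2f128_v4df_model (double dst[4], const double op0[4],
		       const double op1[4], unsigned int imm)
{
  const double *lanes[4] = { op0, op0 + 2, op1, op1 + 2 };
  unsigned int i;

  for (i = 0; i < 2; i++)
    {
      unsigned int sel = (imm >> (4 * i)) & 0xf;
      if (sel & 8)
	dst[2 * i] = dst[2 * i + 1] = 0.0;
      else
	{
	  dst[2 * i] = lanes[sel & 3][0];
	  dst[2 * i + 1] = lanes[sel & 3][1];
	}
    }
}
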
43662 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43663 a two-vector permutation using 2 intra-lane interleave insns
43664 and a cross-lane shuffle for 32-byte vectors. */
43665
43666 static bool
43667 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43668 {
43669 unsigned i, nelt;
43670 rtx (*gen) (rtx, rtx, rtx);
43671
43672 if (d->one_operand_p)
43673 return false;
43674 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43675 ;
43676 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43677 ;
43678 else
43679 return false;
43680
43681 nelt = d->nelt;
43682 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43683 return false;
43684 for (i = 0; i < nelt; i += 2)
43685 if (d->perm[i] != d->perm[0] + i / 2
43686 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43687 return false;
43688
43689 if (d->testing_p)
43690 return true;
43691
43692 switch (d->vmode)
43693 {
43694 case V32QImode:
43695 if (d->perm[0])
43696 gen = gen_vec_interleave_highv32qi;
43697 else
43698 gen = gen_vec_interleave_lowv32qi;
43699 break;
43700 case V16HImode:
43701 if (d->perm[0])
43702 gen = gen_vec_interleave_highv16hi;
43703 else
43704 gen = gen_vec_interleave_lowv16hi;
43705 break;
43706 case V8SImode:
43707 if (d->perm[0])
43708 gen = gen_vec_interleave_highv8si;
43709 else
43710 gen = gen_vec_interleave_lowv8si;
43711 break;
43712 case V4DImode:
43713 if (d->perm[0])
43714 gen = gen_vec_interleave_highv4di;
43715 else
43716 gen = gen_vec_interleave_lowv4di;
43717 break;
43718 case V8SFmode:
43719 if (d->perm[0])
43720 gen = gen_vec_interleave_highv8sf;
43721 else
43722 gen = gen_vec_interleave_lowv8sf;
43723 break;
43724 case V4DFmode:
43725 if (d->perm[0])
43726 gen = gen_vec_interleave_highv4df;
43727 else
43728 gen = gen_vec_interleave_lowv4df;
43729 break;
43730 default:
43731 gcc_unreachable ();
43732 }
43733
43734 emit_insn (gen (d->target, d->op0, d->op1));
43735 return true;
43736 }
43737
43738 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
43739 a single vector permutation using a single intra-lane vector
43740 permutation, vperm2f128 swapping the lanes and a vblend* insn blending
43741 the non-swapped and swapped vectors together. */
43742
43743 static bool
43744 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
43745 {
43746 struct expand_vec_perm_d dfirst, dsecond;
43747 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
43748 rtx seq;
43749 bool ok;
43750 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
43751
43752 if (!TARGET_AVX
43753 || TARGET_AVX2
43754 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
43755 || !d->one_operand_p)
43756 return false;
43757
43758 dfirst = *d;
43759 for (i = 0; i < nelt; i++)
43760 dfirst.perm[i] = 0xff;
43761 for (i = 0, msk = 0; i < nelt; i++)
43762 {
43763 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
43764 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
43765 return false;
43766 dfirst.perm[j] = d->perm[i];
43767 if (j != i)
43768 msk |= (1 << i);
43769 }
43770 for (i = 0; i < nelt; i++)
43771 if (dfirst.perm[i] == 0xff)
43772 dfirst.perm[i] = i;
43773
43774 if (!d->testing_p)
43775 dfirst.target = gen_reg_rtx (dfirst.vmode);
43776
43777 start_sequence ();
43778 ok = expand_vec_perm_1 (&dfirst);
43779 seq = get_insns ();
43780 end_sequence ();
43781
43782 if (!ok)
43783 return false;
43784
43785 if (d->testing_p)
43786 return true;
43787
43788 emit_insn (seq);
43789
43790 dsecond = *d;
43791 dsecond.op0 = dfirst.target;
43792 dsecond.op1 = dfirst.target;
43793 dsecond.one_operand_p = true;
43794 dsecond.target = gen_reg_rtx (dsecond.vmode);
43795 for (i = 0; i < nelt; i++)
43796 dsecond.perm[i] = i ^ nelt2;
43797
43798 ok = expand_vec_perm_1 (&dsecond);
43799 gcc_assert (ok);
43800
43801 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
43802 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
43803 return true;
43804 }
43805
43806 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
43807 permutation using two vperm2f128, followed by a vshufpd insn blending
43808 the two vectors together. */
43809
43810 static bool
43811 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
43812 {
43813 struct expand_vec_perm_d dfirst, dsecond, dthird;
43814 bool ok;
43815
43816 if (!TARGET_AVX || (d->vmode != V4DFmode))
43817 return false;
43818
43819 if (d->testing_p)
43820 return true;
43821
43822 dfirst = *d;
43823 dsecond = *d;
43824 dthird = *d;
43825
43826 dfirst.perm[0] = (d->perm[0] & ~1);
43827 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
43828 dfirst.perm[2] = (d->perm[2] & ~1);
43829 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
43830 dsecond.perm[0] = (d->perm[1] & ~1);
43831 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
43832 dsecond.perm[2] = (d->perm[3] & ~1);
43833 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
43834 dthird.perm[0] = (d->perm[0] % 2);
43835 dthird.perm[1] = (d->perm[1] % 2) + 4;
43836 dthird.perm[2] = (d->perm[2] % 2) + 2;
43837 dthird.perm[3] = (d->perm[3] % 2) + 6;
43838
43839 dfirst.target = gen_reg_rtx (dfirst.vmode);
43840 dsecond.target = gen_reg_rtx (dsecond.vmode);
43841 dthird.op0 = dfirst.target;
43842 dthird.op1 = dsecond.target;
43843 dthird.one_operand_p = false;
43844
43845 canonicalize_perm (&dfirst);
43846 canonicalize_perm (&dsecond);
43847
43848 ok = expand_vec_perm_1 (&dfirst)
43849 && expand_vec_perm_1 (&dsecond)
43850 && expand_vec_perm_1 (&dthird);
43851
43852 gcc_assert (ok);
43853
43854 return true;
43855 }
43856
43857 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
43858 permutation with two pshufb insns and an ior. We should have already
43859 failed all two-instruction sequences. */
43860
43861 static bool
43862 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
43863 {
43864 rtx rperm[2][16], vperm, l, h, op, m128;
43865 unsigned int i, nelt, eltsz;
43866
43867 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43868 return false;
43869 gcc_assert (!d->one_operand_p);
43870
43871 if (d->testing_p)
43872 return true;
43873
43874 nelt = d->nelt;
43875 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43876
43877 /* Generate two permutation masks. If the required element is within
43878 the given vector it is shuffled into the proper lane. If the required
43879 element is in the other vector, force a zero into the lane by setting
43880 bit 7 in the permutation mask. */
43881 m128 = GEN_INT (-128);
43882 for (i = 0; i < nelt; ++i)
43883 {
43884 unsigned j, e = d->perm[i];
43885 unsigned which = (e >= nelt);
43886 if (e >= nelt)
43887 e -= nelt;
43888
43889 for (j = 0; j < eltsz; ++j)
43890 {
43891 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
43892 rperm[1-which][i*eltsz + j] = m128;
43893 }
43894 }
43895
43896 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
43897 vperm = force_reg (V16QImode, vperm);
43898
43899 l = gen_reg_rtx (V16QImode);
43900 op = gen_lowpart (V16QImode, d->op0);
43901 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
43902
43903 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
43904 vperm = force_reg (V16QImode, vperm);
43905
43906 h = gen_reg_rtx (V16QImode);
43907 op = gen_lowpart (V16QImode, d->op1);
43908 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
43909
43910 op = d->target;
43911 if (d->vmode != V16QImode)
43912 op = gen_reg_rtx (V16QImode);
43913 emit_insn (gen_iorv16qi3 (op, l, h));
43914 if (op != d->target)
43915 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43916
43917 return true;
43918 }
43919
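/* Illustrative sketch, not the RTL emitted above: the same two-pshufb-plus-por
   idea written with SSSE3 intrinsics (assumes <immintrin.h>).  A mask byte
   with bit 7 set makes pshufb write zero, so each result byte is taken from
   exactly one of the two sources and the two partial results can be ior'd.  */
static __m128i
pshufb2_model (__m128i op0, __m128i op1, const unsigned char perm[16])
{
  unsigned char m0[16], m1[16];
  int i;

  for (i = 0; i < 16; i++)
    if (perm[i] < 16)
      {
	m0[i] = perm[i];		/* take byte perm[i] from op0  */
	m1[i] = 0x80;			/* force zero in the op1 half  */
      }
    else
      {
	m0[i] = 0x80;
	m1[i] = perm[i] - 16;		/* take byte perm[i]-16 from op1  */
      }

  return _mm_or_si128
    (_mm_shuffle_epi8 (op0, _mm_loadu_si128 ((const __m128i *) m0)),
     _mm_shuffle_epi8 (op1, _mm_loadu_si128 ((const __m128i *) m1)));
}
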
43920 /* Implement an arbitrary permutation of one V32QImode or V16QImode operand
43921 with two vpshufb insns, vpermq and vpor. We should have already failed
43922 all two- or three-instruction sequences. */
43923
43924 static bool
43925 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
43926 {
43927 rtx rperm[2][32], vperm, l, h, hp, op, m128;
43928 unsigned int i, nelt, eltsz;
43929
43930 if (!TARGET_AVX2
43931 || !d->one_operand_p
43932 || (d->vmode != V32QImode && d->vmode != V16HImode))
43933 return false;
43934
43935 if (d->testing_p)
43936 return true;
43937
43938 nelt = d->nelt;
43939 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43940
43941 /* Generate two permutation masks. If the required element is within
43942 the same lane, it is shuffled in. If the required element is from the
43943 other lane, force a zero by setting bit 7 in the permutation mask.
43944 In the other mask the elements are non-negative if the element
43945 is requested from the other lane, but they are also moved to the other lane,
43946 so that the result of vpshufb can have the two V2TImode halves
43947 swapped. */
43948 m128 = GEN_INT (-128);
43949 for (i = 0; i < nelt; ++i)
43950 {
43951 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
43952 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
43953
43954 for (j = 0; j < eltsz; ++j)
43955 {
43956 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
43957 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
43958 }
43959 }
43960
43961 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
43962 vperm = force_reg (V32QImode, vperm);
43963
43964 h = gen_reg_rtx (V32QImode);
43965 op = gen_lowpart (V32QImode, d->op0);
43966 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
43967
43968 /* Swap the 128-bit lanes of h into hp. */
43969 hp = gen_reg_rtx (V4DImode);
43970 op = gen_lowpart (V4DImode, h);
43971 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
43972 const1_rtx));
43973
43974 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
43975 vperm = force_reg (V32QImode, vperm);
43976
43977 l = gen_reg_rtx (V32QImode);
43978 op = gen_lowpart (V32QImode, d->op0);
43979 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
43980
43981 op = d->target;
43982 if (d->vmode != V32QImode)
43983 op = gen_reg_rtx (V32QImode);
43984 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
43985 if (op != d->target)
43986 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43987
43988 return true;
43989 }
43990
43991 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
43992 and extract-odd permutations of two V32QImode or V16QImode operands
43993 with two vpshufb insns, vpor and vpermq. We should have already
43994 failed all two- or three-instruction sequences. */
43995
43996 static bool
43997 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
43998 {
43999 rtx rperm[2][32], vperm, l, h, ior, op, m128;
44000 unsigned int i, nelt, eltsz;
44001
44002 if (!TARGET_AVX2
44003 || d->one_operand_p
44004 || (d->vmode != V32QImode && d->vmode != V16HImode))
44005 return false;
44006
44007 for (i = 0; i < d->nelt; ++i)
44008 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
44009 return false;
44010
44011 if (d->testing_p)
44012 return true;
44013
44014 nelt = d->nelt;
44015 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44016
44017 /* Generate two permutation masks. In the first permutation mask
44018 the first quarter will contain indexes for the first half
44019 of op0, the second quarter will contain bit 7 set, the third quarter
44020 will contain indexes for the second half of op0 and the
44021 last quarter bit 7 set. In the second permutation mask
44022 the first quarter will contain bit 7 set, the second quarter
44023 indexes for the first half of the op1, the third quarter bit 7 set
44024 and last quarter indexes for the second half of the op1.
44025 I.e. the first mask e.g. for V32QImode extract even will be:
44026 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44027 (all values masked with 0xf except for -128) and second mask
44028 for extract even will be
44029 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44030 m128 = GEN_INT (-128);
44031 for (i = 0; i < nelt; ++i)
44032 {
44033 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44034 unsigned which = d->perm[i] >= nelt;
44035 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
44036
44037 for (j = 0; j < eltsz; ++j)
44038 {
44039 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44040 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44041 }
44042 }
44043
44044 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44045 vperm = force_reg (V32QImode, vperm);
44046
44047 l = gen_reg_rtx (V32QImode);
44048 op = gen_lowpart (V32QImode, d->op0);
44049 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44050
44051 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44052 vperm = force_reg (V32QImode, vperm);
44053
44054 h = gen_reg_rtx (V32QImode);
44055 op = gen_lowpart (V32QImode, d->op1);
44056 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44057
44058 ior = gen_reg_rtx (V32QImode);
44059 emit_insn (gen_iorv32qi3 (ior, l, h));
44060
44061 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
44062 op = gen_reg_rtx (V4DImode);
44063 ior = gen_lowpart (V4DImode, ior);
44064 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44065 const1_rtx, GEN_INT (3)));
44066 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44067
44068 return true;
44069 }
44070
44071 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
44072 and extract-odd permutations. */
44073
44074 static bool
44075 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44076 {
44077 rtx t1, t2, t3, t4, t5;
44078
44079 switch (d->vmode)
44080 {
44081 case V4DFmode:
44082 if (d->testing_p)
44083 break;
44084 t1 = gen_reg_rtx (V4DFmode);
44085 t2 = gen_reg_rtx (V4DFmode);
44086
44087 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44088 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44089 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44090
44091 /* Now an unpck[lh]pd will produce the result required. */
44092 if (odd)
44093 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44094 else
44095 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44096 emit_insn (t3);
44097 break;
44098
44099 case V8SFmode:
44100 {
44101 int mask = odd ? 0xdd : 0x88;
44102
44103 if (d->testing_p)
44104 break;
44105 t1 = gen_reg_rtx (V8SFmode);
44106 t2 = gen_reg_rtx (V8SFmode);
44107 t3 = gen_reg_rtx (V8SFmode);
44108
44109 /* Shuffle within the 128-bit lanes to produce:
44110 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44111 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44112 GEN_INT (mask)));
44113
44114 /* Shuffle the lanes around to produce:
44115 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44116 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44117 GEN_INT (0x3)));
44118
44119 /* Shuffle within the 128-bit lanes to produce:
44120 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44121 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44122
44123 /* Shuffle within the 128-bit lanes to produce:
44124 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44125 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44126
44127 /* Shuffle the lanes around to produce:
44128 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44129 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44130 GEN_INT (0x20)));
44131 }
44132 break;
44133
44134 case V2DFmode:
44135 case V4SFmode:
44136 case V2DImode:
44137 case V4SImode:
44138 /* These are always directly implementable by expand_vec_perm_1. */
44139 gcc_unreachable ();
44140
44141 case V8HImode:
44142 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44143 return expand_vec_perm_pshufb2 (d);
44144 else
44145 {
44146 if (d->testing_p)
44147 break;
44148 /* We need 2*log2(N)-1 operations to achieve odd/even
44149 with interleave. */
44150 t1 = gen_reg_rtx (V8HImode);
44151 t2 = gen_reg_rtx (V8HImode);
44152 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44153 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44154 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44155 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44156 if (odd)
44157 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44158 else
44159 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44160 emit_insn (t3);
44161 }
44162 break;
44163
44164 case V16QImode:
44165 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44166 return expand_vec_perm_pshufb2 (d);
44167 else
44168 {
44169 if (d->testing_p)
44170 break;
44171 t1 = gen_reg_rtx (V16QImode);
44172 t2 = gen_reg_rtx (V16QImode);
44173 t3 = gen_reg_rtx (V16QImode);
44174 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44175 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44176 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44177 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44178 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44179 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44180 if (odd)
44181 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44182 else
44183 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44184 emit_insn (t3);
44185 }
44186 break;
44187
44188 case V16HImode:
44189 case V32QImode:
44190 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44191
44192 case V4DImode:
44193 if (!TARGET_AVX2)
44194 {
44195 struct expand_vec_perm_d d_copy = *d;
44196 d_copy.vmode = V4DFmode;
44197 if (d->testing_p)
44198 d_copy.target = gen_lowpart (V4DFmode, d->target);
44199 else
44200 d_copy.target = gen_reg_rtx (V4DFmode);
44201 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44202 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44203 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44204 {
44205 if (!d->testing_p)
44206 emit_move_insn (d->target,
44207 gen_lowpart (V4DImode, d_copy.target));
44208 return true;
44209 }
44210 return false;
44211 }
44212
44213 if (d->testing_p)
44214 break;
44215
44216 t1 = gen_reg_rtx (V4DImode);
44217 t2 = gen_reg_rtx (V4DImode);
44218
44219 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44220 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44221 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44222
44223 /* Now a vpunpck[lh]qdq will produce the result required. */
44224 if (odd)
44225 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44226 else
44227 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44228 emit_insn (t3);
44229 break;
44230
44231 case V8SImode:
44232 if (!TARGET_AVX2)
44233 {
44234 struct expand_vec_perm_d d_copy = *d;
44235 d_copy.vmode = V8SFmode;
44236 if (d->testing_p)
44237 d_copy.target = gen_lowpart (V8SFmode, d->target);
44238 else
44239 d_copy.target = gen_reg_rtx (V8SFmode);
44240 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44241 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44242 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44243 {
44244 if (!d->testing_p)
44245 emit_move_insn (d->target,
44246 gen_lowpart (V8SImode, d_copy.target));
44247 return true;
44248 }
44249 return false;
44250 }
44251
44252 if (d->testing_p)
44253 break;
44254
44255 t1 = gen_reg_rtx (V8SImode);
44256 t2 = gen_reg_rtx (V8SImode);
44257 t3 = gen_reg_rtx (V4DImode);
44258 t4 = gen_reg_rtx (V4DImode);
44259 t5 = gen_reg_rtx (V4DImode);
44260
44261 /* Shuffle the lanes around into
44262 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44263 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44264 gen_lowpart (V4DImode, d->op1),
44265 GEN_INT (0x20)));
44266 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44267 gen_lowpart (V4DImode, d->op1),
44268 GEN_INT (0x31)));
44269
44270 /* Swap the 2nd and 3rd position in each lane into
44271 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44272 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44273 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44274 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44275 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44276
44277 /* Now a vpunpck[lh]qdq will produce
44278 { 0 2 4 6 8 a c e } or { 1 3 5 7 9 b d f }, respectively. */
44279 if (odd)
44280 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44281 gen_lowpart (V4DImode, t2));
44282 else
44283 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44284 gen_lowpart (V4DImode, t2));
44285 emit_insn (t3);
44286 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44287 break;
44288
44289 default:
44290 gcc_unreachable ();
44291 }
44292
44293 return true;
44294 }
44295
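/* Illustrative sketch of the V8SFmode even-extraction sequence above,
   written with AVX intrinsics (assumes <immintrin.h>); the odd case only
   differs in the first shufps immediate (0xdd instead of 0x88).  */
static __m256
extract_even_v8sf_model (__m256 op0, __m256 op1)
{
  __m256 t1 = _mm256_shuffle_ps (op0, op1, 0x88);	/* 0 2 8 a 4 6 c e  */
  __m256 t2 = _mm256_permute2f128_ps (t1, t1, 0x03);	/* 4 6 c e 0 2 8 a  */
  __m256 lo = _mm256_shuffle_ps (t1, t2, 0x44);	/* 0 2 4 6 4 6 0 2  */
  __m256 hi = _mm256_shuffle_ps (t1, t2, 0xee);	/* 8 a c e c e 8 a  */
  return _mm256_permute2f128_ps (lo, hi, 0x20);	/* 0 2 4 6 8 a c e  */
}
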
44296 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44297 extract-even and extract-odd permutations. */
44298
44299 static bool
44300 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44301 {
44302 unsigned i, odd, nelt = d->nelt;
44303
44304 odd = d->perm[0];
44305 if (odd != 0 && odd != 1)
44306 return false;
44307
44308 for (i = 1; i < nelt; ++i)
44309 if (d->perm[i] != 2 * i + odd)
44310 return false;
44311
44312 return expand_vec_perm_even_odd_1 (d, odd);
44313 }
44314
44315 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
44316 permutations. We assume that expand_vec_perm_1 has already failed. */
44317
44318 static bool
44319 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44320 {
44321 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44322 enum machine_mode vmode = d->vmode;
44323 unsigned char perm2[4];
44324 rtx op0 = d->op0, dest;
44325 bool ok;
44326
44327 switch (vmode)
44328 {
44329 case V4DFmode:
44330 case V8SFmode:
44331 /* These are special-cased in sse.md so that we can optionally
44332 use the vbroadcast instruction. They expand to two insns
44333 if the input happens to be in a register. */
44334 gcc_unreachable ();
44335
44336 case V2DFmode:
44337 case V2DImode:
44338 case V4SFmode:
44339 case V4SImode:
44340 /* These are always implementable using standard shuffle patterns. */
44341 gcc_unreachable ();
44342
44343 case V8HImode:
44344 case V16QImode:
44345 /* These can be implemented via interleave. We save one insn by
44346 stopping once we have promoted to V4SImode and then using pshufd. */
44347 if (d->testing_p)
44348 return true;
44349 do
44350 {
44351 rtx dest;
44352 rtx (*gen) (rtx, rtx, rtx)
44353 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44354 : gen_vec_interleave_lowv8hi;
44355
44356 if (elt >= nelt2)
44357 {
44358 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44359 : gen_vec_interleave_highv8hi;
44360 elt -= nelt2;
44361 }
44362 nelt2 /= 2;
44363
44364 dest = gen_reg_rtx (vmode);
44365 emit_insn (gen (dest, op0, op0));
44366 vmode = get_mode_wider_vector (vmode);
44367 op0 = gen_lowpart (vmode, dest);
44368 }
44369 while (vmode != V4SImode);
44370
44371 memset (perm2, elt, 4);
44372 dest = gen_reg_rtx (V4SImode);
44373 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44374 gcc_assert (ok);
44375 if (!d->testing_p)
44376 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44377 return true;
44378
44379 case V32QImode:
44380 case V16HImode:
44381 case V8SImode:
44382 case V4DImode:
44383 /* For AVX2, broadcasts of the first element should already have been
44384 handled by expand_vec_perm_1 via vpbroadcast* or vpermq. */
44385 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44386 return false;
44387
44388 default:
44389 gcc_unreachable ();
44390 }
44391 }
44392
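/* Illustrative sketch (SSE2 intrinsics, assumes <immintrin.h>): broadcasting
   byte 0 of a V16QImode value with the interleave-then-pshufd idea used by
   expand_vec_perm_broadcast_1 above.  Other source elements work the same
   way, using the high interleave and a different pshufd selector.  */
static __m128i
broadcast_byte0_model (__m128i x)
{
  x = _mm_unpacklo_epi8 (x, x);		/* b0 b0 b1 b1 ...              */
  x = _mm_unpacklo_epi16 (x, x);	/* b0 b0 b0 b0 b1 b1 b1 b1 ...  */
  return _mm_shuffle_epi32 (x, 0x00);	/* replicate dword 0            */
}
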
44393 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44394 broadcast permutations. */
44395
44396 static bool
44397 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44398 {
44399 unsigned i, elt, nelt = d->nelt;
44400
44401 if (!d->one_operand_p)
44402 return false;
44403
44404 elt = d->perm[0];
44405 for (i = 1; i < nelt; ++i)
44406 if (d->perm[i] != elt)
44407 return false;
44408
44409 return expand_vec_perm_broadcast_1 (d);
44410 }
44411
44412 /* Implement an arbitrary permutation of two V32QImode or V16QImode operands
44413 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44414 all the shorter instruction sequences. */
44415
44416 static bool
44417 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44418 {
44419 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44420 unsigned int i, nelt, eltsz;
44421 bool used[4];
44422
44423 if (!TARGET_AVX2
44424 || d->one_operand_p
44425 || (d->vmode != V32QImode && d->vmode != V16HImode))
44426 return false;
44427
44428 if (d->testing_p)
44429 return true;
44430
44431 nelt = d->nelt;
44432 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44433
44434 /* Generate 4 permutation masks. If the required element is within
44435 the same lane, it is shuffled in. If the required element is from the
44436 other lane, force a zero by setting bit 7 in the permutation mask.
44437 In the other mask the elements are non-negative if the element
44438 is requested from the other lane, but they are also moved to the other lane,
44439 so that the result of vpshufb can have the two V2TImode halves
44440 swapped. */
44441 m128 = GEN_INT (-128);
44442 for (i = 0; i < 32; ++i)
44443 {
44444 rperm[0][i] = m128;
44445 rperm[1][i] = m128;
44446 rperm[2][i] = m128;
44447 rperm[3][i] = m128;
44448 }
44449 used[0] = false;
44450 used[1] = false;
44451 used[2] = false;
44452 used[3] = false;
44453 for (i = 0; i < nelt; ++i)
44454 {
44455 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44456 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44457 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44458
44459 for (j = 0; j < eltsz; ++j)
44460 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44461 used[which] = true;
44462 }
44463
44464 for (i = 0; i < 2; ++i)
44465 {
44466 if (!used[2 * i + 1])
44467 {
44468 h[i] = NULL_RTX;
44469 continue;
44470 }
44471 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44472 gen_rtvec_v (32, rperm[2 * i + 1]));
44473 vperm = force_reg (V32QImode, vperm);
44474 h[i] = gen_reg_rtx (V32QImode);
44475 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44476 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44477 }
44478
44479 /* Swap the 128-bit lanes of h[X]. */
44480 for (i = 0; i < 2; ++i)
44481 {
44482 if (h[i] == NULL_RTX)
44483 continue;
44484 op = gen_reg_rtx (V4DImode);
44485 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44486 const2_rtx, GEN_INT (3), const0_rtx,
44487 const1_rtx));
44488 h[i] = gen_lowpart (V32QImode, op);
44489 }
44490
44491 for (i = 0; i < 2; ++i)
44492 {
44493 if (!used[2 * i])
44494 {
44495 l[i] = NULL_RTX;
44496 continue;
44497 }
44498 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44499 vperm = force_reg (V32QImode, vperm);
44500 l[i] = gen_reg_rtx (V32QImode);
44501 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44502 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44503 }
44504
44505 for (i = 0; i < 2; ++i)
44506 {
44507 if (h[i] && l[i])
44508 {
44509 op = gen_reg_rtx (V32QImode);
44510 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44511 l[i] = op;
44512 }
44513 else if (h[i])
44514 l[i] = h[i];
44515 }
44516
44517 gcc_assert (l[0] && l[1]);
44518 op = d->target;
44519 if (d->vmode != V32QImode)
44520 op = gen_reg_rtx (V32QImode);
44521 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44522 if (op != d->target)
44523 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44524 return true;
44525 }
44526
44527 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44528 With all of the interface bits taken care of, perform the expansion
44529 in D and return true on success. */
44530
44531 static bool
44532 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44533 {
44534 /* Try a single instruction expansion. */
44535 if (expand_vec_perm_1 (d))
44536 return true;
44537
44538 /* Try sequences of two instructions. */
44539
44540 if (expand_vec_perm_pshuflw_pshufhw (d))
44541 return true;
44542
44543 if (expand_vec_perm_palignr (d))
44544 return true;
44545
44546 if (expand_vec_perm_interleave2 (d))
44547 return true;
44548
44549 if (expand_vec_perm_broadcast (d))
44550 return true;
44551
44552 if (expand_vec_perm_vpermq_perm_1 (d))
44553 return true;
44554
44555 if (expand_vec_perm_vperm2f128 (d))
44556 return true;
44557
44558 /* Try sequences of three instructions. */
44559
44560 if (expand_vec_perm_2vperm2f128_vshuf (d))
44561 return true;
44562
44563 if (expand_vec_perm_pshufb2 (d))
44564 return true;
44565
44566 if (expand_vec_perm_interleave3 (d))
44567 return true;
44568
44569 if (expand_vec_perm_vperm2f128_vblend (d))
44570 return true;
44571
44572 /* Try sequences of four instructions. */
44573
44574 if (expand_vec_perm_vpshufb2_vpermq (d))
44575 return true;
44576
44577 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44578 return true;
44579
44580 /* ??? Look for narrow permutations whose element orderings would
44581 allow the promotion to a wider mode. */
44582
44583 /* ??? Look for sequences of interleave or a wider permute that place
44584 the data into the correct lanes for a half-vector shuffle like
44585 pshuf[lh]w or vpermilps. */
44586
44587 /* ??? Look for sequences of interleave that produce the desired results.
44588 The combinatorics of punpck[lh] get pretty ugly... */
44589
44590 if (expand_vec_perm_even_odd (d))
44591 return true;
44592
44593 /* Even longer sequences. */
44594 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44595 return true;
44596
44597 return false;
44598 }
44599
44600 /* If a permutation only uses one operand, make it clear. Returns true
44601 if the permutation references both operands. */
44602
44603 static bool
44604 canonicalize_perm (struct expand_vec_perm_d *d)
44605 {
44606 int i, which, nelt = d->nelt;
44607
44608 for (i = which = 0; i < nelt; ++i)
44609 which |= (d->perm[i] < nelt ? 1 : 2);
44610
44611 d->one_operand_p = true;
44612 switch (which)
44613 {
44614 default:
44615 gcc_unreachable();
44616
44617 case 3:
44618 if (!rtx_equal_p (d->op0, d->op1))
44619 {
44620 d->one_operand_p = false;
44621 break;
44622 }
44623 /* The elements of PERM do not suggest that only the first operand
44624 is used, but both operands are identical. Allow easier matching
44625 of the permutation by folding the permutation into the single
44626 input vector. */
44627 /* FALLTHRU */
44628
44629 case 2:
44630 for (i = 0; i < nelt; ++i)
44631 d->perm[i] &= nelt - 1;
44632 d->op0 = d->op1;
44633 break;
44634
44635 case 1:
44636 d->op1 = d->op0;
44637 break;
44638 }
44639
44640 return (which == 3);
44641 }
44642
44643 bool
44644 ix86_expand_vec_perm_const (rtx operands[4])
44645 {
44646 struct expand_vec_perm_d d;
44647 unsigned char perm[MAX_VECT_LEN];
44648 int i, nelt;
44649 bool two_args;
44650 rtx sel;
44651
44652 d.target = operands[0];
44653 d.op0 = operands[1];
44654 d.op1 = operands[2];
44655 sel = operands[3];
44656
44657 d.vmode = GET_MODE (d.target);
44658 gcc_assert (VECTOR_MODE_P (d.vmode));
44659 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44660 d.testing_p = false;
44661
44662 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
44663 gcc_assert (XVECLEN (sel, 0) == nelt);
44664 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
44665
44666 for (i = 0; i < nelt; ++i)
44667 {
44668 rtx e = XVECEXP (sel, 0, i);
44669 int ei = INTVAL (e) & (2 * nelt - 1);
44670 d.perm[i] = ei;
44671 perm[i] = ei;
44672 }
44673
44674 two_args = canonicalize_perm (&d);
44675
44676 if (ix86_expand_vec_perm_const_1 (&d))
44677 return true;
44678
44679 /* If the selector says both arguments are needed, but the operands are the
44680 same, the above tried to expand with one_operand_p and flattened selector.
44681 If that didn't work, retry without one_operand_p; we succeeded with that
44682 during testing. */
44683 if (two_args && d.one_operand_p)
44684 {
44685 d.one_operand_p = false;
44686 memcpy (d.perm, perm, sizeof (perm));
44687 return ix86_expand_vec_perm_const_1 (&d);
44688 }
44689
44690 return false;
44691 }
44692
44693 /* Implement targetm.vectorize.vec_perm_const_ok. */
44694
44695 static bool
44696 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
44697 const unsigned char *sel)
44698 {
44699 struct expand_vec_perm_d d;
44700 unsigned int i, nelt, which;
44701 bool ret;
44702
44703 d.vmode = vmode;
44704 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44705 d.testing_p = true;
44706
44707 /* Given sufficient ISA support we can just return true here
44708 for selected vector modes. */
44709 if (d.vmode == V16SImode || d.vmode == V16SFmode
44710 || d.vmode == V8DFmode || d.vmode == V8DImode)
44711 /* All implementable with a single vpermi2 insn. */
44712 return true;
44713 if (GET_MODE_SIZE (d.vmode) == 16)
44714 {
44715 /* All implementable with a single vpperm insn. */
44716 if (TARGET_XOP)
44717 return true;
44718 /* All implementable with 2 pshufb + 1 ior. */
44719 if (TARGET_SSSE3)
44720 return true;
44721 /* All implementable with shufpd or unpck[lh]pd. */
44722 if (d.nelt == 2)
44723 return true;
44724 }
44725
44726 /* Extract the values from the vector CST into the permutation
44727 array in D. */
44728 memcpy (d.perm, sel, nelt);
44729 for (i = which = 0; i < nelt; ++i)
44730 {
44731 unsigned char e = d.perm[i];
44732 gcc_assert (e < 2 * nelt);
44733 which |= (e < nelt ? 1 : 2);
44734 }
44735
44736 /* For all elements from the second vector, fold the elements to the first. */
44737 if (which == 2)
44738 for (i = 0; i < nelt; ++i)
44739 d.perm[i] -= nelt;
44740
44741 /* Check whether the mask can be applied to the vector type. */
44742 d.one_operand_p = (which != 3);
44743
44744 /* Implementable with shufps or pshufd. */
44745 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
44746 return true;
44747
44748 /* Otherwise we have to go through the motions and see if we can
44749 figure out how to generate the requested permutation. */
44750 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
44751 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
44752 if (!d.one_operand_p)
44753 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
44754
44755 start_sequence ();
44756 ret = ix86_expand_vec_perm_const_1 (&d);
44757 end_sequence ();
44758
44759 return ret;
44760 }
44761
44762 void
44763 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
44764 {
44765 struct expand_vec_perm_d d;
44766 unsigned i, nelt;
44767
44768 d.target = targ;
44769 d.op0 = op0;
44770 d.op1 = op1;
44771 d.vmode = GET_MODE (targ);
44772 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44773 d.one_operand_p = false;
44774 d.testing_p = false;
44775
44776 for (i = 0; i < nelt; ++i)
44777 d.perm[i] = i * 2 + odd;
44778
44779 /* We'll either be able to implement the permutation directly... */
44780 if (expand_vec_perm_1 (&d))
44781 return;
44782
44783 /* ... or we use the special-case patterns. */
44784 expand_vec_perm_even_odd_1 (&d, odd);
44785 }
44786
44787 static void
44788 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
44789 {
44790 struct expand_vec_perm_d d;
44791 unsigned i, nelt, base;
44792 bool ok;
44793
44794 d.target = targ;
44795 d.op0 = op0;
44796 d.op1 = op1;
44797 d.vmode = GET_MODE (targ);
44798 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44799 d.one_operand_p = false;
44800 d.testing_p = false;
44801
44802 base = high_p ? nelt / 2 : 0;
44803 for (i = 0; i < nelt / 2; ++i)
44804 {
44805 d.perm[i * 2] = i + base;
44806 d.perm[i * 2 + 1] = i + base + nelt;
44807 }
44808
44809 /* Note that for AVX this isn't one instruction. */
44810 ok = ix86_expand_vec_perm_const_1 (&d);
44811 gcc_assert (ok);
44812 }
44813
44814
44815 /* Expand a vector operation CODE for a V*QImode in terms of the
44816 same operation on V*HImode. */
44817
44818 void
44819 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
44820 {
44821 enum machine_mode qimode = GET_MODE (dest);
44822 enum machine_mode himode;
44823 rtx (*gen_il) (rtx, rtx, rtx);
44824 rtx (*gen_ih) (rtx, rtx, rtx);
44825 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
44826 struct expand_vec_perm_d d;
44827 bool ok, full_interleave;
44828 bool uns_p = false;
44829 int i;
44830
44831 switch (qimode)
44832 {
44833 case V16QImode:
44834 himode = V8HImode;
44835 gen_il = gen_vec_interleave_lowv16qi;
44836 gen_ih = gen_vec_interleave_highv16qi;
44837 break;
44838 case V32QImode:
44839 himode = V16HImode;
44840 gen_il = gen_avx2_interleave_lowv32qi;
44841 gen_ih = gen_avx2_interleave_highv32qi;
44842 break;
44843 default:
44844 gcc_unreachable ();
44845 }
44846
44847 op2_l = op2_h = op2;
44848 switch (code)
44849 {
44850 case MULT:
44851 /* Unpack data such that we've got a source byte in each low byte of
44852 each word. We don't care what goes into the high byte of each word.
44853 Rather than trying to get zero in there, most convenient is to let
44854 it be a copy of the low byte. */
44855 op2_l = gen_reg_rtx (qimode);
44856 op2_h = gen_reg_rtx (qimode);
44857 emit_insn (gen_il (op2_l, op2, op2));
44858 emit_insn (gen_ih (op2_h, op2, op2));
44859 /* FALLTHRU */
44860
44861 op1_l = gen_reg_rtx (qimode);
44862 op1_h = gen_reg_rtx (qimode);
44863 emit_insn (gen_il (op1_l, op1, op1));
44864 emit_insn (gen_ih (op1_h, op1, op1));
44865 full_interleave = qimode == V16QImode;
44866 break;
44867
44868 case ASHIFT:
44869 case LSHIFTRT:
44870 uns_p = true;
44871 /* FALLTHRU */
44872 case ASHIFTRT:
44873 op1_l = gen_reg_rtx (himode);
44874 op1_h = gen_reg_rtx (himode);
44875 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
44876 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
44877 full_interleave = true;
44878 break;
44879 default:
44880 gcc_unreachable ();
44881 }
44882
44883 /* Perform the operation. */
44884 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
44885 1, OPTAB_DIRECT);
44886 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
44887 1, OPTAB_DIRECT);
44888 gcc_assert (res_l && res_h);
44889
44890 /* Merge the data back into the right place. */
44891 d.target = dest;
44892 d.op0 = gen_lowpart (qimode, res_l);
44893 d.op1 = gen_lowpart (qimode, res_h);
44894 d.vmode = qimode;
44895 d.nelt = GET_MODE_NUNITS (qimode);
44896 d.one_operand_p = false;
44897 d.testing_p = false;
44898
44899 if (full_interleave)
44900 {
44901 /* For SSE2, we used a full interleave, so the desired
44902 results are in the even elements. */
44903 for (i = 0; i < 32; ++i)
44904 d.perm[i] = i * 2;
44905 }
44906 else
44907 {
44908 /* For AVX, the interleave used above was not cross-lane. So the
44909 extraction is of even elements, but with the second and third quarters
44910 swapped. Happily, that is even one insn shorter than plain even extraction. */
44911 for (i = 0; i < 32; ++i)
44912 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
44913 }
44914
44915 ok = ix86_expand_vec_perm_const_1 (&d);
44916 gcc_assert (ok);
44917
44918 set_unique_reg_note (get_last_insn (), REG_EQUAL,
44919 gen_rtx_fmt_ee (code, qimode, op1, op2));
44920 }
44921
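/* Illustrative sketch (SSE2 intrinsics, assumes <immintrin.h>) of the MULT
   case above for V16QImode: interleave each operand with itself, multiply
   as words, then keep the low byte of every word.  The expander merges the
   two halves back with a constant permutation; the mask-and-pack used here
   is just the simplest way to express that extraction in intrinsics.  */
static __m128i
mulv16qi3_model (__m128i op1, __m128i op2)
{
  __m128i a_l = _mm_unpacklo_epi8 (op1, op1), a_h = _mm_unpackhi_epi8 (op1, op1);
  __m128i b_l = _mm_unpacklo_epi8 (op2, op2), b_h = _mm_unpackhi_epi8 (op2, op2);
  __m128i r_l = _mm_mullo_epi16 (a_l, b_l);
  __m128i r_h = _mm_mullo_epi16 (a_h, b_h);
  __m128i m = _mm_set1_epi16 (0x00ff);

  return _mm_packus_epi16 (_mm_and_si128 (r_l, m), _mm_and_si128 (r_h, m));
}
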
44922 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
44923 if op is CONST_VECTOR with all odd elements equal to their
44924 preceding element. */
44925
44926 static bool
44927 const_vector_equal_evenodd_p (rtx op)
44928 {
44929 enum machine_mode mode = GET_MODE (op);
44930 int i, nunits = GET_MODE_NUNITS (mode);
44931 if (GET_CODE (op) != CONST_VECTOR
44932 || nunits != CONST_VECTOR_NUNITS (op))
44933 return false;
44934 for (i = 0; i < nunits; i += 2)
44935 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
44936 return false;
44937 return true;
44938 }
44939
44940 void
44941 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
44942 bool uns_p, bool odd_p)
44943 {
44944 enum machine_mode mode = GET_MODE (op1);
44945 enum machine_mode wmode = GET_MODE (dest);
44946 rtx x;
44947 rtx orig_op1 = op1, orig_op2 = op2;
44948
44949 if (!nonimmediate_operand (op1, mode))
44950 op1 = force_reg (mode, op1);
44951 if (!nonimmediate_operand (op2, mode))
44952 op2 = force_reg (mode, op2);
44953
44954 /* We only play even/odd games with vectors of SImode. */
44955 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
44956
44957 /* If we're looking for the odd results, shift those members down to
44958 the even slots. For some cpus this is faster than a PSHUFD. */
44959 if (odd_p)
44960 {
44961 /* For XOP use vpmacsdqh, but only for smult, as it is only
44962 signed. */
44963 if (TARGET_XOP && mode == V4SImode && !uns_p)
44964 {
44965 x = force_reg (wmode, CONST0_RTX (wmode));
44966 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
44967 return;
44968 }
44969
44970 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
44971 if (!const_vector_equal_evenodd_p (orig_op1))
44972 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
44973 x, NULL, 1, OPTAB_DIRECT);
44974 if (!const_vector_equal_evenodd_p (orig_op2))
44975 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
44976 x, NULL, 1, OPTAB_DIRECT);
44977 op1 = gen_lowpart (mode, op1);
44978 op2 = gen_lowpart (mode, op2);
44979 }
44980
44981 if (mode == V16SImode)
44982 {
44983 if (uns_p)
44984 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
44985 else
44986 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
44987 }
44988 else if (mode == V8SImode)
44989 {
44990 if (uns_p)
44991 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
44992 else
44993 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
44994 }
44995 else if (uns_p)
44996 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
44997 else if (TARGET_SSE4_1)
44998 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
44999 else
45000 {
45001 rtx s1, s2, t0, t1, t2;
45002
45003 /* The easiest way to implement this without PMULDQ is to go through
45004 the motions as if we are performing a full 64-bit multiply, except
45005 that we need to do less shuffling of the elements. */
45006
45007 /* Compute the sign-extension, aka highparts, of the two operands. */
45008 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45009 op1, pc_rtx, pc_rtx);
45010 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45011 op2, pc_rtx, pc_rtx);
45012
45013 /* Multiply LO(A) * HI(B), and vice-versa. */
45014 t1 = gen_reg_rtx (wmode);
45015 t2 = gen_reg_rtx (wmode);
45016 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
45017 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
45018
45019 /* Multiply LO(A) * LO(B). */
45020 t0 = gen_reg_rtx (wmode);
45021 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
45022
45023 /* Combine and shift the highparts into place. */
45024 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
45025 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
45026 1, OPTAB_DIRECT);
45027
45028 /* Combine high and low parts. */
45029 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
45030 return;
45031 }
45032 emit_insn (x);
45033 }
45034
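/* Illustrative sketch (plain C, not the emitted RTL): the identity behind
   the PMULDQ-less fallback above for a signed 32x32->64 multiply built from
   unsigned pieces.  The "highpart" masks s1/s2 are 0xffffffff exactly when
   the corresponding operand is negative, and (0xffffffff * x) << 32 is
   congruent to -(x << 32) modulo 2^64, so adding the shifted cross terms
   applies the sign correction.  */
static unsigned long long
smul32_widen_model (int a, int b)
{
  unsigned long long au = (unsigned int) a, bu = (unsigned int) b;
  unsigned long long s1 = a < 0 ? 0xffffffffULL : 0;	/* sign mask of A  */
  unsigned long long s2 = b < 0 ? 0xffffffffULL : 0;	/* sign mask of B  */
  unsigned long long hi = (s1 * bu + s2 * au) << 32;	/* t1 + t2, shifted  */

  return au * bu + hi;	/* equals (unsigned long long) ((long long) a * b)  */
}
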
45035 void
45036 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
45037 bool uns_p, bool high_p)
45038 {
45039 enum machine_mode wmode = GET_MODE (dest);
45040 enum machine_mode mode = GET_MODE (op1);
45041 rtx t1, t2, t3, t4, mask;
45042
45043 switch (mode)
45044 {
45045 case V4SImode:
45046 t1 = gen_reg_rtx (mode);
45047 t2 = gen_reg_rtx (mode);
45048 if (TARGET_XOP && !uns_p)
45049 {
45050 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45051 shuffle the elements once so that all elements are in the right
45052 place for immediate use: { A C B D }. */
45053 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45054 const1_rtx, GEN_INT (3)));
45055 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45056 const1_rtx, GEN_INT (3)));
45057 }
45058 else
45059 {
45060 /* Put the elements into place for the multiply. */
45061 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45062 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45063 high_p = false;
45064 }
45065 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45066 break;
45067
45068 case V8SImode:
45069 /* Shuffle the elements between the lanes. After this we
45070 have { A B E F | C D G H } for each operand. */
45071 t1 = gen_reg_rtx (V4DImode);
45072 t2 = gen_reg_rtx (V4DImode);
45073 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45074 const0_rtx, const2_rtx,
45075 const1_rtx, GEN_INT (3)));
45076 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45077 const0_rtx, const2_rtx,
45078 const1_rtx, GEN_INT (3)));
45079
45080 /* Shuffle the elements within the lanes. After this we
45081 have { A A B B | C C D D } or { E E F F | G G H H }. */
45082 t3 = gen_reg_rtx (V8SImode);
45083 t4 = gen_reg_rtx (V8SImode);
45084 mask = GEN_INT (high_p
45085 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45086 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45087 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45088 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45089
45090 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45091 break;
45092
45093 case V8HImode:
45094 case V16HImode:
45095 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45096 uns_p, OPTAB_DIRECT);
45097 t2 = expand_binop (mode,
45098 uns_p ? umul_highpart_optab : smul_highpart_optab,
45099 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45100 gcc_assert (t1 && t2);
45101
45102 t3 = gen_reg_rtx (mode);
45103 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45104 emit_move_insn (dest, gen_lowpart (wmode, t3));
45105 break;
45106
45107 case V16QImode:
45108 case V32QImode:
45109 t1 = gen_reg_rtx (wmode);
45110 t2 = gen_reg_rtx (wmode);
45111 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45112 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45113
45114 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45115 break;
45116
45117 default:
45118 gcc_unreachable ();
45119 }
45120 }
45121
45122 void
45123 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45124 {
45125 rtx res_1, res_2, res_3, res_4;
45126
45127 res_1 = gen_reg_rtx (V4SImode);
45128 res_2 = gen_reg_rtx (V4SImode);
45129 res_3 = gen_reg_rtx (V2DImode);
45130 res_4 = gen_reg_rtx (V2DImode);
45131 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45132 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45133
45134 /* Move the results in element 2 down to element 1; we don't care
45135 what goes in elements 2 and 3. Then we can merge the parts
45136 back together with an interleave.
45137
45138 Note that two other sequences were tried:
45139 (1) Use interleaves at the start instead of psrldq, which allows
45140 us to use a single shufps to merge things back at the end.
45141 (2) Use shufps here to combine the two vectors, then pshufd to
45142 put the elements in the correct order.
45143 In both cases the cost of the reformatting stall was too high
45144 and the overall sequence slower. */
45145
45146 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45147 const0_rtx, const2_rtx,
45148 const0_rtx, const0_rtx));
45149 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45150 const0_rtx, const2_rtx,
45151 const0_rtx, const0_rtx));
45152 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45153
45154 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45155 }
45156
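/* Illustrative sketch (SSE2 intrinsics, assumes <immintrin.h>) of the
   sequence above: two PMULUDQ for the even and odd element pairs, then a
   pshufd on each partial result and an interleave to restore the order.  */
static __m128i
mulv4si3_model (__m128i a, __m128i b)
{
  __m128i even = _mm_mul_epu32 (a, b);			/* a0*b0, a2*b2 (64-bit)  */
  __m128i odd = _mm_mul_epu32 (_mm_srli_epi64 (a, 32),
			       _mm_srli_epi64 (b, 32));	/* a1*b1, a3*b3 (64-bit)  */
  __m128i even32 = _mm_shuffle_epi32 (even, _MM_SHUFFLE (0, 0, 2, 0));
  __m128i odd32 = _mm_shuffle_epi32 (odd, _MM_SHUFFLE (0, 0, 2, 0));

  return _mm_unpacklo_epi32 (even32, odd32);	/* a0*b0, a1*b1, a2*b2, a3*b3  */
}
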
45157 void
45158 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45159 {
45160 enum machine_mode mode = GET_MODE (op0);
45161 rtx t1, t2, t3, t4, t5, t6;
45162
45163 if (TARGET_XOP && mode == V2DImode)
45164 {
45165 /* op1: A,B,C,D, op2: E,F,G,H */
45166 op1 = gen_lowpart (V4SImode, op1);
45167 op2 = gen_lowpart (V4SImode, op2);
45168
45169 t1 = gen_reg_rtx (V4SImode);
45170 t2 = gen_reg_rtx (V4SImode);
45171 t3 = gen_reg_rtx (V2DImode);
45172 t4 = gen_reg_rtx (V2DImode);
45173
45174 /* t1: B,A,D,C */
45175 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45176 GEN_INT (1),
45177 GEN_INT (0),
45178 GEN_INT (3),
45179 GEN_INT (2)));
45180
45181 /* t2: (B*E),(A*F),(D*G),(C*H) */
45182 emit_insn (gen_mulv4si3 (t2, t1, op2));
45183
45184 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45185 emit_insn (gen_xop_phadddq (t3, t2));
45186
45187 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45188 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45189
45190 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
45191 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
45192 }
45193 else
45194 {
45195 enum machine_mode nmode;
45196 rtx (*umul) (rtx, rtx, rtx);
45197
45198 if (mode == V2DImode)
45199 {
45200 umul = gen_vec_widen_umult_even_v4si;
45201 nmode = V4SImode;
45202 }
45203 else if (mode == V4DImode)
45204 {
45205 umul = gen_vec_widen_umult_even_v8si;
45206 nmode = V8SImode;
45207 }
45208 else if (mode == V8DImode)
45209 {
45210 umul = gen_vec_widen_umult_even_v16si;
45211 nmode = V16SImode;
45212 }
45213 else
45214 gcc_unreachable ();
45215
45216
45217 /* Multiply low parts. */
45218 t1 = gen_reg_rtx (mode);
45219 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45220
45221 /* Shift input vectors right 32 bits so we can multiply high parts. */
45222 t6 = GEN_INT (32);
45223 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45224 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45225
45226 /* Multiply high parts by low parts. */
45227 t4 = gen_reg_rtx (mode);
45228 t5 = gen_reg_rtx (mode);
45229 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45230 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45231
45232 /* Combine and shift the highparts back. */
45233 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45234 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45235
45236 /* Combine high and low parts. */
45237 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45238 }
45239
45240 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45241 gen_rtx_MULT (mode, op1, op2));
45242 }
45243
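/* Illustrative sketch (SSE2 intrinsics, assumes <immintrin.h>) of the
   non-XOP path above for V2DImode: lo*lo plus the two cross products of
   the 32-bit halves, shifted up by 32.  */
static __m128i
mulv2di3_model (__m128i a, __m128i b)
{
  __m128i lo = _mm_mul_epu32 (a, b);			/* lo(a) * lo(b)  */
  __m128i cross = _mm_add_epi64 (_mm_mul_epu32 (_mm_srli_epi64 (a, 32), b),
				 _mm_mul_epu32 (_mm_srli_epi64 (b, 32), a));

  return _mm_add_epi64 (lo, _mm_slli_epi64 (cross, 32));
}
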
45244 /* Calculate integer abs() using only SSE2 instructions. */
45245
45246 void
45247 ix86_expand_sse2_abs (rtx target, rtx input)
45248 {
45249 enum machine_mode mode = GET_MODE (target);
45250 rtx tmp0, tmp1, x;
45251
45252 switch (mode)
45253 {
45254 /* For 32-bit signed integer X, the best way to calculate the absolute
45255 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
45256 case V4SImode:
45257 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45258 GEN_INT (GET_MODE_BITSIZE
45259 (GET_MODE_INNER (mode)) - 1),
45260 NULL, 0, OPTAB_DIRECT);
45261 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45262 NULL, 0, OPTAB_DIRECT);
45263 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45264 target, 0, OPTAB_DIRECT);
45265 break;
45266
45267 /* For 16-bit signed integer X, the best way to calculate the absolute
45268 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45269 case V8HImode:
45270 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45271
45272 x = expand_simple_binop (mode, SMAX, tmp0, input,
45273 target, 0, OPTAB_DIRECT);
45274 break;
45275
45276 /* For 8-bit signed integer X, the best way to calculate the absolute
45277 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45278 as SSE2 provides the PMINUB insn. */
45279 case V16QImode:
45280 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45281
45282 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45283 target, 0, OPTAB_DIRECT);
45284 break;
45285
45286 default:
45287 gcc_unreachable ();
45288 }
45289
45290 if (x != target)
45291 emit_move_insn (target, x);
45292 }
45293
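/* Illustrative sketch (SSE2 intrinsics, assumes <immintrin.h>) of the three
   identities used above; the V16QImode variant relies on unsigned min as
   described in the comment, so -128 stays -128 just as with PMINUB.  */
static __m128i
absv4si2_model (__m128i x)
{
  __m128i sign = _mm_srai_epi32 (x, 31);		 /* X >> (W-1)  */
  return _mm_sub_epi32 (_mm_xor_si128 (x, sign), sign); /* (X ^ s) - s */
}

static __m128i
absv8hi2_model (__m128i x)
{
  return _mm_max_epi16 (x, _mm_sub_epi16 (_mm_setzero_si128 (), x));
}

static __m128i
absv16qi2_model (__m128i x)
{
  return _mm_min_epu8 (x, _mm_sub_epi8 (_mm_setzero_si128 (), x));
}
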
45294 /* Expand an insert into a vector register through a pinsr insn.
45295 Return true if successful. */
45296
45297 bool
45298 ix86_expand_pinsr (rtx *operands)
45299 {
45300 rtx dst = operands[0];
45301 rtx src = operands[3];
45302
45303 unsigned int size = INTVAL (operands[1]);
45304 unsigned int pos = INTVAL (operands[2]);
45305
45306 if (GET_CODE (dst) == SUBREG)
45307 {
45308 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45309 dst = SUBREG_REG (dst);
45310 }
45311
45312 if (GET_CODE (src) == SUBREG)
45313 src = SUBREG_REG (src);
45314
45315 switch (GET_MODE (dst))
45316 {
45317 case V16QImode:
45318 case V8HImode:
45319 case V4SImode:
45320 case V2DImode:
45321 {
45322 enum machine_mode srcmode, dstmode;
45323 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45324
45325 srcmode = mode_for_size (size, MODE_INT, 0);
45326
45327 switch (srcmode)
45328 {
45329 case QImode:
45330 if (!TARGET_SSE4_1)
45331 return false;
45332 dstmode = V16QImode;
45333 pinsr = gen_sse4_1_pinsrb;
45334 break;
45335
45336 case HImode:
45337 if (!TARGET_SSE2)
45338 return false;
45339 dstmode = V8HImode;
45340 pinsr = gen_sse2_pinsrw;
45341 break;
45342
45343 case SImode:
45344 if (!TARGET_SSE4_1)
45345 return false;
45346 dstmode = V4SImode;
45347 pinsr = gen_sse4_1_pinsrd;
45348 break;
45349
45350 case DImode:
45351 gcc_assert (TARGET_64BIT);
45352 if (!TARGET_SSE4_1)
45353 return false;
45354 dstmode = V2DImode;
45355 pinsr = gen_sse4_1_pinsrq;
45356 break;
45357
45358 default:
45359 return false;
45360 }
45361
45362 rtx d = dst;
45363 if (GET_MODE (dst) != dstmode)
45364 d = gen_reg_rtx (dstmode);
45365 src = gen_lowpart (srcmode, src);
45366
45367 pos /= size;
45368
45369 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45370 GEN_INT (1 << pos)));
45371 if (d != dst)
45372 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45373 return true;
45374 }
45375
45376 default:
45377 return false;
45378 }
45379 }
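
/* Illustrative sketch (SSE2 intrinsics, assumes <immintrin.h>): the kind of
   insn the expander above maps a bit-field insert to.  Writing 16 bits at
   bit position 48 of a 128-bit register is a single pinsrw into word lane
   48 / 16 == 3.  */
static __m128i
pinsrw_bit48_model (__m128i v, int x)
{
  return _mm_insert_epi16 (v, x, 3);
}
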
45380 \f
45381 /* This function returns the calling-ABI-specific va_list type node.
45382 It returns the va_list type specific to FNDECL. */
45383
45384 static tree
45385 ix86_fn_abi_va_list (tree fndecl)
45386 {
45387 if (!TARGET_64BIT)
45388 return va_list_type_node;
45389 gcc_assert (fndecl != NULL_TREE);
45390
45391 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45392 return ms_va_list_type_node;
45393 else
45394 return sysv_va_list_type_node;
45395 }
45396
45397 /* Returns the canonical va_list type specified by TYPE. If there
45398 is no valid TYPE provided, it returns NULL_TREE. */
45399
45400 static tree
45401 ix86_canonical_va_list_type (tree type)
45402 {
45403 tree wtype, htype;
45404
45405 /* Resolve references and pointers to va_list type. */
45406 if (TREE_CODE (type) == MEM_REF)
45407 type = TREE_TYPE (type);
45408 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
45409 type = TREE_TYPE (type);
45410 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45411 type = TREE_TYPE (type);
45412
45413 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45414 {
45415 wtype = va_list_type_node;
45416 gcc_assert (wtype != NULL_TREE);
45417 htype = type;
45418 if (TREE_CODE (wtype) == ARRAY_TYPE)
45419 {
45420 /* If va_list is an array type, the argument may have decayed
45421 to a pointer type, e.g. by being passed to another function.
45422 In that case, unwrap both types so that we can compare the
45423 underlying records. */
45424 if (TREE_CODE (htype) == ARRAY_TYPE
45425 || POINTER_TYPE_P (htype))
45426 {
45427 wtype = TREE_TYPE (wtype);
45428 htype = TREE_TYPE (htype);
45429 }
45430 }
45431 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45432 return va_list_type_node;
45433 wtype = sysv_va_list_type_node;
45434 gcc_assert (wtype != NULL_TREE);
45435 htype = type;
45436 if (TREE_CODE (wtype) == ARRAY_TYPE)
45437 {
45438 /* If va_list is an array type, the argument may have decayed
45439 to a pointer type, e.g. by being passed to another function.
45440 In that case, unwrap both types so that we can compare the
45441 underlying records. */
45442 if (TREE_CODE (htype) == ARRAY_TYPE
45443 || POINTER_TYPE_P (htype))
45444 {
45445 wtype = TREE_TYPE (wtype);
45446 htype = TREE_TYPE (htype);
45447 }
45448 }
45449 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45450 return sysv_va_list_type_node;
45451 wtype = ms_va_list_type_node;
45452 gcc_assert (wtype != NULL_TREE);
45453 htype = type;
45454 if (TREE_CODE (wtype) == ARRAY_TYPE)
45455 {
45456 /* If va_list is an array type, the argument may have decayed
45457 to a pointer type, e.g. by being passed to another function.
45458 In that case, unwrap both types so that we can compare the
45459 underlying records. */
45460 if (TREE_CODE (htype) == ARRAY_TYPE
45461 || POINTER_TYPE_P (htype))
45462 {
45463 wtype = TREE_TYPE (wtype);
45464 htype = TREE_TYPE (htype);
45465 }
45466 }
45467 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45468 return ms_va_list_type_node;
45469 return NULL_TREE;
45470 }
45471 return std_canonical_va_list_type (type);
45472 }
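/* Example of the decay handled above (illustrative, not part of GCC): on
   x86-64 the SysV va_list is a one-element array of a record type, so a
   callee such as

       void vlog (const char *fmt, va_list ap);

   actually receives a pointer to that record.  Unwrapping both the
   builtin array type and the decayed pointer lets the TYPE_MAIN_VARIANT
   comparison match the underlying record types.  */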
45473
45474 /* Iterate through the target-specific builtin types for va_list.
45475 IDX denotes the iterator, *PTREE is set to the result type of
45476 the va_list builtin, and *PNAME to its internal type.
45477 Returns zero if there is no element for this index, otherwise
45478 IDX should be increased upon the next call.
45479 Note, do not iterate a base builtin's name like __builtin_va_list.
45480 Used from c_common_nodes_and_builtins. */
45481
45482 static int
45483 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45484 {
45485 if (TARGET_64BIT)
45486 {
45487 switch (idx)
45488 {
45489 default:
45490 break;
45491
45492 case 0:
45493 *ptree = ms_va_list_type_node;
45494 *pname = "__builtin_ms_va_list";
45495 return 1;
45496
45497 case 1:
45498 *ptree = sysv_va_list_type_node;
45499 *pname = "__builtin_sysv_va_list";
45500 return 1;
45501 }
45502 }
45503
45504 return 0;
45505 }
45506
45507 #undef TARGET_SCHED_DISPATCH
45508 #define TARGET_SCHED_DISPATCH has_dispatch
45509 #undef TARGET_SCHED_DISPATCH_DO
45510 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45511 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45512 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45513 #undef TARGET_SCHED_REORDER
45514 #define TARGET_SCHED_REORDER ix86_sched_reorder
45515 #undef TARGET_SCHED_ADJUST_PRIORITY
45516 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45517 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45518 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45519 ix86_dependencies_evaluation_hook
45520
45521 /* The size of the dispatch window is the total number of bytes of
45522 object code allowed in a window. */
45523 #define DISPATCH_WINDOW_SIZE 16
45524
45525 /* Number of dispatch windows considered for scheduling. */
45526 #define MAX_DISPATCH_WINDOWS 3
45527
45528 /* Maximum number of instructions in a window. */
45529 #define MAX_INSN 4
45530
45531 /* Maximum number of immediate operands in a window. */
45532 #define MAX_IMM 4
45533
45534 /* Maximum number of immediate bits allowed in a window. */
45535 #define MAX_IMM_SIZE 128
45536
45537 /* Maximum number of 32 bit immediates allowed in a window. */
45538 #define MAX_IMM_32 4
45539
45540 /* Maximum number of 64 bit immediates allowed in a window. */
45541 #define MAX_IMM_64 2
45542
45543 /* Maximum total of loads or prefetches allowed in a window. */
45544 #define MAX_LOAD 2
45545
45546 /* Maximum total of stores allowed in a window. */
45547 #define MAX_STORE 1
45548
45549 #undef BIG
45550 #define BIG 100
45551
45552
45553 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
45554 enum dispatch_group {
45555 disp_no_group = 0,
45556 disp_load,
45557 disp_store,
45558 disp_load_store,
45559 disp_prefetch,
45560 disp_imm,
45561 disp_imm_32,
45562 disp_imm_64,
45563 disp_branch,
45564 disp_cmp,
45565 disp_jcc,
45566 disp_last
45567 };
45568
45569 /* Number of allowable groups in a dispatch window. It is an array
45570 indexed by dispatch_group enum. 100 is used as a big number,
45571 because the number of these kinds of operations does not have any
45572 effect in a dispatch window, but we need them for other reasons in
45573 the table. */
45574 static unsigned int num_allowable_groups[disp_last] = {
45575 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45576 };
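/* For instance, num_allowable_groups[disp_load] == 2 and
   num_allowable_groups[disp_store] == 1 mirror MAX_LOAD and MAX_STORE,
   while the BIG entries for disp_cmp and disp_jcc mean those groups are
   never limited by count alone.  */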
45577
45578 char group_name[disp_last + 1][16] = {
45579 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45580 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45581 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45582 };
45583
45584 /* Instruction path. */
45585 enum insn_path {
45586 no_path = 0,
45587 path_single, /* Single micro op. */
45588 path_double, /* Double micro op. */
45589 path_multi, /* Instructions with more than 2 micro ops. */
45590 last_path
45591 };
45592
45593 /* sched_insn_info defines a window to the instructions scheduled in
45594 the basic block. It contains a pointer to the insn_info table and
45595 the instruction scheduled.
45596
45597 Windows are allocated for each basic block and are linked
45598 together. */
45599 typedef struct sched_insn_info_s {
45600 rtx insn;
45601 enum dispatch_group group;
45602 enum insn_path path;
45603 int byte_len;
45604 int imm_bytes;
45605 } sched_insn_info;
45606
45607 /* Linked list of dispatch windows. This is a two way list of
45608 dispatch windows of a basic block. It contains information about
45609 the number of uops in the window and the total number of
45610 instructions and of bytes in the object code for this dispatch
45611 window. */
45612 typedef struct dispatch_windows_s {
45613 int num_insn; /* Number of insn in the window. */
45614 int num_uops; /* Number of uops in the window. */
45615 int window_size; /* Number of bytes in the window. */
45616 int window_num; /* Window number, either 0 or 1. */
45617 int num_imm; /* Number of immediates in an insn. */
45618 int num_imm_32; /* Number of 32 bit immediates in an insn. */
45619 int num_imm_64; /* Number of 64 bit immediates in an insn. */
45620 int imm_size; /* Total size of the immediates in the window. */
45621 int num_loads; /* Total memory loads in the window. */
45622 int num_stores; /* Total memory stores in the window. */
45623 int violation; /* Violation exists in window. */
45624 sched_insn_info *window; /* Pointer to the window. */
45625 struct dispatch_windows_s *next;
45626 struct dispatch_windows_s *prev;
45627 } dispatch_windows;
45628
45629 /* Immediate values used in an insn. */
45630 typedef struct imm_info_s
45631 {
45632 int imm;
45633 int imm32;
45634 int imm64;
45635 } imm_info;
45636
45637 static dispatch_windows *dispatch_window_list;
45638 static dispatch_windows *dispatch_window_list1;
45639
45640 /* Get dispatch group of insn. */
45641
45642 static enum dispatch_group
45643 get_mem_group (rtx insn)
45644 {
45645 enum attr_memory memory;
45646
45647 if (INSN_CODE (insn) < 0)
45648 return disp_no_group;
45649 memory = get_attr_memory (insn);
45650 if (memory == MEMORY_STORE)
45651 return disp_store;
45652
45653 if (memory == MEMORY_LOAD)
45654 return disp_load;
45655
45656 if (memory == MEMORY_BOTH)
45657 return disp_load_store;
45658
45659 return disp_no_group;
45660 }
45661
45662 /* Return true if insn is a compare instruction. */
45663
45664 static bool
45665 is_cmp (rtx insn)
45666 {
45667 enum attr_type type;
45668
45669 type = get_attr_type (insn);
45670 return (type == TYPE_TEST
45671 || type == TYPE_ICMP
45672 || type == TYPE_FCMP
45673 || GET_CODE (PATTERN (insn)) == COMPARE);
45674 }
45675
45676 /* Return true if a dispatch violation was encountered. */
45677
45678 static bool
45679 dispatch_violation (void)
45680 {
45681 if (dispatch_window_list->next)
45682 return dispatch_window_list->next->violation;
45683 return dispatch_window_list->violation;
45684 }
45685
45686 /* Return true if insn is a branch instruction. */
45687
45688 static bool
45689 is_branch (rtx insn)
45690 {
45691 return (CALL_P (insn) || JUMP_P (insn));
45692 }
45693
45694 /* Return true if insn is a prefetch instruction. */
45695
45696 static bool
45697 is_prefetch (rtx insn)
45698 {
45699 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
45700 }
45701
45702 /* This function initializes a dispatch window and the list container holding a
45703 pointer to the window. */
45704
45705 static void
45706 init_window (int window_num)
45707 {
45708 int i;
45709 dispatch_windows *new_list;
45710
45711 if (window_num == 0)
45712 new_list = dispatch_window_list;
45713 else
45714 new_list = dispatch_window_list1;
45715
45716 new_list->num_insn = 0;
45717 new_list->num_uops = 0;
45718 new_list->window_size = 0;
45719 new_list->next = NULL;
45720 new_list->prev = NULL;
45721 new_list->window_num = window_num;
45722 new_list->num_imm = 0;
45723 new_list->num_imm_32 = 0;
45724 new_list->num_imm_64 = 0;
45725 new_list->imm_size = 0;
45726 new_list->num_loads = 0;
45727 new_list->num_stores = 0;
45728 new_list->violation = false;
45729
45730 for (i = 0; i < MAX_INSN; i++)
45731 {
45732 new_list->window[i].insn = NULL;
45733 new_list->window[i].group = disp_no_group;
45734 new_list->window[i].path = no_path;
45735 new_list->window[i].byte_len = 0;
45736 new_list->window[i].imm_bytes = 0;
45737 }
45738 return;
45739 }
45740
45741 /* This function allocates and initializes a dispatch window and the
45742 list container holding a pointer to the window. */
45743
45744 static dispatch_windows *
45745 allocate_window (void)
45746 {
45747 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
45748 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
45749
45750 return new_list;
45751 }
45752
45753 /* This routine initializes the dispatch scheduling information. It
45754 initiates building dispatch scheduler tables and constructs the
45755 first dispatch window. */
45756
45757 static void
45758 init_dispatch_sched (void)
45759 {
45760 /* Allocate a dispatch list and a window. */
45761 dispatch_window_list = allocate_window ();
45762 dispatch_window_list1 = allocate_window ();
45763 init_window (0);
45764 init_window (1);
45765 }
45766
45767 /* This function returns true if a branch is detected. End of a basic block
45768 does not have to be a branch, but here we assume only branches end a
45769 window. */
45770
45771 static bool
45772 is_end_basic_block (enum dispatch_group group)
45773 {
45774 return group == disp_branch;
45775 }
45776
45777 /* This function is called when the end of window processing is reached. */
45778
45779 static void
45780 process_end_window (void)
45781 {
45782 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
45783 if (dispatch_window_list->next)
45784 {
45785 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
45786 gcc_assert (dispatch_window_list->window_size
45787 + dispatch_window_list1->window_size <= 48);
45788 init_window (1);
45789 }
45790 init_window (0);
45791 }
45792
45793 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
45794 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
45795 for 48 bytes of instructions. Note that these windows are not dispatch
45796 windows whose sizes are DISPATCH_WINDOW_SIZE. */
45797
45798 static dispatch_windows *
45799 allocate_next_window (int window_num)
45800 {
45801 if (window_num == 0)
45802 {
45803 if (dispatch_window_list->next)
45804 init_window (1);
45805 init_window (0);
45806 return dispatch_window_list;
45807 }
45808
45809 dispatch_window_list->next = dispatch_window_list1;
45810 dispatch_window_list1->prev = dispatch_window_list;
45811
45812 return dispatch_window_list1;
45813 }
45814
45815 /* Increment the number of immediate operands of an instruction. */
45816
45817 static int
45818 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
45819 {
45820 if (*in_rtx == 0)
45821 return 0;
45822
45823 switch (GET_CODE (*in_rtx))
45824 {
45825 case CONST:
45826 case SYMBOL_REF:
45827 case CONST_INT:
45828 (imm_values->imm)++;
45829 if (x86_64_immediate_operand (*in_rtx, SImode))
45830 (imm_values->imm32)++;
45831 else
45832 (imm_values->imm64)++;
45833 break;
45834
45835 case CONST_DOUBLE:
45836 (imm_values->imm)++;
45837 (imm_values->imm64)++;
45838 break;
45839
45840 case CODE_LABEL:
45841 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
45842 {
45843 (imm_values->imm)++;
45844 (imm_values->imm32)++;
45845 }
45846 break;
45847
45848 default:
45849 break;
45850 }
45851
45852 return 0;
45853 }
45854
45855 /* Compute number of immediate operands of an instruction. */
45856
45857 static void
45858 find_constant (rtx in_rtx, imm_info *imm_values)
45859 {
45860 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
45861 (rtx_function) find_constant_1, (void *) imm_values);
45862 }
45863
45864 /* Return total size of immediate operands of an instruction along with number
45865 of corresponding immediate operands. It initializes its parameters to zero
45866 before calling FIND_CONSTANT.
45867 INSN is the input instruction. IMM is the total of immediates.
45868 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
45869 bit immediates. */
45870
45871 static int
45872 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
45873 {
45874 imm_info imm_values = {0, 0, 0};
45875
45876 find_constant (insn, &imm_values);
45877 *imm = imm_values.imm;
45878 *imm32 = imm_values.imm32;
45879 *imm64 = imm_values.imm64;
45880 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
45881 }
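/* Worked example (illustrative): an insn with one 32-bit and one 64-bit
   immediate reports *imm == 2, *imm32 == 1, *imm64 == 1 and returns
   1 * 4 + 1 * 8 == 12 as the total immediate size.  */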
45882
45883 /* This function indicates whether any operand of an instruction is an
45884 immediate. */
45885
45886 static bool
45887 has_immediate (rtx insn)
45888 {
45889 int num_imm_operand;
45890 int num_imm32_operand;
45891 int num_imm64_operand;
45892
45893 if (insn)
45894 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45895 &num_imm64_operand);
45896 return false;
45897 }
45898
45899 /* Return single or double path for instructions. */
45900
45901 static enum insn_path
45902 get_insn_path (rtx insn)
45903 {
45904 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
45905
45906 if ((int) path == 0)
45907 return path_single;
45908
45909 if ((int) path == 1)
45910 return path_double;
45911
45912 return path_multi;
45913 }
45914
45915 /* Return insn dispatch group. */
45916
45917 static enum dispatch_group
45918 get_insn_group (rtx insn)
45919 {
45920 enum dispatch_group group = get_mem_group (insn);
45921 if (group)
45922 return group;
45923
45924 if (is_branch (insn))
45925 return disp_branch;
45926
45927 if (is_cmp (insn))
45928 return disp_cmp;
45929
45930 if (has_immediate (insn))
45931 return disp_imm;
45932
45933 if (is_prefetch (insn))
45934 return disp_prefetch;
45935
45936 return disp_no_group;
45937 }
45938
45939 /* Count number of GROUP restricted instructions in a dispatch
45940 window WINDOW_LIST. */
45941
45942 static int
45943 count_num_restricted (rtx insn, dispatch_windows *window_list)
45944 {
45945 enum dispatch_group group = get_insn_group (insn);
45946 int imm_size;
45947 int num_imm_operand;
45948 int num_imm32_operand;
45949 int num_imm64_operand;
45950
45951 if (group == disp_no_group)
45952 return 0;
45953
45954 if (group == disp_imm)
45955 {
45956 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45957 &num_imm64_operand);
45958 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
45959 || num_imm_operand + window_list->num_imm > MAX_IMM
45960 || (num_imm32_operand > 0
45961 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
45962 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
45963 || (num_imm64_operand > 0
45964 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
45965 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
45966 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
45967 && num_imm64_operand > 0
45968 && ((window_list->num_imm_64 > 0
45969 && window_list->num_insn >= 2)
45970 || window_list->num_insn >= 3)))
45971 return BIG;
45972
45973 return 1;
45974 }
45975
45976 if ((group == disp_load_store
45977 && (window_list->num_loads >= MAX_LOAD
45978 || window_list->num_stores >= MAX_STORE))
45979 || ((group == disp_load
45980 || group == disp_prefetch)
45981 && window_list->num_loads >= MAX_LOAD)
45982 || (group == disp_store
45983 && window_list->num_stores >= MAX_STORE))
45984 return BIG;
45985
45986 return 1;
45987 }
45988
45989 /* This function returns true if insn satisfies dispatch rules on the
45990 last window scheduled. */
45991
45992 static bool
45993 fits_dispatch_window (rtx insn)
45994 {
45995 dispatch_windows *window_list = dispatch_window_list;
45996 dispatch_windows *window_list_next = dispatch_window_list->next;
45997 unsigned int num_restrict;
45998 enum dispatch_group group = get_insn_group (insn);
45999 enum insn_path path = get_insn_path (insn);
46000 int sum;
46001
46002 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
46003 instructions should be given the lowest priority in the
46004 scheduling process in the Haifa scheduler to make sure they will be
46005 scheduled in the same dispatch window as the instructions that reference them. */
46006 if (group == disp_jcc || group == disp_cmp)
46007 return false;
46008
46009 /* Check nonrestricted. */
46010 if (group == disp_no_group || group == disp_branch)
46011 return true;
46012
46013 /* Get last dispatch window. */
46014 if (window_list_next)
46015 window_list = window_list_next;
46016
46017 if (window_list->window_num == 1)
46018 {
46019 sum = window_list->prev->window_size + window_list->window_size;
46020
46021 if (sum == 32
46022 || (min_insn_size (insn) + sum) >= 48)
46023 /* Window 1 is full. Go for next window. */
46024 return true;
46025 }
46026
46027 num_restrict = count_num_restricted (insn, window_list);
46028
46029 if (num_restrict > num_allowable_groups[group])
46030 return false;
46031
46032 /* See if it fits in the first window. */
46033 if (window_list->window_num == 0)
46034 {
46035 /* The first window should have only single and double path
46036 uops. */
46037 if (path == path_double
46038 && (window_list->num_uops + 2) > MAX_INSN)
46039 return false;
46040 else if (path != path_single)
46041 return false;
46042 }
46043 return true;
46044 }
46045
46046 /* Add an instruction INSN with NUM_UOPS micro-operations to the
46047 dispatch window WINDOW_LIST. */
46048
46049 static void
46050 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
46051 {
46052 int byte_len = min_insn_size (insn);
46053 int num_insn = window_list->num_insn;
46054 int imm_size;
46055 sched_insn_info *window = window_list->window;
46056 enum dispatch_group group = get_insn_group (insn);
46057 enum insn_path path = get_insn_path (insn);
46058 int num_imm_operand;
46059 int num_imm32_operand;
46060 int num_imm64_operand;
46061
46062 if (!window_list->violation && group != disp_cmp
46063 && !fits_dispatch_window (insn))
46064 window_list->violation = true;
46065
46066 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46067 &num_imm64_operand);
46068
46069 /* Initialize window with new instruction. */
46070 window[num_insn].insn = insn;
46071 window[num_insn].byte_len = byte_len;
46072 window[num_insn].group = group;
46073 window[num_insn].path = path;
46074 window[num_insn].imm_bytes = imm_size;
46075
46076 window_list->window_size += byte_len;
46077 window_list->num_insn = num_insn + 1;
46078 window_list->num_uops = window_list->num_uops + num_uops;
46079 window_list->imm_size += imm_size;
46080 window_list->num_imm += num_imm_operand;
46081 window_list->num_imm_32 += num_imm32_operand;
46082 window_list->num_imm_64 += num_imm64_operand;
46083
46084 if (group == disp_store)
46085 window_list->num_stores += 1;
46086 else if (group == disp_load
46087 || group == disp_prefetch)
46088 window_list->num_loads += 1;
46089 else if (group == disp_load_store)
46090 {
46091 window_list->num_stores += 1;
46092 window_list->num_loads += 1;
46093 }
46094 }
46095
46096 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46097 If the total bytes of instructions or the number of instructions in
46098 the window exceeds the allowed maximum, a new window is allocated. */
46099
46100 static void
46101 add_to_dispatch_window (rtx insn)
46102 {
46103 int byte_len;
46104 dispatch_windows *window_list;
46105 dispatch_windows *next_list;
46106 dispatch_windows *window0_list;
46107 enum insn_path path;
46108 enum dispatch_group insn_group;
46109 bool insn_fits;
46110 int num_insn;
46111 int num_uops;
46112 int window_num;
46113 int insn_num_uops;
46114 int sum;
46115
46116 if (INSN_CODE (insn) < 0)
46117 return;
46118
46119 byte_len = min_insn_size (insn);
46120 window_list = dispatch_window_list;
46121 next_list = window_list->next;
46122 path = get_insn_path (insn);
46123 insn_group = get_insn_group (insn);
46124
46125 /* Get the last dispatch window. */
46126 if (next_list)
46127 window_list = dispatch_window_list->next;
46128
46129 if (path == path_single)
46130 insn_num_uops = 1;
46131 else if (path == path_double)
46132 insn_num_uops = 2;
46133 else
46134 insn_num_uops = (int) path;
46135
46136 /* If the current window is full, get a new window.
46137 Window number zero is full if MAX_INSN uops are scheduled in it.
46138 Window number one is full if window zero's bytes plus window
46139 one's bytes equal 32, if adding the bytes of the new instruction
46140 makes the total greater than 48, or if it already has MAX_INSN
46141 instructions in it. */
46142 num_insn = window_list->num_insn;
46143 num_uops = window_list->num_uops;
46144 window_num = window_list->window_num;
46145 insn_fits = fits_dispatch_window (insn);
46146
46147 if (num_insn >= MAX_INSN
46148 || num_uops + insn_num_uops > MAX_INSN
46149 || !(insn_fits))
46150 {
46151 window_num = ~window_num & 1;
46152 window_list = allocate_next_window (window_num);
46153 }
46154
46155 if (window_num == 0)
46156 {
46157 add_insn_window (insn, window_list, insn_num_uops);
46158 if (window_list->num_insn >= MAX_INSN
46159 && insn_group == disp_branch)
46160 {
46161 process_end_window ();
46162 return;
46163 }
46164 }
46165 else if (window_num == 1)
46166 {
46167 window0_list = window_list->prev;
46168 sum = window0_list->window_size + window_list->window_size;
46169 if (sum == 32
46170 || (byte_len + sum) >= 48)
46171 {
46172 process_end_window ();
46173 window_list = dispatch_window_list;
46174 }
46175
46176 add_insn_window (insn, window_list, insn_num_uops);
46177 }
46178 else
46179 gcc_unreachable ();
46180
46181 if (is_end_basic_block (insn_group))
46182 {
46183 /* End of basic block is reached; do the end-of-basic-block processing. */
46184 process_end_window ();
46185 return;
46186 }
46187 }
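/* Worked example (illustrative): if window zero already holds 20 bytes and
   window one holds 12 bytes, their sum is 32, so window one is treated as
   full: process_end_window resets the pair and the incoming instruction is
   added to a fresh window zero.  The same reset happens when adding the
   new instruction's byte length to the pair would reach or exceed 48
   bytes.  */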
46188
46189 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46190
46191 DEBUG_FUNCTION static void
46192 debug_dispatch_window_file (FILE *file, int window_num)
46193 {
46194 dispatch_windows *list;
46195 int i;
46196
46197 if (window_num == 0)
46198 list = dispatch_window_list;
46199 else
46200 list = dispatch_window_list1;
46201
46202 fprintf (file, "Window #%d:\n", list->window_num);
46203 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46204 list->num_insn, list->num_uops, list->window_size);
46205 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46206 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46207
46208 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46209 list->num_stores);
46210 fprintf (file, " insn info:\n");
46211
46212 for (i = 0; i < MAX_INSN; i++)
46213 {
46214 if (!list->window[i].insn)
46215 break;
46216 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46217 i, group_name[list->window[i].group],
46218 i, (void *)list->window[i].insn,
46219 i, list->window[i].path,
46220 i, list->window[i].byte_len,
46221 i, list->window[i].imm_bytes);
46222 }
46223 }
46224
46225 /* Print to stdout a dispatch window. */
46226
46227 DEBUG_FUNCTION void
46228 debug_dispatch_window (int window_num)
46229 {
46230 debug_dispatch_window_file (stdout, window_num);
46231 }
46232
46233 /* Print INSN dispatch information to FILE. */
46234
46235 DEBUG_FUNCTION static void
46236 debug_insn_dispatch_info_file (FILE *file, rtx insn)
46237 {
46238 int byte_len;
46239 enum insn_path path;
46240 enum dispatch_group group;
46241 int imm_size;
46242 int num_imm_operand;
46243 int num_imm32_operand;
46244 int num_imm64_operand;
46245
46246 if (INSN_CODE (insn) < 0)
46247 return;
46248
46249 byte_len = min_insn_size (insn);
46250 path = get_insn_path (insn);
46251 group = get_insn_group (insn);
46252 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46253 &num_imm64_operand);
46254
46255 fprintf (file, " insn info:\n");
46256 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46257 group_name[group], path, byte_len);
46258 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46259 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46260 }
46261
46262 /* Print to STDERR the status of the ready list with respect to
46263 dispatch windows. */
46264
46265 DEBUG_FUNCTION void
46266 debug_ready_dispatch (void)
46267 {
46268 int i;
46269 int no_ready = number_in_ready ();
46270
46271 fprintf (stdout, "Number of ready: %d\n", no_ready);
46272
46273 for (i = 0; i < no_ready; i++)
46274 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46275 }
46276
46277 /* This routine is the driver of the dispatch scheduler. */
46278
46279 static void
46280 do_dispatch (rtx insn, int mode)
46281 {
46282 if (mode == DISPATCH_INIT)
46283 init_dispatch_sched ();
46284 else if (mode == ADD_TO_DISPATCH_WINDOW)
46285 add_to_dispatch_window (insn);
46286 }
46287
46288 /* Return TRUE if Dispatch Scheduling is supported. */
46289
46290 static bool
46291 has_dispatch (rtx insn, int action)
46292 {
46293 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46294 && flag_dispatch_scheduler)
46295 switch (action)
46296 {
46297 default:
46298 return false;
46299
46300 case IS_DISPATCH_ON:
46301 return true;
46302 break;
46303
46304 case IS_CMP:
46305 return is_cmp (insn);
46306
46307 case DISPATCH_VIOLATION:
46308 return dispatch_violation ();
46309
46310 case FITS_DISPATCH_WINDOW:
46311 return fits_dispatch_window (insn);
46312 }
46313
46314 return false;
46315 }
46316
46317 /* Implementation of reassociation_width target hook used by
46318 reassoc phase to identify parallelism level in reassociated
46319 tree. The statement's tree_code is passed in OPC. The argument type
46320 is passed in MODE.
46321
46322 Currently parallel reassociation is enabled for Atom
46323 processors only and we set reassociation width to be 2
46324 because Atom may issue up to 2 instructions per cycle.
46325
46326 Return value should be fixed if parallel reassociation is
46327 enabled for other processors. */
46328
46329 static int
46330 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
46331 enum machine_mode mode)
46332 {
46333 int res = 1;
46334
46335 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46336 res = 2;
46337 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46338 res = 2;
46339
46340 return res;
46341 }
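/* Illustrative example (not from the sources): with a reassociation width
   of 2 the reassoc pass may rewrite a serial chain

       s = ((a + b) + c) + d;      three dependent additions

   into the balanced form

       s = (a + b) + (c + d);      the two inner additions are independent

   which matches the two instructions per cycle that Atom can issue.  */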
46342
46343 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46344 place emms and femms instructions. */
46345
46346 static enum machine_mode
46347 ix86_preferred_simd_mode (enum machine_mode mode)
46348 {
46349 if (!TARGET_SSE)
46350 return word_mode;
46351
46352 switch (mode)
46353 {
46354 case QImode:
46355 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46356 case HImode:
46357 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46358 case SImode:
46359 return TARGET_AVX512F ? V16SImode :
46360 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46361 case DImode:
46362 return TARGET_AVX512F ? V8DImode :
46363 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46364
46365 case SFmode:
46366 if (TARGET_AVX512F)
46367 return V16SFmode;
46368 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46369 return V8SFmode;
46370 else
46371 return V4SFmode;
46372
46373 case DFmode:
46374 if (!TARGET_VECTORIZE_DOUBLE)
46375 return word_mode;
46376 else if (TARGET_AVX512F)
46377 return V8DFmode;
46378 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46379 return V4DFmode;
46380 else if (TARGET_SSE2)
46381 return V2DFmode;
46382 /* FALLTHRU */
46383
46384 default:
46385 return word_mode;
46386 }
46387 }
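/* Worked example (illustrative): when compiling with AVX2 enabled and
   without -mprefer-avx128, SImode maps to V8SImode, SFmode to V8SFmode
   and DFmode to V4DFmode; adding -mavx512f widens these to V16SImode,
   V16SFmode and V8DFmode respectively.  */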
46388
46389 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46390 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46391 256bit and 128bit vectors. */
46392
46393 static unsigned int
46394 ix86_autovectorize_vector_sizes (void)
46395 {
46396 return TARGET_AVX512F ? 64 | 32 | 16 :
46397 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
46398 }
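/* Worked example (illustrative): with AVX512F this hook returns
   64 | 32 | 16 == 112; with AVX2 and no -mprefer-avx128 it returns
   32 | 16 == 48; otherwise it returns 0, which tells the vectorizer to
   try only the preferred SIMD mode.  */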
46399
46400 \f
46401
46402 /* Return class of registers which could be used for pseudo of MODE
46403 and of class RCLASS for spilling instead of memory. Return NO_REGS
46404 if it is not possible or non-profitable. */
46405 static reg_class_t
46406 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46407 {
46408 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46409 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46410 && INTEGER_CLASS_P (rclass))
46411 return ALL_SSE_REGS;
46412 return NO_REGS;
46413 }
46414
46415 /* Implement targetm.vectorize.init_cost. */
46416
46417 static void *
46418 ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
46419 {
46420 unsigned *cost = XNEWVEC (unsigned, 3);
46421 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46422 return cost;
46423 }
46424
46425 /* Implement targetm.vectorize.add_stmt_cost. */
46426
46427 static unsigned
46428 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46429 struct _stmt_vec_info *stmt_info, int misalign,
46430 enum vect_cost_model_location where)
46431 {
46432 unsigned *cost = (unsigned *) data;
46433 unsigned retval = 0;
46434
46435 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46436 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46437
46438 /* Statements in an inner loop relative to the loop being
46439 vectorized are weighted more heavily. The value here is
46440 arbitrary and could potentially be improved with analysis. */
46441 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46442 count *= 50; /* FIXME. */
46443
46444 retval = (unsigned) (count * stmt_cost);
46445
46446 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
46447 for Silvermont, as it has an out-of-order integer pipeline and can execute
46448 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
46449 if (TARGET_SILVERMONT || TARGET_INTEL)
46450 if (stmt_info && stmt_info->stmt)
46451 {
46452 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
46453 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
46454 retval = (retval * 17) / 10;
46455 }
46456
46457 cost[where] += retval;
46458
46459 return retval;
46460 }
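/* Worked example (illustrative): for count == 4 and a per-statement cost
   of 3, retval is 12; on Silvermont a statement producing an integer
   result is then scaled to 12 * 17 / 10 == 20 (truncating division).  */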
46461
46462 /* Implement targetm.vectorize.finish_cost. */
46463
46464 static void
46465 ix86_finish_cost (void *data, unsigned *prologue_cost,
46466 unsigned *body_cost, unsigned *epilogue_cost)
46467 {
46468 unsigned *cost = (unsigned *) data;
46469 *prologue_cost = cost[vect_prologue];
46470 *body_cost = cost[vect_body];
46471 *epilogue_cost = cost[vect_epilogue];
46472 }
46473
46474 /* Implement targetm.vectorize.destroy_cost_data. */
46475
46476 static void
46477 ix86_destroy_cost_data (void *data)
46478 {
46479 free (data);
46480 }
46481
46482 /* Validate target specific memory model bits in VAL. */
46483
46484 static unsigned HOST_WIDE_INT
46485 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46486 {
46487 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46488 bool strong;
46489
46490 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46491 |MEMMODEL_MASK)
46492 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46493 {
46494 warning (OPT_Winvalid_memory_model,
46495 "Unknown architecture specific memory model");
46496 return MEMMODEL_SEQ_CST;
46497 }
46498 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46499 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46500 {
46501 warning (OPT_Winvalid_memory_model,
46502 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46503 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46504 }
46505 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46506 {
46507 warning (OPT_Winvalid_memory_model,
46508 "HLE_RELEASE not used with RELEASE or stronger memory model");
46509 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46510 }
46511 return val;
46512 }
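/* Usage sketch (illustrative; relies on the __ATOMIC_HLE_* macros GCC
   predefines for x86): the HLE bits are only accepted together with a
   sufficiently strong C11 memory model, e.g.

       __atomic_exchange_n (&lock, 1, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
       __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Pairing __ATOMIC_HLE_ACQUIRE with __ATOMIC_RELAXED triggers the
   -Winvalid-memory-model warning above and falls back to SEQ_CST.  */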
46513
46514 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46515 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
46516 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46517 or number of vecsize_mangle variants that should be emitted. */
46518
46519 static int
46520 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46521 struct cgraph_simd_clone *clonei,
46522 tree base_type, int num)
46523 {
46524 int ret = 1;
46525
46526 if (clonei->simdlen
46527 && (clonei->simdlen < 2
46528 || clonei->simdlen > 16
46529 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46530 {
46531 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46532 "unsupported simdlen %d", clonei->simdlen);
46533 return 0;
46534 }
46535
46536 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46537 if (TREE_CODE (ret_type) != VOID_TYPE)
46538 switch (TYPE_MODE (ret_type))
46539 {
46540 case QImode:
46541 case HImode:
46542 case SImode:
46543 case DImode:
46544 case SFmode:
46545 case DFmode:
46546 /* case SCmode: */
46547 /* case DCmode: */
46548 break;
46549 default:
46550 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46551 "unsupported return type %qT for simd\n", ret_type);
46552 return 0;
46553 }
46554
46555 tree t;
46556 int i;
46557
46558 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46559 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46560 switch (TYPE_MODE (TREE_TYPE (t)))
46561 {
46562 case QImode:
46563 case HImode:
46564 case SImode:
46565 case DImode:
46566 case SFmode:
46567 case DFmode:
46568 /* case SCmode: */
46569 /* case DCmode: */
46570 break;
46571 default:
46572 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46573 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46574 return 0;
46575 }
46576
46577 if (clonei->cilk_elemental)
46578 {
46579 /* Parse the processor clause here. If not present, default to 'b'. */
46580 clonei->vecsize_mangle = 'b';
46581 }
46582 else if (!TREE_PUBLIC (node->decl))
46583 {
46584 /* If the function isn't exported, we can pick up just one ISA
46585 for the clones. */
46586 if (TARGET_AVX2)
46587 clonei->vecsize_mangle = 'd';
46588 else if (TARGET_AVX)
46589 clonei->vecsize_mangle = 'c';
46590 else
46591 clonei->vecsize_mangle = 'b';
46592 ret = 1;
46593 }
46594 else
46595 {
46596 clonei->vecsize_mangle = "bcd"[num];
46597 ret = 3;
46598 }
46599 switch (clonei->vecsize_mangle)
46600 {
46601 case 'b':
46602 clonei->vecsize_int = 128;
46603 clonei->vecsize_float = 128;
46604 break;
46605 case 'c':
46606 clonei->vecsize_int = 128;
46607 clonei->vecsize_float = 256;
46608 break;
46609 case 'd':
46610 clonei->vecsize_int = 256;
46611 clonei->vecsize_float = 256;
46612 break;
46613 }
46614 if (clonei->simdlen == 0)
46615 {
46616 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
46617 clonei->simdlen = clonei->vecsize_int;
46618 else
46619 clonei->simdlen = clonei->vecsize_float;
46620 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
46621 if (clonei->simdlen > 16)
46622 clonei->simdlen = 16;
46623 }
46624 return ret;
46625 }
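/* Worked example (illustrative): for an exported function whose base type
   is double and whose simdlen is not given, the 'b' (SSE2) variant gets
   simdlen = vecsize_float / 64 == 128 / 64 == 2, the 'c' (AVX) and 'd'
   (AVX2) variants get 256 / 64 == 4, and three clones are emitted.  */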
46626
46627 /* Add target attribute to SIMD clone NODE if needed. */
46628
46629 static void
46630 ix86_simd_clone_adjust (struct cgraph_node *node)
46631 {
46632 const char *str = NULL;
46633 gcc_assert (node->decl == cfun->decl);
46634 switch (node->simdclone->vecsize_mangle)
46635 {
46636 case 'b':
46637 if (!TARGET_SSE2)
46638 str = "sse2";
46639 break;
46640 case 'c':
46641 if (!TARGET_AVX)
46642 str = "avx";
46643 break;
46644 case 'd':
46645 if (!TARGET_AVX2)
46646 str = "avx2";
46647 break;
46648 default:
46649 gcc_unreachable ();
46650 }
46651 if (str == NULL)
46652 return;
46653 push_cfun (NULL);
46654 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
46655 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
46656 gcc_assert (ok);
46657 pop_cfun ();
46658 ix86_previous_fndecl = NULL_TREE;
46659 ix86_set_current_function (node->decl);
46660 }
46661
46662 /* If SIMD clone NODE can't be used in a vectorized loop
46663 in current function, return -1, otherwise return a badness of using it
46664 (0 if it is most desirable from vecsize_mangle point of view, 1
46665 slightly less desirable, etc.). */
46666
46667 static int
46668 ix86_simd_clone_usable (struct cgraph_node *node)
46669 {
46670 switch (node->simdclone->vecsize_mangle)
46671 {
46672 case 'b':
46673 if (!TARGET_SSE2)
46674 return -1;
46675 if (!TARGET_AVX)
46676 return 0;
46677 return TARGET_AVX2 ? 2 : 1;
46678 case 'c':
46679 if (!TARGET_AVX)
46680 return -1;
46681 return TARGET_AVX2 ? 1 : 0;
46682 break;
46683 case 'd':
46684 if (!TARGET_AVX2)
46685 return -1;
46686 return 0;
46687 default:
46688 gcc_unreachable ();
46689 }
46690 }
46691
46692 /* This function counts the number of memory references.
46693 This value determines the unrolling factor for
46694 the bdver3 and bdver4 architectures. */
46695
46696 static int
46697 ix86_loop_memcount (rtx *x, unsigned *mem_count)
46698 {
46699 if (*x != NULL_RTX && MEM_P (*x))
46700 {
46701 enum machine_mode mode;
46702 unsigned int n_words;
46703
46704 mode = GET_MODE (*x);
46705 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46706
46707 if (n_words > 4)
46708 (*mem_count) += 2;
46709 else
46710 (*mem_count) += 1;
46711 }
46712 return 0;
46713 }
46714
46715 /* This function adjusts the unroll factor based on
46716 the hardware capabilities. For example, bdver3 has
46717 a loop buffer which makes unrolling of smaller
46718 loops less important. This function decides the
46719 unroll factor using the number of memory references
46720 (the value 32 is used) as a heuristic. */
46721
46722 static unsigned
46723 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
46724 {
46725 basic_block *bbs;
46726 rtx insn;
46727 unsigned i;
46728 unsigned mem_count = 0;
46729
46730 if (!TARGET_ADJUST_UNROLL)
46731 return nunroll;
46732
46733 /* Count the number of memory references within the loop body. */
46734 bbs = get_loop_body (loop);
46735 for (i = 0; i < loop->num_nodes; i++)
46736 {
46737 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
46738 if (NONDEBUG_INSN_P (insn))
46739 for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count);
46740 }
46741 free (bbs);
46742
46743 if (mem_count && mem_count <= 32)
46744 return 32 / mem_count;
46745
46746 return nunroll;
46747 }
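/* Worked example (illustrative): on a target with TARGET_ADJUST_UNROLL
   set (bdver3/bdver4 tuning), a loop body with 8 counted memory
   references gets an unroll factor of 32 / 8 == 4 regardless of the
   factor the generic heuristics proposed; a MEM wider than 4 words
   counts twice in that total.  */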
46748
46749
46750 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
46751
46752 static bool
46753 ix86_float_exceptions_rounding_supported_p (void)
46754 {
46755 /* For x87 floating point with standard excess precision handling,
46756 there is no adddf3 pattern (since x87 floating point only has
46757 XFmode operations) so the default hook implementation gets this
46758 wrong. */
46759 return TARGET_80387 || TARGET_SSE_MATH;
46760 }
46761
46762 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
46763
46764 static void
46765 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
46766 {
46767 if (!TARGET_80387 && !TARGET_SSE_MATH)
46768 return;
46769 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
46770 if (TARGET_80387)
46771 {
46772 tree fenv_index_type = build_index_type (size_int (6));
46773 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
46774 tree fenv_var = create_tmp_var (fenv_type, NULL);
46775 mark_addressable (fenv_var);
46776 tree fenv_ptr = build_pointer_type (fenv_type);
46777 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
46778 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
46779 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
46780 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
46781 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
46782 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
46783 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
46784 tree hold_fnclex = build_call_expr (fnclex, 0);
46785 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
46786 hold_fnclex);
46787 *clear = build_call_expr (fnclex, 0);
46788 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
46789 mark_addressable (sw_var);
46790 tree su_ptr = build_pointer_type (short_unsigned_type_node);
46791 tree sw_addr = build1 (ADDR_EXPR, su_ptr, sw_var);
46792 tree fnstsw_call = build_call_expr (fnstsw, 1, sw_addr);
46793 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
46794 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
46795 exceptions_var, exceptions_x87);
46796 *update = build2 (COMPOUND_EXPR, integer_type_node,
46797 fnstsw_call, update_mod);
46798 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
46799 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
46800 }
46801 if (TARGET_SSE_MATH)
46802 {
46803 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
46804 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
46805 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
46806 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
46807 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
46808 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
46809 mxcsr_orig_var, stmxcsr_hold_call);
46810 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
46811 mxcsr_orig_var,
46812 build_int_cst (unsigned_type_node, 0x1f80));
46813 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
46814 build_int_cst (unsigned_type_node, 0xffffffc0));
46815 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
46816 mxcsr_mod_var, hold_mod_val);
46817 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46818 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
46819 hold_assign_orig, hold_assign_mod);
46820 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
46821 ldmxcsr_hold_call);
46822 if (*hold)
46823 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
46824 else
46825 *hold = hold_all;
46826 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46827 if (*clear)
46828 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
46829 ldmxcsr_clear_call);
46830 else
46831 *clear = ldmxcsr_clear_call;
46832 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
46833 tree exceptions_sse = fold_convert (integer_type_node,
46834 stxmcsr_update_call);
46835 if (*update)
46836 {
46837 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
46838 exceptions_var, exceptions_sse);
46839 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
46840 exceptions_var, exceptions_mod);
46841 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
46842 exceptions_assign);
46843 }
46844 else
46845 *update = build2 (MODIFY_EXPR, integer_type_node,
46846 exceptions_var, exceptions_sse);
46847 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
46848 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46849 ldmxcsr_update_call);
46850 }
46851 tree atomic_feraiseexcept
46852 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
46853 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
46854 1, exceptions_var);
46855 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46856 atomic_feraiseexcept_call);
46857 }
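/* Rough shape of the trees built above, written as pseudo code for the
   case where both x87 and SSE math are enabled (illustrative only; the
   names below stand for the corresponding built-in calls and are not
   real identifiers):

     hold:    fnstenv (&fenv); fnclex ();
              mxcsr_orig = stmxcsr ();
              mxcsr_mod  = (mxcsr_orig | 0x1f80) & 0xffffffc0;
              ldmxcsr (mxcsr_mod);
     clear:   fnclex (); ldmxcsr (mxcsr_mod);
     update:  fnstsw (&sw); exceptions = sw;
              fldenv (&fenv);
              exceptions |= stmxcsr ();
              ldmxcsr (mxcsr_orig);
              __atomic_feraiseexcept (exceptions);  */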
46858
46859 /* Initialize the GCC target structure. */
46860 #undef TARGET_RETURN_IN_MEMORY
46861 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
46862
46863 #undef TARGET_LEGITIMIZE_ADDRESS
46864 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
46865
46866 #undef TARGET_ATTRIBUTE_TABLE
46867 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
46868 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
46869 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
46870 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46871 # undef TARGET_MERGE_DECL_ATTRIBUTES
46872 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
46873 #endif
46874
46875 #undef TARGET_COMP_TYPE_ATTRIBUTES
46876 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
46877
46878 #undef TARGET_INIT_BUILTINS
46879 #define TARGET_INIT_BUILTINS ix86_init_builtins
46880 #undef TARGET_BUILTIN_DECL
46881 #define TARGET_BUILTIN_DECL ix86_builtin_decl
46882 #undef TARGET_EXPAND_BUILTIN
46883 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
46884
46885 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
46886 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
46887 ix86_builtin_vectorized_function
46888
46889 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
46890 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
46891
46892 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
46893 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
46894
46895 #undef TARGET_VECTORIZE_BUILTIN_GATHER
46896 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
46897
46898 #undef TARGET_BUILTIN_RECIPROCAL
46899 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
46900
46901 #undef TARGET_ASM_FUNCTION_EPILOGUE
46902 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
46903
46904 #undef TARGET_ENCODE_SECTION_INFO
46905 #ifndef SUBTARGET_ENCODE_SECTION_INFO
46906 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
46907 #else
46908 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
46909 #endif
46910
46911 #undef TARGET_ASM_OPEN_PAREN
46912 #define TARGET_ASM_OPEN_PAREN ""
46913 #undef TARGET_ASM_CLOSE_PAREN
46914 #define TARGET_ASM_CLOSE_PAREN ""
46915
46916 #undef TARGET_ASM_BYTE_OP
46917 #define TARGET_ASM_BYTE_OP ASM_BYTE
46918
46919 #undef TARGET_ASM_ALIGNED_HI_OP
46920 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
46921 #undef TARGET_ASM_ALIGNED_SI_OP
46922 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
46923 #ifdef ASM_QUAD
46924 #undef TARGET_ASM_ALIGNED_DI_OP
46925 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
46926 #endif
46927
46928 #undef TARGET_PROFILE_BEFORE_PROLOGUE
46929 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
46930
46931 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
46932 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
46933
46934 #undef TARGET_ASM_UNALIGNED_HI_OP
46935 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
46936 #undef TARGET_ASM_UNALIGNED_SI_OP
46937 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
46938 #undef TARGET_ASM_UNALIGNED_DI_OP
46939 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
46940
46941 #undef TARGET_PRINT_OPERAND
46942 #define TARGET_PRINT_OPERAND ix86_print_operand
46943 #undef TARGET_PRINT_OPERAND_ADDRESS
46944 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
46945 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
46946 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
46947 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
46948 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
46949
46950 #undef TARGET_SCHED_INIT_GLOBAL
46951 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
46952 #undef TARGET_SCHED_ADJUST_COST
46953 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
46954 #undef TARGET_SCHED_ISSUE_RATE
46955 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
46956 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
46957 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
46958 ia32_multipass_dfa_lookahead
46959 #undef TARGET_SCHED_MACRO_FUSION_P
46960 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
46961 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
46962 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
46963
46964 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
46965 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
46966
46967 #undef TARGET_MEMMODEL_CHECK
46968 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
46969
46970 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
46971 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
46972
46973 #ifdef HAVE_AS_TLS
46974 #undef TARGET_HAVE_TLS
46975 #define TARGET_HAVE_TLS true
46976 #endif
46977 #undef TARGET_CANNOT_FORCE_CONST_MEM
46978 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
46979 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
46980 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
46981
46982 #undef TARGET_DELEGITIMIZE_ADDRESS
46983 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
46984
46985 #undef TARGET_MS_BITFIELD_LAYOUT_P
46986 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
46987
46988 #if TARGET_MACHO
46989 #undef TARGET_BINDS_LOCAL_P
46990 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
46991 #endif
46992 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46993 #undef TARGET_BINDS_LOCAL_P
46994 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
46995 #endif
46996
46997 #undef TARGET_ASM_OUTPUT_MI_THUNK
46998 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
46999 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
47000 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
47001
47002 #undef TARGET_ASM_FILE_START
47003 #define TARGET_ASM_FILE_START x86_file_start
47004
47005 #undef TARGET_OPTION_OVERRIDE
47006 #define TARGET_OPTION_OVERRIDE ix86_option_override
47007
47008 #undef TARGET_REGISTER_MOVE_COST
47009 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
47010 #undef TARGET_MEMORY_MOVE_COST
47011 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
47012 #undef TARGET_RTX_COSTS
47013 #define TARGET_RTX_COSTS ix86_rtx_costs
47014 #undef TARGET_ADDRESS_COST
47015 #define TARGET_ADDRESS_COST ix86_address_cost
47016
47017 #undef TARGET_FIXED_CONDITION_CODE_REGS
47018 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
47019 #undef TARGET_CC_MODES_COMPATIBLE
47020 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
47021
47022 #undef TARGET_MACHINE_DEPENDENT_REORG
47023 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
47024
47025 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
47026 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
47027
47028 #undef TARGET_BUILD_BUILTIN_VA_LIST
47029 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
47030
47031 #undef TARGET_FOLD_BUILTIN
47032 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
47033
47034 #undef TARGET_COMPARE_VERSION_PRIORITY
47035 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
47036
47037 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
47038 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
47039 ix86_generate_version_dispatcher_body
47040
47041 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
47042 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
47043 ix86_get_function_versions_dispatcher
47044
47045 #undef TARGET_ENUM_VA_LIST_P
47046 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
47047
47048 #undef TARGET_FN_ABI_VA_LIST
47049 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
47050
47051 #undef TARGET_CANONICAL_VA_LIST_TYPE
47052 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
47053
47054 #undef TARGET_EXPAND_BUILTIN_VA_START
47055 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
47056
47057 #undef TARGET_MD_ASM_CLOBBERS
47058 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
47059
47060 #undef TARGET_PROMOTE_PROTOTYPES
47061 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
47062 #undef TARGET_SETUP_INCOMING_VARARGS
47063 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
47064 #undef TARGET_MUST_PASS_IN_STACK
47065 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
47066 #undef TARGET_FUNCTION_ARG_ADVANCE
47067 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
47068 #undef TARGET_FUNCTION_ARG
47069 #define TARGET_FUNCTION_ARG ix86_function_arg
47070 #undef TARGET_FUNCTION_ARG_BOUNDARY
47071 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
47072 #undef TARGET_PASS_BY_REFERENCE
47073 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
47074 #undef TARGET_INTERNAL_ARG_POINTER
47075 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
47076 #undef TARGET_UPDATE_STACK_BOUNDARY
47077 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
47078 #undef TARGET_GET_DRAP_RTX
47079 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
47080 #undef TARGET_STRICT_ARGUMENT_NAMING
47081 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
47082 #undef TARGET_STATIC_CHAIN
47083 #define TARGET_STATIC_CHAIN ix86_static_chain
47084 #undef TARGET_TRAMPOLINE_INIT
47085 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
47086 #undef TARGET_RETURN_POPS_ARGS
47087 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
47088
47089 #undef TARGET_LEGITIMATE_COMBINED_INSN
47090 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
47091
47092 #undef TARGET_ASAN_SHADOW_OFFSET
47093 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
47094
47095 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
47096 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
47097
47098 #undef TARGET_SCALAR_MODE_SUPPORTED_P
47099 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
47100
47101 #undef TARGET_VECTOR_MODE_SUPPORTED_P
47102 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
47103
47104 #undef TARGET_C_MODE_FOR_SUFFIX
47105 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
47106
47107 #ifdef HAVE_AS_TLS
47108 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
47109 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
47110 #endif
47111
47112 #ifdef SUBTARGET_INSERT_ATTRIBUTES
47113 #undef TARGET_INSERT_ATTRIBUTES
47114 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
47115 #endif
47116
47117 #undef TARGET_MANGLE_TYPE
47118 #define TARGET_MANGLE_TYPE ix86_mangle_type
47119
47120 #if !TARGET_MACHO
47121 #undef TARGET_STACK_PROTECT_FAIL
47122 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
47123 #endif
47124
47125 #undef TARGET_FUNCTION_VALUE
47126 #define TARGET_FUNCTION_VALUE ix86_function_value
47127
47128 #undef TARGET_FUNCTION_VALUE_REGNO_P
47129 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
47130
47131 #undef TARGET_PROMOTE_FUNCTION_MODE
47132 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
47133
47134 #undef TARGET_MEMBER_TYPE_FORCES_BLK
47135 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
47136
47137 #undef TARGET_INSTANTIATE_DECLS
47138 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
47139
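/* Register-class hooks used by reload/LRA, e.g. for moves that need an
   intermediate register and for deciding which classes are cheap or
   likely to be spilled.  */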
47140 #undef TARGET_SECONDARY_RELOAD
47141 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
47142
47143 #undef TARGET_CLASS_MAX_NREGS
47144 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
47145
47146 #undef TARGET_PREFERRED_RELOAD_CLASS
47147 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
47148 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
47149 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
47150 #undef TARGET_CLASS_LIKELY_SPILLED_P
47151 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
47152
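/* Vectorizer hooks.  The init/add_stmt/finish/destroy quartet implements
   the vectorizer's per-loop cost model; the remaining hooks report which
   constant permutations, SIMD modes and vector sizes the target can
   autovectorize with.  */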
47153 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
47154 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
47155 ix86_builtin_vectorization_cost
47156 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
47157 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
47158 ix86_vectorize_vec_perm_const_ok
47159 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
47160 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
47161 ix86_preferred_simd_mode
47162 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
47163 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
47164 ix86_autovectorize_vector_sizes
47165 #undef TARGET_VECTORIZE_INIT_COST
47166 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
47167 #undef TARGET_VECTORIZE_ADD_STMT_COST
47168 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
47169 #undef TARGET_VECTORIZE_FINISH_COST
47170 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
47171 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
47172 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
47173
47174 #undef TARGET_SET_CURRENT_FUNCTION
47175 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
47176
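/* Support for attribute ((target ("..."))) and function multiversioning:
   saving, restoring and printing per-function option state, deciding
   whether two declarations are distinct versions of the same function,
   and whether differing target options still permit inlining.  */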
47177 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
47178 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
47179
47180 #undef TARGET_OPTION_SAVE
47181 #define TARGET_OPTION_SAVE ix86_function_specific_save
47182
47183 #undef TARGET_OPTION_RESTORE
47184 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
47185
47186 #undef TARGET_OPTION_PRINT
47187 #define TARGET_OPTION_PRINT ix86_function_specific_print
47188
47189 #undef TARGET_OPTION_FUNCTION_VERSIONS
47190 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
47191
47192 #undef TARGET_CAN_INLINE_P
47193 #define TARGET_CAN_INLINE_P ix86_can_inline_p
47194
47195 #undef TARGET_EXPAND_TO_RTL_HOOK
47196 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
47197
47198 #undef TARGET_LEGITIMATE_ADDRESS_P
47199 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
47200
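/* Use the LRA register allocator rather than the old reload pass; the
   two hooks following it tune LRA's choice of hard registers.  */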
47201 #undef TARGET_LRA_P
47202 #define TARGET_LRA_P hook_bool_void_true
47203
47204 #undef TARGET_REGISTER_PRIORITY
47205 #define TARGET_REGISTER_PRIORITY ix86_register_priority
47206
47207 #undef TARGET_REGISTER_USAGE_LEVELING_P
47208 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
47209
47210 #undef TARGET_LEGITIMATE_CONSTANT_P
47211 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
47212
47213 #undef TARGET_FRAME_POINTER_REQUIRED
47214 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
47215
47216 #undef TARGET_CAN_ELIMINATE
47217 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
47218
47219 #undef TARGET_EXTRA_LIVE_ON_ENTRY
47220 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
47221
47222 #undef TARGET_ASM_CODE_END
47223 #define TARGET_ASM_CODE_END ix86_code_end
47224
47225 #undef TARGET_CONDITIONAL_REGISTER_USAGE
47226 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
47227
47228 #if TARGET_MACHO
47229 #undef TARGET_INIT_LIBFUNCS
47230 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
47231 #endif
47232
47233 #undef TARGET_LOOP_UNROLL_ADJUST
47234 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
47235
47236 #undef TARGET_SPILL_CLASS
47237 #define TARGET_SPILL_CLASS ix86_spill_class
47238
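/* SIMD clone hooks, used when expanding "#pragma omp declare simd"
   (and the equivalent Cilk Plus vector attributes) into vectorized
   function clones.  */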
47239 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
47240 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
47241 ix86_simd_clone_compute_vecsize_and_simdlen
47242
47243 #undef TARGET_SIMD_CLONE_ADJUST
47244 #define TARGET_SIMD_CLONE_ADJUST \
47245 ix86_simd_clone_adjust
47246
47247 #undef TARGET_SIMD_CLONE_USABLE
47248 #define TARGET_SIMD_CLONE_USABLE \
47249 ix86_simd_clone_usable
47250
47251 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
47252 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
47253 ix86_float_exceptions_rounding_supported_p
47254
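/* Mode-switching hooks (the optimize_mode_switching framework).  On x86
   these manage the i387 control word (rounding/truncation modes) and the
   AVX/SSE transition state, which is where vzeroupper insertion comes
   from.  */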
47255 #undef TARGET_MODE_EMIT
47256 #define TARGET_MODE_EMIT ix86_emit_mode_set
47257
47258 #undef TARGET_MODE_NEEDED
47259 #define TARGET_MODE_NEEDED ix86_mode_needed
47260
47261 #undef TARGET_MODE_AFTER
47262 #define TARGET_MODE_AFTER ix86_mode_after
47263
47264 #undef TARGET_MODE_ENTRY
47265 #define TARGET_MODE_ENTRY ix86_mode_entry
47266
47267 #undef TARGET_MODE_EXIT
47268 #define TARGET_MODE_EXIT ix86_mode_exit
47269
47270 #undef TARGET_MODE_PRIORITY
47271 #define TARGET_MODE_PRIORITY ix86_mode_priority
47272
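/* TARGET_INITIALIZER (from target-def.h) expands to an initializer for
   struct gcc_target built from the TARGET_* macros defined above; any
   hook not overridden here keeps its default value.  */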
47273 struct gcc_target targetm = TARGET_INITIALIZER;
47274 \f
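/* gt-i386.h is generated by gengtype; it provides the garbage-collector
   root tables and marking routines for the GTY-marked statics in this
   file.  */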
47275 #include "gt-i386.h"