1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
26 #include "stringpool.h"
29 #include "stor-layout.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
45 #include "diagnostic-core.h"
47 #include "basic-block.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
59 #include "tm-constrs.h"
63 #include "sched-int.h"
67 #include "diagnostic.h"
69 #include "tree-pass.h"
72 #include "pass_manager.h"
74 static rtx
legitimize_dllimport_symbol (rtx
, bool);
75 static rtx
legitimize_pe_coff_extern_decl (rtx
, bool);
76 static rtx
legitimize_pe_coff_symbol (rtx
, bool);
/* Stack-probe limit; -1 means "no limit" unless the target overrides it.
   NOTE(review): the closing #endif was dropped in this copy (embedded line
   numbering jumps 79->82); restored here.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.
   The cost tables below have five entries -- QI, HI, SI, DI and "other" --
   so the catch-all index is 4.  NOTE(review): the final ": 4)" arm was
   dropped in this copy (numbering jumps 87->90); restored here.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Fallback stringop_algs entry: call the library function for every size.
   Used to fill table slots that carry no tuned inline strategy.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
96 static stringop_algs ix86_size_memcpy
[2] = {
97 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
98 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}}};
99 static stringop_algs ix86_size_memset
[2] = {
100 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
101 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}}};
/* Cost table used when tuning for size: entries are measured in bytes
   (COSTS_N_BYTES) rather than cycles.
   NOTE(review): this copy appears truncated -- the embedded line numbers
   jump (122->124, 148->150, 155->158, 168->171), so some initializer
   entries (apparently including the memcpy/memset table references) and
   the closing "};" look dropped.  Verify against upstream before building.  */
104 struct processor_costs ix86_size_cost
= {/* costs for tuning for size */
105 COSTS_N_BYTES (2), /* cost of an add instruction */
106 COSTS_N_BYTES (3), /* cost of a lea instruction */
107 COSTS_N_BYTES (2), /* variable shift costs */
108 COSTS_N_BYTES (3), /* constant shift costs */
109 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
110 COSTS_N_BYTES (3), /* HI */
111 COSTS_N_BYTES (3), /* SI */
112 COSTS_N_BYTES (3), /* DI */
113 COSTS_N_BYTES (5)}, /* other */
114 0, /* cost of multiply per each bit set */
115 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
116 COSTS_N_BYTES (3), /* HI */
117 COSTS_N_BYTES (3), /* SI */
118 COSTS_N_BYTES (3), /* DI */
119 COSTS_N_BYTES (5)}, /* other */
120 COSTS_N_BYTES (3), /* cost of movsx */
121 COSTS_N_BYTES (3), /* cost of movzx */
122 0, /* "large" insn */
124 2, /* cost for loading QImode using movzbl */
125 {2, 2, 2}, /* cost of loading integer registers
126 in QImode, HImode and SImode.
127 Relative to reg-reg move (2). */
128 {2, 2, 2}, /* cost of storing integer registers */
129 2, /* cost of reg,reg fld/fst */
130 {2, 2, 2}, /* cost of loading fp registers
131 in SFmode, DFmode and XFmode */
132 {2, 2, 2}, /* cost of storing fp registers
133 in SFmode, DFmode and XFmode */
134 3, /* cost of moving MMX register */
135 {3, 3}, /* cost of loading MMX registers
136 in SImode and DImode */
137 {3, 3}, /* cost of storing MMX registers
138 in SImode and DImode */
139 3, /* cost of moving SSE register */
140 {3, 3, 3}, /* cost of loading SSE registers
141 in SImode, DImode and TImode */
142 {3, 3, 3}, /* cost of storing SSE registers
143 in SImode, DImode and TImode */
144 3, /* MMX or SSE register to integer */
145 0, /* size of l1 cache */
146 0, /* size of l2 cache */
147 0, /* size of prefetch block */
148 0, /* number of parallel prefetches */
150 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
151 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
152 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
153 COSTS_N_BYTES (2), /* cost of FABS instruction. */
154 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
155 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
158 1, /* scalar_stmt_cost. */
159 1, /* scalar load_cost. */
160 1, /* scalar_store_cost. */
161 1, /* vec_stmt_cost. */
162 1, /* vec_to_scalar_cost. */
163 1, /* scalar_to_vec_cost. */
164 1, /* vec_align_load_cost. */
165 1, /* vec_unalign_load_cost. */
166 1, /* vec_store_cost. */
167 1, /* cond_taken_branch_cost. */
168 1, /* cond_not_taken_branch_cost. */
171 /* Processor costs (relative to an add) */
172 static stringop_algs i386_memcpy
[2] = {
173 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
174 DUMMY_STRINGOP_ALGS
};
175 static stringop_algs i386_memset
[2] = {
176 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
177 DUMMY_STRINGOP_ALGS
};
/* 386-specific cost table (entries in COSTS_N_INSNS cycle units).
   NOTE(review): this copy appears truncated -- the embedded line numbers
   jump (198->200, 224->226, 231->234, 244->247), so some initializer
   entries (apparently including the memcpy/memset table references) and
   the closing "};" look dropped.  Verify against upstream before building.  */
180 struct processor_costs i386_cost
= { /* 386 specific costs */
181 COSTS_N_INSNS (1), /* cost of an add instruction */
182 COSTS_N_INSNS (1), /* cost of a lea instruction */
183 COSTS_N_INSNS (3), /* variable shift costs */
184 COSTS_N_INSNS (2), /* constant shift costs */
185 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
186 COSTS_N_INSNS (6), /* HI */
187 COSTS_N_INSNS (6), /* SI */
188 COSTS_N_INSNS (6), /* DI */
189 COSTS_N_INSNS (6)}, /* other */
190 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
191 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
192 COSTS_N_INSNS (23), /* HI */
193 COSTS_N_INSNS (23), /* SI */
194 COSTS_N_INSNS (23), /* DI */
195 COSTS_N_INSNS (23)}, /* other */
196 COSTS_N_INSNS (3), /* cost of movsx */
197 COSTS_N_INSNS (2), /* cost of movzx */
198 15, /* "large" insn */
200 4, /* cost for loading QImode using movzbl */
201 {2, 4, 2}, /* cost of loading integer registers
202 in QImode, HImode and SImode.
203 Relative to reg-reg move (2). */
204 {2, 4, 2}, /* cost of storing integer registers */
205 2, /* cost of reg,reg fld/fst */
206 {8, 8, 8}, /* cost of loading fp registers
207 in SFmode, DFmode and XFmode */
208 {8, 8, 8}, /* cost of storing fp registers
209 in SFmode, DFmode and XFmode */
210 2, /* cost of moving MMX register */
211 {4, 8}, /* cost of loading MMX registers
212 in SImode and DImode */
213 {4, 8}, /* cost of storing MMX registers
214 in SImode and DImode */
215 2, /* cost of moving SSE register */
216 {4, 8, 16}, /* cost of loading SSE registers
217 in SImode, DImode and TImode */
218 {4, 8, 16}, /* cost of storing SSE registers
219 in SImode, DImode and TImode */
220 3, /* MMX or SSE register to integer */
221 0, /* size of l1 cache */
222 0, /* size of l2 cache */
223 0, /* size of prefetch block */
224 0, /* number of parallel prefetches */
226 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
227 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
228 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
229 COSTS_N_INSNS (22), /* cost of FABS instruction. */
230 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
231 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
234 1, /* scalar_stmt_cost. */
235 1, /* scalar load_cost. */
236 1, /* scalar_store_cost. */
237 1, /* vec_stmt_cost. */
238 1, /* vec_to_scalar_cost. */
239 1, /* scalar_to_vec_cost. */
240 1, /* vec_align_load_cost. */
241 2, /* vec_unalign_load_cost. */
242 1, /* vec_store_cost. */
243 3, /* cond_taken_branch_cost. */
244 1, /* cond_not_taken_branch_cost. */
247 static stringop_algs i486_memcpy
[2] = {
248 {rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
, false}}},
249 DUMMY_STRINGOP_ALGS
};
250 static stringop_algs i486_memset
[2] = {
251 {rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
, false}}},
252 DUMMY_STRINGOP_ALGS
};
/* 486-specific cost table (entries in COSTS_N_INSNS cycle units).
   NOTE(review): this copy appears truncated -- the embedded line numbers
   jump (273->275, 301->303, 308->311, 321->324), so some initializer
   entries (apparently including the memcpy/memset table references) and
   the closing "};" look dropped.  Verify against upstream before building.  */
255 struct processor_costs i486_cost
= { /* 486 specific costs */
256 COSTS_N_INSNS (1), /* cost of an add instruction */
257 COSTS_N_INSNS (1), /* cost of a lea instruction */
258 COSTS_N_INSNS (3), /* variable shift costs */
259 COSTS_N_INSNS (2), /* constant shift costs */
260 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
261 COSTS_N_INSNS (12), /* HI */
262 COSTS_N_INSNS (12), /* SI */
263 COSTS_N_INSNS (12), /* DI */
264 COSTS_N_INSNS (12)}, /* other */
265 1, /* cost of multiply per each bit set */
266 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
267 COSTS_N_INSNS (40), /* HI */
268 COSTS_N_INSNS (40), /* SI */
269 COSTS_N_INSNS (40), /* DI */
270 COSTS_N_INSNS (40)}, /* other */
271 COSTS_N_INSNS (3), /* cost of movsx */
272 COSTS_N_INSNS (2), /* cost of movzx */
273 15, /* "large" insn */
275 4, /* cost for loading QImode using movzbl */
276 {2, 4, 2}, /* cost of loading integer registers
277 in QImode, HImode and SImode.
278 Relative to reg-reg move (2). */
279 {2, 4, 2}, /* cost of storing integer registers */
280 2, /* cost of reg,reg fld/fst */
281 {8, 8, 8}, /* cost of loading fp registers
282 in SFmode, DFmode and XFmode */
283 {8, 8, 8}, /* cost of storing fp registers
284 in SFmode, DFmode and XFmode */
285 2, /* cost of moving MMX register */
286 {4, 8}, /* cost of loading MMX registers
287 in SImode and DImode */
288 {4, 8}, /* cost of storing MMX registers
289 in SImode and DImode */
290 2, /* cost of moving SSE register */
291 {4, 8, 16}, /* cost of loading SSE registers
292 in SImode, DImode and TImode */
293 {4, 8, 16}, /* cost of storing SSE registers
294 in SImode, DImode and TImode */
295 3, /* MMX or SSE register to integer */
296 4, /* size of l1 cache. 486 has 8kB cache
297 shared for code and data, so 4kB is
298 not really precise. */
299 4, /* size of l2 cache */
300 0, /* size of prefetch block */
301 0, /* number of parallel prefetches */
303 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
304 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
305 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
306 COSTS_N_INSNS (3), /* cost of FABS instruction. */
307 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
308 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
311 1, /* scalar_stmt_cost. */
312 1, /* scalar load_cost. */
313 1, /* scalar_store_cost. */
314 1, /* vec_stmt_cost. */
315 1, /* vec_to_scalar_cost. */
316 1, /* scalar_to_vec_cost. */
317 1, /* vec_align_load_cost. */
318 2, /* vec_unalign_load_cost. */
319 1, /* vec_store_cost. */
320 3, /* cond_taken_branch_cost. */
321 1, /* cond_not_taken_branch_cost. */
324 static stringop_algs pentium_memcpy
[2] = {
325 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
326 DUMMY_STRINGOP_ALGS
};
327 static stringop_algs pentium_memset
[2] = {
328 {libcall
, {{-1, rep_prefix_4_byte
, false}}},
329 DUMMY_STRINGOP_ALGS
};
/* Pentium-specific cost table (entries in COSTS_N_INSNS cycle units).
   NOTE(review): this copy appears truncated -- the embedded line numbers
   jump (350->352, 376->378, 383->386, 396->399), so some initializer
   entries (apparently including the memcpy/memset table references) and
   the closing "};" look dropped.  Verify against upstream before building.  */
332 struct processor_costs pentium_cost
= {
333 COSTS_N_INSNS (1), /* cost of an add instruction */
334 COSTS_N_INSNS (1), /* cost of a lea instruction */
335 COSTS_N_INSNS (4), /* variable shift costs */
336 COSTS_N_INSNS (1), /* constant shift costs */
337 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
338 COSTS_N_INSNS (11), /* HI */
339 COSTS_N_INSNS (11), /* SI */
340 COSTS_N_INSNS (11), /* DI */
341 COSTS_N_INSNS (11)}, /* other */
342 0, /* cost of multiply per each bit set */
343 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
344 COSTS_N_INSNS (25), /* HI */
345 COSTS_N_INSNS (25), /* SI */
346 COSTS_N_INSNS (25), /* DI */
347 COSTS_N_INSNS (25)}, /* other */
348 COSTS_N_INSNS (3), /* cost of movsx */
349 COSTS_N_INSNS (2), /* cost of movzx */
350 8, /* "large" insn */
352 6, /* cost for loading QImode using movzbl */
353 {2, 4, 2}, /* cost of loading integer registers
354 in QImode, HImode and SImode.
355 Relative to reg-reg move (2). */
356 {2, 4, 2}, /* cost of storing integer registers */
357 2, /* cost of reg,reg fld/fst */
358 {2, 2, 6}, /* cost of loading fp registers
359 in SFmode, DFmode and XFmode */
360 {4, 4, 6}, /* cost of storing fp registers
361 in SFmode, DFmode and XFmode */
362 8, /* cost of moving MMX register */
363 {8, 8}, /* cost of loading MMX registers
364 in SImode and DImode */
365 {8, 8}, /* cost of storing MMX registers
366 in SImode and DImode */
367 2, /* cost of moving SSE register */
368 {4, 8, 16}, /* cost of loading SSE registers
369 in SImode, DImode and TImode */
370 {4, 8, 16}, /* cost of storing SSE registers
371 in SImode, DImode and TImode */
372 3, /* MMX or SSE register to integer */
373 8, /* size of l1 cache. */
374 8, /* size of l2 cache */
375 0, /* size of prefetch block */
376 0, /* number of parallel prefetches */
378 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
379 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
380 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
381 COSTS_N_INSNS (1), /* cost of FABS instruction. */
382 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
383 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
386 1, /* scalar_stmt_cost. */
387 1, /* scalar load_cost. */
388 1, /* scalar_store_cost. */
389 1, /* vec_stmt_cost. */
390 1, /* vec_to_scalar_cost. */
391 1, /* scalar_to_vec_cost. */
392 1, /* vec_align_load_cost. */
393 2, /* vec_unalign_load_cost. */
394 1, /* vec_store_cost. */
395 3, /* cond_taken_branch_cost. */
396 1, /* cond_not_taken_branch_cost. */
399 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
400 (we ensure the alignment). For small blocks inline loop is still a
401 noticeable win, for bigger blocks either rep movsl or rep movsb is
402 way to go. Rep movsb has apparently more expensive startup time in CPU,
403 but after 4K the difference is down in the noise. */
404 static stringop_algs pentiumpro_memcpy
[2] = {
405 {rep_prefix_4_byte
, {{128, loop
, false}, {1024, unrolled_loop
, false},
406 {8192, rep_prefix_4_byte
, false},
407 {-1, rep_prefix_1_byte
, false}}},
408 DUMMY_STRINGOP_ALGS
};
409 static stringop_algs pentiumpro_memset
[2] = {
410 {rep_prefix_4_byte
, {{1024, unrolled_loop
, false},
411 {8192, rep_prefix_4_byte
, false},
412 {-1, libcall
, false}}},
413 DUMMY_STRINGOP_ALGS
};
/* PentiumPro-specific cost table (entries in COSTS_N_INSNS cycle units).
   NOTE(review): this copy appears truncated -- the embedded line numbers
   jump (433->435, 459->461, 466->469, 479->482), so some initializer
   entries (apparently including the memcpy/memset table references) and
   the closing "};" look dropped.  Verify against upstream before building.  */
415 struct processor_costs pentiumpro_cost
= {
416 COSTS_N_INSNS (1), /* cost of an add instruction */
417 COSTS_N_INSNS (1), /* cost of a lea instruction */
418 COSTS_N_INSNS (1), /* variable shift costs */
419 COSTS_N_INSNS (1), /* constant shift costs */
420 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
421 COSTS_N_INSNS (4), /* HI */
422 COSTS_N_INSNS (4), /* SI */
423 COSTS_N_INSNS (4), /* DI */
424 COSTS_N_INSNS (4)}, /* other */
425 0, /* cost of multiply per each bit set */
426 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
427 COSTS_N_INSNS (17), /* HI */
428 COSTS_N_INSNS (17), /* SI */
429 COSTS_N_INSNS (17), /* DI */
430 COSTS_N_INSNS (17)}, /* other */
431 COSTS_N_INSNS (1), /* cost of movsx */
432 COSTS_N_INSNS (1), /* cost of movzx */
433 8, /* "large" insn */
435 2, /* cost for loading QImode using movzbl */
436 {4, 4, 4}, /* cost of loading integer registers
437 in QImode, HImode and SImode.
438 Relative to reg-reg move (2). */
439 {2, 2, 2}, /* cost of storing integer registers */
440 2, /* cost of reg,reg fld/fst */
441 {2, 2, 6}, /* cost of loading fp registers
442 in SFmode, DFmode and XFmode */
443 {4, 4, 6}, /* cost of storing fp registers
444 in SFmode, DFmode and XFmode */
445 2, /* cost of moving MMX register */
446 {2, 2}, /* cost of loading MMX registers
447 in SImode and DImode */
448 {2, 2}, /* cost of storing MMX registers
449 in SImode and DImode */
450 2, /* cost of moving SSE register */
451 {2, 2, 8}, /* cost of loading SSE registers
452 in SImode, DImode and TImode */
453 {2, 2, 8}, /* cost of storing SSE registers
454 in SImode, DImode and TImode */
455 3, /* MMX or SSE register to integer */
456 8, /* size of l1 cache. */
457 256, /* size of l2 cache */
458 32, /* size of prefetch block */
459 6, /* number of parallel prefetches */
461 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
462 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
463 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
464 COSTS_N_INSNS (2), /* cost of FABS instruction. */
465 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
466 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
469 1, /* scalar_stmt_cost. */
470 1, /* scalar load_cost. */
471 1, /* scalar_store_cost. */
472 1, /* vec_stmt_cost. */
473 1, /* vec_to_scalar_cost. */
474 1, /* scalar_to_vec_cost. */
475 1, /* vec_align_load_cost. */
476 2, /* vec_unalign_load_cost. */
477 1, /* vec_store_cost. */
478 3, /* cond_taken_branch_cost. */
479 1, /* cond_not_taken_branch_cost. */
482 static stringop_algs geode_memcpy
[2] = {
483 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
484 DUMMY_STRINGOP_ALGS
};
485 static stringop_algs geode_memset
[2] = {
486 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
487 DUMMY_STRINGOP_ALGS
};
/* Geode-specific cost table (entries in COSTS_N_INSNS cycle units).
   NOTE(review): this copy appears truncated -- the embedded line numbers
   jump (507->509, 518->520, 534->536, 541->544, 554->557), so some
   initializer entries (apparently including the memcpy/memset table
   references) and the closing "};" look dropped.  Verify against
   upstream before building.  */
489 struct processor_costs geode_cost
= {
490 COSTS_N_INSNS (1), /* cost of an add instruction */
491 COSTS_N_INSNS (1), /* cost of a lea instruction */
492 COSTS_N_INSNS (2), /* variable shift costs */
493 COSTS_N_INSNS (1), /* constant shift costs */
494 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
495 COSTS_N_INSNS (4), /* HI */
496 COSTS_N_INSNS (7), /* SI */
497 COSTS_N_INSNS (7), /* DI */
498 COSTS_N_INSNS (7)}, /* other */
499 0, /* cost of multiply per each bit set */
500 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
501 COSTS_N_INSNS (23), /* HI */
502 COSTS_N_INSNS (39), /* SI */
503 COSTS_N_INSNS (39), /* DI */
504 COSTS_N_INSNS (39)}, /* other */
505 COSTS_N_INSNS (1), /* cost of movsx */
506 COSTS_N_INSNS (1), /* cost of movzx */
507 8, /* "large" insn */
509 1, /* cost for loading QImode using movzbl */
510 {1, 1, 1}, /* cost of loading integer registers
511 in QImode, HImode and SImode.
512 Relative to reg-reg move (2). */
513 {1, 1, 1}, /* cost of storing integer registers */
514 1, /* cost of reg,reg fld/fst */
515 {1, 1, 1}, /* cost of loading fp registers
516 in SFmode, DFmode and XFmode */
517 {4, 6, 6}, /* cost of storing fp registers
518 in SFmode, DFmode and XFmode */
520 1, /* cost of moving MMX register */
521 {1, 1}, /* cost of loading MMX registers
522 in SImode and DImode */
523 {1, 1}, /* cost of storing MMX registers
524 in SImode and DImode */
525 1, /* cost of moving SSE register */
526 {1, 1, 1}, /* cost of loading SSE registers
527 in SImode, DImode and TImode */
528 {1, 1, 1}, /* cost of storing SSE registers
529 in SImode, DImode and TImode */
530 1, /* MMX or SSE register to integer */
531 64, /* size of l1 cache. */
532 128, /* size of l2 cache. */
533 32, /* size of prefetch block */
534 1, /* number of parallel prefetches */
536 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
537 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
538 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
539 COSTS_N_INSNS (1), /* cost of FABS instruction. */
540 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
541 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
544 1, /* scalar_stmt_cost. */
545 1, /* scalar load_cost. */
546 1, /* scalar_store_cost. */
547 1, /* vec_stmt_cost. */
548 1, /* vec_to_scalar_cost. */
549 1, /* scalar_to_vec_cost. */
550 1, /* vec_align_load_cost. */
551 2, /* vec_unalign_load_cost. */
552 1, /* vec_store_cost. */
553 3, /* cond_taken_branch_cost. */
554 1, /* cond_not_taken_branch_cost. */
557 static stringop_algs k6_memcpy
[2] = {
558 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
559 DUMMY_STRINGOP_ALGS
};
560 static stringop_algs k6_memset
[2] = {
561 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
562 DUMMY_STRINGOP_ALGS
};
/* K6-specific cost table (entries in COSTS_N_INSNS cycle units).
   NOTE(review): this copy appears truncated -- the embedded line numbers
   jump (582->584, 611->613, 618->621, 631->634), so some initializer
   entries (apparently including the memcpy/memset table references) and
   the closing "};" look dropped.  Verify against upstream before building.  */
564 struct processor_costs k6_cost
= {
565 COSTS_N_INSNS (1), /* cost of an add instruction */
566 COSTS_N_INSNS (2), /* cost of a lea instruction */
567 COSTS_N_INSNS (1), /* variable shift costs */
568 COSTS_N_INSNS (1), /* constant shift costs */
569 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
570 COSTS_N_INSNS (3), /* HI */
571 COSTS_N_INSNS (3), /* SI */
572 COSTS_N_INSNS (3), /* DI */
573 COSTS_N_INSNS (3)}, /* other */
574 0, /* cost of multiply per each bit set */
575 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
576 COSTS_N_INSNS (18), /* HI */
577 COSTS_N_INSNS (18), /* SI */
578 COSTS_N_INSNS (18), /* DI */
579 COSTS_N_INSNS (18)}, /* other */
580 COSTS_N_INSNS (2), /* cost of movsx */
581 COSTS_N_INSNS (2), /* cost of movzx */
582 8, /* "large" insn */
584 3, /* cost for loading QImode using movzbl */
585 {4, 5, 4}, /* cost of loading integer registers
586 in QImode, HImode and SImode.
587 Relative to reg-reg move (2). */
588 {2, 3, 2}, /* cost of storing integer registers */
589 4, /* cost of reg,reg fld/fst */
590 {6, 6, 6}, /* cost of loading fp registers
591 in SFmode, DFmode and XFmode */
592 {4, 4, 4}, /* cost of storing fp registers
593 in SFmode, DFmode and XFmode */
594 2, /* cost of moving MMX register */
595 {2, 2}, /* cost of loading MMX registers
596 in SImode and DImode */
597 {2, 2}, /* cost of storing MMX registers
598 in SImode and DImode */
599 2, /* cost of moving SSE register */
600 {2, 2, 8}, /* cost of loading SSE registers
601 in SImode, DImode and TImode */
602 {2, 2, 8}, /* cost of storing SSE registers
603 in SImode, DImode and TImode */
604 6, /* MMX or SSE register to integer */
605 32, /* size of l1 cache. */
606 32, /* size of l2 cache. Some models
607 have integrated l2 cache, but
608 optimizing for k6 is not important
609 enough to worry about that. */
610 32, /* size of prefetch block */
611 1, /* number of parallel prefetches */
613 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
614 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
615 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
616 COSTS_N_INSNS (2), /* cost of FABS instruction. */
617 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
618 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
621 1, /* scalar_stmt_cost. */
622 1, /* scalar load_cost. */
623 1, /* scalar_store_cost. */
624 1, /* vec_stmt_cost. */
625 1, /* vec_to_scalar_cost. */
626 1, /* scalar_to_vec_cost. */
627 1, /* vec_align_load_cost. */
628 2, /* vec_unalign_load_cost. */
629 1, /* vec_store_cost. */
630 3, /* cond_taken_branch_cost. */
631 1, /* cond_not_taken_branch_cost. */
634 /* For some reason, Athlon deals better with REP prefix (relative to loops)
635 compared to K8. Alignment becomes important after 8 bytes for memcpy and
636 128 bytes for memset. */
637 static stringop_algs athlon_memcpy
[2] = {
638 {libcall
, {{2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
639 DUMMY_STRINGOP_ALGS
};
640 static stringop_algs athlon_memset
[2] = {
641 {libcall
, {{2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
642 DUMMY_STRINGOP_ALGS
};
/* Athlon-specific cost table (entries in COSTS_N_INSNS cycle units).
   NOTE(review): this copy appears truncated -- the embedded line numbers
   jump (662->664, 688->690, 695->698, 708->711), so some initializer
   entries (apparently including the memcpy/memset table references) and
   the closing "};" look dropped.  Verify against upstream before building.  */
644 struct processor_costs athlon_cost
= {
645 COSTS_N_INSNS (1), /* cost of an add instruction */
646 COSTS_N_INSNS (2), /* cost of a lea instruction */
647 COSTS_N_INSNS (1), /* variable shift costs */
648 COSTS_N_INSNS (1), /* constant shift costs */
649 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
650 COSTS_N_INSNS (5), /* HI */
651 COSTS_N_INSNS (5), /* SI */
652 COSTS_N_INSNS (5), /* DI */
653 COSTS_N_INSNS (5)}, /* other */
654 0, /* cost of multiply per each bit set */
655 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
656 COSTS_N_INSNS (26), /* HI */
657 COSTS_N_INSNS (42), /* SI */
658 COSTS_N_INSNS (74), /* DI */
659 COSTS_N_INSNS (74)}, /* other */
660 COSTS_N_INSNS (1), /* cost of movsx */
661 COSTS_N_INSNS (1), /* cost of movzx */
662 8, /* "large" insn */
664 4, /* cost for loading QImode using movzbl */
665 {3, 4, 3}, /* cost of loading integer registers
666 in QImode, HImode and SImode.
667 Relative to reg-reg move (2). */
668 {3, 4, 3}, /* cost of storing integer registers */
669 4, /* cost of reg,reg fld/fst */
670 {4, 4, 12}, /* cost of loading fp registers
671 in SFmode, DFmode and XFmode */
672 {6, 6, 8}, /* cost of storing fp registers
673 in SFmode, DFmode and XFmode */
674 2, /* cost of moving MMX register */
675 {4, 4}, /* cost of loading MMX registers
676 in SImode and DImode */
677 {4, 4}, /* cost of storing MMX registers
678 in SImode and DImode */
679 2, /* cost of moving SSE register */
680 {4, 4, 6}, /* cost of loading SSE registers
681 in SImode, DImode and TImode */
682 {4, 4, 5}, /* cost of storing SSE registers
683 in SImode, DImode and TImode */
684 5, /* MMX or SSE register to integer */
685 64, /* size of l1 cache. */
686 256, /* size of l2 cache. */
687 64, /* size of prefetch block */
688 6, /* number of parallel prefetches */
690 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
691 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
692 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
693 COSTS_N_INSNS (2), /* cost of FABS instruction. */
694 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
695 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
698 1, /* scalar_stmt_cost. */
699 1, /* scalar load_cost. */
700 1, /* scalar_store_cost. */
701 1, /* vec_stmt_cost. */
702 1, /* vec_to_scalar_cost. */
703 1, /* scalar_to_vec_cost. */
704 1, /* vec_align_load_cost. */
705 2, /* vec_unalign_load_cost. */
706 1, /* vec_store_cost. */
707 3, /* cond_taken_branch_cost. */
708 1, /* cond_not_taken_branch_cost. */
711 /* K8 has optimized REP instruction for medium sized blocks, but for very
712 small blocks it is better to use loop. For large blocks, libcall can
713 do nontemporary accesses and beat inline considerably. */
714 static stringop_algs k8_memcpy
[2] = {
715 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
716 {-1, rep_prefix_4_byte
, false}}},
717 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
718 {-1, libcall
, false}}}};
719 static stringop_algs k8_memset
[2] = {
720 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
721 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
722 {libcall
, {{48, unrolled_loop
, false},
723 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
/* K8-specific cost table (entries in COSTS_N_INSNS cycle units).
   NOTE(review): this copy appears truncated -- the embedded line numbers
   jump (743->745, 772->774, 774->776, 781->785, 795->798).  The jump at
   772->774 drops the end of the prefetch comment, leaving it unterminated,
   and the memcpy/memset table references and closing "};" also look
   dropped.  Verify against upstream before building.  */
725 struct processor_costs k8_cost
= {
726 COSTS_N_INSNS (1), /* cost of an add instruction */
727 COSTS_N_INSNS (2), /* cost of a lea instruction */
728 COSTS_N_INSNS (1), /* variable shift costs */
729 COSTS_N_INSNS (1), /* constant shift costs */
730 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
731 COSTS_N_INSNS (4), /* HI */
732 COSTS_N_INSNS (3), /* SI */
733 COSTS_N_INSNS (4), /* DI */
734 COSTS_N_INSNS (5)}, /* other */
735 0, /* cost of multiply per each bit set */
736 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
737 COSTS_N_INSNS (26), /* HI */
738 COSTS_N_INSNS (42), /* SI */
739 COSTS_N_INSNS (74), /* DI */
740 COSTS_N_INSNS (74)}, /* other */
741 COSTS_N_INSNS (1), /* cost of movsx */
742 COSTS_N_INSNS (1), /* cost of movzx */
743 8, /* "large" insn */
745 4, /* cost for loading QImode using movzbl */
746 {3, 4, 3}, /* cost of loading integer registers
747 in QImode, HImode and SImode.
748 Relative to reg-reg move (2). */
749 {3, 4, 3}, /* cost of storing integer registers */
750 4, /* cost of reg,reg fld/fst */
751 {4, 4, 12}, /* cost of loading fp registers
752 in SFmode, DFmode and XFmode */
753 {6, 6, 8}, /* cost of storing fp registers
754 in SFmode, DFmode and XFmode */
755 2, /* cost of moving MMX register */
756 {3, 3}, /* cost of loading MMX registers
757 in SImode and DImode */
758 {4, 4}, /* cost of storing MMX registers
759 in SImode and DImode */
760 2, /* cost of moving SSE register */
761 {4, 3, 6}, /* cost of loading SSE registers
762 in SImode, DImode and TImode */
763 {4, 4, 5}, /* cost of storing SSE registers
764 in SImode, DImode and TImode */
765 5, /* MMX or SSE register to integer */
766 64, /* size of l1 cache. */
767 512, /* size of l2 cache. */
768 64, /* size of prefetch block */
769 /* New AMD processors never drop prefetches; if they cannot be performed
770 immediately, they are queued. We set number of simultaneous prefetches
771 to a large constant to reflect this (it probably is not a good idea not
772 to limit number of prefetches at all, as their execution also takes some
774 100, /* number of parallel prefetches */
776 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
777 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
778 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
779 COSTS_N_INSNS (2), /* cost of FABS instruction. */
780 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
781 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
785 4, /* scalar_stmt_cost. */
786 2, /* scalar load_cost. */
787 2, /* scalar_store_cost. */
788 5, /* vec_stmt_cost. */
789 0, /* vec_to_scalar_cost. */
790 2, /* scalar_to_vec_cost. */
791 2, /* vec_align_load_cost. */
792 3, /* vec_unalign_load_cost. */
793 3, /* vec_store_cost. */
794 3, /* cond_taken_branch_cost. */
795 2, /* cond_not_taken_branch_cost. */
798 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
799 very small blocks it is better to use loop. For large blocks, libcall can
800 do nontemporary accesses and beat inline considerably. */
801 static stringop_algs amdfam10_memcpy
[2] = {
802 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
803 {-1, rep_prefix_4_byte
, false}}},
804 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
805 {-1, libcall
, false}}}};
806 static stringop_algs amdfam10_memset
[2] = {
807 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
808 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
809 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
810 {-1, libcall
, false}}}};
/* AMDFAM10-specific cost table (entries in COSTS_N_INSNS cycle units).
   NOTE(review): this copy appears truncated -- the embedded line numbers
   jump (829->831, 851->853, 866->868, 868->870, 875->879, 889->892).
   The bare "MOVD reg..., xmmreg ..." lines below look like fragments of a
   comment whose opening and closing delimiters were dropped, the prefetch
   comment is left unterminated, and the memcpy/memset table references and
   closing "};" also look dropped.  Verify against upstream before building.  */
811 struct processor_costs amdfam10_cost
= {
812 COSTS_N_INSNS (1), /* cost of an add instruction */
813 COSTS_N_INSNS (2), /* cost of a lea instruction */
814 COSTS_N_INSNS (1), /* variable shift costs */
815 COSTS_N_INSNS (1), /* constant shift costs */
816 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
817 COSTS_N_INSNS (4), /* HI */
818 COSTS_N_INSNS (3), /* SI */
819 COSTS_N_INSNS (4), /* DI */
820 COSTS_N_INSNS (5)}, /* other */
821 0, /* cost of multiply per each bit set */
822 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
823 COSTS_N_INSNS (35), /* HI */
824 COSTS_N_INSNS (51), /* SI */
825 COSTS_N_INSNS (83), /* DI */
826 COSTS_N_INSNS (83)}, /* other */
827 COSTS_N_INSNS (1), /* cost of movsx */
828 COSTS_N_INSNS (1), /* cost of movzx */
829 8, /* "large" insn */
831 4, /* cost for loading QImode using movzbl */
832 {3, 4, 3}, /* cost of loading integer registers
833 in QImode, HImode and SImode.
834 Relative to reg-reg move (2). */
835 {3, 4, 3}, /* cost of storing integer registers */
836 4, /* cost of reg,reg fld/fst */
837 {4, 4, 12}, /* cost of loading fp registers
838 in SFmode, DFmode and XFmode */
839 {6, 6, 8}, /* cost of storing fp registers
840 in SFmode, DFmode and XFmode */
841 2, /* cost of moving MMX register */
842 {3, 3}, /* cost of loading MMX registers
843 in SImode and DImode */
844 {4, 4}, /* cost of storing MMX registers
845 in SImode and DImode */
846 2, /* cost of moving SSE register */
847 {4, 4, 3}, /* cost of loading SSE registers
848 in SImode, DImode and TImode */
849 {4, 4, 5}, /* cost of storing SSE registers
850 in SImode, DImode and TImode */
851 3, /* MMX or SSE register to integer */
853 MOVD reg64, xmmreg Double FSTORE 4
854 MOVD reg32, xmmreg Double FSTORE 4
856 MOVD reg64, xmmreg Double FADD 3
858 MOVD reg32, xmmreg Double FADD 3
860 64, /* size of l1 cache. */
861 512, /* size of l2 cache. */
862 64, /* size of prefetch block */
863 /* New AMD processors never drop prefetches; if they cannot be performed
864 immediately, they are queued. We set number of simultaneous prefetches
865 to a large constant to reflect this (it probably is not a good idea not
866 to limit number of prefetches at all, as their execution also takes some
868 100, /* number of parallel prefetches */
870 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
871 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
872 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
873 COSTS_N_INSNS (2), /* cost of FABS instruction. */
874 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
875 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
879 4, /* scalar_stmt_cost. */
880 2, /* scalar load_cost. */
881 2, /* scalar_store_cost. */
882 6, /* vec_stmt_cost. */
883 0, /* vec_to_scalar_cost. */
884 2, /* scalar_to_vec_cost. */
885 2, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 2, /* vec_store_cost. */
888 2, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
892 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
893 very small blocks it is better to use loop. For large blocks, libcall
894 can do nontemporary accesses and beat inline considerably. */
895 static stringop_algs bdver1_memcpy
[2] = {
896 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
897 {-1, rep_prefix_4_byte
, false}}},
898 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
899 {-1, libcall
, false}}}};
900 static stringop_algs bdver1_memset
[2] = {
901 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
902 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
903 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
904 {-1, libcall
, false}}}};
906 const struct processor_costs bdver1_cost
= {
907 COSTS_N_INSNS (1), /* cost of an add instruction */
908 COSTS_N_INSNS (1), /* cost of a lea instruction */
909 COSTS_N_INSNS (1), /* variable shift costs */
910 COSTS_N_INSNS (1), /* constant shift costs */
911 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
912 COSTS_N_INSNS (4), /* HI */
913 COSTS_N_INSNS (4), /* SI */
914 COSTS_N_INSNS (6), /* DI */
915 COSTS_N_INSNS (6)}, /* other */
916 0, /* cost of multiply per each bit set */
917 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
918 COSTS_N_INSNS (35), /* HI */
919 COSTS_N_INSNS (51), /* SI */
920 COSTS_N_INSNS (83), /* DI */
921 COSTS_N_INSNS (83)}, /* other */
922 COSTS_N_INSNS (1), /* cost of movsx */
923 COSTS_N_INSNS (1), /* cost of movzx */
924 8, /* "large" insn */
926 4, /* cost for loading QImode using movzbl */
927 {5, 5, 4}, /* cost of loading integer registers
928 in QImode, HImode and SImode.
929 Relative to reg-reg move (2). */
930 {4, 4, 4}, /* cost of storing integer registers */
931 2, /* cost of reg,reg fld/fst */
932 {5, 5, 12}, /* cost of loading fp registers
933 in SFmode, DFmode and XFmode */
934 {4, 4, 8}, /* cost of storing fp registers
935 in SFmode, DFmode and XFmode */
936 2, /* cost of moving MMX register */
937 {4, 4}, /* cost of loading MMX registers
938 in SImode and DImode */
939 {4, 4}, /* cost of storing MMX registers
940 in SImode and DImode */
941 2, /* cost of moving SSE register */
942 {4, 4, 4}, /* cost of loading SSE registers
943 in SImode, DImode and TImode */
944 {4, 4, 4}, /* cost of storing SSE registers
945 in SImode, DImode and TImode */
946 2, /* MMX or SSE register to integer */
948 MOVD reg64, xmmreg Double FSTORE 4
949 MOVD reg32, xmmreg Double FSTORE 4
951 MOVD reg64, xmmreg Double FADD 3
953 MOVD reg32, xmmreg Double FADD 3
955 16, /* size of l1 cache. */
956 2048, /* size of l2 cache. */
957 64, /* size of prefetch block */
958 /* New AMD processors never drop prefetches; if they cannot be performed
959 immediately, they are queued. We set number of simultaneous prefetches
960 to a large constant to reflect this (it probably is not a good idea not
961 to limit number of prefetches at all, as their execution also takes some
963 100, /* number of parallel prefetches */
965 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
966 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
967 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
968 COSTS_N_INSNS (2), /* cost of FABS instruction. */
969 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
970 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
974 6, /* scalar_stmt_cost. */
975 4, /* scalar load_cost. */
976 4, /* scalar_store_cost. */
977 6, /* vec_stmt_cost. */
978 0, /* vec_to_scalar_cost. */
979 2, /* scalar_to_vec_cost. */
980 4, /* vec_align_load_cost. */
981 4, /* vec_unalign_load_cost. */
982 4, /* vec_store_cost. */
983 2, /* cond_taken_branch_cost. */
984 1, /* cond_not_taken_branch_cost. */
987 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
988 very small blocks it is better to use loop. For large blocks, libcall
989 can do nontemporary accesses and beat inline considerably. */
991 static stringop_algs bdver2_memcpy
[2] = {
992 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
993 {-1, rep_prefix_4_byte
, false}}},
994 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
995 {-1, libcall
, false}}}};
996 static stringop_algs bdver2_memset
[2] = {
997 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
998 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
999 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1000 {-1, libcall
, false}}}};
1002 const struct processor_costs bdver2_cost
= {
1003 COSTS_N_INSNS (1), /* cost of an add instruction */
1004 COSTS_N_INSNS (1), /* cost of a lea instruction */
1005 COSTS_N_INSNS (1), /* variable shift costs */
1006 COSTS_N_INSNS (1), /* constant shift costs */
1007 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1008 COSTS_N_INSNS (4), /* HI */
1009 COSTS_N_INSNS (4), /* SI */
1010 COSTS_N_INSNS (6), /* DI */
1011 COSTS_N_INSNS (6)}, /* other */
1012 0, /* cost of multiply per each bit set */
1013 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1014 COSTS_N_INSNS (35), /* HI */
1015 COSTS_N_INSNS (51), /* SI */
1016 COSTS_N_INSNS (83), /* DI */
1017 COSTS_N_INSNS (83)}, /* other */
1018 COSTS_N_INSNS (1), /* cost of movsx */
1019 COSTS_N_INSNS (1), /* cost of movzx */
1020 8, /* "large" insn */
1022 4, /* cost for loading QImode using movzbl */
1023 {5, 5, 4}, /* cost of loading integer registers
1024 in QImode, HImode and SImode.
1025 Relative to reg-reg move (2). */
1026 {4, 4, 4}, /* cost of storing integer registers */
1027 2, /* cost of reg,reg fld/fst */
1028 {5, 5, 12}, /* cost of loading fp registers
1029 in SFmode, DFmode and XFmode */
1030 {4, 4, 8}, /* cost of storing fp registers
1031 in SFmode, DFmode and XFmode */
1032 2, /* cost of moving MMX register */
1033 {4, 4}, /* cost of loading MMX registers
1034 in SImode and DImode */
1035 {4, 4}, /* cost of storing MMX registers
1036 in SImode and DImode */
1037 2, /* cost of moving SSE register */
1038 {4, 4, 4}, /* cost of loading SSE registers
1039 in SImode, DImode and TImode */
1040 {4, 4, 4}, /* cost of storing SSE registers
1041 in SImode, DImode and TImode */
1042 2, /* MMX or SSE register to integer */
1044 MOVD reg64, xmmreg Double FSTORE 4
1045 MOVD reg32, xmmreg Double FSTORE 4
1047 MOVD reg64, xmmreg Double FADD 3
1049 MOVD reg32, xmmreg Double FADD 3
1051 16, /* size of l1 cache. */
1052 2048, /* size of l2 cache. */
1053 64, /* size of prefetch block */
1054 /* New AMD processors never drop prefetches; if they cannot be performed
1055 immediately, they are queued. We set number of simultaneous prefetches
1056 to a large constant to reflect this (it probably is not a good idea not
1057 to limit number of prefetches at all, as their execution also takes some
1059 100, /* number of parallel prefetches */
1060 2, /* Branch cost */
1061 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1062 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1063 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1064 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1065 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1066 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1070 6, /* scalar_stmt_cost. */
1071 4, /* scalar load_cost. */
1072 4, /* scalar_store_cost. */
1073 6, /* vec_stmt_cost. */
1074 0, /* vec_to_scalar_cost. */
1075 2, /* scalar_to_vec_cost. */
1076 4, /* vec_align_load_cost. */
1077 4, /* vec_unalign_load_cost. */
1078 4, /* vec_store_cost. */
1079 2, /* cond_taken_branch_cost. */
1080 1, /* cond_not_taken_branch_cost. */
1084 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1085 very small blocks it is better to use loop. For large blocks, libcall
1086 can do nontemporary accesses and beat inline considerably. */
1087 static stringop_algs bdver3_memcpy
[2] = {
1088 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1089 {-1, rep_prefix_4_byte
, false}}},
1090 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1091 {-1, libcall
, false}}}};
1092 static stringop_algs bdver3_memset
[2] = {
1093 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1094 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1095 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1096 {-1, libcall
, false}}}};
1097 struct processor_costs bdver3_cost
= {
1098 COSTS_N_INSNS (1), /* cost of an add instruction */
1099 COSTS_N_INSNS (1), /* cost of a lea instruction */
1100 COSTS_N_INSNS (1), /* variable shift costs */
1101 COSTS_N_INSNS (1), /* constant shift costs */
1102 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1103 COSTS_N_INSNS (4), /* HI */
1104 COSTS_N_INSNS (4), /* SI */
1105 COSTS_N_INSNS (6), /* DI */
1106 COSTS_N_INSNS (6)}, /* other */
1107 0, /* cost of multiply per each bit set */
1108 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1109 COSTS_N_INSNS (35), /* HI */
1110 COSTS_N_INSNS (51), /* SI */
1111 COSTS_N_INSNS (83), /* DI */
1112 COSTS_N_INSNS (83)}, /* other */
1113 COSTS_N_INSNS (1), /* cost of movsx */
1114 COSTS_N_INSNS (1), /* cost of movzx */
1115 8, /* "large" insn */
1117 4, /* cost for loading QImode using movzbl */
1118 {5, 5, 4}, /* cost of loading integer registers
1119 in QImode, HImode and SImode.
1120 Relative to reg-reg move (2). */
1121 {4, 4, 4}, /* cost of storing integer registers */
1122 2, /* cost of reg,reg fld/fst */
1123 {5, 5, 12}, /* cost of loading fp registers
1124 in SFmode, DFmode and XFmode */
1125 {4, 4, 8}, /* cost of storing fp registers
1126 in SFmode, DFmode and XFmode */
1127 2, /* cost of moving MMX register */
1128 {4, 4}, /* cost of loading MMX registers
1129 in SImode and DImode */
1130 {4, 4}, /* cost of storing MMX registers
1131 in SImode and DImode */
1132 2, /* cost of moving SSE register */
1133 {4, 4, 4}, /* cost of loading SSE registers
1134 in SImode, DImode and TImode */
1135 {4, 4, 4}, /* cost of storing SSE registers
1136 in SImode, DImode and TImode */
1137 2, /* MMX or SSE register to integer */
1138 16, /* size of l1 cache. */
1139 2048, /* size of l2 cache. */
1140 64, /* size of prefetch block */
1141 /* New AMD processors never drop prefetches; if they cannot be performed
1142 immediately, they are queued. We set number of simultaneous prefetches
1143 to a large constant to reflect this (it probably is not a good idea not
1144 to limit number of prefetches at all, as their execution also takes some
1146 100, /* number of parallel prefetches */
1147 2, /* Branch cost */
1148 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1149 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1150 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1151 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1152 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1153 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1157 6, /* scalar_stmt_cost. */
1158 4, /* scalar load_cost. */
1159 4, /* scalar_store_cost. */
1160 6, /* vec_stmt_cost. */
1161 0, /* vec_to_scalar_cost. */
1162 2, /* scalar_to_vec_cost. */
1163 4, /* vec_align_load_cost. */
1164 4, /* vec_unalign_load_cost. */
1165 4, /* vec_store_cost. */
1166 2, /* cond_taken_branch_cost. */
1167 1, /* cond_not_taken_branch_cost. */
1170 /* BDVER4 has optimized REP instruction for medium sized blocks, but for
1171 very small blocks it is better to use loop. For large blocks, libcall
1172 can do nontemporary accesses and beat inline considerably. */
1173 static stringop_algs bdver4_memcpy
[2] = {
1174 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1175 {-1, rep_prefix_4_byte
, false}}},
1176 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1177 {-1, libcall
, false}}}};
1178 static stringop_algs bdver4_memset
[2] = {
1179 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1180 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1181 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1182 {-1, libcall
, false}}}};
1183 struct processor_costs bdver4_cost
= {
1184 COSTS_N_INSNS (1), /* cost of an add instruction */
1185 COSTS_N_INSNS (1), /* cost of a lea instruction */
1186 COSTS_N_INSNS (1), /* variable shift costs */
1187 COSTS_N_INSNS (1), /* constant shift costs */
1188 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1189 COSTS_N_INSNS (4), /* HI */
1190 COSTS_N_INSNS (4), /* SI */
1191 COSTS_N_INSNS (6), /* DI */
1192 COSTS_N_INSNS (6)}, /* other */
1193 0, /* cost of multiply per each bit set */
1194 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1195 COSTS_N_INSNS (35), /* HI */
1196 COSTS_N_INSNS (51), /* SI */
1197 COSTS_N_INSNS (83), /* DI */
1198 COSTS_N_INSNS (83)}, /* other */
1199 COSTS_N_INSNS (1), /* cost of movsx */
1200 COSTS_N_INSNS (1), /* cost of movzx */
1201 8, /* "large" insn */
1203 4, /* cost for loading QImode using movzbl */
1204 {5, 5, 4}, /* cost of loading integer registers
1205 in QImode, HImode and SImode.
1206 Relative to reg-reg move (2). */
1207 {4, 4, 4}, /* cost of storing integer registers */
1208 2, /* cost of reg,reg fld/fst */
1209 {5, 5, 12}, /* cost of loading fp registers
1210 in SFmode, DFmode and XFmode */
1211 {4, 4, 8}, /* cost of storing fp registers
1212 in SFmode, DFmode and XFmode */
1213 2, /* cost of moving MMX register */
1214 {4, 4}, /* cost of loading MMX registers
1215 in SImode and DImode */
1216 {4, 4}, /* cost of storing MMX registers
1217 in SImode and DImode */
1218 2, /* cost of moving SSE register */
1219 {4, 4, 4}, /* cost of loading SSE registers
1220 in SImode, DImode and TImode */
1221 {4, 4, 4}, /* cost of storing SSE registers
1222 in SImode, DImode and TImode */
1223 2, /* MMX or SSE register to integer */
1224 16, /* size of l1 cache. */
1225 2048, /* size of l2 cache. */
1226 64, /* size of prefetch block */
1227 /* New AMD processors never drop prefetches; if they cannot be performed
1228 immediately, they are queued. We set number of simultaneous prefetches
1229 to a large constant to reflect this (it probably is not a good idea not
1230 to limit number of prefetches at all, as their execution also takes some
1232 100, /* number of parallel prefetches */
1233 2, /* Branch cost */
1234 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1235 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1236 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1237 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1238 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1239 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1243 6, /* scalar_stmt_cost. */
1244 4, /* scalar load_cost. */
1245 4, /* scalar_store_cost. */
1246 6, /* vec_stmt_cost. */
1247 0, /* vec_to_scalar_cost. */
1248 2, /* scalar_to_vec_cost. */
1249 4, /* vec_align_load_cost. */
1250 4, /* vec_unalign_load_cost. */
1251 4, /* vec_store_cost. */
1252 2, /* cond_taken_branch_cost. */
1253 1, /* cond_not_taken_branch_cost. */
1256 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1257 very small blocks it is better to use loop. For large blocks, libcall can
1258 do nontemporary accesses and beat inline considerably. */
1259 static stringop_algs btver1_memcpy
[2] = {
1260 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1261 {-1, rep_prefix_4_byte
, false}}},
1262 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1263 {-1, libcall
, false}}}};
1264 static stringop_algs btver1_memset
[2] = {
1265 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1266 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1267 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1268 {-1, libcall
, false}}}};
1269 const struct processor_costs btver1_cost
= {
1270 COSTS_N_INSNS (1), /* cost of an add instruction */
1271 COSTS_N_INSNS (2), /* cost of a lea instruction */
1272 COSTS_N_INSNS (1), /* variable shift costs */
1273 COSTS_N_INSNS (1), /* constant shift costs */
1274 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1275 COSTS_N_INSNS (4), /* HI */
1276 COSTS_N_INSNS (3), /* SI */
1277 COSTS_N_INSNS (4), /* DI */
1278 COSTS_N_INSNS (5)}, /* other */
1279 0, /* cost of multiply per each bit set */
1280 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1281 COSTS_N_INSNS (35), /* HI */
1282 COSTS_N_INSNS (51), /* SI */
1283 COSTS_N_INSNS (83), /* DI */
1284 COSTS_N_INSNS (83)}, /* other */
1285 COSTS_N_INSNS (1), /* cost of movsx */
1286 COSTS_N_INSNS (1), /* cost of movzx */
1287 8, /* "large" insn */
1289 4, /* cost for loading QImode using movzbl */
1290 {3, 4, 3}, /* cost of loading integer registers
1291 in QImode, HImode and SImode.
1292 Relative to reg-reg move (2). */
1293 {3, 4, 3}, /* cost of storing integer registers */
1294 4, /* cost of reg,reg fld/fst */
1295 {4, 4, 12}, /* cost of loading fp registers
1296 in SFmode, DFmode and XFmode */
1297 {6, 6, 8}, /* cost of storing fp registers
1298 in SFmode, DFmode and XFmode */
1299 2, /* cost of moving MMX register */
1300 {3, 3}, /* cost of loading MMX registers
1301 in SImode and DImode */
1302 {4, 4}, /* cost of storing MMX registers
1303 in SImode and DImode */
1304 2, /* cost of moving SSE register */
1305 {4, 4, 3}, /* cost of loading SSE registers
1306 in SImode, DImode and TImode */
1307 {4, 4, 5}, /* cost of storing SSE registers
1308 in SImode, DImode and TImode */
1309 3, /* MMX or SSE register to integer */
1311 MOVD reg64, xmmreg Double FSTORE 4
1312 MOVD reg32, xmmreg Double FSTORE 4
1314 MOVD reg64, xmmreg Double FADD 3
1316 MOVD reg32, xmmreg Double FADD 3
1318 32, /* size of l1 cache. */
1319 512, /* size of l2 cache. */
1320 64, /* size of prefetch block */
1321 100, /* number of parallel prefetches */
1322 2, /* Branch cost */
1323 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1324 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1325 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1326 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1327 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1328 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1332 4, /* scalar_stmt_cost. */
1333 2, /* scalar load_cost. */
1334 2, /* scalar_store_cost. */
1335 6, /* vec_stmt_cost. */
1336 0, /* vec_to_scalar_cost. */
1337 2, /* scalar_to_vec_cost. */
1338 2, /* vec_align_load_cost. */
1339 2, /* vec_unalign_load_cost. */
1340 2, /* vec_store_cost. */
1341 2, /* cond_taken_branch_cost. */
1342 1, /* cond_not_taken_branch_cost. */
1345 static stringop_algs btver2_memcpy
[2] = {
1346 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1347 {-1, rep_prefix_4_byte
, false}}},
1348 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1349 {-1, libcall
, false}}}};
1350 static stringop_algs btver2_memset
[2] = {
1351 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1352 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1353 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1354 {-1, libcall
, false}}}};
1355 const struct processor_costs btver2_cost
= {
1356 COSTS_N_INSNS (1), /* cost of an add instruction */
1357 COSTS_N_INSNS (2), /* cost of a lea instruction */
1358 COSTS_N_INSNS (1), /* variable shift costs */
1359 COSTS_N_INSNS (1), /* constant shift costs */
1360 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1361 COSTS_N_INSNS (4), /* HI */
1362 COSTS_N_INSNS (3), /* SI */
1363 COSTS_N_INSNS (4), /* DI */
1364 COSTS_N_INSNS (5)}, /* other */
1365 0, /* cost of multiply per each bit set */
1366 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1367 COSTS_N_INSNS (35), /* HI */
1368 COSTS_N_INSNS (51), /* SI */
1369 COSTS_N_INSNS (83), /* DI */
1370 COSTS_N_INSNS (83)}, /* other */
1371 COSTS_N_INSNS (1), /* cost of movsx */
1372 COSTS_N_INSNS (1), /* cost of movzx */
1373 8, /* "large" insn */
1375 4, /* cost for loading QImode using movzbl */
1376 {3, 4, 3}, /* cost of loading integer registers
1377 in QImode, HImode and SImode.
1378 Relative to reg-reg move (2). */
1379 {3, 4, 3}, /* cost of storing integer registers */
1380 4, /* cost of reg,reg fld/fst */
1381 {4, 4, 12}, /* cost of loading fp registers
1382 in SFmode, DFmode and XFmode */
1383 {6, 6, 8}, /* cost of storing fp registers
1384 in SFmode, DFmode and XFmode */
1385 2, /* cost of moving MMX register */
1386 {3, 3}, /* cost of loading MMX registers
1387 in SImode and DImode */
1388 {4, 4}, /* cost of storing MMX registers
1389 in SImode and DImode */
1390 2, /* cost of moving SSE register */
1391 {4, 4, 3}, /* cost of loading SSE registers
1392 in SImode, DImode and TImode */
1393 {4, 4, 5}, /* cost of storing SSE registers
1394 in SImode, DImode and TImode */
1395 3, /* MMX or SSE register to integer */
1397 MOVD reg64, xmmreg Double FSTORE 4
1398 MOVD reg32, xmmreg Double FSTORE 4
1400 MOVD reg64, xmmreg Double FADD 3
1402 MOVD reg32, xmmreg Double FADD 3
1404 32, /* size of l1 cache. */
1405 2048, /* size of l2 cache. */
1406 64, /* size of prefetch block */
1407 100, /* number of parallel prefetches */
1408 2, /* Branch cost */
1409 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1410 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1411 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1412 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1413 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1414 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1417 4, /* scalar_stmt_cost. */
1418 2, /* scalar load_cost. */
1419 2, /* scalar_store_cost. */
1420 6, /* vec_stmt_cost. */
1421 0, /* vec_to_scalar_cost. */
1422 2, /* scalar_to_vec_cost. */
1423 2, /* vec_align_load_cost. */
1424 2, /* vec_unalign_load_cost. */
1425 2, /* vec_store_cost. */
1426 2, /* cond_taken_branch_cost. */
1427 1, /* cond_not_taken_branch_cost. */
1430 static stringop_algs pentium4_memcpy
[2] = {
1431 {libcall
, {{12, loop_1_byte
, false}, {-1, rep_prefix_4_byte
, false}}},
1432 DUMMY_STRINGOP_ALGS
};
1433 static stringop_algs pentium4_memset
[2] = {
1434 {libcall
, {{6, loop_1_byte
, false}, {48, loop
, false},
1435 {20480, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1436 DUMMY_STRINGOP_ALGS
};
1439 struct processor_costs pentium4_cost
= {
1440 COSTS_N_INSNS (1), /* cost of an add instruction */
1441 COSTS_N_INSNS (3), /* cost of a lea instruction */
1442 COSTS_N_INSNS (4), /* variable shift costs */
1443 COSTS_N_INSNS (4), /* constant shift costs */
1444 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1445 COSTS_N_INSNS (15), /* HI */
1446 COSTS_N_INSNS (15), /* SI */
1447 COSTS_N_INSNS (15), /* DI */
1448 COSTS_N_INSNS (15)}, /* other */
1449 0, /* cost of multiply per each bit set */
1450 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1451 COSTS_N_INSNS (56), /* HI */
1452 COSTS_N_INSNS (56), /* SI */
1453 COSTS_N_INSNS (56), /* DI */
1454 COSTS_N_INSNS (56)}, /* other */
1455 COSTS_N_INSNS (1), /* cost of movsx */
1456 COSTS_N_INSNS (1), /* cost of movzx */
1457 16, /* "large" insn */
1459 2, /* cost for loading QImode using movzbl */
1460 {4, 5, 4}, /* cost of loading integer registers
1461 in QImode, HImode and SImode.
1462 Relative to reg-reg move (2). */
1463 {2, 3, 2}, /* cost of storing integer registers */
1464 2, /* cost of reg,reg fld/fst */
1465 {2, 2, 6}, /* cost of loading fp registers
1466 in SFmode, DFmode and XFmode */
1467 {4, 4, 6}, /* cost of storing fp registers
1468 in SFmode, DFmode and XFmode */
1469 2, /* cost of moving MMX register */
1470 {2, 2}, /* cost of loading MMX registers
1471 in SImode and DImode */
1472 {2, 2}, /* cost of storing MMX registers
1473 in SImode and DImode */
1474 12, /* cost of moving SSE register */
1475 {12, 12, 12}, /* cost of loading SSE registers
1476 in SImode, DImode and TImode */
1477 {2, 2, 8}, /* cost of storing SSE registers
1478 in SImode, DImode and TImode */
1479 10, /* MMX or SSE register to integer */
1480 8, /* size of l1 cache. */
1481 256, /* size of l2 cache. */
1482 64, /* size of prefetch block */
1483 6, /* number of parallel prefetches */
1484 2, /* Branch cost */
1485 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1486 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1487 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1488 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1489 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1490 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1493 1, /* scalar_stmt_cost. */
1494 1, /* scalar load_cost. */
1495 1, /* scalar_store_cost. */
1496 1, /* vec_stmt_cost. */
1497 1, /* vec_to_scalar_cost. */
1498 1, /* scalar_to_vec_cost. */
1499 1, /* vec_align_load_cost. */
1500 2, /* vec_unalign_load_cost. */
1501 1, /* vec_store_cost. */
1502 3, /* cond_taken_branch_cost. */
1503 1, /* cond_not_taken_branch_cost. */
1506 static stringop_algs nocona_memcpy
[2] = {
1507 {libcall
, {{12, loop_1_byte
, false}, {-1, rep_prefix_4_byte
, false}}},
1508 {libcall
, {{32, loop
, false}, {20000, rep_prefix_8_byte
, false},
1509 {100000, unrolled_loop
, false}, {-1, libcall
, false}}}};
1511 static stringop_algs nocona_memset
[2] = {
1512 {libcall
, {{6, loop_1_byte
, false}, {48, loop
, false},
1513 {20480, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1514 {libcall
, {{24, loop
, false}, {64, unrolled_loop
, false},
1515 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1518 struct processor_costs nocona_cost
= {
1519 COSTS_N_INSNS (1), /* cost of an add instruction */
1520 COSTS_N_INSNS (1), /* cost of a lea instruction */
1521 COSTS_N_INSNS (1), /* variable shift costs */
1522 COSTS_N_INSNS (1), /* constant shift costs */
1523 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1524 COSTS_N_INSNS (10), /* HI */
1525 COSTS_N_INSNS (10), /* SI */
1526 COSTS_N_INSNS (10), /* DI */
1527 COSTS_N_INSNS (10)}, /* other */
1528 0, /* cost of multiply per each bit set */
1529 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1530 COSTS_N_INSNS (66), /* HI */
1531 COSTS_N_INSNS (66), /* SI */
1532 COSTS_N_INSNS (66), /* DI */
1533 COSTS_N_INSNS (66)}, /* other */
1534 COSTS_N_INSNS (1), /* cost of movsx */
1535 COSTS_N_INSNS (1), /* cost of movzx */
1536 16, /* "large" insn */
1537 17, /* MOVE_RATIO */
1538 4, /* cost for loading QImode using movzbl */
1539 {4, 4, 4}, /* cost of loading integer registers
1540 in QImode, HImode and SImode.
1541 Relative to reg-reg move (2). */
1542 {4, 4, 4}, /* cost of storing integer registers */
1543 3, /* cost of reg,reg fld/fst */
1544 {12, 12, 12}, /* cost of loading fp registers
1545 in SFmode, DFmode and XFmode */
1546 {4, 4, 4}, /* cost of storing fp registers
1547 in SFmode, DFmode and XFmode */
1548 6, /* cost of moving MMX register */
1549 {12, 12}, /* cost of loading MMX registers
1550 in SImode and DImode */
1551 {12, 12}, /* cost of storing MMX registers
1552 in SImode and DImode */
1553 6, /* cost of moving SSE register */
1554 {12, 12, 12}, /* cost of loading SSE registers
1555 in SImode, DImode and TImode */
1556 {12, 12, 12}, /* cost of storing SSE registers
1557 in SImode, DImode and TImode */
1558 8, /* MMX or SSE register to integer */
1559 8, /* size of l1 cache. */
1560 1024, /* size of l2 cache. */
1561 128, /* size of prefetch block */
1562 8, /* number of parallel prefetches */
1563 1, /* Branch cost */
1564 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1565 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1566 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1567 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1568 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1569 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1572 1, /* scalar_stmt_cost. */
1573 1, /* scalar load_cost. */
1574 1, /* scalar_store_cost. */
1575 1, /* vec_stmt_cost. */
1576 1, /* vec_to_scalar_cost. */
1577 1, /* scalar_to_vec_cost. */
1578 1, /* vec_align_load_cost. */
1579 2, /* vec_unalign_load_cost. */
1580 1, /* vec_store_cost. */
1581 3, /* cond_taken_branch_cost. */
1582 1, /* cond_not_taken_branch_cost. */
1585 static stringop_algs atom_memcpy
[2] = {
1586 {libcall
, {{11, loop
, false}, {-1, rep_prefix_4_byte
, false}}},
1587 {libcall
, {{32, loop
, false}, {64, rep_prefix_4_byte
, false},
1588 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1589 static stringop_algs atom_memset
[2] = {
1590 {libcall
, {{8, loop
, false}, {15, unrolled_loop
, false},
1591 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1592 {libcall
, {{24, loop
, false}, {32, unrolled_loop
, false},
1593 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1595 struct processor_costs atom_cost
= {
1596 COSTS_N_INSNS (1), /* cost of an add instruction */
1597 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1598 COSTS_N_INSNS (1), /* variable shift costs */
1599 COSTS_N_INSNS (1), /* constant shift costs */
1600 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1601 COSTS_N_INSNS (4), /* HI */
1602 COSTS_N_INSNS (3), /* SI */
1603 COSTS_N_INSNS (4), /* DI */
1604 COSTS_N_INSNS (2)}, /* other */
1605 0, /* cost of multiply per each bit set */
1606 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1607 COSTS_N_INSNS (26), /* HI */
1608 COSTS_N_INSNS (42), /* SI */
1609 COSTS_N_INSNS (74), /* DI */
1610 COSTS_N_INSNS (74)}, /* other */
1611 COSTS_N_INSNS (1), /* cost of movsx */
1612 COSTS_N_INSNS (1), /* cost of movzx */
1613 8, /* "large" insn */
1614 17, /* MOVE_RATIO */
1615 4, /* cost for loading QImode using movzbl */
1616 {4, 4, 4}, /* cost of loading integer registers
1617 in QImode, HImode and SImode.
1618 Relative to reg-reg move (2). */
1619 {4, 4, 4}, /* cost of storing integer registers */
1620 4, /* cost of reg,reg fld/fst */
1621 {12, 12, 12}, /* cost of loading fp registers
1622 in SFmode, DFmode and XFmode */
1623 {6, 6, 8}, /* cost of storing fp registers
1624 in SFmode, DFmode and XFmode */
1625 2, /* cost of moving MMX register */
1626 {8, 8}, /* cost of loading MMX registers
1627 in SImode and DImode */
1628 {8, 8}, /* cost of storing MMX registers
1629 in SImode and DImode */
1630 2, /* cost of moving SSE register */
1631 {8, 8, 8}, /* cost of loading SSE registers
1632 in SImode, DImode and TImode */
1633 {8, 8, 8}, /* cost of storing SSE registers
1634 in SImode, DImode and TImode */
1635 5, /* MMX or SSE register to integer */
1636 32, /* size of l1 cache. */
1637 256, /* size of l2 cache. */
1638 64, /* size of prefetch block */
1639 6, /* number of parallel prefetches */
1640 3, /* Branch cost */
1641 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1642 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1643 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1644 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1645 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1646 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1649 1, /* scalar_stmt_cost. */
1650 1, /* scalar load_cost. */
1651 1, /* scalar_store_cost. */
1652 1, /* vec_stmt_cost. */
1653 1, /* vec_to_scalar_cost. */
1654 1, /* scalar_to_vec_cost. */
1655 1, /* vec_align_load_cost. */
1656 2, /* vec_unalign_load_cost. */
1657 1, /* vec_store_cost. */
1658 3, /* cond_taken_branch_cost. */
1659 1, /* cond_not_taken_branch_cost. */
1662 static stringop_algs slm_memcpy
[2] = {
1663 {libcall
, {{11, loop
, false}, {-1, rep_prefix_4_byte
, false}}},
1664 {libcall
, {{32, loop
, false}, {64, rep_prefix_4_byte
, false},
1665 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1666 static stringop_algs slm_memset
[2] = {
1667 {libcall
, {{8, loop
, false}, {15, unrolled_loop
, false},
1668 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1669 {libcall
, {{24, loop
, false}, {32, unrolled_loop
, false},
1670 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1672 struct processor_costs slm_cost
= {
1673 COSTS_N_INSNS (1), /* cost of an add instruction */
1674 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1675 COSTS_N_INSNS (1), /* variable shift costs */
1676 COSTS_N_INSNS (1), /* constant shift costs */
1677 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1678 COSTS_N_INSNS (4), /* HI */
1679 COSTS_N_INSNS (3), /* SI */
1680 COSTS_N_INSNS (4), /* DI */
1681 COSTS_N_INSNS (2)}, /* other */
1682 0, /* cost of multiply per each bit set */
1683 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1684 COSTS_N_INSNS (26), /* HI */
1685 COSTS_N_INSNS (42), /* SI */
1686 COSTS_N_INSNS (74), /* DI */
1687 COSTS_N_INSNS (74)}, /* other */
1688 COSTS_N_INSNS (1), /* cost of movsx */
1689 COSTS_N_INSNS (1), /* cost of movzx */
1690 8, /* "large" insn */
1691 17, /* MOVE_RATIO */
1692 4, /* cost for loading QImode using movzbl */
1693 {4, 4, 4}, /* cost of loading integer registers
1694 in QImode, HImode and SImode.
1695 Relative to reg-reg move (2). */
1696 {4, 4, 4}, /* cost of storing integer registers */
1697 4, /* cost of reg,reg fld/fst */
1698 {12, 12, 12}, /* cost of loading fp registers
1699 in SFmode, DFmode and XFmode */
1700 {6, 6, 8}, /* cost of storing fp registers
1701 in SFmode, DFmode and XFmode */
1702 2, /* cost of moving MMX register */
1703 {8, 8}, /* cost of loading MMX registers
1704 in SImode and DImode */
1705 {8, 8}, /* cost of storing MMX registers
1706 in SImode and DImode */
1707 2, /* cost of moving SSE register */
1708 {8, 8, 8}, /* cost of loading SSE registers
1709 in SImode, DImode and TImode */
1710 {8, 8, 8}, /* cost of storing SSE registers
1711 in SImode, DImode and TImode */
1712 5, /* MMX or SSE register to integer */
1713 32, /* size of l1 cache. */
1714 256, /* size of l2 cache. */
1715 64, /* size of prefetch block */
1716 6, /* number of parallel prefetches */
1717 3, /* Branch cost */
1718 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1719 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1720 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1721 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1722 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1723 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1726 1, /* scalar_stmt_cost. */
1727 1, /* scalar load_cost. */
1728 1, /* scalar_store_cost. */
1729 1, /* vec_stmt_cost. */
1730 1, /* vec_to_scalar_cost. */
1731 1, /* scalar_to_vec_cost. */
1732 1, /* vec_align_load_cost. */
1733 2, /* vec_unalign_load_cost. */
1734 1, /* vec_store_cost. */
1735 3, /* cond_taken_branch_cost. */
1736 1, /* cond_not_taken_branch_cost. */
1739 /* Generic should produce code tuned for Core-i7 (and newer chips)
1740 and btver1 (and newer chips). */
1742 static stringop_algs generic_memcpy
[2] = {
1743 {libcall
, {{32, loop
, false}, {8192, rep_prefix_4_byte
, false},
1744 {-1, libcall
, false}}},
1745 {libcall
, {{32, loop
, false}, {8192, rep_prefix_8_byte
, false},
1746 {-1, libcall
, false}}}};
1747 static stringop_algs generic_memset
[2] = {
1748 {libcall
, {{32, loop
, false}, {8192, rep_prefix_4_byte
, false},
1749 {-1, libcall
, false}}},
1750 {libcall
, {{32, loop
, false}, {8192, rep_prefix_8_byte
, false},
1751 {-1, libcall
, false}}}};
1753 struct processor_costs generic_cost
= {
1754 COSTS_N_INSNS (1), /* cost of an add instruction */
1755 /* On all chips taken into consideration lea is 2 cycles and more. With
1756 this cost however our current implementation of synth_mult results in
1757 use of unnecessary temporary registers causing regression on several
1758 SPECfp benchmarks. */
1759 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1760 COSTS_N_INSNS (1), /* variable shift costs */
1761 COSTS_N_INSNS (1), /* constant shift costs */
1762 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1763 COSTS_N_INSNS (4), /* HI */
1764 COSTS_N_INSNS (3), /* SI */
1765 COSTS_N_INSNS (4), /* DI */
1766 COSTS_N_INSNS (2)}, /* other */
1767 0, /* cost of multiply per each bit set */
1768 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1769 COSTS_N_INSNS (26), /* HI */
1770 COSTS_N_INSNS (42), /* SI */
1771 COSTS_N_INSNS (74), /* DI */
1772 COSTS_N_INSNS (74)}, /* other */
1773 COSTS_N_INSNS (1), /* cost of movsx */
1774 COSTS_N_INSNS (1), /* cost of movzx */
1775 8, /* "large" insn */
1776 17, /* MOVE_RATIO */
1777 4, /* cost for loading QImode using movzbl */
1778 {4, 4, 4}, /* cost of loading integer registers
1779 in QImode, HImode and SImode.
1780 Relative to reg-reg move (2). */
1781 {4, 4, 4}, /* cost of storing integer registers */
1782 4, /* cost of reg,reg fld/fst */
1783 {12, 12, 12}, /* cost of loading fp registers
1784 in SFmode, DFmode and XFmode */
1785 {6, 6, 8}, /* cost of storing fp registers
1786 in SFmode, DFmode and XFmode */
1787 2, /* cost of moving MMX register */
1788 {8, 8}, /* cost of loading MMX registers
1789 in SImode and DImode */
1790 {8, 8}, /* cost of storing MMX registers
1791 in SImode and DImode */
1792 2, /* cost of moving SSE register */
1793 {8, 8, 8}, /* cost of loading SSE registers
1794 in SImode, DImode and TImode */
1795 {8, 8, 8}, /* cost of storing SSE registers
1796 in SImode, DImode and TImode */
1797 5, /* MMX or SSE register to integer */
1798 32, /* size of l1 cache. */
1799 512, /* size of l2 cache. */
1800 64, /* size of prefetch block */
1801 6, /* number of parallel prefetches */
1802 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
1803 value is increased to perhaps more appropriate value of 5. */
1804 3, /* Branch cost */
1805 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1806 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1807 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1808 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1809 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1810 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1813 1, /* scalar_stmt_cost. */
1814 1, /* scalar load_cost. */
1815 1, /* scalar_store_cost. */
1816 1, /* vec_stmt_cost. */
1817 1, /* vec_to_scalar_cost. */
1818 1, /* scalar_to_vec_cost. */
1819 1, /* vec_align_load_cost. */
1820 2, /* vec_unalign_load_cost. */
1821 1, /* vec_store_cost. */
1822 3, /* cond_taken_branch_cost. */
1823 1, /* cond_not_taken_branch_cost. */
1826 /* core_cost should produce code tuned for Core familly of CPUs. */
1827 static stringop_algs core_memcpy
[2] = {
1828 {libcall
, {{1024, rep_prefix_4_byte
, true}, {-1, libcall
, false}}},
1829 {libcall
, {{24, loop
, true}, {128, rep_prefix_8_byte
, true},
1830 {-1, libcall
, false}}}};
1831 static stringop_algs core_memset
[2] = {
1832 {libcall
, {{6, loop_1_byte
, true},
1834 {8192, rep_prefix_4_byte
, true},
1835 {-1, libcall
, false}}},
1836 {libcall
, {{24, loop
, true}, {512, rep_prefix_8_byte
, true},
1837 {-1, libcall
, false}}}};
1840 struct processor_costs core_cost
= {
1841 COSTS_N_INSNS (1), /* cost of an add instruction */
1842 /* On all chips taken into consideration lea is 2 cycles and more. With
1843 this cost however our current implementation of synth_mult results in
1844 use of unnecessary temporary registers causing regression on several
1845 SPECfp benchmarks. */
1846 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1847 COSTS_N_INSNS (1), /* variable shift costs */
1848 COSTS_N_INSNS (1), /* constant shift costs */
1849 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1850 COSTS_N_INSNS (4), /* HI */
1851 COSTS_N_INSNS (3), /* SI */
1852 COSTS_N_INSNS (4), /* DI */
1853 COSTS_N_INSNS (2)}, /* other */
1854 0, /* cost of multiply per each bit set */
1855 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1856 COSTS_N_INSNS (26), /* HI */
1857 COSTS_N_INSNS (42), /* SI */
1858 COSTS_N_INSNS (74), /* DI */
1859 COSTS_N_INSNS (74)}, /* other */
1860 COSTS_N_INSNS (1), /* cost of movsx */
1861 COSTS_N_INSNS (1), /* cost of movzx */
1862 8, /* "large" insn */
1863 17, /* MOVE_RATIO */
1864 4, /* cost for loading QImode using movzbl */
1865 {4, 4, 4}, /* cost of loading integer registers
1866 in QImode, HImode and SImode.
1867 Relative to reg-reg move (2). */
1868 {4, 4, 4}, /* cost of storing integer registers */
1869 4, /* cost of reg,reg fld/fst */
1870 {12, 12, 12}, /* cost of loading fp registers
1871 in SFmode, DFmode and XFmode */
1872 {6, 6, 8}, /* cost of storing fp registers
1873 in SFmode, DFmode and XFmode */
1874 2, /* cost of moving MMX register */
1875 {8, 8}, /* cost of loading MMX registers
1876 in SImode and DImode */
1877 {8, 8}, /* cost of storing MMX registers
1878 in SImode and DImode */
1879 2, /* cost of moving SSE register */
1880 {8, 8, 8}, /* cost of loading SSE registers
1881 in SImode, DImode and TImode */
1882 {8, 8, 8}, /* cost of storing SSE registers
1883 in SImode, DImode and TImode */
1884 5, /* MMX or SSE register to integer */
1885 64, /* size of l1 cache. */
1886 512, /* size of l2 cache. */
1887 64, /* size of prefetch block */
1888 6, /* number of parallel prefetches */
1889 /* FIXME perhaps more appropriate value is 5. */
1890 3, /* Branch cost */
1891 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1892 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1893 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1894 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1895 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1896 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1899 1, /* scalar_stmt_cost. */
1900 1, /* scalar load_cost. */
1901 1, /* scalar_store_cost. */
1902 1, /* vec_stmt_cost. */
1903 1, /* vec_to_scalar_cost. */
1904 1, /* scalar_to_vec_cost. */
1905 1, /* vec_align_load_cost. */
1906 2, /* vec_unalign_load_cost. */
1907 1, /* vec_store_cost. */
1908 3, /* cond_taken_branch_cost. */
1909 1, /* cond_not_taken_branch_cost. */
/* Set by -mtune.  Cost table for the CPU we schedule/tune for.  */
const struct processor_costs *ix86_tune_cost = &pentium_cost;

/* Set by -mtune or -Os.  The cost table consulted for rtx costs; may
   differ from ix86_tune_cost (e.g. when optimizing for size).  */
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  Each m_* macro is a
   one-bit mask keyed by the PROCESSOR_* enum; the compound masks group
   related CPU generations for use in the tuning tables below.  */
/* Intel CPUs.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_COREI7 (1<<PROCESSOR_COREI7)
#define m_COREI7_AVX (1<<PROCESSOR_COREI7_AVX)
#define m_HASWELL (1<<PROCESSOR_HASWELL)
#define m_CORE_ALL (m_CORE2 | m_COREI7 | m_COREI7_AVX | m_HASWELL)
#define m_ATOM (1<<PROCESSOR_ATOM)
#define m_SLM (1<<PROCESSOR_SLM)

/* AMD (and Geode) CPUs.  */
#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER3 (1<<PROCESSOR_BDVER3)
#define m_BDVER4 (1<<PROCESSOR_BDVER4)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_BTVER2 (1<<PROCESSOR_BTVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
#define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)

#define m_GENERIC (1<<PROCESSOR_GENERIC)
1954 const char* ix86_tune_feature_names
[X86_TUNE_LAST
] = {
1956 #define DEF_TUNE(tune, name, selector) name,
1957 #include "x86-tune.def"
/* Feature tests against the various tunings.  Filled in from
   initial_ix86_tune_features for the selected processor.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];
1964 /* Feature tests against the various tunings used to create ix86_tune_features
1965 based on the processor mask. */
1966 static unsigned int initial_ix86_tune_features
[X86_TUNE_LAST
] = {
1968 #define DEF_TUNE(tune, name, selector) selector,
1969 #include "x86-tune.def"
/* Feature tests against the various architecture variations.  Filled in
   from initial_ix86_arch_features for the selected processor.  */
unsigned char ix86_arch_features[X86_ARCH_LAST];
1976 /* Feature tests against the various architecture variations, used to create
1977 ix86_arch_features based on the processor mask. */
1978 static unsigned int initial_ix86_arch_features
[X86_ARCH_LAST
] = {
1979 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
1980 ~(m_386
| m_486
| m_PENT
| m_K6
),
1982 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1985 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1988 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1991 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.
   The initializer macros come from i386.h.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2005 /* Array of the smallest class containing reg number REGNO, indexed by
2006 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2008 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
2010 /* ax, dx, cx, bx */
2011 AREG
, DREG
, CREG
, BREG
,
2012 /* si, di, bp, sp */
2013 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
2015 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
2016 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
2019 /* flags, fpsr, fpcr, frame */
2020 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
2022 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2025 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
2028 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2029 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2030 /* SSE REX registers */
2031 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2033 /* AVX-512 SSE registers */
2034 EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
,
2035 EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
,
2036 EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
,
2037 EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
,
2038 /* Mask registers. */
2039 MASK_REGS
, MASK_EVEX_REGS
, MASK_EVEX_REGS
, MASK_EVEX_REGS
,
2040 MASK_EVEX_REGS
, MASK_EVEX_REGS
, MASK_EVEX_REGS
, MASK_EVEX_REGS
,
2041 /* MPX bound registers */
2042 BND_REGS
, BND_REGS
, BND_REGS
, BND_REGS
,
2045 /* The "default" register map used in 32bit mode. */
2047 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2049 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2050 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2051 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2052 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2053 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2054 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2055 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2056 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2057 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2058 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2059 101, 102, 103, 104, /* bound registers */
2062 /* The "default" register map used in 64bit mode. */
2064 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
2066 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2067 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2068 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2069 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2070 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2071 8,9,10,11,12,13,14,15, /* extended integer registers */
2072 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2073 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2074 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2075 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2076 126, 127, 128, 129, /* bound registers */
2079 /* Define the register numbers to be used in Dwarf debugging information.
2080 The SVR4 reference port C compiler uses the following register numbers
2081 in its Dwarf output code:
2082 0 for %eax (gcc regno = 0)
2083 1 for %ecx (gcc regno = 2)
2084 2 for %edx (gcc regno = 1)
2085 3 for %ebx (gcc regno = 3)
2086 4 for %esp (gcc regno = 7)
2087 5 for %ebp (gcc regno = 6)
2088 6 for %esi (gcc regno = 4)
2089 7 for %edi (gcc regno = 5)
2090 The following three DWARF register numbers are never generated by
2091 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2092 believes these numbers have these meanings.
2093 8 for %eip (no gcc equivalent)
2094 9 for %eflags (gcc regno = 17)
2095 10 for %trapno (no gcc equivalent)
2096 It is not at all clear how we should number the FP stack registers
2097 for the x86 architecture. If the version of SDB on x86/svr4 were
2098 a bit less brain dead with respect to floating-point then we would
2099 have a precedent to follow with respect to DWARF register numbers
2100 for x86 FP registers, but the SDB on x86/svr4 is so completely
2101 broken with respect to FP registers that it is hardly worth thinking
2102 of it as something to strive for compatibility with.
2103 The version of x86/svr4 SDB I have at the moment does (partially)
2104 seem to believe that DWARF register number 11 is associated with
2105 the x86 register %st(0), but that's about all. Higher DWARF
2106 register numbers don't seem to be associated with anything in
2107 particular, and even for DWARF regno 11, SDB only seems to under-
2108 stand that it should say that a variable lives in %st(0) (when
2109 asked via an `=' command) if we said it was in DWARF regno 11,
2110 but SDB still prints garbage when asked for the value of the
2111 variable in question (via a `/' command).
2112 (Also note that the labels SDB prints for various FP stack regs
2113 when doing an `x' command are all wrong.)
2114 Note that these problems generally don't affect the native SVR4
2115 C compiler because it doesn't allow the use of -O with -g and
2116 because when it is *not* optimizing, it allocates a memory
2117 location for each floating-point variable, and the memory
2118 location is what gets described in the DWARF AT_location
2119 attribute for the variable in question.
2120 Regardless of the severe mental illness of the x86/svr4 SDB, we
2121 do something sensible here and we use the following DWARF
2122 register numbers. Note that these are all stack-top-relative
2124 11 for %st(0) (gcc regno = 8)
2125 12 for %st(1) (gcc regno = 9)
2126 13 for %st(2) (gcc regno = 10)
2127 14 for %st(3) (gcc regno = 11)
2128 15 for %st(4) (gcc regno = 12)
2129 16 for %st(5) (gcc regno = 13)
2130 17 for %st(6) (gcc regno = 14)
2131 18 for %st(7) (gcc regno = 15)
2133 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2135 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2136 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2137 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2138 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2139 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2140 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2141 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2142 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2143 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2144 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2145 -1, -1, -1, -1, /* bound registers */
2148 /* Define parameter passing and return registers. */
2150 static int const x86_64_int_parameter_registers
[6] =
2152 DI_REG
, SI_REG
, DX_REG
, CX_REG
, R8_REG
, R9_REG
2155 static int const x86_64_ms_abi_int_parameter_registers
[4] =
2157 CX_REG
, DX_REG
, R8_REG
, R9_REG
2160 static int const x86_64_int_return_registers
[4] =
2162 AX_REG
, DX_REG
, DI_REG
, SI_REG
2165 /* Additional registers that are clobbered by SYSV calls. */
2167 int const x86_64_ms_sysv_extra_clobbered_registers
[12] =
2171 XMM8_REG
, XMM9_REG
, XMM10_REG
, XMM11_REG
,
2172 XMM12_REG
, XMM13_REG
, XMM14_REG
, XMM15_REG
2175 /* Define the structure for the machine field in struct function. */
2177 struct GTY(()) stack_local_entry
{
2178 unsigned short mode
;
2181 struct stack_local_entry
*next
;
2184 /* Structure describing stack frame layout.
2185 Stack grows downward:
2191 saved static chain if ix86_static_chain_on_stack
2193 saved frame pointer if frame_pointer_needed
2194 <- HARD_FRAME_POINTER
2200 <- sse_regs_save_offset
2203 [va_arg registers] |
2207 [padding2] | = to_allocate
2216 int outgoing_arguments_size
;
2218 /* The offsets relative to ARG_POINTER. */
2219 HOST_WIDE_INT frame_pointer_offset
;
2220 HOST_WIDE_INT hard_frame_pointer_offset
;
2221 HOST_WIDE_INT stack_pointer_offset
;
2222 HOST_WIDE_INT hfp_save_offset
;
2223 HOST_WIDE_INT reg_save_offset
;
2224 HOST_WIDE_INT sse_reg_save_offset
;
2226 /* When save_regs_using_mov is set, emit prologue using
2227 move instead of push instructions. */
2228 bool save_regs_using_mov
;
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if processor has SSE prefetch instruction.  */
unsigned char x86_prefetch_sse;

/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";
/* Function pointers for emitting common instruction patterns —
   presumably bound to the SImode or DImode expander variants depending
   on the target word size; TODO confirm at the initialization site
   (not visible in this chunk).  */
static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   the command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling abi specific va_list type nodes.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
2281 /* Fence to use after loop using movnt. */
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).
   NOTE(review): most enumerators were dropped from this copy; restored
   to the canonical list (the count matches the dropped line range).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

/* Maximum number of eightbyte classes an argument can occupy.  */
#define MAX_CLASSES 4
/* Table of constants used by fldpi, fldln2, etc....  Lazily filled in;
   ext_80387_constants_init records whether that has happened yet.  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
2313 static struct machine_function
* ix86_init_machine_status (void);
2314 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
2315 static bool ix86_function_value_regno_p (const unsigned int);
2316 static unsigned int ix86_function_arg_boundary (enum machine_mode
,
2318 static rtx
ix86_static_chain (const_tree
, bool);
2319 static int ix86_function_regparm (const_tree
, const_tree
);
2320 static void ix86_compute_frame_layout (struct ix86_frame
*);
2321 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
2323 static void ix86_add_new_builtins (HOST_WIDE_INT
);
2324 static tree
ix86_canonical_va_list_type (tree
);
2325 static void predict_jump (int);
2326 static unsigned int split_stack_prologue_scratch_regno (void);
2327 static bool i386_asm_output_addr_const_extra (FILE *, rtx
);
/* Indices of the strings saved per-function for the target attribute.  */
enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_MAX
};
/* Prototypes for the target-option / target-attribute machinery.  */
static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
				 const char *, enum fpmath_unit, bool);
static void ix86_function_specific_save (struct cl_target_option *,
					 struct gcc_options *opts);
static void ix86_function_specific_restore (struct gcc_options *opts,
					    struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
					  struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[],
						 struct gcc_options *,
						 struct gcc_options *,
						 struct gcc_options *);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);

static enum calling_abi ix86_function_abi (const_tree);
/* Fallback default 32-bit CPU name; subtargets may pre-define it.
   NOTE(review): the #endif was dropped from this copy; restored.  */
#ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
#endif

/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;
/* Vectorization library interface and handlers.  */
static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);

static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2370 /* Processor target table, indexed by processor number */
2373 const struct processor_costs
*cost
; /* Processor costs */
2374 const int align_loop
; /* Default alignments. */
2375 const int align_loop_max_skip
;
2376 const int align_jump
;
2377 const int align_jump_max_skip
;
2378 const int align_func
;
2381 static const struct ptt processor_target_table
[PROCESSOR_max
] =
2383 {&i386_cost
, 4, 3, 4, 3, 4},
2384 {&i486_cost
, 16, 15, 16, 15, 16},
2385 {&pentium_cost
, 16, 7, 16, 7, 16},
2386 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
2387 {&geode_cost
, 0, 0, 0, 0, 0},
2388 {&k6_cost
, 32, 7, 32, 7, 32},
2389 {&athlon_cost
, 16, 7, 16, 7, 16},
2390 {&pentium4_cost
, 0, 0, 0, 0, 0},
2391 {&k8_cost
, 16, 7, 16, 7, 16},
2392 {&nocona_cost
, 0, 0, 0, 0, 0},
2394 {&core_cost
, 16, 10, 16, 10, 16},
2396 {&core_cost
, 16, 10, 16, 10, 16},
2398 {&core_cost
, 16, 10, 16, 10, 16},
2400 {&core_cost
, 16, 10, 16, 10, 16},
2401 {&generic_cost
, 16, 10, 16, 10, 16},
2402 {&amdfam10_cost
, 32, 24, 32, 7, 32},
2403 {&bdver1_cost
, 16, 10, 16, 7, 11},
2404 {&bdver2_cost
, 16, 10, 16, 7, 11},
2405 {&bdver3_cost
, 16, 10, 16, 7, 11},
2406 {&bdver4_cost
, 16, 10, 16, 7, 11},
2407 {&btver1_cost
, 16, 10, 16, 7, 11},
2408 {&btver2_cost
, 16, 10, 16, 7, 11},
2409 {&atom_cost
, 16, 15, 16, 7, 16},
2410 {&slm_cost
, 16, 15, 16, 7, 16}
/* CPU name strings indexed by TARGET_CPU_DEFAULT_*.
   NOTE(review): the entire initializer list was dropped from this copy
   of the file and must be restored from the upstream source — as it
   stands this declaration is incomplete.  */
static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2450 gate_insert_vzeroupper (void)
2452 return TARGET_AVX
&& !TARGET_AVX512F
&& TARGET_VZEROUPPER
;
2456 rest_of_handle_insert_vzeroupper (void)
2460 /* vzeroupper instructions are inserted immediately after reload to
2461 account for possible spills from 256bit registers. The pass
2462 reuses mode switching infrastructure by re-running mode insertion
2463 pass, so disable entities that have already been processed. */
2464 for (i
= 0; i
< MAX_386_ENTITIES
; i
++)
2465 ix86_optimize_mode_switching
[i
] = 0;
2467 ix86_optimize_mode_switching
[AVX_U128
] = 1;
2469 /* Call optimize_mode_switching. */
2470 g
->get_passes ()->execute_pass_mode_switching ();
2476 const pass_data pass_data_insert_vzeroupper
=
2478 RTL_PASS
, /* type */
2479 "vzeroupper", /* name */
2480 OPTGROUP_NONE
, /* optinfo_flags */
2481 true, /* has_gate */
2482 true, /* has_execute */
2483 TV_NONE
, /* tv_id */
2484 0, /* properties_required */
2485 0, /* properties_provided */
2486 0, /* properties_destroyed */
2487 0, /* todo_flags_start */
2488 ( TODO_df_finish
| TODO_verify_rtl_sharing
| 0 ), /* todo_flags_finish */
2491 class pass_insert_vzeroupper
: public rtl_opt_pass
2494 pass_insert_vzeroupper(gcc::context
*ctxt
)
2495 : rtl_opt_pass(pass_data_insert_vzeroupper
, ctxt
)
2498 /* opt_pass methods: */
2499 bool gate () { return gate_insert_vzeroupper (); }
2500 unsigned int execute () { return rest_of_handle_insert_vzeroupper (); }
2502 }; // class pass_insert_vzeroupper
2507 make_pass_insert_vzeroupper (gcc::context
*ctxt
)
2509 return new pass_insert_vzeroupper (ctxt
);
2512 /* Return true if a red-zone is in use. */
2515 ix86_using_red_zone (void)
2517 return TARGET_RED_ZONE
&& !TARGET_64BIT_MS_ABI
;
2520 /* Return a string that documents the current -m options. The caller is
2521 responsible for freeing the string. */
2524 ix86_target_string (HOST_WIDE_INT isa
, int flags
, const char *arch
,
2525 const char *tune
, enum fpmath_unit fpmath
,
2528 struct ix86_target_opts
2530 const char *option
; /* option string */
2531 HOST_WIDE_INT mask
; /* isa mask options */
2534 /* This table is ordered so that options like -msse4.2 that imply
2535 preceding options while match those first. */
2536 static struct ix86_target_opts isa_opts
[] =
2538 { "-mfma4", OPTION_MASK_ISA_FMA4
},
2539 { "-mfma", OPTION_MASK_ISA_FMA
},
2540 { "-mxop", OPTION_MASK_ISA_XOP
},
2541 { "-mlwp", OPTION_MASK_ISA_LWP
},
2542 { "-mavx512f", OPTION_MASK_ISA_AVX512F
},
2543 { "-mavx512er", OPTION_MASK_ISA_AVX512ER
},
2544 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD
},
2545 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF
},
2546 { "-msse4a", OPTION_MASK_ISA_SSE4A
},
2547 { "-msse4.2", OPTION_MASK_ISA_SSE4_2
},
2548 { "-msse4.1", OPTION_MASK_ISA_SSE4_1
},
2549 { "-mssse3", OPTION_MASK_ISA_SSSE3
},
2550 { "-msse3", OPTION_MASK_ISA_SSE3
},
2551 { "-msse2", OPTION_MASK_ISA_SSE2
},
2552 { "-msse", OPTION_MASK_ISA_SSE
},
2553 { "-m3dnow", OPTION_MASK_ISA_3DNOW
},
2554 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A
},
2555 { "-mmmx", OPTION_MASK_ISA_MMX
},
2556 { "-mabm", OPTION_MASK_ISA_ABM
},
2557 { "-mbmi", OPTION_MASK_ISA_BMI
},
2558 { "-mbmi2", OPTION_MASK_ISA_BMI2
},
2559 { "-mlzcnt", OPTION_MASK_ISA_LZCNT
},
2560 { "-mhle", OPTION_MASK_ISA_HLE
},
2561 { "-mfxsr", OPTION_MASK_ISA_FXSR
},
2562 { "-mrdseed", OPTION_MASK_ISA_RDSEED
},
2563 { "-mprfchw", OPTION_MASK_ISA_PRFCHW
},
2564 { "-madx", OPTION_MASK_ISA_ADX
},
2565 { "-mtbm", OPTION_MASK_ISA_TBM
},
2566 { "-mpopcnt", OPTION_MASK_ISA_POPCNT
},
2567 { "-mmovbe", OPTION_MASK_ISA_MOVBE
},
2568 { "-mcrc32", OPTION_MASK_ISA_CRC32
},
2569 { "-maes", OPTION_MASK_ISA_AES
},
2570 { "-mpclmul", OPTION_MASK_ISA_PCLMUL
},
2571 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE
},
2572 { "-mrdrnd", OPTION_MASK_ISA_RDRND
},
2573 { "-mf16c", OPTION_MASK_ISA_F16C
},
2574 { "-mrtm", OPTION_MASK_ISA_RTM
},
2575 { "-mxsave", OPTION_MASK_ISA_XSAVE
},
2576 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT
},
2577 { "-mmpx", OPTION_MASK_ISA_MPX
},
2581 static struct ix86_target_opts flag_opts
[] =
2583 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE
},
2584 { "-mlong-double-64", MASK_LONG_DOUBLE_64
},
2585 { "-m80387", MASK_80387
},
2586 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS
},
2587 { "-malign-double", MASK_ALIGN_DOUBLE
},
2588 { "-mcld", MASK_CLD
},
2589 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS
},
2590 { "-mieee-fp", MASK_IEEE_FP
},
2591 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS
},
2592 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY
},
2593 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT
},
2594 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS
},
2595 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387
},
2596 { "-mno-push-args", MASK_NO_PUSH_ARGS
},
2597 { "-mno-red-zone", MASK_NO_RED_ZONE
},
2598 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER
},
2599 { "-mrecip", MASK_RECIP
},
2600 { "-mrtd", MASK_RTD
},
2601 { "-msseregparm", MASK_SSEREGPARM
},
2602 { "-mstack-arg-probe", MASK_STACK_PROBE
},
2603 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS
},
2604 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS
},
2605 { "-m8bit-idiv", MASK_USE_8BIT_IDIV
},
2606 { "-mvzeroupper", MASK_VZEROUPPER
},
2607 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD
},
2608 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE
},
2609 { "-mprefer-avx128", MASK_PREFER_AVX128
},
2612 const char *opts
[ARRAY_SIZE (isa_opts
) + ARRAY_SIZE (flag_opts
) + 6][2];
2615 char target_other
[40];
2625 memset (opts
, '\0', sizeof (opts
));
2627 /* Add -march= option. */
2630 opts
[num
][0] = "-march=";
2631 opts
[num
++][1] = arch
;
2634 /* Add -mtune= option. */
2637 opts
[num
][0] = "-mtune=";
2638 opts
[num
++][1] = tune
;
2641 /* Add -m32/-m64/-mx32. */
2642 if ((isa
& OPTION_MASK_ISA_64BIT
) != 0)
2644 if ((isa
& OPTION_MASK_ABI_64
) != 0)
2648 isa
&= ~ (OPTION_MASK_ISA_64BIT
2649 | OPTION_MASK_ABI_64
2650 | OPTION_MASK_ABI_X32
);
2654 opts
[num
++][0] = abi
;
2656 /* Pick out the options in isa options. */
2657 for (i
= 0; i
< ARRAY_SIZE (isa_opts
); i
++)
2659 if ((isa
& isa_opts
[i
].mask
) != 0)
2661 opts
[num
++][0] = isa_opts
[i
].option
;
2662 isa
&= ~ isa_opts
[i
].mask
;
2666 if (isa
&& add_nl_p
)
2668 opts
[num
++][0] = isa_other
;
2669 sprintf (isa_other
, "(other isa: %#" HOST_WIDE_INT_PRINT
"x)",
2673 /* Add flag options. */
2674 for (i
= 0; i
< ARRAY_SIZE (flag_opts
); i
++)
2676 if ((flags
& flag_opts
[i
].mask
) != 0)
2678 opts
[num
++][0] = flag_opts
[i
].option
;
2679 flags
&= ~ flag_opts
[i
].mask
;
2683 if (flags
&& add_nl_p
)
2685 opts
[num
++][0] = target_other
;
2686 sprintf (target_other
, "(other flags: %#x)", flags
);
2689 /* Add -fpmath= option. */
2692 opts
[num
][0] = "-mfpmath=";
2693 switch ((int) fpmath
)
2696 opts
[num
++][1] = "387";
2700 opts
[num
++][1] = "sse";
2703 case FPMATH_387
| FPMATH_SSE
:
2704 opts
[num
++][1] = "sse+387";
2716 gcc_assert (num
< ARRAY_SIZE (opts
));
2718 /* Size the string. */
2720 sep_len
= (add_nl_p
) ? 3 : 1;
2721 for (i
= 0; i
< num
; i
++)
2724 for (j
= 0; j
< 2; j
++)
2726 len
+= strlen (opts
[i
][j
]);
2729 /* Build the string. */
2730 ret
= ptr
= (char *) xmalloc (len
);
2733 for (i
= 0; i
< num
; i
++)
2737 for (j
= 0; j
< 2; j
++)
2738 len2
[j
] = (opts
[i
][j
]) ? strlen (opts
[i
][j
]) : 0;
2745 if (add_nl_p
&& line_len
+ len2
[0] + len2
[1] > 70)
2753 for (j
= 0; j
< 2; j
++)
2756 memcpy (ptr
, opts
[i
][j
], len2
[j
]);
2758 line_len
+= len2
[j
];
2763 gcc_assert (ret
+ len
>= ptr
);
2768 /* Return true, if profiling code should be emitted before
2769 prologue. Otherwise it returns false.
2770 Note: For x86 with "hotfix" it is sorried. */
/* NOTE(review): this chunk looks garbled by extraction; the return
   type and braces of this function are missing from the visible
   text.  */
2772 ix86_profile_before_prologue (void)
/* -mfentry requests that the profiling call be placed before the
   function prologue, so profile-before-prologue is simply whether
   flag_fentry is set.  */
2774 return flag_fentry
!= 0;
2777 /* Function that is callable from the debugger to print the current
2779 void ATTRIBUTE_UNUSED
2780 ix86_debug_options (void)
/* Build a human-readable string describing the current ISA flags,
   target flags, arch and tune settings.  */
2782 char *opts
= ix86_target_string (ix86_isa_flags
, target_flags
,
2783 ix86_arch_string
, ix86_tune_string
,
/* Print the option string to stderr; if no options were set, print
   a placeholder instead.  */
2788 fprintf (stderr
, "%s\n\n", opts
);
2792 fputs ("<no options>\n\n", stderr
);
/* Table of stringop algorithm names, indexed by stringop_alg.  */
2797 static const char *stringop_alg_names
[] = {
/* The entries are generated from stringop.def so this table stays in
   sync with the stringop_alg enumeration.  */
2799 #define DEF_ALG(alg, name) #name,
2800 #include "stringop.def"
2805 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2806 The string is of the following form (or comma separated list of it):
2808 strategy_alg:max_size:[align|noalign]
2810 where the full size range for the strategy is either [0, max_size] or
2811 [min_size, max_size], in which min_size is the max_size + 1 of the
2812 preceding range. The last size range must have max_size == -1.
2817 -mmemcpy-strategy=libcall:-1:noalign
2819 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2823 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2825 This is to tell the compiler to use the following strategy for memset
2826 1) when the expected size is between [1, 16], use rep_8byte strategy;
2827 2) when the size is between [17, 2048], use vector_loop;
2828 3) when the size is > 2048, use libcall. */
/* One entry of a user-specified stringop strategy: a size-range upper
   bound together with the algorithm and alignment choice for it.
   NOTE(review): the member declarations appear to be missing from
   this extracted view — confirm against the original source.  */
2830 struct stringop_size_range
/* Parse STRATEGY_STR, the argument of -mmemcpy-strategy= or (when
   IS_MEMSET) -mmemset-strategy=, as a comma-separated list of
   "alg:max_size:[align|noalign]" triples, and override the default
   cost-table strategy with the parsed ranges.  Emits error() on
   malformed input.  NOTE(review): several declaration lines (n, i,
   alg_name, maxs, align, loop braces) are missing from this
   extracted view.  */
2838 ix86_parse_stringop_strategy_string (char *strategy_str
, bool is_memset
)
2840 const struct stringop_algs
*default_algs
;
2841 stringop_size_range input_ranges
[MAX_STRINGOP_ALGS
];
2842 char *curr_range_str
, *next_range_str
;
/* Select which default table we are overriding: memset for
   -mmemset-strategy=, memcpy otherwise; index 1 is the 64-bit
   variant.  */
2846 default_algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
2848 default_algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
2850 curr_range_str
= strategy_str
;
/* Split the list at the next comma and NUL-terminate the current
   triple so it can be scanned in isolation.  */
2858 next_range_str
= strchr (curr_range_str
, ',');
2860 *next_range_str
++ = '\0';
/* Each triple must parse as name:max:alignment; the field widths
   bound the destination buffers.  */
2862 if (3 != sscanf (curr_range_str
, "%20[^:]:%d:%10s",
2863 alg_name
, &maxs
, align
))
2865 error ("wrong arg %s to option %s", curr_range_str
,
2866 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Ranges must be strictly increasing, except that -1 marks the
   open-ended final range.  */
2870 if (n
> 0 && (maxs
< (input_ranges
[n
- 1].max
+ 1) && maxs
!= -1))
2872 error ("size ranges of option %s should be increasing",
2873 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Translate the algorithm name into its stringop_alg enumerator by
   linear search over the generated name table.  */
2877 for (i
= 0; i
< last_alg
; i
++)
2879 if (!strcmp (alg_name
, stringop_alg_names
[i
]))
2881 alg
= (stringop_alg
) i
;
2888 error ("wrong stringop strategy name %s specified for option %s",
2890 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Record the parsed triple in the staging array.  */
2894 input_ranges
[n
].max
= maxs
;
2895 input_ranges
[n
].alg
= alg
;
2896 if (!strcmp (align
, "align"))
2897 input_ranges
[n
].noalign
= false;
2898 else if (!strcmp (align
, "noalign"))
2899 input_ranges
[n
].noalign
= true;
2902 error ("unknown alignment %s specified for option %s",
2903 align
, is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Advance to the next comma-separated triple, if any.  */
2907 curr_range_str
= next_range_str
;
2909 while (curr_range_str
);
/* The final range must be open-ended (max == -1) so every size is
   covered.  */
2911 if (input_ranges
[n
- 1].max
!= -1)
2913 error ("the max value for the last size range should be -1"
2915 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2919 if (n
> MAX_STRINGOP_ALGS
)
2921 error ("too many size ranges specified in option %s",
2922 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2926 /* Now override the default algs array. */
/* default_algs points into const cost tables; const_cast is used to
   write the user-requested override in place.  */
2927 for (i
= 0; i
< n
; i
++)
2929 *const_cast<int *>(&default_algs
->size
[i
].max
) = input_ranges
[i
].max
;
2930 *const_cast<stringop_alg
*>(&default_algs
->size
[i
].alg
)
2931 = input_ranges
[i
].alg
;
2932 *const_cast<int *>(&default_algs
->size
[i
].noalign
)
2933 = input_ranges
[i
].noalign
;
2938 /* parse -mtune-ctrl= option. When DUMP is true,
2939 print the features that are explicitly set. */
/* NOTE(review): the return type, braces, and the declarations of i
   and clear are missing from this extracted view.  */
2942 parse_mtune_ctrl_str (bool dump
)
/* Nothing to do when the user gave no -mtune-ctrl= string.  */
2944 if (!ix86_tune_ctrl_string
)
2947 char *next_feature_string
= NULL
;
/* Work on a writable copy, since the parser NUL-terminates tokens
   in place; ORIG keeps the pointer for eventual freeing.  */
2948 char *curr_feature_string
= xstrdup (ix86_tune_ctrl_string
);
2949 char *orig
= curr_feature_string
;
/* Split the comma-separated feature list one token at a time.  */
2955 next_feature_string
= strchr (curr_feature_string
, ',');
2956 if (next_feature_string
)
2957 *next_feature_string
++ = '\0';
/* A leading '^' negates the feature (clears instead of sets).  */
2958 if (*curr_feature_string
== '^')
2960 curr_feature_string
++;
/* Find the named feature in the tuning-feature name table.  */
2963 for (i
= 0; i
< X86_TUNE_LAST
; i
++)
2965 if (!strcmp (curr_feature_string
, ix86_tune_feature_names
[i
]))
2967 ix86_tune_features
[i
] = !clear
;
2969 fprintf (stderr
, "Explicitly %s feature %s\n",
2970 clear
? "clear" : "set", ix86_tune_feature_names
[i
]);
/* Loop ran to the end without a match: unknown feature name.  The
   "- 1" backs up over the stripped '^' so the error shows the token
   as the user wrote it.  */
2974 if (i
== X86_TUNE_LAST
)
2975 error ("Unknown parameter to option -mtune-ctrl: %s",
2976 clear
? curr_feature_string
- 1 : curr_feature_string
);
2977 curr_feature_string
= next_feature_string
;
2979 while (curr_feature_string
);
2983 /* Helper function to set ix86_tune_features. IX86_TUNE is the
/* NOTE(review): the rest of this header comment, the return type,
   braces, and the declaration of i are missing from this extracted
   view.  */
2987 set_ix86_tune_features (enum processor_type ix86_tune
, bool dump
)
/* Each processor occupies one bit in the per-feature masks of
   initial_ix86_tune_features.  */
2989 unsigned int ix86_tune_mask
= 1u << ix86_tune
;
/* Initialize every tuning feature: all-zero when -mtune-ctrl is to
   start from a clean slate, otherwise from the default table entry
   for the selected processor.  */
2992 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
2994 if (ix86_tune_no_default
)
2995 ix86_tune_features
[i
] = 0;
2997 ix86_tune_features
[i
] = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
/* When dumping, list every tuning feature with its on/off state.  */
3002 fprintf (stderr
, "List of x86 specific tuning parameter names:\n");
3003 for (i
= 0; i
< X86_TUNE_LAST
; i
++)
3004 fprintf (stderr
, "%s : %s\n", ix86_tune_feature_names
[i
],
3005 ix86_tune_features
[i
] ? "on" : "off");
/* Finally apply any explicit -mtune-ctrl= overrides on top of the
   defaults just installed.  */
3008 parse_mtune_ctrl_str (dump
);
3012 /* Override various settings based on options. If MAIN_ARGS_P, the
3013 options are from the command line, otherwise they are from
3017 ix86_option_override_internal (bool main_args_p
,
3018 struct gcc_options
*opts
,
3019 struct gcc_options
*opts_set
)
3022 unsigned int ix86_arch_mask
;
3023 const bool ix86_tune_specified
= (opts
->x_ix86_tune_string
!= NULL
);
3028 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3029 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3030 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3031 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3032 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3033 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3034 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3035 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3036 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3037 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3038 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3039 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3040 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3041 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3042 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3043 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3044 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3045 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3046 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3047 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3048 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3049 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3050 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3051 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3052 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3053 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3054 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3055 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3056 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3057 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3058 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3059 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3060 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3061 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3062 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3063 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3064 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3065 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3066 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3067 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3068 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3069 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3070 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3071 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3072 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3074 /* if this reaches 64, need to widen struct pta flags below */
3078 const char *const name
; /* processor name or nickname. */
3079 const enum processor_type processor
;
3080 const enum attr_cpu schedule
;
3081 const unsigned HOST_WIDE_INT flags
;
3083 const processor_alias_table
[] =
3085 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
3086 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
3087 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
3088 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
3089 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
3090 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
3091 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3092 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3093 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3094 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3095 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3096 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3097 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_FXSR
},
3098 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3099 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3100 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3101 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3102 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3103 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3104 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
3105 PTA_MMX
|PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3106 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
3107 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3108 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
3109 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_FXSR
},
3110 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
3111 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3112 | PTA_CX16
| PTA_NO_SAHF
| PTA_FXSR
},
3113 {"core2", PROCESSOR_CORE2
, CPU_CORE2
,
3114 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3115 | PTA_SSSE3
| PTA_CX16
| PTA_FXSR
},
3116 {"corei7", PROCESSOR_COREI7
, CPU_COREI7
,
3117 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_SSSE3
3118 | PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
| PTA_POPCNT
| PTA_FXSR
},
3119 {"corei7-avx", PROCESSOR_COREI7_AVX
, CPU_COREI7
,
3120 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3121 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3122 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
3123 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3124 {"core-avx-i", PROCESSOR_COREI7_AVX
, CPU_COREI7
,
3125 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3126 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3127 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3128 | PTA_RDRND
| PTA_F16C
| PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3129 {"core-avx2", PROCESSOR_HASWELL
, CPU_COREI7
,
3130 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3131 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
3132 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3133 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
3134 | PTA_FMA
| PTA_MOVBE
| PTA_RTM
| PTA_HLE
| PTA_FXSR
| PTA_XSAVE
3136 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
3137 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3138 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
| PTA_FXSR
},
3139 {"slm", PROCESSOR_SLM
, CPU_SLM
,
3140 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3141 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
| PTA_MOVBE
3143 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
3144 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3145 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
3146 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3147 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3148 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
3149 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3150 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
3151 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3152 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
3153 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3154 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3155 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3156 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3157 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3158 {"x86-64", PROCESSOR_K8
, CPU_K8
,
3159 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
| PTA_FXSR
},
3160 {"k8", PROCESSOR_K8
, CPU_K8
,
3161 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3162 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3163 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
3164 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3165 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3166 {"opteron", PROCESSOR_K8
, CPU_K8
,
3167 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3168 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3169 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
3170 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3171 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3172 {"athlon64", PROCESSOR_K8
, CPU_K8
,
3173 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3174 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3175 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
3176 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3177 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3178 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
3179 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3180 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3181 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3182 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
3183 | PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_PRFCHW
| PTA_FXSR
},
3184 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3185 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
3186 | PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_PRFCHW
| PTA_FXSR
},
3187 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
3188 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3189 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3190 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3191 | PTA_XOP
| PTA_LWP
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
3192 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
3193 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3194 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3195 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3196 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3197 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
3198 {"bdver3", PROCESSOR_BDVER3
, CPU_BDVER3
,
3199 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3200 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3201 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3202 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3203 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
3204 | PTA_XSAVEOPT
| PTA_FSGSBASE
},
3205 {"bdver4", PROCESSOR_BDVER4
, CPU_BDVER4
,
3206 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3207 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3208 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_AVX2
3209 | PTA_FMA4
| PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_BMI2
3210 | PTA_TBM
| PTA_F16C
| PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
3211 | PTA_XSAVE
| PTA_XSAVEOPT
| PTA_FSGSBASE
},
3212 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC
,
3213 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3214 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_PRFCHW
3215 | PTA_FXSR
| PTA_XSAVE
},
3216 {"btver2", PROCESSOR_BTVER2
, CPU_BTVER2
,
3217 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3218 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_SSE4_1
3219 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3220 | PTA_BMI
| PTA_F16C
| PTA_MOVBE
| PTA_PRFCHW
3221 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3223 {"generic", PROCESSOR_GENERIC
, CPU_GENERIC
,
3225 | PTA_HLE
/* flags are only used for -march switch. */ },
3228 /* -mrecip options. */
3231 const char *string
; /* option name */
3232 unsigned int mask
; /* mask bits to set */
3234 const recip_options
[] =
3236 { "all", RECIP_MASK_ALL
},
3237 { "none", RECIP_MASK_NONE
},
3238 { "div", RECIP_MASK_DIV
},
3239 { "sqrt", RECIP_MASK_SQRT
},
3240 { "vec-div", RECIP_MASK_VEC_DIV
},
3241 { "vec-sqrt", RECIP_MASK_VEC_SQRT
},
3244 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
3246 /* Set up prefix/suffix so the error messages refer to either the command
3247 line argument, or the attribute(target). */
3256 prefix
= "option(\"";
3261 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3262 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3263 if (TARGET_64BIT_DEFAULT
&& !TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3264 opts
->x_ix86_isa_flags
&= ~(OPTION_MASK_ABI_64
| OPTION_MASK_ABI_X32
);
3265 #ifdef TARGET_BI_ARCH
3268 #if TARGET_BI_ARCH == 1
3269 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3270 is on and OPTION_MASK_ABI_X32 is off. We turn off
3271 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3273 if (TARGET_X32_P (opts
->x_ix86_isa_flags
))
3274 opts
->x_ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3276 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3277 on and OPTION_MASK_ABI_64 is off. We turn off
3278 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3280 if (TARGET_LP64_P (opts
->x_ix86_isa_flags
))
3281 opts
->x_ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3286 if (TARGET_X32_P (opts
->x_ix86_isa_flags
))
3288 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3289 OPTION_MASK_ABI_64 for TARGET_X32. */
3290 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3291 opts
->x_ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3293 else if (TARGET_LP64_P (opts
->x_ix86_isa_flags
))
3295 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3296 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3297 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3298 opts
->x_ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3301 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3302 SUBTARGET_OVERRIDE_OPTIONS
;
3305 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3306 SUBSUBTARGET_OVERRIDE_OPTIONS
;
3309 /* -fPIC is the default for x86_64. */
3310 if (TARGET_MACHO
&& TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3311 opts
->x_flag_pic
= 2;
3313 /* Need to check -mtune=generic first. */
3314 if (opts
->x_ix86_tune_string
)
3316 if (!strcmp (opts
->x_ix86_tune_string
, "generic")
3317 || !strcmp (opts
->x_ix86_tune_string
, "i686")
3318 /* As special support for cross compilers we read -mtune=native
3319 as -mtune=generic. With native compilers we won't see the
3320 -mtune=native, as it was changed by the driver. */
3321 || !strcmp (opts
->x_ix86_tune_string
, "native"))
3323 opts
->x_ix86_tune_string
= "generic";
3325 /* If this call is for setting the option attribute, allow the
3326 generic that was previously set. */
3327 else if (!main_args_p
3328 && !strcmp (opts
->x_ix86_tune_string
, "generic"))
3330 else if (!strncmp (opts
->x_ix86_tune_string
, "generic", 7))
3331 error ("bad value (%s) for %stune=%s %s",
3332 opts
->x_ix86_tune_string
, prefix
, suffix
, sw
);
3333 else if (!strcmp (opts
->x_ix86_tune_string
, "x86-64"))
3334 warning (OPT_Wdeprecated
, "%stune=x86-64%s is deprecated; use "
3335 "%stune=k8%s or %stune=generic%s instead as appropriate",
3336 prefix
, suffix
, prefix
, suffix
, prefix
, suffix
);
3340 if (opts
->x_ix86_arch_string
)
3341 opts
->x_ix86_tune_string
= opts
->x_ix86_arch_string
;
3342 if (!opts
->x_ix86_tune_string
)
3344 opts
->x_ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
3345 ix86_tune_defaulted
= 1;
3348 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3349 or defaulted. We need to use a sensible tune option. */
3350 if (!strcmp (opts
->x_ix86_tune_string
, "generic")
3351 || !strcmp (opts
->x_ix86_tune_string
, "x86-64")
3352 || !strcmp (opts
->x_ix86_tune_string
, "i686"))
3354 opts
->x_ix86_tune_string
= "generic";
3358 if (opts
->x_ix86_stringop_alg
== rep_prefix_8_byte
3359 && !TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3361 /* rep; movq isn't available in 32-bit code. */
3362 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3363 opts
->x_ix86_stringop_alg
= no_stringop
;
3366 if (!opts
->x_ix86_arch_string
)
3367 opts
->x_ix86_arch_string
3368 = TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3369 ? "x86-64" : SUBTARGET32_DEFAULT_CPU
;
3371 ix86_arch_specified
= 1;
3373 if (opts_set
->x_ix86_pmode
)
3375 if ((TARGET_LP64_P (opts
->x_ix86_isa_flags
)
3376 && opts
->x_ix86_pmode
== PMODE_SI
)
3377 || (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3378 && opts
->x_ix86_pmode
== PMODE_DI
))
3379 error ("address mode %qs not supported in the %s bit mode",
3380 TARGET_64BIT_P (opts
->x_ix86_isa_flags
) ? "short" : "long",
3381 TARGET_64BIT_P (opts
->x_ix86_isa_flags
) ? "64" : "32");
3384 opts
->x_ix86_pmode
= TARGET_LP64_P (opts
->x_ix86_isa_flags
)
3385 ? PMODE_DI
: PMODE_SI
;
3387 if (!opts_set
->x_ix86_abi
)
3388 opts
->x_ix86_abi
= DEFAULT_ABI
;
3390 /* For targets using ms ABI enable ms-extensions, if not
3391 explicit turned off. For non-ms ABI we turn off this
3393 if (!opts_set
->x_flag_ms_extensions
)
3394 opts
->x_flag_ms_extensions
= (MS_ABI
== DEFAULT_ABI
);
3396 if (opts_set
->x_ix86_cmodel
)
3398 switch (opts
->x_ix86_cmodel
)
3402 if (opts
->x_flag_pic
)
3403 opts
->x_ix86_cmodel
= CM_SMALL_PIC
;
3404 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3405 error ("code model %qs not supported in the %s bit mode",
3411 if (opts
->x_flag_pic
)
3412 opts
->x_ix86_cmodel
= CM_MEDIUM_PIC
;
3413 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3414 error ("code model %qs not supported in the %s bit mode",
3416 else if (TARGET_X32_P (opts
->x_ix86_isa_flags
))
3417 error ("code model %qs not supported in x32 mode",
3423 if (opts
->x_flag_pic
)
3424 opts
->x_ix86_cmodel
= CM_LARGE_PIC
;
3425 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3426 error ("code model %qs not supported in the %s bit mode",
3428 else if (TARGET_X32_P (opts
->x_ix86_isa_flags
))
3429 error ("code model %qs not supported in x32 mode",
3434 if (opts
->x_flag_pic
)
3435 error ("code model %s does not support PIC mode", "32");
3436 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3437 error ("code model %qs not supported in the %s bit mode",
3442 if (opts
->x_flag_pic
)
3444 error ("code model %s does not support PIC mode", "kernel");
3445 opts
->x_ix86_cmodel
= CM_32
;
3447 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3448 error ("code model %qs not supported in the %s bit mode",
3458 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3459 use of rip-relative addressing. This eliminates fixups that
3460 would otherwise be needed if this object is to be placed in a
3461 DLL, and is essentially just as efficient as direct addressing. */
3462 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3463 && (TARGET_RDOS
|| TARGET_PECOFF
))
3464 opts
->x_ix86_cmodel
= CM_MEDIUM_PIC
, opts
->x_flag_pic
= 1;
3465 else if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3466 opts
->x_ix86_cmodel
= opts
->x_flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
3468 opts
->x_ix86_cmodel
= CM_32
;
3470 if (TARGET_MACHO
&& opts
->x_ix86_asm_dialect
== ASM_INTEL
)
3472 error ("-masm=intel not supported in this configuration");
3473 opts
->x_ix86_asm_dialect
= ASM_ATT
;
3475 if ((TARGET_64BIT_P (opts
->x_ix86_isa_flags
) != 0)
3476 != ((opts
->x_ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
3477 sorry ("%i-bit mode not compiled in",
3478 (opts
->x_ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
3480 for (i
= 0; i
< pta_size
; i
++)
3481 if (! strcmp (opts
->x_ix86_arch_string
, processor_alias_table
[i
].name
))
3483 ix86_schedule
= processor_alias_table
[i
].schedule
;
3484 ix86_arch
= processor_alias_table
[i
].processor
;
3485 /* Default cpu tuning to the architecture. */
3486 ix86_tune
= ix86_arch
;
3488 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3489 && !(processor_alias_table
[i
].flags
& PTA_64BIT
))
3490 error ("CPU you selected does not support x86-64 "
3493 if (processor_alias_table
[i
].flags
& PTA_MMX
3494 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
3495 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
3496 if (processor_alias_table
[i
].flags
& PTA_3DNOW
3497 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
3498 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
3499 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
3500 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
3501 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
3502 if (processor_alias_table
[i
].flags
& PTA_SSE
3503 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
3504 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
3505 if (processor_alias_table
[i
].flags
& PTA_SSE2
3506 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
3507 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
3508 if (processor_alias_table
[i
].flags
& PTA_SSE3
3509 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
3510 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
3511 if (processor_alias_table
[i
].flags
& PTA_SSSE3
3512 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
3513 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
3514 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
3515 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
3516 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
3517 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
3518 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
3519 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
3520 if (processor_alias_table
[i
].flags
& PTA_AVX
3521 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX
))
3522 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX
;
3523 if (processor_alias_table
[i
].flags
& PTA_AVX2
3524 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX2
))
3525 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX2
;
3526 if (processor_alias_table
[i
].flags
& PTA_FMA
3527 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA
))
3528 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_FMA
;
3529 if (processor_alias_table
[i
].flags
& PTA_SSE4A
3530 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
3531 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
3532 if (processor_alias_table
[i
].flags
& PTA_FMA4
3533 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA4
))
3534 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_FMA4
;
3535 if (processor_alias_table
[i
].flags
& PTA_XOP
3536 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_XOP
))
3537 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_XOP
;
3538 if (processor_alias_table
[i
].flags
& PTA_LWP
3539 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_LWP
))
3540 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_LWP
;
3541 if (processor_alias_table
[i
].flags
& PTA_ABM
3542 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_ABM
))
3543 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_ABM
;
3544 if (processor_alias_table
[i
].flags
& PTA_BMI
3545 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI
))
3546 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_BMI
;
3547 if (processor_alias_table
[i
].flags
& (PTA_LZCNT
| PTA_ABM
)
3548 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_LZCNT
))
3549 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
;
3550 if (processor_alias_table
[i
].flags
& PTA_TBM
3551 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_TBM
))
3552 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_TBM
;
3553 if (processor_alias_table
[i
].flags
& PTA_BMI2
3554 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI2
))
3555 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_BMI2
;
3556 if (processor_alias_table
[i
].flags
& PTA_CX16
3557 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_CX16
))
3558 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_CX16
;
3559 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
)
3560 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_POPCNT
))
3561 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
;
3562 if (!(TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3563 && (processor_alias_table
[i
].flags
& PTA_NO_SAHF
))
3564 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SAHF
))
3565 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SAHF
;
3566 if (processor_alias_table
[i
].flags
& PTA_MOVBE
3567 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_MOVBE
))
3568 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_MOVBE
;
3569 if (processor_alias_table
[i
].flags
& PTA_AES
3570 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AES
))
3571 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AES
;
3572 if (processor_alias_table
[i
].flags
& PTA_PCLMUL
3573 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_PCLMUL
))
3574 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_PCLMUL
;
3575 if (processor_alias_table
[i
].flags
& PTA_FSGSBASE
3576 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_FSGSBASE
))
3577 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_FSGSBASE
;
3578 if (processor_alias_table
[i
].flags
& PTA_RDRND
3579 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDRND
))
3580 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_RDRND
;
3581 if (processor_alias_table
[i
].flags
& PTA_F16C
3582 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_F16C
))
3583 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_F16C
;
3584 if (processor_alias_table
[i
].flags
& PTA_RTM
3585 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_RTM
))
3586 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_RTM
;
3587 if (processor_alias_table
[i
].flags
& PTA_HLE
3588 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_HLE
))
3589 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_HLE
;
3590 if (processor_alias_table
[i
].flags
& PTA_PRFCHW
3591 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_PRFCHW
))
3592 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_PRFCHW
;
3593 if (processor_alias_table
[i
].flags
& PTA_RDSEED
3594 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDSEED
))
3595 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_RDSEED
;
3596 if (processor_alias_table
[i
].flags
& PTA_ADX
3597 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_ADX
))
3598 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_ADX
;
3599 if (processor_alias_table
[i
].flags
& PTA_FXSR
3600 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_FXSR
))
3601 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_FXSR
;
3602 if (processor_alias_table
[i
].flags
& PTA_XSAVE
3603 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVE
))
3604 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_XSAVE
;
3605 if (processor_alias_table
[i
].flags
& PTA_XSAVEOPT
3606 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVEOPT
))
3607 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_XSAVEOPT
;
3608 if (processor_alias_table
[i
].flags
& PTA_AVX512F
3609 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512F
))
3610 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512F
;
3611 if (processor_alias_table
[i
].flags
& PTA_AVX512ER
3612 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512ER
))
3613 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512ER
;
3614 if (processor_alias_table
[i
].flags
& PTA_AVX512PF
3615 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512PF
))
3616 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512PF
;
3617 if (processor_alias_table
[i
].flags
& PTA_AVX512CD
3618 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512CD
))
3619 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512CD
;
3620 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
3621 x86_prefetch_sse
= true;
3626 if (!strcmp (opts
->x_ix86_arch_string
, "generic"))
3627 error ("generic CPU can be used only for %stune=%s %s",
3628 prefix
, suffix
, sw
);
3629 else if (!strncmp (opts
->x_ix86_arch_string
, "generic", 7) || i
== pta_size
)
3630 error ("bad value (%s) for %sarch=%s %s",
3631 opts
->x_ix86_arch_string
, prefix
, suffix
, sw
);
3633 ix86_arch_mask
= 1u << ix86_arch
;
3634 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
3635 ix86_arch_features
[i
] = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
3637 for (i
= 0; i
< pta_size
; i
++)
3638 if (! strcmp (opts
->x_ix86_tune_string
, processor_alias_table
[i
].name
))
3640 ix86_schedule
= processor_alias_table
[i
].schedule
;
3641 ix86_tune
= processor_alias_table
[i
].processor
;
3642 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3644 if (!(processor_alias_table
[i
].flags
& PTA_64BIT
))
3646 if (ix86_tune_defaulted
)
3648 opts
->x_ix86_tune_string
= "x86-64";
3649 for (i
= 0; i
< pta_size
; i
++)
3650 if (! strcmp (opts
->x_ix86_tune_string
,
3651 processor_alias_table
[i
].name
))
3653 ix86_schedule
= processor_alias_table
[i
].schedule
;
3654 ix86_tune
= processor_alias_table
[i
].processor
;
3657 error ("CPU you selected does not support x86-64 "
3661 /* Intel CPUs have always interpreted SSE prefetch instructions as
3662 NOPs; so, we can enable SSE prefetch instructions even when
3663 -mtune (rather than -march) points us to a processor that has them.
3664 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3665 higher processors. */
3667 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
3668 x86_prefetch_sse
= true;
3672 if (ix86_tune_specified
&& i
== pta_size
)
3673 error ("bad value (%s) for %stune=%s %s",
3674 opts
->x_ix86_tune_string
, prefix
, suffix
, sw
);
3676 set_ix86_tune_features (ix86_tune
, opts
->x_ix86_dump_tunes
);
3678 #ifndef USE_IX86_FRAME_POINTER
3679 #define USE_IX86_FRAME_POINTER 0
3682 #ifndef USE_X86_64_FRAME_POINTER
3683 #define USE_X86_64_FRAME_POINTER 0
3686 /* Set the default values for switches whose default depends on TARGET_64BIT
3687 in case they weren't overwritten by command line options. */
3688 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3690 if (opts
->x_optimize
>= 1 && !opts_set
->x_flag_omit_frame_pointer
)
3691 opts
->x_flag_omit_frame_pointer
= !USE_X86_64_FRAME_POINTER
;
3692 if (opts
->x_flag_asynchronous_unwind_tables
== 2)
3693 opts
->x_flag_unwind_tables
3694 = opts
->x_flag_asynchronous_unwind_tables
= 1;
3695 if (opts
->x_flag_pcc_struct_return
== 2)
3696 opts
->x_flag_pcc_struct_return
= 0;
3700 if (opts
->x_optimize
>= 1 && !opts_set
->x_flag_omit_frame_pointer
)
3701 opts
->x_flag_omit_frame_pointer
3702 = !(USE_IX86_FRAME_POINTER
|| opts
->x_optimize_size
);
3703 if (opts
->x_flag_asynchronous_unwind_tables
== 2)
3704 opts
->x_flag_asynchronous_unwind_tables
= !USE_IX86_FRAME_POINTER
;
3705 if (opts
->x_flag_pcc_struct_return
== 2)
3706 opts
->x_flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
3709 ix86_tune_cost
= processor_target_table
[ix86_tune
].cost
;
3710 if (opts
->x_optimize_size
)
3711 ix86_cost
= &ix86_size_cost
;
3713 ix86_cost
= ix86_tune_cost
;
3715 /* Arrange to set up i386_stack_locals for all functions. */
3716 init_machine_status
= ix86_init_machine_status
;
3718 /* Validate -mregparm= value. */
3719 if (opts_set
->x_ix86_regparm
)
3721 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3722 warning (0, "-mregparm is ignored in 64-bit mode");
3723 if (opts
->x_ix86_regparm
> REGPARM_MAX
)
3725 error ("-mregparm=%d is not between 0 and %d",
3726 opts
->x_ix86_regparm
, REGPARM_MAX
);
3727 opts
->x_ix86_regparm
= 0;
3730 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3731 opts
->x_ix86_regparm
= REGPARM_MAX
;
3733 /* Default align_* from the processor table. */
3734 if (opts
->x_align_loops
== 0)
3736 opts
->x_align_loops
= processor_target_table
[ix86_tune
].align_loop
;
3737 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
3739 if (opts
->x_align_jumps
== 0)
3741 opts
->x_align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
3742 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
3744 if (opts
->x_align_functions
== 0)
3746 opts
->x_align_functions
= processor_target_table
[ix86_tune
].align_func
;
3749 /* Provide default for -mbranch-cost= value. */
3750 if (!opts_set
->x_ix86_branch_cost
)
3751 opts
->x_ix86_branch_cost
= ix86_cost
->branch_cost
;
3753 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3755 opts
->x_target_flags
3756 |= TARGET_SUBTARGET64_DEFAULT
& ~opts_set
->x_target_flags
;
3758 /* Enable by default the SSE and MMX builtins. Do allow the user to
3759 explicitly disable any of these. In particular, disabling SSE and
3760 MMX for kernel code is extremely useful. */
3761 if (!ix86_arch_specified
)
3762 opts
->x_ix86_isa_flags
3763 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
3764 | TARGET_SUBTARGET64_ISA_DEFAULT
)
3765 & ~opts
->x_ix86_isa_flags_explicit
);
3767 if (TARGET_RTD_P (opts
->x_target_flags
))
3768 warning (0, "%srtd%s is ignored in 64bit mode", prefix
, suffix
);
3772 opts
->x_target_flags
3773 |= TARGET_SUBTARGET32_DEFAULT
& ~opts_set
->x_target_flags
;
3775 if (!ix86_arch_specified
)
3776 opts
->x_ix86_isa_flags
3777 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~opts
->x_ix86_isa_flags_explicit
;
3779 /* i386 ABI does not specify red zone. It still makes sense to use it
3780 when programmer takes care to stack from being destroyed. */
3781 if (!(opts_set
->x_target_flags
& MASK_NO_RED_ZONE
))
3782 opts
->x_target_flags
|= MASK_NO_RED_ZONE
;
3785 /* Keep nonleaf frame pointers. */
3786 if (opts
->x_flag_omit_frame_pointer
)
3787 opts
->x_target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
3788 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts
->x_target_flags
))
3789 opts
->x_flag_omit_frame_pointer
= 1;
3791 /* If we're doing fast math, we don't care about comparison order
3792 wrt NaNs. This lets us use a shorter comparison sequence. */
3793 if (opts
->x_flag_finite_math_only
)
3794 opts
->x_target_flags
&= ~MASK_IEEE_FP
;
3796 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3797 since the insns won't need emulation. */
3798 if (ix86_tune_features
[X86_TUNE_ALWAYS_FANCY_MATH_387
])
3799 opts
->x_target_flags
&= ~MASK_NO_FANCY_MATH_387
;
3801 /* Likewise, if the target doesn't have a 387, or we've specified
3802 software floating point, don't use 387 inline intrinsics. */
3803 if (!TARGET_80387_P (opts
->x_target_flags
))
3804 opts
->x_target_flags
|= MASK_NO_FANCY_MATH_387
;
3806 /* Turn on MMX builtins for -msse. */
3807 if (TARGET_SSE_P (opts
->x_ix86_isa_flags
))
3808 opts
->x_ix86_isa_flags
3809 |= OPTION_MASK_ISA_MMX
& ~opts
->x_ix86_isa_flags_explicit
;
3811 /* Enable SSE prefetch. */
3812 if (TARGET_SSE_P (opts
->x_ix86_isa_flags
)
3813 || (TARGET_PRFCHW
&& !TARGET_3DNOW_P (opts
->x_ix86_isa_flags
)))
3814 x86_prefetch_sse
= true;
3816 /* Enable prefetch{,w} instructions for -m3dnow. */
3817 if (TARGET_3DNOW_P (opts
->x_ix86_isa_flags
))
3818 opts
->x_ix86_isa_flags
3819 |= OPTION_MASK_ISA_PRFCHW
& ~opts
->x_ix86_isa_flags_explicit
;
3821 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3822 if (TARGET_SSE4_2_P (opts
->x_ix86_isa_flags
)
3823 || TARGET_ABM_P (opts
->x_ix86_isa_flags
))
3824 opts
->x_ix86_isa_flags
3825 |= OPTION_MASK_ISA_POPCNT
& ~opts
->x_ix86_isa_flags_explicit
;
3827 /* Enable lzcnt instruction for -mabm. */
3828 if (TARGET_ABM_P(opts
->x_ix86_isa_flags
))
3829 opts
->x_ix86_isa_flags
3830 |= OPTION_MASK_ISA_LZCNT
& ~opts
->x_ix86_isa_flags_explicit
;
3832 /* Validate -mpreferred-stack-boundary= value or default it to
3833 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3834 ix86_preferred_stack_boundary
= PREFERRED_STACK_BOUNDARY_DEFAULT
;
3835 if (opts_set
->x_ix86_preferred_stack_boundary_arg
)
3837 int min
= (TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3838 ? (TARGET_SSE_P (opts
->x_ix86_isa_flags
) ? 4 : 3) : 2);
3839 int max
= (TARGET_SEH
? 4 : 12);
3841 if (opts
->x_ix86_preferred_stack_boundary_arg
< min
3842 || opts
->x_ix86_preferred_stack_boundary_arg
> max
)
3845 error ("-mpreferred-stack-boundary is not supported "
3848 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3849 opts
->x_ix86_preferred_stack_boundary_arg
, min
, max
);
3852 ix86_preferred_stack_boundary
3853 = (1 << opts
->x_ix86_preferred_stack_boundary_arg
) * BITS_PER_UNIT
;
3856 /* Set the default value for -mstackrealign. */
3857 if (opts
->x_ix86_force_align_arg_pointer
== -1)
3858 opts
->x_ix86_force_align_arg_pointer
= STACK_REALIGN_DEFAULT
;
3860 ix86_default_incoming_stack_boundary
= PREFERRED_STACK_BOUNDARY
;
3862 /* Validate -mincoming-stack-boundary= value or default it to
3863 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3864 ix86_incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
3865 if (opts_set
->x_ix86_incoming_stack_boundary_arg
)
3867 if (ix86_incoming_stack_boundary_arg
3868 < (TARGET_64BIT_P (opts
->x_ix86_isa_flags
) ? 4 : 2)
3869 || ix86_incoming_stack_boundary_arg
> 12)
3870 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3871 ix86_incoming_stack_boundary_arg
,
3872 TARGET_64BIT_P (opts
->x_ix86_isa_flags
) ? 4 : 2);
3875 ix86_user_incoming_stack_boundary
3876 = (1 << ix86_incoming_stack_boundary_arg
) * BITS_PER_UNIT
;
3877 ix86_incoming_stack_boundary
3878 = ix86_user_incoming_stack_boundary
;
3882 /* Accept -msseregparm only if at least SSE support is enabled. */
3883 if (TARGET_SSEREGPARM_P (opts
->x_target_flags
)
3884 && ! TARGET_SSE_P (opts
->x_ix86_isa_flags
))
3885 error ("%ssseregparm%s used without SSE enabled", prefix
, suffix
);
3887 if (opts_set
->x_ix86_fpmath
)
3889 if (opts
->x_ix86_fpmath
& FPMATH_SSE
)
3891 if (!TARGET_SSE_P (opts
->x_ix86_isa_flags
))
3893 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3894 opts
->x_ix86_fpmath
= FPMATH_387
;
3896 else if ((opts
->x_ix86_fpmath
& FPMATH_387
)
3897 && !TARGET_80387_P (opts
->x_target_flags
))
3899 warning (0, "387 instruction set disabled, using SSE arithmetics");
3900 opts
->x_ix86_fpmath
= FPMATH_SSE
;
3904 /* For all chips supporting SSE2, -mfpmath=sse performs better than
3905 fpmath=387. The second is however default at many targets since the
3906 extra 80bit precision of temporaries is considered to be part of ABI.
3907 Overwrite the default at least for -ffast-math.
3908 TODO: -mfpmath=both seems to produce same performing code with bit
3909 smaller binaries. It is however not clear if register allocation is
3910 ready for this setting.
3911 Also -mfpmath=387 is overall a lot more compact (bout 4-5%) than SSE
3912 codegen. We may switch to 387 with -ffast-math for size optimized
3914 else if (fast_math_flags_set_p (&global_options
)
3915 && TARGET_SSE2_P (opts
->x_ix86_isa_flags
))
3916 ix86_fpmath
= FPMATH_SSE
;
3918 opts
->x_ix86_fpmath
= TARGET_FPMATH_DEFAULT_P (opts
->x_ix86_isa_flags
);
3920 /* If the i387 is disabled, then do not return values in it. */
3921 if (!TARGET_80387_P (opts
->x_target_flags
))
3922 opts
->x_target_flags
&= ~MASK_FLOAT_RETURNS
;
3924 /* Use external vectorized library in vectorizing intrinsics. */
3925 if (opts_set
->x_ix86_veclibabi_type
)
3926 switch (opts
->x_ix86_veclibabi_type
)
3928 case ix86_veclibabi_type_svml
:
3929 ix86_veclib_handler
= ix86_veclibabi_svml
;
3932 case ix86_veclibabi_type_acml
:
3933 ix86_veclib_handler
= ix86_veclibabi_acml
;
3940 if (ix86_tune_features
[X86_TUNE_ACCUMULATE_OUTGOING_ARGS
]
3941 && !(opts_set
->x_target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3942 && !opts
->x_optimize_size
)
3943 opts
->x_target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3945 /* If stack probes are required, the space used for large function
3946 arguments on the stack must also be probed, so enable
3947 -maccumulate-outgoing-args so this happens in the prologue. */
3948 if (TARGET_STACK_PROBE_P (opts
->x_target_flags
)
3949 && !(opts
->x_target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3951 if (opts_set
->x_target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3952 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3953 "for correctness", prefix
, suffix
);
3954 opts
->x_target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3957 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3960 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
3961 p
= strchr (internal_label_prefix
, 'X');
3962 internal_label_prefix_len
= p
- internal_label_prefix
;
3966 /* When scheduling description is not available, disable scheduler pass
3967 so it won't slow down the compilation and make x87 code slower. */
3968 if (!TARGET_SCHEDULE
)
3969 opts
->x_flag_schedule_insns_after_reload
= opts
->x_flag_schedule_insns
= 0;
3971 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3972 ix86_tune_cost
->simultaneous_prefetches
,
3973 opts
->x_param_values
,
3974 opts_set
->x_param_values
);
3975 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3976 ix86_tune_cost
->prefetch_block
,
3977 opts
->x_param_values
,
3978 opts_set
->x_param_values
);
3979 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3980 ix86_tune_cost
->l1_cache_size
,
3981 opts
->x_param_values
,
3982 opts_set
->x_param_values
);
3983 maybe_set_param_value (PARAM_L2_CACHE_SIZE
,
3984 ix86_tune_cost
->l2_cache_size
,
3985 opts
->x_param_values
,
3986 opts_set
->x_param_values
);
3988 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
3989 if (opts
->x_flag_prefetch_loop_arrays
< 0
3991 && (opts
->x_optimize
>= 3 || opts
->x_flag_profile_use
)
3992 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL
)
3993 opts
->x_flag_prefetch_loop_arrays
= 1;
3995 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3996 can be opts->x_optimized to ap = __builtin_next_arg (0). */
3997 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
) && !opts
->x_flag_split_stack
)
3998 targetm
.expand_builtin_va_start
= NULL
;
4000 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
4002 ix86_gen_leave
= gen_leave_rex64
;
4003 if (Pmode
== DImode
)
4005 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_di
;
4006 ix86_gen_tls_local_dynamic_base_64
4007 = gen_tls_local_dynamic_base_64_di
;
4011 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_si
;
4012 ix86_gen_tls_local_dynamic_base_64
4013 = gen_tls_local_dynamic_base_64_si
;
4017 ix86_gen_leave
= gen_leave
;
4019 if (Pmode
== DImode
)
4021 ix86_gen_add3
= gen_adddi3
;
4022 ix86_gen_sub3
= gen_subdi3
;
4023 ix86_gen_sub3_carry
= gen_subdi3_carry
;
4024 ix86_gen_one_cmpl2
= gen_one_cmpldi2
;
4025 ix86_gen_andsp
= gen_anddi3
;
4026 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_di
;
4027 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probedi
;
4028 ix86_gen_probe_stack_range
= gen_probe_stack_rangedi
;
4029 ix86_gen_monitor
= gen_sse3_monitor_di
;
4033 ix86_gen_add3
= gen_addsi3
;
4034 ix86_gen_sub3
= gen_subsi3
;
4035 ix86_gen_sub3_carry
= gen_subsi3_carry
;
4036 ix86_gen_one_cmpl2
= gen_one_cmplsi2
;
4037 ix86_gen_andsp
= gen_andsi3
;
4038 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_si
;
4039 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probesi
;
4040 ix86_gen_probe_stack_range
= gen_probe_stack_rangesi
;
4041 ix86_gen_monitor
= gen_sse3_monitor_si
;
4045 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4046 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
4047 opts
->x_target_flags
|= MASK_CLD
& ~opts_set
->x_target_flags
;
4050 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
) && opts
->x_flag_pic
)
4052 if (opts
->x_flag_fentry
> 0)
4053 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4055 opts
->x_flag_fentry
= 0;
4057 else if (TARGET_SEH
)
4059 if (opts
->x_flag_fentry
== 0)
4060 sorry ("-mno-fentry isn%'t compatible with SEH");
4061 opts
->x_flag_fentry
= 1;
4063 else if (opts
->x_flag_fentry
< 0)
4065 #if defined(PROFILE_BEFORE_PROLOGUE)
4066 opts
->x_flag_fentry
= 1;
4068 opts
->x_flag_fentry
= 0;
4072 /* When not opts->x_optimize for size, enable vzeroupper optimization for
4073 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4074 AVX unaligned load/store. */
4075 if (!opts
->x_optimize_size
)
4077 if (flag_expensive_optimizations
4078 && !(opts_set
->x_target_flags
& MASK_VZEROUPPER
))
4079 opts
->x_target_flags
|= MASK_VZEROUPPER
;
4080 if (!ix86_tune_features
[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL
]
4081 && !(opts_set
->x_target_flags
& MASK_AVX256_SPLIT_UNALIGNED_LOAD
))
4082 opts
->x_target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_LOAD
;
4083 if (!ix86_tune_features
[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL
]
4084 && !(opts_set
->x_target_flags
& MASK_AVX256_SPLIT_UNALIGNED_STORE
))
4085 opts
->x_target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_STORE
;
4086 /* Enable 128-bit AVX instruction generation
4087 for the auto-vectorizer. */
4088 if (TARGET_AVX128_OPTIMAL
4089 && !(opts_set
->x_target_flags
& MASK_PREFER_AVX128
))
4090 opts
->x_target_flags
|= MASK_PREFER_AVX128
;
4093 if (opts
->x_ix86_recip_name
)
4095 char *p
= ASTRDUP (opts
->x_ix86_recip_name
);
4097 unsigned int mask
, i
;
4100 while ((q
= strtok (p
, ",")) != NULL
)
4111 if (!strcmp (q
, "default"))
4112 mask
= RECIP_MASK_ALL
;
4115 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
4116 if (!strcmp (q
, recip_options
[i
].string
))
4118 mask
= recip_options
[i
].mask
;
4122 if (i
== ARRAY_SIZE (recip_options
))
4124 error ("unknown option for -mrecip=%s", q
);
4126 mask
= RECIP_MASK_NONE
;
4130 opts
->x_recip_mask_explicit
|= mask
;
4132 opts
->x_recip_mask
&= ~mask
;
4134 opts
->x_recip_mask
|= mask
;
4138 if (TARGET_RECIP_P (opts
->x_target_flags
))
4139 opts
->x_recip_mask
|= RECIP_MASK_ALL
& ~opts
->x_recip_mask_explicit
;
4140 else if (opts_set
->x_target_flags
& MASK_RECIP
)
4141 opts
->x_recip_mask
&= ~(RECIP_MASK_ALL
& ~opts
->x_recip_mask_explicit
);
4143 /* Default long double to 64-bit for Bionic. */
4144 if (TARGET_HAS_BIONIC
4145 && !(opts_set
->x_target_flags
& MASK_LONG_DOUBLE_64
))
4146 opts
->x_target_flags
|= MASK_LONG_DOUBLE_64
;
4148 /* Save the initial options in case the user does function specific
4151 target_option_default_node
= target_option_current_node
4152 = build_target_option_node (opts
);
4154 /* Handle stack protector */
4155 if (!opts_set
->x_ix86_stack_protector_guard
)
4156 opts
->x_ix86_stack_protector_guard
4157 = TARGET_HAS_BIONIC
? SSP_GLOBAL
: SSP_TLS
;
4159 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4160 if (opts
->x_ix86_tune_memcpy_strategy
)
4162 char *str
= xstrdup (opts
->x_ix86_tune_memcpy_strategy
);
4163 ix86_parse_stringop_strategy_string (str
, false);
4167 if (opts
->x_ix86_tune_memset_strategy
)
4169 char *str
= xstrdup (opts
->x_ix86_tune_memset_strategy
);
4170 ix86_parse_stringop_strategy_string (str
, true);
4175 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4178 ix86_option_override (void)
4180 opt_pass
*pass_insert_vzeroupper
= make_pass_insert_vzeroupper (g
);
4181 static struct register_pass_info insert_vzeroupper_info
4182 = { pass_insert_vzeroupper
, "reload",
4183 1, PASS_POS_INSERT_AFTER
4186 ix86_option_override_internal (true, &global_options
, &global_options_set
);
4189 /* This needs to be done at start up. It's convenient to do it here. */
4190 register_pass (&insert_vzeroupper_info
);
4193 /* Update register usage after having seen the compiler flags. */
4196 ix86_conditional_register_usage (void)
4201 /* The PIC register, if it exists, is fixed. */
4202 j
= PIC_OFFSET_TABLE_REGNUM
;
4203 if (j
!= INVALID_REGNUM
)
4204 fixed_regs
[j
] = call_used_regs
[j
] = 1;
4206 /* For 32-bit targets, squash the REX registers. */
4209 for (i
= FIRST_REX_INT_REG
; i
<= LAST_REX_INT_REG
; i
++)
4210 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4211 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
4212 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4213 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
4214 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4217 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4218 c_mask
= (TARGET_64BIT_MS_ABI
? (1 << 3)
4219 : TARGET_64BIT
? (1 << 2)
4222 CLEAR_HARD_REG_SET (reg_class_contents
[(int)CLOBBERED_REGS
]);
4224 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4226 /* Set/reset conditionally defined registers from
4227 CALL_USED_REGISTERS initializer. */
4228 if (call_used_regs
[i
] > 1)
4229 call_used_regs
[i
] = !!(call_used_regs
[i
] & c_mask
);
4231 /* Calculate registers of CLOBBERED_REGS register set
4232 as call used registers from GENERAL_REGS register set. */
4233 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)GENERAL_REGS
], i
)
4234 && call_used_regs
[i
])
4235 SET_HARD_REG_BIT (reg_class_contents
[(int)CLOBBERED_REGS
], i
);
4238 /* If MMX is disabled, squash the registers. */
4240 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4241 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)MMX_REGS
], i
))
4242 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4244 /* If SSE is disabled, squash the registers. */
4246 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4247 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)SSE_REGS
], i
))
4248 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4250 /* If the FPU is disabled, squash the registers. */
4251 if (! (TARGET_80387
|| TARGET_FLOAT_RETURNS_IN_80387
))
4252 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4253 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)FLOAT_REGS
], i
))
4254 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4256 /* If AVX512F is disabled, squash the registers. */
4257 if (! TARGET_AVX512F
)
4259 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
4260 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4262 for (i
= FIRST_MASK_REG
; i
<= LAST_MASK_REG
; i
++)
4263 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4266 /* If MPX is disabled, squash the registers. */
4268 for (i
= FIRST_BND_REG
; i
<= LAST_BND_REG
; i
++)
4269 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4273 /* Save the current options */
4276 ix86_function_specific_save (struct cl_target_option
*ptr
,
4277 struct gcc_options
*opts
)
4279 ptr
->arch
= ix86_arch
;
4280 ptr
->schedule
= ix86_schedule
;
4281 ptr
->tune
= ix86_tune
;
4282 ptr
->branch_cost
= ix86_branch_cost
;
4283 ptr
->tune_defaulted
= ix86_tune_defaulted
;
4284 ptr
->arch_specified
= ix86_arch_specified
;
4285 ptr
->x_ix86_isa_flags_explicit
= opts
->x_ix86_isa_flags_explicit
;
4286 ptr
->x_ix86_target_flags_explicit
= opts
->x_ix86_target_flags_explicit
;
4287 ptr
->x_recip_mask_explicit
= opts
->x_recip_mask_explicit
;
4289 /* The fields are char but the variables are not; make sure the
4290 values fit in the fields. */
4291 gcc_assert (ptr
->arch
== ix86_arch
);
4292 gcc_assert (ptr
->schedule
== ix86_schedule
);
4293 gcc_assert (ptr
->tune
== ix86_tune
);
4294 gcc_assert (ptr
->branch_cost
== ix86_branch_cost
);
4297 /* Restore the current options */
4300 ix86_function_specific_restore (struct gcc_options
*opts
,
4301 struct cl_target_option
*ptr
)
4303 enum processor_type old_tune
= ix86_tune
;
4304 enum processor_type old_arch
= ix86_arch
;
4305 unsigned int ix86_arch_mask
;
4308 ix86_arch
= (enum processor_type
) ptr
->arch
;
4309 ix86_schedule
= (enum attr_cpu
) ptr
->schedule
;
4310 ix86_tune
= (enum processor_type
) ptr
->tune
;
4311 opts
->x_ix86_branch_cost
= ptr
->branch_cost
;
4312 ix86_tune_defaulted
= ptr
->tune_defaulted
;
4313 ix86_arch_specified
= ptr
->arch_specified
;
4314 opts
->x_ix86_isa_flags_explicit
= ptr
->x_ix86_isa_flags_explicit
;
4315 opts
->x_ix86_target_flags_explicit
= ptr
->x_ix86_target_flags_explicit
;
4316 opts
->x_recip_mask_explicit
= ptr
->x_recip_mask_explicit
;
4318 /* Recreate the arch feature tests if the arch changed */
4319 if (old_arch
!= ix86_arch
)
4321 ix86_arch_mask
= 1u << ix86_arch
;
4322 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
4323 ix86_arch_features
[i
]
4324 = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
4327 /* Recreate the tune optimization tests */
4328 if (old_tune
!= ix86_tune
)
4329 set_ix86_tune_features (ix86_tune
, false);
4332 /* Print the current options */
4335 ix86_function_specific_print (FILE *file
, int indent
,
4336 struct cl_target_option
*ptr
)
4339 = ix86_target_string (ptr
->x_ix86_isa_flags
, ptr
->x_target_flags
,
4340 NULL
, NULL
, ptr
->x_ix86_fpmath
, false);
4342 fprintf (file
, "%*sarch = %d (%s)\n",
4345 ((ptr
->arch
< TARGET_CPU_DEFAULT_max
)
4346 ? cpu_names
[ptr
->arch
]
4349 fprintf (file
, "%*stune = %d (%s)\n",
4352 ((ptr
->tune
< TARGET_CPU_DEFAULT_max
)
4353 ? cpu_names
[ptr
->tune
]
4356 fprintf (file
, "%*sbranch_cost = %d\n", indent
, "", ptr
->branch_cost
);
4360 fprintf (file
, "%*s%s\n", indent
, "", target_string
);
4361 free (target_string
);
4366 /* Inner function to process the attribute((target(...))), take an argument and
4367 set the current options from the argument. If we have a list, recursively go
4371 ix86_valid_target_attribute_inner_p (tree args
, char *p_strings
[],
4372 struct gcc_options
*opts
,
4373 struct gcc_options
*opts_set
,
4374 struct gcc_options
*enum_opts_set
)
4379 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4380 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4381 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4382 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4383 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4399 enum ix86_opt_type type
;
4404 IX86_ATTR_ISA ("3dnow", OPT_m3dnow
),
4405 IX86_ATTR_ISA ("abm", OPT_mabm
),
4406 IX86_ATTR_ISA ("bmi", OPT_mbmi
),
4407 IX86_ATTR_ISA ("bmi2", OPT_mbmi2
),
4408 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt
),
4409 IX86_ATTR_ISA ("tbm", OPT_mtbm
),
4410 IX86_ATTR_ISA ("aes", OPT_maes
),
4411 IX86_ATTR_ISA ("avx", OPT_mavx
),
4412 IX86_ATTR_ISA ("avx2", OPT_mavx2
),
4413 IX86_ATTR_ISA ("avx512f", OPT_mavx512f
),
4414 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf
),
4415 IX86_ATTR_ISA ("avx512er", OPT_mavx512er
),
4416 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd
),
4417 IX86_ATTR_ISA ("mmx", OPT_mmmx
),
4418 IX86_ATTR_ISA ("pclmul", OPT_mpclmul
),
4419 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt
),
4420 IX86_ATTR_ISA ("sse", OPT_msse
),
4421 IX86_ATTR_ISA ("sse2", OPT_msse2
),
4422 IX86_ATTR_ISA ("sse3", OPT_msse3
),
4423 IX86_ATTR_ISA ("sse4", OPT_msse4
),
4424 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1
),
4425 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2
),
4426 IX86_ATTR_ISA ("sse4a", OPT_msse4a
),
4427 IX86_ATTR_ISA ("ssse3", OPT_mssse3
),
4428 IX86_ATTR_ISA ("fma4", OPT_mfma4
),
4429 IX86_ATTR_ISA ("fma", OPT_mfma
),
4430 IX86_ATTR_ISA ("xop", OPT_mxop
),
4431 IX86_ATTR_ISA ("lwp", OPT_mlwp
),
4432 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase
),
4433 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd
),
4434 IX86_ATTR_ISA ("f16c", OPT_mf16c
),
4435 IX86_ATTR_ISA ("rtm", OPT_mrtm
),
4436 IX86_ATTR_ISA ("hle", OPT_mhle
),
4437 IX86_ATTR_ISA ("prfchw", OPT_mprfchw
),
4438 IX86_ATTR_ISA ("rdseed", OPT_mrdseed
),
4439 IX86_ATTR_ISA ("adx", OPT_madx
),
4440 IX86_ATTR_ISA ("fxsr", OPT_mfxsr
),
4441 IX86_ATTR_ISA ("xsave", OPT_mxsave
),
4442 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt
),
4445 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_
),
4447 /* string options */
4448 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH
),
4449 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE
),
4452 IX86_ATTR_YES ("cld",
4456 IX86_ATTR_NO ("fancy-math-387",
4457 OPT_mfancy_math_387
,
4458 MASK_NO_FANCY_MATH_387
),
4460 IX86_ATTR_YES ("ieee-fp",
4464 IX86_ATTR_YES ("inline-all-stringops",
4465 OPT_minline_all_stringops
,
4466 MASK_INLINE_ALL_STRINGOPS
),
4468 IX86_ATTR_YES ("inline-stringops-dynamically",
4469 OPT_minline_stringops_dynamically
,
4470 MASK_INLINE_STRINGOPS_DYNAMICALLY
),
4472 IX86_ATTR_NO ("align-stringops",
4473 OPT_mno_align_stringops
,
4474 MASK_NO_ALIGN_STRINGOPS
),
4476 IX86_ATTR_YES ("recip",
4482 /* If this is a list, recurse to get the options. */
4483 if (TREE_CODE (args
) == TREE_LIST
)
4487 for (; args
; args
= TREE_CHAIN (args
))
4488 if (TREE_VALUE (args
)
4489 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args
),
4490 p_strings
, opts
, opts_set
,
4497 else if (TREE_CODE (args
) != STRING_CST
)
4499 error ("attribute %<target%> argument not a string");
4503 /* Handle multiple arguments separated by commas. */
4504 next_optstr
= ASTRDUP (TREE_STRING_POINTER (args
));
4506 while (next_optstr
&& *next_optstr
!= '\0')
4508 char *p
= next_optstr
;
4510 char *comma
= strchr (next_optstr
, ',');
4511 const char *opt_string
;
4512 size_t len
, opt_len
;
4517 enum ix86_opt_type type
= ix86_opt_unknown
;
4523 len
= comma
- next_optstr
;
4524 next_optstr
= comma
+ 1;
4532 /* Recognize no-xxx. */
4533 if (len
> 3 && p
[0] == 'n' && p
[1] == 'o' && p
[2] == '-')
4542 /* Find the option. */
4545 for (i
= 0; i
< ARRAY_SIZE (attrs
); i
++)
4547 type
= attrs
[i
].type
;
4548 opt_len
= attrs
[i
].len
;
4549 if (ch
== attrs
[i
].string
[0]
4550 && ((type
!= ix86_opt_str
&& type
!= ix86_opt_enum
)
4553 && memcmp (p
, attrs
[i
].string
, opt_len
) == 0)
4556 mask
= attrs
[i
].mask
;
4557 opt_string
= attrs
[i
].string
;
4562 /* Process the option. */
4565 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4569 else if (type
== ix86_opt_isa
)
4571 struct cl_decoded_option decoded
;
4573 generate_option (opt
, NULL
, opt_set_p
, CL_TARGET
, &decoded
);
4574 ix86_handle_option (opts
, opts_set
,
4575 &decoded
, input_location
);
4578 else if (type
== ix86_opt_yes
|| type
== ix86_opt_no
)
4580 if (type
== ix86_opt_no
)
4581 opt_set_p
= !opt_set_p
;
4584 opts
->x_target_flags
|= mask
;
4586 opts
->x_target_flags
&= ~mask
;
4589 else if (type
== ix86_opt_str
)
4593 error ("option(\"%s\") was already specified", opt_string
);
4597 p_strings
[opt
] = xstrdup (p
+ opt_len
);
4600 else if (type
== ix86_opt_enum
)
4605 arg_ok
= opt_enum_arg_to_value (opt
, p
+ opt_len
, &value
, CL_TARGET
);
4607 set_option (opts
, enum_opts_set
, opt
, value
,
4608 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
4612 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4624 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4627 ix86_valid_target_attribute_tree (tree args
,
4628 struct gcc_options
*opts
,
4629 struct gcc_options
*opts_set
)
4631 const char *orig_arch_string
= ix86_arch_string
;
4632 const char *orig_tune_string
= ix86_tune_string
;
4633 enum fpmath_unit orig_fpmath_set
= opts_set
->x_ix86_fpmath
;
4634 int orig_tune_defaulted
= ix86_tune_defaulted
;
4635 int orig_arch_specified
= ix86_arch_specified
;
4636 char *option_strings
[IX86_FUNCTION_SPECIFIC_MAX
] = { NULL
, NULL
};
4639 struct cl_target_option
*def
4640 = TREE_TARGET_OPTION (target_option_default_node
);
4641 struct gcc_options enum_opts_set
;
4643 memset (&enum_opts_set
, 0, sizeof (enum_opts_set
));
4645 /* Process each of the options on the chain. */
4646 if (! ix86_valid_target_attribute_inner_p (args
, option_strings
, opts
,
4647 opts_set
, &enum_opts_set
))
4648 return error_mark_node
;
4650 /* If the changed options are different from the default, rerun
4651 ix86_option_override_internal, and then save the options away.
4652 The string options are are attribute options, and will be undone
4653 when we copy the save structure. */
4654 if (opts
->x_ix86_isa_flags
!= def
->x_ix86_isa_flags
4655 || opts
->x_target_flags
!= def
->x_target_flags
4656 || option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
]
4657 || option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
]
4658 || enum_opts_set
.x_ix86_fpmath
)
4660 /* If we are using the default tune= or arch=, undo the string assigned,
4661 and use the default. */
4662 if (option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
])
4663 opts
->x_ix86_arch_string
= option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
];
4664 else if (!orig_arch_specified
)
4665 opts
->x_ix86_arch_string
= NULL
;
4667 if (option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
])
4668 opts
->x_ix86_tune_string
= option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
];
4669 else if (orig_tune_defaulted
)
4670 opts
->x_ix86_tune_string
= NULL
;
4672 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4673 if (enum_opts_set
.x_ix86_fpmath
)
4674 opts_set
->x_ix86_fpmath
= (enum fpmath_unit
) 1;
4675 else if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
4676 && TARGET_SSE_P (opts
->x_ix86_isa_flags
))
4678 opts
->x_ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
4679 opts_set
->x_ix86_fpmath
= (enum fpmath_unit
) 1;
4682 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4683 ix86_option_override_internal (false, opts
, opts_set
);
4685 /* Add any builtin functions with the new isa if any. */
4686 ix86_add_new_builtins (opts
->x_ix86_isa_flags
);
4688 /* Save the current options unless we are validating options for
4690 t
= build_target_option_node (opts
);
4692 opts
->x_ix86_arch_string
= orig_arch_string
;
4693 opts
->x_ix86_tune_string
= orig_tune_string
;
4694 opts_set
->x_ix86_fpmath
= orig_fpmath_set
;
4696 /* Free up memory allocated to hold the strings */
4697 for (i
= 0; i
< IX86_FUNCTION_SPECIFIC_MAX
; i
++)
4698 free (option_strings
[i
]);
4704 /* Hook to validate attribute((target("string"))). */
4707 ix86_valid_target_attribute_p (tree fndecl
,
4708 tree
ARG_UNUSED (name
),
4710 int ARG_UNUSED (flags
))
4712 struct gcc_options func_options
;
4713 tree new_target
, new_optimize
;
4716 /* attribute((target("default"))) does nothing, beyond
4717 affecting multi-versioning. */
4718 if (TREE_VALUE (args
)
4719 && TREE_CODE (TREE_VALUE (args
)) == STRING_CST
4720 && TREE_CHAIN (args
) == NULL_TREE
4721 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args
)), "default") == 0)
4724 tree old_optimize
= build_optimization_node (&global_options
);
4726 /* Get the optimization options of the current function. */
4727 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
4730 func_optimize
= old_optimize
;
4732 /* Init func_options. */
4733 memset (&func_options
, 0, sizeof (func_options
));
4734 init_options_struct (&func_options
, NULL
);
4735 lang_hooks
.init_options_struct (&func_options
);
4737 cl_optimization_restore (&func_options
,
4738 TREE_OPTIMIZATION (func_optimize
));
4740 /* Initialize func_options to the default before its target options can
4742 cl_target_option_restore (&func_options
,
4743 TREE_TARGET_OPTION (target_option_default_node
));
4745 new_target
= ix86_valid_target_attribute_tree (args
, &func_options
,
4746 &global_options_set
);
4748 new_optimize
= build_optimization_node (&func_options
);
4750 if (new_target
== error_mark_node
)
4753 else if (fndecl
&& new_target
)
4755 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
4757 if (old_optimize
!= new_optimize
)
4758 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
4765 /* Hook to determine if one function can safely inline another. */
4768 ix86_can_inline_p (tree caller
, tree callee
)
4771 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
4772 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
4774 /* If callee has no option attributes, then it is ok to inline. */
4778 /* If caller has no option attributes, but callee does then it is not ok to
4780 else if (!caller_tree
)
4785 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
4786 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
4788 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
4789 can inline a SSE2 function but a SSE2 function can't inline a SSE4
4791 if ((caller_opts
->x_ix86_isa_flags
& callee_opts
->x_ix86_isa_flags
)
4792 != callee_opts
->x_ix86_isa_flags
)
4795 /* See if we have the same non-isa options. */
4796 else if (caller_opts
->x_target_flags
!= callee_opts
->x_target_flags
)
4799 /* See if arch, tune, etc. are the same. */
4800 else if (caller_opts
->arch
!= callee_opts
->arch
)
4803 else if (caller_opts
->tune
!= callee_opts
->tune
)
4806 else if (caller_opts
->x_ix86_fpmath
!= callee_opts
->x_ix86_fpmath
)
4809 else if (caller_opts
->branch_cost
!= callee_opts
->branch_cost
)
4820 /* Remember the last target of ix86_set_current_function. */
4821 static GTY(()) tree ix86_previous_fndecl
;
4823 /* Invalidate ix86_previous_fndecl cache. */
4825 ix86_reset_previous_fndecl (void)
4827 ix86_previous_fndecl
= NULL_TREE
;
4830 /* Establish appropriate back-end context for processing the function
4831 FNDECL. The argument might be NULL to indicate processing at top
4832 level, outside of any function scope. */
4834 ix86_set_current_function (tree fndecl
)
4836 /* Only change the context if the function changes. This hook is called
4837 several times in the course of compiling a function, and we don't want to
4838 slow things down too much or call target_reinit when it isn't safe. */
4839 if (fndecl
&& fndecl
!= ix86_previous_fndecl
)
4841 tree old_tree
= (ix86_previous_fndecl
4842 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl
)
4845 tree new_tree
= (fndecl
4846 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl
)
4849 ix86_previous_fndecl
= fndecl
;
4850 if (old_tree
== new_tree
)
4855 cl_target_option_restore (&global_options
,
4856 TREE_TARGET_OPTION (new_tree
));
4862 struct cl_target_option
*def
4863 = TREE_TARGET_OPTION (target_option_current_node
);
4865 cl_target_option_restore (&global_options
, def
);
4872 /* Return true if this goes in large data/bss. */
4875 ix86_in_large_data_p (tree exp
)
4877 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
4880 /* Functions are never large data. */
4881 if (TREE_CODE (exp
) == FUNCTION_DECL
)
4884 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
4886 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
4887 if (strcmp (section
, ".ldata") == 0
4888 || strcmp (section
, ".lbss") == 0)
4894 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
4896 /* If this is an incomplete type with size 0, then we can't put it
4897 in data because it might be too big when completed. */
4898 if (!size
|| size
> ix86_section_threshold
)
4905 /* Switch to the appropriate section for output of DECL.
4906 DECL is either a `VAR_DECL' node or a constant of some sort.
4907 RELOC indicates whether forming the initial value of DECL requires
4908 link-time relocations. */
4910 ATTRIBUTE_UNUSED
static section
*
4911 x86_64_elf_select_section (tree decl
, int reloc
,
4912 unsigned HOST_WIDE_INT align
)
4914 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4915 && ix86_in_large_data_p (decl
))
4917 const char *sname
= NULL
;
4918 unsigned int flags
= SECTION_WRITE
;
4919 switch (categorize_decl_for_section (decl
, reloc
))
4924 case SECCAT_DATA_REL
:
4925 sname
= ".ldata.rel";
4927 case SECCAT_DATA_REL_LOCAL
:
4928 sname
= ".ldata.rel.local";
4930 case SECCAT_DATA_REL_RO
:
4931 sname
= ".ldata.rel.ro";
4933 case SECCAT_DATA_REL_RO_LOCAL
:
4934 sname
= ".ldata.rel.ro.local";
4938 flags
|= SECTION_BSS
;
4941 case SECCAT_RODATA_MERGE_STR
:
4942 case SECCAT_RODATA_MERGE_STR_INIT
:
4943 case SECCAT_RODATA_MERGE_CONST
:
4947 case SECCAT_SRODATA
:
4954 /* We don't split these for medium model. Place them into
4955 default sections and hope for best. */
4960 /* We might get called with string constants, but get_named_section
4961 doesn't like them as they are not DECLs. Also, we need to set
4962 flags in that case. */
4964 return get_section (sname
, flags
, NULL
);
4965 return get_named_section (decl
, sname
, reloc
);
4968 return default_elf_select_section (decl
, reloc
, align
);
4971 /* Select a set of attributes for section NAME based on the properties
4972 of DECL and whether or not RELOC indicates that DECL's initializer
4973 might contain runtime relocations. */
4975 static unsigned int ATTRIBUTE_UNUSED
4976 x86_64_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
4978 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
4980 if (decl
== NULL_TREE
4981 && (strcmp (name
, ".ldata.rel.ro") == 0
4982 || strcmp (name
, ".ldata.rel.ro.local") == 0))
4983 flags
|= SECTION_RELRO
;
4985 if (strcmp (name
, ".lbss") == 0
4986 || strncmp (name
, ".lbss.", 5) == 0
4987 || strncmp (name
, ".gnu.linkonce.lb.", 16) == 0)
4988 flags
|= SECTION_BSS
;
4993 /* Build up a unique section name, expressed as a
4994 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4995 RELOC indicates whether the initial value of EXP requires
4996 link-time relocations. */
4998 static void ATTRIBUTE_UNUSED
4999 x86_64_elf_unique_section (tree decl
, int reloc
)
5001 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
5002 && ix86_in_large_data_p (decl
))
5004 const char *prefix
= NULL
;
5005 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5006 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
5008 switch (categorize_decl_for_section (decl
, reloc
))
5011 case SECCAT_DATA_REL
:
5012 case SECCAT_DATA_REL_LOCAL
:
5013 case SECCAT_DATA_REL_RO
:
5014 case SECCAT_DATA_REL_RO_LOCAL
:
5015 prefix
= one_only
? ".ld" : ".ldata";
5018 prefix
= one_only
? ".lb" : ".lbss";
5021 case SECCAT_RODATA_MERGE_STR
:
5022 case SECCAT_RODATA_MERGE_STR_INIT
:
5023 case SECCAT_RODATA_MERGE_CONST
:
5024 prefix
= one_only
? ".lr" : ".lrodata";
5026 case SECCAT_SRODATA
:
5033 /* We don't split these for medium model. Place them into
5034 default sections and hope for best. */
5039 const char *name
, *linkonce
;
5042 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
5043 name
= targetm
.strip_name_encoding (name
);
5045 /* If we're using one_only, then there needs to be a .gnu.linkonce
5046 prefix to the section name. */
5047 linkonce
= one_only
? ".gnu.linkonce" : "";
5049 string
= ACONCAT ((linkonce
, prefix
, ".", name
, NULL
));
5051 DECL_SECTION_NAME (decl
) = build_string (strlen (string
), string
);
5055 default_unique_section (decl
, reloc
);
5058 #ifdef COMMON_ASM_OP
5059 /* This says how to output assembler code to declare an
5060 uninitialized external linkage data object.
5062 For medium model x86-64 we need to use .largecomm opcode for
5065 x86_elf_aligned_common (FILE *file
,
5066 const char *name
, unsigned HOST_WIDE_INT size
,
5069 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
5070 && size
> (unsigned int)ix86_section_threshold
)
5071 fputs (".largecomm\t", file
);
5073 fputs (COMMON_ASM_OP
, file
);
5074 assemble_name (file
, name
);
5075 fprintf (file
, "," HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
5076 size
, align
/ BITS_PER_UNIT
);
5080 /* Utility function for targets to use in implementing
5081 ASM_OUTPUT_ALIGNED_BSS. */
5084 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
5085 const char *name
, unsigned HOST_WIDE_INT size
,
5088 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
5089 && size
> (unsigned int)ix86_section_threshold
)
5090 switch_to_section (get_named_section (decl
, ".lbss", 0));
5092 switch_to_section (bss_section
);
5093 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
5094 #ifdef ASM_DECLARE_OBJECT_NAME
5095 last_assemble_variable_decl
= decl
;
5096 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
5098 /* Standard thing is just output label for the object. */
5099 ASM_OUTPUT_LABEL (file
, name
);
5100 #endif /* ASM_DECLARE_OBJECT_NAME */
5101 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
5104 /* Decide whether we must probe the stack before any space allocation
5105 on this target. It's essentially TARGET_STACK_PROBE except when
5106 -fstack-check causes the stack to be already probed differently. */
5109 ix86_target_stack_probe (void)
5111 /* Do not probe the stack twice if static stack checking is enabled. */
5112 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
5115 return TARGET_STACK_PROBE
;
5118 /* Decide whether we can make a sibling call to a function. DECL is the
5119 declaration of the function being targeted by the call and EXP is the
5120 CALL_EXPR representing the call. */
5123 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
5125 tree type
, decl_or_type
;
5128 /* If we are generating position-independent code, we cannot sibcall
5129 optimize any indirect call, or a direct call to a global function,
5130 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5134 && (!decl
|| !targetm
.binds_local_p (decl
)))
5137 /* If we need to align the outgoing stack, then sibcalling would
5138 unalign the stack, which may break the called function. */
5139 if (ix86_minimum_incoming_stack_boundary (true)
5140 < PREFERRED_STACK_BOUNDARY
)
5145 decl_or_type
= decl
;
5146 type
= TREE_TYPE (decl
);
5150 /* We're looking at the CALL_EXPR, we need the type of the function. */
5151 type
= CALL_EXPR_FN (exp
); /* pointer expression */
5152 type
= TREE_TYPE (type
); /* pointer type */
5153 type
= TREE_TYPE (type
); /* function type */
5154 decl_or_type
= type
;
5157 /* Check that the return value locations are the same. Like
5158 if we are returning floats on the 80387 register stack, we cannot
5159 make a sibcall from a function that doesn't return a float to a
5160 function that does or, conversely, from a function that does return
5161 a float to a function that doesn't; the necessary stack adjustment
5162 would not be executed. This is also the place we notice
5163 differences in the return value ABI. Note that it is ok for one
5164 of the functions to have void return type as long as the return
5165 value of the other is passed in a register. */
5166 a
= ix86_function_value (TREE_TYPE (exp
), decl_or_type
, false);
5167 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
5169 if (STACK_REG_P (a
) || STACK_REG_P (b
))
5171 if (!rtx_equal_p (a
, b
))
5174 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
5176 else if (!rtx_equal_p (a
, b
))
5181 /* The SYSV ABI has more call-clobbered registers;
5182 disallow sibcalls from MS to SYSV. */
5183 if (cfun
->machine
->call_abi
== MS_ABI
5184 && ix86_function_type_abi (type
) == SYSV_ABI
)
5189 /* If this call is indirect, we'll need to be able to use a
5190 call-clobbered register for the address of the target function.
5191 Make sure that all such registers are not used for passing
5192 parameters. Note that DLLIMPORT functions are indirect. */
5194 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& DECL_DLLIMPORT_P (decl
)))
5196 if (ix86_function_regparm (type
, NULL
) >= 3)
5198 /* ??? Need to count the actual number of registers to be used,
5199 not the possible number of registers. Fix later. */
5205 /* Otherwise okay. That also includes certain types of indirect calls. */
5209 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5210 and "sseregparm" calling convention attributes;
5211 arguments as in struct attribute_spec.handler. */
5214 ix86_handle_cconv_attribute (tree
*node
, tree name
,
5216 int flags ATTRIBUTE_UNUSED
,
5219 if (TREE_CODE (*node
) != FUNCTION_TYPE
5220 && TREE_CODE (*node
) != METHOD_TYPE
5221 && TREE_CODE (*node
) != FIELD_DECL
5222 && TREE_CODE (*node
) != TYPE_DECL
)
5224 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5226 *no_add_attrs
= true;
5230 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5231 if (is_attribute_p ("regparm", name
))
5235 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5237 error ("fastcall and regparm attributes are not compatible");
5240 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5242 error ("regparam and thiscall attributes are not compatible");
5245 cst
= TREE_VALUE (args
);
5246 if (TREE_CODE (cst
) != INTEGER_CST
)
5248 warning (OPT_Wattributes
,
5249 "%qE attribute requires an integer constant argument",
5251 *no_add_attrs
= true;
5253 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
5255 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
5257 *no_add_attrs
= true;
5265 /* Do not warn when emulating the MS ABI. */
5266 if ((TREE_CODE (*node
) != FUNCTION_TYPE
5267 && TREE_CODE (*node
) != METHOD_TYPE
)
5268 || ix86_function_type_abi (*node
) != MS_ABI
)
5269 warning (OPT_Wattributes
, "%qE attribute ignored",
5271 *no_add_attrs
= true;
5275 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5276 if (is_attribute_p ("fastcall", name
))
5278 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5280 error ("fastcall and cdecl attributes are not compatible");
5282 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5284 error ("fastcall and stdcall attributes are not compatible");
5286 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
5288 error ("fastcall and regparm attributes are not compatible");
5290 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5292 error ("fastcall and thiscall attributes are not compatible");
5296 /* Can combine stdcall with fastcall (redundant), regparm and
5298 else if (is_attribute_p ("stdcall", name
))
5300 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5302 error ("stdcall and cdecl attributes are not compatible");
5304 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5306 error ("stdcall and fastcall attributes are not compatible");
5308 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5310 error ("stdcall and thiscall attributes are not compatible");
5314 /* Can combine cdecl with regparm and sseregparm. */
5315 else if (is_attribute_p ("cdecl", name
))
5317 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5319 error ("stdcall and cdecl attributes are not compatible");
5321 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5323 error ("fastcall and cdecl attributes are not compatible");
5325 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5327 error ("cdecl and thiscall attributes are not compatible");
5330 else if (is_attribute_p ("thiscall", name
))
5332 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
5333 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
5335 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5337 error ("stdcall and thiscall attributes are not compatible");
5339 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5341 error ("fastcall and thiscall attributes are not compatible");
5343 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5345 error ("cdecl and thiscall attributes are not compatible");
5349 /* Can combine sseregparm with all attributes. */
5354 /* The transactional memory builtins are implicitly regparm or fastcall
5355 depending on the ABI. Override the generic do-nothing attribute that
5356 these builtins were declared with, and replace it with one of the two
5357 attributes that we expect elsewhere. */
5360 ix86_handle_tm_regparm_attribute (tree
*node
, tree name ATTRIBUTE_UNUSED
,
5361 tree args ATTRIBUTE_UNUSED
,
5362 int flags
, bool *no_add_attrs
)
5366 /* In no case do we want to add the placeholder attribute. */
5367 *no_add_attrs
= true;
5369 /* The 64-bit ABI is unchanged for transactional memory. */
5373 /* ??? Is there a better way to validate 32-bit windows? We have
5374 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5375 if (CHECK_STACK_LIMIT
> 0)
5376 alt
= tree_cons (get_identifier ("fastcall"), NULL
, NULL
);
5379 alt
= tree_cons (NULL
, build_int_cst (NULL
, 2), NULL
);
5380 alt
= tree_cons (get_identifier ("regparm"), alt
, NULL
);
5382 decl_attributes (node
, alt
, flags
);
5387 /* This function determines from TYPE the calling-convention. */
5390 ix86_get_callcvt (const_tree type
)
5392 unsigned int ret
= 0;
5397 return IX86_CALLCVT_CDECL
;
5399 attrs
= TYPE_ATTRIBUTES (type
);
5400 if (attrs
!= NULL_TREE
)
5402 if (lookup_attribute ("cdecl", attrs
))
5403 ret
|= IX86_CALLCVT_CDECL
;
5404 else if (lookup_attribute ("stdcall", attrs
))
5405 ret
|= IX86_CALLCVT_STDCALL
;
5406 else if (lookup_attribute ("fastcall", attrs
))
5407 ret
|= IX86_CALLCVT_FASTCALL
;
5408 else if (lookup_attribute ("thiscall", attrs
))
5409 ret
|= IX86_CALLCVT_THISCALL
;
5411 /* Regparam isn't allowed for thiscall and fastcall. */
5412 if ((ret
& (IX86_CALLCVT_THISCALL
| IX86_CALLCVT_FASTCALL
)) == 0)
5414 if (lookup_attribute ("regparm", attrs
))
5415 ret
|= IX86_CALLCVT_REGPARM
;
5416 if (lookup_attribute ("sseregparm", attrs
))
5417 ret
|= IX86_CALLCVT_SSEREGPARM
;
5420 if (IX86_BASE_CALLCVT(ret
) != 0)
5424 is_stdarg
= stdarg_p (type
);
5425 if (TARGET_RTD
&& !is_stdarg
)
5426 return IX86_CALLCVT_STDCALL
| ret
;
5430 || TREE_CODE (type
) != METHOD_TYPE
5431 || ix86_function_type_abi (type
) != MS_ABI
)
5432 return IX86_CALLCVT_CDECL
| ret
;
5434 return IX86_CALLCVT_THISCALL
;
5437 /* Return 0 if the attributes for two types are incompatible, 1 if they
5438 are compatible, and 2 if they are nearly compatible (which causes a
5439 warning to be generated). */
5442 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
5444 unsigned int ccvt1
, ccvt2
;
5446 if (TREE_CODE (type1
) != FUNCTION_TYPE
5447 && TREE_CODE (type1
) != METHOD_TYPE
)
5450 ccvt1
= ix86_get_callcvt (type1
);
5451 ccvt2
= ix86_get_callcvt (type2
);
5454 if (ix86_function_regparm (type1
, NULL
)
5455 != ix86_function_regparm (type2
, NULL
))
5461 /* Return the regparm value for a function with the indicated TYPE and DECL.
5462 DECL may be NULL when calling function indirectly
5463 or considering a libcall. */
5466 ix86_function_regparm (const_tree type
, const_tree decl
)
5473 return (ix86_function_type_abi (type
) == SYSV_ABI
5474 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
5475 ccvt
= ix86_get_callcvt (type
);
5476 regparm
= ix86_regparm
;
5478 if ((ccvt
& IX86_CALLCVT_REGPARM
) != 0)
5480 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
5483 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
5487 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5489 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5492 /* Use register calling convention for local functions when possible. */
5494 && TREE_CODE (decl
) == FUNCTION_DECL
5496 && !(profile_flag
&& !flag_fentry
))
5498 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5499 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE (decl
));
5500 if (i
&& i
->local
&& i
->can_change_signature
)
5502 int local_regparm
, globals
= 0, regno
;
5504 /* Make sure no regparm register is taken by a
5505 fixed register variable. */
5506 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
; local_regparm
++)
5507 if (fixed_regs
[local_regparm
])
5510 /* We don't want to use regparm(3) for nested functions as
5511 these use a static chain pointer in the third argument. */
5512 if (local_regparm
== 3 && DECL_STATIC_CHAIN (decl
))
5515 /* In 32-bit mode save a register for the split stack. */
5516 if (!TARGET_64BIT
&& local_regparm
== 3 && flag_split_stack
)
5519 /* Each fixed register usage increases register pressure,
5520 so less registers should be used for argument passing.
5521 This functionality can be overriden by an explicit
5523 for (regno
= AX_REG
; regno
<= DI_REG
; regno
++)
5524 if (fixed_regs
[regno
])
5528 = globals
< local_regparm
? local_regparm
- globals
: 0;
5530 if (local_regparm
> regparm
)
5531 regparm
= local_regparm
;
5538 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5539 DFmode (2) arguments in SSE registers for a function with the
5540 indicated TYPE and DECL. DECL may be NULL when calling function
5541 indirectly or considering a libcall. Otherwise return 0. */
5544 ix86_function_sseregparm (const_tree type
, const_tree decl
, bool warn
)
5546 gcc_assert (!TARGET_64BIT
);
5548 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5549 by the sseregparm attribute. */
5550 if (TARGET_SSEREGPARM
5551 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
5558 error ("calling %qD with attribute sseregparm without "
5559 "SSE/SSE2 enabled", decl
);
5561 error ("calling %qT with attribute sseregparm without "
5562 "SSE/SSE2 enabled", type
);
5570 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5571 (and DFmode for SSE2) arguments in SSE registers. */
5572 if (decl
&& TARGET_SSE_MATH
&& optimize
5573 && !(profile_flag
&& !flag_fentry
))
5575 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5576 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
5577 if (i
&& i
->local
&& i
->can_change_signature
)
5578 return TARGET_SSE2
? 2 : 1;
5584 /* Return true if EAX is live at the start of the function. Used by
5585 ix86_expand_prologue to determine if we need special help before
5586 calling allocate_stack_worker. */
5589 ix86_eax_live_at_start_p (void)
5591 /* Cheat. Don't bother working forward from ix86_function_regparm
5592 to the function type to whether an actual argument is located in
5593 eax. Instead just look at cfg info, which is still close enough
5594 to correct at this point. This gives false positives for broken
5595 functions that might use uninitialized data that happens to be
5596 allocated in eax, but who cares? */
5597 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 0);
5601 ix86_keep_aggregate_return_pointer (tree fntype
)
5607 attr
= lookup_attribute ("callee_pop_aggregate_return",
5608 TYPE_ATTRIBUTES (fntype
));
5610 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
))) == 0);
5612 /* For 32-bit MS-ABI the default is to keep aggregate
5614 if (ix86_function_type_abi (fntype
) == MS_ABI
)
5617 return KEEP_AGGREGATE_RETURN_POINTER
!= 0;
5620 /* Value is the number of bytes of arguments automatically
5621 popped when returning from a subroutine call.
5622 FUNDECL is the declaration node of the function (as a tree),
5623 FUNTYPE is the data type of the function (as a tree),
5624 or for a library call it is an identifier node for the subroutine name.
5625 SIZE is the number of bytes of arguments passed on the stack.
5627 On the 80386, the RTD insn may be used to pop them if the number
5628 of args is fixed, but if the number is variable then the caller
5629 must pop them all. RTD can't be used for library calls now
5630 because the library is compiled with the Unix compiler.
5631 Use of RTD is a selectable option, since it is incompatible with
5632 standard Unix calling sequences. If the option is not selected,
5633 the caller must always pop the args.
5635 The attribute stdcall is equivalent to RTD on a per module basis. */
5638 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
5642 /* None of the 64-bit ABIs pop arguments. */
5646 ccvt
= ix86_get_callcvt (funtype
);
5648 if ((ccvt
& (IX86_CALLCVT_STDCALL
| IX86_CALLCVT_FASTCALL
5649 | IX86_CALLCVT_THISCALL
)) != 0
5650 && ! stdarg_p (funtype
))
5653 /* Lose any fake structure return argument if it is passed on the stack. */
5654 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
5655 && !ix86_keep_aggregate_return_pointer (funtype
))
5657 int nregs
= ix86_function_regparm (funtype
, fundecl
);
5659 return GET_MODE_SIZE (Pmode
);
5665 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5668 ix86_legitimate_combined_insn (rtx insn
)
5670 /* Check operand constraints in case hard registers were propagated
5671 into insn pattern. This check prevents combine pass from
5672 generating insn patterns with invalid hard register operands.
5673 These invalid insns can eventually confuse reload to error out
5674 with a spill failure. See also PRs 46829 and 46843. */
5675 if ((INSN_CODE (insn
) = recog (PATTERN (insn
), insn
, 0)) >= 0)
5679 extract_insn (insn
);
5680 preprocess_constraints ();
5682 for (i
= 0; i
< recog_data
.n_operands
; i
++)
5684 rtx op
= recog_data
.operand
[i
];
5685 enum machine_mode mode
= GET_MODE (op
);
5686 struct operand_alternative
*op_alt
;
5691 /* A unary operator may be accepted by the predicate, but it
5692 is irrelevant for matching constraints. */
5696 if (GET_CODE (op
) == SUBREG
)
5698 if (REG_P (SUBREG_REG (op
))
5699 && REGNO (SUBREG_REG (op
)) < FIRST_PSEUDO_REGISTER
)
5700 offset
= subreg_regno_offset (REGNO (SUBREG_REG (op
)),
5701 GET_MODE (SUBREG_REG (op
)),
5704 op
= SUBREG_REG (op
);
5707 if (!(REG_P (op
) && HARD_REGISTER_P (op
)))
5710 op_alt
= recog_op_alt
[i
];
5712 /* Operand has no constraints, anything is OK. */
5713 win
= !recog_data
.n_alternatives
;
5715 for (j
= 0; j
< recog_data
.n_alternatives
; j
++)
5717 if (op_alt
[j
].anything_ok
5718 || (op_alt
[j
].matches
!= -1
5720 (recog_data
.operand
[i
],
5721 recog_data
.operand
[op_alt
[j
].matches
]))
5722 || reg_fits_class_p (op
, op_alt
[j
].cl
, offset
, mode
))
5737 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook: constant added to a
   shifted address to locate its AddressSanitizer shadow byte.
   NOTE(review): this chunk is an elided extraction — surrounding braces
   are missing from the visible text; code tokens left untouched.  */
5739 static unsigned HOST_WIDE_INT
5740 ix86_asan_shadow_offset (void)
/* LP64 Darwin uses 1<<44; other LP64 targets 0x7fff8000; ILP32 uses 1<<29.  */
5742 return TARGET_LP64
? (TARGET_MACHO
? (HOST_WIDE_INT_1
<< 44)
5743 : HOST_WIDE_INT_C (0x7fff8000))
5744 : (HOST_WIDE_INT_1
<< 29);
5747 /* Argument support functions. */
5749 /* Return true when register may be used to pass function parameters. */
5751 ix86_function_arg_regno_p (int regno
)
5754 const int *parm_regs
;
5759 return (regno
< REGPARM_MAX
5760 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
5762 return (regno
< REGPARM_MAX
5763 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
5764 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
5765 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
5766 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
5769 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
5770 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
5773 /* TODO: The function should depend on current function ABI but
5774 builtins.c would need updating then. Therefore we use the
5777 /* RAX is used as hidden argument to va_arg functions. */
5778 if (ix86_abi
== SYSV_ABI
&& regno
== AX_REG
)
5781 if (ix86_abi
== MS_ABI
)
5782 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
5784 parm_regs
= x86_64_int_parameter_registers
;
5785 for (i
= 0; i
< (ix86_abi
== MS_ABI
5786 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
5787 if (regno
== parm_regs
[i
])
5792 /* Return if we do not know how to pass TYPE solely in registers. */
/* NOTE(review): elided extraction — the function's return-type line, braces,
   and the early `return true` after the first test are missing from view;
   code tokens below are unchanged.  */
5795 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
/* Defer to the generic variable-size / padding test first.  */
5797 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
5800 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5801 The layout_type routine is crafty and tries to trick us into passing
5802 currently unsupported vector types on the stack by using TImode. */
5803 return (!TARGET_64BIT
&& mode
== TImode
5804 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
5807 /* It returns the size, in bytes, of the area reserved for arguments passed
5808 in registers for the function represented by fndecl dependent to the used
5811 ix86_reg_parm_stack_space (const_tree fndecl
)
5813 enum calling_abi call_abi
= SYSV_ABI
;
5814 if (fndecl
!= NULL_TREE
&& TREE_CODE (fndecl
) == FUNCTION_DECL
)
5815 call_abi
= ix86_function_abi (fndecl
);
5817 call_abi
= ix86_function_type_abi (fndecl
);
5818 if (TARGET_64BIT
&& call_abi
== MS_ABI
)
5823 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5826 ix86_function_type_abi (const_tree fntype
)
5828 if (fntype
!= NULL_TREE
&& TYPE_ATTRIBUTES (fntype
) != NULL_TREE
)
5830 enum calling_abi abi
= ix86_abi
;
5831 if (abi
== SYSV_ABI
)
5833 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype
)))
5836 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype
)))
5843 /* We add this as a workaround in order to use libc_has_function
   hook in i386.md — simply forwards to the target hook.
   NOTE(review): elided extraction; braces and return-type line not
   visible, code tokens unchanged.  */
5846 ix86_libc_has_function (enum function_class fn_class
)
5848 return targetm
.libc_has_function (fn_class
);
5852 ix86_function_ms_hook_prologue (const_tree fn
)
5854 if (fn
&& lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn
)))
5856 if (decl_function_context (fn
) != NULL_TREE
)
5857 error_at (DECL_SOURCE_LOCATION (fn
),
5858 "ms_hook_prologue is not compatible with nested function");
/* Return the calling ABI (SYSV_ABI / MS_ABI) of function decl FNDECL,
   derived from its type.  NOTE(review): elided extraction — an early
   guard (presumably for null FNDECL / !TARGET_64BIT) is missing from
   view; code tokens unchanged.  */
5865 static enum calling_abi
5866 ix86_function_abi (const_tree fndecl
)
5870 return ix86_function_type_abi (TREE_TYPE (fndecl
));
5873 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
   ABI of the current (being-compiled) function, cached in
   cfun->machine->call_abi.  NOTE(review): elided extraction; braces
   and an apparent non-64-bit guard are missing from view.  */
5876 ix86_cfun_abi (void)
5880 return cfun
->machine
->call_abi
;
5883 /* Write the extra assembler code needed to declare a function properly. */
5886 ix86_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
5889 bool is_ms_hook
= ix86_function_ms_hook_prologue (decl
);
5893 int i
, filler_count
= (TARGET_64BIT
? 32 : 16);
5894 unsigned int filler_cc
= 0xcccccccc;
5896 for (i
= 0; i
< filler_count
; i
+= 4)
5897 fprintf (asm_out_file
, ASM_LONG
" %#x\n", filler_cc
);
5900 #ifdef SUBTARGET_ASM_UNWIND_INIT
5901 SUBTARGET_ASM_UNWIND_INIT (asm_out_file
);
5904 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
5906 /* Output magic byte marker, if hot-patch attribute is set. */
5911 /* leaq [%rsp + 0], %rsp */
5912 asm_fprintf (asm_out_file
, ASM_BYTE
5913 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5917 /* movl.s %edi, %edi
5919 movl.s %esp, %ebp */
5920 asm_fprintf (asm_out_file
, ASM_BYTE
5921 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5927 extern void init_regs (void);
5929 /* Implementation of call abi switching target hook. Specific to FNDECL
5930 the specific call register sets are set. See also
5931 ix86_conditional_register_usage for more details. */
5933 ix86_call_abi_override (const_tree fndecl
)
5935 if (fndecl
== NULL_TREE
)
5936 cfun
->machine
->call_abi
= ix86_abi
;
5938 cfun
->machine
->call_abi
= ix86_function_type_abi (TREE_TYPE (fndecl
));
5941 /* 64-bit MS and SYSV ABI have different set of call used registers. Avoid
5942 expensive re-initialization of init_regs each time we switch function context
5943 since this is needed only during RTL expansion. */
5945 ix86_maybe_switch_abi (void)
5948 call_used_regs
[SI_REG
] == (cfun
->machine
->call_abi
== MS_ABI
))
5952 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5953 for a call to a function whose data type is FNTYPE.
5954 For a library call, FNTYPE is 0. */
5957 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
5958 tree fntype
, /* tree ptr for function decl */
5959 rtx libname
, /* SYMBOL_REF of library name or 0 */
5963 struct cgraph_local_info
*i
;
5965 memset (cum
, 0, sizeof (*cum
));
5969 i
= cgraph_local_info (fndecl
);
5970 cum
->call_abi
= ix86_function_abi (fndecl
);
5975 cum
->call_abi
= ix86_function_type_abi (fntype
);
5978 cum
->caller
= caller
;
5980 /* Set up the number of registers to use for passing arguments. */
5982 if (TARGET_64BIT
&& cum
->call_abi
== MS_ABI
&& !ACCUMULATE_OUTGOING_ARGS
)
5983 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5984 "or subtarget optimization implying it");
5985 cum
->nregs
= ix86_regparm
;
5988 cum
->nregs
= (cum
->call_abi
== SYSV_ABI
5989 ? X86_64_REGPARM_MAX
5990 : X86_64_MS_REGPARM_MAX
);
5994 cum
->sse_nregs
= SSE_REGPARM_MAX
;
5997 cum
->sse_nregs
= (cum
->call_abi
== SYSV_ABI
5998 ? X86_64_SSE_REGPARM_MAX
5999 : X86_64_MS_SSE_REGPARM_MAX
);
6003 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
6004 cum
->warn_avx
= true;
6005 cum
->warn_sse
= true;
6006 cum
->warn_mmx
= true;
6008 /* Because type might mismatch in between caller and callee, we need to
6009 use actual type of function for local calls.
6010 FIXME: cgraph_analyze can be told to actually record if function uses
6011 va_start so for local functions maybe_vaarg can be made aggressive
6013 FIXME: once typesytem is fixed, we won't need this code anymore. */
6014 if (i
&& i
->local
&& i
->can_change_signature
)
6015 fntype
= TREE_TYPE (fndecl
);
6016 cum
->maybe_vaarg
= (fntype
6017 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
6022 /* If there are variable arguments, then we won't pass anything
6023 in registers in 32-bit mode. */
6024 if (stdarg_p (fntype
))
6035 /* Use ecx and edx registers if function has fastcall attribute,
6036 else look for regparm information. */
6039 unsigned int ccvt
= ix86_get_callcvt (fntype
);
6040 if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
6043 cum
->fastcall
= 1; /* Same first register as in fastcall. */
6045 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
6051 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
6054 /* Set up the number of SSE registers used for passing SFmode
6055 and DFmode arguments. Warn for mismatching ABI. */
6056 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
, true);
6060 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6061 But in the case of vector types, it is some vector mode.
6063 When we have only some of our vector isa extensions enabled, then there
6064 are some modes for which vector_mode_supported_p is false. For these
6065 modes, the generic vector support in gcc will choose some non-vector mode
6066 in order to implement the type. By computing the natural mode, we'll
6067 select the proper ABI location for the operand and not depend on whatever
6068 the middle-end decides to do with these vector types.
6070 The midde-end can't deal with the vector types > 16 bytes. In this
6071 case, we return the original mode and warn ABI change if CUM isn't
6074 static enum machine_mode
6075 type_natural_mode (const_tree type
, const CUMULATIVE_ARGS
*cum
)
6077 enum machine_mode mode
= TYPE_MODE (type
);
6079 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
6081 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6082 if ((size
== 8 || size
== 16 || size
== 32)
6083 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6084 && TYPE_VECTOR_SUBPARTS (type
) > 1)
6086 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
6088 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
6089 mode
= MIN_MODE_VECTOR_FLOAT
;
6091 mode
= MIN_MODE_VECTOR_INT
;
6093 /* Get the mode which has this inner mode and number of units. */
6094 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
6095 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
6096 && GET_MODE_INNER (mode
) == innermode
)
6098 if (size
== 32 && !TARGET_AVX
)
6100 static bool warnedavx
;
6107 warning (0, "AVX vector argument without AVX "
6108 "enabled changes the ABI");
6110 return TYPE_MODE (type
);
6112 else if ((size
== 8 || size
== 16) && !TARGET_SSE
)
6114 static bool warnedsse
;
6121 warning (0, "SSE vector argument without SSE "
6122 "enabled changes the ABI");
6137 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6138 this may not agree with the mode that the type system has chosen for the
6139 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6140 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6143 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
6148 if (orig_mode
!= BLKmode
)
6149 tmp
= gen_rtx_REG (orig_mode
, regno
);
6152 tmp
= gen_rtx_REG (mode
, regno
);
6153 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
6154 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
6160 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6161 of this code is to classify each 8bytes of incoming argument by the register
6162 class and assign registers accordingly. */
6164 /* Return the union class of CLASS1 and CLASS2.
6165 See the x86-64 PS ABI for details. */
/* NOTE(review): elided extraction — the `return` statements for rules
   #1 and #2 (returning class1/class2) and the function braces are
   missing from the visible text; all code tokens below are unchanged.
   The rule numbering follows the psABI post-merger cleanup rules.  */
6167 static enum x86_64_reg_class
6168 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
6170 /* Rule #1: If both classes are equal, this is the resulting class. */
6171 if (class1
== class2
)
6174 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
   the other class (returns elided in this extraction). */
6176 if (class1
== X86_64_NO_CLASS
)
6178 if (class2
== X86_64_NO_CLASS
)
6181 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6182 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
6183 return X86_64_MEMORY_CLASS
;
6185 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* Special-case: INTEGERSI merged with SSESF stays INTEGERSI (both 4-byte). */
6186 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
6187 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
6188 return X86_64_INTEGERSI_CLASS
;
6189 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
6190 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
6191 return X86_64_INTEGER_CLASS
;
6193 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
   the result is MEMORY (x87 values cannot be merged with anything else). */
6195 if (class1
== X86_64_X87_CLASS
6196 || class1
== X86_64_X87UP_CLASS
6197 || class1
== X86_64_COMPLEX_X87_CLASS
6198 || class2
== X86_64_X87_CLASS
6199 || class2
== X86_64_X87UP_CLASS
6200 || class2
== X86_64_COMPLEX_X87_CLASS
)
6201 return X86_64_MEMORY_CLASS
;
6203 /* Rule #6: Otherwise class SSE is used. */
6204 return X86_64_SSE_CLASS
;
6207 /* Classify the argument of type TYPE and mode MODE.
6208 CLASSES will be filled by the register class used to pass each word
6209 of the operand. The number of words is returned. In case the parameter
6210 should be passed in memory, 0 is returned. As a special case for zero
6211 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6213 BIT_OFFSET is used internally for handling records and specifies offset
6214 of the offset in bits modulo 256 to avoid overflow cases.
6216 See the x86-64 PS ABI for details.
6220 classify_argument (enum machine_mode mode
, const_tree type
,
6221 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
6223 HOST_WIDE_INT bytes
=
6224 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6226 = (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6228 /* Variable sized entities are always passed/returned in memory. */
6232 if (mode
!= VOIDmode
6233 && targetm
.calls
.must_pass_in_stack (mode
, type
))
6236 if (type
&& AGGREGATE_TYPE_P (type
))
6240 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
6242 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6246 for (i
= 0; i
< words
; i
++)
6247 classes
[i
] = X86_64_NO_CLASS
;
6249 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6250 signalize memory class, so handle it as special case. */
6253 classes
[0] = X86_64_NO_CLASS
;
6257 /* Classify each field of record and merge classes. */
6258 switch (TREE_CODE (type
))
6261 /* And now merge the fields of structure. */
6262 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6264 if (TREE_CODE (field
) == FIELD_DECL
)
6268 if (TREE_TYPE (field
) == error_mark_node
)
6271 /* Bitfields are always classified as integer. Handle them
6272 early, since later code would consider them to be
6273 misaligned integers. */
6274 if (DECL_BIT_FIELD (field
))
6276 for (i
= (int_bit_position (field
)
6277 + (bit_offset
% 64)) / 8 / 8;
6278 i
< ((int_bit_position (field
) + (bit_offset
% 64))
6279 + tree_to_shwi (DECL_SIZE (field
))
6282 merge_classes (X86_64_INTEGER_CLASS
,
6289 type
= TREE_TYPE (field
);
6291 /* Flexible array member is ignored. */
6292 if (TYPE_MODE (type
) == BLKmode
6293 && TREE_CODE (type
) == ARRAY_TYPE
6294 && TYPE_SIZE (type
) == NULL_TREE
6295 && TYPE_DOMAIN (type
) != NULL_TREE
6296 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
6301 if (!warned
&& warn_psabi
)
6304 inform (input_location
,
6305 "the ABI of passing struct with"
6306 " a flexible array member has"
6307 " changed in GCC 4.4");
6311 num
= classify_argument (TYPE_MODE (type
), type
,
6313 (int_bit_position (field
)
6314 + bit_offset
) % 256);
6317 pos
= (int_bit_position (field
)
6318 + (bit_offset
% 64)) / 8 / 8;
6319 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
6321 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
6328 /* Arrays are handled as small records. */
6331 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
6332 TREE_TYPE (type
), subclasses
, bit_offset
);
6336 /* The partial classes are now full classes. */
6337 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
6338 subclasses
[0] = X86_64_SSE_CLASS
;
6339 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
6340 && !((bit_offset
% 64) == 0 && bytes
== 4))
6341 subclasses
[0] = X86_64_INTEGER_CLASS
;
6343 for (i
= 0; i
< words
; i
++)
6344 classes
[i
] = subclasses
[i
% num
];
6349 case QUAL_UNION_TYPE
:
6350 /* Unions are similar to RECORD_TYPE but offset is always 0.
6352 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6354 if (TREE_CODE (field
) == FIELD_DECL
)
6358 if (TREE_TYPE (field
) == error_mark_node
)
6361 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
6362 TREE_TYPE (field
), subclasses
,
6366 for (i
= 0; i
< num
; i
++)
6367 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
6378 /* When size > 16 bytes, if the first one isn't
6379 X86_64_SSE_CLASS or any other ones aren't
6380 X86_64_SSEUP_CLASS, everything should be passed in
6382 if (classes
[0] != X86_64_SSE_CLASS
)
6385 for (i
= 1; i
< words
; i
++)
6386 if (classes
[i
] != X86_64_SSEUP_CLASS
)
6390 /* Final merger cleanup. */
6391 for (i
= 0; i
< words
; i
++)
6393 /* If one class is MEMORY, everything should be passed in
6395 if (classes
[i
] == X86_64_MEMORY_CLASS
)
6398 /* The X86_64_SSEUP_CLASS should be always preceded by
6399 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6400 if (classes
[i
] == X86_64_SSEUP_CLASS
6401 && classes
[i
- 1] != X86_64_SSE_CLASS
6402 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
6404 /* The first one should never be X86_64_SSEUP_CLASS. */
6405 gcc_assert (i
!= 0);
6406 classes
[i
] = X86_64_SSE_CLASS
;
6409 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6410 everything should be passed in memory. */
6411 if (classes
[i
] == X86_64_X87UP_CLASS
6412 && (classes
[i
- 1] != X86_64_X87_CLASS
))
6416 /* The first one should never be X86_64_X87UP_CLASS. */
6417 gcc_assert (i
!= 0);
6418 if (!warned
&& warn_psabi
)
6421 inform (input_location
,
6422 "the ABI of passing union with long double"
6423 " has changed in GCC 4.4");
6431 /* Compute alignment needed. We align all types to natural boundaries with
6432 exception of XFmode that is aligned to 64bits. */
6433 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
6435 int mode_alignment
= GET_MODE_BITSIZE (mode
);
6438 mode_alignment
= 128;
6439 else if (mode
== XCmode
)
6440 mode_alignment
= 256;
6441 if (COMPLEX_MODE_P (mode
))
6442 mode_alignment
/= 2;
6443 /* Misaligned fields are always returned in memory. */
6444 if (bit_offset
% mode_alignment
)
6448 /* for V1xx modes, just use the base mode */
6449 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
6450 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
6451 mode
= GET_MODE_INNER (mode
);
6453 /* Classification of atomic types. */
6458 classes
[0] = X86_64_SSE_CLASS
;
6461 classes
[0] = X86_64_SSE_CLASS
;
6462 classes
[1] = X86_64_SSEUP_CLASS
;
6472 int size
= (bit_offset
% 64)+ (int) GET_MODE_BITSIZE (mode
);
6476 classes
[0] = X86_64_INTEGERSI_CLASS
;
6479 else if (size
<= 64)
6481 classes
[0] = X86_64_INTEGER_CLASS
;
6484 else if (size
<= 64+32)
6486 classes
[0] = X86_64_INTEGER_CLASS
;
6487 classes
[1] = X86_64_INTEGERSI_CLASS
;
6490 else if (size
<= 64+64)
6492 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6500 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6504 /* OImode shouldn't be used directly. */
6509 if (!(bit_offset
% 64))
6510 classes
[0] = X86_64_SSESF_CLASS
;
6512 classes
[0] = X86_64_SSE_CLASS
;
6515 classes
[0] = X86_64_SSEDF_CLASS
;
6518 classes
[0] = X86_64_X87_CLASS
;
6519 classes
[1] = X86_64_X87UP_CLASS
;
6522 classes
[0] = X86_64_SSE_CLASS
;
6523 classes
[1] = X86_64_SSEUP_CLASS
;
6526 classes
[0] = X86_64_SSE_CLASS
;
6527 if (!(bit_offset
% 64))
6533 if (!warned
&& warn_psabi
)
6536 inform (input_location
,
6537 "the ABI of passing structure with complex float"
6538 " member has changed in GCC 4.4");
6540 classes
[1] = X86_64_SSESF_CLASS
;
6544 classes
[0] = X86_64_SSEDF_CLASS
;
6545 classes
[1] = X86_64_SSEDF_CLASS
;
6548 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
6551 /* This modes is larger than 16 bytes. */
6559 classes
[0] = X86_64_SSE_CLASS
;
6560 classes
[1] = X86_64_SSEUP_CLASS
;
6561 classes
[2] = X86_64_SSEUP_CLASS
;
6562 classes
[3] = X86_64_SSEUP_CLASS
;
6570 classes
[0] = X86_64_SSE_CLASS
;
6571 classes
[1] = X86_64_SSEUP_CLASS
;
6579 classes
[0] = X86_64_SSE_CLASS
;
6585 gcc_assert (VECTOR_MODE_P (mode
));
6590 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
6592 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
6593 classes
[0] = X86_64_INTEGERSI_CLASS
;
6595 classes
[0] = X86_64_INTEGER_CLASS
;
6596 classes
[1] = X86_64_INTEGER_CLASS
;
6597 return 1 + (bytes
> 8);
6601 /* Examine the argument and return set number of register required in each
6602 class. Return 0 iff parameter should be passed in memory. */
6604 examine_argument (enum machine_mode mode
, const_tree type
, int in_return
,
6605 int *int_nregs
, int *sse_nregs
)
6607 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6608 int n
= classify_argument (mode
, type
, regclass
, 0);
6614 for (n
--; n
>= 0; n
--)
6615 switch (regclass
[n
])
6617 case X86_64_INTEGER_CLASS
:
6618 case X86_64_INTEGERSI_CLASS
:
6621 case X86_64_SSE_CLASS
:
6622 case X86_64_SSESF_CLASS
:
6623 case X86_64_SSEDF_CLASS
:
6626 case X86_64_NO_CLASS
:
6627 case X86_64_SSEUP_CLASS
:
6629 case X86_64_X87_CLASS
:
6630 case X86_64_X87UP_CLASS
:
6634 case X86_64_COMPLEX_X87_CLASS
:
6635 return in_return
? 2 : 0;
6636 case X86_64_MEMORY_CLASS
:
6642 /* Construct container for the argument used by GCC interface. See
6643 FUNCTION_ARG for the detailed description. */
6646 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
6647 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
6648 const int *intreg
, int sse_regno
)
6650 /* The following variables hold the static issued_error state. */
6651 static bool issued_sse_arg_error
;
6652 static bool issued_sse_ret_error
;
6653 static bool issued_x87_ret_error
;
6655 enum machine_mode tmpmode
;
6657 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6658 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6662 int needed_sseregs
, needed_intregs
;
6663 rtx exp
[MAX_CLASSES
];
6666 n
= classify_argument (mode
, type
, regclass
, 0);
6669 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
6672 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
6675 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6676 some less clueful developer tries to use floating-point anyway. */
6677 if (needed_sseregs
&& !TARGET_SSE
)
6681 if (!issued_sse_ret_error
)
6683 error ("SSE register return with SSE disabled");
6684 issued_sse_ret_error
= true;
6687 else if (!issued_sse_arg_error
)
6689 error ("SSE register argument with SSE disabled");
6690 issued_sse_arg_error
= true;
6695 /* Likewise, error if the ABI requires us to return values in the
6696 x87 registers and the user specified -mno-80387. */
6697 if (!TARGET_FLOAT_RETURNS_IN_80387
&& in_return
)
6698 for (i
= 0; i
< n
; i
++)
6699 if (regclass
[i
] == X86_64_X87_CLASS
6700 || regclass
[i
] == X86_64_X87UP_CLASS
6701 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
6703 if (!issued_x87_ret_error
)
6705 error ("x87 register return with x87 disabled");
6706 issued_x87_ret_error
= true;
6711 /* First construct simple cases. Avoid SCmode, since we want to use
6712 single register to pass this type. */
6713 if (n
== 1 && mode
!= SCmode
)
6714 switch (regclass
[0])
6716 case X86_64_INTEGER_CLASS
:
6717 case X86_64_INTEGERSI_CLASS
:
6718 return gen_rtx_REG (mode
, intreg
[0]);
6719 case X86_64_SSE_CLASS
:
6720 case X86_64_SSESF_CLASS
:
6721 case X86_64_SSEDF_CLASS
:
6722 if (mode
!= BLKmode
)
6723 return gen_reg_or_parallel (mode
, orig_mode
,
6724 SSE_REGNO (sse_regno
));
6726 case X86_64_X87_CLASS
:
6727 case X86_64_COMPLEX_X87_CLASS
:
6728 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
6729 case X86_64_NO_CLASS
:
6730 /* Zero sized array, struct or class. */
6736 && regclass
[0] == X86_64_SSE_CLASS
6737 && regclass
[1] == X86_64_SSEUP_CLASS
6739 return gen_reg_or_parallel (mode
, orig_mode
,
6740 SSE_REGNO (sse_regno
));
6742 && regclass
[0] == X86_64_SSE_CLASS
6743 && regclass
[1] == X86_64_SSEUP_CLASS
6744 && regclass
[2] == X86_64_SSEUP_CLASS
6745 && regclass
[3] == X86_64_SSEUP_CLASS
6747 return gen_reg_or_parallel (mode
, orig_mode
,
6748 SSE_REGNO (sse_regno
));
6750 && regclass
[0] == X86_64_X87_CLASS
6751 && regclass
[1] == X86_64_X87UP_CLASS
)
6752 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
6755 && regclass
[0] == X86_64_INTEGER_CLASS
6756 && regclass
[1] == X86_64_INTEGER_CLASS
6757 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
6758 && intreg
[0] + 1 == intreg
[1])
6759 return gen_rtx_REG (mode
, intreg
[0]);
6761 /* Otherwise figure out the entries of the PARALLEL. */
6762 for (i
= 0; i
< n
; i
++)
6766 switch (regclass
[i
])
6768 case X86_64_NO_CLASS
:
6770 case X86_64_INTEGER_CLASS
:
6771 case X86_64_INTEGERSI_CLASS
:
6772 /* Merge TImodes on aligned occasions here too. */
6773 if (i
* 8 + 8 > bytes
)
6775 = mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
6776 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
6780 /* We've requested 24 bytes we
6781 don't have mode for. Use DImode. */
6782 if (tmpmode
== BLKmode
)
6785 = gen_rtx_EXPR_LIST (VOIDmode
,
6786 gen_rtx_REG (tmpmode
, *intreg
),
6790 case X86_64_SSESF_CLASS
:
6792 = gen_rtx_EXPR_LIST (VOIDmode
,
6793 gen_rtx_REG (SFmode
,
6794 SSE_REGNO (sse_regno
)),
6798 case X86_64_SSEDF_CLASS
:
6800 = gen_rtx_EXPR_LIST (VOIDmode
,
6801 gen_rtx_REG (DFmode
,
6802 SSE_REGNO (sse_regno
)),
6806 case X86_64_SSE_CLASS
:
6814 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
6824 && regclass
[1] == X86_64_SSEUP_CLASS
6825 && regclass
[2] == X86_64_SSEUP_CLASS
6826 && regclass
[3] == X86_64_SSEUP_CLASS
);
6834 = gen_rtx_EXPR_LIST (VOIDmode
,
6835 gen_rtx_REG (tmpmode
,
6836 SSE_REGNO (sse_regno
)),
6845 /* Empty aligned struct, union or class. */
6849 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
6850 for (i
= 0; i
< nexps
; i
++)
6851 XVECEXP (ret
, 0, i
) = exp
[i
];
6855 /* Update the data in CUM to advance over an argument of mode MODE
6856 and data type TYPE. (TYPE is null for libcalls where that information
6857 may not be available.) */
6860 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6861 const_tree type
, HOST_WIDE_INT bytes
,
6862 HOST_WIDE_INT words
)
6878 cum
->words
+= words
;
6879 cum
->nregs
-= words
;
6880 cum
->regno
+= words
;
6882 if (cum
->nregs
<= 0)
6890 /* OImode shouldn't be used directly. */
6894 if (cum
->float_in_sse
< 2)
6897 if (cum
->float_in_sse
< 1)
6914 if (!type
|| !AGGREGATE_TYPE_P (type
))
6916 cum
->sse_words
+= words
;
6917 cum
->sse_nregs
-= 1;
6918 cum
->sse_regno
+= 1;
6919 if (cum
->sse_nregs
<= 0)
6933 if (!type
|| !AGGREGATE_TYPE_P (type
))
6935 cum
->mmx_words
+= words
;
6936 cum
->mmx_nregs
-= 1;
6937 cum
->mmx_regno
+= 1;
6938 if (cum
->mmx_nregs
<= 0)
6949 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6950 const_tree type
, HOST_WIDE_INT words
, bool named
)
6952 int int_nregs
, sse_nregs
;
6954 /* Unnamed 256bit vector mode parameters are passed on stack. */
6955 if (!named
&& VALID_AVX256_REG_MODE (mode
))
6958 if (examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
6959 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
6961 cum
->nregs
-= int_nregs
;
6962 cum
->sse_nregs
-= sse_nregs
;
6963 cum
->regno
+= int_nregs
;
6964 cum
->sse_regno
+= sse_nregs
;
6968 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
6969 cum
->words
= (cum
->words
+ align
- 1) & ~(align
- 1);
6970 cum
->words
+= words
;
6975 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
6976 HOST_WIDE_INT words
)
6978 /* Otherwise, this should be passed indirect. */
6979 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
6981 cum
->words
+= words
;
6989 /* Update the data in CUM to advance over an argument of mode MODE and
6990 data type TYPE. (TYPE is null for libcalls where that information
6991 may not be available.) */
6994 ix86_function_arg_advance (cumulative_args_t cum_v
, enum machine_mode mode
,
6995 const_tree type
, bool named
)
6997 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6998 HOST_WIDE_INT bytes
, words
;
7000 if (mode
== BLKmode
)
7001 bytes
= int_size_in_bytes (type
);
7003 bytes
= GET_MODE_SIZE (mode
);
7004 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
7007 mode
= type_natural_mode (type
, NULL
);
7009 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7010 function_arg_advance_ms_64 (cum
, bytes
, words
);
7011 else if (TARGET_64BIT
)
7012 function_arg_advance_64 (cum
, mode
, type
, words
, named
);
7014 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
7017 /* Define where to put the arguments to a function.
7018 Value is zero to push the argument on the stack,
7019 or a hard register in which to store the argument.
7021 MODE is the argument's machine mode.
7022 TYPE is the data type of the argument (as a tree).
7023 This is null for libcalls where that information may
7025 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7026 the preceding args and about the function being called.
7027 NAMED is nonzero if this argument is a named parameter
7028 (otherwise it is an extra parameter matching an ellipsis). */
7031 function_arg_32 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7032 enum machine_mode orig_mode
, const_tree type
,
7033 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
7035 static bool warnedsse
, warnedmmx
;
7037 /* Avoid the AL settings for the Unix64 ABI. */
7038 if (mode
== VOIDmode
)
7054 if (words
<= cum
->nregs
)
7056 int regno
= cum
->regno
;
7058 /* Fastcall allocates the first two DWORD (SImode) or
7059 smaller arguments to ECX and EDX if it isn't an
7065 || (type
&& AGGREGATE_TYPE_P (type
)))
7068 /* ECX not EAX is the first allocated register. */
7069 if (regno
== AX_REG
)
7072 return gen_rtx_REG (mode
, regno
);
7077 if (cum
->float_in_sse
< 2)
7080 if (cum
->float_in_sse
< 1)
7084 /* In 32bit, we pass TImode in xmm registers. */
7091 if (!type
|| !AGGREGATE_TYPE_P (type
))
7093 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
7096 warning (0, "SSE vector argument without SSE enabled "
7100 return gen_reg_or_parallel (mode
, orig_mode
,
7101 cum
->sse_regno
+ FIRST_SSE_REG
);
7106 /* OImode shouldn't be used directly. */
7115 if (!type
|| !AGGREGATE_TYPE_P (type
))
7118 return gen_reg_or_parallel (mode
, orig_mode
,
7119 cum
->sse_regno
+ FIRST_SSE_REG
);
7129 if (!type
|| !AGGREGATE_TYPE_P (type
))
7131 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
7134 warning (0, "MMX vector argument without MMX enabled "
7138 return gen_reg_or_parallel (mode
, orig_mode
,
7139 cum
->mmx_regno
+ FIRST_MMX_REG
);
7148 function_arg_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7149 enum machine_mode orig_mode
, const_tree type
, bool named
)
7151 /* Handle a hidden AL argument containing number of registers
7152 for varargs x86-64 functions. */
7153 if (mode
== VOIDmode
)
7154 return GEN_INT (cum
->maybe_vaarg
7155 ? (cum
->sse_nregs
< 0
7156 ? X86_64_SSE_REGPARM_MAX
7171 /* Unnamed 256bit vector mode parameters are passed on stack. */
7177 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
7179 &x86_64_int_parameter_registers
[cum
->regno
],
7184 function_arg_ms_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7185 enum machine_mode orig_mode
, bool named
,
7186 HOST_WIDE_INT bytes
)
7190 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
7191 We use value of -2 to specify that current function call is MSABI. */
7192 if (mode
== VOIDmode
)
7193 return GEN_INT (-2);
7195 /* If we've run out of registers, it goes on the stack. */
7196 if (cum
->nregs
== 0)
7199 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
7201 /* Only floating point modes are passed in anything but integer regs. */
7202 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
7205 regno
= cum
->regno
+ FIRST_SSE_REG
;
7210 /* Unnamed floating parameters are passed in both the
7211 SSE and integer registers. */
7212 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
7213 t2
= gen_rtx_REG (mode
, regno
);
7214 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
7215 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
7216 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
7219 /* Handle aggregated types passed in register. */
7220 if (orig_mode
== BLKmode
)
7222 if (bytes
> 0 && bytes
<= 8)
7223 mode
= (bytes
> 4 ? DImode
: SImode
);
7224 if (mode
== BLKmode
)
7228 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
7231 /* Return where to put the arguments to a function.
7232 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7234 MODE is the argument's machine mode. TYPE is the data type of the
7235 argument. It is null for libcalls where that information may not be
7236 available. CUM gives information about the preceding args and about
7237 the function being called. NAMED is nonzero if this argument is a
7238 named parameter (otherwise it is an extra parameter matching an
7242 ix86_function_arg (cumulative_args_t cum_v
, enum machine_mode omode
,
7243 const_tree type
, bool named
)
7245 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7246 enum machine_mode mode
= omode
;
7247 HOST_WIDE_INT bytes
, words
;
7250 if (mode
== BLKmode
)
7251 bytes
= int_size_in_bytes (type
);
7253 bytes
= GET_MODE_SIZE (mode
);
7254 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
7256 /* To simplify the code below, represent vector types with a vector mode
7257 even if MMX/SSE are not active. */
7258 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
7259 mode
= type_natural_mode (type
, cum
);
7261 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7262 arg
= function_arg_ms_64 (cum
, mode
, omode
, named
, bytes
);
7263 else if (TARGET_64BIT
)
7264 arg
= function_arg_64 (cum
, mode
, omode
, type
, named
);
7266 arg
= function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
7271 /* A C expression that indicates when an argument must be passed by
7272 reference. If nonzero for an argument, a copy of that argument is
7273 made in memory and a pointer to the argument is passed instead of
7274 the argument itself. The pointer is passed in whatever way is
7275 appropriate for passing a pointer to that type. */
7278 ix86_pass_by_reference (cumulative_args_t cum_v
, enum machine_mode mode
,
7279 const_tree type
, bool named ATTRIBUTE_UNUSED
)
7281 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7283 /* See Windows x64 Software Convention. */
7284 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7286 int msize
= (int) GET_MODE_SIZE (mode
);
7289 /* Arrays are passed by reference. */
7290 if (TREE_CODE (type
) == ARRAY_TYPE
)
7293 if (AGGREGATE_TYPE_P (type
))
7295 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7296 are passed by reference. */
7297 msize
= int_size_in_bytes (type
);
7301 /* __m128 is passed by reference. */
7303 case 1: case 2: case 4: case 8:
7309 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
7315 /* Return true when TYPE should be 128bit aligned for 32bit argument
7316 passing ABI. XXX: This function is obsolete and is only used for
7317 checking psABI compatibility with previous versions of GCC. */
7320 ix86_compat_aligned_value_p (const_tree type
)
7322 enum machine_mode mode
= TYPE_MODE (type
);
7323 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
7327 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
7329 if (TYPE_ALIGN (type
) < 128)
7332 if (AGGREGATE_TYPE_P (type
))
7334 /* Walk the aggregates recursively. */
7335 switch (TREE_CODE (type
))
7339 case QUAL_UNION_TYPE
:
7343 /* Walk all the structure fields. */
7344 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
7346 if (TREE_CODE (field
) == FIELD_DECL
7347 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
7354 /* Just for use if some languages passes arrays by value. */
7355 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
7366 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7367 XXX: This function is obsolete and is only used for checking psABI
7368 compatibility with previous versions of GCC. */
7371 ix86_compat_function_arg_boundary (enum machine_mode mode
,
7372 const_tree type
, unsigned int align
)
7374 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7375 natural boundaries. */
7376 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
7378 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7379 make an exception for SSE modes since these require 128bit
7382 The handling here differs from field_alignment. ICC aligns MMX
7383 arguments to 4 byte boundaries, while structure fields are aligned
7384 to 8 byte boundaries. */
7387 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
7388 align
= PARM_BOUNDARY
;
7392 if (!ix86_compat_aligned_value_p (type
))
7393 align
= PARM_BOUNDARY
;
7396 if (align
> BIGGEST_ALIGNMENT
)
7397 align
= BIGGEST_ALIGNMENT
;
7401 /* Return true when TYPE should be 128bit aligned for 32bit argument
7405 ix86_contains_aligned_value_p (const_tree type
)
7407 enum machine_mode mode
= TYPE_MODE (type
);
7409 if (mode
== XFmode
|| mode
== XCmode
)
7412 if (TYPE_ALIGN (type
) < 128)
7415 if (AGGREGATE_TYPE_P (type
))
7417 /* Walk the aggregates recursively. */
7418 switch (TREE_CODE (type
))
7422 case QUAL_UNION_TYPE
:
7426 /* Walk all the structure fields. */
7427 for (field
= TYPE_FIELDS (type
);
7429 field
= DECL_CHAIN (field
))
7431 if (TREE_CODE (field
) == FIELD_DECL
7432 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
7439 /* Just for use if some languages passes arrays by value. */
7440 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
7449 return TYPE_ALIGN (type
) >= 128;
7454 /* Gives the alignment boundary, in bits, of an argument with the
7455 specified mode and type. */
7458 ix86_function_arg_boundary (enum machine_mode mode
, const_tree type
)
7463 /* Since the main variant type is used for call, we convert it to
7464 the main variant type. */
7465 type
= TYPE_MAIN_VARIANT (type
);
7466 align
= TYPE_ALIGN (type
);
7469 align
= GET_MODE_ALIGNMENT (mode
);
7470 if (align
< PARM_BOUNDARY
)
7471 align
= PARM_BOUNDARY
;
7475 unsigned int saved_align
= align
;
7479 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7482 if (mode
== XFmode
|| mode
== XCmode
)
7483 align
= PARM_BOUNDARY
;
7485 else if (!ix86_contains_aligned_value_p (type
))
7486 align
= PARM_BOUNDARY
;
7489 align
= PARM_BOUNDARY
;
7494 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
7498 inform (input_location
,
7499 "The ABI for passing parameters with %d-byte"
7500 " alignment has changed in GCC 4.6",
7501 align
/ BITS_PER_UNIT
);
7508 /* Return true if N is a possible register number of function value. */
7511 ix86_function_value_regno_p (const unsigned int regno
)
7520 return TARGET_64BIT
&& ix86_abi
!= MS_ABI
;
7522 /* Complex values are returned in %st(0)/%st(1) pair. */
7525 /* TODO: The function should depend on current function ABI but
7526 builtins.c would need updating then. Therefore we use the
7528 if (TARGET_64BIT
&& ix86_abi
== MS_ABI
)
7530 return TARGET_FLOAT_RETURNS_IN_80387
;
7532 /* Complex values are returned in %xmm0/%xmm1 pair. */
7538 if (TARGET_MACHO
|| TARGET_64BIT
)
7546 /* Define how to find the value returned by a function.
7547 VALTYPE is the data type of the value (as a tree).
7548 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7549 otherwise, FUNC is 0. */
7552 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
7553 const_tree fntype
, const_tree fn
)
7557 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7558 we normally prevent this case when mmx is not available. However
7559 some ABIs may require the result to be returned like DImode. */
7560 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7561 regno
= FIRST_MMX_REG
;
7563 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7564 we prevent this case when sse is not available. However some ABIs
7565 may require the result to be returned like integer TImode. */
7566 else if (mode
== TImode
7567 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7568 regno
= FIRST_SSE_REG
;
7570 /* 32-byte vector modes in %ymm0. */
7571 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
7572 regno
= FIRST_SSE_REG
;
7574 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7575 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
7576 regno
= FIRST_FLOAT_REG
;
7578 /* Most things go in %eax. */
7581 /* Override FP return register with %xmm0 for local functions when
7582 SSE math is enabled or for functions with sseregparm attribute. */
7583 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
7585 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
7586 if ((sse_level
>= 1 && mode
== SFmode
)
7587 || (sse_level
== 2 && mode
== DFmode
))
7588 regno
= FIRST_SSE_REG
;
7591 /* OImode shouldn't be used directly. */
7592 gcc_assert (mode
!= OImode
);
7594 return gen_rtx_REG (orig_mode
, regno
);
7598 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7603 /* Handle libcalls, which don't provide a type node. */
7604 if (valtype
== NULL
)
7618 regno
= FIRST_SSE_REG
;
7622 regno
= FIRST_FLOAT_REG
;
7630 return gen_rtx_REG (mode
, regno
);
7632 else if (POINTER_TYPE_P (valtype
))
7634 /* Pointers are always returned in word_mode. */
7638 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
7639 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
7640 x86_64_int_return_registers
, 0);
7642 /* For zero sized structures, construct_container returns NULL, but we
7643 need to keep rest of compiler happy by returning meaningful value. */
7645 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
7651 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7654 unsigned int regno
= AX_REG
;
7658 switch (GET_MODE_SIZE (mode
))
7661 if (valtype
!= NULL_TREE
7662 && !VECTOR_INTEGER_TYPE_P (valtype
)
7663 && !VECTOR_INTEGER_TYPE_P (valtype
)
7664 && !INTEGRAL_TYPE_P (valtype
)
7665 && !VECTOR_FLOAT_TYPE_P (valtype
))
7667 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7668 && !COMPLEX_MODE_P (mode
))
7669 regno
= FIRST_SSE_REG
;
7673 if (mode
== SFmode
|| mode
== DFmode
)
7674 regno
= FIRST_SSE_REG
;
7680 return gen_rtx_REG (orig_mode
, regno
);
7684 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
7685 enum machine_mode orig_mode
, enum machine_mode mode
)
7687 const_tree fn
, fntype
;
7690 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
7691 fn
= fntype_or_decl
;
7692 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
7694 if (TARGET_64BIT
&& ix86_function_type_abi (fntype
) == MS_ABI
)
7695 return function_value_ms_64 (orig_mode
, mode
, valtype
);
7696 else if (TARGET_64BIT
)
7697 return function_value_64 (orig_mode
, mode
, valtype
);
7699 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
7703 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
7704 bool outgoing ATTRIBUTE_UNUSED
)
7706 enum machine_mode mode
, orig_mode
;
7708 orig_mode
= TYPE_MODE (valtype
);
7709 mode
= type_natural_mode (valtype
, NULL
);
7710 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
7713 /* Pointer function arguments and return values are promoted to
7716 static enum machine_mode
7717 ix86_promote_function_mode (const_tree type
, enum machine_mode mode
,
7718 int *punsignedp
, const_tree fntype
,
7721 if (type
!= NULL_TREE
&& POINTER_TYPE_P (type
))
7723 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
7726 return default_promote_function_mode (type
, mode
, punsignedp
, fntype
,
7730 /* Return true if a structure, union or array with MODE containing FIELD
7731 should be accessed using BLKmode. */
7734 ix86_member_type_forces_blk (const_tree field
, enum machine_mode mode
)
7736 /* Union with XFmode must be in BLKmode. */
7737 return (mode
== XFmode
7738 && (TREE_CODE (DECL_FIELD_CONTEXT (field
)) == UNION_TYPE
7739 || TREE_CODE (DECL_FIELD_CONTEXT (field
)) == QUAL_UNION_TYPE
));
7743 ix86_libcall_value (enum machine_mode mode
)
7745 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
7748 /* Return true iff type is returned in memory. */
7750 static bool ATTRIBUTE_UNUSED
7751 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
7755 if (mode
== BLKmode
)
7758 size
= int_size_in_bytes (type
);
7760 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
7763 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
7765 /* User-created vectors small enough to fit in EAX. */
7769 /* MMX/3dNow values are returned in MM0,
7770 except when it doesn't exits or the ABI prescribes otherwise. */
7772 return !TARGET_MMX
|| TARGET_VECT8_RETURNS
;
7774 /* SSE values are returned in XMM0, except when it doesn't exist. */
7778 /* AVX values are returned in YMM0, except when it doesn't exist. */
7789 /* OImode shouldn't be used directly. */
7790 gcc_assert (mode
!= OImode
);
7795 static bool ATTRIBUTE_UNUSED
7796 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
7798 int needed_intregs
, needed_sseregs
;
7799 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
7802 static bool ATTRIBUTE_UNUSED
7803 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
7805 HOST_WIDE_INT size
= int_size_in_bytes (type
);
7807 /* __m128 is returned in xmm0. */
7808 if ((!type
|| VECTOR_INTEGER_TYPE_P (type
) || INTEGRAL_TYPE_P (type
)
7809 || VECTOR_FLOAT_TYPE_P (type
))
7810 && (SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7811 && !COMPLEX_MODE_P (mode
) && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
7814 /* Otherwise, the size must be exactly in [1248]. */
7815 return size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8;
7819 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
7821 #ifdef SUBTARGET_RETURN_IN_MEMORY
7822 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
7824 const enum machine_mode mode
= type_natural_mode (type
, NULL
);
7828 if (ix86_function_type_abi (fntype
) == MS_ABI
)
7829 return return_in_memory_ms_64 (type
, mode
);
7831 return return_in_memory_64 (type
, mode
);
7834 return return_in_memory_32 (type
, mode
);
7838 /* When returning SSE vector types, we have a choice of either
7839 (1) being abi incompatible with a -march switch, or
7840 (2) generating an error.
7841 Given no good solution, I think the safest thing is one warning.
7842 The user won't be able to use -Werror, but....
7844 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7845 called in response to actually generating a caller or callee that
7846 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7847 via aggregate_value_p for general type probing from tree-ssa. */
7850 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
7852 static bool warnedsse
, warnedmmx
;
7854 if (!TARGET_64BIT
&& type
)
7856 /* Look at the return type of the function, not the function type. */
7857 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
7859 if (!TARGET_SSE
&& !warnedsse
)
7862 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7865 warning (0, "SSE vector return without SSE enabled "
7870 if (!TARGET_MMX
&& !warnedmmx
)
7872 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7875 warning (0, "MMX vector return without MMX enabled "
7885 /* Create the va_list data type. */
7887 /* Returns the calling convention specific va_list date type.
7888 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7891 ix86_build_builtin_va_list_abi (enum calling_abi abi
)
7893 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
7895 /* For i386 we use plain pointer to argument area. */
7896 if (!TARGET_64BIT
|| abi
== MS_ABI
)
7897 return build_pointer_type (char_type_node
);
7899 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
7900 type_decl
= build_decl (BUILTINS_LOCATION
,
7901 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
7903 f_gpr
= build_decl (BUILTINS_LOCATION
,
7904 FIELD_DECL
, get_identifier ("gp_offset"),
7905 unsigned_type_node
);
7906 f_fpr
= build_decl (BUILTINS_LOCATION
,
7907 FIELD_DECL
, get_identifier ("fp_offset"),
7908 unsigned_type_node
);
7909 f_ovf
= build_decl (BUILTINS_LOCATION
,
7910 FIELD_DECL
, get_identifier ("overflow_arg_area"),
7912 f_sav
= build_decl (BUILTINS_LOCATION
,
7913 FIELD_DECL
, get_identifier ("reg_save_area"),
7916 va_list_gpr_counter_field
= f_gpr
;
7917 va_list_fpr_counter_field
= f_fpr
;
7919 DECL_FIELD_CONTEXT (f_gpr
) = record
;
7920 DECL_FIELD_CONTEXT (f_fpr
) = record
;
7921 DECL_FIELD_CONTEXT (f_ovf
) = record
;
7922 DECL_FIELD_CONTEXT (f_sav
) = record
;
7924 TYPE_STUB_DECL (record
) = type_decl
;
7925 TYPE_NAME (record
) = type_decl
;
7926 TYPE_FIELDS (record
) = f_gpr
;
7927 DECL_CHAIN (f_gpr
) = f_fpr
;
7928 DECL_CHAIN (f_fpr
) = f_ovf
;
7929 DECL_CHAIN (f_ovf
) = f_sav
;
7931 layout_type (record
);
7933 /* The correct type is an array type of one element. */
7934 return build_array_type (record
, build_index_type (size_zero_node
));
7937 /* Setup the builtin va_list data type and for 64-bit the additional
7938 calling convention specific va_list data types. */
7941 ix86_build_builtin_va_list (void)
7943 tree ret
= ix86_build_builtin_va_list_abi (ix86_abi
);
7945 /* Initialize abi specific va_list builtin types. */
7949 if (ix86_abi
== MS_ABI
)
7951 t
= ix86_build_builtin_va_list_abi (SYSV_ABI
);
7952 if (TREE_CODE (t
) != RECORD_TYPE
)
7953 t
= build_variant_type_copy (t
);
7954 sysv_va_list_type_node
= t
;
7959 if (TREE_CODE (t
) != RECORD_TYPE
)
7960 t
= build_variant_type_copy (t
);
7961 sysv_va_list_type_node
= t
;
7963 if (ix86_abi
!= MS_ABI
)
7965 t
= ix86_build_builtin_va_list_abi (MS_ABI
);
7966 if (TREE_CODE (t
) != RECORD_TYPE
)
7967 t
= build_variant_type_copy (t
);
7968 ms_va_list_type_node
= t
;
7973 if (TREE_CODE (t
) != RECORD_TYPE
)
7974 t
= build_variant_type_copy (t
);
7975 ms_va_list_type_node
= t
;
7982 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7985 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
7991 /* GPR size of varargs save area. */
7992 if (cfun
->va_list_gpr_size
)
7993 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
7995 ix86_varargs_gpr_size
= 0;
7997 /* FPR size of varargs save area. We don't need it if we don't pass
7998 anything in SSE registers. */
7999 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
8000 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
8002 ix86_varargs_fpr_size
= 0;
8004 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
8007 save_area
= frame_pointer_rtx
;
8008 set
= get_varargs_alias_set ();
8010 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
8011 if (max
> X86_64_REGPARM_MAX
)
8012 max
= X86_64_REGPARM_MAX
;
8014 for (i
= cum
->regno
; i
< max
; i
++)
8016 mem
= gen_rtx_MEM (word_mode
,
8017 plus_constant (Pmode
, save_area
, i
* UNITS_PER_WORD
));
8018 MEM_NOTRAP_P (mem
) = 1;
8019 set_mem_alias_set (mem
, set
);
8020 emit_move_insn (mem
,
8021 gen_rtx_REG (word_mode
,
8022 x86_64_int_parameter_registers
[i
]));
8025 if (ix86_varargs_fpr_size
)
8027 enum machine_mode smode
;
8030 /* Now emit code to save SSE registers. The AX parameter contains number
8031 of SSE parameter registers used to call this function, though all we
8032 actually check here is the zero/non-zero status. */
8034 label
= gen_label_rtx ();
8035 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
8036 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
8039 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8040 we used movdqa (i.e. TImode) instead? Perhaps even better would
8041 be if we could determine the real mode of the data, via a hook
8042 into pass_stdarg. Ignore all that for now. */
8044 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
8045 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
8047 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
8048 if (max
> X86_64_SSE_REGPARM_MAX
)
8049 max
= X86_64_SSE_REGPARM_MAX
;
8051 for (i
= cum
->sse_regno
; i
< max
; ++i
)
8053 mem
= plus_constant (Pmode
, save_area
,
8054 i
* 16 + ix86_varargs_gpr_size
);
8055 mem
= gen_rtx_MEM (smode
, mem
);
8056 MEM_NOTRAP_P (mem
) = 1;
8057 set_mem_alias_set (mem
, set
);
8058 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
8060 emit_move_insn (mem
, gen_rtx_REG (smode
, SSE_REGNO (i
)));
8068 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
8070 alias_set_type set
= get_varargs_alias_set ();
8073 /* Reset to zero, as there might be a sysv vaarg used
8075 ix86_varargs_gpr_size
= 0;
8076 ix86_varargs_fpr_size
= 0;
8078 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
8082 mem
= gen_rtx_MEM (Pmode
,
8083 plus_constant (Pmode
, virtual_incoming_args_rtx
,
8084 i
* UNITS_PER_WORD
));
8085 MEM_NOTRAP_P (mem
) = 1;
8086 set_mem_alias_set (mem
, set
);
8088 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
8089 emit_move_insn (mem
, reg
);
8094 ix86_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
8095 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
8098 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
8099 CUMULATIVE_ARGS next_cum
;
8102 /* This argument doesn't appear to be used anymore. Which is good,
8103 because the old code here didn't suppress rtl generation. */
8104 gcc_assert (!no_rtl
);
8109 fntype
= TREE_TYPE (current_function_decl
);
8111 /* For varargs, we do not want to skip the dummy va_dcl argument.
8112 For stdargs, we do want to skip the last named argument. */
8114 if (stdarg_p (fntype
))
8115 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), mode
, type
,
8118 if (cum
->call_abi
== MS_ABI
)
8119 setup_incoming_varargs_ms_64 (&next_cum
);
8121 setup_incoming_varargs_64 (&next_cum
);
8124 /* Checks if TYPE is of kind va_list char *. */
8127 is_va_list_char_pointer (tree type
)
8131 /* For 32-bit it is always true. */
8134 canonic
= ix86_canonical_va_list_type (type
);
8135 return (canonic
== ms_va_list_type_node
8136 || (ix86_abi
== MS_ABI
&& canonic
== va_list_type_node
));
8139 /* Implement va_start. */
8142 ix86_va_start (tree valist
, rtx nextarg
)
8144 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
8145 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
8146 tree gpr
, fpr
, ovf
, sav
, t
;
8150 if (flag_split_stack
8151 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
8153 unsigned int scratch_regno
;
8155 /* When we are splitting the stack, we can't refer to the stack
8156 arguments using internal_arg_pointer, because they may be on
8157 the old stack. The split stack prologue will arrange to
8158 leave a pointer to the old stack arguments in a scratch
8159 register, which we here copy to a pseudo-register. The split
8160 stack prologue can't set the pseudo-register directly because
8161 it (the prologue) runs before any registers have been saved. */
8163 scratch_regno
= split_stack_prologue_scratch_regno ();
8164 if (scratch_regno
!= INVALID_REGNUM
)
8168 reg
= gen_reg_rtx (Pmode
);
8169 cfun
->machine
->split_stack_varargs_pointer
= reg
;
8172 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
8176 push_topmost_sequence ();
8177 emit_insn_after (seq
, entry_of_function ());
8178 pop_topmost_sequence ();
8182 /* Only 64bit target needs something special. */
8183 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
8185 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
8186 std_expand_builtin_va_start (valist
, nextarg
);
8191 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
8192 next
= expand_binop (ptr_mode
, add_optab
,
8193 cfun
->machine
->split_stack_varargs_pointer
,
8194 crtl
->args
.arg_offset_rtx
,
8195 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
8196 convert_move (va_r
, next
, 0);
8201 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
8202 f_fpr
= DECL_CHAIN (f_gpr
);
8203 f_ovf
= DECL_CHAIN (f_fpr
);
8204 f_sav
= DECL_CHAIN (f_ovf
);
8206 valist
= build_simple_mem_ref (valist
);
8207 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
8208 /* The following should be folded into the MEM_REF offset. */
8209 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
8211 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
8213 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
8215 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
8218 /* Count number of gp and fp argument registers used. */
8219 words
= crtl
->args
.info
.words
;
8220 n_gpr
= crtl
->args
.info
.regno
;
8221 n_fpr
= crtl
->args
.info
.sse_regno
;
8223 if (cfun
->va_list_gpr_size
)
8225 type
= TREE_TYPE (gpr
);
8226 t
= build2 (MODIFY_EXPR
, type
,
8227 gpr
, build_int_cst (type
, n_gpr
* 8));
8228 TREE_SIDE_EFFECTS (t
) = 1;
8229 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8232 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
8234 type
= TREE_TYPE (fpr
);
8235 t
= build2 (MODIFY_EXPR
, type
, fpr
,
8236 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
8237 TREE_SIDE_EFFECTS (t
) = 1;
8238 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8241 /* Find the overflow area. */
8242 type
= TREE_TYPE (ovf
);
8243 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
8244 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
8246 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
8247 t
= make_tree (type
, ovf_rtx
);
8249 t
= fold_build_pointer_plus_hwi (t
, words
* UNITS_PER_WORD
);
8250 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
8251 TREE_SIDE_EFFECTS (t
) = 1;
8252 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8254 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
8256 /* Find the register save area.
8257 Prologue of the function save it right above stack frame. */
8258 type
= TREE_TYPE (sav
);
8259 t
= make_tree (type
, frame_pointer_rtx
);
8260 if (!ix86_varargs_gpr_size
)
8261 t
= fold_build_pointer_plus_hwi (t
, -8 * X86_64_REGPARM_MAX
);
8262 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
8263 TREE_SIDE_EFFECTS (t
) = 1;
8264 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8268 /* Implement va_arg. */
8271 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
8274 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
8275 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
8276 tree gpr
, fpr
, ovf
, sav
, t
;
8278 tree lab_false
, lab_over
= NULL_TREE
;
8283 enum machine_mode nat_mode
;
8284 unsigned int arg_boundary
;
8286 /* Only 64bit target needs something special. */
8287 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
8288 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
8290 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
8291 f_fpr
= DECL_CHAIN (f_gpr
);
8292 f_ovf
= DECL_CHAIN (f_fpr
);
8293 f_sav
= DECL_CHAIN (f_ovf
);
8295 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
8296 build_va_arg_indirect_ref (valist
), f_gpr
, NULL_TREE
);
8297 valist
= build_va_arg_indirect_ref (valist
);
8298 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
8299 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
8300 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
8302 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
8304 type
= build_pointer_type (type
);
8305 size
= int_size_in_bytes (type
);
8306 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
8308 nat_mode
= type_natural_mode (type
, NULL
);
8317 /* Unnamed 256bit vector mode parameters are passed on stack. */
8318 if (!TARGET_64BIT_MS_ABI
)
8325 container
= construct_container (nat_mode
, TYPE_MODE (type
),
8326 type
, 0, X86_64_REGPARM_MAX
,
8327 X86_64_SSE_REGPARM_MAX
, intreg
,
8332 /* Pull the value out of the saved registers. */
8334 addr
= create_tmp_var (ptr_type_node
, "addr");
8338 int needed_intregs
, needed_sseregs
;
8340 tree int_addr
, sse_addr
;
8342 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
8343 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
8345 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
8347 need_temp
= (!REG_P (container
)
8348 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
8349 || TYPE_ALIGN (type
) > 128));
8351 /* In case we are passing structure, verify that it is consecutive block
8352 on the register save area. If not we need to do moves. */
8353 if (!need_temp
&& !REG_P (container
))
8355 /* Verify that all registers are strictly consecutive */
8356 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
8360 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8362 rtx slot
= XVECEXP (container
, 0, i
);
8363 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
8364 || INTVAL (XEXP (slot
, 1)) != i
* 16)
8372 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8374 rtx slot
= XVECEXP (container
, 0, i
);
8375 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
8376 || INTVAL (XEXP (slot
, 1)) != i
* 8)
8388 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
8389 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
8392 /* First ensure that we fit completely in registers. */
8395 t
= build_int_cst (TREE_TYPE (gpr
),
8396 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
8397 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
8398 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8399 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8400 gimplify_and_add (t
, pre_p
);
8404 t
= build_int_cst (TREE_TYPE (fpr
),
8405 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
8406 + X86_64_REGPARM_MAX
* 8);
8407 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
8408 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8409 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8410 gimplify_and_add (t
, pre_p
);
8413 /* Compute index to start of area used for integer regs. */
8416 /* int_addr = gpr + sav; */
8417 t
= fold_build_pointer_plus (sav
, gpr
);
8418 gimplify_assign (int_addr
, t
, pre_p
);
8422 /* sse_addr = fpr + sav; */
8423 t
= fold_build_pointer_plus (sav
, fpr
);
8424 gimplify_assign (sse_addr
, t
, pre_p
);
8428 int i
, prev_size
= 0;
8429 tree temp
= create_tmp_var (type
, "va_arg_tmp");
8432 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
8433 gimplify_assign (addr
, t
, pre_p
);
8435 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
8437 rtx slot
= XVECEXP (container
, 0, i
);
8438 rtx reg
= XEXP (slot
, 0);
8439 enum machine_mode mode
= GET_MODE (reg
);
8445 tree dest_addr
, dest
;
8446 int cur_size
= GET_MODE_SIZE (mode
);
8448 gcc_assert (prev_size
<= INTVAL (XEXP (slot
, 1)));
8449 prev_size
= INTVAL (XEXP (slot
, 1));
8450 if (prev_size
+ cur_size
> size
)
8452 cur_size
= size
- prev_size
;
8453 mode
= mode_for_size (cur_size
* BITS_PER_UNIT
, MODE_INT
, 1);
8454 if (mode
== BLKmode
)
8457 piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
8458 if (mode
== GET_MODE (reg
))
8459 addr_type
= build_pointer_type (piece_type
);
8461 addr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8463 daddr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8466 if (SSE_REGNO_P (REGNO (reg
)))
8468 src_addr
= sse_addr
;
8469 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
8473 src_addr
= int_addr
;
8474 src_offset
= REGNO (reg
) * 8;
8476 src_addr
= fold_convert (addr_type
, src_addr
);
8477 src_addr
= fold_build_pointer_plus_hwi (src_addr
, src_offset
);
8479 dest_addr
= fold_convert (daddr_type
, addr
);
8480 dest_addr
= fold_build_pointer_plus_hwi (dest_addr
, prev_size
);
8481 if (cur_size
== GET_MODE_SIZE (mode
))
8483 src
= build_va_arg_indirect_ref (src_addr
);
8484 dest
= build_va_arg_indirect_ref (dest_addr
);
8486 gimplify_assign (dest
, src
, pre_p
);
8491 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
8492 3, dest_addr
, src_addr
,
8493 size_int (cur_size
));
8494 gimplify_and_add (copy
, pre_p
);
8496 prev_size
+= cur_size
;
8502 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
8503 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
8504 gimplify_assign (gpr
, t
, pre_p
);
8509 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
8510 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
8511 gimplify_assign (fpr
, t
, pre_p
);
8514 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
8516 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
8519 /* ... otherwise out of the overflow area. */
8521 /* When we align parameter on stack for caller, if the parameter
8522 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8523 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
8524 here with caller. */
8525 arg_boundary
= ix86_function_arg_boundary (VOIDmode
, type
);
8526 if ((unsigned int) arg_boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
8527 arg_boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
8529 /* Care for on-stack alignment if needed. */
8530 if (arg_boundary
<= 64 || size
== 0)
8534 HOST_WIDE_INT align
= arg_boundary
/ 8;
8535 t
= fold_build_pointer_plus_hwi (ovf
, align
- 1);
8536 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
8537 build_int_cst (TREE_TYPE (t
), -align
));
8540 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
8541 gimplify_assign (addr
, t
, pre_p
);
8543 t
= fold_build_pointer_plus_hwi (t
, rsize
* UNITS_PER_WORD
);
8544 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
8547 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
8549 ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
8550 addr
= fold_convert (ptrtype
, addr
);
8553 addr
= build_va_arg_indirect_ref (addr
);
8554 return build_va_arg_indirect_ref (addr
);
8557 /* Return true if OPNUM's MEM should be matched
8558 in movabs* patterns. */
8561 ix86_check_movabs (rtx insn
, int opnum
)
8565 set
= PATTERN (insn
);
8566 if (GET_CODE (set
) == PARALLEL
)
8567 set
= XVECEXP (set
, 0, 0);
8568 gcc_assert (GET_CODE (set
) == SET
);
8569 mem
= XEXP (set
, opnum
);
8570 while (GET_CODE (mem
) == SUBREG
)
8571 mem
= SUBREG_REG (mem
);
8572 gcc_assert (MEM_P (mem
));
8573 return volatile_ok
|| !MEM_VOLATILE_P (mem
);
8576 /* Initialize the table of extra 80387 mathematical constants. */
8579 init_ext_80387_constants (void)
8581 static const char * cst
[5] =
8583 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8584 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8585 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8586 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8587 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8591 for (i
= 0; i
< 5; i
++)
8593 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
8594 /* Ensure each constant is rounded to XFmode precision. */
8595 real_convert (&ext_80387_constants_table
[i
],
8596 XFmode
, &ext_80387_constants_table
[i
]);
8599 ext_80387_constants_init
= 1;
8602 /* Return non-zero if the constant is something that
8603 can be loaded with a special instruction. */
8606 standard_80387_constant_p (rtx x
)
8608 enum machine_mode mode
= GET_MODE (x
);
8612 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
8615 if (x
== CONST0_RTX (mode
))
8617 if (x
== CONST1_RTX (mode
))
8620 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8622 /* For XFmode constants, try to find a special 80387 instruction when
8623 optimizing for size or on those CPUs that benefit from them. */
8625 && (optimize_function_for_size_p (cfun
) || TARGET_EXT_80387_CONSTANTS
))
8629 if (! ext_80387_constants_init
)
8630 init_ext_80387_constants ();
8632 for (i
= 0; i
< 5; i
++)
8633 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
8637 /* Load of the constant -0.0 or -1.0 will be split as
8638 fldz;fchs or fld1;fchs sequence. */
8639 if (real_isnegzero (&r
))
8641 if (real_identical (&r
, &dconstm1
))
8647 /* Return the opcode of the special instruction to be used to load
8651 standard_80387_constant_opcode (rtx x
)
8653 switch (standard_80387_constant_p (x
))
8677 /* Return the CONST_DOUBLE representing the 80387 constant that is
8678 loaded by the specified special instruction. The argument IDX
8679 matches the return value from standard_80387_constant_p. */
8682 standard_80387_constant_rtx (int idx
)
8686 if (! ext_80387_constants_init
)
8687 init_ext_80387_constants ();
8703 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
8707 /* Return 1 if X is all 0s and 2 if x is all 1s
8708 in supported SSE/AVX vector mode. */
8711 standard_sse_constant_p (rtx x
)
8713 enum machine_mode mode
= GET_MODE (x
);
8715 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
8717 if (vector_all_ones_operand (x
, mode
))
8739 /* Return the opcode of the special instruction to be used to load
8743 standard_sse_constant_opcode (rtx insn
, rtx x
)
8745 switch (standard_sse_constant_p (x
))
8748 switch (get_attr_mode (insn
))
8751 return "%vpxor\t%0, %d0";
8753 return "%vxorpd\t%0, %d0";
8755 return "%vxorps\t%0, %d0";
8758 return "vpxor\t%x0, %x0, %x0";
8760 return "vxorpd\t%x0, %x0, %x0";
8762 return "vxorps\t%x0, %x0, %x0";
8769 if (get_attr_mode (insn
) == MODE_XI
8770 || get_attr_mode (insn
) == MODE_V8DF
8771 || get_attr_mode (insn
) == MODE_V16SF
)
8772 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
8774 return "vpcmpeqd\t%0, %0, %0";
8776 return "pcmpeqd\t%0, %0";
8784 /* Returns true if OP contains a symbol reference */
8787 symbolic_reference_mentioned_p (rtx op
)
8792 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
8795 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
8796 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
8802 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
8803 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
8807 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
8814 /* Return true if it is appropriate to emit `ret' instructions in the
8815 body of a function. Do this only if the epilogue is simple, needing a
8816 couple of insns. Prior to reloading, we can't tell how many registers
8817 must be saved, so return false then. Return false if there is no frame
8818 marker to de-allocate. */
8821 ix86_can_use_return_insn_p (void)
8823 struct ix86_frame frame
;
8825 if (! reload_completed
|| frame_pointer_needed
)
8828 /* Don't allow more than 32k pop, since that's all we can do
8829 with one instruction. */
8830 if (crtl
->args
.pops_args
&& crtl
->args
.size
>= 32768)
8833 ix86_compute_frame_layout (&frame
);
8834 return (frame
.stack_pointer_offset
== UNITS_PER_WORD
8835 && (frame
.nregs
+ frame
.nsseregs
) == 0);
8838 /* Value should be nonzero if functions must have frame pointers.
8839 Zero means the frame pointer need not be set up (and parms may
8840 be accessed via the stack pointer) in functions that seem suitable. */
8843 ix86_frame_pointer_required (void)
8845 /* If we accessed previous frames, then the generated code expects
8846 to be able to access the saved ebp value in our frame. */
8847 if (cfun
->machine
->accesses_prev_frame
)
8850 /* Several x86 os'es need a frame pointer for other reasons,
8851 usually pertaining to setjmp. */
8852 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
8855 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
8856 if (TARGET_32BIT_MS_ABI
&& cfun
->calls_setjmp
)
8859 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
8860 allocation is 4GB. */
8861 if (TARGET_64BIT_MS_ABI
&& get_frame_size () > SEH_MAX_FRAME_SIZE
)
8864 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8865 turns off the frame pointer by default. Turn it back on now if
8866 we've not got a leaf function. */
8867 if (TARGET_OMIT_LEAF_FRAME_POINTER
8869 || ix86_current_function_calls_tls_descriptor
))
8872 if (crtl
->profile
&& !flag_fentry
)
8878 /* Record that the current function accesses previous call frames. */
8881 ix86_setup_frame_addresses (void)
8883 cfun
->machine
->accesses_prev_frame
= 1;
8886 #ifndef USE_HIDDEN_LINKONCE
8887 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8888 # define USE_HIDDEN_LINKONCE 1
8890 # define USE_HIDDEN_LINKONCE 0
8894 static int pic_labels_used
;
8896 /* Fills in the label name that should be used for a pc thunk for
8897 the given register. */
8900 get_pc_thunk_name (char name
[32], unsigned int regno
)
8902 gcc_assert (!TARGET_64BIT
);
8904 if (USE_HIDDEN_LINKONCE
)
8905 sprintf (name
, "__x86.get_pc_thunk.%s", reg_names
[regno
]);
8907 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
8911 /* This function generates code for -fpic that loads %ebx with
8912 the return address of the caller and then returns. */
8915 ix86_code_end (void)
8920 for (regno
= AX_REG
; regno
<= SP_REG
; regno
++)
8925 if (!(pic_labels_used
& (1 << regno
)))
8928 get_pc_thunk_name (name
, regno
);
8930 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
8931 get_identifier (name
),
8932 build_function_type_list (void_type_node
, NULL_TREE
));
8933 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
8934 NULL_TREE
, void_type_node
);
8935 TREE_PUBLIC (decl
) = 1;
8936 TREE_STATIC (decl
) = 1;
8937 DECL_IGNORED_P (decl
) = 1;
8942 switch_to_section (darwin_sections
[text_coal_section
]);
8943 fputs ("\t.weak_definition\t", asm_out_file
);
8944 assemble_name (asm_out_file
, name
);
8945 fputs ("\n\t.private_extern\t", asm_out_file
);
8946 assemble_name (asm_out_file
, name
);
8947 putc ('\n', asm_out_file
);
8948 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8949 DECL_WEAK (decl
) = 1;
8953 if (USE_HIDDEN_LINKONCE
)
8955 DECL_COMDAT_GROUP (decl
) = DECL_ASSEMBLER_NAME (decl
);
8957 targetm
.asm_out
.unique_section (decl
, 0);
8958 switch_to_section (get_named_section (decl
, NULL
, 0));
8960 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
8961 fputs ("\t.hidden\t", asm_out_file
);
8962 assemble_name (asm_out_file
, name
);
8963 putc ('\n', asm_out_file
);
8964 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
8968 switch_to_section (text_section
);
8969 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8972 DECL_INITIAL (decl
) = make_node (BLOCK
);
8973 current_function_decl
= decl
;
8974 init_function_start (decl
);
8975 first_function_block_is_cold
= false;
8976 /* Make sure unwind info is emitted for the thunk if needed. */
8977 final_start_function (emit_barrier (), asm_out_file
, 1);
8979 /* Pad stack IP move with 4 instructions (two NOPs count
8980 as one instruction). */
8981 if (TARGET_PAD_SHORT_FUNCTION
)
8986 fputs ("\tnop\n", asm_out_file
);
8989 xops
[0] = gen_rtx_REG (Pmode
, regno
);
8990 xops
[1] = gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
8991 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops
);
8992 output_asm_insn ("%!ret", NULL
);
8993 final_end_function ();
8994 init_insn_lengths ();
8995 free_after_compilation (cfun
);
8997 current_function_decl
= NULL
;
9000 if (flag_split_stack
)
9001 file_end_indicate_split_stack ();
9004 /* Emit code for the SET_GOT patterns. */
9007 output_set_got (rtx dest
, rtx label
)
9013 if (TARGET_VXWORKS_RTP
&& flag_pic
)
9015 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9016 xops
[2] = gen_rtx_MEM (Pmode
,
9017 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
9018 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
9020 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9021 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9022 an unadorned address. */
9023 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
9024 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
9025 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
9029 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
9034 /* We don't need a pic base, we're not producing pic. */
9037 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
9038 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops
);
9039 targetm
.asm_out
.internal_label (asm_out_file
, "L",
9040 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
9045 get_pc_thunk_name (name
, REGNO (dest
));
9046 pic_labels_used
|= 1 << REGNO (dest
);
9048 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
9049 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
9050 output_asm_insn ("%!call\t%X2", xops
);
9053 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9054 This is what will be referenced by the Mach-O PIC subsystem. */
9055 if (machopic_should_output_picbase_label () || !label
)
9056 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
9058 /* When we are restoring the pic base at the site of a nonlocal label,
9059 and we decided to emit the pic base above, we will still output a
9060 local label used for calculating the correction offset (even though
9061 the offset will be 0 in that case). */
9063 targetm
.asm_out
.internal_label (asm_out_file
, "L",
9064 CODE_LABEL_NUMBER (label
));
9069 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops
);
9074 /* Generate an "push" pattern for input ARG. */
9079 struct machine_function
*m
= cfun
->machine
;
9081 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
9082 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
9083 m
->fs
.sp_offset
+= UNITS_PER_WORD
;
9085 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
9086 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
9088 return gen_rtx_SET (VOIDmode
,
9089 gen_rtx_MEM (word_mode
,
9090 gen_rtx_PRE_DEC (Pmode
,
9091 stack_pointer_rtx
)),
9095 /* Generate an "pop" pattern for input ARG. */
9100 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
9101 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
9103 return gen_rtx_SET (VOIDmode
,
9105 gen_rtx_MEM (word_mode
,
9106 gen_rtx_POST_INC (Pmode
,
9107 stack_pointer_rtx
)));
9110 /* Return >= 0 if there is an unused call-clobbered register available
9111 for the entire function. */
9114 ix86_select_alt_pic_regnum (void)
9118 && !ix86_current_function_calls_tls_descriptor
)
9121 /* Can't use the same register for both PIC and DRAP. */
9123 drap
= REGNO (crtl
->drap_reg
);
9126 for (i
= 2; i
>= 0; --i
)
9127 if (i
!= drap
&& !df_regs_ever_live_p (i
))
9131 return INVALID_REGNUM
;
9134 /* Return TRUE if we need to save REGNO. */
9137 ix86_save_reg (unsigned int regno
, bool maybe_eh_return
)
9139 if (pic_offset_table_rtx
9140 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
9141 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
9143 || crtl
->calls_eh_return
9144 || crtl
->uses_const_pool
9145 || cfun
->has_nonlocal_label
))
9146 return ix86_select_alt_pic_regnum () == INVALID_REGNUM
;
9148 if (crtl
->calls_eh_return
&& maybe_eh_return
)
9153 unsigned test
= EH_RETURN_DATA_REGNO (i
);
9154 if (test
== INVALID_REGNUM
)
9161 if (crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
9164 return (df_regs_ever_live_p (regno
)
9165 && !call_used_regs
[regno
]
9166 && !fixed_regs
[regno
]
9167 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
9170 /* Return number of saved general prupose registers. */
9173 ix86_nsaved_regs (void)
9178 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9179 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9184 /* Return number of saved SSE registrers. */
9187 ix86_nsaved_sseregs (void)
9192 if (!TARGET_64BIT_MS_ABI
)
9194 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9195 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9200 /* Given FROM and TO register numbers, say whether this elimination is
9201 allowed. If stack alignment is needed, we can only replace argument
9202 pointer with hard frame pointer, or replace frame pointer with stack
9203 pointer. Otherwise, frame pointer elimination is automatically
9204 handled and all other eliminations are valid. */
9207 ix86_can_eliminate (const int from
, const int to
)
9209 if (stack_realign_fp
)
9210 return ((from
== ARG_POINTER_REGNUM
9211 && to
== HARD_FRAME_POINTER_REGNUM
)
9212 || (from
== FRAME_POINTER_REGNUM
9213 && to
== STACK_POINTER_REGNUM
));
9215 return to
== STACK_POINTER_REGNUM
? !frame_pointer_needed
: true;
9218 /* Return the offset between two registers, one to be eliminated, and the other
9219 its replacement, at the start of a routine. */
9222 ix86_initial_elimination_offset (int from
, int to
)
9224 struct ix86_frame frame
;
9225 ix86_compute_frame_layout (&frame
);
9227 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
9228 return frame
.hard_frame_pointer_offset
;
9229 else if (from
== FRAME_POINTER_REGNUM
9230 && to
== HARD_FRAME_POINTER_REGNUM
)
9231 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
9234 gcc_assert (to
== STACK_POINTER_REGNUM
);
9236 if (from
== ARG_POINTER_REGNUM
)
9237 return frame
.stack_pointer_offset
;
9239 gcc_assert (from
== FRAME_POINTER_REGNUM
);
9240 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
9244 /* In a dynamically-aligned function, we can't know the offset from
9245 stack pointer to frame pointer, so we must ensure that setjmp
9246 eliminates fp against the hard fp (%ebp) rather than trying to
9247 index from %esp up to the top of the frame across a gap that is
9248 of unknown (at compile-time) size. */
9250 ix86_builtin_setjmp_frame_value (void)
9252 return stack_realign_fp
? hard_frame_pointer_rtx
: virtual_stack_vars_rtx
;
9255 /* When using -fsplit-stack, the allocation routines set a field in
9256 the TCB to the bottom of the stack plus this much space, measured
9259 #define SPLIT_STACK_AVAILABLE 256
9261 /* Fill structure ix86_frame about frame of currently computed function. */
9264 ix86_compute_frame_layout (struct ix86_frame
*frame
)
9266 unsigned HOST_WIDE_INT stack_alignment_needed
;
9267 HOST_WIDE_INT offset
;
9268 unsigned HOST_WIDE_INT preferred_alignment
;
9269 HOST_WIDE_INT size
= get_frame_size ();
9270 HOST_WIDE_INT to_allocate
;
9272 frame
->nregs
= ix86_nsaved_regs ();
9273 frame
->nsseregs
= ix86_nsaved_sseregs ();
9275 stack_alignment_needed
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
9276 preferred_alignment
= crtl
->preferred_stack_boundary
/ BITS_PER_UNIT
;
9278 /* 64-bit MS ABI seem to require stack alignment to be always 16 except for
9279 function prologues and leaf. */
9280 if ((TARGET_64BIT_MS_ABI
&& preferred_alignment
< 16)
9281 && (!crtl
->is_leaf
|| cfun
->calls_alloca
!= 0
9282 || ix86_current_function_calls_tls_descriptor
))
9284 preferred_alignment
= 16;
9285 stack_alignment_needed
= 16;
9286 crtl
->preferred_stack_boundary
= 128;
9287 crtl
->stack_alignment_needed
= 128;
9290 gcc_assert (!size
|| stack_alignment_needed
);
9291 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
9292 gcc_assert (preferred_alignment
<= stack_alignment_needed
);
9294 /* For SEH we have to limit the amount of code movement into the prologue.
9295 At present we do this via a BLOCKAGE, at which point there's very little
9296 scheduling that can be done, which means that there's very little point
9297 in doing anything except PUSHs. */
9299 cfun
->machine
->use_fast_prologue_epilogue
= false;
9301 /* During reload iteration the amount of registers saved can change.
9302 Recompute the value as needed. Do not recompute when amount of registers
9303 didn't change as reload does multiple calls to the function and does not
9304 expect the decision to change within single iteration. */
9305 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun
))
9306 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
9308 int count
= frame
->nregs
;
9309 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
9311 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
9313 /* The fast prologue uses move instead of push to save registers. This
9314 is significantly longer, but also executes faster as modern hardware
9315 can execute the moves in parallel, but can't do that for push/pop.
9317 Be careful about choosing what prologue to emit: When function takes
9318 many instructions to execute we may use slow version as well as in
9319 case function is known to be outside hot spot (this is known with
9320 feedback only). Weight the size of function by number of registers
9321 to save as it is cheap to use one or two push instructions but very
9322 slow to use many of them. */
9324 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
9325 if (node
->frequency
< NODE_FREQUENCY_NORMAL
9326 || (flag_branch_probabilities
9327 && node
->frequency
< NODE_FREQUENCY_HOT
))
9328 cfun
->machine
->use_fast_prologue_epilogue
= false;
9330 cfun
->machine
->use_fast_prologue_epilogue
9331 = !expensive_function_p (count
);
9334 frame
->save_regs_using_mov
9335 = (TARGET_PROLOGUE_USING_MOVE
&& cfun
->machine
->use_fast_prologue_epilogue
9336 /* If static stack checking is enabled and done with probes,
9337 the registers need to be saved before allocating the frame. */
9338 && flag_stack_check
!= STATIC_BUILTIN_STACK_CHECK
);
9340 /* Skip return address. */
9341 offset
= UNITS_PER_WORD
;
9343 /* Skip pushed static chain. */
9344 if (ix86_static_chain_on_stack
)
9345 offset
+= UNITS_PER_WORD
;
9347 /* Skip saved base pointer. */
9348 if (frame_pointer_needed
)
9349 offset
+= UNITS_PER_WORD
;
9350 frame
->hfp_save_offset
= offset
;
9352 /* The traditional frame pointer location is at the top of the frame. */
9353 frame
->hard_frame_pointer_offset
= offset
;
9355 /* Register save area */
9356 offset
+= frame
->nregs
* UNITS_PER_WORD
;
9357 frame
->reg_save_offset
= offset
;
9359 /* On SEH target, registers are pushed just before the frame pointer
9362 frame
->hard_frame_pointer_offset
= offset
;
9364 /* Align and set SSE register save area. */
9365 if (frame
->nsseregs
)
9367 /* The only ABI that has saved SSE registers (Win64) also has a
9368 16-byte aligned default stack, and thus we don't need to be
9369 within the re-aligned local stack frame to save them. */
9370 gcc_assert (INCOMING_STACK_BOUNDARY
>= 128);
9371 offset
= (offset
+ 16 - 1) & -16;
9372 offset
+= frame
->nsseregs
* 16;
9374 frame
->sse_reg_save_offset
= offset
;
9376 /* The re-aligned stack starts here. Values before this point are not
9377 directly comparable with values below this point. In order to make
9378 sure that no value happens to be the same before and after, force
9379 the alignment computation below to add a non-zero value. */
9380 if (stack_realign_fp
)
9381 offset
= (offset
+ stack_alignment_needed
) & -stack_alignment_needed
;
9384 frame
->va_arg_size
= ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
;
9385 offset
+= frame
->va_arg_size
;
9387 /* Align start of frame for local function. */
9388 if (stack_realign_fp
9389 || offset
!= frame
->sse_reg_save_offset
9392 || cfun
->calls_alloca
9393 || ix86_current_function_calls_tls_descriptor
)
9394 offset
= (offset
+ stack_alignment_needed
- 1) & -stack_alignment_needed
;
9396 /* Frame pointer points here. */
9397 frame
->frame_pointer_offset
= offset
;
9401 /* Add outgoing arguments area. Can be skipped if we eliminated
9402 all the function calls as dead code.
9403 Skipping is however impossible when function calls alloca. Alloca
9404 expander assumes that last crtl->outgoing_args_size
9405 of stack frame are unused. */
9406 if (ACCUMULATE_OUTGOING_ARGS
9407 && (!crtl
->is_leaf
|| cfun
->calls_alloca
9408 || ix86_current_function_calls_tls_descriptor
))
9410 offset
+= crtl
->outgoing_args_size
;
9411 frame
->outgoing_arguments_size
= crtl
->outgoing_args_size
;
9414 frame
->outgoing_arguments_size
= 0;
9416 /* Align stack boundary. Only needed if we're calling another function
9418 if (!crtl
->is_leaf
|| cfun
->calls_alloca
9419 || ix86_current_function_calls_tls_descriptor
)
9420 offset
= (offset
+ preferred_alignment
- 1) & -preferred_alignment
;
9422 /* We've reached end of stack frame. */
9423 frame
->stack_pointer_offset
= offset
;
9425 /* Size prologue needs to allocate. */
9426 to_allocate
= offset
- frame
->sse_reg_save_offset
;
9428 if ((!to_allocate
&& frame
->nregs
<= 1)
9429 || (TARGET_64BIT
&& to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
9430 frame
->save_regs_using_mov
= false;
9432 if (ix86_using_red_zone ()
9433 && crtl
->sp_is_unchanging
9435 && !ix86_current_function_calls_tls_descriptor
)
9437 frame
->red_zone_size
= to_allocate
;
9438 if (frame
->save_regs_using_mov
)
9439 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
9440 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
9441 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
9444 frame
->red_zone_size
= 0;
9445 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
9447 /* The SEH frame pointer location is near the bottom of the frame.
9448 This is enforced by the fact that the difference between the
9449 stack pointer and the frame pointer is limited to 240 bytes in
9450 the unwind data structure. */
9455 /* If we can leave the frame pointer where it is, do so. Also, returns
9456 the establisher frame for __builtin_frame_address (0). */
9457 diff
= frame
->stack_pointer_offset
- frame
->hard_frame_pointer_offset
;
9458 if (diff
<= SEH_MAX_FRAME_SIZE
9459 && (diff
> 240 || (diff
& 15) != 0)
9460 && !crtl
->accesses_prior_frames
)
9462 /* Ideally we'd determine what portion of the local stack frame
9463 (within the constraint of the lowest 240) is most heavily used.
9464 But without that complication, simply bias the frame pointer
9465 by 128 bytes so as to maximize the amount of the local stack
9466 frame that is addressable with 8-bit offsets. */
9467 frame
->hard_frame_pointer_offset
= frame
->stack_pointer_offset
- 128;
9472 /* This is semi-inlined memory_address_length, but simplified
9473 since we know that we're always dealing with reg+offset, and
9474 to avoid having to create and discard all that rtl. */
9477 choose_baseaddr_len (unsigned int regno
, HOST_WIDE_INT offset
)
9483 /* EBP and R13 cannot be encoded without an offset. */
9484 len
= (regno
== BP_REG
|| regno
== R13_REG
);
9486 else if (IN_RANGE (offset
, -128, 127))
9489 /* ESP and R12 must be encoded with a SIB byte. */
9490 if (regno
== SP_REG
|| regno
== R12_REG
)
9496 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9497 The valid base registers are taken from CFUN->MACHINE->FS. */
9500 choose_baseaddr (HOST_WIDE_INT cfa_offset
)
9502 const struct machine_function
*m
= cfun
->machine
;
9503 rtx base_reg
= NULL
;
9504 HOST_WIDE_INT base_offset
= 0;
9506 if (m
->use_fast_prologue_epilogue
)
9508 /* Choose the base register most likely to allow the most scheduling
9509 opportunities. Generally FP is valid throughout the function,
9510 while DRAP must be reloaded within the epilogue. But choose either
9511 over the SP due to increased encoding size. */
9515 base_reg
= hard_frame_pointer_rtx
;
9516 base_offset
= m
->fs
.fp_offset
- cfa_offset
;
9518 else if (m
->fs
.drap_valid
)
9520 base_reg
= crtl
->drap_reg
;
9521 base_offset
= 0 - cfa_offset
;
9523 else if (m
->fs
.sp_valid
)
9525 base_reg
= stack_pointer_rtx
;
9526 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9531 HOST_WIDE_INT toffset
;
9534 /* Choose the base register with the smallest address encoding.
9535 With a tie, choose FP > DRAP > SP. */
9538 base_reg
= stack_pointer_rtx
;
9539 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9540 len
= choose_baseaddr_len (STACK_POINTER_REGNUM
, base_offset
);
9542 if (m
->fs
.drap_valid
)
9544 toffset
= 0 - cfa_offset
;
9545 tlen
= choose_baseaddr_len (REGNO (crtl
->drap_reg
), toffset
);
9548 base_reg
= crtl
->drap_reg
;
9549 base_offset
= toffset
;
9555 toffset
= m
->fs
.fp_offset
- cfa_offset
;
9556 tlen
= choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM
, toffset
);
9559 base_reg
= hard_frame_pointer_rtx
;
9560 base_offset
= toffset
;
9565 gcc_assert (base_reg
!= NULL
);
9567 return plus_constant (Pmode
, base_reg
, base_offset
);
9570 /* Emit code to save registers in the prologue. */
9573 ix86_emit_save_regs (void)
9578 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
-- > 0; )
9579 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9581 insn
= emit_insn (gen_push (gen_rtx_REG (word_mode
, regno
)));
9582 RTX_FRAME_RELATED_P (insn
) = 1;
9586 /* Emit a single register save at CFA - CFA_OFFSET. */
9589 ix86_emit_save_reg_using_mov (enum machine_mode mode
, unsigned int regno
,
9590 HOST_WIDE_INT cfa_offset
)
9592 struct machine_function
*m
= cfun
->machine
;
9593 rtx reg
= gen_rtx_REG (mode
, regno
);
9594 rtx mem
, addr
, base
, insn
;
9596 addr
= choose_baseaddr (cfa_offset
);
9597 mem
= gen_frame_mem (mode
, addr
);
9599 /* For SSE saves, we need to indicate the 128-bit alignment. */
9600 set_mem_align (mem
, GET_MODE_ALIGNMENT (mode
));
9602 insn
= emit_move_insn (mem
, reg
);
9603 RTX_FRAME_RELATED_P (insn
) = 1;
9606 if (GET_CODE (base
) == PLUS
)
9607 base
= XEXP (base
, 0);
9608 gcc_checking_assert (REG_P (base
));
9610 /* When saving registers into a re-aligned local stack frame, avoid
9611 any tricky guessing by dwarf2out. */
9612 if (m
->fs
.realigned
)
9614 gcc_checking_assert (stack_realign_drap
);
9616 if (regno
== REGNO (crtl
->drap_reg
))
9618 /* A bit of a hack. We force the DRAP register to be saved in
9619 the re-aligned stack frame, which provides us with a copy
9620 of the CFA that will last past the prologue. Install it. */
9621 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9622 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9623 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9624 mem
= gen_rtx_MEM (mode
, addr
);
9625 add_reg_note (insn
, REG_CFA_DEF_CFA
, mem
);
9629 /* The frame pointer is a stable reference within the
9630 aligned frame. Use it. */
9631 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9632 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9633 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9634 mem
= gen_rtx_MEM (mode
, addr
);
9635 add_reg_note (insn
, REG_CFA_EXPRESSION
,
9636 gen_rtx_SET (VOIDmode
, mem
, reg
));
9640 /* The memory may not be relative to the current CFA register,
9641 which means that we may need to generate a new pattern for
9642 use by the unwind info. */
9643 else if (base
!= m
->fs
.cfa_reg
)
9645 addr
= plus_constant (Pmode
, m
->fs
.cfa_reg
,
9646 m
->fs
.cfa_offset
- cfa_offset
);
9647 mem
= gen_rtx_MEM (mode
, addr
);
9648 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (VOIDmode
, mem
, reg
));
9652 /* Emit code to save registers using MOV insns.
9653 First register is stored at CFA - CFA_OFFSET. */
9655 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9659 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9660 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9662 ix86_emit_save_reg_using_mov (word_mode
, regno
, cfa_offset
);
9663 cfa_offset
-= UNITS_PER_WORD
;
9667 /* Emit code to save SSE registers using MOV insns.
9668 First register is stored at CFA - CFA_OFFSET. */
9670 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9674 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9675 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9677 ix86_emit_save_reg_using_mov (V4SFmode
, regno
, cfa_offset
);
9682 static GTY(()) rtx queued_cfa_restores
;
9684 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9685 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9686 Don't add the note if the previously saved value will be left untouched
9687 within stack red-zone till return, as unwinders can find the same value
9688 in the register and on the stack. */
9691 ix86_add_cfa_restore_note (rtx insn
, rtx reg
, HOST_WIDE_INT cfa_offset
)
9693 if (!crtl
->shrink_wrapped
9694 && cfa_offset
<= cfun
->machine
->fs
.red_zone_offset
)
9699 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
9700 RTX_FRAME_RELATED_P (insn
) = 1;
9704 = alloc_reg_note (REG_CFA_RESTORE
, reg
, queued_cfa_restores
);
9707 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9710 ix86_add_queued_cfa_restore_notes (rtx insn
)
9713 if (!queued_cfa_restores
)
9715 for (last
= queued_cfa_restores
; XEXP (last
, 1); last
= XEXP (last
, 1))
9717 XEXP (last
, 1) = REG_NOTES (insn
);
9718 REG_NOTES (insn
) = queued_cfa_restores
;
9719 queued_cfa_restores
= NULL_RTX
;
9720 RTX_FRAME_RELATED_P (insn
) = 1;
9723 /* Expand prologue or epilogue stack adjustment.
9724 The pattern exist to put a dependency on all ebp-based memory accesses.
9725 STYLE should be negative if instructions should be marked as frame related,
9726 zero if %r11 register is live and cannot be freely used and positive
9730 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
,
9731 int style
, bool set_cfa
)
9733 struct machine_function
*m
= cfun
->machine
;
9735 bool add_frame_related_expr
= false;
9737 if (Pmode
== SImode
)
9738 insn
= gen_pro_epilogue_adjust_stack_si_add (dest
, src
, offset
);
9739 else if (x86_64_immediate_operand (offset
, DImode
))
9740 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, offset
);
9744 /* r11 is used by indirect sibcall return as well, set before the
9745 epilogue and used after the epilogue. */
9747 tmp
= gen_rtx_REG (DImode
, R11_REG
);
9750 gcc_assert (src
!= hard_frame_pointer_rtx
9751 && dest
!= hard_frame_pointer_rtx
);
9752 tmp
= hard_frame_pointer_rtx
;
9754 insn
= emit_insn (gen_rtx_SET (DImode
, tmp
, offset
));
9756 add_frame_related_expr
= true;
9758 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, tmp
);
9761 insn
= emit_insn (insn
);
9763 ix86_add_queued_cfa_restore_notes (insn
);
9769 gcc_assert (m
->fs
.cfa_reg
== src
);
9770 m
->fs
.cfa_offset
+= INTVAL (offset
);
9771 m
->fs
.cfa_reg
= dest
;
9773 r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9774 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9775 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, r
);
9776 RTX_FRAME_RELATED_P (insn
) = 1;
9780 RTX_FRAME_RELATED_P (insn
) = 1;
9781 if (add_frame_related_expr
)
9783 rtx r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9784 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9785 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, r
);
9789 if (dest
== stack_pointer_rtx
)
9791 HOST_WIDE_INT ooffset
= m
->fs
.sp_offset
;
9792 bool valid
= m
->fs
.sp_valid
;
9794 if (src
== hard_frame_pointer_rtx
)
9796 valid
= m
->fs
.fp_valid
;
9797 ooffset
= m
->fs
.fp_offset
;
9799 else if (src
== crtl
->drap_reg
)
9801 valid
= m
->fs
.drap_valid
;
9806 /* Else there are two possibilities: SP itself, which we set
9807 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9808 taken care of this by hand along the eh_return path. */
9809 gcc_checking_assert (src
== stack_pointer_rtx
9810 || offset
== const0_rtx
);
9813 m
->fs
.sp_offset
= ooffset
- INTVAL (offset
);
9814 m
->fs
.sp_valid
= valid
;
9818 /* Find an available register to be used as dynamic realign argument
9819 pointer regsiter. Such a register will be written in prologue and
9820 used in begin of body, so it must not be
9821 1. parameter passing register.
9823 We reuse static-chain register if it is available. Otherwise, we
9824 use DI for i386 and R13 for x86-64. We chose R13 since it has
9827 Return: the regno of chosen register. */
9830 find_drap_reg (void)
9832 tree decl
= cfun
->decl
;
9836 /* Use R13 for nested function or function need static chain.
9837 Since function with tail call may use any caller-saved
9838 registers in epilogue, DRAP must not use caller-saved
9839 register in such case. */
9840 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9847 /* Use DI for nested function or function need static chain.
9848 Since function with tail call may use any caller-saved
9849 registers in epilogue, DRAP must not use caller-saved
9850 register in such case. */
9851 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9854 /* Reuse static chain register if it isn't used for parameter
9856 if (ix86_function_regparm (TREE_TYPE (decl
), decl
) <= 2)
9858 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (decl
));
9859 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) == 0)
9866 /* Return minimum incoming stack alignment. */
9869 ix86_minimum_incoming_stack_boundary (bool sibcall
)
9871 unsigned int incoming_stack_boundary
;
9873 /* Prefer the one specified at command line. */
9874 if (ix86_user_incoming_stack_boundary
)
9875 incoming_stack_boundary
= ix86_user_incoming_stack_boundary
;
9876 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
9877 if -mstackrealign is used, it isn't used for sibcall check and
9878 estimated stack alignment is 128bit. */
9881 && ix86_force_align_arg_pointer
9882 && crtl
->stack_alignment_estimated
== 128)
9883 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9885 incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
9887 /* Incoming stack alignment can be changed on individual functions
9888 via force_align_arg_pointer attribute. We use the smallest
9889 incoming stack boundary. */
9890 if (incoming_stack_boundary
> MIN_STACK_BOUNDARY
9891 && lookup_attribute (ix86_force_align_arg_pointer_string
,
9892 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
9893 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9895 /* The incoming stack frame has to be aligned at least at
9896 parm_stack_boundary. */
9897 if (incoming_stack_boundary
< crtl
->parm_stack_boundary
)
9898 incoming_stack_boundary
= crtl
->parm_stack_boundary
;
9900 /* Stack at entrance of main is aligned by runtime. We use the
9901 smallest incoming stack boundary. */
9902 if (incoming_stack_boundary
> MAIN_STACK_BOUNDARY
9903 && DECL_NAME (current_function_decl
)
9904 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
9905 && DECL_FILE_SCOPE_P (current_function_decl
))
9906 incoming_stack_boundary
= MAIN_STACK_BOUNDARY
;
9908 return incoming_stack_boundary
;
9911 /* Update incoming stack boundary and estimated stack alignment. */
9914 ix86_update_stack_boundary (void)
9916 ix86_incoming_stack_boundary
9917 = ix86_minimum_incoming_stack_boundary (false);
9919 /* x86_64 vararg needs 16byte stack alignment for register save
9923 && crtl
->stack_alignment_estimated
< 128)
9924 crtl
->stack_alignment_estimated
= 128;
9927 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9928 needed or an rtx for DRAP otherwise. */
9931 ix86_get_drap_rtx (void)
9933 if (ix86_force_drap
|| !ACCUMULATE_OUTGOING_ARGS
)
9934 crtl
->need_drap
= true;
9936 if (stack_realign_drap
)
9938 /* Assign DRAP to vDRAP and returns vDRAP */
9939 unsigned int regno
= find_drap_reg ();
9944 arg_ptr
= gen_rtx_REG (Pmode
, regno
);
9945 crtl
->drap_reg
= arg_ptr
;
9948 drap_vreg
= copy_to_reg (arg_ptr
);
9952 insn
= emit_insn_before (seq
, NEXT_INSN (entry_of_function ()));
9955 add_reg_note (insn
, REG_CFA_SET_VDRAP
, drap_vreg
);
9956 RTX_FRAME_RELATED_P (insn
) = 1;
9964 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9967 ix86_internal_arg_pointer (void)
9969 return virtual_incoming_args_rtx
;
9972 struct scratch_reg
{
9977 /* Return a short-lived scratch register for use on function entry.
9978 In 32-bit mode, it is valid only after the registers are saved
9979 in the prologue. This register must be released by means of
9980 release_scratch_register_on_entry once it is dead. */
9983 get_scratch_register_on_entry (struct scratch_reg
*sr
)
9991 /* We always use R11 in 64-bit mode. */
9996 tree decl
= current_function_decl
, fntype
= TREE_TYPE (decl
);
9998 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
10000 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
10001 bool static_chain_p
= DECL_STATIC_CHAIN (decl
);
10002 int regparm
= ix86_function_regparm (fntype
, decl
);
10004 = crtl
->drap_reg
? REGNO (crtl
->drap_reg
) : INVALID_REGNUM
;
10006 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10007 for the static chain register. */
10008 if ((regparm
< 1 || (fastcall_p
&& !static_chain_p
))
10009 && drap_regno
!= AX_REG
)
10011 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10012 for the static chain register. */
10013 else if (thiscall_p
&& !static_chain_p
&& drap_regno
!= AX_REG
)
10015 else if (regparm
< 2 && !thiscall_p
&& drap_regno
!= DX_REG
)
10017 /* ecx is the static chain register. */
10018 else if (regparm
< 3 && !fastcall_p
&& !thiscall_p
10020 && drap_regno
!= CX_REG
)
10022 else if (ix86_save_reg (BX_REG
, true))
10024 /* esi is the static chain register. */
10025 else if (!(regparm
== 3 && static_chain_p
)
10026 && ix86_save_reg (SI_REG
, true))
10028 else if (ix86_save_reg (DI_REG
, true))
10032 regno
= (drap_regno
== AX_REG
? DX_REG
: AX_REG
);
10037 sr
->reg
= gen_rtx_REG (Pmode
, regno
);
10040 rtx insn
= emit_insn (gen_push (sr
->reg
));
10041 RTX_FRAME_RELATED_P (insn
) = 1;
10045 /* Release a scratch register obtained from the preceding function. */
10048 release_scratch_register_on_entry (struct scratch_reg
*sr
)
10052 struct machine_function
*m
= cfun
->machine
;
10053 rtx x
, insn
= emit_insn (gen_pop (sr
->reg
));
10055 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10056 RTX_FRAME_RELATED_P (insn
) = 1;
10057 x
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (UNITS_PER_WORD
));
10058 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
10059 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
10060 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
/* Distance between consecutive stack probes, fixed by the target's
   STACK_CHECK_PROBE_INTERVAL_EXP.  */
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10066 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10069 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size
)
10071 /* We skip the probe for the first interval + a small dope of 4 words and
10072 probe that many bytes past the specified size to maintain a protection
10073 area at the botton of the stack. */
10074 const int dope
= 4 * UNITS_PER_WORD
;
10075 rtx size_rtx
= GEN_INT (size
), last
;
10077 /* See if we have a constant small number of probes to generate. If so,
10078 that's the easy case. The run-time loop is made up of 11 insns in the
10079 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10080 for n # of intervals. */
10081 if (size
<= 5 * PROBE_INTERVAL
)
10083 HOST_WIDE_INT i
, adjust
;
10084 bool first_probe
= true;
10086 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10087 values of N from 1 until it exceeds SIZE. If only one probe is
10088 needed, this will not generate any code. Then adjust and probe
10089 to PROBE_INTERVAL + SIZE. */
10090 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
10094 adjust
= 2 * PROBE_INTERVAL
+ dope
;
10095 first_probe
= false;
10098 adjust
= PROBE_INTERVAL
;
10100 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10101 plus_constant (Pmode
, stack_pointer_rtx
,
10103 emit_stack_probe (stack_pointer_rtx
);
10107 adjust
= size
+ PROBE_INTERVAL
+ dope
;
10109 adjust
= size
+ PROBE_INTERVAL
- i
;
10111 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10112 plus_constant (Pmode
, stack_pointer_rtx
,
10114 emit_stack_probe (stack_pointer_rtx
);
10116 /* Adjust back to account for the additional first interval. */
10117 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10118 plus_constant (Pmode
, stack_pointer_rtx
,
10119 PROBE_INTERVAL
+ dope
)));
10122 /* Otherwise, do the same as above, but in a loop. Note that we must be
10123 extra careful with variables wrapping around because we might be at
10124 the very top (or the very bottom) of the address space and we have
10125 to be able to handle this case properly; in particular, we use an
10126 equality test for the loop condition. */
10129 HOST_WIDE_INT rounded_size
;
10130 struct scratch_reg sr
;
10132 get_scratch_register_on_entry (&sr
);
10135 /* Step 1: round SIZE to the previous multiple of the interval. */
10137 rounded_size
= size
& -PROBE_INTERVAL
;
10140 /* Step 2: compute initial and final value of the loop counter. */
10142 /* SP = SP_0 + PROBE_INTERVAL. */
10143 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10144 plus_constant (Pmode
, stack_pointer_rtx
,
10145 - (PROBE_INTERVAL
+ dope
))));
10147 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10148 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
10149 emit_insn (gen_rtx_SET (VOIDmode
, sr
.reg
,
10150 gen_rtx_PLUS (Pmode
, sr
.reg
,
10151 stack_pointer_rtx
)));
10154 /* Step 3: the loop
10156 while (SP != LAST_ADDR)
10158 SP = SP + PROBE_INTERVAL
10162 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10163 values of N from 1 until it is equal to ROUNDED_SIZE. */
10165 emit_insn (ix86_gen_adjust_stack_and_probe (sr
.reg
, sr
.reg
, size_rtx
));
10168 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10169 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10171 if (size
!= rounded_size
)
10173 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10174 plus_constant (Pmode
, stack_pointer_rtx
,
10175 rounded_size
- size
)));
10176 emit_stack_probe (stack_pointer_rtx
);
10179 /* Adjust back to account for the additional first interval. */
10180 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10181 plus_constant (Pmode
, stack_pointer_rtx
,
10182 PROBE_INTERVAL
+ dope
)));
10184 release_scratch_register_on_entry (&sr
);
10187 gcc_assert (cfun
->machine
->fs
.cfa_reg
!= stack_pointer_rtx
);
10189 /* Even if the stack pointer isn't the CFA register, we need to correctly
10190 describe the adjustments made to it, in particular differentiate the
10191 frame-related ones from the frame-unrelated ones. */
10194 rtx expr
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (2));
10195 XVECEXP (expr
, 0, 0)
10196 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10197 plus_constant (Pmode
, stack_pointer_rtx
, -size
));
10198 XVECEXP (expr
, 0, 1)
10199 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10200 plus_constant (Pmode
, stack_pointer_rtx
,
10201 PROBE_INTERVAL
+ dope
+ size
));
10202 add_reg_note (last
, REG_FRAME_RELATED_EXPR
, expr
);
10203 RTX_FRAME_RELATED_P (last
) = 1;
10205 cfun
->machine
->fs
.sp_offset
+= size
;
10208 /* Make sure nothing is scheduled before we are done. */
10209 emit_insn (gen_blockage ());
10212 /* Adjust the stack pointer up to REG while probing it. */
10215 output_adjust_stack_and_probe (rtx reg
)
10217 static int labelno
= 0;
10218 char loop_lab
[32], end_lab
[32];
10221 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
10222 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
10224 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
10226 /* Jump to END_LAB if SP == LAST_ADDR. */
10227 xops
[0] = stack_pointer_rtx
;
10229 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
10230 fputs ("\tje\t", asm_out_file
);
10231 assemble_name_raw (asm_out_file
, end_lab
);
10232 fputc ('\n', asm_out_file
);
10234 /* SP = SP + PROBE_INTERVAL. */
10235 xops
[1] = GEN_INT (PROBE_INTERVAL
);
10236 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
10239 xops
[1] = const0_rtx
;
10240 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops
);
10242 fprintf (asm_out_file
, "\tjmp\t");
10243 assemble_name_raw (asm_out_file
, loop_lab
);
10244 fputc ('\n', asm_out_file
);
10246 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
10251 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10252 inclusive. These are offsets from the current stack pointer. */
10255 ix86_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
)
10257 /* See if we have a constant small number of probes to generate. If so,
10258 that's the easy case. The run-time loop is made up of 7 insns in the
10259 generic case while the compile-time loop is made up of n insns for n #
10261 if (size
<= 7 * PROBE_INTERVAL
)
10265 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10266 it exceeds SIZE. If only one probe is needed, this will not
10267 generate any code. Then probe at FIRST + SIZE. */
10268 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
10269 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
10272 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
10276 /* Otherwise, do the same as above, but in a loop. Note that we must be
10277 extra careful with variables wrapping around because we might be at
10278 the very top (or the very bottom) of the address space and we have
10279 to be able to handle this case properly; in particular, we use an
10280 equality test for the loop condition. */
10283 HOST_WIDE_INT rounded_size
, last
;
10284 struct scratch_reg sr
;
10286 get_scratch_register_on_entry (&sr
);
10289 /* Step 1: round SIZE to the previous multiple of the interval. */
10291 rounded_size
= size
& -PROBE_INTERVAL
;
10294 /* Step 2: compute initial and final value of the loop counter. */
10296 /* TEST_OFFSET = FIRST. */
10297 emit_move_insn (sr
.reg
, GEN_INT (-first
));
10299 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10300 last
= first
+ rounded_size
;
10303 /* Step 3: the loop
10305 while (TEST_ADDR != LAST_ADDR)
10307 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10311 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10312 until it is equal to ROUNDED_SIZE. */
10314 emit_insn (ix86_gen_probe_stack_range (sr
.reg
, sr
.reg
, GEN_INT (-last
)));
10317 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10318 that SIZE is equal to ROUNDED_SIZE. */
10320 if (size
!= rounded_size
)
10321 emit_stack_probe (plus_constant (Pmode
,
10322 gen_rtx_PLUS (Pmode
,
10325 rounded_size
- size
));
10327 release_scratch_register_on_entry (&sr
);
10330 /* Make sure nothing is scheduled before we are done. */
10331 emit_insn (gen_blockage ());
10334 /* Probe a range of stack addresses from REG to END, inclusive. These are
10335 offsets from the current stack pointer. */
10338 output_probe_stack_range (rtx reg
, rtx end
)
10340 static int labelno
= 0;
10341 char loop_lab
[32], end_lab
[32];
10344 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
10345 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
10347 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
10349 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10352 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
10353 fputs ("\tje\t", asm_out_file
);
10354 assemble_name_raw (asm_out_file
, end_lab
);
10355 fputc ('\n', asm_out_file
);
10357 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10358 xops
[1] = GEN_INT (PROBE_INTERVAL
);
10359 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
10361 /* Probe at TEST_ADDR. */
10362 xops
[0] = stack_pointer_rtx
;
10364 xops
[2] = const0_rtx
;
10365 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops
);
10367 fprintf (asm_out_file
, "\tjmp\t");
10368 assemble_name_raw (asm_out_file
, loop_lab
);
10369 fputc ('\n', asm_out_file
);
10371 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
10376 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10377 to be generated in correct form. */
10379 ix86_finalize_stack_realign_flags (void)
10381 /* Check if stack realign is really needed after reload, and
10382 stores result in cfun */
10383 unsigned int incoming_stack_boundary
10384 = (crtl
->parm_stack_boundary
> ix86_incoming_stack_boundary
10385 ? crtl
->parm_stack_boundary
: ix86_incoming_stack_boundary
);
10386 unsigned int stack_realign
= (incoming_stack_boundary
10388 ? crtl
->max_used_stack_slot_alignment
10389 : crtl
->stack_alignment_needed
));
10391 if (crtl
->stack_realign_finalized
)
10393 /* After stack_realign_needed is finalized, we can't no longer
10395 gcc_assert (crtl
->stack_realign_needed
== stack_realign
);
10399 /* If the only reason for frame_pointer_needed is that we conservatively
10400 assumed stack realignment might be needed, but in the end nothing that
10401 needed the stack alignment had been spilled, clear frame_pointer_needed
10402 and say we don't need stack realignment. */
10404 && !crtl
->need_drap
10405 && frame_pointer_needed
10407 && flag_omit_frame_pointer
10408 && crtl
->sp_is_unchanging
10409 && !ix86_current_function_calls_tls_descriptor
10410 && !crtl
->accesses_prior_frames
10411 && !cfun
->calls_alloca
10412 && !crtl
->calls_eh_return
10413 && !(flag_stack_check
&& STACK_CHECK_MOVING_SP
)
10414 && !ix86_frame_pointer_required ()
10415 && get_frame_size () == 0
10416 && ix86_nsaved_sseregs () == 0
10417 && ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
== 0)
10419 HARD_REG_SET set_up_by_prologue
, prologue_used
;
10422 CLEAR_HARD_REG_SET (prologue_used
);
10423 CLEAR_HARD_REG_SET (set_up_by_prologue
);
10424 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, STACK_POINTER_REGNUM
);
10425 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, ARG_POINTER_REGNUM
);
10426 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
,
10427 HARD_FRAME_POINTER_REGNUM
);
10431 FOR_BB_INSNS (bb
, insn
)
10432 if (NONDEBUG_INSN_P (insn
)
10433 && requires_stack_frame_p (insn
, prologue_used
,
10434 set_up_by_prologue
))
10436 crtl
->stack_realign_needed
= stack_realign
;
10437 crtl
->stack_realign_finalized
= true;
10442 frame_pointer_needed
= false;
10443 stack_realign
= false;
10444 crtl
->max_used_stack_slot_alignment
= incoming_stack_boundary
;
10445 crtl
->stack_alignment_needed
= incoming_stack_boundary
;
10446 crtl
->stack_alignment_estimated
= incoming_stack_boundary
;
10447 if (crtl
->preferred_stack_boundary
> incoming_stack_boundary
)
10448 crtl
->preferred_stack_boundary
= incoming_stack_boundary
;
10449 df_finish_pass (true);
10450 df_scan_alloc (NULL
);
10452 df_compute_regs_ever_live (true);
10456 crtl
->stack_realign_needed
= stack_realign
;
10457 crtl
->stack_realign_finalized
= true;
10460 /* Expand the prologue into a bunch of separate insns. */
10463 ix86_expand_prologue (void)
10465 struct machine_function
*m
= cfun
->machine
;
10468 struct ix86_frame frame
;
10469 HOST_WIDE_INT allocate
;
10470 bool int_registers_saved
;
10471 bool sse_registers_saved
;
10473 ix86_finalize_stack_realign_flags ();
10475 /* DRAP should not coexist with stack_realign_fp */
10476 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
10478 memset (&m
->fs
, 0, sizeof (m
->fs
));
10480 /* Initialize CFA state for before the prologue. */
10481 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10482 m
->fs
.cfa_offset
= INCOMING_FRAME_SP_OFFSET
;
10484 /* Track SP offset to the CFA. We continue tracking this after we've
10485 swapped the CFA register away from SP. In the case of re-alignment
10486 this is fudged; we're interested to offsets within the local frame. */
10487 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10488 m
->fs
.sp_valid
= true;
10490 ix86_compute_frame_layout (&frame
);
10492 if (!TARGET_64BIT
&& ix86_function_ms_hook_prologue (current_function_decl
))
10494 /* We should have already generated an error for any use of
10495 ms_hook on a nested function. */
10496 gcc_checking_assert (!ix86_static_chain_on_stack
);
10498 /* Check if profiling is active and we shall use profiling before
10499 prologue variant. If so sorry. */
10500 if (crtl
->profile
&& flag_fentry
!= 0)
10501 sorry ("ms_hook_prologue attribute isn%'t compatible "
10502 "with -mfentry for 32-bit");
10504 /* In ix86_asm_output_function_label we emitted:
10505 8b ff movl.s %edi,%edi
10507 8b ec movl.s %esp,%ebp
10509 This matches the hookable function prologue in Win32 API
10510 functions in Microsoft Windows XP Service Pack 2 and newer.
10511 Wine uses this to enable Windows apps to hook the Win32 API
10512 functions provided by Wine.
10514 What that means is that we've already set up the frame pointer. */
10516 if (frame_pointer_needed
10517 && !(crtl
->drap_reg
&& crtl
->stack_realign_needed
))
10521 /* We've decided to use the frame pointer already set up.
10522 Describe this to the unwinder by pretending that both
10523 push and mov insns happen right here.
10525 Putting the unwind info here at the end of the ms_hook
10526 is done so that we can make absolutely certain we get
10527 the required byte sequence at the start of the function,
10528 rather than relying on an assembler that can produce
10529 the exact encoding required.
10531 However it does mean (in the unpatched case) that we have
10532 a 1 insn window where the asynchronous unwind info is
10533 incorrect. However, if we placed the unwind info at
10534 its correct location we would have incorrect unwind info
10535 in the patched case. Which is probably all moot since
10536 I don't expect Wine generates dwarf2 unwind info for the
10537 system libraries that use this feature. */
10539 insn
= emit_insn (gen_blockage ());
10541 push
= gen_push (hard_frame_pointer_rtx
);
10542 mov
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
10543 stack_pointer_rtx
);
10544 RTX_FRAME_RELATED_P (push
) = 1;
10545 RTX_FRAME_RELATED_P (mov
) = 1;
10547 RTX_FRAME_RELATED_P (insn
) = 1;
10548 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10549 gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, push
, mov
)));
10551 /* Note that gen_push incremented m->fs.cfa_offset, even
10552 though we didn't emit the push insn here. */
10553 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10554 m
->fs
.fp_offset
= m
->fs
.cfa_offset
;
10555 m
->fs
.fp_valid
= true;
10559 /* The frame pointer is not needed so pop %ebp again.
10560 This leaves us with a pristine state. */
10561 emit_insn (gen_pop (hard_frame_pointer_rtx
));
10565 /* The first insn of a function that accepts its static chain on the
10566 stack is to push the register that would be filled in by a direct
10567 call. This insn will be skipped by the trampoline. */
10568 else if (ix86_static_chain_on_stack
)
10570 insn
= emit_insn (gen_push (ix86_static_chain (cfun
->decl
, false)));
10571 emit_insn (gen_blockage ());
10573 /* We don't want to interpret this push insn as a register save,
10574 only as a stack adjustment. The real copy of the register as
10575 a save will be done later, if needed. */
10576 t
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
10577 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
10578 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
10579 RTX_FRAME_RELATED_P (insn
) = 1;
10582 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
10583 of DRAP is needed and stack realignment is really needed after reload */
10584 if (stack_realign_drap
)
10586 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10588 /* Only need to push parameter pointer reg if it is caller saved. */
10589 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10591 /* Push arg pointer reg */
10592 insn
= emit_insn (gen_push (crtl
->drap_reg
));
10593 RTX_FRAME_RELATED_P (insn
) = 1;
10596 /* Grab the argument pointer. */
10597 t
= plus_constant (Pmode
, stack_pointer_rtx
, m
->fs
.sp_offset
);
10598 insn
= emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10599 RTX_FRAME_RELATED_P (insn
) = 1;
10600 m
->fs
.cfa_reg
= crtl
->drap_reg
;
10601 m
->fs
.cfa_offset
= 0;
10603 /* Align the stack. */
10604 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10606 GEN_INT (-align_bytes
)));
10607 RTX_FRAME_RELATED_P (insn
) = 1;
10609 /* Replicate the return address on the stack so that return
10610 address can be reached via (argp - 1) slot. This is needed
10611 to implement macro RETURN_ADDR_RTX and intrinsic function
10612 expand_builtin_return_addr etc. */
10613 t
= plus_constant (Pmode
, crtl
->drap_reg
, -UNITS_PER_WORD
);
10614 t
= gen_frame_mem (word_mode
, t
);
10615 insn
= emit_insn (gen_push (t
));
10616 RTX_FRAME_RELATED_P (insn
) = 1;
10618 /* For the purposes of frame and register save area addressing,
10619 we've started over with a new frame. */
10620 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10621 m
->fs
.realigned
= true;
10624 int_registers_saved
= (frame
.nregs
== 0);
10625 sse_registers_saved
= (frame
.nsseregs
== 0);
10627 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10629 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10630 slower on all targets. Also sdb doesn't like it. */
10631 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
10632 RTX_FRAME_RELATED_P (insn
) = 1;
10634 /* Push registers now, before setting the frame pointer
10636 if (!int_registers_saved
10638 && !frame
.save_regs_using_mov
)
10640 ix86_emit_save_regs ();
10641 int_registers_saved
= true;
10642 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10645 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
10647 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
10648 RTX_FRAME_RELATED_P (insn
) = 1;
10650 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10651 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10652 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
10653 m
->fs
.fp_valid
= true;
10657 if (!int_registers_saved
)
10659 /* If saving registers via PUSH, do so now. */
10660 if (!frame
.save_regs_using_mov
)
10662 ix86_emit_save_regs ();
10663 int_registers_saved
= true;
10664 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10667 /* When using red zone we may start register saving before allocating
10668 the stack frame saving one cycle of the prologue. However, avoid
10669 doing this if we have to probe the stack; at least on x86_64 the
10670 stack probe can turn into a call that clobbers a red zone location. */
10671 else if (ix86_using_red_zone ()
10672 && (! TARGET_STACK_PROBE
10673 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
))
10675 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10676 int_registers_saved
= true;
10680 if (stack_realign_fp
)
10682 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10683 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
10685 /* The computation of the size of the re-aligned stack frame means
10686 that we must allocate the size of the register save area before
10687 performing the actual alignment. Otherwise we cannot guarantee
10688 that there's enough storage above the realignment point. */
10689 if (m
->fs
.sp_offset
!= frame
.sse_reg_save_offset
)
10690 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10691 GEN_INT (m
->fs
.sp_offset
10692 - frame
.sse_reg_save_offset
),
10695 /* Align the stack. */
10696 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10698 GEN_INT (-align_bytes
)));
10700 /* For the purposes of register save area addressing, the stack
10701 pointer is no longer valid. As for the value of sp_offset,
10702 see ix86_compute_frame_layout, which we need to match in order
10703 to pass verification of stack_pointer_offset at the end. */
10704 m
->fs
.sp_offset
= (m
->fs
.sp_offset
+ align_bytes
) & -align_bytes
;
10705 m
->fs
.sp_valid
= false;
10708 allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
10710 if (flag_stack_usage_info
)
10712 /* We start to count from ARG_POINTER. */
10713 HOST_WIDE_INT stack_size
= frame
.stack_pointer_offset
;
10715 /* If it was realigned, take into account the fake frame. */
10716 if (stack_realign_drap
)
10718 if (ix86_static_chain_on_stack
)
10719 stack_size
+= UNITS_PER_WORD
;
10721 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10722 stack_size
+= UNITS_PER_WORD
;
10724 /* This over-estimates by 1 minimal-stack-alignment-unit but
10725 mitigates that by counting in the new return address slot. */
10726 current_function_dynamic_stack_size
10727 += crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10730 current_function_static_stack_size
= stack_size
;
10733 /* On SEH target with very large frame size, allocate an area to save
10734 SSE registers (as the very large allocation won't be described). */
10736 && frame
.stack_pointer_offset
> SEH_MAX_FRAME_SIZE
10737 && !sse_registers_saved
)
10739 HOST_WIDE_INT sse_size
=
10740 frame
.sse_reg_save_offset
- frame
.reg_save_offset
;
10742 gcc_assert (int_registers_saved
);
10744 /* No need to do stack checking as the area will be immediately
10746 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10747 GEN_INT (-sse_size
), -1,
10748 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10749 allocate
-= sse_size
;
10750 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10751 sse_registers_saved
= true;
10754 /* The stack has already been decremented by the instruction calling us
10755 so probe if the size is non-negative to preserve the protection area. */
10756 if (allocate
>= 0 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
10758 /* We expect the registers to be saved when probes are used. */
10759 gcc_assert (int_registers_saved
);
10761 if (STACK_CHECK_MOVING_SP
)
10763 if (!(crtl
->is_leaf
&& !cfun
->calls_alloca
10764 && allocate
<= PROBE_INTERVAL
))
10766 ix86_adjust_stack_and_probe (allocate
);
10772 HOST_WIDE_INT size
= allocate
;
10774 if (TARGET_64BIT
&& size
>= (HOST_WIDE_INT
) 0x80000000)
10775 size
= 0x80000000 - STACK_CHECK_PROTECT
- 1;
10777 if (TARGET_STACK_PROBE
)
10779 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
10781 if (size
> PROBE_INTERVAL
)
10782 ix86_emit_probe_stack_range (0, size
);
10785 ix86_emit_probe_stack_range (0, size
+ STACK_CHECK_PROTECT
);
10789 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
10791 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
10792 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT
,
10793 size
- STACK_CHECK_PROTECT
);
10796 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
);
10803 else if (!ix86_target_stack_probe ()
10804 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
)
10806 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10807 GEN_INT (-allocate
), -1,
10808 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10812 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
10814 rtx (*adjust_stack_insn
)(rtx
, rtx
, rtx
);
10815 const bool sp_is_cfa_reg
= (m
->fs
.cfa_reg
== stack_pointer_rtx
);
10816 bool eax_live
= false;
10817 bool r10_live
= false;
10820 r10_live
= (DECL_STATIC_CHAIN (current_function_decl
) != 0);
10821 if (!TARGET_64BIT_MS_ABI
)
10822 eax_live
= ix86_eax_live_at_start_p ();
10824 /* Note that SEH directives need to continue tracking the stack
10825 pointer even after the frame pointer has been set up. */
10828 insn
= emit_insn (gen_push (eax
));
10829 allocate
-= UNITS_PER_WORD
;
10830 if (sp_is_cfa_reg
|| TARGET_SEH
)
10833 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
10834 RTX_FRAME_RELATED_P (insn
) = 1;
10840 r10
= gen_rtx_REG (Pmode
, R10_REG
);
10841 insn
= emit_insn (gen_push (r10
));
10842 allocate
-= UNITS_PER_WORD
;
10843 if (sp_is_cfa_reg
|| TARGET_SEH
)
10846 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
10847 RTX_FRAME_RELATED_P (insn
) = 1;
10851 emit_move_insn (eax
, GEN_INT (allocate
));
10852 emit_insn (ix86_gen_allocate_stack_worker (eax
, eax
));
10854 /* Use the fact that AX still contains ALLOCATE. */
10855 adjust_stack_insn
= (Pmode
== DImode
10856 ? gen_pro_epilogue_adjust_stack_di_sub
10857 : gen_pro_epilogue_adjust_stack_si_sub
);
10859 insn
= emit_insn (adjust_stack_insn (stack_pointer_rtx
,
10860 stack_pointer_rtx
, eax
));
10862 if (sp_is_cfa_reg
|| TARGET_SEH
)
10865 m
->fs
.cfa_offset
+= allocate
;
10866 RTX_FRAME_RELATED_P (insn
) = 1;
10867 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10868 gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10869 plus_constant (Pmode
, stack_pointer_rtx
,
10872 m
->fs
.sp_offset
+= allocate
;
10874 if (r10_live
&& eax_live
)
10876 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10877 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
10878 gen_frame_mem (word_mode
, t
));
10879 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
- UNITS_PER_WORD
);
10880 emit_move_insn (gen_rtx_REG (word_mode
, AX_REG
),
10881 gen_frame_mem (word_mode
, t
));
10883 else if (eax_live
|| r10_live
)
10885 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10886 emit_move_insn (gen_rtx_REG (word_mode
,
10887 (eax_live
? AX_REG
: R10_REG
)),
10888 gen_frame_mem (word_mode
, t
));
10891 gcc_assert (m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10893 /* If we haven't already set up the frame pointer, do so now. */
10894 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10896 insn
= ix86_gen_add3 (hard_frame_pointer_rtx
, stack_pointer_rtx
,
10897 GEN_INT (frame
.stack_pointer_offset
10898 - frame
.hard_frame_pointer_offset
));
10899 insn
= emit_insn (insn
);
10900 RTX_FRAME_RELATED_P (insn
) = 1;
10901 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
10903 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10904 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10905 m
->fs
.fp_offset
= frame
.hard_frame_pointer_offset
;
10906 m
->fs
.fp_valid
= true;
10909 if (!int_registers_saved
)
10910 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10911 if (!sse_registers_saved
)
10912 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10914 pic_reg_used
= false;
10915 /* We don't use pic-register for pe-coff target. */
10916 if (pic_offset_table_rtx
10918 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
10921 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
10923 if (alt_pic_reg_used
!= INVALID_REGNUM
)
10924 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
10926 pic_reg_used
= true;
10933 if (ix86_cmodel
== CM_LARGE_PIC
)
10935 rtx label
, tmp_reg
;
10937 gcc_assert (Pmode
== DImode
);
10938 label
= gen_label_rtx ();
10939 emit_label (label
);
10940 LABEL_PRESERVE_P (label
) = 1;
10941 tmp_reg
= gen_rtx_REG (Pmode
, R11_REG
);
10942 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
10943 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
,
10945 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
10946 insn
= emit_insn (ix86_gen_add3 (pic_offset_table_rtx
,
10947 pic_offset_table_rtx
, tmp_reg
));
10950 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
10954 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
10955 RTX_FRAME_RELATED_P (insn
) = 1;
10956 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
10960 /* In the pic_reg_used case, make sure that the got load isn't deleted
10961 when mcount needs it. Blockage to avoid call movement across mcount
10962 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10964 if (crtl
->profile
&& !flag_fentry
&& pic_reg_used
)
10965 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
10967 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
10969 /* vDRAP is setup but after reload it turns out stack realign
10970 isn't necessary, here we will emit prologue to setup DRAP
10971 without stack realign adjustment */
10972 t
= choose_baseaddr (0);
10973 emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10976 /* Prevent instructions from being scheduled into register save push
10977 sequence when access to the redzone area is done through frame pointer.
10978 The offset between the frame pointer and the stack pointer is calculated
10979 relative to the value of the stack pointer at the end of the function
10980 prologue, and moving instructions that access redzone area via frame
10981 pointer inside push sequence violates this assumption. */
10982 if (frame_pointer_needed
&& frame
.red_zone_size
)
10983 emit_insn (gen_memory_blockage ());
10985 /* Emit cld instruction if stringops are used in the function. */
10986 if (TARGET_CLD
&& ix86_current_function_needs_cld
)
10987 emit_insn (gen_cld ());
10989 /* SEH requires that the prologue end within 256 bytes of the start of
10990 the function. Prevent instruction schedules that would extend that.
10991 Further, prevent alloca modifications to the stack pointer from being
10992 combined with prologue modifications. */
10994 emit_insn (gen_prologue_use (stack_pointer_rtx
));
/* Emit code to restore REG using a POP insn, updating the epilogue
   frame-state machine (m->fs) and attaching the CFI notes the unwinder
   needs to track the CFA across the pop.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (gen_pop (reg));

  /* A pop raises the stack pointer by one word.  */
  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      /* The CFA follows the stack pointer, so the pop also moves the CFA
	 by one word; record that for the unwinder.  */
      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				      GEN_INT (m->fs.cfa_offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
11055 /* Emit code to restore saved registers using POP insns. */
11058 ix86_emit_restore_regs_using_pop (void)
11060 unsigned int regno
;
11062 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
11063 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, false))
11064 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode
, regno
));
/* Emit code and notes for the LEAVE instruction.  LEAVE moves the
   frame pointer into the stack pointer and pops the saved frame
   pointer, so both the frame-state machine and the CFA-tracking notes
   must be updated accordingly.  */

static void
ix86_emit_leave (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (ix86_gen_leave ());

  ix86_add_queued_cfa_restore_notes (insn);

  /* LEAVE only makes sense when the frame pointer is live.  */
  gcc_assert (m->fs.fp_valid);
  /* After the pop, the stack pointer sits one word past the slot
     where the frame pointer was stored.  */
  m->fs.sp_valid = true;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      /* The CFA switches back from the frame pointer to the stack
	 pointer; emit the note that tells the unwinder.  */
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    plus_constant (Pmode, stack_pointer_rtx,
				   m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
			     m->fs.fp_offset);
}
/* Emit code to restore saved general-purpose registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET; CFA_OFFSET is
   decremented one word per restored register.  MAYBE_EH_RETURN selects
   whether registers saved only for eh_return are included.  */

static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
				  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
	rtx reg = gen_rtx_REG (word_mode, regno);
	rtx insn, mem;

	mem = choose_baseaddr (cfa_offset);
	mem = gen_frame_mem (word_mode, mem);
	insn = emit_move_insn (reg, mem);

	if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
	  {
	    /* Previously we'd represented the CFA as an expression
	       like *(%ebp - 8).  We've just popped that value from
	       the stack, which means we need to reset the CFA to
	       the drap register.  This will remain until we restore
	       the stack pointer.  */
	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
	    RTX_FRAME_RELATED_P (insn) = 1;

	    /* This means that the DRAP register is valid for addressing.  */
	    m->fs.drap_valid = true;
	  }
	else
	  ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

	cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to restore saved SSE registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  Each register is
   restored as a V4SF (16-byte) load from a 128-bit-aligned slot, and
   CFA_OFFSET is stepped by 16 per register.  */

static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
				      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
	rtx reg = gen_rtx_REG (V4SFmode, regno);
	rtx mem;

	mem = choose_baseaddr (cfa_offset);
	mem = gen_rtx_MEM (V4SFmode, mem);
	/* The save area slots are 16-byte aligned, so an aligned
	   vector move can be used.  */
	set_mem_align (mem, 128);
	emit_move_insn (reg, mem);

	ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

	cfa_offset -= 16;
      }
}
/* Restore function stack, frame, and registers.  STYLE is 0 for a
   sibcall epilogue (no return insn emitted), 2 for an eh_return
   epilogue, and otherwise a normal return.  The function replays the
   prologue's frame-state machine (m->fs) in reverse, keeping the CFI
   notes consistent at every step.  */

void
ix86_expand_epilogue (int style)
{
  struct machine_function *m = cfun->machine;
  /* Saved so the state can be reset for a subsequent epilogue
     (e.g. multiple exit blocks).  */
  struct machine_frame_state frame_state_save = m->fs;
  struct ix86_frame frame;
  bool restore_regs_via_mov;
  bool using_drap;

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);

  m->fs.sp_valid = (!frame_pointer_needed
		    || (crtl->sp_is_unchanging
			&& !stack_realign_fp));
  gcc_assert (!m->fs.sp_valid
	      || m->fs.sp_offset == frame.stack_pointer_offset);

  /* The FP must be valid if the frame pointer is present.  */
  gcc_assert (frame_pointer_needed == m->fs.fp_valid);
  gcc_assert (!m->fs.fp_valid
	      || m->fs.fp_offset == frame.hard_frame_pointer_offset);

  /* We must have *some* valid pointer to the stack frame.  */
  gcc_assert (m->fs.sp_valid || m->fs.fp_valid);

  /* The DRAP is never valid at this point.  */
  gcc_assert (!m->fs.drap_valid);

  /* See the comment about red zone and frame
     pointer usage in ix86_expand_prologue.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
  gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);

  /* Determine the CFA offset of the end of the red-zone.  */
  m->fs.red_zone_offset = 0;
  if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
    {
      /* The red-zone begins below the return address.  */
      m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;

      /* When the register save area is in the aligned portion of
	 the stack, determine the maximum runtime displacement that
	 matches up with the aligned frame.  */
      if (stack_realign_drap)
	m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
				  + UNITS_PER_WORD);
    }

  /* Special care must be taken for the normal return case of a function
     using eh_return: the eax and edx registers are marked as saved, but
     not restored along this path.  Adjust the save location to match.  */
  if (crtl->calls_eh_return && style != 2)
    frame.reg_save_offset -= 2 * UNITS_PER_WORD;

  /* EH_RETURN requires the use of moves to function properly.  */
  if (crtl->calls_eh_return)
    restore_regs_via_mov = true;
  /* SEH requires the use of pops to identify the epilogue.  */
  else if (TARGET_SEH)
    restore_regs_via_mov = false;
  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register since it's
     less work than reloading sp and popping the register.  */
  else if (!m->fs.sp_valid && frame.nregs <= 1)
    restore_regs_via_mov = true;
  else if (TARGET_EPILOGUE_USING_MOVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && (frame.nregs > 1
	       || m->fs.sp_offset != frame.reg_save_offset))
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && !frame.nregs
	   && m->fs.sp_offset != frame.reg_save_offset)
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && TARGET_USE_LEAVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && frame.nregs == 1)
    restore_regs_via_mov = true;
  else
    restore_regs_via_mov = false;

  if (restore_regs_via_mov || frame.nsseregs)
    {
      /* Ensure that the entire register save area is addressable via
	 the stack pointer, if we will restore via sp.  */
      if (TARGET_64BIT
	  && m->fs.sp_offset > 0x7fffffff
	  && !(m->fs.fp_valid || m->fs.drap_valid)
	  && (frame.nsseregs + frame.nregs) != 0)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - frame.sse_reg_save_offset),
				     style,
				     m->fs.cfa_reg == stack_pointer_rtx);
	}
    }

  /* If there are any SSE registers to restore, then we have to do it
     via moves, since there's obviously no pop for SSE regs.  */
  if (frame.nsseregs)
    ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
					  style == 2);

  if (restore_regs_via_mov)
    {
      rtx t;

      if (frame.nregs)
	ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx insn, sa = EH_RETURN_STACKADJ_RTX;

	  /* Stack align doesn't work with eh_return.  */
	  gcc_assert (!stack_realign_drap);
	  /* Neither does regparm nested functions.  */
	  gcc_assert (!ix86_static_chain_on_stack);

	  if (frame_pointer_needed)
	    {
	      t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, t));

	      t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
	      insn = emit_move_insn (hard_frame_pointer_rtx, t);

	      /* Note that we use SA as a temporary CFA, as the return
		 address is at the proper place relative to it.  We
		 pretend this happens at the FP restore insn because
		 prior to this insn the FP would be stored at the wrong
		 offset relative to SA, and after this insn we have no
		 other reasonable register to use for the CFA.  We don't
		 bother resetting the CFA to the SP for the duration of
		 the return insn.  */
	      add_reg_note (insn, REG_CFA_DEF_CFA,
			    plus_constant (Pmode, sa, UNITS_PER_WORD));
	      ix86_add_queued_cfa_restore_notes (insn);
	      add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
	      RTX_FRAME_RELATED_P (insn) = 1;

	      m->fs.cfa_reg = sa;
	      m->fs.cfa_offset = UNITS_PER_WORD;
	      m->fs.fp_valid = false;

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
					 const0_rtx, style, false);
	    }
	  else
	    {
	      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
	      insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
	      ix86_add_queued_cfa_restore_notes (insn);

	      gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
	      if (m->fs.cfa_offset != UNITS_PER_WORD)
		{
		  m->fs.cfa_offset = UNITS_PER_WORD;
		  add_reg_note (insn, REG_CFA_DEF_CFA,
				plus_constant (Pmode, stack_pointer_rtx,
					       UNITS_PER_WORD));
		  RTX_FRAME_RELATED_P (insn) = 1;
		}
	    }
	  m->fs.sp_offset = UNITS_PER_WORD;
	  m->fs.sp_valid = true;
	}
    }
  else
    {
      /* SEH requires that the function end with (1) a stack adjustment
	 if necessary, (2) a sequence of pops, and (3) a return or
	 jump instruction.  Prevent insns from the function body from
	 being scheduled into this sequence.  */
      if (TARGET_SEH)
	{
	  /* Prevent a catch region from being adjacent to the standard
	     epilogue sequence.  Unfortunately crtl->uses_eh_lsda nor
	     several other flags that would be interesting to test are
	     set up yet.  */
	  if (flag_non_call_exceptions)
	    emit_insn (gen_nops (const1_rtx));
	  else
	    emit_insn (gen_blockage ());
	}

      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  Also do it on SEH target for very large
	 frame as the emitted instructions aren't allowed by the ABI in
	 epilogues.  */
      if (!m->fs.sp_valid
	  || (TARGET_SEH
	      && (m->fs.sp_offset - frame.reg_save_offset
		  >= SEH_MAX_FRAME_SIZE)))
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
				     GEN_INT (m->fs.fp_offset
					      - frame.reg_save_offset),
				     style, false);
	}
      else if (m->fs.sp_offset != frame.reg_save_offset)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - frame.reg_save_offset),
				     style,
				     m->fs.cfa_reg == stack_pointer_rtx);
	}

      ix86_emit_restore_regs_using_pop ();
    }

  /* If we used a stack pointer and haven't already got rid of it,
     then do so now.  */
  if (m->fs.fp_valid)
    {
      /* If the stack pointer is valid and pointing at the frame
	 pointer store address, then we only need a pop.  */
      if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
	ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
      /* Leave results in shorter dependency chains on CPUs that are
	 able to grok it fast.  */
      else if (TARGET_USE_LEAVE
	       || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
	       || !cfun->machine->use_fast_prologue_epilogue)
	ix86_emit_leave ();
      else
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style, !using_drap);
	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
	}
    }

  if (using_drap)
    {
      int param_ptr_offset = UNITS_PER_WORD;
      rtx insn;

      gcc_assert (stack_realign_drap);

      if (ix86_static_chain_on_stack)
	param_ptr_offset += UNITS_PER_WORD;
      if (!call_used_regs[REGNO (crtl->drap_reg)])
	param_ptr_offset += UNITS_PER_WORD;

      insn = emit_insn (gen_rtx_SET
			(VOIDmode, stack_pointer_rtx,
			 gen_rtx_PLUS (Pmode,
				       crtl->drap_reg,
				       GEN_INT (-param_ptr_offset))));
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = param_ptr_offset;
      m->fs.sp_offset = param_ptr_offset;
      m->fs.realigned = false;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				  GEN_INT (param_ptr_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (!call_used_regs[REGNO (crtl->drap_reg)])
	ix86_emit_restore_reg_using_pop (crtl->drap_reg);
    }

  /* At this point the stack pointer must be valid, and we must have
     restored all of the registers.  We may not have deallocated the
     entire stack frame.  We've delayed this until now because it may
     be possible to merge the local stack deallocation with the
     deallocation forced by ix86_static_chain_on_stack.   */
  gcc_assert (m->fs.sp_valid);
  gcc_assert (!m->fs.fp_valid);
  gcc_assert (!m->fs.realigned);
  if (m->fs.sp_offset != UNITS_PER_WORD)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
				 style, true);
    }
  else
    ix86_add_queued_cfa_restore_notes (get_last_insn ());

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    {
      m->fs = frame_state_save;
      return;
    }

  if (crtl->args.pops_args && crtl->args.size)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop return
	 address, do explicit add, and jump indirectly to the caller.  */
      if (crtl->args.pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
	  rtx insn;

	  /* There is no "pascal" calling convention in any 64bit ABI.  */
	  gcc_assert (!TARGET_64BIT);

	  insn = emit_insn (gen_pop (ecx));
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  m->fs.sp_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
	  add_reg_note (insn, REG_CFA_REGISTER,
			gen_rtx_SET (VOIDmode, ecx, pc_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     popc, -1, true);
	  emit_jump_insn (gen_simple_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_simple_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_simple_return_internal ());

  /* Restore the state back to the state from the prologue,
     so that it's correct for the next epilogue.  */
  m->fs = frame_state_save;
}
/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  /* The prologue may have switched the PIC register to an alternate
     hard register (see the SET_REGNO in ix86_expand_prologue); restore
     the canonical PIC register number for the next function.  */
  if (pic_offset_table_rtx)
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
#if TARGET_MACHO
  /* Mach-O doesn't support labels at the end of objects, so if
     it looks like we might want one, insert a NOP.  */
  {
    rtx insn = get_last_insn ();
    rtx deleted_debug_label = NULL_RTX;
    while (insn
	   && NOTE_P (insn)
	   && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
      {
	/* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
	   notes only, instead set their CODE_LABEL_NUMBER to -1,
	   otherwise there would be code generation differences
	   in between -g and -g0.  */
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	  deleted_debug_label = insn;
	insn = PREV_INSN (insn);
      }
    if (insn
	&& (LABEL_P (insn)
	    || (NOTE_P (insn)
		&& NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
      fputs ("\tnop\n", file);
    else if (deleted_debug_label)
      for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
	if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	  CODE_LABEL_NUMBER (insn) = -1;
  }
#endif
}
11541 /* Return a scratch register to use in the split stack prologue. The
11542 split stack prologue is used for -fsplit-stack. It is the first
11543 instructions in the function, even before the regular prologue.
11544 The scratch register can be any caller-saved register which is not
11545 used for parameters or for the static chain. */
11547 static unsigned int
11548 split_stack_prologue_scratch_regno (void)
11554 bool is_fastcall
, is_thiscall
;
11557 is_fastcall
= (lookup_attribute ("fastcall",
11558 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11560 is_thiscall
= (lookup_attribute ("thiscall",
11561 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11563 regparm
= ix86_function_regparm (TREE_TYPE (cfun
->decl
), cfun
->decl
);
11567 if (DECL_STATIC_CHAIN (cfun
->decl
))
11569 sorry ("-fsplit-stack does not support fastcall with "
11570 "nested function");
11571 return INVALID_REGNUM
;
11575 else if (is_thiscall
)
11577 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11581 else if (regparm
< 3)
11583 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11589 sorry ("-fsplit-stack does not support 2 register "
11590 " parameters for a nested function");
11591 return INVALID_REGNUM
;
11598 /* FIXME: We could make this work by pushing a register
11599 around the addition and comparison. */
11600 sorry ("-fsplit-stack does not support 3 register parameters");
11601 return INVALID_REGNUM
;
/* A SYMBOL_REF for the function which allocates new stack space for
   -fsplit-stack (__morestack).  Cached across functions; GTY-marked so
   the garbage collector keeps it alive.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more-stack function when using the large
   code model (__morestack_large_model).  */

static GTY(()) rtx split_stack_fn_large;
11616 /* Handle -fsplit-stack. These are the first instructions in the
11617 function, even before the regular prologue. */
11620 ix86_expand_split_stack_prologue (void)
11622 struct ix86_frame frame
;
11623 HOST_WIDE_INT allocate
;
11624 unsigned HOST_WIDE_INT args_size
;
11625 rtx label
, limit
, current
, jump_insn
, allocate_rtx
, call_insn
, call_fusage
;
11626 rtx scratch_reg
= NULL_RTX
;
11627 rtx varargs_label
= NULL_RTX
;
11630 gcc_assert (flag_split_stack
&& reload_completed
);
11632 ix86_finalize_stack_realign_flags ();
11633 ix86_compute_frame_layout (&frame
);
11634 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
11636 /* This is the label we will branch to if we have enough stack
11637 space. We expect the basic block reordering pass to reverse this
11638 branch if optimizing, so that we branch in the unlikely case. */
11639 label
= gen_label_rtx ();
11641 /* We need to compare the stack pointer minus the frame size with
11642 the stack boundary in the TCB. The stack boundary always gives
11643 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11644 can compare directly. Otherwise we need to do an addition. */
11646 limit
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
11647 UNSPEC_STACK_CHECK
);
11648 limit
= gen_rtx_CONST (Pmode
, limit
);
11649 limit
= gen_rtx_MEM (Pmode
, limit
);
11650 if (allocate
< SPLIT_STACK_AVAILABLE
)
11651 current
= stack_pointer_rtx
;
11654 unsigned int scratch_regno
;
11657 /* We need a scratch register to hold the stack pointer minus
11658 the required frame size. Since this is the very start of the
11659 function, the scratch register can be any caller-saved
11660 register which is not used for parameters. */
11661 offset
= GEN_INT (- allocate
);
11662 scratch_regno
= split_stack_prologue_scratch_regno ();
11663 if (scratch_regno
== INVALID_REGNUM
)
11665 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11666 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
11668 /* We don't use ix86_gen_add3 in this case because it will
11669 want to split to lea, but when not optimizing the insn
11670 will not be split after this point. */
11671 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11672 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11677 emit_move_insn (scratch_reg
, offset
);
11678 emit_insn (ix86_gen_add3 (scratch_reg
, scratch_reg
,
11679 stack_pointer_rtx
));
11681 current
= scratch_reg
;
11684 ix86_expand_branch (GEU
, current
, limit
, label
);
11685 jump_insn
= get_last_insn ();
11686 JUMP_LABEL (jump_insn
) = label
;
11688 /* Mark the jump as very likely to be taken. */
11689 add_int_reg_note (jump_insn
, REG_BR_PROB
,
11690 REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100);
11692 if (split_stack_fn
== NULL_RTX
)
11693 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
11694 fn
= split_stack_fn
;
11696 /* Get more stack space. We pass in the desired stack space and the
11697 size of the arguments to copy to the new stack. In 32-bit mode
11698 we push the parameters; __morestack will return on a new stack
11699 anyhow. In 64-bit mode we pass the parameters in r10 and
11701 allocate_rtx
= GEN_INT (allocate
);
11702 args_size
= crtl
->args
.size
>= 0 ? crtl
->args
.size
: 0;
11703 call_fusage
= NULL_RTX
;
11708 reg10
= gen_rtx_REG (Pmode
, R10_REG
);
11709 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
11711 /* If this function uses a static chain, it will be in %r10.
11712 Preserve it across the call to __morestack. */
11713 if (DECL_STATIC_CHAIN (cfun
->decl
))
11717 rax
= gen_rtx_REG (word_mode
, AX_REG
);
11718 emit_move_insn (rax
, gen_rtx_REG (word_mode
, R10_REG
));
11719 use_reg (&call_fusage
, rax
);
11722 if ((ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
11725 HOST_WIDE_INT argval
;
11727 gcc_assert (Pmode
== DImode
);
11728 /* When using the large model we need to load the address
11729 into a register, and we've run out of registers. So we
11730 switch to a different calling convention, and we call a
11731 different function: __morestack_large. We pass the
11732 argument size in the upper 32 bits of r10 and pass the
11733 frame size in the lower 32 bits. */
11734 gcc_assert ((allocate
& (HOST_WIDE_INT
) 0xffffffff) == allocate
);
11735 gcc_assert ((args_size
& 0xffffffff) == args_size
);
11737 if (split_stack_fn_large
== NULL_RTX
)
11738 split_stack_fn_large
=
11739 gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
11741 if (ix86_cmodel
== CM_LARGE_PIC
)
11745 label
= gen_label_rtx ();
11746 emit_label (label
);
11747 LABEL_PRESERVE_P (label
) = 1;
11748 emit_insn (gen_set_rip_rex64 (reg10
, label
));
11749 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
11750 emit_insn (ix86_gen_add3 (reg10
, reg10
, reg11
));
11751 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
11753 x
= gen_rtx_CONST (Pmode
, x
);
11754 emit_move_insn (reg11
, x
);
11755 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
11756 x
= gen_const_mem (Pmode
, x
);
11757 emit_move_insn (reg11
, x
);
11760 emit_move_insn (reg11
, split_stack_fn_large
);
11764 argval
= ((args_size
<< 16) << 16) + allocate
;
11765 emit_move_insn (reg10
, GEN_INT (argval
));
11769 emit_move_insn (reg10
, allocate_rtx
);
11770 emit_move_insn (reg11
, GEN_INT (args_size
));
11771 use_reg (&call_fusage
, reg11
);
11774 use_reg (&call_fusage
, reg10
);
11778 emit_insn (gen_push (GEN_INT (args_size
)));
11779 emit_insn (gen_push (allocate_rtx
));
11781 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
11782 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
11784 add_function_usage_to (call_insn
, call_fusage
);
11786 /* In order to make call/return prediction work right, we now need
11787 to execute a return instruction. See
11788 libgcc/config/i386/morestack.S for the details on how this works.
11790 For flow purposes gcc must not see this as a return
11791 instruction--we need control flow to continue at the subsequent
11792 label. Therefore, we use an unspec. */
11793 gcc_assert (crtl
->args
.pops_args
< 65536);
11794 emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
11796 /* If we are in 64-bit mode and this function uses a static chain,
11797 we saved %r10 in %rax before calling _morestack. */
11798 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
11799 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
11800 gen_rtx_REG (word_mode
, AX_REG
));
11802 /* If this function calls va_start, we need to store a pointer to
11803 the arguments on the old stack, because they may not have been
11804 all copied to the new stack. At this point the old stack can be
11805 found at the frame pointer value used by __morestack, because
11806 __morestack has set that up before calling back to us. Here we
11807 store that pointer in a scratch register, and in
11808 ix86_expand_prologue we store the scratch register in a stack
11810 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11812 unsigned int scratch_regno
;
11816 scratch_regno
= split_stack_prologue_scratch_regno ();
11817 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11818 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
11822 return address within this function
11823 return address of caller of this function
11825 So we add three words to get to the stack arguments.
11829 return address within this function
11830 first argument to __morestack
11831 second argument to __morestack
11832 return address of caller of this function
11834 So we add five words to get to the stack arguments.
11836 words
= TARGET_64BIT
? 3 : 5;
11837 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11838 gen_rtx_PLUS (Pmode
, frame_reg
,
11839 GEN_INT (words
* UNITS_PER_WORD
))));
11841 varargs_label
= gen_label_rtx ();
11842 emit_jump_insn (gen_jump (varargs_label
));
11843 JUMP_LABEL (get_last_insn ()) = varargs_label
;
11848 emit_label (label
);
11849 LABEL_NUSES (label
) = 1;
11851 /* If this function calls va_start, we now have to set the scratch
11852 register for the case where we do not call __morestack. In this
11853 case we need to set it based on the stack pointer. */
11854 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11856 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11857 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11858 GEN_INT (UNITS_PER_WORD
))));
11860 emit_label (varargs_label
);
11861 LABEL_NUSES (varargs_label
) = 1;
11865 /* We may have to tell the dataflow pass that the split stack prologue
11866 is initializing a scratch register. */
11869 ix86_live_on_entry (bitmap regs
)
11871 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11873 gcc_assert (flag_split_stack
);
11874 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
11878 /* Extract the parts of an RTL expression that is a valid memory address
11879 for an instruction. Return 0 if the structure of the address is
11880 grossly off. Return -1 if the address contains ASHIFT, so it is not
11881 strictly valid, but still used for computing length of lea instruction. */
11884 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
11886 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
11887 rtx base_reg
, index_reg
;
11888 HOST_WIDE_INT scale
= 1;
11889 rtx scale_rtx
= NULL_RTX
;
11892 enum ix86_address_seg seg
= SEG_DEFAULT
;
11894 /* Allow zero-extended SImode addresses,
11895 they will be emitted with addr32 prefix. */
11896 if (TARGET_64BIT
&& GET_MODE (addr
) == DImode
)
11898 if (GET_CODE (addr
) == ZERO_EXTEND
11899 && GET_MODE (XEXP (addr
, 0)) == SImode
)
11901 addr
= XEXP (addr
, 0);
11902 if (CONST_INT_P (addr
))
11905 else if (GET_CODE (addr
) == AND
11906 && const_32bit_mask (XEXP (addr
, 1), DImode
))
11908 addr
= simplify_gen_subreg (SImode
, XEXP (addr
, 0), DImode
, 0);
11909 if (addr
== NULL_RTX
)
11912 if (CONST_INT_P (addr
))
11917 /* Allow SImode subregs of DImode addresses,
11918 they will be emitted with addr32 prefix. */
11919 if (TARGET_64BIT
&& GET_MODE (addr
) == SImode
)
11921 if (GET_CODE (addr
) == SUBREG
11922 && GET_MODE (SUBREG_REG (addr
)) == DImode
)
11924 addr
= SUBREG_REG (addr
);
11925 if (CONST_INT_P (addr
))
11932 else if (GET_CODE (addr
) == SUBREG
)
11934 if (REG_P (SUBREG_REG (addr
)))
11939 else if (GET_CODE (addr
) == PLUS
)
11941 rtx addends
[4], op
;
11949 addends
[n
++] = XEXP (op
, 1);
11952 while (GET_CODE (op
) == PLUS
);
11957 for (i
= n
; i
>= 0; --i
)
11960 switch (GET_CODE (op
))
11965 index
= XEXP (op
, 0);
11966 scale_rtx
= XEXP (op
, 1);
11972 index
= XEXP (op
, 0);
11973 tmp
= XEXP (op
, 1);
11974 if (!CONST_INT_P (tmp
))
11976 scale
= INTVAL (tmp
);
11977 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11979 scale
= 1 << scale
;
11984 if (GET_CODE (op
) != UNSPEC
)
11989 if (XINT (op
, 1) == UNSPEC_TP
11990 && TARGET_TLS_DIRECT_SEG_REFS
11991 && seg
== SEG_DEFAULT
)
11992 seg
= DEFAULT_TLS_SEG_REG
;
11998 if (!REG_P (SUBREG_REG (op
)))
12025 else if (GET_CODE (addr
) == MULT
)
12027 index
= XEXP (addr
, 0); /* index*scale */
12028 scale_rtx
= XEXP (addr
, 1);
12030 else if (GET_CODE (addr
) == ASHIFT
)
12032 /* We're called for lea too, which implements ashift on occasion. */
12033 index
= XEXP (addr
, 0);
12034 tmp
= XEXP (addr
, 1);
12035 if (!CONST_INT_P (tmp
))
12037 scale
= INTVAL (tmp
);
12038 if ((unsigned HOST_WIDE_INT
) scale
> 3)
12040 scale
= 1 << scale
;
12044 disp
= addr
; /* displacement */
12050 else if (GET_CODE (index
) == SUBREG
12051 && REG_P (SUBREG_REG (index
)))
12057 /* Extract the integral value of scale. */
12060 if (!CONST_INT_P (scale_rtx
))
12062 scale
= INTVAL (scale_rtx
);
12065 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
12066 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
12068 /* Avoid useless 0 displacement. */
12069 if (disp
== const0_rtx
&& (base
|| index
))
12072 /* Allow arg pointer and stack pointer as index if there is not scaling. */
12073 if (base_reg
&& index_reg
&& scale
== 1
12074 && (index_reg
== arg_pointer_rtx
12075 || index_reg
== frame_pointer_rtx
12076 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
12079 tmp
= base
, base
= index
, index
= tmp
;
12080 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
12083 /* Special case: %ebp cannot be encoded as a base without a displacement.
12087 && (base_reg
== hard_frame_pointer_rtx
12088 || base_reg
== frame_pointer_rtx
12089 || base_reg
== arg_pointer_rtx
12090 || (REG_P (base_reg
)
12091 && (REGNO (base_reg
) == HARD_FRAME_POINTER_REGNUM
12092 || REGNO (base_reg
) == R13_REG
))))
12095 /* Special case: on K6, [%esi] makes the instruction vector decoded.
12096 Avoid this by transforming to [%esi+0].
12097 Reload calls address legitimization without cfun defined, so we need
12098 to test cfun for being non-NULL. */
12099 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
12100 && base_reg
&& !index_reg
&& !disp
12101 && REG_P (base_reg
) && REGNO (base_reg
) == SI_REG
)
12104 /* Special case: encode reg+reg instead of reg*2. */
12105 if (!base
&& index
&& scale
== 2)
12106 base
= index
, base_reg
= index_reg
, scale
= 1;
12108 /* Special case: scaling cannot be encoded without base or displacement. */
12109 if (!base
&& !disp
&& index
&& scale
!= 1)
12113 out
->index
= index
;
12115 out
->scale
= scale
;
12121 /* Return cost of the memory address x.
12122 For i386, it is better to use a complex address than let gcc copy
12123 the address into a reg and make a new pseudo. But not if the address
12124 requires to two regs - that would mean more pseudos with longer
12127 ix86_address_cost (rtx x
, enum machine_mode mode ATTRIBUTE_UNUSED
,
12128 addr_space_t as ATTRIBUTE_UNUSED
,
12129 bool speed ATTRIBUTE_UNUSED
)
12131 struct ix86_address parts
;
12133 int ok
= ix86_decompose_address (x
, &parts
);
12137 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
12138 parts
.base
= SUBREG_REG (parts
.base
);
12139 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
12140 parts
.index
= SUBREG_REG (parts
.index
);
12142 /* Attempt to minimize number of registers in the address. */
12144 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
12146 && (!REG_P (parts
.index
)
12147 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
12151 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
12153 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
12154 && parts
.base
!= parts
.index
)
12157 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
12158 since it's predecode logic can't detect the length of instructions
12159 and it degenerates to vector decoded. Increase cost of such
12160 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
12161 to split such addresses or even refuse such addresses at all.
12163 Following addressing modes are affected:
12168 The first and last case may be avoidable by explicitly coding the zero in
12169 memory address, but I don't have AMD-K6 machine handy to check this
12173 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
12174 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
12175 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
12181 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12182 this is used for to form addresses to local data when -fPIC is in
12186 darwin_local_data_pic (rtx disp
)
12188 return (GET_CODE (disp
) == UNSPEC
12189 && XINT (disp
, 1) == UNSPEC_MACHOPIC_OFFSET
);
12192 /* Determine if a given RTX is a valid constant. We already know this
12193 satisfies CONSTANT_P. */
12196 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
12198 switch (GET_CODE (x
))
12203 if (GET_CODE (x
) == PLUS
)
12205 if (!CONST_INT_P (XEXP (x
, 1)))
12210 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
12213 /* Only some unspecs are valid as "constants". */
12214 if (GET_CODE (x
) == UNSPEC
)
12215 switch (XINT (x
, 1))
12218 case UNSPEC_GOTOFF
:
12219 case UNSPEC_PLTOFF
:
12220 return TARGET_64BIT
;
12222 case UNSPEC_NTPOFF
:
12223 x
= XVECEXP (x
, 0, 0);
12224 return (GET_CODE (x
) == SYMBOL_REF
12225 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
12226 case UNSPEC_DTPOFF
:
12227 x
= XVECEXP (x
, 0, 0);
12228 return (GET_CODE (x
) == SYMBOL_REF
12229 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
12234 /* We must have drilled down to a symbol. */
12235 if (GET_CODE (x
) == LABEL_REF
)
12237 if (GET_CODE (x
) != SYMBOL_REF
)
12242 /* TLS symbols are never valid. */
12243 if (SYMBOL_REF_TLS_MODEL (x
))
12246 /* DLLIMPORT symbols are never valid. */
12247 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12248 && SYMBOL_REF_DLLIMPORT_P (x
))
12252 /* mdynamic-no-pic */
12253 if (MACHO_DYNAMIC_NO_PIC_P
)
12254 return machopic_symbol_defined_p (x
);
12259 if (GET_MODE (x
) == TImode
12260 && x
!= CONST0_RTX (TImode
)
12266 if (!standard_sse_constant_p (x
))
12273 /* Otherwise we handle everything else in the move patterns. */
12277 /* Determine if it's legal to put X into the constant pool. This
12278 is not possible for the address of thread-local symbols, which
12279 is checked above. */
12282 ix86_cannot_force_const_mem (enum machine_mode mode
, rtx x
)
12284 /* We can always put integral constants and vectors in memory. */
12285 switch (GET_CODE (x
))
12295 return !ix86_legitimate_constant_p (mode
, x
);
12298 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
12302 is_imported_p (rtx x
)
12304 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12305 || GET_CODE (x
) != SYMBOL_REF
)
12308 return SYMBOL_REF_DLLIMPORT_P (x
) || SYMBOL_REF_STUBVAR_P (x
);
12312 /* Nonzero if the constant value X is a legitimate general operand
12313 when generating PIC code. It is given that flag_pic is on and
12314 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12317 legitimate_pic_operand_p (rtx x
)
12321 switch (GET_CODE (x
))
12324 inner
= XEXP (x
, 0);
12325 if (GET_CODE (inner
) == PLUS
12326 && CONST_INT_P (XEXP (inner
, 1)))
12327 inner
= XEXP (inner
, 0);
12329 /* Only some unspecs are valid as "constants". */
12330 if (GET_CODE (inner
) == UNSPEC
)
12331 switch (XINT (inner
, 1))
12334 case UNSPEC_GOTOFF
:
12335 case UNSPEC_PLTOFF
:
12336 return TARGET_64BIT
;
12338 x
= XVECEXP (inner
, 0, 0);
12339 return (GET_CODE (x
) == SYMBOL_REF
12340 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
12341 case UNSPEC_MACHOPIC_OFFSET
:
12342 return legitimate_pic_address_disp_p (x
);
12350 return legitimate_pic_address_disp_p (x
);
12357 /* Determine if a given CONST RTX is a valid memory displacement
12361 legitimate_pic_address_disp_p (rtx disp
)
12365 /* In 64bit mode we can allow direct addresses of symbols and labels
12366 when they are not dynamic symbols. */
12369 rtx op0
= disp
, op1
;
12371 switch (GET_CODE (disp
))
12377 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
12379 op0
= XEXP (XEXP (disp
, 0), 0);
12380 op1
= XEXP (XEXP (disp
, 0), 1);
12381 if (!CONST_INT_P (op1
)
12382 || INTVAL (op1
) >= 16*1024*1024
12383 || INTVAL (op1
) < -16*1024*1024)
12385 if (GET_CODE (op0
) == LABEL_REF
)
12387 if (GET_CODE (op0
) == CONST
12388 && GET_CODE (XEXP (op0
, 0)) == UNSPEC
12389 && XINT (XEXP (op0
, 0), 1) == UNSPEC_PCREL
)
12391 if (GET_CODE (op0
) == UNSPEC
12392 && XINT (op0
, 1) == UNSPEC_PCREL
)
12394 if (GET_CODE (op0
) != SYMBOL_REF
)
12399 /* TLS references should always be enclosed in UNSPEC.
12400 The dllimported symbol needs always to be resolved. */
12401 if (SYMBOL_REF_TLS_MODEL (op0
)
12402 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& SYMBOL_REF_DLLIMPORT_P (op0
)))
12407 if (is_imported_p (op0
))
12410 if (SYMBOL_REF_FAR_ADDR_P (op0
)
12411 || !SYMBOL_REF_LOCAL_P (op0
))
12414 /* Function-symbols need to be resolved only for
12416 For the small-model we don't need to resolve anything
12418 if ((ix86_cmodel
!= CM_LARGE_PIC
12419 && SYMBOL_REF_FUNCTION_P (op0
))
12420 || ix86_cmodel
== CM_SMALL_PIC
)
12422 /* Non-external symbols don't need to be resolved for
12423 large, and medium-model. */
12424 if ((ix86_cmodel
== CM_LARGE_PIC
12425 || ix86_cmodel
== CM_MEDIUM_PIC
)
12426 && !SYMBOL_REF_EXTERNAL_P (op0
))
12429 else if (!SYMBOL_REF_FAR_ADDR_P (op0
)
12430 && SYMBOL_REF_LOCAL_P (op0
)
12431 && ix86_cmodel
!= CM_LARGE_PIC
)
12439 if (GET_CODE (disp
) != CONST
)
12441 disp
= XEXP (disp
, 0);
12445 /* We are unsafe to allow PLUS expressions. This limit allowed distance
12446 of GOT tables. We should not need these anyway. */
12447 if (GET_CODE (disp
) != UNSPEC
12448 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
12449 && XINT (disp
, 1) != UNSPEC_GOTOFF
12450 && XINT (disp
, 1) != UNSPEC_PCREL
12451 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
12454 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
12455 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
12461 if (GET_CODE (disp
) == PLUS
)
12463 if (!CONST_INT_P (XEXP (disp
, 1)))
12465 disp
= XEXP (disp
, 0);
12469 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
12472 if (GET_CODE (disp
) != UNSPEC
)
12475 switch (XINT (disp
, 1))
12480 /* We need to check for both symbols and labels because VxWorks loads
12481 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12483 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12484 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
12485 case UNSPEC_GOTOFF
:
12486 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12487 While ABI specify also 32bit relocation but we don't produce it in
12488 small PIC model at all. */
12489 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12490 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
12492 return !TARGET_PECOFF
&& gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
12494 case UNSPEC_GOTTPOFF
:
12495 case UNSPEC_GOTNTPOFF
:
12496 case UNSPEC_INDNTPOFF
:
12499 disp
= XVECEXP (disp
, 0, 0);
12500 return (GET_CODE (disp
) == SYMBOL_REF
12501 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
12502 case UNSPEC_NTPOFF
:
12503 disp
= XVECEXP (disp
, 0, 0);
12504 return (GET_CODE (disp
) == SYMBOL_REF
12505 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
12506 case UNSPEC_DTPOFF
:
12507 disp
= XVECEXP (disp
, 0, 0);
12508 return (GET_CODE (disp
) == SYMBOL_REF
12509 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
12515 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12516 replace the input X, or the original X if no replacement is called for.
12517 The output parameter *WIN is 1 if the calling macro should goto WIN,
12518 0 if it should not. */
12521 ix86_legitimize_reload_address (rtx x
,
12522 enum machine_mode mode ATTRIBUTE_UNUSED
,
12523 int opnum
, int type
,
12524 int ind_levels ATTRIBUTE_UNUSED
)
12526 /* Reload can generate:
12528 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12532 This RTX is rejected from ix86_legitimate_address_p due to
12533 non-strictness of base register 97. Following this rejection,
12534 reload pushes all three components into separate registers,
12535 creating invalid memory address RTX.
12537 Following code reloads only the invalid part of the
12538 memory address RTX. */
12540 if (GET_CODE (x
) == PLUS
12541 && REG_P (XEXP (x
, 1))
12542 && GET_CODE (XEXP (x
, 0)) == PLUS
12543 && REG_P (XEXP (XEXP (x
, 0), 1)))
12546 bool something_reloaded
= false;
12548 base
= XEXP (XEXP (x
, 0), 1);
12549 if (!REG_OK_FOR_BASE_STRICT_P (base
))
12551 push_reload (base
, NULL_RTX
, &XEXP (XEXP (x
, 0), 1), NULL
,
12552 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12553 opnum
, (enum reload_type
) type
);
12554 something_reloaded
= true;
12557 index
= XEXP (x
, 1);
12558 if (!REG_OK_FOR_INDEX_STRICT_P (index
))
12560 push_reload (index
, NULL_RTX
, &XEXP (x
, 1), NULL
,
12561 INDEX_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12562 opnum
, (enum reload_type
) type
);
12563 something_reloaded
= true;
12566 gcc_assert (something_reloaded
);
12573 /* Determine if op is suitable RTX for an address register.
12574 Return naked register if a register or a register subreg is
12575 found, otherwise return NULL_RTX. */
12578 ix86_validate_address_register (rtx op
)
12580 enum machine_mode mode
= GET_MODE (op
);
12582 /* Only SImode or DImode registers can form the address. */
12583 if (mode
!= SImode
&& mode
!= DImode
)
12588 else if (GET_CODE (op
) == SUBREG
)
12590 rtx reg
= SUBREG_REG (op
);
12595 mode
= GET_MODE (reg
);
12597 /* Don't allow SUBREGs that span more than a word. It can
12598 lead to spill failures when the register is one word out
12599 of a two word structure. */
12600 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
12603 /* Allow only SUBREGs of non-eliminable hard registers. */
12604 if (register_no_elim_operand (reg
, mode
))
12608 /* Op is not a register. */
12612 /* Recognizes RTL expressions that are valid memory addresses for an
12613 instruction. The MODE argument is the machine mode for the MEM
12614 expression that wants to use this address.
12616 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
12617 convert common non-canonical forms to canonical form so that they will
12621 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
12622 rtx addr
, bool strict
)
12624 struct ix86_address parts
;
12625 rtx base
, index
, disp
;
12626 HOST_WIDE_INT scale
;
12627 enum ix86_address_seg seg
;
12629 if (ix86_decompose_address (addr
, &parts
) <= 0)
12630 /* Decomposition failed. */
12634 index
= parts
.index
;
12636 scale
= parts
.scale
;
12639 /* Validate base register. */
12642 rtx reg
= ix86_validate_address_register (base
);
12644 if (reg
== NULL_RTX
)
12647 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
12648 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
12649 /* Base is not valid. */
12653 /* Validate index register. */
12656 rtx reg
= ix86_validate_address_register (index
);
12658 if (reg
== NULL_RTX
)
12661 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
12662 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
12663 /* Index is not valid. */
12667 /* Index and base should have the same mode. */
12669 && GET_MODE (base
) != GET_MODE (index
))
12672 /* Address override works only on the (%reg) part of %fs:(%reg). */
12673 if (seg
!= SEG_DEFAULT
12674 && ((base
&& GET_MODE (base
) != word_mode
)
12675 || (index
&& GET_MODE (index
) != word_mode
)))
12678 /* Validate scale factor. */
12682 /* Scale without index. */
12685 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
12686 /* Scale is not a valid multiplier. */
12690 /* Validate displacement. */
12693 if (GET_CODE (disp
) == CONST
12694 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
12695 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
12696 switch (XINT (XEXP (disp
, 0), 1))
12698 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12699 used. While ABI specify also 32bit relocations, we don't produce
12700 them at all and use IP relative instead. */
12702 case UNSPEC_GOTOFF
:
12703 gcc_assert (flag_pic
);
12705 goto is_legitimate_pic
;
12707 /* 64bit address unspec. */
12710 case UNSPEC_GOTPCREL
:
12712 gcc_assert (flag_pic
);
12713 goto is_legitimate_pic
;
12715 case UNSPEC_GOTTPOFF
:
12716 case UNSPEC_GOTNTPOFF
:
12717 case UNSPEC_INDNTPOFF
:
12718 case UNSPEC_NTPOFF
:
12719 case UNSPEC_DTPOFF
:
12722 case UNSPEC_STACK_CHECK
:
12723 gcc_assert (flag_split_stack
);
12727 /* Invalid address unspec. */
12731 else if (SYMBOLIC_CONST (disp
)
12735 && MACHOPIC_INDIRECT
12736 && !machopic_operand_p (disp
)
12742 if (TARGET_64BIT
&& (index
|| base
))
12744 /* foo@dtpoff(%rX) is ok. */
12745 if (GET_CODE (disp
) != CONST
12746 || GET_CODE (XEXP (disp
, 0)) != PLUS
12747 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
12748 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
12749 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
12750 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
12751 /* Non-constant pic memory reference. */
12754 else if ((!TARGET_MACHO
|| flag_pic
)
12755 && ! legitimate_pic_address_disp_p (disp
))
12756 /* Displacement is an invalid pic construct. */
12759 else if (MACHO_DYNAMIC_NO_PIC_P
12760 && !ix86_legitimate_constant_p (Pmode
, disp
))
12761 /* displacment must be referenced via non_lazy_pointer */
12765 /* This code used to verify that a symbolic pic displacement
12766 includes the pic_offset_table_rtx register.
12768 While this is good idea, unfortunately these constructs may
12769 be created by "adds using lea" optimization for incorrect
12778 This code is nonsensical, but results in addressing
12779 GOT table with pic_offset_table_rtx base. We can't
12780 just refuse it easily, since it gets matched by
12781 "addsi3" pattern, that later gets split to lea in the
12782 case output register differs from input. While this
12783 can be handled by separate addsi pattern for this case
12784 that never results in lea, this seems to be easier and
12785 correct fix for crash to disable this test. */
12787 else if (GET_CODE (disp
) != LABEL_REF
12788 && !CONST_INT_P (disp
)
12789 && (GET_CODE (disp
) != CONST
12790 || !ix86_legitimate_constant_p (Pmode
, disp
))
12791 && (GET_CODE (disp
) != SYMBOL_REF
12792 || !ix86_legitimate_constant_p (Pmode
, disp
)))
12793 /* Displacement is not constant. */
12795 else if (TARGET_64BIT
12796 && !x86_64_immediate_operand (disp
, VOIDmode
))
12797 /* Displacement is out of range. */
12799 /* In x32 mode, constant addresses are sign extended to 64bit, so
12800 we have to prevent addresses from 0x80000000 to 0xffffffff. */
12801 else if (TARGET_X32
&& !(index
|| base
)
12802 && CONST_INT_P (disp
)
12803 && val_signbit_known_set_p (SImode
, INTVAL (disp
)))
12807 /* Everything looks valid. */
12811 /* Determine if a given RTX is a valid constant address. */
12814 constant_address_p (rtx x
)
12816 return CONSTANT_P (x
) && ix86_legitimate_address_p (Pmode
, x
, 1);
12819 /* Return a unique alias set for the GOT. */
12821 static alias_set_type
12822 ix86_GOT_alias_set (void)
12824 static alias_set_type set
= -1;
12826 set
= new_alias_set ();
12830 /* Return a legitimate reference for ORIG (an address) using the
12831 register REG. If REG is 0, a new pseudo is generated.
12833 There are two types of references that must be handled:
12835 1. Global data references must load the address from the GOT, via
12836 the PIC reg. An insn is emitted to do this load, and the reg is
12839 2. Static data references, constant pool addresses, and code labels
12840 compute the address as an offset from the GOT, whose base is in
12841 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12842 differentiate them from global data objects. The returned
12843 address is the PIC reg + an unspec constant.
12845 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12846 reg also appears in the address. */
12849 legitimize_pic_address (rtx orig
, rtx reg
)
12852 rtx new_rtx
= orig
;
12855 if (TARGET_MACHO
&& !TARGET_64BIT
)
12858 reg
= gen_reg_rtx (Pmode
);
12859 /* Use the generic Mach-O PIC machinery. */
12860 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
12864 if (TARGET_64BIT
&& TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12866 rtx tmp
= legitimize_pe_coff_symbol (addr
, true);
12871 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
12873 else if (TARGET_64BIT
&& !TARGET_PECOFF
12874 && ix86_cmodel
!= CM_SMALL_PIC
&& gotoff_operand (addr
, Pmode
))
12877 /* This symbol may be referenced via a displacement from the PIC
12878 base address (@GOTOFF). */
12880 if (reload_in_progress
)
12881 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12882 if (GET_CODE (addr
) == CONST
)
12883 addr
= XEXP (addr
, 0);
12884 if (GET_CODE (addr
) == PLUS
)
12886 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12888 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12891 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12892 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12894 tmpreg
= gen_reg_rtx (Pmode
);
12897 emit_move_insn (tmpreg
, new_rtx
);
12901 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
12902 tmpreg
, 1, OPTAB_DIRECT
);
12906 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
12908 else if (!TARGET_64BIT
&& !TARGET_PECOFF
&& gotoff_operand (addr
, Pmode
))
12910 /* This symbol may be referenced via a displacement from the PIC
12911 base address (@GOTOFF). */
12913 if (reload_in_progress
)
12914 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12915 if (GET_CODE (addr
) == CONST
)
12916 addr
= XEXP (addr
, 0);
12917 if (GET_CODE (addr
) == PLUS
)
12919 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12921 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12924 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12925 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12926 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12930 emit_move_insn (reg
, new_rtx
);
12934 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
12935 /* We can't use @GOTOFF for text labels on VxWorks;
12936 see gotoff_operand. */
12937 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
12939 rtx tmp
= legitimize_pe_coff_symbol (addr
, true);
12943 /* For x64 PE-COFF there is no GOT table. So we use address
12945 if (TARGET_64BIT
&& TARGET_PECOFF
)
12947 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_PCREL
);
12948 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12951 reg
= gen_reg_rtx (Pmode
);
12952 emit_move_insn (reg
, new_rtx
);
12955 else if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
12957 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
12958 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12959 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12960 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12963 reg
= gen_reg_rtx (Pmode
);
12964 /* Use directly gen_movsi, otherwise the address is loaded
12965 into register for CSE. We don't want to CSE this addresses,
12966 instead we CSE addresses from the GOT table, so skip this. */
12967 emit_insn (gen_movsi (reg
, new_rtx
));
12972 /* This symbol must be referenced via a load from the
12973 Global Offset Table (@GOT). */
12975 if (reload_in_progress
)
12976 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12977 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
12978 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12980 new_rtx
= force_reg (Pmode
, new_rtx
);
12981 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12982 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12983 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12986 reg
= gen_reg_rtx (Pmode
);
12987 emit_move_insn (reg
, new_rtx
);
12993 if (CONST_INT_P (addr
)
12994 && !x86_64_immediate_operand (addr
, VOIDmode
))
12998 emit_move_insn (reg
, addr
);
13002 new_rtx
= force_reg (Pmode
, addr
);
13004 else if (GET_CODE (addr
) == CONST
)
13006 addr
= XEXP (addr
, 0);
13008 /* We must match stuff we generate before. Assume the only
13009 unspecs that can get here are ours. Not that we could do
13010 anything with them anyway.... */
13011 if (GET_CODE (addr
) == UNSPEC
13012 || (GET_CODE (addr
) == PLUS
13013 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
13015 gcc_assert (GET_CODE (addr
) == PLUS
);
13017 if (GET_CODE (addr
) == PLUS
)
13019 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
13021 /* Check first to see if this is a constant offset from a @GOTOFF
13022 symbol reference. */
13023 if (!TARGET_PECOFF
&& gotoff_operand (op0
, Pmode
)
13024 && CONST_INT_P (op1
))
13028 if (reload_in_progress
)
13029 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
13030 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
13032 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
13033 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
13034 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
13038 emit_move_insn (reg
, new_rtx
);
13044 if (INTVAL (op1
) < -16*1024*1024
13045 || INTVAL (op1
) >= 16*1024*1024)
13047 if (!x86_64_immediate_operand (op1
, Pmode
))
13048 op1
= force_reg (Pmode
, op1
);
13049 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
13055 rtx base
= legitimize_pic_address (op0
, reg
);
13056 enum machine_mode mode
= GET_MODE (base
);
13058 = legitimize_pic_address (op1
, base
== reg
? NULL_RTX
: reg
);
13060 if (CONST_INT_P (new_rtx
))
13062 if (INTVAL (new_rtx
) < -16*1024*1024
13063 || INTVAL (new_rtx
) >= 16*1024*1024)
13065 if (!x86_64_immediate_operand (new_rtx
, mode
))
13066 new_rtx
= force_reg (mode
, new_rtx
);
13068 = gen_rtx_PLUS (mode
, force_reg (mode
, base
), new_rtx
);
13071 new_rtx
= plus_constant (mode
, base
, INTVAL (new_rtx
));
13075 if (GET_CODE (new_rtx
) == PLUS
13076 && CONSTANT_P (XEXP (new_rtx
, 1)))
13078 base
= gen_rtx_PLUS (mode
, base
, XEXP (new_rtx
, 0));
13079 new_rtx
= XEXP (new_rtx
, 1);
13081 new_rtx
= gen_rtx_PLUS (mode
, base
, new_rtx
);
13089 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13092 get_thread_pointer (enum machine_mode tp_mode
, bool to_reg
)
13094 rtx tp
= gen_rtx_UNSPEC (ptr_mode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
13096 if (GET_MODE (tp
) != tp_mode
)
13098 gcc_assert (GET_MODE (tp
) == SImode
);
13099 gcc_assert (tp_mode
== DImode
);
13101 tp
= gen_rtx_ZERO_EXTEND (tp_mode
, tp
);
13105 tp
= copy_to_mode_reg (tp_mode
, tp
);
13110 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13112 static GTY(()) rtx ix86_tls_symbol
;
13115 ix86_tls_get_addr (void)
13117 if (!ix86_tls_symbol
)
13120 = ((TARGET_ANY_GNU_TLS
&& !TARGET_64BIT
)
13121 ? "___tls_get_addr" : "__tls_get_addr");
13123 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, sym
);
13126 if (ix86_cmodel
== CM_LARGE_PIC
&& !TARGET_PECOFF
)
13128 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, ix86_tls_symbol
),
13130 return gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
,
13131 gen_rtx_CONST (Pmode
, unspec
));
13134 return ix86_tls_symbol
;
13137 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13139 static GTY(()) rtx ix86_tls_module_base_symbol
;
13142 ix86_tls_module_base (void)
13144 if (!ix86_tls_module_base_symbol
)
13146 ix86_tls_module_base_symbol
13147 = gen_rtx_SYMBOL_REF (Pmode
, "_TLS_MODULE_BASE_");
13149 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
13150 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
13153 return ix86_tls_module_base_symbol
;
13156 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13157 false if we expect this to be used for a memory address and true if
13158 we expect to load the address into a register. */
13161 legitimize_tls_address (rtx x
, enum tls_model model
, bool for_mov
)
13163 rtx dest
, base
, off
;
13164 rtx pic
= NULL_RTX
, tp
= NULL_RTX
;
13165 enum machine_mode tp_mode
= Pmode
;
13170 case TLS_MODEL_GLOBAL_DYNAMIC
:
13171 dest
= gen_reg_rtx (Pmode
);
13175 if (flag_pic
&& !TARGET_PECOFF
)
13176 pic
= pic_offset_table_rtx
;
13179 pic
= gen_reg_rtx (Pmode
);
13180 emit_insn (gen_set_got (pic
));
13184 if (TARGET_GNU2_TLS
)
13187 emit_insn (gen_tls_dynamic_gnu2_64 (dest
, x
));
13189 emit_insn (gen_tls_dynamic_gnu2_32 (dest
, x
, pic
));
13191 tp
= get_thread_pointer (Pmode
, true);
13192 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
13194 if (GET_MODE (x
) != Pmode
)
13195 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
13197 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
13201 rtx caddr
= ix86_tls_get_addr ();
13205 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
13210 (ix86_gen_tls_global_dynamic_64 (rax
, x
, caddr
));
13211 insns
= get_insns ();
13214 if (GET_MODE (x
) != Pmode
)
13215 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
13217 RTL_CONST_CALL_P (insns
) = 1;
13218 emit_libcall_block (insns
, dest
, rax
, x
);
13221 emit_insn (gen_tls_global_dynamic_32 (dest
, x
, pic
, caddr
));
13225 case TLS_MODEL_LOCAL_DYNAMIC
:
13226 base
= gen_reg_rtx (Pmode
);
13231 pic
= pic_offset_table_rtx
;
13234 pic
= gen_reg_rtx (Pmode
);
13235 emit_insn (gen_set_got (pic
));
13239 if (TARGET_GNU2_TLS
)
13241 rtx tmp
= ix86_tls_module_base ();
13244 emit_insn (gen_tls_dynamic_gnu2_64 (base
, tmp
));
13246 emit_insn (gen_tls_dynamic_gnu2_32 (base
, tmp
, pic
));
13248 tp
= get_thread_pointer (Pmode
, true);
13249 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
13250 gen_rtx_MINUS (Pmode
, tmp
, tp
));
13254 rtx caddr
= ix86_tls_get_addr ();
13258 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
13263 (ix86_gen_tls_local_dynamic_base_64 (rax
, caddr
));
13264 insns
= get_insns ();
13267 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13268 share the LD_BASE result with other LD model accesses. */
13269 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
13270 UNSPEC_TLS_LD_BASE
);
13272 RTL_CONST_CALL_P (insns
) = 1;
13273 emit_libcall_block (insns
, base
, rax
, eqv
);
13276 emit_insn (gen_tls_local_dynamic_base_32 (base
, pic
, caddr
));
13279 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
13280 off
= gen_rtx_CONST (Pmode
, off
);
13282 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
13284 if (TARGET_GNU2_TLS
)
13286 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
13288 if (GET_MODE (x
) != Pmode
)
13289 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
13291 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
13295 case TLS_MODEL_INITIAL_EXEC
:
13298 if (TARGET_SUN_TLS
&& !TARGET_X32
)
13300 /* The Sun linker took the AMD64 TLS spec literally
13301 and can only handle %rax as destination of the
13302 initial executable code sequence. */
13304 dest
= gen_reg_rtx (DImode
);
13305 emit_insn (gen_tls_initial_exec_64_sun (dest
, x
));
13309 /* Generate DImode references to avoid %fs:(%reg32)
13310 problems and linker IE->LE relaxation bug. */
13313 type
= UNSPEC_GOTNTPOFF
;
13317 if (reload_in_progress
)
13318 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
13319 pic
= pic_offset_table_rtx
;
13320 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
13322 else if (!TARGET_ANY_GNU_TLS
)
13324 pic
= gen_reg_rtx (Pmode
);
13325 emit_insn (gen_set_got (pic
));
13326 type
= UNSPEC_GOTTPOFF
;
13331 type
= UNSPEC_INDNTPOFF
;
13334 off
= gen_rtx_UNSPEC (tp_mode
, gen_rtvec (1, x
), type
);
13335 off
= gen_rtx_CONST (tp_mode
, off
);
13337 off
= gen_rtx_PLUS (tp_mode
, pic
, off
);
13338 off
= gen_const_mem (tp_mode
, off
);
13339 set_mem_alias_set (off
, ix86_GOT_alias_set ());
13341 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13343 base
= get_thread_pointer (tp_mode
,
13344 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
13345 off
= force_reg (tp_mode
, off
);
13346 return gen_rtx_PLUS (tp_mode
, base
, off
);
13350 base
= get_thread_pointer (Pmode
, true);
13351 dest
= gen_reg_rtx (Pmode
);
13352 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
13356 case TLS_MODEL_LOCAL_EXEC
:
13357 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
13358 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13359 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
13360 off
= gen_rtx_CONST (Pmode
, off
);
13362 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13364 base
= get_thread_pointer (Pmode
,
13365 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
13366 return gen_rtx_PLUS (Pmode
, base
, off
);
13370 base
= get_thread_pointer (Pmode
, true);
13371 dest
= gen_reg_rtx (Pmode
);
13372 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
13377 gcc_unreachable ();
13383 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13384 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13385 unique refptr-DECL symbol corresponding to symbol DECL. */
13387 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
13388 htab_t dllimport_map
;
13391 get_dllimport_decl (tree decl
, bool beimport
)
13393 struct tree_map
*h
, in
;
13396 const char *prefix
;
13397 size_t namelen
, prefixlen
;
13402 if (!dllimport_map
)
13403 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
13405 in
.hash
= htab_hash_pointer (decl
);
13406 in
.base
.from
= decl
;
13407 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
13408 h
= (struct tree_map
*) *loc
;
13412 *loc
= h
= ggc_alloc_tree_map ();
13414 h
->base
.from
= decl
;
13415 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
13416 VAR_DECL
, NULL
, ptr_type_node
);
13417 DECL_ARTIFICIAL (to
) = 1;
13418 DECL_IGNORED_P (to
) = 1;
13419 DECL_EXTERNAL (to
) = 1;
13420 TREE_READONLY (to
) = 1;
13422 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
13423 name
= targetm
.strip_name_encoding (name
);
13425 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
13426 ? "*__imp_" : "*__imp__";
13428 prefix
= user_label_prefix
[0] == 0 ? "*.refptr." : "*refptr.";
13429 namelen
= strlen (name
);
13430 prefixlen
= strlen (prefix
);
13431 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
13432 memcpy (imp_name
, prefix
, prefixlen
);
13433 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
13435 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
13436 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
13437 SET_SYMBOL_REF_DECL (rtl
, to
);
13438 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
| SYMBOL_FLAG_STUBVAR
;
13441 SYMBOL_REF_FLAGS (rtl
) |= SYMBOL_FLAG_EXTERNAL
;
13442 #ifdef SUB_TARGET_RECORD_STUB
13443 SUB_TARGET_RECORD_STUB (name
);
13447 rtl
= gen_const_mem (Pmode
, rtl
);
13448 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
13450 SET_DECL_RTL (to
, rtl
);
13451 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
13456 /* Expand SYMBOL into its corresponding far-addresse symbol.
13457 WANT_REG is true if we require the result be a register. */
13460 legitimize_pe_coff_extern_decl (rtx symbol
, bool want_reg
)
13465 gcc_assert (SYMBOL_REF_DECL (symbol
));
13466 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
), false);
13468 x
= DECL_RTL (imp_decl
);
13470 x
= force_reg (Pmode
, x
);
13474 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13475 true if we require the result be a register. */
13478 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
13483 gcc_assert (SYMBOL_REF_DECL (symbol
));
13484 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
), true);
13486 x
= DECL_RTL (imp_decl
);
13488 x
= force_reg (Pmode
, x
);
13492 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
13493 is true if we require the result be a register. */
13496 legitimize_pe_coff_symbol (rtx addr
, bool inreg
)
13498 if (!TARGET_PECOFF
)
13501 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
13503 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
13504 return legitimize_dllimport_symbol (addr
, inreg
);
13505 if (GET_CODE (addr
) == CONST
13506 && GET_CODE (XEXP (addr
, 0)) == PLUS
13507 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
13508 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
13510 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), inreg
);
13511 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
13515 if (ix86_cmodel
!= CM_LARGE_PIC
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
13517 if (GET_CODE (addr
) == SYMBOL_REF
13518 && !is_imported_p (addr
)
13519 && SYMBOL_REF_EXTERNAL_P (addr
)
13520 && SYMBOL_REF_DECL (addr
))
13521 return legitimize_pe_coff_extern_decl (addr
, inreg
);
13523 if (GET_CODE (addr
) == CONST
13524 && GET_CODE (XEXP (addr
, 0)) == PLUS
13525 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
13526 && !is_imported_p (XEXP (XEXP (addr
, 0), 0))
13527 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr
, 0), 0))
13528 && SYMBOL_REF_DECL (XEXP (XEXP (addr
, 0), 0)))
13530 rtx t
= legitimize_pe_coff_extern_decl (XEXP (XEXP (addr
, 0), 0), inreg
);
13531 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
13536 /* Try machine-dependent ways of modifying an illegitimate address
13537 to be legitimate. If we find one, return the new, valid address.
13538 This macro is used in only one place: `memory_address' in explow.c.
13540 OLDX is the address as it was before break_out_memory_refs was called.
13541 In some cases it is useful to look at this to decide what needs to be done.
13543 It is always safe for this macro to do nothing. It exists to recognize
13544 opportunities to optimize the output.
13546 For the 80386, we handle X+REG by loading X into a register R and
13547 using R+REG. R will go in a general reg and indexing will be used.
13548 However, if REG is a broken-out memory address or multiplication,
13549 nothing needs to be done because REG can certainly go in a general reg.
13551 When -fpic is used, special handling is needed for symbolic references.
13552 See comments by legitimize_pic_address in i386.c for details. */
13555 ix86_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
13556 enum machine_mode mode
)
13561 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
13563 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
13564 if (GET_CODE (x
) == CONST
13565 && GET_CODE (XEXP (x
, 0)) == PLUS
13566 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
13567 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
13569 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
13570 (enum tls_model
) log
, false);
13571 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
13574 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
13576 rtx tmp
= legitimize_pe_coff_symbol (x
, true);
13581 if (flag_pic
&& SYMBOLIC_CONST (x
))
13582 return legitimize_pic_address (x
, 0);
13585 if (MACHO_DYNAMIC_NO_PIC_P
&& SYMBOLIC_CONST (x
))
13586 return machopic_indirect_data_reference (x
, 0);
13589 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13590 if (GET_CODE (x
) == ASHIFT
13591 && CONST_INT_P (XEXP (x
, 1))
13592 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
13595 log
= INTVAL (XEXP (x
, 1));
13596 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
13597 GEN_INT (1 << log
));
13600 if (GET_CODE (x
) == PLUS
)
13602 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13604 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
13605 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13606 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
13609 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
13610 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
13611 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
13612 GEN_INT (1 << log
));
13615 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
13616 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
13617 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
13620 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
13621 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
13622 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
13623 GEN_INT (1 << log
));
13626 /* Put multiply first if it isn't already. */
13627 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13629 rtx tmp
= XEXP (x
, 0);
13630 XEXP (x
, 0) = XEXP (x
, 1);
13635 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13636 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13637 created by virtual register instantiation, register elimination, and
13638 similar optimizations. */
13639 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
13642 x
= gen_rtx_PLUS (Pmode
,
13643 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
13644 XEXP (XEXP (x
, 1), 0)),
13645 XEXP (XEXP (x
, 1), 1));
13649 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13650 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13651 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
13652 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
13653 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
13654 && CONSTANT_P (XEXP (x
, 1)))
13657 rtx other
= NULL_RTX
;
13659 if (CONST_INT_P (XEXP (x
, 1)))
13661 constant
= XEXP (x
, 1);
13662 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13664 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
13666 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13667 other
= XEXP (x
, 1);
13675 x
= gen_rtx_PLUS (Pmode
,
13676 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
13677 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
13678 plus_constant (Pmode
, other
,
13679 INTVAL (constant
)));
13683 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13686 if (GET_CODE (XEXP (x
, 0)) == MULT
)
13689 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
13692 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13695 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
13699 && REG_P (XEXP (x
, 1))
13700 && REG_P (XEXP (x
, 0)))
13703 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
13706 x
= legitimize_pic_address (x
, 0);
13709 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13712 if (REG_P (XEXP (x
, 0)))
13714 rtx temp
= gen_reg_rtx (Pmode
);
13715 rtx val
= force_operand (XEXP (x
, 1), temp
);
13718 val
= convert_to_mode (Pmode
, val
, 1);
13719 emit_move_insn (temp
, val
);
13722 XEXP (x
, 1) = temp
;
13726 else if (REG_P (XEXP (x
, 1)))
13728 rtx temp
= gen_reg_rtx (Pmode
);
13729 rtx val
= force_operand (XEXP (x
, 0), temp
);
13732 val
= convert_to_mode (Pmode
, val
, 1);
13733 emit_move_insn (temp
, val
);
13736 XEXP (x
, 0) = temp
;
13744 /* Print an integer constant expression in assembler syntax. Addition
13745 and subtraction are the only arithmetic that may appear in these
13746 expressions. FILE is the stdio stream to write to, X is the rtx, and
13747 CODE is the operand print code from the output string. */
13750 output_pic_addr_const (FILE *file
, rtx x
, int code
)
13754 switch (GET_CODE (x
))
13757 gcc_assert (flag_pic
);
13762 if (TARGET_64BIT
|| ! TARGET_MACHO_BRANCH_ISLANDS
)
13763 output_addr_const (file
, x
);
13766 const char *name
= XSTR (x
, 0);
13768 /* Mark the decl as referenced so that cgraph will
13769 output the function. */
13770 if (SYMBOL_REF_DECL (x
))
13771 mark_decl_referenced (SYMBOL_REF_DECL (x
));
13774 if (MACHOPIC_INDIRECT
13775 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
13776 name
= machopic_indirection_name (x
, /*stub_p=*/true);
13778 assemble_name (file
, name
);
13780 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& TARGET_PECOFF
)
13781 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
13782 fputs ("@PLT", file
);
13789 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
13790 assemble_name (asm_out_file
, buf
);
13794 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
13798 /* This used to output parentheses around the expression,
13799 but that does not work on the 386 (either ATT or BSD assembler). */
13800 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13804 if (GET_MODE (x
) == VOIDmode
)
13806 /* We can use %d if the number is <32 bits and positive. */
13807 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
13808 fprintf (file
, "0x%lx%08lx",
13809 (unsigned long) CONST_DOUBLE_HIGH (x
),
13810 (unsigned long) CONST_DOUBLE_LOW (x
));
13812 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
13815 /* We can't handle floating point constants;
13816 TARGET_PRINT_OPERAND must handle them. */
13817 output_operand_lossage ("floating constant misused");
13821 /* Some assemblers need integer constants to appear first. */
13822 if (CONST_INT_P (XEXP (x
, 0)))
13824 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13826 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13830 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
13831 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13833 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13839 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
13840 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13842 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13844 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
13848 if (XINT (x
, 1) == UNSPEC_STACK_CHECK
)
13850 bool f
= i386_asm_output_addr_const_extra (file
, x
);
13855 gcc_assert (XVECLEN (x
, 0) == 1);
13856 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
13857 switch (XINT (x
, 1))
13860 fputs ("@GOT", file
);
13862 case UNSPEC_GOTOFF
:
13863 fputs ("@GOTOFF", file
);
13865 case UNSPEC_PLTOFF
:
13866 fputs ("@PLTOFF", file
);
13869 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13870 "(%rip)" : "[rip]", file
);
13872 case UNSPEC_GOTPCREL
:
13873 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13874 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
13876 case UNSPEC_GOTTPOFF
:
13877 /* FIXME: This might be @TPOFF in Sun ld too. */
13878 fputs ("@gottpoff", file
);
13881 fputs ("@tpoff", file
);
13883 case UNSPEC_NTPOFF
:
13885 fputs ("@tpoff", file
);
13887 fputs ("@ntpoff", file
);
13889 case UNSPEC_DTPOFF
:
13890 fputs ("@dtpoff", file
);
13892 case UNSPEC_GOTNTPOFF
:
13894 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13895 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
13897 fputs ("@gotntpoff", file
);
13899 case UNSPEC_INDNTPOFF
:
13900 fputs ("@indntpoff", file
);
13903 case UNSPEC_MACHOPIC_OFFSET
:
13905 machopic_output_function_base_name (file
);
13909 output_operand_lossage ("invalid UNSPEC as operand");
13915 output_operand_lossage ("invalid expression as operand");
13919 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13920 We need to emit DTP-relative relocations. */
13922 static void ATTRIBUTE_UNUSED
13923 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
13925 fputs (ASM_LONG
, file
);
13926 output_addr_const (file
, x
);
13927 fputs ("@dtpoff", file
);
13933 fputs (", 0", file
);
13936 gcc_unreachable ();
13940 /* Return true if X is a representation of the PIC register. This copes
13941 with calls from ix86_find_base_term, where the register might have
13942 been replaced by a cselib value. */
13945 ix86_pic_register_p (rtx x
)
13947 if (GET_CODE (x
) == VALUE
&& CSELIB_VAL_PTR (x
))
13948 return (pic_offset_table_rtx
13949 && rtx_equal_for_cselib_p (x
, pic_offset_table_rtx
));
13951 return REG_P (x
) && REGNO (x
) == PIC_OFFSET_TABLE_REGNUM
;
13954 /* Helper function for ix86_delegitimize_address.
13955 Attempt to delegitimize TLS local-exec accesses. */
13958 ix86_delegitimize_tls_address (rtx orig_x
)
13960 rtx x
= orig_x
, unspec
;
13961 struct ix86_address addr
;
13963 if (!TARGET_TLS_DIRECT_SEG_REFS
)
13967 if (GET_CODE (x
) != PLUS
|| GET_MODE (x
) != Pmode
)
13969 if (ix86_decompose_address (x
, &addr
) == 0
13970 || addr
.seg
!= DEFAULT_TLS_SEG_REG
13971 || addr
.disp
== NULL_RTX
13972 || GET_CODE (addr
.disp
) != CONST
)
13974 unspec
= XEXP (addr
.disp
, 0);
13975 if (GET_CODE (unspec
) == PLUS
&& CONST_INT_P (XEXP (unspec
, 1)))
13976 unspec
= XEXP (unspec
, 0);
13977 if (GET_CODE (unspec
) != UNSPEC
|| XINT (unspec
, 1) != UNSPEC_NTPOFF
)
13979 x
= XVECEXP (unspec
, 0, 0);
13980 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
13981 if (unspec
!= XEXP (addr
.disp
, 0))
13982 x
= gen_rtx_PLUS (Pmode
, x
, XEXP (XEXP (addr
.disp
, 0), 1));
13985 rtx idx
= addr
.index
;
13986 if (addr
.scale
!= 1)
13987 idx
= gen_rtx_MULT (Pmode
, idx
, GEN_INT (addr
.scale
));
13988 x
= gen_rtx_PLUS (Pmode
, idx
, x
);
13991 x
= gen_rtx_PLUS (Pmode
, addr
.base
, x
);
13992 if (MEM_P (orig_x
))
13993 x
= replace_equiv_address_nv (orig_x
, x
);
13997 /* In the name of slightly smaller debug output, and to cater to
13998 general assembler lossage, recognize PIC+GOTOFF and turn it back
13999 into a direct symbol reference.
14001 On Darwin, this is necessary to avoid a crash, because Darwin
14002 has a different PIC label for each routine but the DWARF debugging
14003 information is not associated with any particular routine, so it's
14004 necessary to remove references to the PIC label from RTL stored by
14005 the DWARF output code. */
14008 ix86_delegitimize_address (rtx x
)
14010 rtx orig_x
= delegitimize_mem_from_attrs (x
);
14011 /* addend is NULL or some rtx if x is something+GOTOFF where
14012 something doesn't include the PIC register. */
14013 rtx addend
= NULL_RTX
;
14014 /* reg_addend is NULL or a multiple of some register. */
14015 rtx reg_addend
= NULL_RTX
;
14016 /* const_addend is NULL or a const_int. */
14017 rtx const_addend
= NULL_RTX
;
14018 /* This is the result, or NULL. */
14019 rtx result
= NULL_RTX
;
14028 if (GET_CODE (x
) == CONST
14029 && GET_CODE (XEXP (x
, 0)) == PLUS
14030 && GET_MODE (XEXP (x
, 0)) == Pmode
14031 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
14032 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == UNSPEC
14033 && XINT (XEXP (XEXP (x
, 0), 0), 1) == UNSPEC_PCREL
)
14035 rtx x2
= XVECEXP (XEXP (XEXP (x
, 0), 0), 0, 0);
14036 x
= gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 1), x2
);
14037 if (MEM_P (orig_x
))
14038 x
= replace_equiv_address_nv (orig_x
, x
);
14042 if (GET_CODE (x
) == CONST
14043 && GET_CODE (XEXP (x
, 0)) == UNSPEC
14044 && (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTPCREL
14045 || XINT (XEXP (x
, 0), 1) == UNSPEC_PCREL
)
14046 && (MEM_P (orig_x
) || XINT (XEXP (x
, 0), 1) == UNSPEC_PCREL
))
14048 x
= XVECEXP (XEXP (x
, 0), 0, 0);
14049 if (GET_MODE (orig_x
) != GET_MODE (x
) && MEM_P (orig_x
))
14051 x
= simplify_gen_subreg (GET_MODE (orig_x
), x
,
14059 if (ix86_cmodel
!= CM_MEDIUM_PIC
&& ix86_cmodel
!= CM_LARGE_PIC
)
14060 return ix86_delegitimize_tls_address (orig_x
);
14062 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14063 and -mcmodel=medium -fpic. */
14066 if (GET_CODE (x
) != PLUS
14067 || GET_CODE (XEXP (x
, 1)) != CONST
)
14068 return ix86_delegitimize_tls_address (orig_x
);
14070 if (ix86_pic_register_p (XEXP (x
, 0)))
14071 /* %ebx + GOT/GOTOFF */
14073 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
14075 /* %ebx + %reg * scale + GOT/GOTOFF */
14076 reg_addend
= XEXP (x
, 0);
14077 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
14078 reg_addend
= XEXP (reg_addend
, 1);
14079 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
14080 reg_addend
= XEXP (reg_addend
, 0);
14083 reg_addend
= NULL_RTX
;
14084 addend
= XEXP (x
, 0);
14088 addend
= XEXP (x
, 0);
14090 x
= XEXP (XEXP (x
, 1), 0);
14091 if (GET_CODE (x
) == PLUS
14092 && CONST_INT_P (XEXP (x
, 1)))
14094 const_addend
= XEXP (x
, 1);
14098 if (GET_CODE (x
) == UNSPEC
14099 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
14100 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))
14101 || (XINT (x
, 1) == UNSPEC_PLTOFF
&& ix86_cmodel
== CM_LARGE_PIC
14102 && !MEM_P (orig_x
) && !addend
)))
14103 result
= XVECEXP (x
, 0, 0);
14105 if (!TARGET_64BIT
&& TARGET_MACHO
&& darwin_local_data_pic (x
)
14106 && !MEM_P (orig_x
))
14107 result
= XVECEXP (x
, 0, 0);
14110 return ix86_delegitimize_tls_address (orig_x
);
14113 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
14115 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
14118 /* If the rest of original X doesn't involve the PIC register, add
14119 addend and subtract pic_offset_table_rtx. This can happen e.g.
14121 leal (%ebx, %ecx, 4), %ecx
14123 movl foo@GOTOFF(%ecx), %edx
14124 in which case we return (%ecx - %ebx) + foo. */
14125 if (pic_offset_table_rtx
)
14126 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
14127 pic_offset_table_rtx
),
14132 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
14134 result
= simplify_gen_subreg (GET_MODE (orig_x
), result
, Pmode
, 0);
14135 if (result
== NULL_RTX
)
14141 /* If X is a machine specific address (i.e. a symbol or label being
14142 referenced as a displacement from the GOT implemented using an
14143 UNSPEC), then return the base term. Otherwise return X. */
14146 ix86_find_base_term (rtx x
)
14152 if (GET_CODE (x
) != CONST
)
14154 term
= XEXP (x
, 0);
14155 if (GET_CODE (term
) == PLUS
14156 && (CONST_INT_P (XEXP (term
, 1))
14157 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
14158 term
= XEXP (term
, 0);
14159 if (GET_CODE (term
) != UNSPEC
14160 || (XINT (term
, 1) != UNSPEC_GOTPCREL
14161 && XINT (term
, 1) != UNSPEC_PCREL
))
14164 return XVECEXP (term
, 0, 0);
14167 return ix86_delegitimize_address (x
);
14171 put_condition_code (enum rtx_code code
, enum machine_mode mode
, bool reverse
,
14172 bool fp
, FILE *file
)
14174 const char *suffix
;
14176 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
14178 code
= ix86_fp_compare_code_to_integer (code
);
14182 code
= reverse_condition (code
);
14233 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
14237 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14238 Those same assemblers have the same but opposite lossage on cmov. */
14239 if (mode
== CCmode
)
14240 suffix
= fp
? "nbe" : "a";
14242 gcc_unreachable ();
14258 gcc_unreachable ();
14262 if (mode
== CCmode
)
14264 else if (mode
== CCCmode
)
14267 gcc_unreachable ();
14283 gcc_unreachable ();
14287 if (mode
== CCmode
)
14288 suffix
= fp
? "nb" : "ae";
14289 else if (mode
== CCCmode
)
14292 gcc_unreachable ();
14295 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
14299 if (mode
== CCmode
)
14302 gcc_unreachable ();
14305 suffix
= fp
? "u" : "p";
14308 suffix
= fp
? "nu" : "np";
14311 gcc_unreachable ();
14313 fputs (suffix
, file
);
14316 /* Print the name of register X to FILE based on its machine mode and number.
14317 If CODE is 'w', pretend the mode is HImode.
14318 If CODE is 'b', pretend the mode is QImode.
14319 If CODE is 'k', pretend the mode is SImode.
14320 If CODE is 'q', pretend the mode is DImode.
14321 If CODE is 'x', pretend the mode is V4SFmode.
14322 If CODE is 't', pretend the mode is V8SFmode.
14323 If CODE is 'g', pretend the mode is V16SFmode.
14324 If CODE is 'h', pretend the reg is the 'high' byte register.
14325 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
14326 If CODE is 'd', duplicate the operand for AVX instruction.
14330 print_reg (rtx x
, int code
, FILE *file
)
14333 unsigned int regno
;
14334 bool duplicated
= code
== 'd' && TARGET_AVX
;
14336 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14341 gcc_assert (TARGET_64BIT
);
14342 fputs ("rip", file
);
14346 regno
= true_regnum (x
);
14347 gcc_assert (regno
!= ARG_POINTER_REGNUM
14348 && regno
!= FRAME_POINTER_REGNUM
14349 && regno
!= FLAGS_REG
14350 && regno
!= FPSR_REG
14351 && regno
!= FPCR_REG
);
14353 if (code
== 'w' || MMX_REG_P (x
))
14355 else if (code
== 'b')
14357 else if (code
== 'k')
14359 else if (code
== 'q')
14361 else if (code
== 'y')
14363 else if (code
== 'h')
14365 else if (code
== 'x')
14367 else if (code
== 't')
14369 else if (code
== 'g')
14372 code
= GET_MODE_SIZE (GET_MODE (x
));
14374 /* Irritatingly, AMD extended registers use different naming convention
14375 from the normal registers: "r%d[bwd]" */
14376 if (REX_INT_REGNO_P (regno
))
14378 gcc_assert (TARGET_64BIT
);
14380 fprint_ul (file
, regno
- FIRST_REX_INT_REG
+ 8);
14384 error ("extended registers have no high halves");
14399 error ("unsupported operand size for extended register");
14409 if (STACK_TOP_P (x
))
14418 if (! ANY_FP_REG_P (x
) && ! ANY_BND_REG_P (x
))
14419 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
14424 reg
= hi_reg_name
[regno
];
14427 if (regno
>= ARRAY_SIZE (qi_reg_name
))
14429 reg
= qi_reg_name
[regno
];
14432 if (regno
>= ARRAY_SIZE (qi_high_reg_name
))
14434 reg
= qi_high_reg_name
[regno
];
14439 gcc_assert (!duplicated
);
14441 fputs (hi_reg_name
[regno
] + 1, file
);
14447 gcc_assert (!duplicated
);
14449 fputs (hi_reg_name
[REGNO (x
)] + 1, file
);
14454 gcc_unreachable ();
14460 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14461 fprintf (file
, ", %%%s", reg
);
14463 fprintf (file
, ", %s", reg
);
14467 /* Locate some local-dynamic symbol still in use by this function
14468 so that we can print its name in some tls_local_dynamic_base
14472 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
14476 if (GET_CODE (x
) == SYMBOL_REF
14477 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
14479 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
14486 static const char *
14487 get_some_local_dynamic_name (void)
14491 if (cfun
->machine
->some_ld_name
)
14492 return cfun
->machine
->some_ld_name
;
14494 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14495 if (NONDEBUG_INSN_P (insn
)
14496 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
14497 return cfun
->machine
->some_ld_name
;
14502 /* Meaning of CODE:
14503 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14504 C -- print opcode suffix for set/cmov insn.
14505 c -- like C, but print reversed condition
14506 F,f -- likewise, but for floating-point.
14507 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14509 R -- print the prefix for register names.
14510 z -- print the opcode suffix for the size of the current operand.
14511 Z -- likewise, with special suffixes for x87 instructions.
14512 * -- print a star (in certain assembler syntax)
14513 A -- print an absolute memory reference.
14514 E -- print address with DImode register names if TARGET_64BIT.
14515 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14516 s -- print a shift double count, followed by the assemblers argument
14518 b -- print the QImode name of the register for the indicated operand.
14519 %b0 would print %al if operands[0] is reg 0.
14520 w -- likewise, print the HImode name of the register.
14521 k -- likewise, print the SImode name of the register.
14522 q -- likewise, print the DImode name of the register.
14523 x -- likewise, print the V4SFmode name of the register.
14524 t -- likewise, print the V8SFmode name of the register.
14525 g -- likewise, print the V16SFmode name of the register.
14526 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14527 y -- print "st(0)" instead of "st" as a register.
14528 d -- print duplicated register operand for AVX instruction.
14529 D -- print condition for SSE cmp instruction.
14530 P -- if PIC, print an @PLT suffix.
14531 p -- print raw symbol name.
14532 X -- don't print any sort of PIC '@' suffix for a symbol.
14533 & -- print some in-use local-dynamic symbol name.
14534 H -- print a memory address offset by 8; used for sse high-parts
14535 Y -- print condition for XOP pcom* instruction.
14536 + -- print a branch hint as 'cs' or 'ds' prefix
14537 ; -- print a semicolon (after prefixes due to bug in older gas).
14538 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14539 @ -- print a segment register of thread base pointer load
14540 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14541 ! -- print MPX prefix for jxx/call/ret instructions if required.
14545 ix86_print_operand (FILE *file
, rtx x
, int code
)
14552 switch (ASSEMBLER_DIALECT
)
14559 /* Intel syntax. For absolute addresses, registers should not
14560 be surrounded by braces. */
14564 ix86_print_operand (file
, x
, 0);
14571 gcc_unreachable ();
14574 ix86_print_operand (file
, x
, 0);
14578 /* Wrap address in an UNSPEC to declare special handling. */
14580 x
= gen_rtx_UNSPEC (DImode
, gen_rtvec (1, x
), UNSPEC_LEA_ADDR
);
14582 output_address (x
);
14586 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14591 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14596 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14601 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14606 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14611 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14616 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14617 if (ASSEMBLER_DIALECT
!= ASM_ATT
)
14620 switch (GET_MODE_SIZE (GET_MODE (x
)))
14635 output_operand_lossage
14636 ("invalid operand size for operand code 'O'");
14645 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14647 /* Opcodes don't get size suffixes if using Intel opcodes. */
14648 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14651 switch (GET_MODE_SIZE (GET_MODE (x
)))
14670 output_operand_lossage
14671 ("invalid operand size for operand code 'z'");
14676 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14678 (0, "non-integer operand used with operand code 'z'");
14682 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14683 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14686 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14688 switch (GET_MODE_SIZE (GET_MODE (x
)))
14691 #ifdef HAVE_AS_IX86_FILDS
14701 #ifdef HAVE_AS_IX86_FILDQ
14704 fputs ("ll", file
);
14712 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14714 /* 387 opcodes don't get size suffixes
14715 if the operands are registers. */
14716 if (STACK_REG_P (x
))
14719 switch (GET_MODE_SIZE (GET_MODE (x
)))
14740 output_operand_lossage
14741 ("invalid operand type used with operand code 'Z'");
14745 output_operand_lossage
14746 ("invalid operand size for operand code 'Z'");
14765 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
14767 ix86_print_operand (file
, x
, 0);
14768 fputs (", ", file
);
14773 switch (GET_CODE (x
))
14776 fputs ("neq", file
);
14779 fputs ("eq", file
);
14783 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
14787 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
14791 fputs ("le", file
);
14795 fputs ("lt", file
);
14798 fputs ("unord", file
);
14801 fputs ("ord", file
);
14804 fputs ("ueq", file
);
14807 fputs ("nlt", file
);
14810 fputs ("nle", file
);
14813 fputs ("ule", file
);
14816 fputs ("ult", file
);
14819 fputs ("une", file
);
14822 output_operand_lossage ("operand is not a condition code, "
14823 "invalid operand code 'Y'");
14829 /* Little bit of braindamage here. The SSE compare instructions
14830 does use completely different names for the comparisons that the
14831 fp conditional moves. */
14832 switch (GET_CODE (x
))
14837 fputs ("eq_us", file
);
14841 fputs ("eq", file
);
14846 fputs ("nge", file
);
14850 fputs ("lt", file
);
14855 fputs ("ngt", file
);
14859 fputs ("le", file
);
14862 fputs ("unord", file
);
14867 fputs ("neq_oq", file
);
14871 fputs ("neq", file
);
14876 fputs ("ge", file
);
14880 fputs ("nlt", file
);
14885 fputs ("gt", file
);
14889 fputs ("nle", file
);
14892 fputs ("ord", file
);
14895 output_operand_lossage ("operand is not a condition code, "
14896 "invalid operand code 'D'");
14903 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14904 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14910 if (!COMPARISON_P (x
))
14912 output_operand_lossage ("operand is not a condition code, "
14913 "invalid operand code '%c'", code
);
14916 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)),
14917 code
== 'c' || code
== 'f',
14918 code
== 'F' || code
== 'f',
14923 if (!offsettable_memref_p (x
))
14925 output_operand_lossage ("operand is not an offsettable memory "
14926 "reference, invalid operand code 'H'");
14929 /* It doesn't actually matter what mode we use here, as we're
14930 only going to use this for printing. */
14931 x
= adjust_address_nv (x
, DImode
, 8);
14932 /* Output 'qword ptr' for intel assembler dialect. */
14933 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14938 gcc_assert (CONST_INT_P (x
));
14940 if (INTVAL (x
) & IX86_HLE_ACQUIRE
)
14941 #ifdef HAVE_AS_IX86_HLE
14942 fputs ("xacquire ", file
);
14944 fputs ("\n" ASM_BYTE
"0xf2\n\t", file
);
14946 else if (INTVAL (x
) & IX86_HLE_RELEASE
)
14947 #ifdef HAVE_AS_IX86_HLE
14948 fputs ("xrelease ", file
);
14950 fputs ("\n" ASM_BYTE
"0xf3\n\t", file
);
14952 /* We do not want to print value of the operand. */
14956 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
14957 fputs ("{z}", file
);
14961 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14967 const char *name
= get_some_local_dynamic_name ();
14969 output_operand_lossage ("'%%&' used without any "
14970 "local dynamic TLS references");
14972 assemble_name (file
, name
);
14981 || optimize_function_for_size_p (cfun
)
14982 || !TARGET_BRANCH_PREDICTION_HINTS
)
14985 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
14988 int pred_val
= XINT (x
, 0);
14990 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
14991 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
14993 bool taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
14995 = final_forward_branch_p (current_output_insn
) == 0;
14997 /* Emit hints only in the case default branch prediction
14998 heuristics would fail. */
14999 if (taken
!= cputaken
)
15001 /* We use 3e (DS) prefix for taken branches and
15002 2e (CS) prefix for not taken branches. */
15004 fputs ("ds ; ", file
);
15006 fputs ("cs ; ", file
);
15014 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15020 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15023 /* The kernel uses a different segment register for performance
15024 reasons; a system call would not have to trash the userspace
15025 segment register, which would be expensive. */
15026 if (TARGET_64BIT
&& ix86_cmodel
!= CM_KERNEL
)
15027 fputs ("fs", file
);
15029 fputs ("gs", file
);
15033 putc (TARGET_AVX2
? 'i' : 'f', file
);
15037 if (TARGET_64BIT
&& Pmode
!= word_mode
)
15038 fputs ("addr32 ", file
);
15042 if (ix86_bnd_prefixed_insn_p (NULL_RTX
))
15043 fputs ("bnd ", file
);
15047 output_operand_lossage ("invalid operand code '%c'", code
);
15052 print_reg (x
, code
, file
);
15054 else if (MEM_P (x
))
15056 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15057 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P'
15058 && GET_MODE (x
) != BLKmode
)
15061 switch (GET_MODE_SIZE (GET_MODE (x
)))
15063 case 1: size
= "BYTE"; break;
15064 case 2: size
= "WORD"; break;
15065 case 4: size
= "DWORD"; break;
15066 case 8: size
= "QWORD"; break;
15067 case 12: size
= "TBYTE"; break;
15069 if (GET_MODE (x
) == XFmode
)
15074 case 32: size
= "YMMWORD"; break;
15075 case 64: size
= "ZMMWORD"; break;
15077 gcc_unreachable ();
15080 /* Check for explicit size override (codes 'b', 'w', 'k',
15084 else if (code
== 'w')
15086 else if (code
== 'k')
15088 else if (code
== 'q')
15090 else if (code
== 'x')
15093 fputs (size
, file
);
15094 fputs (" PTR ", file
);
15098 /* Avoid (%rip) for call operands. */
15099 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
15100 && !CONST_INT_P (x
))
15101 output_addr_const (file
, x
);
15102 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
15103 output_operand_lossage ("invalid constraints for operand");
15105 output_address (x
);
15108 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
15113 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
15114 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
15116 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15118 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15120 fprintf (file
, "0x%08" HOST_LONG_LONG_FORMAT
"x",
15121 (unsigned long long) (int) l
);
15123 fprintf (file
, "0x%08x", (unsigned int) l
);
15126 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
15131 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
15132 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
15134 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15136 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
15139 /* These float cases don't actually occur as immediate operands. */
15140 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == XFmode
)
15144 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
15145 fputs (dstr
, file
);
15150 /* We have patterns that allow zero sets of memory, for instance.
15151 In 64-bit mode, we should probably support all 8-byte vectors,
15152 since we can in fact encode that into an immediate. */
15153 if (GET_CODE (x
) == CONST_VECTOR
)
15155 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
15159 if (code
!= 'P' && code
!= 'p')
15161 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
15163 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15166 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
15167 || GET_CODE (x
) == LABEL_REF
)
15169 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15172 fputs ("OFFSET FLAT:", file
);
15175 if (CONST_INT_P (x
))
15176 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
15177 else if (flag_pic
|| MACHOPIC_INDIRECT
)
15178 output_pic_addr_const (file
, x
, code
);
15180 output_addr_const (file
, x
);
15185 ix86_print_operand_punct_valid_p (unsigned char code
)
15187 return (code
== '@' || code
== '*' || code
== '+' || code
== '&'
15188 || code
== ';' || code
== '~' || code
== '^' || code
== '!');
15191 /* Print a memory operand whose address is ADDR. */
15194 ix86_print_operand_address (FILE *file
, rtx addr
)
15196 struct ix86_address parts
;
15197 rtx base
, index
, disp
;
15203 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_VSIBADDR
)
15205 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
15206 gcc_assert (parts
.index
== NULL_RTX
);
15207 parts
.index
= XVECEXP (addr
, 0, 1);
15208 parts
.scale
= INTVAL (XVECEXP (addr
, 0, 2));
15209 addr
= XVECEXP (addr
, 0, 0);
15212 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_LEA_ADDR
)
15214 gcc_assert (TARGET_64BIT
);
15215 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
15218 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_BNDMK_ADDR
)
15220 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 1), &parts
);
15221 gcc_assert (parts
.base
== NULL_RTX
|| parts
.index
== NULL_RTX
);
15222 if (parts
.base
!= NULL_RTX
)
15224 parts
.index
= parts
.base
;
15227 parts
.base
= XVECEXP (addr
, 0, 0);
15228 addr
= XVECEXP (addr
, 0, 0);
15230 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_BNDLDX_ADDR
)
15232 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
15233 gcc_assert (parts
.index
== NULL_RTX
);
15234 parts
.index
= XVECEXP (addr
, 0, 1);
15235 addr
= XVECEXP (addr
, 0, 0);
15238 ok
= ix86_decompose_address (addr
, &parts
);
15243 index
= parts
.index
;
15245 scale
= parts
.scale
;
15253 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15255 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
15258 gcc_unreachable ();
15261 /* Use one byte shorter RIP relative addressing for 64bit mode. */
15262 if (TARGET_64BIT
&& !base
&& !index
)
15266 if (GET_CODE (disp
) == CONST
15267 && GET_CODE (XEXP (disp
, 0)) == PLUS
15268 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
15269 symbol
= XEXP (XEXP (disp
, 0), 0);
15271 if (GET_CODE (symbol
) == LABEL_REF
15272 || (GET_CODE (symbol
) == SYMBOL_REF
15273 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
15276 if (!base
&& !index
)
15278 /* Displacement only requires special attention. */
15280 if (CONST_INT_P (disp
))
15282 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
15283 fputs ("ds:", file
);
15284 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
15287 output_pic_addr_const (file
, disp
, 0);
15289 output_addr_const (file
, disp
);
15293 /* Print SImode register names to force addr32 prefix. */
15294 if (SImode_address_operand (addr
, VOIDmode
))
15296 #ifdef ENABLE_CHECKING
15297 gcc_assert (TARGET_64BIT
);
15298 switch (GET_CODE (addr
))
15301 gcc_assert (GET_MODE (addr
) == SImode
);
15302 gcc_assert (GET_MODE (SUBREG_REG (addr
)) == DImode
);
15306 gcc_assert (GET_MODE (addr
) == DImode
);
15309 gcc_unreachable ();
15312 gcc_assert (!code
);
15318 && CONST_INT_P (disp
)
15319 && INTVAL (disp
) < -16*1024*1024)
15321 /* X32 runs in 64-bit mode, where displacement, DISP, in
15322 address DISP(%r64), is encoded as 32-bit immediate sign-
15323 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15324 address is %r64 + 0xffffffffbffffd00. When %r64 <
15325 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15326 which is invalid for x32. The correct address is %r64
15327 - 0x40000300 == 0xf7ffdd64. To properly encode
15328 -0x40000300(%r64) for x32, we zero-extend negative
15329 displacement by forcing addr32 prefix which truncates
15330 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15331 zero-extend all negative displacements, including -1(%rsp).
15332 However, for small negative displacements, sign-extension
15333 won't cause overflow. We only zero-extend negative
15334 displacements if they < -16*1024*1024, which is also used
15335 to check legitimate address displacements for PIC. */
15339 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15344 output_pic_addr_const (file
, disp
, 0);
15345 else if (GET_CODE (disp
) == LABEL_REF
)
15346 output_asm_label (disp
);
15348 output_addr_const (file
, disp
);
15353 print_reg (base
, code
, file
);
15357 print_reg (index
, vsib
? 0 : code
, file
);
15358 if (scale
!= 1 || vsib
)
15359 fprintf (file
, ",%d", scale
);
15365 rtx offset
= NULL_RTX
;
15369 /* Pull out the offset of a symbol; print any symbol itself. */
15370 if (GET_CODE (disp
) == CONST
15371 && GET_CODE (XEXP (disp
, 0)) == PLUS
15372 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
15374 offset
= XEXP (XEXP (disp
, 0), 1);
15375 disp
= gen_rtx_CONST (VOIDmode
,
15376 XEXP (XEXP (disp
, 0), 0));
15380 output_pic_addr_const (file
, disp
, 0);
15381 else if (GET_CODE (disp
) == LABEL_REF
)
15382 output_asm_label (disp
);
15383 else if (CONST_INT_P (disp
))
15386 output_addr_const (file
, disp
);
15392 print_reg (base
, code
, file
);
15395 if (INTVAL (offset
) >= 0)
15397 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
15401 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
15408 print_reg (index
, vsib
? 0 : code
, file
);
15409 if (scale
!= 1 || vsib
)
15410 fprintf (file
, "*%d", scale
);
15417 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15420 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
15424 if (GET_CODE (x
) != UNSPEC
)
15427 op
= XVECEXP (x
, 0, 0);
15428 switch (XINT (x
, 1))
15430 case UNSPEC_GOTTPOFF
:
15431 output_addr_const (file
, op
);
15432 /* FIXME: This might be @TPOFF in Sun ld. */
15433 fputs ("@gottpoff", file
);
15436 output_addr_const (file
, op
);
15437 fputs ("@tpoff", file
);
15439 case UNSPEC_NTPOFF
:
15440 output_addr_const (file
, op
);
15442 fputs ("@tpoff", file
);
15444 fputs ("@ntpoff", file
);
15446 case UNSPEC_DTPOFF
:
15447 output_addr_const (file
, op
);
15448 fputs ("@dtpoff", file
);
15450 case UNSPEC_GOTNTPOFF
:
15451 output_addr_const (file
, op
);
15453 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
15454 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
15456 fputs ("@gotntpoff", file
);
15458 case UNSPEC_INDNTPOFF
:
15459 output_addr_const (file
, op
);
15460 fputs ("@indntpoff", file
);
15463 case UNSPEC_MACHOPIC_OFFSET
:
15464 output_addr_const (file
, op
);
15466 machopic_output_function_base_name (file
);
15470 case UNSPEC_STACK_CHECK
:
15474 gcc_assert (flag_split_stack
);
15476 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15477 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
15479 gcc_unreachable ();
15482 fprintf (file
, "%s:%d", TARGET_64BIT
? "%fs" : "%gs", offset
);
15493 /* Split one or more double-mode RTL references into pairs of half-mode
15494 references. The RTL can be REG, offsettable MEM, integer constant, or
15495 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15496 split and "num" is its length. lo_half and hi_half are output arrays
15497 that parallel "operands". */
15500 split_double_mode (enum machine_mode mode
, rtx operands
[],
15501 int num
, rtx lo_half
[], rtx hi_half
[])
15503 enum machine_mode half_mode
;
15509 half_mode
= DImode
;
15512 half_mode
= SImode
;
15515 gcc_unreachable ();
15518 byte
= GET_MODE_SIZE (half_mode
);
15522 rtx op
= operands
[num
];
15524 /* simplify_subreg refuse to split volatile memory addresses,
15525 but we still have to handle it. */
15528 lo_half
[num
] = adjust_address (op
, half_mode
, 0);
15529 hi_half
[num
] = adjust_address (op
, half_mode
, byte
);
15533 lo_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15534 GET_MODE (op
) == VOIDmode
15535 ? mode
: GET_MODE (op
), 0);
15536 hi_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15537 GET_MODE (op
) == VOIDmode
15538 ? mode
: GET_MODE (op
), byte
);
15543 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15544 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15545 is the expression of the binary operation. The output may either be
15546 emitted here, or returned to the caller, like all output_* functions.
15548 There is no guarantee that the operands are the same mode, as they
15549 might be within FLOAT or FLOAT_EXTEND expressions. */
15551 #ifndef SYSV386_COMPAT
15552 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15553 wants to fix the assemblers because that causes incompatibility
15554 with gcc. No-one wants to fix gcc because that causes
15555 incompatibility with assemblers... You can use the option of
15556 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15557 #define SYSV386_COMPAT 1
15561 output_387_binary_op (rtx insn
, rtx
*operands
)
15563 static char buf
[40];
15566 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
15568 #ifdef ENABLE_CHECKING
15569 /* Even if we do not want to check the inputs, this documents input
15570 constraints. Which helps in understanding the following code. */
15571 if (STACK_REG_P (operands
[0])
15572 && ((REG_P (operands
[1])
15573 && REGNO (operands
[0]) == REGNO (operands
[1])
15574 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
15575 || (REG_P (operands
[2])
15576 && REGNO (operands
[0]) == REGNO (operands
[2])
15577 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
15578 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
15581 gcc_assert (is_sse
);
15584 switch (GET_CODE (operands
[3]))
15587 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15588 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15596 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15597 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15605 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15606 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15614 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15615 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15623 gcc_unreachable ();
15630 strcpy (buf
, ssep
);
15631 if (GET_MODE (operands
[0]) == SFmode
)
15632 strcat (buf
, "ss\t{%2, %1, %0|%0, %1, %2}");
15634 strcat (buf
, "sd\t{%2, %1, %0|%0, %1, %2}");
15638 strcpy (buf
, ssep
+ 1);
15639 if (GET_MODE (operands
[0]) == SFmode
)
15640 strcat (buf
, "ss\t{%2, %0|%0, %2}");
15642 strcat (buf
, "sd\t{%2, %0|%0, %2}");
15648 switch (GET_CODE (operands
[3]))
15652 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
15654 rtx temp
= operands
[2];
15655 operands
[2] = operands
[1];
15656 operands
[1] = temp
;
15659 /* know operands[0] == operands[1]. */
15661 if (MEM_P (operands
[2]))
15667 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15669 if (STACK_TOP_P (operands
[0]))
15670 /* How is it that we are storing to a dead operand[2]?
15671 Well, presumably operands[1] is dead too. We can't
15672 store the result to st(0) as st(0) gets popped on this
15673 instruction. Instead store to operands[2] (which I
15674 think has to be st(1)). st(1) will be popped later.
15675 gcc <= 2.8.1 didn't have this check and generated
15676 assembly code that the Unixware assembler rejected. */
15677 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15679 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15683 if (STACK_TOP_P (operands
[0]))
15684 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15686 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15691 if (MEM_P (operands
[1]))
15697 if (MEM_P (operands
[2]))
15703 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15706 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15707 derived assemblers, confusingly reverse the direction of
15708 the operation for fsub{r} and fdiv{r} when the
15709 destination register is not st(0). The Intel assembler
15710 doesn't have this brain damage. Read !SYSV386_COMPAT to
15711 figure out what the hardware really does. */
15712 if (STACK_TOP_P (operands
[0]))
15713 p
= "{p\t%0, %2|rp\t%2, %0}";
15715 p
= "{rp\t%2, %0|p\t%0, %2}";
15717 if (STACK_TOP_P (operands
[0]))
15718 /* As above for fmul/fadd, we can't store to st(0). */
15719 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15721 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15726 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
15729 if (STACK_TOP_P (operands
[0]))
15730 p
= "{rp\t%0, %1|p\t%1, %0}";
15732 p
= "{p\t%1, %0|rp\t%0, %1}";
15734 if (STACK_TOP_P (operands
[0]))
15735 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15737 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15742 if (STACK_TOP_P (operands
[0]))
15744 if (STACK_TOP_P (operands
[1]))
15745 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15747 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15750 else if (STACK_TOP_P (operands
[1]))
15753 p
= "{\t%1, %0|r\t%0, %1}";
15755 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15761 p
= "{r\t%2, %0|\t%0, %2}";
15763 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15769 gcc_unreachable ();
15776 /* Check if a 256bit AVX register is referenced inside of EXP. */
15779 ix86_check_avx256_register (rtx
*pexp
, void *data ATTRIBUTE_UNUSED
)
15783 if (GET_CODE (exp
) == SUBREG
)
15784 exp
= SUBREG_REG (exp
);
15787 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp
)))
15793 /* Return needed mode for entity in optimize_mode_switching pass. */
15796 ix86_avx_u128_mode_needed (rtx insn
)
15802 /* Needed mode is set to AVX_U128_CLEAN if there are
15803 no 256bit modes used in function arguments. */
15804 for (link
= CALL_INSN_FUNCTION_USAGE (insn
);
15806 link
= XEXP (link
, 1))
15808 if (GET_CODE (XEXP (link
, 0)) == USE
)
15810 rtx arg
= XEXP (XEXP (link
, 0), 0);
15812 if (ix86_check_avx256_register (&arg
, NULL
))
15813 return AVX_U128_DIRTY
;
15817 return AVX_U128_CLEAN
;
15820 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
15821 changes state only when a 256bit register is written to, but we need
15822 to prevent the compiler from moving optimal insertion point above
15823 eventual read from 256bit register. */
15824 if (for_each_rtx (&PATTERN (insn
), ix86_check_avx256_register
, NULL
))
15825 return AVX_U128_DIRTY
;
15827 return AVX_U128_ANY
;
15830 /* Return mode that i387 must be switched into
15831 prior to the execution of insn. */
15834 ix86_i387_mode_needed (int entity
, rtx insn
)
15836 enum attr_i387_cw mode
;
15838 /* The mode UNINITIALIZED is used to store control word after a
15839 function call or ASM pattern. The mode ANY specify that function
15840 has no requirements on the control word and make no changes in the
15841 bits we are interested in. */
15844 || (NONJUMP_INSN_P (insn
)
15845 && (asm_noperands (PATTERN (insn
)) >= 0
15846 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
15847 return I387_CW_UNINITIALIZED
;
15849 if (recog_memoized (insn
) < 0)
15850 return I387_CW_ANY
;
15852 mode
= get_attr_i387_cw (insn
);
15857 if (mode
== I387_CW_TRUNC
)
15862 if (mode
== I387_CW_FLOOR
)
15867 if (mode
== I387_CW_CEIL
)
15872 if (mode
== I387_CW_MASK_PM
)
15877 gcc_unreachable ();
15880 return I387_CW_ANY
;
15883 /* Return mode that entity must be switched into
15884 prior to the execution of insn. */
15887 ix86_mode_needed (int entity
, rtx insn
)
15892 return ix86_avx_u128_mode_needed (insn
);
15897 return ix86_i387_mode_needed (entity
, insn
);
15899 gcc_unreachable ();
15904 /* Check if a 256bit AVX register is referenced in stores. */
15907 ix86_check_avx256_stores (rtx dest
, const_rtx set ATTRIBUTE_UNUSED
, void *data
)
15909 if (ix86_check_avx256_register (&dest
, NULL
))
15911 bool *used
= (bool *) data
;
15916 /* Calculate mode of upper 128bit AVX registers after the insn. */
15919 ix86_avx_u128_mode_after (int mode
, rtx insn
)
15921 rtx pat
= PATTERN (insn
);
15923 if (vzeroupper_operation (pat
, VOIDmode
)
15924 || vzeroall_operation (pat
, VOIDmode
))
15925 return AVX_U128_CLEAN
;
15927 /* We know that state is clean after CALL insn if there are no
15928 256bit registers used in the function return register. */
15931 bool avx_reg256_found
= false;
15932 note_stores (pat
, ix86_check_avx256_stores
, &avx_reg256_found
);
15934 return avx_reg256_found
? AVX_U128_DIRTY
: AVX_U128_CLEAN
;
15937 /* Otherwise, return current mode. Remember that if insn
15938 references AVX 256bit registers, the mode was already changed
15939 to DIRTY from MODE_NEEDED. */
15943 /* Return the mode that an insn results in. */
15946 ix86_mode_after (int entity
, int mode
, rtx insn
)
15951 return ix86_avx_u128_mode_after (mode
, insn
);
15958 gcc_unreachable ();
15963 ix86_avx_u128_mode_entry (void)
15967 /* Entry mode is set to AVX_U128_DIRTY if there are
15968 256bit modes used in function arguments. */
15969 for (arg
= DECL_ARGUMENTS (current_function_decl
); arg
;
15970 arg
= TREE_CHAIN (arg
))
15972 rtx incoming
= DECL_INCOMING_RTL (arg
);
15974 if (incoming
&& ix86_check_avx256_register (&incoming
, NULL
))
15975 return AVX_U128_DIRTY
;
15978 return AVX_U128_CLEAN
;
15981 /* Return a mode that ENTITY is assumed to be
15982 switched to at function entry. */
15985 ix86_mode_entry (int entity
)
15990 return ix86_avx_u128_mode_entry ();
15995 return I387_CW_ANY
;
15997 gcc_unreachable ();
16002 ix86_avx_u128_mode_exit (void)
16004 rtx reg
= crtl
->return_rtx
;
16006 /* Exit mode is set to AVX_U128_DIRTY if there are
16007 256bit modes used in the function return register. */
16008 if (reg
&& ix86_check_avx256_register (®
, NULL
))
16009 return AVX_U128_DIRTY
;
16011 return AVX_U128_CLEAN
;
16014 /* Return a mode that ENTITY is assumed to be
16015 switched to at function exit. */
16018 ix86_mode_exit (int entity
)
16023 return ix86_avx_u128_mode_exit ();
16028 return I387_CW_ANY
;
16030 gcc_unreachable ();
16034 /* Output code to initialize control word copies used by trunc?f?i and
16035 rounding patterns. CURRENT_MODE is set to current control word,
16036 while NEW_MODE is set to new control word. */
16039 emit_i387_cw_initialization (int mode
)
16041 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
16044 enum ix86_stack_slot slot
;
16046 rtx reg
= gen_reg_rtx (HImode
);
16048 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
16049 emit_move_insn (reg
, copy_rtx (stored_mode
));
16051 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
16052 || optimize_insn_for_size_p ())
16056 case I387_CW_TRUNC
:
16057 /* round toward zero (truncate) */
16058 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
16059 slot
= SLOT_CW_TRUNC
;
16062 case I387_CW_FLOOR
:
16063 /* round down toward -oo */
16064 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
16065 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
16066 slot
= SLOT_CW_FLOOR
;
16070 /* round up toward +oo */
16071 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
16072 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
16073 slot
= SLOT_CW_CEIL
;
16076 case I387_CW_MASK_PM
:
16077 /* mask precision exception for nearbyint() */
16078 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
16079 slot
= SLOT_CW_MASK_PM
;
16083 gcc_unreachable ();
16090 case I387_CW_TRUNC
:
16091 /* round toward zero (truncate) */
16092 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
16093 slot
= SLOT_CW_TRUNC
;
16096 case I387_CW_FLOOR
:
16097 /* round down toward -oo */
16098 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
16099 slot
= SLOT_CW_FLOOR
;
16103 /* round up toward +oo */
16104 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
16105 slot
= SLOT_CW_CEIL
;
16108 case I387_CW_MASK_PM
:
16109 /* mask precision exception for nearbyint() */
16110 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
16111 slot
= SLOT_CW_MASK_PM
;
16115 gcc_unreachable ();
16119 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
16121 new_mode
= assign_386_stack_local (HImode
, slot
);
16122 emit_move_insn (new_mode
, reg
);
16125 /* Emit vzeroupper. */
16128 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live
)
16132 /* Cancel automatic vzeroupper insertion if there are
16133 live call-saved SSE registers at the insertion point. */
16135 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
16136 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
16140 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
16141 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
16144 emit_insn (gen_avx_vzeroupper ());
16147 /* Generate one or more insns to set ENTITY to MODE. */
16150 ix86_emit_mode_set (int entity
, int mode
, HARD_REG_SET regs_live
)
16155 if (mode
== AVX_U128_CLEAN
)
16156 ix86_avx_emit_vzeroupper (regs_live
);
16162 if (mode
!= I387_CW_ANY
16163 && mode
!= I387_CW_UNINITIALIZED
)
16164 emit_i387_cw_initialization (mode
);
16167 gcc_unreachable ();
16171 /* Output code for INSN to convert a float to a signed int. OPERANDS
16172 are the insn operands. The output may be [HSD]Imode and the input
16173 operand may be [SDX]Fmode. */
16176 output_fix_trunc (rtx insn
, rtx
*operands
, bool fisttp
)
16178 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
16179 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
16180 int round_mode
= get_attr_i387_cw (insn
);
16182 /* Jump through a hoop or two for DImode, since the hardware has no
16183 non-popping instruction. We used to do this a different way, but
16184 that was somewhat fragile and broke with post-reload splitters. */
16185 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
16186 output_asm_insn ("fld\t%y1", operands
);
16188 gcc_assert (STACK_TOP_P (operands
[1]));
16189 gcc_assert (MEM_P (operands
[0]));
16190 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
16193 output_asm_insn ("fisttp%Z0\t%0", operands
);
16196 if (round_mode
!= I387_CW_ANY
)
16197 output_asm_insn ("fldcw\t%3", operands
);
16198 if (stack_top_dies
|| dimode_p
)
16199 output_asm_insn ("fistp%Z0\t%0", operands
);
16201 output_asm_insn ("fist%Z0\t%0", operands
);
16202 if (round_mode
!= I387_CW_ANY
)
16203 output_asm_insn ("fldcw\t%2", operands
);
16209 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16210 have the values zero or one, indicates the ffreep insn's operand
16211 from the OPERANDS array. */
16213 static const char *
16214 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
16216 if (TARGET_USE_FFREEP
)
16217 #ifdef HAVE_AS_IX86_FFREEP
/* The assembler understands the ffreep mnemonic directly.  */
16218 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep support: emit the instruction's raw
   encoding through an ASM_SHORT data directive instead.  */
16221 static char retval
[32];
16222 int regno
= REGNO (operands
[opno
]);
16224 gcc_assert (STACK_REGNO_P (regno
));
/* Convert to a 0-based x87 stack-slot index for the encoding.  */
16226 regno
-= FIRST_STACK_REG
;
16228 snprintf (retval
, sizeof (retval
), ASM_SHORT
"0xc%ddf", regno
);
/* ffreep not preferred on this target: a plain popping store does.  */
16233 return opno
? "fstp\t%y1" : "fstp\t%y0";
16237 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16238 should be used. UNORDERED_P is true when fucom should be used. */
16241 output_fp_compare (rtx insn
, rtx
*operands
, bool eflags_p
, bool unordered_p
)
16243 int stack_top_dies
;
16244 rtx cmp_op0
, cmp_op1
;
/* SSE comparisons are handled by the [u]comis[sd] templates below.  */
16245 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
16249 cmp_op0
= operands
[0];
16250 cmp_op1
= operands
[1];
16254 cmp_op0
= operands
[1];
16255 cmp_op1
= operands
[2];
16260 if (GET_MODE (operands
[0]) == SFmode
)
16262 return "%vucomiss\t{%1, %0|%0, %1}";
16264 return "%vcomiss\t{%1, %0|%0, %1}";
16267 return "%vucomisd\t{%1, %0|%0, %1}";
16269 return "%vcomisd\t{%1, %0|%0, %1}";
/* x87 path: the first compare operand must be the stack top.  */
16272 gcc_assert (STACK_TOP_P (cmp_op0
));
16274 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
/* Compare against zero: ftst suffices; pop afterwards when the
   stack top dies.  */
16276 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
16278 if (stack_top_dies
)
16280 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
16281 return output_387_ffreep (operands
, 1);
16284 return "ftst\n\tfnstsw\t%0";
16287 if (STACK_REG_P (cmp_op1
)
16289 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
16290 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
16292 /* If both the top of the 387 stack dies, and the other operand
16293 is also a stack register that dies, then this must be a
16294 `fcompp' float compare */
16298 /* There is no double popping fcomi variant. Fortunately,
16299 eflags is immune from the fstp's cc clobbering. */
16301 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
16303 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
16304 return output_387_ffreep (operands
, 0);
16309 return "fucompp\n\tfnstsw\t%0";
16311 return "fcompp\n\tfnstsw\t%0";
16316 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16318 static const char * const alt
[16] =
16320 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16321 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16322 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16323 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16325 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16326 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16330 "fcomi\t{%y1, %0|%0, %y1}",
16331 "fcomip\t{%y1, %0|%0, %y1}",
16332 "fucomi\t{%y1, %0|%0, %y1}",
16333 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index into ALT exactly as documented above.  */
16344 mask
= eflags_p
<< 3;
16345 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
16346 mask
|= unordered_p
<< 1;
16347 mask
|= stack_top_dies
;
16349 gcc_assert (mask
< 16);
/* Output one element of a jump-table (addr_vec): the address of local
   label LPREFIX<VALUE>, using ASM_LONG or -- presumably when ASM_QUAD
   is defined and TARGET_64BIT -- ASM_QUAD (the guarding conditional is
   elided in this extract; confirm against the full source).  */
16358 ix86_output_addr_vec_elt (FILE *file
, int value
)
16360 const char *directive
= ASM_LONG
;
16364 directive
= ASM_QUAD
;
/* Without ASM_QUAD a 64-bit table element cannot be emitted.  */
16366 gcc_assert (!TARGET_64BIT
);
16369 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
/* Output one element of a relative jump-table (addr_diff_vec): the
   difference between local label LPREFIX<VALUE> and the anchor label
   LPREFIX<REL>, or a @GOTOFF / pic-base-relative form for PIC.  */
16373 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
16375 const char *directive
= ASM_LONG
;
/* DImode case vectors on 64-bit need a quad directive.  */
16378 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
16379 directive
= ASM_QUAD
;
16381 gcc_assert (!TARGET_64BIT
);
16383 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16384 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
16385 fprintf (file
, "%s%s%d-%s%d\n",
16386 directive
, LPREFIX
, value
, LPREFIX
, rel
);
16387 else if (HAVE_AS_GOTOFF_IN_DATA
)
16388 fprintf (file
, ASM_LONG
"%s%d@GOTOFF\n", LPREFIX
, value
);
/* Darwin: express the entry relative to the function's pic base.  */
16390 else if (TARGET_MACHO
)
16392 fprintf (file
, ASM_LONG
"%s%d-", LPREFIX
, value
);
16393 machopic_output_function_base_name (file
);
/* Fallback: entry relative to the GOT symbol.  */
16398 asm_fprintf (file
, ASM_LONG
"%U%s+[.-%s%d]\n",
16399 GOT_SYMBOL_NAME
, LPREFIX
, value
);
16402 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for DEST.  Only valid after reload (see assert below).  */
16406 ix86_expand_clear (rtx dest
)
16410 /* We play register width games, which are only valid after reload. */
16411 gcc_assert (reload_completed
);
16413 /* Avoid HImode and its attendant prefix byte. */
16414 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
16415 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
16416 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
16418 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
16419 if (!TARGET_USE_MOV0
|| optimize_insn_for_speed_p ())
/* The xor form clobbers the flags, so the set must be wrapped in a
   PARALLEL with an explicit CC-register clobber.  */
16421 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16422 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
16428 /* X is an unchanging MEM. If it is a constant pool reference, return
16429 the constant pool rtx, else NULL. */
16432 maybe_get_pool_constant (rtx x
)
16434 x
= ix86_delegitimize_address (XEXP (x
, 0));
16436 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
16437 return get_pool_constant (x
);
/* Expand a scalar move of OPERANDS[1] into OPERANDS[0] in MODE,
   legitimizing TLS, PE-COFF dllimport, and PIC symbolic addresses as
   needed before emitting the final SET.  NOTE(review): several control
   lines (braces, a few conditions) are elided in this extract.  */
16443 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
16446 enum tls_model model
;
/* Bare SYMBOL_REF source: handle TLS symbols and dllimport first.  */
16451 if (GET_CODE (op1
) == SYMBOL_REF
)
16455 model
= SYMBOL_REF_TLS_MODEL (op1
);
16458 op1
= legitimize_tls_address (op1
, model
, true);
16459 op1
= force_operand (op1
, op0
);
16462 op1
= convert_to_mode (mode
, op1
, 1);
16464 else if ((tmp
= legitimize_pe_coff_symbol (op1
, false)) != NULL_RTX
)
/* (const (plus (symbol_ref ...) (const_int ...))): legitimize the
   symbol part, then re-add the addend.  */
16467 else if (GET_CODE (op1
) == CONST
16468 && GET_CODE (XEXP (op1
, 0)) == PLUS
16469 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
16471 rtx addend
= XEXP (XEXP (op1
, 0), 1);
16472 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
16475 model
= SYMBOL_REF_TLS_MODEL (symbol
);
16477 tmp
= legitimize_tls_address (symbol
, model
, true);
16479 tmp
= legitimize_pe_coff_symbol (symbol
, true);
16483 tmp
= force_operand (tmp
, NULL
);
16484 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
16485 op0
, 1, OPTAB_DIRECT
);
16488 op1
= convert_to_mode (mode
, tmp
, 1);
/* PIC (or Darwin indirect) symbolic operands need legitimizing.  */
16492 if ((flag_pic
|| MACHOPIC_INDIRECT
)
16493 && symbolic_operand (op1
, mode
))
16495 if (TARGET_MACHO
&& !TARGET_64BIT
)
16498 /* dynamic-no-pic */
16499 if (MACHOPIC_INDIRECT
)
16501 rtx temp
= ((reload_in_progress
16502 || ((op0
&& REG_P (op0
))
16504 ? op0
: gen_reg_rtx (Pmode
));
16505 op1
= machopic_indirect_data_reference (op1
, temp
);
16507 op1
= machopic_legitimize_pic_address (op1
, mode
,
16508 temp
== op1
? 0 : temp
);
16510 if (op0
!= op1
&& GET_CODE (op0
) != MEM
)
16512 rtx insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
16516 if (GET_CODE (op0
) == MEM
)
16517 op1
= force_reg (Pmode
, op1
);
16521 if (GET_CODE (temp
) != REG
)
16522 temp
= gen_reg_rtx (Pmode
);
16523 temp
= legitimize_pic_address (op1
, temp
);
16528 /* dynamic-no-pic */
16534 op1
= force_reg (mode
, op1
);
/* Non-Darwin PIC: anything but a 64-bit movabs-able operand goes
   through legitimize_pic_address.  */
16535 else if (!(TARGET_64BIT
&& x86_64_movabs_operand (op1
, DImode
)))
16537 rtx reg
= can_create_pseudo_p () ? NULL_RTX
: op0
;
16538 op1
= legitimize_pic_address (op1
, reg
);
16541 op1
= convert_to_mode (mode
, op1
, 1);
16548 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
16549 || !push_operand (op0
, mode
))
16551 op1
= force_reg (mode
, op1
);
16553 if (push_operand (op0
, mode
)
16554 && ! general_no_elim_operand (op1
, mode
))
16555 op1
= copy_to_mode_reg (mode
, op1
);
16557 /* Force large constants in 64bit compilation into register
16558 to get them CSEed. */
16559 if (can_create_pseudo_p ()
16560 && (mode
== DImode
) && TARGET_64BIT
16561 && immediate_operand (op1
, mode
)
16562 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
16563 && !register_operand (op0
, mode
)
16565 op1
= copy_to_mode_reg (mode
, op1
);
16567 if (can_create_pseudo_p ()
16568 && FLOAT_MODE_P (mode
)
16569 && GET_CODE (op1
) == CONST_DOUBLE
)
16571 /* If we are loading a floating point constant to a register,
16572 force the value to memory now, since we'll get better code
16573 out the back end. */
16575 op1
= validize_mem (force_const_mem (mode
, op1
));
16576 if (!register_operand (op0
, mode
))
16578 rtx temp
= gen_reg_rtx (mode
);
16579 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
16580 emit_move_insn (op0
, temp
);
/* Finally emit the (possibly rewritten) move.  */
16586 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
/* Expand a vector-mode move of OPERANDS[1] into OPERANDS[0], forcing
   non-trivial constants to memory and routing under-aligned SSE
   operands through ix86_expand_vector_move_misalign.  */
16590 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
16592 rtx op0
= operands
[0], op1
= operands
[1];
/* Natural alignment the SSE load/store patterns would require.  */
16593 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
16595 /* Force constants other than zero into memory. We do not know how
16596 the instructions used to build constants modify the upper 64 bits
16597 of the register, once we have that information we may be able
16598 to handle some of them more efficiently. */
16599 if (can_create_pseudo_p ()
16600 && register_operand (op0
, mode
)
16601 && (CONSTANT_P (op1
)
16602 || (GET_CODE (op1
) == SUBREG
16603 && CONSTANT_P (SUBREG_REG (op1
))))
16604 && !standard_sse_constant_p (op1
))
16605 op1
= validize_mem (force_const_mem (mode
, op1
));
16607 /* We need to check memory alignment for SSE mode since attribute
16608 can make operands unaligned. */
16609 if (can_create_pseudo_p ()
16610 && SSE_REG_MODE_P (mode
)
16611 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
16612 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
16616 /* ix86_expand_vector_move_misalign() does not like constants ... */
16617 if (CONSTANT_P (op1
)
16618 || (GET_CODE (op1
) == SUBREG
16619 && CONSTANT_P (SUBREG_REG (op1
))))
16620 op1
= validize_mem (force_const_mem (mode
, op1
));
16622 /* ... nor both arguments in memory. */
16623 if (!register_operand (op0
, mode
)
16624 && !register_operand (op1
, mode
))
16625 op1
= force_reg (mode
, op1
);
16627 tmp
[0] = op0
; tmp
[1] = op1
;
16628 ix86_expand_vector_move_misalign (mode
, tmp
);
16632 /* Make operand1 a register if it isn't already. */
16633 if (can_create_pseudo_p ()
16634 && !register_operand (op0
, mode
)
16635 && !register_operand (op1
, mode
))
16637 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
/* Aligned / register case: a plain SET suffices.  */
16641 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16644 /* Split 32-byte AVX unaligned load and store if needed. */
/* OP0/OP1 are the destination/source of a 256-bit misaligned move.
   Depending on the tuning flags the move is either emitted as a single
   unaligned vmov, or split into two 128-bit halves.  */
16647 ix86_avx256_split_vector_move_misalign (rtx op0
, rtx op1
)
/* Per-mode generator functions selected by the switch below.  */
16650 rtx (*extract
) (rtx
, rtx
, rtx
);
16651 rtx (*load_unaligned
) (rtx
, rtx
);
16652 rtx (*store_unaligned
) (rtx
, rtx
);
16653 enum machine_mode mode
;
16655 switch (GET_MODE (op0
))
16658 gcc_unreachable ();
16660 extract
= gen_avx_vextractf128v32qi
;
16661 load_unaligned
= gen_avx_loaddquv32qi
;
16662 store_unaligned
= gen_avx_storedquv32qi
;
16666 extract
= gen_avx_vextractf128v8sf
;
16667 load_unaligned
= gen_avx_loadups256
;
16668 store_unaligned
= gen_avx_storeups256
;
16672 extract
= gen_avx_vextractf128v4df
;
16673 load_unaligned
= gen_avx_loadupd256
;
16674 store_unaligned
= gen_avx_storeupd256
;
/* Load path: split into two 16-byte halves and re-concatenate when
   the tuning flag asks for it.  */
16681 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
)
16683 rtx r
= gen_reg_rtx (mode
);
16684 m
= adjust_address (op1
, mode
, 0);
16685 emit_move_insn (r
, m
);
16686 m
= adjust_address (op1
, mode
, 16);
16687 r
= gen_rtx_VEC_CONCAT (GET_MODE (op0
), r
, m
);
16688 emit_move_insn (op0
, r
);
16690 /* Normal *mov<mode>_internal pattern will handle
16691 unaligned loads just fine if misaligned_operand
16692 is true, and without the UNSPEC it can be combined
16693 with arithmetic instructions. */
16694 else if (misaligned_operand (op1
, GET_MODE (op1
)))
16695 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16697 emit_insn (load_unaligned (op0
, op1
));
/* Store path: mirror of the above using vextractf128.  */
16699 else if (MEM_P (op0
))
16701 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
)
16703 m
= adjust_address (op0
, mode
, 0);
16704 emit_insn (extract (m
, op1
, const0_rtx
));
16705 m
= adjust_address (op0
, mode
, 16);
16706 emit_insn (extract (m
, op1
, const1_rtx
));
16709 emit_insn (store_unaligned (op0
, op1
));
16712 gcc_unreachable ();
16715 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16716 straight to ix86_expand_vector_move. */
16717 /* Code generation for scalar reg-reg moves of single and double precision data:
16718 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16722 if (x86_sse_partial_reg_dependency == true)
16727 Code generation for scalar loads of double precision data:
16728 if (x86_sse_split_regs == true)
16729 movlpd mem, reg (gas syntax)
16733 Code generation for unaligned packed loads of single precision data
16734 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16735 if (x86_sse_unaligned_move_optimal)
16738 if (x86_sse_partial_reg_dependency == true)
16750 Code generation for unaligned packed loads of double precision data
16751 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
16752 if (x86_sse_unaligned_move_optimal)
16755 if (x86_sse_split_regs == true)
/* Expand a misaligned vector move, choosing the best instruction
   sequence by vector width (64/32/16 bytes) and tuning flags.
   NOTE(review): several guard conditions and braces are elided in
   this extract.  */
16768 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
16770 rtx op0
, op1
, orig_op0
= NULL_RTX
, m
;
16771 rtx (*load_unaligned
) (rtx
, rtx
);
16772 rtx (*store_unaligned
) (rtx
, rtx
);
/* 64-byte (AVX-512) vectors.  */
16777 if (GET_MODE_SIZE (mode
) == 64)
16779 switch (GET_MODE_CLASS (mode
))
16781 case MODE_VECTOR_INT
:
16783 if (GET_MODE (op0
) != V16SImode
)
16788 op0
= gen_reg_rtx (V16SImode
);
16791 op0
= gen_lowpart (V16SImode
, op0
);
16793 op1
= gen_lowpart (V16SImode
, op1
);
16796 case MODE_VECTOR_FLOAT
:
16797 switch (GET_MODE (op0
))
16800 gcc_unreachable ();
16802 load_unaligned
= gen_avx512f_loaddquv16si
;
16803 store_unaligned
= gen_avx512f_storedquv16si
;
16806 load_unaligned
= gen_avx512f_loadups512
;
16807 store_unaligned
= gen_avx512f_storeups512
;
16810 load_unaligned
= gen_avx512f_loadupd512
;
16811 store_unaligned
= gen_avx512f_storeupd512
;
16816 emit_insn (load_unaligned (op0
, op1
));
16817 else if (MEM_P (op0
))
16818 emit_insn (store_unaligned (op0
, op1
));
16820 gcc_unreachable ();
16822 emit_move_insn (orig_op0
, gen_lowpart (GET_MODE (orig_op0
), op0
));
16826 gcc_unreachable ();
/* 32-byte (AVX) vectors: integer modes via V32QI, float modes via the
   256-bit splitter above.  */
16833 && GET_MODE_SIZE (mode
) == 32)
16835 switch (GET_MODE_CLASS (mode
))
16837 case MODE_VECTOR_INT
:
16839 if (GET_MODE (op0
) != V32QImode
)
16844 op0
= gen_reg_rtx (V32QImode
);
16847 op0
= gen_lowpart (V32QImode
, op0
);
16849 op1
= gen_lowpart (V32QImode
, op1
);
16852 case MODE_VECTOR_FLOAT
:
16853 ix86_avx256_split_vector_move_misalign (op0
, op1
);
16855 emit_move_insn (orig_op0
, gen_lowpart (GET_MODE (orig_op0
), op0
));
16859 gcc_unreachable ();
16867 /* Normal *mov<mode>_internal pattern will handle
16868 unaligned loads just fine if misaligned_operand
16869 is true, and without the UNSPEC it can be combined
16870 with arithmetic instructions. */
16872 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
16873 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
16874 && misaligned_operand (op1
, GET_MODE (op1
)))
16875 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16876 /* ??? If we have typed data, then it would appear that using
16877 movdqu is the only way to get unaligned data loaded with
16879 else if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16881 if (GET_MODE (op0
) != V16QImode
)
16884 op0
= gen_reg_rtx (V16QImode
);
16886 op1
= gen_lowpart (V16QImode
, op1
);
16887 /* We will eventually emit movups based on insn attributes. */
16888 emit_insn (gen_sse2_loaddquv16qi (op0
, op1
));
16890 emit_move_insn (orig_op0
, gen_lowpart (GET_MODE (orig_op0
), op0
));
/* V2DF loads: unaligned movupd when profitable, otherwise build the
   value from two 8-byte halves.  */
16892 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16897 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16898 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16899 || optimize_insn_for_size_p ())
16901 /* We will eventually emit movups based on insn attributes. */
16902 emit_insn (gen_sse2_loadupd (op0
, op1
));
16906 /* When SSE registers are split into halves, we can avoid
16907 writing to the top half twice. */
16908 if (TARGET_SSE_SPLIT_REGS
)
16910 emit_clobber (op0
);
16915 /* ??? Not sure about the best option for the Intel chips.
16916 The following would seem to satisfy; the register is
16917 entirely cleared, breaking the dependency chain. We
16918 then store to the upper half, with a dependency depth
16919 of one. A rumor has it that Intel recommends two movsd
16920 followed by an unpacklpd, but this is unconfirmed. And
16921 given that the dependency depth of the unpacklpd would
16922 still be one, I'm not sure why this would be better. */
16923 zero
= CONST0_RTX (V2DFmode
);
16926 m
= adjust_address (op1
, DFmode
, 0);
16927 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
16928 m
= adjust_address (op1
, DFmode
, 8);
16929 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
/* V4SF-style loads: movups when profitable, else two halves.  */
16936 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16937 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16938 || optimize_insn_for_size_p ())
16940 if (GET_MODE (op0
) != V4SFmode
)
16943 op0
= gen_reg_rtx (V4SFmode
);
16945 op1
= gen_lowpart (V4SFmode
, op1
);
16946 emit_insn (gen_sse_loadups (op0
, op1
));
16948 emit_move_insn (orig_op0
,
16949 gen_lowpart (GET_MODE (orig_op0
), op0
));
16953 if (mode
!= V4SFmode
)
16954 t
= gen_reg_rtx (V4SFmode
);
16958 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
16959 emit_move_insn (t
, CONST0_RTX (V4SFmode
));
16963 m
= adjust_address (op1
, V2SFmode
, 0);
16964 emit_insn (gen_sse_loadlps (t
, t
, m
));
16965 m
= adjust_address (op1
, V2SFmode
, 8);
16966 emit_insn (gen_sse_loadhps (t
, t
, m
));
16967 if (mode
!= V4SFmode
)
16968 emit_move_insn (op0
, gen_lowpart (mode
, t
));
/* Store side: mirrors of the load cases above.  */
16971 else if (MEM_P (op0
))
16973 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16975 op0
= gen_lowpart (V16QImode
, op0
);
16976 op1
= gen_lowpart (V16QImode
, op1
);
16977 /* We will eventually emit movups based on insn attributes. */
16978 emit_insn (gen_sse2_storedquv16qi (op0
, op1
));
16980 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16983 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16984 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16985 || optimize_insn_for_size_p ())
16986 /* We will eventually emit movups based on insn attributes. */
16987 emit_insn (gen_sse2_storeupd (op0
, op1
));
16990 m
= adjust_address (op0
, DFmode
, 0);
16991 emit_insn (gen_sse2_storelpd (m
, op1
));
16992 m
= adjust_address (op0
, DFmode
, 8);
16993 emit_insn (gen_sse2_storehpd (m
, op1
));
16998 if (mode
!= V4SFmode
)
16999 op1
= gen_lowpart (V4SFmode
, op1
);
17002 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17003 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17004 || optimize_insn_for_size_p ())
17006 op0
= gen_lowpart (V4SFmode
, op0
);
17007 emit_insn (gen_sse_storeups (op0
, op1
));
17011 m
= adjust_address (op0
, V2SFmode
, 0);
17012 emit_insn (gen_sse_storelps (m
, op1
));
17013 m
= adjust_address (op0
, V2SFmode
, 8);
17014 emit_insn (gen_sse_storehps (m
, op1
));
17019 gcc_unreachable ();
17022 /* Expand a push in MODE. This is some mode for which we do not support
17023 proper push instructions, at least from the registers that we expect
17024 the value to live in. */
17027 ix86_expand_push (enum machine_mode mode
, rtx x
)
17031 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
17032 GEN_INT (-GET_MODE_SIZE (mode
)),
17033 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
17034 if (tmp
!= stack_pointer_rtx
)
17035 emit_move_insn (stack_pointer_rtx
, tmp
);
17037 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
17039 /* When we push an operand onto stack, it has to be aligned at least
17040 at the function argument boundary. However since we don't have
17041 the argument type, we can't determine the actual argument
17043 emit_move_insn (tmp
, x
);
17046 /* Helper function of ix86_fixup_binary_operands to canonicalize
17047 operand order. Returns true if the operands should be swapped. */
/* NOTE(review): the return statements for each priority test are
   elided in this extract.  */
17050 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
17053 rtx dst
= operands
[0];
17054 rtx src1
= operands
[1];
17055 rtx src2
= operands
[2];
17057 /* If the operation is not commutative, we can't do anything. */
17058 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
17061 /* Highest priority is that src1 should match dst. */
17062 if (rtx_equal_p (dst
, src1
))
17064 if (rtx_equal_p (dst
, src2
))
17067 /* Next highest priority is that immediate constants come second. */
17068 if (immediate_operand (src2
, mode
))
17070 if (immediate_operand (src1
, mode
))
17073 /* Lowest priority is that memory references should come second. */
17083 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17084 destination to use for the operation. If different from the true
17085 destination in operands[0], a copy operation will be required. */
17088 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
17091 rtx dst
= operands
[0];
17092 rtx src1
= operands
[1];
17093 rtx src2
= operands
[2];
17095 /* Canonicalize operand order. */
17096 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
17100 /* It is invalid to swap operands of different modes. */
17101 gcc_assert (GET_MODE (src1
) == GET_MODE (src2
));
17108 /* Both source operands cannot be in memory. */
17109 if (MEM_P (src1
) && MEM_P (src2
))
17111 /* Optimization: Only read from memory once. */
17112 if (rtx_equal_p (src1
, src2
))
17114 src2
= force_reg (mode
, src2
);
17117 else if (rtx_equal_p (dst
, src1
))
17118 src2
= force_reg (mode
, src2
);
17120 src1
= force_reg (mode
, src1
);
17123 /* If the destination is memory, and we do not have matching source
17124 operands, do things in registers. */
17125 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
17126 dst
= gen_reg_rtx (mode
);
17128 /* Source 1 cannot be a constant. */
17129 if (CONSTANT_P (src1
))
17130 src1
= force_reg (mode
, src1
);
17132 /* Source 1 cannot be a non-matching memory. */
17133 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
17134 src1
= force_reg (mode
, src1
);
17136 /* Improve address combine. */
17138 && GET_MODE_CLASS (mode
) == MODE_INT
17140 src2
= force_reg (mode
, src2
);
/* Write the (possibly rewritten) sources back for the caller.  */
17142 operands
[1] = src1
;
17143 operands
[2] = src2
;
17147 /* Similarly, but assume that the destination has already been
17148 set up properly. */
17151 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
17152 enum machine_mode mode
, rtx operands
[])
17154 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
17155 gcc_assert (dst
== operands
[0]);
17158 /* Attempt to expand a binary operator. Make the expansion closer to the
17159 actual machine, then just general_operand, which will allow 3 separate
17160 memory references (one output, two input) in a single insn. */
17163 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
17166 rtx src1
, src2
, dst
, op
, clob
;
/* Canonicalize/legalize the operands first.  */
17168 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
17169 src1
= operands
[1];
17170 src2
= operands
[2];
17172 /* Emit the instruction. */
17174 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
17175 if (reload_in_progress
)
17177 /* Reload doesn't know about the flags register, and doesn't know that
17178 it doesn't want to clobber it. We can only do this with PLUS. */
17179 gcc_assert (code
== PLUS
);
17182 else if (reload_completed
17184 && !rtx_equal_p (dst
, src1
))
17186 /* This is going to be an LEA; avoid splitting it later. */
/* Otherwise the arithmetic form clobbers the flags: wrap the SET in
   a PARALLEL with a CC clobber.  */
17191 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17192 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
17195 /* Fix up the destination if needed. */
17196 if (dst
!= operands
[0])
17197 emit_move_insn (operands
[0], dst
);
17200 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17201 the given OPERANDS. */
17204 ix86_expand_vector_logical_operator (enum rtx_code code
, enum machine_mode mode
,
17207 rtx op1
= NULL_RTX
, op2
= NULL_RTX
;
17208 if (GET_CODE (operands
[1]) == SUBREG
)
17213 else if (GET_CODE (operands
[2]) == SUBREG
)
17218 /* Optimize (__m128i) d | (__m128i) e and similar code
17219 when d and e are float vectors into float vector logical
17220 insn. In C/C++ without using intrinsics there is no other way
17221 to express vector logical operation on float vectors than
17222 to cast them temporarily to integer vectors. */
17224 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17225 && ((GET_CODE (op2
) == SUBREG
|| GET_CODE (op2
) == CONST_VECTOR
))
17226 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1
))) == MODE_VECTOR_FLOAT
17227 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1
))) == GET_MODE_SIZE (mode
)
17228 && SUBREG_BYTE (op1
) == 0
17229 && (GET_CODE (op2
) == CONST_VECTOR
17230 || (GET_MODE (SUBREG_REG (op1
)) == GET_MODE (SUBREG_REG (op2
))
17231 && SUBREG_BYTE (op2
) == 0))
17232 && can_create_pseudo_p ())
17235 switch (GET_MODE (SUBREG_REG (op1
)))
/* Perform the operation directly in the float-vector mode and copy
   the result back through a lowpart.  */
17241 dst
= gen_reg_rtx (GET_MODE (SUBREG_REG (op1
)));
17242 if (GET_CODE (op2
) == CONST_VECTOR
)
17244 op2
= gen_lowpart (GET_MODE (dst
), op2
);
17245 op2
= force_reg (GET_MODE (dst
), op2
);
17250 op2
= SUBREG_REG (operands
[2]);
17251 if (!nonimmediate_operand (op2
, GET_MODE (dst
)))
17252 op2
= force_reg (GET_MODE (dst
), op2
);
17254 op1
= SUBREG_REG (op1
);
17255 if (!nonimmediate_operand (op1
, GET_MODE (dst
)))
17256 op1
= force_reg (GET_MODE (dst
), op1
);
17257 emit_insn (gen_rtx_SET (VOIDmode
, dst
,
17258 gen_rtx_fmt_ee (code
, GET_MODE (dst
),
17260 emit_move_insn (operands
[0], gen_lowpart (mode
, dst
));
/* Generic path: legalize both sources and emit the SET in MODE.  */
17266 if (!nonimmediate_operand (operands
[1], mode
))
17267 operands
[1] = force_reg (mode
, operands
[1]);
17268 if (!nonimmediate_operand (operands
[2], mode
))
17269 operands
[2] = force_reg (mode
, operands
[2]);
17270 ix86_fixup_binary_operands_no_copy (code
, mode
, operands
);
17271 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
17272 gen_rtx_fmt_ee (code
, mode
, operands
[1],
17276 /* Return TRUE or FALSE depending on whether the binary operator meets the
17277 appropriate constraints. */
17280 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
17283 rtx dst
= operands
[0];
17284 rtx src1
= operands
[1];
17285 rtx src2
= operands
[2];
17287 /* Both source operands cannot be in memory. */
17288 if (MEM_P (src1
) && MEM_P (src2
))
17291 /* Canonicalize operand order for commutative operators. */
17292 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
17299 /* If the destination is memory, we must have a matching source operand. */
17300 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
17303 /* Source 1 cannot be a constant. */
17304 if (CONSTANT_P (src1
))
17307 /* Source 1 cannot be a non-matching memory. */
17308 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
17309 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17310 return (code
== AND
17313 || (TARGET_64BIT
&& mode
== DImode
))
17314 && satisfies_constraint_L (src2
));
17319 /* Attempt to expand a unary operator. Make the expansion closer to the
17320 actual machine, then just general_operand, which will allow 2 separate
17321 memory references (one output, one input) in a single insn. */
17324 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
17327 int matching_memory
;
17328 rtx src
, dst
, op
, clob
;
17333 /* If the destination is memory, and we do not have matching source
17334 operands, do things in registers. */
17335 matching_memory
= 0;
17338 if (rtx_equal_p (dst
, src
))
17339 matching_memory
= 1;
17341 dst
= gen_reg_rtx (mode
);
17344 /* When source operand is memory, destination must match. */
17345 if (MEM_P (src
) && !matching_memory
)
17346 src
= force_reg (mode
, src
);
17348 /* Emit the instruction. */
17350 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
17351 if (reload_in_progress
|| code
== NOT
)
17353 /* Reload doesn't know about the flags register, and doesn't know that
17354 it doesn't want to clobber it. */
17355 gcc_assert (code
== NOT
);
/* Other unary codes (e.g. NEG) clobber the flags: wrap the SET in a
   PARALLEL with a CC clobber.  */
17360 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17361 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
17364 /* Fix up the destination if needed. */
17365 if (dst
!= operands
[0])
17366 emit_move_insn (operands
[0], dst
);
17369 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17370 divisor are within the range [0-255]. */
/* OPERANDS are (quotient, remainder, dividend, divisor).  Emits a
   runtime test for the [0-255] range and, when it holds, performs a
   much cheaper 8-bit unsigned divide instead.  */
17373 ix86_split_idivmod (enum machine_mode mode
, rtx operands
[],
17376 rtx end_label
, qimode_label
;
17377 rtx insn
, div
, mod
;
17378 rtx scratch
, tmp0
, tmp1
, tmp2
;
/* Mode-specific generators for the full divmod, the flags test, and
   the final zero-extension, chosen below.  */
17379 rtx (*gen_divmod4_1
) (rtx
, rtx
, rtx
, rtx
);
17380 rtx (*gen_zero_extend
) (rtx
, rtx
);
17381 rtx (*gen_test_ccno_1
) (rtx
, rtx
);
17386 gen_divmod4_1
= signed_p
? gen_divmodsi4_1
: gen_udivmodsi4_1
;
17387 gen_test_ccno_1
= gen_testsi_ccno_1
;
17388 gen_zero_extend
= gen_zero_extendqisi2
;
17391 gen_divmod4_1
= signed_p
? gen_divmoddi4_1
: gen_udivmoddi4_1
;
17392 gen_test_ccno_1
= gen_testdi_ccno_1
;
17393 gen_zero_extend
= gen_zero_extendqidi2
;
17396 gcc_unreachable ();
17399 end_label
= gen_label_rtx ();
17400 qimode_label
= gen_label_rtx ();
17402 scratch
= gen_reg_rtx (mode
);
17404 /* Use 8bit unsigned divimod if dividend and divisor are within
17405 the range [0-255]. */
/* OR the two inputs together and test against ~0xff: both fit in a
   byte iff no bit above bit 7 is set.  */
17406 emit_move_insn (scratch
, operands
[2]);
17407 scratch
= expand_simple_binop (mode
, IOR
, scratch
, operands
[3],
17408 scratch
, 1, OPTAB_DIRECT
);
17409 emit_insn (gen_test_ccno_1 (scratch
, GEN_INT (-0x100)));
17410 tmp0
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
17411 tmp0
= gen_rtx_EQ (VOIDmode
, tmp0
, const0_rtx
);
17412 tmp0
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp0
,
17413 gen_rtx_LABEL_REF (VOIDmode
, qimode_label
),
17415 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp0
));
/* No information either way: predict the branch 50/50.  */
17416 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
17417 JUMP_LABEL (insn
) = qimode_label
;
17419 /* Generate original signed/unsigned divimod. */
17420 div
= gen_divmod4_1 (operands
[0], operands
[1],
17421 operands
[2], operands
[3]);
17424 /* Branch to the end. */
17425 emit_jump_insn (gen_jump (end_label
));
17428 /* Generate 8bit unsigned divide. */
17429 emit_label (qimode_label
);
17430 /* Don't use operands[0] for result of 8bit divide since not all
17431 registers support QImode ZERO_EXTRACT. */
17432 tmp0
= simplify_gen_subreg (HImode
, scratch
, mode
, 0);
17433 tmp1
= simplify_gen_subreg (HImode
, operands
[2], mode
, 0);
17434 tmp2
= simplify_gen_subreg (QImode
, operands
[3], mode
, 0);
17435 emit_insn (gen_udivmodhiqi3 (tmp0
, tmp1
, tmp2
));
/* Build REG_EQUAL notes describing the quotient and remainder.  */
17439 div
= gen_rtx_DIV (SImode
, operands
[2], operands
[3]);
17440 mod
= gen_rtx_MOD (SImode
, operands
[2], operands
[3]);
17444 div
= gen_rtx_UDIV (SImode
, operands
[2], operands
[3]);
17445 mod
= gen_rtx_UMOD (SImode
, operands
[2], operands
[3]);
17448 /* Extract remainder from AH. */
17449 tmp1
= gen_rtx_ZERO_EXTRACT (mode
, tmp0
, GEN_INT (8), GEN_INT (8));
17450 if (REG_P (operands
[1]))
17451 insn
= emit_move_insn (operands
[1], tmp1
);
17454 /* Need a new scratch register since the old one has result
17456 scratch
= gen_reg_rtx (mode
);
17457 emit_move_insn (scratch
, tmp1
);
17458 insn
= emit_move_insn (operands
[1], scratch
);
17460 set_unique_reg_note (insn
, REG_EQUAL
, mod
);
17462 /* Zero extend quotient from AL. */
17463 tmp1
= gen_lowpart (QImode
, tmp0
);
17464 insn
= emit_insn (gen_zero_extend (operands
[0], tmp1
));
17465 set_unique_reg_note (insn
, REG_EQUAL
, div
);
17467 emit_label (end_label
);
17470 /* Whether it is OK to emit CFI directives when emitting asm code. */
/* Simply defers to the dwarf2 output machinery.  NOTE(review): the
   function signature line is elided in this extract.  */
17475 return dwarf2out_do_cfi_asm ();
/* Tuning limits for the backward LEA-stall search below: the maximum
   stall in half-cycles, and how many instructions to scan.  */
17478 #define LEA_MAX_STALL (3)
17479 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17481 /* Increase given DISTANCE in half-cycles according to
17482 dependencies between PREV and NEXT instructions.
17483 Add 1 half-cycle if there is no dependency and
17484 go to next cycle if there is some dependency. */
17486 static unsigned int
17487 increase_distance (rtx prev
, rtx next
, unsigned int distance
)
/* Missing either insn: conservatively round up to the next cycle.  */
17492 if (!prev
|| !next
)
17493 return distance
+ (distance
& 1) + 2;
/* No dataflow info available: assume independence.  */
17495 if (!DF_INSN_USES (next
) || !DF_INSN_DEFS (prev
))
17496 return distance
+ 1;
/* A def in PREV feeding a use in NEXT forces the next full cycle
   ((distance & 1) rounds an odd half-cycle count up).  */
17498 for (use_rec
= DF_INSN_USES (next
); *use_rec
; use_rec
++)
17499 for (def_rec
= DF_INSN_DEFS (prev
); *def_rec
; def_rec
++)
17500 if (!DF_REF_IS_ARTIFICIAL (*def_rec
)
17501 && DF_REF_REGNO (*use_rec
) == DF_REF_REGNO (*def_rec
))
17502 return distance
+ (distance
& 1) + 2;
17504 return distance
+ 1;
17507 /* Function checks if instruction INSN defines register number
17508 REGNO1 or REGNO2. */
17511 insn_defines_reg (unsigned int regno1
, unsigned int regno2
,
17516 for (def_rec
= DF_INSN_DEFS (insn
); *def_rec
; def_rec
++)
17517 if (DF_REF_REG_DEF_P (*def_rec
)
17518 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
17519 && (regno1
== DF_REF_REGNO (*def_rec
)
17520 || regno2
== DF_REF_REGNO (*def_rec
)))
17528 /* Function checks if instruction INSN uses register number
17529 REGNO as a part of address expression. */
17532 insn_uses_reg_mem (unsigned int regno
, rtx insn
)
17536 for (use_rec
= DF_INSN_USES (insn
); *use_rec
; use_rec
++)
17537 if (DF_REF_REG_MEM_P (*use_rec
) && regno
== DF_REF_REGNO (*use_rec
))
17543 /* Search backward for non-agu definition of register number REGNO1
17544 or register number REGNO2 in basic block starting from instruction
17545 START up to head of basic block or instruction INSN.
17547 Function puts true value into *FOUND var if definition was found
17548 and false otherwise.
17550 Distance in half-cycles between START and found instruction or head
17551 of BB is added to DISTANCE and returned. */
17554 distance_non_agu_define_in_bb (unsigned int regno1
, unsigned int regno2
,
17555 rtx insn
, int distance
,
17556 rtx start
, bool *found
)
17558 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
17566 && distance
< LEA_SEARCH_THRESHOLD
)
17568 if (NONDEBUG_INSN_P (prev
) && NONJUMP_INSN_P (prev
))
17570 distance
= increase_distance (prev
, next
, distance
);
17571 if (insn_defines_reg (regno1
, regno2
, prev
))
17573 if (recog_memoized (prev
) < 0
17574 || get_attr_type (prev
) != TYPE_LEA
)
17583 if (prev
== BB_HEAD (bb
))
17586 prev
= PREV_INSN (prev
);
17592 /* Search backward for non-agu definition of register number REGNO1
17593 or register number REGNO2 in INSN's basic block until
17594 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17595 2. Reach neighbour BBs boundary, or
17596 3. Reach agu definition.
17597 Returns the distance between the non-agu definition point and INSN.
17598 If no definition point, returns -1. */
17601 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
17604 basic_block bb
= BLOCK_FOR_INSN (insn
);
17606 bool found
= false;
17608 if (insn
!= BB_HEAD (bb
))
17609 distance
= distance_non_agu_define_in_bb (regno1
, regno2
, insn
,
17610 distance
, PREV_INSN (insn
),
17613 if (!found
&& distance
< LEA_SEARCH_THRESHOLD
)
17617 bool simple_loop
= false;
17619 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
17622 simple_loop
= true;
17627 distance
= distance_non_agu_define_in_bb (regno1
, regno2
,
17629 BB_END (bb
), &found
);
17632 int shortest_dist
= -1;
17633 bool found_in_bb
= false;
17635 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
17638 = distance_non_agu_define_in_bb (regno1
, regno2
,
17644 if (shortest_dist
< 0)
17645 shortest_dist
= bb_dist
;
17646 else if (bb_dist
> 0)
17647 shortest_dist
= MIN (bb_dist
, shortest_dist
);
17653 distance
= shortest_dist
;
17657 /* get_attr_type may modify recog data. We want to make sure
17658 that recog data is valid for instruction INSN, on which
17659 distance_non_agu_define is called. INSN is unchanged here. */
17660 extract_insn_cached (insn
);
17665 return distance
>> 1;
17668 /* Return the distance in half-cycles between INSN and the next
17669 insn that uses register number REGNO in memory address added
17670 to DISTANCE. Return -1 if REGNO0 is set.
17672 Put true value into *FOUND if register usage was found and
17674 Put true value into *REDEFINED if register redefinition was
17675 found and false otherwise. */
17678 distance_agu_use_in_bb (unsigned int regno
,
17679 rtx insn
, int distance
, rtx start
,
17680 bool *found
, bool *redefined
)
17682 basic_block bb
= NULL
;
17687 *redefined
= false;
17689 if (start
!= NULL_RTX
)
17691 bb
= BLOCK_FOR_INSN (start
);
17692 if (start
!= BB_HEAD (bb
))
17693 /* If insn and start belong to the same bb, set prev to insn,
17694 so the call to increase_distance will increase the distance
17695 between insns by 1. */
17701 && distance
< LEA_SEARCH_THRESHOLD
)
17703 if (NONDEBUG_INSN_P (next
) && NONJUMP_INSN_P (next
))
17705 distance
= increase_distance(prev
, next
, distance
);
17706 if (insn_uses_reg_mem (regno
, next
))
17708 /* Return DISTANCE if OP0 is used in memory
17709 address in NEXT. */
17714 if (insn_defines_reg (regno
, INVALID_REGNUM
, next
))
17716 /* Return -1 if OP0 is set in NEXT. */
17724 if (next
== BB_END (bb
))
17727 next
= NEXT_INSN (next
);
17733 /* Return the distance between INSN and the next insn that uses
17734 register number REGNO0 in memory address. Return -1 if no such
17735 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
17738 distance_agu_use (unsigned int regno0
, rtx insn
)
17740 basic_block bb
= BLOCK_FOR_INSN (insn
);
17742 bool found
= false;
17743 bool redefined
= false;
17745 if (insn
!= BB_END (bb
))
17746 distance
= distance_agu_use_in_bb (regno0
, insn
, distance
,
17748 &found
, &redefined
);
17750 if (!found
&& !redefined
&& distance
< LEA_SEARCH_THRESHOLD
)
17754 bool simple_loop
= false;
17756 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
17759 simple_loop
= true;
17764 distance
= distance_agu_use_in_bb (regno0
, insn
,
17765 distance
, BB_HEAD (bb
),
17766 &found
, &redefined
);
17769 int shortest_dist
= -1;
17770 bool found_in_bb
= false;
17771 bool redefined_in_bb
= false;
17773 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
17776 = distance_agu_use_in_bb (regno0
, insn
,
17777 distance
, BB_HEAD (e
->dest
),
17778 &found_in_bb
, &redefined_in_bb
);
17781 if (shortest_dist
< 0)
17782 shortest_dist
= bb_dist
;
17783 else if (bb_dist
> 0)
17784 shortest_dist
= MIN (bb_dist
, shortest_dist
);
17790 distance
= shortest_dist
;
17794 if (!found
|| redefined
)
17797 return distance
>> 1;
17800 /* Define this macro to tune LEA priority vs ADD, it take effect when
17801 there is a dilemma of choicing LEA or ADD
17802 Negative value: ADD is more preferred than LEA
17804 Positive value: LEA is more preferred than ADD*/
17805 #define IX86_LEA_PRIORITY 0
17807 /* Return true if usage of lea INSN has performance advantage
17808 over a sequence of instructions. Instructions sequence has
17809 SPLIT_COST cycles higher latency than lea latency. */
17812 ix86_lea_outperforms (rtx insn
, unsigned int regno0
, unsigned int regno1
,
17813 unsigned int regno2
, int split_cost
, bool has_scale
)
17815 int dist_define
, dist_use
;
17817 /* For Silvermont if using a 2-source or 3-source LEA for
17818 non-destructive destination purposes, or due to wanting
17819 ability to use SCALE, the use of LEA is justified. */
17820 if (ix86_tune
== PROCESSOR_SLM
)
17824 if (split_cost
< 1)
17826 if (regno0
== regno1
|| regno0
== regno2
)
17831 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
17832 dist_use
= distance_agu_use (regno0
, insn
);
17834 if (dist_define
< 0 || dist_define
>= LEA_MAX_STALL
)
17836 /* If there is no non AGU operand definition, no AGU
17837 operand usage and split cost is 0 then both lea
17838 and non lea variants have same priority. Currently
17839 we prefer lea for 64 bit code and non lea on 32 bit
17841 if (dist_use
< 0 && split_cost
== 0)
17842 return TARGET_64BIT
|| IX86_LEA_PRIORITY
;
17847 /* With longer definitions distance lea is more preferable.
17848 Here we change it to take into account splitting cost and
17850 dist_define
+= split_cost
+ IX86_LEA_PRIORITY
;
17852 /* If there is no use in memory addess then we just check
17853 that split cost exceeds AGU stall. */
17855 return dist_define
> LEA_MAX_STALL
;
17857 /* If this insn has both backward non-agu dependence and forward
17858 agu dependence, the one with short distance takes effect. */
17859 return dist_define
>= dist_use
;
17862 /* Return true if it is legal to clobber flags by INSN and
17863 false otherwise. */
17866 ix86_ok_to_clobber_flags (rtx insn
)
17868 basic_block bb
= BLOCK_FOR_INSN (insn
);
17874 if (NONDEBUG_INSN_P (insn
))
17876 for (use
= DF_INSN_USES (insn
); *use
; use
++)
17877 if (DF_REF_REG_USE_P (*use
) && DF_REF_REGNO (*use
) == FLAGS_REG
)
17880 if (insn_defines_reg (FLAGS_REG
, INVALID_REGNUM
, insn
))
17884 if (insn
== BB_END (bb
))
17887 insn
= NEXT_INSN (insn
);
17890 live
= df_get_live_out(bb
);
17891 return !REGNO_REG_SET_P (live
, FLAGS_REG
);
17894 /* Return true if we need to split op0 = op1 + op2 into a sequence of
17895 move and add to avoid AGU stalls. */
17898 ix86_avoid_lea_for_add (rtx insn
, rtx operands
[])
17900 unsigned int regno0
, regno1
, regno2
;
17902 /* Check if we need to optimize. */
17903 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17906 /* Check it is correct to split here. */
17907 if (!ix86_ok_to_clobber_flags(insn
))
17910 regno0
= true_regnum (operands
[0]);
17911 regno1
= true_regnum (operands
[1]);
17912 regno2
= true_regnum (operands
[2]);
17914 /* We need to split only adds with non destructive
17915 destination operand. */
17916 if (regno0
== regno1
|| regno0
== regno2
)
17919 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 1, false);
17922 /* Return true if we should emit lea instruction instead of mov
17926 ix86_use_lea_for_mov (rtx insn
, rtx operands
[])
17928 unsigned int regno0
, regno1
;
17930 /* Check if we need to optimize. */
17931 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17934 /* Use lea for reg to reg moves only. */
17935 if (!REG_P (operands
[0]) || !REG_P (operands
[1]))
17938 regno0
= true_regnum (operands
[0]);
17939 regno1
= true_regnum (operands
[1]);
17941 return ix86_lea_outperforms (insn
, regno0
, regno1
, INVALID_REGNUM
, 0, false);
17944 /* Return true if we need to split lea into a sequence of
17945 instructions to avoid AGU stalls. */
17948 ix86_avoid_lea_for_addr (rtx insn
, rtx operands
[])
17950 unsigned int regno0
, regno1
, regno2
;
17952 struct ix86_address parts
;
17955 /* Check we need to optimize. */
17956 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17959 /* Check it is correct to split here. */
17960 if (!ix86_ok_to_clobber_flags(insn
))
17963 ok
= ix86_decompose_address (operands
[1], &parts
);
17966 /* There should be at least two components in the address. */
17967 if ((parts
.base
!= NULL_RTX
) + (parts
.index
!= NULL_RTX
)
17968 + (parts
.disp
!= NULL_RTX
) + (parts
.scale
> 1) < 2)
17971 /* We should not split into add if non legitimate pic
17972 operand is used as displacement. */
17973 if (parts
.disp
&& flag_pic
&& !LEGITIMATE_PIC_OPERAND_P (parts
.disp
))
17976 regno0
= true_regnum (operands
[0]) ;
17977 regno1
= INVALID_REGNUM
;
17978 regno2
= INVALID_REGNUM
;
17981 regno1
= true_regnum (parts
.base
);
17983 regno2
= true_regnum (parts
.index
);
17987 /* Compute how many cycles we will add to execution time
17988 if split lea into a sequence of instructions. */
17989 if (parts
.base
|| parts
.index
)
17991 /* Have to use mov instruction if non desctructive
17992 destination form is used. */
17993 if (regno1
!= regno0
&& regno2
!= regno0
)
17996 /* Have to add index to base if both exist. */
17997 if (parts
.base
&& parts
.index
)
18000 /* Have to use shift and adds if scale is 2 or greater. */
18001 if (parts
.scale
> 1)
18003 if (regno0
!= regno1
)
18005 else if (regno2
== regno0
)
18008 split_cost
+= parts
.scale
;
18011 /* Have to use add instruction with immediate if
18012 disp is non zero. */
18013 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
18016 /* Subtract the price of lea. */
18020 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, split_cost
,
18024 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18025 matches destination. RTX includes clobber of FLAGS_REG. */
18028 ix86_emit_binop (enum rtx_code code
, enum machine_mode mode
,
18033 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, dst
, src
));
18034 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
18036 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
18039 /* Return true if regno1 def is nearest to the insn. */
18042 find_nearest_reg_def (rtx insn
, int regno1
, int regno2
)
18045 rtx start
= BB_HEAD (BLOCK_FOR_INSN (insn
));
18049 while (prev
&& prev
!= start
)
18051 if (!INSN_P (prev
) || !NONDEBUG_INSN_P (prev
))
18053 prev
= PREV_INSN (prev
);
18056 if (insn_defines_reg (regno1
, INVALID_REGNUM
, prev
))
18058 else if (insn_defines_reg (regno2
, INVALID_REGNUM
, prev
))
18060 prev
= PREV_INSN (prev
);
18063 /* None of the regs is defined in the bb. */
18067 /* Split lea instructions into a sequence of instructions
18068 which are executed on ALU to avoid AGU stalls.
18069 It is assumed that it is allowed to clobber flags register
18070 at lea position. */
18073 ix86_split_lea_for_addr (rtx insn
, rtx operands
[], enum machine_mode mode
)
18075 unsigned int regno0
, regno1
, regno2
;
18076 struct ix86_address parts
;
18080 ok
= ix86_decompose_address (operands
[1], &parts
);
18083 target
= gen_lowpart (mode
, operands
[0]);
18085 regno0
= true_regnum (target
);
18086 regno1
= INVALID_REGNUM
;
18087 regno2
= INVALID_REGNUM
;
18091 parts
.base
= gen_lowpart (mode
, parts
.base
);
18092 regno1
= true_regnum (parts
.base
);
18097 parts
.index
= gen_lowpart (mode
, parts
.index
);
18098 regno2
= true_regnum (parts
.index
);
18102 parts
.disp
= gen_lowpart (mode
, parts
.disp
);
18104 if (parts
.scale
> 1)
18106 /* Case r1 = r1 + ... */
18107 if (regno1
== regno0
)
18109 /* If we have a case r1 = r1 + C * r1 then we
18110 should use multiplication which is very
18111 expensive. Assume cost model is wrong if we
18112 have such case here. */
18113 gcc_assert (regno2
!= regno0
);
18115 for (adds
= parts
.scale
; adds
> 0; adds
--)
18116 ix86_emit_binop (PLUS
, mode
, target
, parts
.index
);
18120 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18121 if (regno0
!= regno2
)
18122 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
18124 /* Use shift for scaling. */
18125 ix86_emit_binop (ASHIFT
, mode
, target
,
18126 GEN_INT (exact_log2 (parts
.scale
)));
18129 ix86_emit_binop (PLUS
, mode
, target
, parts
.base
);
18131 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
18132 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
18135 else if (!parts
.base
&& !parts
.index
)
18137 gcc_assert(parts
.disp
);
18138 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.disp
));
18144 if (regno0
!= regno2
)
18145 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
18147 else if (!parts
.index
)
18149 if (regno0
!= regno1
)
18150 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.base
));
18154 if (regno0
== regno1
)
18156 else if (regno0
== regno2
)
18162 /* Find better operand for SET instruction, depending
18163 on which definition is farther from the insn. */
18164 if (find_nearest_reg_def (insn
, regno1
, regno2
))
18165 tmp
= parts
.index
, tmp1
= parts
.base
;
18167 tmp
= parts
.base
, tmp1
= parts
.index
;
18169 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
18171 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
18172 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
18174 ix86_emit_binop (PLUS
, mode
, target
, tmp1
);
18178 ix86_emit_binop (PLUS
, mode
, target
, tmp
);
18181 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
18182 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
18186 /* Return true if it is ok to optimize an ADD operation to LEA
18187 operation to avoid flag register consumation. For most processors,
18188 ADD is faster than LEA. For the processors like ATOM, if the
18189 destination register of LEA holds an actual address which will be
18190 used soon, LEA is better and otherwise ADD is better. */
18193 ix86_lea_for_add_ok (rtx insn
, rtx operands
[])
18195 unsigned int regno0
= true_regnum (operands
[0]);
18196 unsigned int regno1
= true_regnum (operands
[1]);
18197 unsigned int regno2
= true_regnum (operands
[2]);
18199 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18200 if (regno0
!= regno1
&& regno0
!= regno2
)
18203 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
18206 return ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 0, false);
18209 /* Return true if destination reg of SET_BODY is shift count of
18213 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
18219 /* Retrieve destination of SET_BODY. */
18220 switch (GET_CODE (set_body
))
18223 set_dest
= SET_DEST (set_body
);
18224 if (!set_dest
|| !REG_P (set_dest
))
18228 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
18229 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
18237 /* Retrieve shift count of USE_BODY. */
18238 switch (GET_CODE (use_body
))
18241 shift_rtx
= XEXP (use_body
, 1);
18244 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
18245 if (ix86_dep_by_shift_count_body (set_body
,
18246 XVECEXP (use_body
, 0, i
)))
18254 && (GET_CODE (shift_rtx
) == ASHIFT
18255 || GET_CODE (shift_rtx
) == LSHIFTRT
18256 || GET_CODE (shift_rtx
) == ASHIFTRT
18257 || GET_CODE (shift_rtx
) == ROTATE
18258 || GET_CODE (shift_rtx
) == ROTATERT
))
18260 rtx shift_count
= XEXP (shift_rtx
, 1);
18262 /* Return true if shift count is dest of SET_BODY. */
18263 if (REG_P (shift_count
))
18265 /* Add check since it can be invoked before register
18266 allocation in pre-reload schedule. */
18267 if (reload_completed
18268 && true_regnum (set_dest
) == true_regnum (shift_count
))
18270 else if (REGNO(set_dest
) == REGNO(shift_count
))
18278 /* Return true if destination reg of SET_INSN is shift count of
18282 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
18284 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
18285 PATTERN (use_insn
));
18288 /* Return TRUE or FALSE depending on whether the unary operator meets the
18289 appropriate constraints. */
18292 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
18293 enum machine_mode mode ATTRIBUTE_UNUSED
,
18296 /* If one of operands is memory, source and destination must match. */
18297 if ((MEM_P (operands
[0])
18298 || MEM_P (operands
[1]))
18299 && ! rtx_equal_p (operands
[0], operands
[1]))
18304 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18305 are ok, keeping in mind the possible movddup alternative. */
18308 ix86_vec_interleave_v2df_operator_ok (rtx operands
[3], bool high
)
18310 if (MEM_P (operands
[0]))
18311 return rtx_equal_p (operands
[0], operands
[1 + high
]);
18312 if (MEM_P (operands
[1]) && MEM_P (operands
[2]))
18313 return TARGET_SSE3
&& rtx_equal_p (operands
[1], operands
[2]);
18317 /* Post-reload splitter for converting an SF or DFmode value in an
18318 SSE register into an unsigned SImode. */
18321 ix86_split_convert_uns_si_sse (rtx operands
[])
18323 enum machine_mode vecmode
;
18324 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
18326 large
= operands
[1];
18327 zero_or_two31
= operands
[2];
18328 input
= operands
[3];
18329 two31
= operands
[4];
18330 vecmode
= GET_MODE (large
);
18331 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
18333 /* Load up the value into the low element. We must ensure that the other
18334 elements are valid floats -- zero is the easiest such value. */
18337 if (vecmode
== V4SFmode
)
18338 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
18340 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
18344 input
= gen_rtx_REG (vecmode
, REGNO (input
));
18345 emit_move_insn (value
, CONST0_RTX (vecmode
));
18346 if (vecmode
== V4SFmode
)
18347 emit_insn (gen_sse_movss (value
, value
, input
));
18349 emit_insn (gen_sse2_movsd (value
, value
, input
));
18352 emit_move_insn (large
, two31
);
18353 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
18355 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
18356 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
18358 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
18359 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
18361 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
18362 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
18364 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
18365 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
18367 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
18368 if (vecmode
== V4SFmode
)
18369 emit_insn (gen_fix_truncv4sfv4si2 (x
, value
));
18371 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
18374 emit_insn (gen_xorv4si3 (value
, value
, large
));
18377 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18378 Expects the 64-bit DImode to be supplied in a pair of integral
18379 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18380 -mfpmath=sse, !optimize_size only. */
18383 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
18385 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
18386 rtx int_xmm
, fp_xmm
;
18387 rtx biases
, exponents
;
18390 int_xmm
= gen_reg_rtx (V4SImode
);
18391 if (TARGET_INTER_UNIT_MOVES_TO_VEC
)
18392 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
18393 else if (TARGET_SSE_SPLIT_REGS
)
18395 emit_clobber (int_xmm
);
18396 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
18400 x
= gen_reg_rtx (V2DImode
);
18401 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
18402 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
18405 x
= gen_rtx_CONST_VECTOR (V4SImode
,
18406 gen_rtvec (4, GEN_INT (0x43300000UL
),
18407 GEN_INT (0x45300000UL
),
18408 const0_rtx
, const0_rtx
));
18409 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
18411 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18412 emit_insn (gen_vec_interleave_lowv4si (int_xmm
, int_xmm
, exponents
));
18414 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18415 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18416 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18417 (0x1.0p84 + double(fp_value_hi_xmm)).
18418 Note these exponents differ by 32. */
18420 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
18422 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18423 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18424 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
18425 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
18426 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
18427 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
18428 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
18429 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
18430 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
18432 /* Add the upper and lower DFmode values together. */
18434 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
18437 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
18438 emit_insn (gen_vec_interleave_highv2df (fp_xmm
, fp_xmm
, fp_xmm
));
18439 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
18442 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
18445 /* Not used, but eases macroization of patterns. */
18447 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED
,
18448 rtx input ATTRIBUTE_UNUSED
)
18450 gcc_unreachable ();
18453 /* Convert an unsigned SImode value into a DFmode. Only currently used
18454 for SSE, but applicable anywhere. */
18457 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
18459 REAL_VALUE_TYPE TWO31r
;
18462 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
18463 NULL
, 1, OPTAB_DIRECT
);
18465 fp
= gen_reg_rtx (DFmode
);
18466 emit_insn (gen_floatsidf2 (fp
, x
));
18468 real_ldexp (&TWO31r
, &dconst1
, 31);
18469 x
= const_double_from_real_value (TWO31r
, DFmode
);
18471 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
18473 emit_move_insn (target
, x
);
18476 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18477 32-bit mode; otherwise we have a direct convert instruction. */
18480 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
18482 REAL_VALUE_TYPE TWO32r
;
18483 rtx fp_lo
, fp_hi
, x
;
18485 fp_lo
= gen_reg_rtx (DFmode
);
18486 fp_hi
= gen_reg_rtx (DFmode
);
18488 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
18490 real_ldexp (&TWO32r
, &dconst1
, 32);
18491 x
= const_double_from_real_value (TWO32r
, DFmode
);
18492 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
18494 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
18496 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
18499 emit_move_insn (target
, x
);
18502 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18503 For x86_32, -mfpmath=sse, !optimize_size only. */
18505 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
18507 REAL_VALUE_TYPE ONE16r
;
18508 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
18510 real_ldexp (&ONE16r
, &dconst1
, 16);
18511 x
= const_double_from_real_value (ONE16r
, SFmode
);
18512 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
18513 NULL
, 0, OPTAB_DIRECT
);
18514 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
18515 NULL
, 0, OPTAB_DIRECT
);
18516 fp_hi
= gen_reg_rtx (SFmode
);
18517 fp_lo
= gen_reg_rtx (SFmode
);
18518 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
18519 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
18520 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
18522 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
18524 if (!rtx_equal_p (target
, fp_hi
))
18525 emit_move_insn (target
, fp_hi
);
18528 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18529 a vector of unsigned ints VAL to vector of floats TARGET. */
18532 ix86_expand_vector_convert_uns_vsivsf (rtx target
, rtx val
)
18535 REAL_VALUE_TYPE TWO16r
;
18536 enum machine_mode intmode
= GET_MODE (val
);
18537 enum machine_mode fltmode
= GET_MODE (target
);
18538 rtx (*cvt
) (rtx
, rtx
);
18540 if (intmode
== V4SImode
)
18541 cvt
= gen_floatv4siv4sf2
;
18543 cvt
= gen_floatv8siv8sf2
;
18544 tmp
[0] = ix86_build_const_vector (intmode
, 1, GEN_INT (0xffff));
18545 tmp
[0] = force_reg (intmode
, tmp
[0]);
18546 tmp
[1] = expand_simple_binop (intmode
, AND
, val
, tmp
[0], NULL_RTX
, 1,
18548 tmp
[2] = expand_simple_binop (intmode
, LSHIFTRT
, val
, GEN_INT (16),
18549 NULL_RTX
, 1, OPTAB_DIRECT
);
18550 tmp
[3] = gen_reg_rtx (fltmode
);
18551 emit_insn (cvt (tmp
[3], tmp
[1]));
18552 tmp
[4] = gen_reg_rtx (fltmode
);
18553 emit_insn (cvt (tmp
[4], tmp
[2]));
18554 real_ldexp (&TWO16r
, &dconst1
, 16);
18555 tmp
[5] = const_double_from_real_value (TWO16r
, SFmode
);
18556 tmp
[5] = force_reg (fltmode
, ix86_build_const_vector (fltmode
, 1, tmp
[5]));
18557 tmp
[6] = expand_simple_binop (fltmode
, MULT
, tmp
[4], tmp
[5], NULL_RTX
, 1,
18559 tmp
[7] = expand_simple_binop (fltmode
, PLUS
, tmp
[3], tmp
[6], target
, 1,
18561 if (tmp
[7] != target
)
18562 emit_move_insn (target
, tmp
[7]);
18565 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18566 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18567 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18568 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18571 ix86_expand_adjust_ufix_to_sfix_si (rtx val
, rtx
*xorp
)
18573 REAL_VALUE_TYPE TWO31r
;
18574 rtx two31r
, tmp
[4];
18575 enum machine_mode mode
= GET_MODE (val
);
18576 enum machine_mode scalarmode
= GET_MODE_INNER (mode
);
18577 enum machine_mode intmode
= GET_MODE_SIZE (mode
) == 32 ? V8SImode
: V4SImode
;
18578 rtx (*cmp
) (rtx
, rtx
, rtx
, rtx
);
18581 for (i
= 0; i
< 3; i
++)
18582 tmp
[i
] = gen_reg_rtx (mode
);
18583 real_ldexp (&TWO31r
, &dconst1
, 31);
18584 two31r
= const_double_from_real_value (TWO31r
, scalarmode
);
18585 two31r
= ix86_build_const_vector (mode
, 1, two31r
);
18586 two31r
= force_reg (mode
, two31r
);
18589 case V8SFmode
: cmp
= gen_avx_maskcmpv8sf3
; break;
18590 case V4SFmode
: cmp
= gen_sse_maskcmpv4sf3
; break;
18591 case V4DFmode
: cmp
= gen_avx_maskcmpv4df3
; break;
18592 case V2DFmode
: cmp
= gen_sse2_maskcmpv2df3
; break;
18593 default: gcc_unreachable ();
18595 tmp
[3] = gen_rtx_LE (mode
, two31r
, val
);
18596 emit_insn (cmp (tmp
[0], two31r
, val
, tmp
[3]));
18597 tmp
[1] = expand_simple_binop (mode
, AND
, tmp
[0], two31r
, tmp
[1],
18599 if (intmode
== V4SImode
|| TARGET_AVX2
)
18600 *xorp
= expand_simple_binop (intmode
, ASHIFT
,
18601 gen_lowpart (intmode
, tmp
[0]),
18602 GEN_INT (31), NULL_RTX
, 0,
18606 rtx two31
= GEN_INT ((unsigned HOST_WIDE_INT
) 1 << 31);
18607 two31
= ix86_build_const_vector (intmode
, 1, two31
);
18608 *xorp
= expand_simple_binop (intmode
, AND
,
18609 gen_lowpart (intmode
, tmp
[0]),
18610 two31
, NULL_RTX
, 0,
18613 return expand_simple_binop (mode
, MINUS
, val
, tmp
[1], tmp
[2],
18617 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18618 then replicate the value for all elements of the vector
18622 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
18626 enum machine_mode scalar_mode
;
18643 n_elt
= GET_MODE_NUNITS (mode
);
18644 v
= rtvec_alloc (n_elt
);
18645 scalar_mode
= GET_MODE_INNER (mode
);
18647 RTVEC_ELT (v
, 0) = value
;
18649 for (i
= 1; i
< n_elt
; ++i
)
18650 RTVEC_ELT (v
, i
) = vect
? value
: CONST0_RTX (scalar_mode
);
18652 return gen_rtx_CONST_VECTOR (mode
, v
);
18655 gcc_unreachable ();
18659 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18660 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18661 for an SSE register. If VECT is true, then replicate the mask for
18662 all elements of the vector register. If INVERT is true, then create
18663 a mask excluding the sign bit. */
18666 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
18668 enum machine_mode vec_mode
, imode
;
18669 HOST_WIDE_INT hi
, lo
;
18674 /* Find the sign bit, sign extended to 2*HWI. */
18682 mode
= GET_MODE_INNER (mode
);
18684 lo
= 0x80000000, hi
= lo
< 0;
18692 mode
= GET_MODE_INNER (mode
);
18694 if (HOST_BITS_PER_WIDE_INT
>= 64)
18695 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
18697 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
18702 vec_mode
= VOIDmode
;
18703 if (HOST_BITS_PER_WIDE_INT
>= 64)
18706 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
18713 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
18717 lo
= ~lo
, hi
= ~hi
;
18723 mask
= immed_double_const (lo
, hi
, imode
);
18725 vec
= gen_rtvec (2, v
, mask
);
18726 v
= gen_rtx_CONST_VECTOR (V2DImode
, vec
);
18727 v
= copy_to_mode_reg (mode
, gen_lowpart (mode
, v
));
18734 gcc_unreachable ();
18738 lo
= ~lo
, hi
= ~hi
;
18740 /* Force this value into the low part of a fp vector constant. */
18741 mask
= immed_double_const (lo
, hi
, imode
);
18742 mask
= gen_lowpart (mode
, mask
);
18744 if (vec_mode
== VOIDmode
)
18745 return force_reg (mode
, mask
);
18747 v
= ix86_build_const_vector (vec_mode
, vect
, mask
);
18748 return force_reg (vec_mode
, v
);
18751 /* Generate code for floating point ABS or NEG. */
18754 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
18757 rtx mask
, set
, dst
, src
;
18758 bool use_sse
= false;
18759 bool vector_mode
= VECTOR_MODE_P (mode
);
18760 enum machine_mode vmode
= mode
;
18764 else if (mode
== TFmode
)
18766 else if (TARGET_SSE_MATH
)
18768 use_sse
= SSE_FLOAT_MODE_P (mode
);
18769 if (mode
== SFmode
)
18771 else if (mode
== DFmode
)
18775 /* NEG and ABS performed with SSE use bitwise mask operations.
18776 Create the appropriate mask now. */
18778 mask
= ix86_build_signbit_mask (vmode
, vector_mode
, code
== ABS
);
18785 set
= gen_rtx_fmt_e (code
, mode
, src
);
18786 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
18793 use
= gen_rtx_USE (VOIDmode
, mask
);
18795 par
= gen_rtvec (2, set
, use
);
18798 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
18799 par
= gen_rtvec (3, set
, use
, clob
);
18801 emit_insn (gen_rtx_PARALLEL (VOIDmode
, par
));
18807 /* Expand a copysign operation. Special case operand 0 being a constant. */
18810 ix86_expand_copysign (rtx operands
[])
18812 enum machine_mode mode
, vmode
;
18813 rtx dest
, op0
, op1
, mask
, nmask
;
18815 dest
= operands
[0];
18819 mode
= GET_MODE (dest
);
18821 if (mode
== SFmode
)
18823 else if (mode
== DFmode
)
18828 if (GET_CODE (op0
) == CONST_DOUBLE
)
18830 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
18832 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
18833 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
18835 if (mode
== SFmode
|| mode
== DFmode
)
18837 if (op0
== CONST0_RTX (mode
))
18838 op0
= CONST0_RTX (vmode
);
18841 rtx v
= ix86_build_const_vector (vmode
, false, op0
);
18843 op0
= force_reg (vmode
, v
);
18846 else if (op0
!= CONST0_RTX (mode
))
18847 op0
= force_reg (mode
, op0
);
18849 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18851 if (mode
== SFmode
)
18852 copysign_insn
= gen_copysignsf3_const
;
18853 else if (mode
== DFmode
)
18854 copysign_insn
= gen_copysigndf3_const
;
18856 copysign_insn
= gen_copysigntf3_const
;
18858 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
18862 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
18864 nmask
= ix86_build_signbit_mask (vmode
, 0, 1);
18865 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18867 if (mode
== SFmode
)
18868 copysign_insn
= gen_copysignsf3_var
;
18869 else if (mode
== DFmode
)
18870 copysign_insn
= gen_copysigndf3_var
;
18872 copysign_insn
= gen_copysigntf3_var
;
18874 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
18878 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
18879 be a constant, and so has already been expanded into a vector constant. */
18882 ix86_split_copysign_const (rtx operands
[])
18884 enum machine_mode mode
, vmode
;
18885 rtx dest
, op0
, mask
, x
;
18887 dest
= operands
[0];
18889 mask
= operands
[3];
18891 mode
= GET_MODE (dest
);
18892 vmode
= GET_MODE (mask
);
18894 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
18895 x
= gen_rtx_AND (vmode
, dest
, mask
);
18896 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18898 if (op0
!= CONST0_RTX (vmode
))
18900 x
= gen_rtx_IOR (vmode
, dest
, op0
);
18901 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18905 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
18906 so we have to do two masks. */
18909 ix86_split_copysign_var (rtx operands
[])
18911 enum machine_mode mode
, vmode
;
18912 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
18914 dest
= operands
[0];
18915 scratch
= operands
[1];
18918 nmask
= operands
[4];
18919 mask
= operands
[5];
18921 mode
= GET_MODE (dest
);
18922 vmode
= GET_MODE (mask
);
18924 if (rtx_equal_p (op0
, op1
))
18926 /* Shouldn't happen often (it's useless, obviously), but when it does
18927 we'd generate incorrect code if we continue below. */
18928 emit_move_insn (dest
, op0
);
18932 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
18934 gcc_assert (REGNO (op1
) == REGNO (scratch
));
18936 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18937 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18940 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18941 x
= gen_rtx_NOT (vmode
, dest
);
18942 x
= gen_rtx_AND (vmode
, x
, op0
);
18943 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18947 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
18949 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18951 else /* alternative 2,4 */
18953 gcc_assert (REGNO (mask
) == REGNO (scratch
));
18954 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
18955 x
= gen_rtx_AND (vmode
, scratch
, op1
);
18957 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18959 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
18961 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18962 x
= gen_rtx_AND (vmode
, dest
, nmask
);
18964 else /* alternative 3,4 */
18966 gcc_assert (REGNO (nmask
) == REGNO (dest
));
18968 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18969 x
= gen_rtx_AND (vmode
, dest
, op0
);
18971 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18974 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
18975 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18978 /* Return TRUE or FALSE depending on whether the first SET in INSN
18979 has source and destination with matching CC modes, and that the
18980 CC mode is at least as constrained as REQ_MODE. */
18983 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
18986 enum machine_mode set_mode
;
18988 set
= PATTERN (insn
);
18989 if (GET_CODE (set
) == PARALLEL
)
18990 set
= XVECEXP (set
, 0, 0);
18991 gcc_assert (GET_CODE (set
) == SET
);
18992 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
18994 set_mode
= GET_MODE (SET_DEST (set
));
18998 if (req_mode
!= CCNOmode
18999 && (req_mode
!= CCmode
19000 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
19004 if (req_mode
== CCGCmode
)
19008 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
19012 if (req_mode
== CCZmode
)
19022 if (set_mode
!= req_mode
)
19027 gcc_unreachable ();
19030 return GET_MODE (SET_SRC (set
)) == set_mode
;
19033 /* Generate insn patterns to do an integer compare of OPERANDS. */
19036 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
19038 enum machine_mode cmpmode
;
19041 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
19042 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
19044 /* This is very simple, but making the interface the same as in the
19045 FP case makes the rest of the code easier. */
19046 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
19047 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
19049 /* Return the test that should be put into the flags user, i.e.
19050 the bcc, scc, or cmov instruction. */
19051 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
19054 /* Figure out whether to use ordered or unordered fp comparisons.
19055 Return the appropriate mode to use. */
19058 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
19060 /* ??? In order to make all comparisons reversible, we do all comparisons
19061 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19062 all forms trapping and nontrapping comparisons, we can make inequality
19063 comparisons trapping again, since it results in better code when using
19064 FCOM based compares. */
19065 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
19069 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
19071 enum machine_mode mode
= GET_MODE (op0
);
19073 if (SCALAR_FLOAT_MODE_P (mode
))
19075 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
19076 return ix86_fp_compare_mode (code
);
19081 /* Only zero flag is needed. */
19082 case EQ
: /* ZF=0 */
19083 case NE
: /* ZF!=0 */
19085 /* Codes needing carry flag. */
19086 case GEU
: /* CF=0 */
19087 case LTU
: /* CF=1 */
19088 /* Detect overflow checks. They need just the carry flag. */
19089 if (GET_CODE (op0
) == PLUS
19090 && rtx_equal_p (op1
, XEXP (op0
, 0)))
19094 case GTU
: /* CF=0 & ZF=0 */
19095 case LEU
: /* CF=1 | ZF=1 */
19097 /* Codes possibly doable only with sign flag when
19098 comparing against zero. */
19099 case GE
: /* SF=OF or SF=0 */
19100 case LT
: /* SF<>OF or SF=1 */
19101 if (op1
== const0_rtx
)
19104 /* For other cases Carry flag is not required. */
19106 /* Codes doable only with sign flag when comparing
19107 against zero, but we miss jump instruction for it
19108 so we need to use relational tests against overflow
19109 that thus needs to be zero. */
19110 case GT
: /* ZF=0 & SF=OF */
19111 case LE
: /* ZF=1 | SF<>OF */
19112 if (op1
== const0_rtx
)
19116 /* strcmp pattern do (use flags) and combine may ask us for proper
19121 gcc_unreachable ();
19125 /* Return the fixed registers used for condition codes. */
19128 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
19135 /* If two condition code modes are compatible, return a condition code
19136 mode which is compatible with both. Otherwise, return
19139 static enum machine_mode
19140 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
19145 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
19148 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
19149 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
19152 if (m1
== CCZmode
&& (m2
== CCGCmode
|| m2
== CCGOCmode
))
19154 else if (m2
== CCZmode
&& (m1
== CCGCmode
|| m1
== CCGOCmode
))
19160 gcc_unreachable ();
19190 /* These are only compatible with themselves, which we already
19197 /* Return a comparison we can do and that it is equivalent to
19198 swap_condition (code) apart possibly from orderedness.
19199 But, never change orderedness if TARGET_IEEE_FP, returning
19200 UNKNOWN in that case if necessary. */
19202 static enum rtx_code
19203 ix86_fp_swap_condition (enum rtx_code code
)
19207 case GT
: /* GTU - CF=0 & ZF=0 */
19208 return TARGET_IEEE_FP
? UNKNOWN
: UNLT
;
19209 case GE
: /* GEU - CF=0 */
19210 return TARGET_IEEE_FP
? UNKNOWN
: UNLE
;
19211 case UNLT
: /* LTU - CF=1 */
19212 return TARGET_IEEE_FP
? UNKNOWN
: GT
;
19213 case UNLE
: /* LEU - CF=1 | ZF=1 */
19214 return TARGET_IEEE_FP
? UNKNOWN
: GE
;
19216 return swap_condition (code
);
19220 /* Return cost of comparison CODE using the best strategy for performance.
19221 All following functions do use number of instructions as a cost metrics.
19222 In future this should be tweaked to compute bytes for optimize_size and
19223 take into account performance of various instructions on various CPUs. */
19226 ix86_fp_comparison_cost (enum rtx_code code
)
19230 /* The cost of code using bit-twiddling on %ah. */
19247 arith_cost
= TARGET_IEEE_FP
? 5 : 4;
19251 arith_cost
= TARGET_IEEE_FP
? 6 : 4;
19254 gcc_unreachable ();
19257 switch (ix86_fp_comparison_strategy (code
))
19259 case IX86_FPCMP_COMI
:
19260 return arith_cost
> 4 ? 3 : 2;
19261 case IX86_FPCMP_SAHF
:
19262 return arith_cost
> 4 ? 4 : 3;
19268 /* Return strategy to use for floating-point. We assume that fcomi is always
19269 preferrable where available, since that is also true when looking at size
19270 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19272 enum ix86_fpcmp_strategy
19273 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED
)
19275 /* Do fcomi/sahf based test when profitable. */
19278 return IX86_FPCMP_COMI
;
19280 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
19281 return IX86_FPCMP_SAHF
;
19283 return IX86_FPCMP_ARITH
;
19286 /* Swap, force into registers, or otherwise massage the two operands
19287 to a fp comparison. The operands are updated in place; the new
19288 comparison code is returned. */
19290 static enum rtx_code
19291 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
19293 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
19294 rtx op0
= *pop0
, op1
= *pop1
;
19295 enum machine_mode op_mode
= GET_MODE (op0
);
19296 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
19298 /* All of the unordered compare instructions only work on registers.
19299 The same is true of the fcomi compare instructions. The XFmode
19300 compare instructions require registers except when comparing
19301 against zero or when converting operand 1 from fixed point to
19305 && (fpcmp_mode
== CCFPUmode
19306 || (op_mode
== XFmode
19307 && ! (standard_80387_constant_p (op0
) == 1
19308 || standard_80387_constant_p (op1
) == 1)
19309 && GET_CODE (op1
) != FLOAT
)
19310 || ix86_fp_comparison_strategy (code
) == IX86_FPCMP_COMI
))
19312 op0
= force_reg (op_mode
, op0
);
19313 op1
= force_reg (op_mode
, op1
);
19317 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19318 things around if they appear profitable, otherwise force op0
19319 into a register. */
19321 if (standard_80387_constant_p (op0
) == 0
19323 && ! (standard_80387_constant_p (op1
) == 0
19326 enum rtx_code new_code
= ix86_fp_swap_condition (code
);
19327 if (new_code
!= UNKNOWN
)
19330 tmp
= op0
, op0
= op1
, op1
= tmp
;
19336 op0
= force_reg (op_mode
, op0
);
19338 if (CONSTANT_P (op1
))
19340 int tmp
= standard_80387_constant_p (op1
);
19342 op1
= validize_mem (force_const_mem (op_mode
, op1
));
19346 op1
= force_reg (op_mode
, op1
);
19349 op1
= force_reg (op_mode
, op1
);
19353 /* Try to rearrange the comparison to make it cheaper. */
19354 if (ix86_fp_comparison_cost (code
)
19355 > ix86_fp_comparison_cost (swap_condition (code
))
19356 && (REG_P (op1
) || can_create_pseudo_p ()))
19359 tmp
= op0
, op0
= op1
, op1
= tmp
;
19360 code
= swap_condition (code
);
19362 op0
= force_reg (op_mode
, op0
);
19370 /* Convert comparison codes we use to represent FP comparison to integer
19371 code that will result in proper branch. Return UNKNOWN if no such code
19375 ix86_fp_compare_code_to_integer (enum rtx_code code
)
19404 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19407 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
)
19409 enum machine_mode fpcmp_mode
, intcmp_mode
;
19412 fpcmp_mode
= ix86_fp_compare_mode (code
);
19413 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
19415 /* Do fcomi/sahf based test when profitable. */
19416 switch (ix86_fp_comparison_strategy (code
))
19418 case IX86_FPCMP_COMI
:
19419 intcmp_mode
= fpcmp_mode
;
19420 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
19421 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
19426 case IX86_FPCMP_SAHF
:
19427 intcmp_mode
= fpcmp_mode
;
19428 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
19429 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
19433 scratch
= gen_reg_rtx (HImode
);
19434 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
19435 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
19438 case IX86_FPCMP_ARITH
:
19439 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19440 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
19441 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
19443 scratch
= gen_reg_rtx (HImode
);
19444 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
19446 /* In the unordered case, we have to check C2 for NaN's, which
19447 doesn't happen to work out to anything nice combination-wise.
19448 So do some bit twiddling on the value we've got in AH to come
19449 up with an appropriate set of condition codes. */
19451 intcmp_mode
= CCNOmode
;
19456 if (code
== GT
|| !TARGET_IEEE_FP
)
19458 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
19463 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19464 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
19465 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
19466 intcmp_mode
= CCmode
;
19472 if (code
== LT
&& TARGET_IEEE_FP
)
19474 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19475 emit_insn (gen_cmpqi_ext_3 (scratch
, const1_rtx
));
19476 intcmp_mode
= CCmode
;
19481 emit_insn (gen_testqi_ext_ccno_0 (scratch
, const1_rtx
));
19487 if (code
== GE
|| !TARGET_IEEE_FP
)
19489 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
19494 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19495 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
, const1_rtx
));
19501 if (code
== LE
&& TARGET_IEEE_FP
)
19503 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19504 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
19505 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
19506 intcmp_mode
= CCmode
;
19511 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
19517 if (code
== EQ
&& TARGET_IEEE_FP
)
19519 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19520 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
19521 intcmp_mode
= CCmode
;
19526 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
19532 if (code
== NE
&& TARGET_IEEE_FP
)
19534 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19535 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
19541 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
19547 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
19551 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
19556 gcc_unreachable ();
19564 /* Return the test that should be put into the flags user, i.e.
19565 the bcc, scc, or cmov instruction. */
19566 return gen_rtx_fmt_ee (code
, VOIDmode
,
19567 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
19572 ix86_expand_compare (enum rtx_code code
, rtx op0
, rtx op1
)
19576 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
)
19577 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, op0
, op1
);
19579 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
19581 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
19582 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
19585 ret
= ix86_expand_int_compare (code
, op0
, op1
);
19591 ix86_expand_branch (enum rtx_code code
, rtx op0
, rtx op1
, rtx label
)
19593 enum machine_mode mode
= GET_MODE (op0
);
19605 tmp
= ix86_expand_compare (code
, op0
, op1
);
19606 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
19607 gen_rtx_LABEL_REF (VOIDmode
, label
),
19609 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
19616 /* Expand DImode branch into multiple compare+branch. */
19618 rtx lo
[2], hi
[2], label2
;
19619 enum rtx_code code1
, code2
, code3
;
19620 enum machine_mode submode
;
19622 if (CONSTANT_P (op0
) && !CONSTANT_P (op1
))
19624 tmp
= op0
, op0
= op1
, op1
= tmp
;
19625 code
= swap_condition (code
);
19628 split_double_mode (mode
, &op0
, 1, lo
+0, hi
+0);
19629 split_double_mode (mode
, &op1
, 1, lo
+1, hi
+1);
19631 submode
= mode
== DImode
? SImode
: DImode
;
19633 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19634 avoid two branches. This costs one extra insn, so disable when
19635 optimizing for size. */
19637 if ((code
== EQ
|| code
== NE
)
19638 && (!optimize_insn_for_size_p ()
19639 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
19644 if (hi
[1] != const0_rtx
)
19645 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
19646 NULL_RTX
, 0, OPTAB_WIDEN
);
19649 if (lo
[1] != const0_rtx
)
19650 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
19651 NULL_RTX
, 0, OPTAB_WIDEN
);
19653 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
19654 NULL_RTX
, 0, OPTAB_WIDEN
);
19656 ix86_expand_branch (code
, tmp
, const0_rtx
, label
);
19660 /* Otherwise, if we are doing less-than or greater-or-equal-than,
19661 op1 is a constant and the low word is zero, then we can just
19662 examine the high word. Similarly for low word -1 and
19663 less-or-equal-than or greater-than. */
19665 if (CONST_INT_P (hi
[1]))
19668 case LT
: case LTU
: case GE
: case GEU
:
19669 if (lo
[1] == const0_rtx
)
19671 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
19675 case LE
: case LEU
: case GT
: case GTU
:
19676 if (lo
[1] == constm1_rtx
)
19678 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
19686 /* Otherwise, we need two or three jumps. */
19688 label2
= gen_label_rtx ();
19691 code2
= swap_condition (code
);
19692 code3
= unsigned_condition (code
);
19696 case LT
: case GT
: case LTU
: case GTU
:
19699 case LE
: code1
= LT
; code2
= GT
; break;
19700 case GE
: code1
= GT
; code2
= LT
; break;
19701 case LEU
: code1
= LTU
; code2
= GTU
; break;
19702 case GEU
: code1
= GTU
; code2
= LTU
; break;
19704 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
19705 case NE
: code2
= UNKNOWN
; break;
19708 gcc_unreachable ();
19713 * if (hi(a) < hi(b)) goto true;
19714 * if (hi(a) > hi(b)) goto false;
19715 * if (lo(a) < lo(b)) goto true;
19719 if (code1
!= UNKNOWN
)
19720 ix86_expand_branch (code1
, hi
[0], hi
[1], label
);
19721 if (code2
!= UNKNOWN
)
19722 ix86_expand_branch (code2
, hi
[0], hi
[1], label2
);
19724 ix86_expand_branch (code3
, lo
[0], lo
[1], label
);
19726 if (code2
!= UNKNOWN
)
19727 emit_label (label2
);
19732 gcc_assert (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
);
19737 /* Split branch based on floating point condition. */
19739 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
19740 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
19745 if (target2
!= pc_rtx
)
19748 code
= reverse_condition_maybe_unordered (code
);
19753 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
19756 /* Remove pushed operand from stack. */
19758 ix86_free_from_memory (GET_MODE (pushed
));
19760 i
= emit_jump_insn (gen_rtx_SET
19762 gen_rtx_IF_THEN_ELSE (VOIDmode
,
19763 condition
, target1
, target2
)));
19764 if (split_branch_probability
>= 0)
19765 add_int_reg_note (i
, REG_BR_PROB
, split_branch_probability
);
19769 ix86_expand_setcc (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
19773 gcc_assert (GET_MODE (dest
) == QImode
);
19775 ret
= ix86_expand_compare (code
, op0
, op1
);
19776 PUT_MODE (ret
, QImode
);
19777 emit_insn (gen_rtx_SET (VOIDmode
, dest
, ret
));
19780 /* Expand comparison setting or clearing carry flag. Return true when
19781 successful and set pop for the operation. */
19783 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
19785 enum machine_mode mode
=
19786 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
19788 /* Do not handle double-mode compares that go through special path. */
19789 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
19792 if (SCALAR_FLOAT_MODE_P (mode
))
19794 rtx compare_op
, compare_seq
;
19796 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
19798 /* Shortcut: following common codes never translate
19799 into carry flag compares. */
19800 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
19801 || code
== ORDERED
|| code
== UNORDERED
)
19804 /* These comparisons require zero flag; swap operands so they won't. */
19805 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
19806 && !TARGET_IEEE_FP
)
19811 code
= swap_condition (code
);
19814 /* Try to expand the comparison and verify that we end up with
19815 carry flag based comparison. This fails to be true only when
19816 we decide to expand comparison using arithmetic that is not
19817 too common scenario. */
19819 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
19820 compare_seq
= get_insns ();
19823 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
19824 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
19825 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
19827 code
= GET_CODE (compare_op
);
19829 if (code
!= LTU
&& code
!= GEU
)
19832 emit_insn (compare_seq
);
19837 if (!INTEGRAL_MODE_P (mode
))
19846 /* Convert a==0 into (unsigned)a<1. */
19849 if (op1
!= const0_rtx
)
19852 code
= (code
== EQ
? LTU
: GEU
);
19855 /* Convert a>b into b<a or a>=b-1. */
19858 if (CONST_INT_P (op1
))
19860 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
19861 /* Bail out on overflow. We still can swap operands but that
19862 would force loading of the constant into register. */
19863 if (op1
== const0_rtx
19864 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
19866 code
= (code
== GTU
? GEU
: LTU
);
19873 code
= (code
== GTU
? LTU
: GEU
);
19877 /* Convert a>=0 into (unsigned)a<0x80000000. */
19880 if (mode
== DImode
|| op1
!= const0_rtx
)
19882 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19883 code
= (code
== LT
? GEU
: LTU
);
19887 if (mode
== DImode
|| op1
!= constm1_rtx
)
19889 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19890 code
= (code
== LE
? GEU
: LTU
);
19896 /* Swapping operands may cause constant to appear as first operand. */
19897 if (!nonimmediate_operand (op0
, VOIDmode
))
19899 if (!can_create_pseudo_p ())
19901 op0
= force_reg (mode
, op0
);
19903 *pop
= ix86_expand_compare (code
, op0
, op1
);
19904 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
19909 ix86_expand_int_movcc (rtx operands
[])
19911 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
19912 rtx compare_seq
, compare_op
;
19913 enum machine_mode mode
= GET_MODE (operands
[0]);
19914 bool sign_bit_compare_p
= false;
19915 rtx op0
= XEXP (operands
[1], 0);
19916 rtx op1
= XEXP (operands
[1], 1);
19918 if (GET_MODE (op0
) == TImode
19919 || (GET_MODE (op0
) == DImode
19924 compare_op
= ix86_expand_compare (code
, op0
, op1
);
19925 compare_seq
= get_insns ();
19928 compare_code
= GET_CODE (compare_op
);
19930 if ((op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
19931 || (op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
19932 sign_bit_compare_p
= true;
19934 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
19935 HImode insns, we'd be swallowed in word prefix ops. */
19937 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
19938 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
19939 && CONST_INT_P (operands
[2])
19940 && CONST_INT_P (operands
[3]))
19942 rtx out
= operands
[0];
19943 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
19944 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
19945 HOST_WIDE_INT diff
;
19948 /* Sign bit compares are better done using shifts than we do by using
19950 if (sign_bit_compare_p
19951 || ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
19953 /* Detect overlap between destination and compare sources. */
19956 if (!sign_bit_compare_p
)
19959 bool fpcmp
= false;
19961 compare_code
= GET_CODE (compare_op
);
19963 flags
= XEXP (compare_op
, 0);
19965 if (GET_MODE (flags
) == CCFPmode
19966 || GET_MODE (flags
) == CCFPUmode
)
19970 = ix86_fp_compare_code_to_integer (compare_code
);
19973 /* To simplify rest of code, restrict to the GEU case. */
19974 if (compare_code
== LTU
)
19976 HOST_WIDE_INT tmp
= ct
;
19979 compare_code
= reverse_condition (compare_code
);
19980 code
= reverse_condition (code
);
19985 PUT_CODE (compare_op
,
19986 reverse_condition_maybe_unordered
19987 (GET_CODE (compare_op
)));
19989 PUT_CODE (compare_op
,
19990 reverse_condition (GET_CODE (compare_op
)));
19994 if (reg_overlap_mentioned_p (out
, op0
)
19995 || reg_overlap_mentioned_p (out
, op1
))
19996 tmp
= gen_reg_rtx (mode
);
19998 if (mode
== DImode
)
19999 emit_insn (gen_x86_movdicc_0_m1 (tmp
, flags
, compare_op
));
20001 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
),
20002 flags
, compare_op
));
20006 if (code
== GT
|| code
== GE
)
20007 code
= reverse_condition (code
);
20010 HOST_WIDE_INT tmp
= ct
;
20015 tmp
= emit_store_flag (tmp
, code
, op0
, op1
, VOIDmode
, 0, -1);
20028 tmp
= expand_simple_binop (mode
, PLUS
,
20030 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
20041 tmp
= expand_simple_binop (mode
, IOR
,
20043 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
20045 else if (diff
== -1 && ct
)
20055 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
20057 tmp
= expand_simple_binop (mode
, PLUS
,
20058 copy_rtx (tmp
), GEN_INT (cf
),
20059 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
20067 * andl cf - ct, dest
20077 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
20080 tmp
= expand_simple_binop (mode
, AND
,
20082 gen_int_mode (cf
- ct
, mode
),
20083 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
20085 tmp
= expand_simple_binop (mode
, PLUS
,
20086 copy_rtx (tmp
), GEN_INT (ct
),
20087 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
20090 if (!rtx_equal_p (tmp
, out
))
20091 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
20098 enum machine_mode cmp_mode
= GET_MODE (op0
);
20101 tmp
= ct
, ct
= cf
, cf
= tmp
;
20104 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
20106 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
20108 /* We may be reversing unordered compare to normal compare, that
20109 is not valid in general (we may convert non-trapping condition
20110 to trapping one), however on i386 we currently emit all
20111 comparisons unordered. */
20112 compare_code
= reverse_condition_maybe_unordered (compare_code
);
20113 code
= reverse_condition_maybe_unordered (code
);
20117 compare_code
= reverse_condition (compare_code
);
20118 code
= reverse_condition (code
);
20122 compare_code
= UNKNOWN
;
20123 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
20124 && CONST_INT_P (op1
))
20126 if (op1
== const0_rtx
20127 && (code
== LT
|| code
== GE
))
20128 compare_code
= code
;
20129 else if (op1
== constm1_rtx
)
20133 else if (code
== GT
)
20138 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20139 if (compare_code
!= UNKNOWN
20140 && GET_MODE (op0
) == GET_MODE (out
)
20141 && (cf
== -1 || ct
== -1))
20143 /* If lea code below could be used, only optimize
20144 if it results in a 2 insn sequence. */
20146 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
20147 || diff
== 3 || diff
== 5 || diff
== 9)
20148 || (compare_code
== LT
&& ct
== -1)
20149 || (compare_code
== GE
&& cf
== -1))
20152 * notl op1 (if necessary)
20160 code
= reverse_condition (code
);
20163 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
20165 out
= expand_simple_binop (mode
, IOR
,
20167 out
, 1, OPTAB_DIRECT
);
20168 if (out
!= operands
[0])
20169 emit_move_insn (operands
[0], out
);
20176 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
20177 || diff
== 3 || diff
== 5 || diff
== 9)
20178 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
20180 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
20186 * lea cf(dest*(ct-cf)),dest
20190 * This also catches the degenerate setcc-only case.
20196 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
20199 /* On x86_64 the lea instruction operates on Pmode, so we need
20200 to get arithmetics done in proper mode to match. */
20202 tmp
= copy_rtx (out
);
20206 out1
= copy_rtx (out
);
20207 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
20211 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
20217 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
20220 if (!rtx_equal_p (tmp
, out
))
20223 out
= force_operand (tmp
, copy_rtx (out
));
20225 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
20227 if (!rtx_equal_p (out
, operands
[0]))
20228 emit_move_insn (operands
[0], copy_rtx (out
));
20234 * General case: Jumpful:
20235 * xorl dest,dest cmpl op1, op2
20236 * cmpl op1, op2 movl ct, dest
20237 * setcc dest jcc 1f
20238 * decl dest movl cf, dest
20239 * andl (cf-ct),dest 1:
20242 * Size 20. Size 14.
20244 * This is reasonably steep, but branch mispredict costs are
20245 * high on modern cpus, so consider failing only if optimizing
20249 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
20250 && BRANCH_COST (optimize_insn_for_speed_p (),
20255 enum machine_mode cmp_mode
= GET_MODE (op0
);
20260 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
20262 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
20264 /* We may be reversing unordered compare to normal compare,
20265 that is not valid in general (we may convert non-trapping
20266 condition to trapping one), however on i386 we currently
20267 emit all comparisons unordered. */
20268 code
= reverse_condition_maybe_unordered (code
);
20272 code
= reverse_condition (code
);
20273 if (compare_code
!= UNKNOWN
)
20274 compare_code
= reverse_condition (compare_code
);
20278 if (compare_code
!= UNKNOWN
)
20280 /* notl op1 (if needed)
20285 For x < 0 (resp. x <= -1) there will be no notl,
20286 so if possible swap the constants to get rid of the
20288 True/false will be -1/0 while code below (store flag
20289 followed by decrement) is 0/-1, so the constants need
20290 to be exchanged once more. */
20292 if (compare_code
== GE
|| !cf
)
20294 code
= reverse_condition (code
);
20299 HOST_WIDE_INT tmp
= cf
;
20304 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
20308 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
20310 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
),
20312 copy_rtx (out
), 1, OPTAB_DIRECT
);
20315 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
20316 gen_int_mode (cf
- ct
, mode
),
20317 copy_rtx (out
), 1, OPTAB_DIRECT
);
20319 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
20320 copy_rtx (out
), 1, OPTAB_DIRECT
);
20321 if (!rtx_equal_p (out
, operands
[0]))
20322 emit_move_insn (operands
[0], copy_rtx (out
));
20328 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
20330 /* Try a few things more with specific constants and a variable. */
20333 rtx var
, orig_out
, out
, tmp
;
20335 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20338 /* If one of the two operands is an interesting constant, load a
20339 constant with the above and mask it in with a logical operation. */
20341 if (CONST_INT_P (operands
[2]))
20344 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
20345 operands
[3] = constm1_rtx
, op
= and_optab
;
20346 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
20347 operands
[3] = const0_rtx
, op
= ior_optab
;
20351 else if (CONST_INT_P (operands
[3]))
20354 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
20355 operands
[2] = constm1_rtx
, op
= and_optab
;
20356 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
20357 operands
[2] = const0_rtx
, op
= ior_optab
;
20364 orig_out
= operands
[0];
20365 tmp
= gen_reg_rtx (mode
);
20368 /* Recurse to get the constant loaded. */
20369 if (ix86_expand_int_movcc (operands
) == 0)
20372 /* Mask in the interesting variable. */
20373 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
20375 if (!rtx_equal_p (out
, orig_out
))
20376 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
20382 * For comparison with above,
20392 if (! nonimmediate_operand (operands
[2], mode
))
20393 operands
[2] = force_reg (mode
, operands
[2]);
20394 if (! nonimmediate_operand (operands
[3], mode
))
20395 operands
[3] = force_reg (mode
, operands
[3]);
20397 if (! register_operand (operands
[2], VOIDmode
)
20399 || ! register_operand (operands
[3], VOIDmode
)))
20400 operands
[2] = force_reg (mode
, operands
[2]);
20403 && ! register_operand (operands
[3], VOIDmode
))
20404 operands
[3] = force_reg (mode
, operands
[3]);
20406 emit_insn (compare_seq
);
20407 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
20408 gen_rtx_IF_THEN_ELSE (mode
,
20409 compare_op
, operands
[2],
20414 /* Swap, force into registers, or otherwise massage the two operands
20415 to an sse comparison with a mask result. Thus we differ a bit from
20416 ix86_prepare_fp_compare_args which expects to produce a flags result.
20418 The DEST operand exists to help determine whether to commute commutative
20419 operators. The POP0/POP1 operands are updated in place. The new
20420 comparison code is returned, or UNKNOWN if not implementable. */
20422 static enum rtx_code
20423 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
20424 rtx
*pop0
, rtx
*pop1
)
20432 /* AVX supports all the needed comparisons. */
20435 /* We have no LTGT as an operator. We could implement it with
20436 NE & ORDERED, but this requires an extra temporary. It's
20437 not clear that it's worth it. */
20444 /* These are supported directly. */
20451 /* AVX has 3 operand comparisons, no need to swap anything. */
20454 /* For commutative operators, try to canonicalize the destination
20455 operand to be first in the comparison - this helps reload to
20456 avoid extra moves. */
20457 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
20465 /* These are not supported directly before AVX, and furthermore
20466 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20467 comparison operands to transform into something that is
20472 code
= swap_condition (code
);
20476 gcc_unreachable ();
20482 /* Detect conditional moves that exactly match min/max operational
20483 semantics. Note that this is IEEE safe, as long as we don't
20484 interchange the operands.
20486 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20487 and TRUE if the operation is successful and instructions are emitted. */
20490 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
20491 rtx cmp_op1
, rtx if_true
, rtx if_false
)
20493 enum machine_mode mode
;
20499 else if (code
== UNGE
)
20502 if_true
= if_false
;
20508 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
20510 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
20515 mode
= GET_MODE (dest
);
20517 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20518 but MODE may be a vector mode and thus not appropriate. */
20519 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
20521 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
20524 if_true
= force_reg (mode
, if_true
);
20525 v
= gen_rtvec (2, if_true
, if_false
);
20526 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
20530 code
= is_min
? SMIN
: SMAX
;
20531 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
20534 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
20538 /* Expand an sse vector comparison. Return the register with the result. */
20541 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
20542 rtx op_true
, rtx op_false
)
20544 enum machine_mode mode
= GET_MODE (dest
);
20545 enum machine_mode cmp_mode
= GET_MODE (cmp_op0
);
20548 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
20549 if (!nonimmediate_operand (cmp_op1
, cmp_mode
))
20550 cmp_op1
= force_reg (cmp_mode
, cmp_op1
);
20553 || reg_overlap_mentioned_p (dest
, op_true
)
20554 || reg_overlap_mentioned_p (dest
, op_false
))
20555 dest
= gen_reg_rtx (mode
);
20557 x
= gen_rtx_fmt_ee (code
, cmp_mode
, cmp_op0
, cmp_op1
);
20558 if (cmp_mode
!= mode
)
20560 x
= force_reg (cmp_mode
, x
);
20561 convert_move (dest
, x
, false);
20564 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20569 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20570 operations. This is used for both scalar and vector conditional moves. */
20573 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
20575 enum machine_mode mode
= GET_MODE (dest
);
20578 if (vector_all_ones_operand (op_true
, mode
)
20579 && rtx_equal_p (op_false
, CONST0_RTX (mode
)))
20581 emit_insn (gen_rtx_SET (VOIDmode
, dest
, cmp
));
20583 else if (op_false
== CONST0_RTX (mode
))
20585 op_true
= force_reg (mode
, op_true
);
20586 x
= gen_rtx_AND (mode
, cmp
, op_true
);
20587 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20589 else if (op_true
== CONST0_RTX (mode
))
20591 op_false
= force_reg (mode
, op_false
);
20592 x
= gen_rtx_NOT (mode
, cmp
);
20593 x
= gen_rtx_AND (mode
, x
, op_false
);
20594 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20596 else if (INTEGRAL_MODE_P (mode
) && op_true
== CONSTM1_RTX (mode
))
20598 op_false
= force_reg (mode
, op_false
);
20599 x
= gen_rtx_IOR (mode
, cmp
, op_false
);
20600 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20602 else if (TARGET_XOP
)
20604 op_true
= force_reg (mode
, op_true
);
20606 if (!nonimmediate_operand (op_false
, mode
))
20607 op_false
= force_reg (mode
, op_false
);
20609 emit_insn (gen_rtx_SET (mode
, dest
,
20610 gen_rtx_IF_THEN_ELSE (mode
, cmp
,
20616 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
20619 if (!nonimmediate_operand (op_true
, mode
))
20620 op_true
= force_reg (mode
, op_true
);
20622 op_false
= force_reg (mode
, op_false
);
20628 gen
= gen_sse4_1_blendvps
;
20632 gen
= gen_sse4_1_blendvpd
;
20640 gen
= gen_sse4_1_pblendvb
;
20641 if (mode
!= V16QImode
)
20642 d
= gen_reg_rtx (V16QImode
);
20643 op_false
= gen_lowpart (V16QImode
, op_false
);
20644 op_true
= gen_lowpart (V16QImode
, op_true
);
20645 cmp
= gen_lowpart (V16QImode
, cmp
);
20650 gen
= gen_avx_blendvps256
;
20654 gen
= gen_avx_blendvpd256
;
20662 gen
= gen_avx2_pblendvb
;
20663 if (mode
!= V32QImode
)
20664 d
= gen_reg_rtx (V32QImode
);
20665 op_false
= gen_lowpart (V32QImode
, op_false
);
20666 op_true
= gen_lowpart (V32QImode
, op_true
);
20667 cmp
= gen_lowpart (V32QImode
, cmp
);
20676 emit_insn (gen (d
, op_false
, op_true
, cmp
));
20678 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), d
));
20682 op_true
= force_reg (mode
, op_true
);
20684 t2
= gen_reg_rtx (mode
);
20686 t3
= gen_reg_rtx (mode
);
20690 x
= gen_rtx_AND (mode
, op_true
, cmp
);
20691 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
20693 x
= gen_rtx_NOT (mode
, cmp
);
20694 x
= gen_rtx_AND (mode
, x
, op_false
);
20695 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
20697 x
= gen_rtx_IOR (mode
, t3
, t2
);
20698 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20703 /* Expand a floating-point conditional move. Return true if successful. */
20706 ix86_expand_fp_movcc (rtx operands
[])
20708 enum machine_mode mode
= GET_MODE (operands
[0]);
20709 enum rtx_code code
= GET_CODE (operands
[1]);
20710 rtx tmp
, compare_op
;
20711 rtx op0
= XEXP (operands
[1], 0);
20712 rtx op1
= XEXP (operands
[1], 1);
20714 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
20716 enum machine_mode cmode
;
20718 /* Since we've no cmove for sse registers, don't force bad register
20719 allocation just to gain access to it. Deny movcc when the
20720 comparison mode doesn't match the move mode. */
20721 cmode
= GET_MODE (op0
);
20722 if (cmode
== VOIDmode
)
20723 cmode
= GET_MODE (op1
);
20727 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
, &op0
, &op1
);
20728 if (code
== UNKNOWN
)
20731 if (ix86_expand_sse_fp_minmax (operands
[0], code
, op0
, op1
,
20732 operands
[2], operands
[3]))
20735 tmp
= ix86_expand_sse_cmp (operands
[0], code
, op0
, op1
,
20736 operands
[2], operands
[3]);
20737 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
20741 if (GET_MODE (op0
) == TImode
20742 || (GET_MODE (op0
) == DImode
20746 /* The floating point conditional move instructions don't directly
20747 support conditions resulting from a signed integer comparison. */
20749 compare_op
= ix86_expand_compare (code
, op0
, op1
);
20750 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
20752 tmp
= gen_reg_rtx (QImode
);
20753 ix86_expand_setcc (tmp
, code
, op0
, op1
);
20755 compare_op
= ix86_expand_compare (NE
, tmp
, const0_rtx
);
20758 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
20759 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
20760 operands
[2], operands
[3])));
20765 /* Expand a floating-point vector conditional move; a vcond operation
20766 rather than a movcc operation. */
20769 ix86_expand_fp_vcond (rtx operands
[])
20771 enum rtx_code code
= GET_CODE (operands
[3]);
20774 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
20775 &operands
[4], &operands
[5]);
20776 if (code
== UNKNOWN
)
20779 switch (GET_CODE (operands
[3]))
20782 temp
= ix86_expand_sse_cmp (operands
[0], ORDERED
, operands
[4],
20783 operands
[5], operands
[0], operands
[0]);
20784 cmp
= ix86_expand_sse_cmp (operands
[0], NE
, operands
[4],
20785 operands
[5], operands
[1], operands
[2]);
20789 temp
= ix86_expand_sse_cmp (operands
[0], UNORDERED
, operands
[4],
20790 operands
[5], operands
[0], operands
[0]);
20791 cmp
= ix86_expand_sse_cmp (operands
[0], EQ
, operands
[4],
20792 operands
[5], operands
[1], operands
[2]);
20796 gcc_unreachable ();
20798 cmp
= expand_simple_binop (GET_MODE (cmp
), code
, temp
, cmp
, cmp
, 1,
20800 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
20804 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
20805 operands
[5], operands
[1], operands
[2]))
20808 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
20809 operands
[1], operands
[2]);
20810 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
20814 /* Expand a signed/unsigned integral vector conditional move. */
20817 ix86_expand_int_vcond (rtx operands
[])
20819 enum machine_mode data_mode
= GET_MODE (operands
[0]);
20820 enum machine_mode mode
= GET_MODE (operands
[4]);
20821 enum rtx_code code
= GET_CODE (operands
[3]);
20822 bool negate
= false;
20825 cop0
= operands
[4];
20826 cop1
= operands
[5];
20828 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
20829 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
20830 if ((code
== LT
|| code
== GE
)
20831 && data_mode
== mode
20832 && cop1
== CONST0_RTX (mode
)
20833 && operands
[1 + (code
== LT
)] == CONST0_RTX (data_mode
)
20834 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) > 1
20835 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) <= 8
20836 && (GET_MODE_SIZE (data_mode
) == 16
20837 || (TARGET_AVX2
&& GET_MODE_SIZE (data_mode
) == 32)))
20839 rtx negop
= operands
[2 - (code
== LT
)];
20840 int shift
= GET_MODE_BITSIZE (GET_MODE_INNER (data_mode
)) - 1;
20841 if (negop
== CONST1_RTX (data_mode
))
20843 rtx res
= expand_simple_binop (mode
, LSHIFTRT
, cop0
, GEN_INT (shift
),
20844 operands
[0], 1, OPTAB_DIRECT
);
20845 if (res
!= operands
[0])
20846 emit_move_insn (operands
[0], res
);
20849 else if (GET_MODE_INNER (data_mode
) != DImode
20850 && vector_all_ones_operand (negop
, data_mode
))
20852 rtx res
= expand_simple_binop (mode
, ASHIFTRT
, cop0
, GEN_INT (shift
),
20853 operands
[0], 0, OPTAB_DIRECT
);
20854 if (res
!= operands
[0])
20855 emit_move_insn (operands
[0], res
);
20860 if (!nonimmediate_operand (cop1
, mode
))
20861 cop1
= force_reg (mode
, cop1
);
20862 if (!general_operand (operands
[1], data_mode
))
20863 operands
[1] = force_reg (data_mode
, operands
[1]);
20864 if (!general_operand (operands
[2], data_mode
))
20865 operands
[2] = force_reg (data_mode
, operands
[2]);
20867 /* XOP supports all of the comparisons on all 128-bit vector int types. */
20869 && (mode
== V16QImode
|| mode
== V8HImode
20870 || mode
== V4SImode
|| mode
== V2DImode
))
20874 /* Canonicalize the comparison to EQ, GT, GTU. */
20885 code
= reverse_condition (code
);
20891 code
= reverse_condition (code
);
20897 code
= swap_condition (code
);
20898 x
= cop0
, cop0
= cop1
, cop1
= x
;
20902 gcc_unreachable ();
20905 /* Only SSE4.1/SSE4.2 supports V2DImode. */
20906 if (mode
== V2DImode
)
20911 /* SSE4.1 supports EQ. */
20912 if (!TARGET_SSE4_1
)
20918 /* SSE4.2 supports GT/GTU. */
20919 if (!TARGET_SSE4_2
)
20924 gcc_unreachable ();
20928 /* Unsigned parallel compare is not supported by the hardware.
20929 Play some tricks to turn this into a signed comparison
20933 cop0
= force_reg (mode
, cop0
);
20943 rtx (*gen_sub3
) (rtx
, rtx
, rtx
);
20947 case V8SImode
: gen_sub3
= gen_subv8si3
; break;
20948 case V4DImode
: gen_sub3
= gen_subv4di3
; break;
20949 case V4SImode
: gen_sub3
= gen_subv4si3
; break;
20950 case V2DImode
: gen_sub3
= gen_subv2di3
; break;
20952 gcc_unreachable ();
20954 /* Subtract (-(INT MAX) - 1) from both operands to make
20956 mask
= ix86_build_signbit_mask (mode
, true, false);
20957 t1
= gen_reg_rtx (mode
);
20958 emit_insn (gen_sub3 (t1
, cop0
, mask
));
20960 t2
= gen_reg_rtx (mode
);
20961 emit_insn (gen_sub3 (t2
, cop1
, mask
));
20973 /* Perform a parallel unsigned saturating subtraction. */
20974 x
= gen_reg_rtx (mode
);
20975 emit_insn (gen_rtx_SET (VOIDmode
, x
,
20976 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
20979 cop1
= CONST0_RTX (mode
);
20985 gcc_unreachable ();
20990 /* Allow the comparison to be done in one mode, but the movcc to
20991 happen in another mode. */
20992 if (data_mode
== mode
)
20994 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
20995 operands
[1+negate
], operands
[2-negate
]);
20999 gcc_assert (GET_MODE_SIZE (data_mode
) == GET_MODE_SIZE (mode
));
21000 x
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), code
, cop0
, cop1
,
21001 operands
[1+negate
], operands
[2-negate
]);
21002 x
= gen_lowpart (data_mode
, x
);
21005 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
21006 operands
[2-negate
]);
21010 /* Expand a variable vector permutation. */
21013 ix86_expand_vec_perm (rtx operands
[])
21015 rtx target
= operands
[0];
21016 rtx op0
= operands
[1];
21017 rtx op1
= operands
[2];
21018 rtx mask
= operands
[3];
21019 rtx t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
, vt
, vt2
, vec
[32];
21020 enum machine_mode mode
= GET_MODE (op0
);
21021 enum machine_mode maskmode
= GET_MODE (mask
);
21023 bool one_operand_shuffle
= rtx_equal_p (op0
, op1
);
21025 /* Number of elements in the vector. */
21026 w
= GET_MODE_NUNITS (mode
);
21027 e
= GET_MODE_UNIT_SIZE (mode
);
21028 gcc_assert (w
<= 32);
21032 if (mode
== V4DImode
|| mode
== V4DFmode
|| mode
== V16HImode
)
21034 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21035 an constant shuffle operand. With a tiny bit of effort we can
21036 use VPERMD instead. A re-interpretation stall for V4DFmode is
21037 unfortunate but there's no avoiding it.
21038 Similarly for V16HImode we don't have instructions for variable
21039 shuffling, while for V32QImode we can use after preparing suitable
21040 masks vpshufb; vpshufb; vpermq; vpor. */
21042 if (mode
== V16HImode
)
21044 maskmode
= mode
= V32QImode
;
21050 maskmode
= mode
= V8SImode
;
21054 t1
= gen_reg_rtx (maskmode
);
21056 /* Replicate the low bits of the V4DImode mask into V8SImode:
21058 t1 = { A A B B C C D D }. */
21059 for (i
= 0; i
< w
/ 2; ++i
)
21060 vec
[i
*2 + 1] = vec
[i
*2] = GEN_INT (i
* 2);
21061 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
21062 vt
= force_reg (maskmode
, vt
);
21063 mask
= gen_lowpart (maskmode
, mask
);
21064 if (maskmode
== V8SImode
)
21065 emit_insn (gen_avx2_permvarv8si (t1
, mask
, vt
));
21067 emit_insn (gen_avx2_pshufbv32qi3 (t1
, mask
, vt
));
21069 /* Multiply the shuffle indicies by two. */
21070 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, t1
, t1
, 1,
21073 /* Add one to the odd shuffle indicies:
21074 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21075 for (i
= 0; i
< w
/ 2; ++i
)
21077 vec
[i
* 2] = const0_rtx
;
21078 vec
[i
* 2 + 1] = const1_rtx
;
21080 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
21081 vt
= validize_mem (force_const_mem (maskmode
, vt
));
21082 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, vt
, t1
, 1,
21085 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21086 operands
[3] = mask
= t1
;
21087 target
= gen_reg_rtx (mode
);
21088 op0
= gen_lowpart (mode
, op0
);
21089 op1
= gen_lowpart (mode
, op1
);
21095 /* The VPERMD and VPERMPS instructions already properly ignore
21096 the high bits of the shuffle elements. No need for us to
21097 perform an AND ourselves. */
21098 if (one_operand_shuffle
)
21100 emit_insn (gen_avx2_permvarv8si (target
, op0
, mask
));
21101 if (target
!= operands
[0])
21102 emit_move_insn (operands
[0],
21103 gen_lowpart (GET_MODE (operands
[0]), target
));
21107 t1
= gen_reg_rtx (V8SImode
);
21108 t2
= gen_reg_rtx (V8SImode
);
21109 emit_insn (gen_avx2_permvarv8si (t1
, op0
, mask
));
21110 emit_insn (gen_avx2_permvarv8si (t2
, op1
, mask
));
21116 mask
= gen_lowpart (V8SFmode
, mask
);
21117 if (one_operand_shuffle
)
21118 emit_insn (gen_avx2_permvarv8sf (target
, op0
, mask
));
21121 t1
= gen_reg_rtx (V8SFmode
);
21122 t2
= gen_reg_rtx (V8SFmode
);
21123 emit_insn (gen_avx2_permvarv8sf (t1
, op0
, mask
));
21124 emit_insn (gen_avx2_permvarv8sf (t2
, op1
, mask
));
21130 /* By combining the two 128-bit input vectors into one 256-bit
21131 input vector, we can use VPERMD and VPERMPS for the full
21132 two-operand shuffle. */
21133 t1
= gen_reg_rtx (V8SImode
);
21134 t2
= gen_reg_rtx (V8SImode
);
21135 emit_insn (gen_avx_vec_concatv8si (t1
, op0
, op1
));
21136 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
21137 emit_insn (gen_avx2_permvarv8si (t1
, t1
, t2
));
21138 emit_insn (gen_avx_vextractf128v8si (target
, t1
, const0_rtx
));
21142 t1
= gen_reg_rtx (V8SFmode
);
21143 t2
= gen_reg_rtx (V8SImode
);
21144 mask
= gen_lowpart (V4SImode
, mask
);
21145 emit_insn (gen_avx_vec_concatv8sf (t1
, op0
, op1
));
21146 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
21147 emit_insn (gen_avx2_permvarv8sf (t1
, t1
, t2
));
21148 emit_insn (gen_avx_vextractf128v8sf (target
, t1
, const0_rtx
));
21152 t1
= gen_reg_rtx (V32QImode
);
21153 t2
= gen_reg_rtx (V32QImode
);
21154 t3
= gen_reg_rtx (V32QImode
);
21155 vt2
= GEN_INT (128);
21156 for (i
= 0; i
< 32; i
++)
21158 vt
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
21159 vt
= force_reg (V32QImode
, vt
);
21160 for (i
= 0; i
< 32; i
++)
21161 vec
[i
] = i
< 16 ? vt2
: const0_rtx
;
21162 vt2
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
21163 vt2
= force_reg (V32QImode
, vt2
);
21164 /* From mask create two adjusted masks, which contain the same
21165 bits as mask in the low 7 bits of each vector element.
21166 The first mask will have the most significant bit clear
21167 if it requests element from the same 128-bit lane
21168 and MSB set if it requests element from the other 128-bit lane.
21169 The second mask will have the opposite values of the MSB,
21170 and additionally will have its 128-bit lanes swapped.
21171 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21172 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21173 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21174 stands for other 12 bytes. */
21175 /* The bit whether element is from the same lane or the other
21176 lane is bit 4, so shift it up by 3 to the MSB position. */
21177 t5
= gen_reg_rtx (V4DImode
);
21178 emit_insn (gen_ashlv4di3 (t5
, gen_lowpart (V4DImode
, mask
),
21180 /* Clear MSB bits from the mask just in case it had them set. */
21181 emit_insn (gen_avx2_andnotv32qi3 (t2
, vt
, mask
));
21182 /* After this t1 will have MSB set for elements from other lane. */
21183 emit_insn (gen_xorv32qi3 (t1
, gen_lowpart (V32QImode
, t5
), vt2
));
21184 /* Clear bits other than MSB. */
21185 emit_insn (gen_andv32qi3 (t1
, t1
, vt
));
21186 /* Or in the lower bits from mask into t3. */
21187 emit_insn (gen_iorv32qi3 (t3
, t1
, t2
));
21188 /* And invert MSB bits in t1, so MSB is set for elements from the same
21190 emit_insn (gen_xorv32qi3 (t1
, t1
, vt
));
21191 /* Swap 128-bit lanes in t3. */
21192 t6
= gen_reg_rtx (V4DImode
);
21193 emit_insn (gen_avx2_permv4di_1 (t6
, gen_lowpart (V4DImode
, t3
),
21194 const2_rtx
, GEN_INT (3),
21195 const0_rtx
, const1_rtx
));
21196 /* And or in the lower bits from mask into t1. */
21197 emit_insn (gen_iorv32qi3 (t1
, t1
, t2
));
21198 if (one_operand_shuffle
)
21200 /* Each of these shuffles will put 0s in places where
21201 element from the other 128-bit lane is needed, otherwise
21202 will shuffle in the requested value. */
21203 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op0
,
21204 gen_lowpart (V32QImode
, t6
)));
21205 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op0
, t1
));
21206 /* For t3 the 128-bit lanes are swapped again. */
21207 t7
= gen_reg_rtx (V4DImode
);
21208 emit_insn (gen_avx2_permv4di_1 (t7
, gen_lowpart (V4DImode
, t3
),
21209 const2_rtx
, GEN_INT (3),
21210 const0_rtx
, const1_rtx
));
21211 /* And oring both together leads to the result. */
21212 emit_insn (gen_iorv32qi3 (target
, t1
,
21213 gen_lowpart (V32QImode
, t7
)));
21214 if (target
!= operands
[0])
21215 emit_move_insn (operands
[0],
21216 gen_lowpart (GET_MODE (operands
[0]), target
));
21220 t4
= gen_reg_rtx (V32QImode
);
21221 /* Similarly to the above one_operand_shuffle code,
21222 just for repeated twice for each operand. merge_two:
21223 code will merge the two results together. */
21224 emit_insn (gen_avx2_pshufbv32qi3 (t4
, op0
,
21225 gen_lowpart (V32QImode
, t6
)));
21226 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op1
,
21227 gen_lowpart (V32QImode
, t6
)));
21228 emit_insn (gen_avx2_pshufbv32qi3 (t2
, op0
, t1
));
21229 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op1
, t1
));
21230 t7
= gen_reg_rtx (V4DImode
);
21231 emit_insn (gen_avx2_permv4di_1 (t7
, gen_lowpart (V4DImode
, t4
),
21232 const2_rtx
, GEN_INT (3),
21233 const0_rtx
, const1_rtx
));
21234 t8
= gen_reg_rtx (V4DImode
);
21235 emit_insn (gen_avx2_permv4di_1 (t8
, gen_lowpart (V4DImode
, t3
),
21236 const2_rtx
, GEN_INT (3),
21237 const0_rtx
, const1_rtx
));
21238 emit_insn (gen_iorv32qi3 (t4
, t2
, gen_lowpart (V32QImode
, t7
)));
21239 emit_insn (gen_iorv32qi3 (t3
, t1
, gen_lowpart (V32QImode
, t8
)));
21245 gcc_assert (GET_MODE_SIZE (mode
) <= 16);
21252 /* The XOP VPPERM insn supports three inputs. By ignoring the
21253 one_operand_shuffle special case, we avoid creating another
21254 set of constant vectors in memory. */
21255 one_operand_shuffle
= false;
21257 /* mask = mask & {2*w-1, ...} */
21258 vt
= GEN_INT (2*w
- 1);
21262 /* mask = mask & {w-1, ...} */
21263 vt
= GEN_INT (w
- 1);
21266 for (i
= 0; i
< w
; i
++)
21268 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
21269 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
21270 NULL_RTX
, 0, OPTAB_DIRECT
);
21272 /* For non-QImode operations, convert the word permutation control
21273 into a byte permutation control. */
21274 if (mode
!= V16QImode
)
21276 mask
= expand_simple_binop (maskmode
, ASHIFT
, mask
,
21277 GEN_INT (exact_log2 (e
)),
21278 NULL_RTX
, 0, OPTAB_DIRECT
);
21280 /* Convert mask to vector of chars. */
21281 mask
= force_reg (V16QImode
, gen_lowpart (V16QImode
, mask
));
21283 /* Replicate each of the input bytes into byte positions:
21284 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21285 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21286 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21287 for (i
= 0; i
< 16; ++i
)
21288 vec
[i
] = GEN_INT (i
/e
* e
);
21289 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
21290 vt
= validize_mem (force_const_mem (V16QImode
, vt
));
21292 emit_insn (gen_xop_pperm (mask
, mask
, mask
, vt
));
21294 emit_insn (gen_ssse3_pshufbv16qi3 (mask
, mask
, vt
));
21296 /* Convert it into the byte positions by doing
21297 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21298 for (i
= 0; i
< 16; ++i
)
21299 vec
[i
] = GEN_INT (i
% e
);
21300 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
21301 vt
= validize_mem (force_const_mem (V16QImode
, vt
));
21302 emit_insn (gen_addv16qi3 (mask
, mask
, vt
));
21305 /* The actual shuffle operations all operate on V16QImode. */
21306 op0
= gen_lowpart (V16QImode
, op0
);
21307 op1
= gen_lowpart (V16QImode
, op1
);
21311 if (GET_MODE (target
) != V16QImode
)
21312 target
= gen_reg_rtx (V16QImode
);
21313 emit_insn (gen_xop_pperm (target
, op0
, op1
, mask
));
21314 if (target
!= operands
[0])
21315 emit_move_insn (operands
[0],
21316 gen_lowpart (GET_MODE (operands
[0]), target
));
21318 else if (one_operand_shuffle
)
21320 if (GET_MODE (target
) != V16QImode
)
21321 target
= gen_reg_rtx (V16QImode
);
21322 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, mask
));
21323 if (target
!= operands
[0])
21324 emit_move_insn (operands
[0],
21325 gen_lowpart (GET_MODE (operands
[0]), target
));
21332 /* Shuffle the two input vectors independently. */
21333 t1
= gen_reg_rtx (V16QImode
);
21334 t2
= gen_reg_rtx (V16QImode
);
21335 emit_insn (gen_ssse3_pshufbv16qi3 (t1
, op0
, mask
));
21336 emit_insn (gen_ssse3_pshufbv16qi3 (t2
, op1
, mask
));
21339 /* Then merge them together. The key is whether any given control
21340 element contained a bit set that indicates the second word. */
21341 mask
= operands
[3];
21343 if (maskmode
== V2DImode
&& !TARGET_SSE4_1
)
21345 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21346 more shuffle to convert the V2DI input mask into a V4SI
21347 input mask. At which point the masking that expand_int_vcond
21348 will work as desired. */
21349 rtx t3
= gen_reg_rtx (V4SImode
);
21350 emit_insn (gen_sse2_pshufd_1 (t3
, gen_lowpart (V4SImode
, mask
),
21351 const0_rtx
, const0_rtx
,
21352 const2_rtx
, const2_rtx
));
21354 maskmode
= V4SImode
;
21358 for (i
= 0; i
< w
; i
++)
21360 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
21361 vt
= force_reg (maskmode
, vt
);
21362 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
21363 NULL_RTX
, 0, OPTAB_DIRECT
);
21365 if (GET_MODE (target
) != mode
)
21366 target
= gen_reg_rtx (mode
);
21368 xops
[1] = gen_lowpart (mode
, t2
);
21369 xops
[2] = gen_lowpart (mode
, t1
);
21370 xops
[3] = gen_rtx_EQ (maskmode
, mask
, vt
);
21373 ok
= ix86_expand_int_vcond (xops
);
21375 if (target
!= operands
[0])
21376 emit_move_insn (operands
[0],
21377 gen_lowpart (GET_MODE (operands
[0]), target
));
21381 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
21382 true if we should do zero extension, else sign extension. HIGH_P is
21383 true if we want the N/2 high elements, else the low elements. */
21386 ix86_expand_sse_unpack (rtx dest
, rtx src
, bool unsigned_p
, bool high_p
)
21388 enum machine_mode imode
= GET_MODE (src
);
21393 rtx (*unpack
)(rtx
, rtx
);
21394 rtx (*extract
)(rtx
, rtx
) = NULL
;
21395 enum machine_mode halfmode
= BLKmode
;
21401 unpack
= gen_avx2_zero_extendv16qiv16hi2
;
21403 unpack
= gen_avx2_sign_extendv16qiv16hi2
;
21404 halfmode
= V16QImode
;
21406 = high_p
? gen_vec_extract_hi_v32qi
: gen_vec_extract_lo_v32qi
;
21410 unpack
= gen_avx2_zero_extendv8hiv8si2
;
21412 unpack
= gen_avx2_sign_extendv8hiv8si2
;
21413 halfmode
= V8HImode
;
21415 = high_p
? gen_vec_extract_hi_v16hi
: gen_vec_extract_lo_v16hi
;
21419 unpack
= gen_avx2_zero_extendv4siv4di2
;
21421 unpack
= gen_avx2_sign_extendv4siv4di2
;
21422 halfmode
= V4SImode
;
21424 = high_p
? gen_vec_extract_hi_v8si
: gen_vec_extract_lo_v8si
;
21428 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
21430 unpack
= gen_sse4_1_sign_extendv8qiv8hi2
;
21434 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
21436 unpack
= gen_sse4_1_sign_extendv4hiv4si2
;
21440 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
21442 unpack
= gen_sse4_1_sign_extendv2siv2di2
;
21445 gcc_unreachable ();
21448 if (GET_MODE_SIZE (imode
) == 32)
21450 tmp
= gen_reg_rtx (halfmode
);
21451 emit_insn (extract (tmp
, src
));
21455 /* Shift higher 8 bytes to lower 8 bytes. */
21456 tmp
= gen_reg_rtx (V1TImode
);
21457 emit_insn (gen_sse2_lshrv1ti3 (tmp
, gen_lowpart (V1TImode
, src
),
21459 tmp
= gen_lowpart (imode
, tmp
);
21464 emit_insn (unpack (dest
, tmp
));
21468 rtx (*unpack
)(rtx
, rtx
, rtx
);
21474 unpack
= gen_vec_interleave_highv16qi
;
21476 unpack
= gen_vec_interleave_lowv16qi
;
21480 unpack
= gen_vec_interleave_highv8hi
;
21482 unpack
= gen_vec_interleave_lowv8hi
;
21486 unpack
= gen_vec_interleave_highv4si
;
21488 unpack
= gen_vec_interleave_lowv4si
;
21491 gcc_unreachable ();
21495 tmp
= force_reg (imode
, CONST0_RTX (imode
));
21497 tmp
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
21498 src
, pc_rtx
, pc_rtx
);
21500 rtx tmp2
= gen_reg_rtx (imode
);
21501 emit_insn (unpack (tmp2
, src
, tmp
));
21502 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), tmp2
));
21506 /* Expand conditional increment or decrement using adb/sbb instructions.
21507 The default case using setcc followed by the conditional move can be
21508 done by generic code. */
21510 ix86_expand_int_addcc (rtx operands
[])
21512 enum rtx_code code
= GET_CODE (operands
[1]);
21514 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
, rtx
);
21516 rtx val
= const0_rtx
;
21517 bool fpcmp
= false;
21518 enum machine_mode mode
;
21519 rtx op0
= XEXP (operands
[1], 0);
21520 rtx op1
= XEXP (operands
[1], 1);
21522 if (operands
[3] != const1_rtx
21523 && operands
[3] != constm1_rtx
)
21525 if (!ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
21527 code
= GET_CODE (compare_op
);
21529 flags
= XEXP (compare_op
, 0);
21531 if (GET_MODE (flags
) == CCFPmode
21532 || GET_MODE (flags
) == CCFPUmode
)
21535 code
= ix86_fp_compare_code_to_integer (code
);
21542 PUT_CODE (compare_op
,
21543 reverse_condition_maybe_unordered
21544 (GET_CODE (compare_op
)));
21546 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
21549 mode
= GET_MODE (operands
[0]);
21551 /* Construct either adc or sbb insn. */
21552 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
21557 insn
= gen_subqi3_carry
;
21560 insn
= gen_subhi3_carry
;
21563 insn
= gen_subsi3_carry
;
21566 insn
= gen_subdi3_carry
;
21569 gcc_unreachable ();
21577 insn
= gen_addqi3_carry
;
21580 insn
= gen_addhi3_carry
;
21583 insn
= gen_addsi3_carry
;
21586 insn
= gen_adddi3_carry
;
21589 gcc_unreachable ();
21592 emit_insn (insn (operands
[0], operands
[2], val
, flags
, compare_op
));
21598 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
21599 but works for floating pointer parameters and nonoffsetable memories.
21600 For pushes, it returns just stack offsets; the values will be saved
21601 in the right order. Maximally three parts are generated. */
21604 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
21609 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
21611 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
21613 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
21614 gcc_assert (size
>= 2 && size
<= 4);
21616 /* Optimize constant pool reference to immediates. This is used by fp
21617 moves, that force all constants to memory to allow combining. */
21618 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
21620 rtx tmp
= maybe_get_pool_constant (operand
);
21625 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
21627 /* The only non-offsetable memories we handle are pushes. */
21628 int ok
= push_operand (operand
, VOIDmode
);
21632 operand
= copy_rtx (operand
);
21633 PUT_MODE (operand
, word_mode
);
21634 parts
[0] = parts
[1] = parts
[2] = parts
[3] = operand
;
21638 if (GET_CODE (operand
) == CONST_VECTOR
)
21640 enum machine_mode imode
= int_mode_for_mode (mode
);
21641 /* Caution: if we looked through a constant pool memory above,
21642 the operand may actually have a different mode now. That's
21643 ok, since we want to pun this all the way back to an integer. */
21644 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
21645 gcc_assert (operand
!= NULL
);
21651 if (mode
== DImode
)
21652 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
21657 if (REG_P (operand
))
21659 gcc_assert (reload_completed
);
21660 for (i
= 0; i
< size
; i
++)
21661 parts
[i
] = gen_rtx_REG (SImode
, REGNO (operand
) + i
);
21663 else if (offsettable_memref_p (operand
))
21665 operand
= adjust_address (operand
, SImode
, 0);
21666 parts
[0] = operand
;
21667 for (i
= 1; i
< size
; i
++)
21668 parts
[i
] = adjust_address (operand
, SImode
, 4 * i
);
21670 else if (GET_CODE (operand
) == CONST_DOUBLE
)
21675 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
21679 real_to_target (l
, &r
, mode
);
21680 parts
[3] = gen_int_mode (l
[3], SImode
);
21681 parts
[2] = gen_int_mode (l
[2], SImode
);
21684 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
21685 long double may not be 80-bit. */
21686 real_to_target (l
, &r
, mode
);
21687 parts
[2] = gen_int_mode (l
[2], SImode
);
21690 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
21693 gcc_unreachable ();
21695 parts
[1] = gen_int_mode (l
[1], SImode
);
21696 parts
[0] = gen_int_mode (l
[0], SImode
);
21699 gcc_unreachable ();
21704 if (mode
== TImode
)
21705 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
21706 if (mode
== XFmode
|| mode
== TFmode
)
21708 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
21709 if (REG_P (operand
))
21711 gcc_assert (reload_completed
);
21712 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
21713 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
21715 else if (offsettable_memref_p (operand
))
21717 operand
= adjust_address (operand
, DImode
, 0);
21718 parts
[0] = operand
;
21719 parts
[1] = adjust_address (operand
, upper_mode
, 8);
21721 else if (GET_CODE (operand
) == CONST_DOUBLE
)
21726 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
21727 real_to_target (l
, &r
, mode
);
21729 /* Do not use shift by 32 to avoid warning on 32bit systems. */
21730 if (HOST_BITS_PER_WIDE_INT
>= 64)
21733 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
21734 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
21737 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
21739 if (upper_mode
== SImode
)
21740 parts
[1] = gen_int_mode (l
[2], SImode
);
21741 else if (HOST_BITS_PER_WIDE_INT
>= 64)
21744 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
21745 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
21748 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
21751 gcc_unreachable ();
21758 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
21759 Return false when normal moves are needed; true when all required
21760 insns have been emitted. Operands 2-4 contain the input values
21761 int the correct order; operands 5-7 contain the output values. */
21764 ix86_split_long_move (rtx operands
[])
21769 int collisions
= 0;
21770 enum machine_mode mode
= GET_MODE (operands
[0]);
21771 bool collisionparts
[4];
21773 /* The DFmode expanders may ask us to move double.
21774 For 64bit target this is single move. By hiding the fact
21775 here we simplify i386.md splitters. */
21776 if (TARGET_64BIT
&& GET_MODE_SIZE (GET_MODE (operands
[0])) == 8)
21778 /* Optimize constant pool reference to immediates. This is used by
21779 fp moves, that force all constants to memory to allow combining. */
21781 if (MEM_P (operands
[1])
21782 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
21783 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
21784 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
21785 if (push_operand (operands
[0], VOIDmode
))
21787 operands
[0] = copy_rtx (operands
[0]);
21788 PUT_MODE (operands
[0], word_mode
);
21791 operands
[0] = gen_lowpart (DImode
, operands
[0]);
21792 operands
[1] = gen_lowpart (DImode
, operands
[1]);
21793 emit_move_insn (operands
[0], operands
[1]);
21797 /* The only non-offsettable memory we handle is push. */
21798 if (push_operand (operands
[0], VOIDmode
))
21801 gcc_assert (!MEM_P (operands
[0])
21802 || offsettable_memref_p (operands
[0]));
21804 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
21805 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
21807 /* When emitting push, take care for source operands on the stack. */
21808 if (push
&& MEM_P (operands
[1])
21809 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
21811 rtx src_base
= XEXP (part
[1][nparts
- 1], 0);
21813 /* Compensate for the stack decrement by 4. */
21814 if (!TARGET_64BIT
&& nparts
== 3
21815 && mode
== XFmode
&& TARGET_128BIT_LONG_DOUBLE
)
21816 src_base
= plus_constant (Pmode
, src_base
, 4);
21818 /* src_base refers to the stack pointer and is
21819 automatically decreased by emitted push. */
21820 for (i
= 0; i
< nparts
; i
++)
21821 part
[1][i
] = change_address (part
[1][i
],
21822 GET_MODE (part
[1][i
]), src_base
);
21825 /* We need to do copy in the right order in case an address register
21826 of the source overlaps the destination. */
21827 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
21831 for (i
= 0; i
< nparts
; i
++)
21834 = reg_overlap_mentioned_p (part
[0][i
], XEXP (part
[1][0], 0));
21835 if (collisionparts
[i
])
21839 /* Collision in the middle part can be handled by reordering. */
21840 if (collisions
== 1 && nparts
== 3 && collisionparts
[1])
21842 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21843 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21845 else if (collisions
== 1
21847 && (collisionparts
[1] || collisionparts
[2]))
21849 if (collisionparts
[1])
21851 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21852 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21856 tmp
= part
[0][2]; part
[0][2] = part
[0][3]; part
[0][3] = tmp
;
21857 tmp
= part
[1][2]; part
[1][2] = part
[1][3]; part
[1][3] = tmp
;
21861 /* If there are more collisions, we can't handle it by reordering.
21862 Do an lea to the last part and use only one colliding move. */
21863 else if (collisions
> 1)
21869 base
= part
[0][nparts
- 1];
21871 /* Handle the case when the last part isn't valid for lea.
21872 Happens in 64-bit mode storing the 12-byte XFmode. */
21873 if (GET_MODE (base
) != Pmode
)
21874 base
= gen_rtx_REG (Pmode
, REGNO (base
));
21876 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
21877 part
[1][0] = replace_equiv_address (part
[1][0], base
);
21878 for (i
= 1; i
< nparts
; i
++)
21880 tmp
= plus_constant (Pmode
, base
, UNITS_PER_WORD
* i
);
21881 part
[1][i
] = replace_equiv_address (part
[1][i
], tmp
);
21892 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
21893 emit_insn (ix86_gen_add3 (stack_pointer_rtx
,
21894 stack_pointer_rtx
, GEN_INT (-4)));
21895 emit_move_insn (part
[0][2], part
[1][2]);
21897 else if (nparts
== 4)
21899 emit_move_insn (part
[0][3], part
[1][3]);
21900 emit_move_insn (part
[0][2], part
[1][2]);
21905 /* In 64bit mode we don't have 32bit push available. In case this is
21906 register, it is OK - we will just use larger counterpart. We also
21907 retype memory - these comes from attempt to avoid REX prefix on
21908 moving of second half of TFmode value. */
21909 if (GET_MODE (part
[1][1]) == SImode
)
21911 switch (GET_CODE (part
[1][1]))
21914 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
21918 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
21922 gcc_unreachable ();
21925 if (GET_MODE (part
[1][0]) == SImode
)
21926 part
[1][0] = part
[1][1];
21929 emit_move_insn (part
[0][1], part
[1][1]);
21930 emit_move_insn (part
[0][0], part
[1][0]);
21934 /* Choose correct order to not overwrite the source before it is copied. */
21935 if ((REG_P (part
[0][0])
21936 && REG_P (part
[1][1])
21937 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
21939 && REGNO (part
[0][0]) == REGNO (part
[1][2]))
21941 && REGNO (part
[0][0]) == REGNO (part
[1][3]))))
21943 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
21945 for (i
= 0, j
= nparts
- 1; i
< nparts
; i
++, j
--)
21947 operands
[2 + i
] = part
[0][j
];
21948 operands
[6 + i
] = part
[1][j
];
21953 for (i
= 0; i
< nparts
; i
++)
21955 operands
[2 + i
] = part
[0][i
];
21956 operands
[6 + i
] = part
[1][i
];
21960 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
21961 if (optimize_insn_for_size_p ())
21963 for (j
= 0; j
< nparts
- 1; j
++)
21964 if (CONST_INT_P (operands
[6 + j
])
21965 && operands
[6 + j
] != const0_rtx
21966 && REG_P (operands
[2 + j
]))
21967 for (i
= j
; i
< nparts
- 1; i
++)
21968 if (CONST_INT_P (operands
[7 + i
])
21969 && INTVAL (operands
[7 + i
]) == INTVAL (operands
[6 + j
]))
21970 operands
[7 + i
] = operands
[2 + j
];
21973 for (i
= 0; i
< nparts
; i
++)
21974 emit_move_insn (operands
[2 + i
], operands
[6 + i
]);
21979 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
21980 left shift by a constant, either using a single shift or
21981 a sequence of add instructions. */
21984 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
21986 rtx (*insn
)(rtx
, rtx
, rtx
);
21989 || (count
* ix86_cost
->add
<= ix86_cost
->shift_const
21990 && !optimize_insn_for_size_p ()))
21992 insn
= mode
== DImode
? gen_addsi3
: gen_adddi3
;
21993 while (count
-- > 0)
21994 emit_insn (insn (operand
, operand
, operand
));
21998 insn
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
21999 emit_insn (insn (operand
, operand
, GEN_INT (count
)));
22004 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
22006 rtx (*gen_ashl3
)(rtx
, rtx
, rtx
);
22007 rtx (*gen_shld
)(rtx
, rtx
, rtx
);
22008 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
22010 rtx low
[2], high
[2];
22013 if (CONST_INT_P (operands
[2]))
22015 split_double_mode (mode
, operands
, 2, low
, high
);
22016 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
22018 if (count
>= half_width
)
22020 emit_move_insn (high
[0], low
[1]);
22021 emit_move_insn (low
[0], const0_rtx
);
22023 if (count
> half_width
)
22024 ix86_expand_ashl_const (high
[0], count
- half_width
, mode
);
22028 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
22030 if (!rtx_equal_p (operands
[0], operands
[1]))
22031 emit_move_insn (operands
[0], operands
[1]);
22033 emit_insn (gen_shld (high
[0], low
[0], GEN_INT (count
)));
22034 ix86_expand_ashl_const (low
[0], count
, mode
);
22039 split_double_mode (mode
, operands
, 1, low
, high
);
22041 gen_ashl3
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
22043 if (operands
[1] == const1_rtx
)
22045 /* Assuming we've chosen a QImode capable registers, then 1 << N
22046 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22047 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
22049 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
22051 ix86_expand_clear (low
[0]);
22052 ix86_expand_clear (high
[0]);
22053 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (half_width
)));
22055 d
= gen_lowpart (QImode
, low
[0]);
22056 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
22057 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
22058 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
22060 d
= gen_lowpart (QImode
, high
[0]);
22061 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
22062 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
22063 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
22066 /* Otherwise, we can get the same results by manually performing
22067 a bit extract operation on bit 5/6, and then performing the two
22068 shifts. The two methods of getting 0/1 into low/high are exactly
22069 the same size. Avoiding the shift in the bit extract case helps
22070 pentium4 a bit; no one else seems to care much either way. */
22073 enum machine_mode half_mode
;
22074 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
);
22075 rtx (*gen_and3
)(rtx
, rtx
, rtx
);
22076 rtx (*gen_xor3
)(rtx
, rtx
, rtx
);
22077 HOST_WIDE_INT bits
;
22080 if (mode
== DImode
)
22082 half_mode
= SImode
;
22083 gen_lshr3
= gen_lshrsi3
;
22084 gen_and3
= gen_andsi3
;
22085 gen_xor3
= gen_xorsi3
;
22090 half_mode
= DImode
;
22091 gen_lshr3
= gen_lshrdi3
;
22092 gen_and3
= gen_anddi3
;
22093 gen_xor3
= gen_xordi3
;
22097 if (TARGET_PARTIAL_REG_STALL
&& !optimize_insn_for_size_p ())
22098 x
= gen_rtx_ZERO_EXTEND (half_mode
, operands
[2]);
22100 x
= gen_lowpart (half_mode
, operands
[2]);
22101 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
22103 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (bits
)));
22104 emit_insn (gen_and3 (high
[0], high
[0], const1_rtx
));
22105 emit_move_insn (low
[0], high
[0]);
22106 emit_insn (gen_xor3 (low
[0], low
[0], const1_rtx
));
22109 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
22110 emit_insn (gen_ashl3 (high
[0], high
[0], operands
[2]));
22114 if (operands
[1] == constm1_rtx
)
22116 /* For -1 << N, we can avoid the shld instruction, because we
22117 know that we're shifting 0...31/63 ones into a -1. */
22118 emit_move_insn (low
[0], constm1_rtx
);
22119 if (optimize_insn_for_size_p ())
22120 emit_move_insn (high
[0], low
[0]);
22122 emit_move_insn (high
[0], constm1_rtx
);
22126 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
22128 if (!rtx_equal_p (operands
[0], operands
[1]))
22129 emit_move_insn (operands
[0], operands
[1]);
22131 split_double_mode (mode
, operands
, 1, low
, high
);
22132 emit_insn (gen_shld (high
[0], low
[0], operands
[2]));
22135 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
22137 if (TARGET_CMOVE
&& scratch
)
22139 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
22140 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
22142 ix86_expand_clear (scratch
);
22143 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2], scratch
));
22147 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
22148 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
22150 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
22155 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
22157 rtx (*gen_ashr3
)(rtx
, rtx
, rtx
)
22158 = mode
== DImode
? gen_ashrsi3
: gen_ashrdi3
;
22159 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
22160 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
22162 rtx low
[2], high
[2];
22165 if (CONST_INT_P (operands
[2]))
22167 split_double_mode (mode
, operands
, 2, low
, high
);
22168 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
22170 if (count
== GET_MODE_BITSIZE (mode
) - 1)
22172 emit_move_insn (high
[0], high
[1]);
22173 emit_insn (gen_ashr3 (high
[0], high
[0],
22174 GEN_INT (half_width
- 1)));
22175 emit_move_insn (low
[0], high
[0]);
22178 else if (count
>= half_width
)
22180 emit_move_insn (low
[0], high
[1]);
22181 emit_move_insn (high
[0], low
[0]);
22182 emit_insn (gen_ashr3 (high
[0], high
[0],
22183 GEN_INT (half_width
- 1)));
22185 if (count
> half_width
)
22186 emit_insn (gen_ashr3 (low
[0], low
[0],
22187 GEN_INT (count
- half_width
)));
22191 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
22193 if (!rtx_equal_p (operands
[0], operands
[1]))
22194 emit_move_insn (operands
[0], operands
[1]);
22196 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
22197 emit_insn (gen_ashr3 (high
[0], high
[0], GEN_INT (count
)));
22202 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
22204 if (!rtx_equal_p (operands
[0], operands
[1]))
22205 emit_move_insn (operands
[0], operands
[1]);
22207 split_double_mode (mode
, operands
, 1, low
, high
);
22209 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
22210 emit_insn (gen_ashr3 (high
[0], high
[0], operands
[2]));
22212 if (TARGET_CMOVE
&& scratch
)
22214 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
22215 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
22217 emit_move_insn (scratch
, high
[0]);
22218 emit_insn (gen_ashr3 (scratch
, scratch
,
22219 GEN_INT (half_width
- 1)));
22220 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
22225 rtx (*gen_x86_shift_adj_3
)(rtx
, rtx
, rtx
)
22226 = mode
== DImode
? gen_x86_shiftsi_adj_3
: gen_x86_shiftdi_adj_3
;
22228 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
22234 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
22236 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
)
22237 = mode
== DImode
? gen_lshrsi3
: gen_lshrdi3
;
22238 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
22239 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
22241 rtx low
[2], high
[2];
22244 if (CONST_INT_P (operands
[2]))
22246 split_double_mode (mode
, operands
, 2, low
, high
);
22247 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
22249 if (count
>= half_width
)
22251 emit_move_insn (low
[0], high
[1]);
22252 ix86_expand_clear (high
[0]);
22254 if (count
> half_width
)
22255 emit_insn (gen_lshr3 (low
[0], low
[0],
22256 GEN_INT (count
- half_width
)));
22260 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
22262 if (!rtx_equal_p (operands
[0], operands
[1]))
22263 emit_move_insn (operands
[0], operands
[1]);
22265 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
22266 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (count
)));
22271 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
22273 if (!rtx_equal_p (operands
[0], operands
[1]))
22274 emit_move_insn (operands
[0], operands
[1]);
22276 split_double_mode (mode
, operands
, 1, low
, high
);
22278 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
22279 emit_insn (gen_lshr3 (high
[0], high
[0], operands
[2]));
22281 if (TARGET_CMOVE
&& scratch
)
22283 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
22284 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
22286 ix86_expand_clear (scratch
);
22287 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
22292 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
22293 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
22295 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
22300 /* Predict just emitted jump instruction to be taken with probability PROB. */
22302 predict_jump (int prob
)
22304 rtx insn
= get_last_insn ();
22305 gcc_assert (JUMP_P (insn
));
22306 add_int_reg_note (insn
, REG_BR_PROB
, prob
);
22309 /* Helper function for the string operations below. Dest VARIABLE whether
22310 it is aligned to VALUE bytes. If true, jump to the label. */
22312 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
22314 rtx label
= gen_label_rtx ();
22315 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
22316 if (GET_MODE (variable
) == DImode
)
22317 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
22319 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
22320 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
22323 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
22325 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
22329 /* Adjust COUNTER by the VALUE. */
22331 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
22333 rtx (*gen_add
)(rtx
, rtx
, rtx
)
22334 = GET_MODE (countreg
) == DImode
? gen_adddi3
: gen_addsi3
;
22336 emit_insn (gen_add (countreg
, countreg
, GEN_INT (-value
)));
22339 /* Zero extend possibly SImode EXP to Pmode register. */
22341 ix86_zero_extend_to_Pmode (rtx exp
)
22343 return force_reg (Pmode
, convert_to_mode (Pmode
, exp
, 1));
22346 /* Divide COUNTREG by SCALE. */
22348 scale_counter (rtx countreg
, int scale
)
22354 if (CONST_INT_P (countreg
))
22355 return GEN_INT (INTVAL (countreg
) / scale
);
22356 gcc_assert (REG_P (countreg
));
22358 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
22359 GEN_INT (exact_log2 (scale
)),
22360 NULL
, 1, OPTAB_DIRECT
);
22364 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22365 DImode for constant loop counts. */
22367 static enum machine_mode
22368 counter_mode (rtx count_exp
)
22370 if (GET_MODE (count_exp
) != VOIDmode
)
22371 return GET_MODE (count_exp
);
22372 if (!CONST_INT_P (count_exp
))
22374 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
22379 /* Copy the address to a Pmode register. This is used for x32 to
22380 truncate DImode TLS address to a SImode register. */
22383 ix86_copy_addr_to_reg (rtx addr
)
22385 if (GET_MODE (addr
) == Pmode
)
22386 return copy_addr_to_reg (addr
);
22389 gcc_assert (GET_MODE (addr
) == DImode
&& Pmode
== SImode
);
22390 return gen_rtx_SUBREG (SImode
, copy_to_mode_reg (DImode
, addr
), 0);
22394 /* When ISSETMEM is FALSE, output simple loop to move memory pointer to SRCPTR
22395 to DESTPTR via chunks of MODE unrolled UNROLL times, overall size is COUNT
22396 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
22397 memory by VALUE (supposed to be in MODE).
22399 The size is rounded down to whole number of chunk size moved at once.
22400 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
22404 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
22405 rtx destptr
, rtx srcptr
, rtx value
,
22406 rtx count
, enum machine_mode mode
, int unroll
,
22407 int expected_size
, bool issetmem
)
22409 rtx out_label
, top_label
, iter
, tmp
;
22410 enum machine_mode iter_mode
= counter_mode (count
);
22411 int piece_size_n
= GET_MODE_SIZE (mode
) * unroll
;
22412 rtx piece_size
= GEN_INT (piece_size_n
);
22413 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
22417 top_label
= gen_label_rtx ();
22418 out_label
= gen_label_rtx ();
22419 iter
= gen_reg_rtx (iter_mode
);
22421 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
22422 NULL
, 1, OPTAB_DIRECT
);
22423 /* Those two should combine. */
22424 if (piece_size
== const1_rtx
)
22426 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
22428 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
22430 emit_move_insn (iter
, const0_rtx
);
22432 emit_label (top_label
);
22434 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
22436 /* This assert could be relaxed - in this case we'll need to compute
22437 smallest power of two, containing in PIECE_SIZE_N and pass it to
22439 gcc_assert ((piece_size_n
& (piece_size_n
- 1)) == 0);
22440 destmem
= offset_address (destmem
, tmp
, piece_size_n
);
22441 destmem
= adjust_address (destmem
, mode
, 0);
22445 srcmem
= offset_address (srcmem
, copy_rtx (tmp
), piece_size_n
);
22446 srcmem
= adjust_address (srcmem
, mode
, 0);
22448 /* When unrolling for chips that reorder memory reads and writes,
22449 we can save registers by using single temporary.
22450 Also using 4 temporaries is overkill in 32bit mode. */
22451 if (!TARGET_64BIT
&& 0)
22453 for (i
= 0; i
< unroll
; i
++)
22458 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
22460 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
22462 emit_move_insn (destmem
, srcmem
);
22468 gcc_assert (unroll
<= 4);
22469 for (i
= 0; i
< unroll
; i
++)
22471 tmpreg
[i
] = gen_reg_rtx (mode
);
22475 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
22477 emit_move_insn (tmpreg
[i
], srcmem
);
22479 for (i
= 0; i
< unroll
; i
++)
22484 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
22486 emit_move_insn (destmem
, tmpreg
[i
]);
22491 for (i
= 0; i
< unroll
; i
++)
22495 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
22496 emit_move_insn (destmem
, value
);
22499 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
22500 true, OPTAB_LIB_WIDEN
);
22502 emit_move_insn (iter
, tmp
);
22504 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
22506 if (expected_size
!= -1)
22508 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
22509 if (expected_size
== 0)
22511 else if (expected_size
> REG_BR_PROB_BASE
)
22512 predict_jump (REG_BR_PROB_BASE
- 1);
22514 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
22517 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
22518 iter
= ix86_zero_extend_to_Pmode (iter
);
22519 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
22520 true, OPTAB_LIB_WIDEN
);
22521 if (tmp
!= destptr
)
22522 emit_move_insn (destptr
, tmp
);
22525 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
22526 true, OPTAB_LIB_WIDEN
);
22528 emit_move_insn (srcptr
, tmp
);
22530 emit_label (out_label
);
22533 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
22534 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22535 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22536 For setmem case, VALUE is a promoted to a wider size ORIG_VALUE.
22537 ORIG_VALUE is the original value passed to memset to fill the memory with.
22538 Other arguments have same meaning as for previous function. */
22541 expand_set_or_movmem_via_rep (rtx destmem
, rtx srcmem
,
22542 rtx destptr
, rtx srcptr
, rtx value
, rtx orig_value
,
22544 enum machine_mode mode
, bool issetmem
)
22549 HOST_WIDE_INT rounded_count
;
22551 /* If possible, it is shorter to use rep movs.
22552 TODO: Maybe it is better to move this logic to decide_alg. */
22553 if (mode
== QImode
&& CONST_INT_P (count
) && !(INTVAL (count
) & 3)
22554 && (!issetmem
|| orig_value
== const0_rtx
))
22557 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
22558 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
22560 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
,
22561 GET_MODE_SIZE (mode
)));
22562 if (mode
!= QImode
)
22564 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
22565 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
22566 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
22569 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
22570 if ((!issetmem
|| orig_value
== const0_rtx
) && CONST_INT_P (count
))
22572 rounded_count
= (INTVAL (count
)
22573 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
22574 destmem
= shallow_copy_rtx (destmem
);
22575 set_mem_size (destmem
, rounded_count
);
22577 else if (MEM_SIZE_KNOWN_P (destmem
))
22578 clear_mem_size (destmem
);
22582 value
= force_reg (mode
, gen_lowpart (mode
, value
));
22583 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
22587 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
22588 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
22589 if (mode
!= QImode
)
22591 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
22592 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
22593 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
22596 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
22597 if (CONST_INT_P (count
))
22599 rounded_count
= (INTVAL (count
)
22600 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
22601 srcmem
= shallow_copy_rtx (srcmem
);
22602 set_mem_size (srcmem
, rounded_count
);
22606 if (MEM_SIZE_KNOWN_P (srcmem
))
22607 clear_mem_size (srcmem
);
22609 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
22614 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
22616 SRC is passed by pointer to be updated on return.
22617 Return value is updated DST. */
22619 emit_memmov (rtx destmem
, rtx
*srcmem
, rtx destptr
, rtx srcptr
,
22620 HOST_WIDE_INT size_to_move
)
22622 rtx dst
= destmem
, src
= *srcmem
, adjust
, tempreg
;
22623 enum insn_code code
;
22624 enum machine_mode move_mode
;
22627 /* Find the widest mode in which we could perform moves.
22628 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
22629 it until move of such size is supported. */
22630 piece_size
= 1 << floor_log2 (size_to_move
);
22631 move_mode
= mode_for_size (piece_size
* BITS_PER_UNIT
, MODE_INT
, 0);
22632 code
= optab_handler (mov_optab
, move_mode
);
22633 while (code
== CODE_FOR_nothing
&& piece_size
> 1)
22636 move_mode
= mode_for_size (piece_size
* BITS_PER_UNIT
, MODE_INT
, 0);
22637 code
= optab_handler (mov_optab
, move_mode
);
22640 /* Find the corresponding vector mode with the same size as MOVE_MODE.
22641 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
22642 if (GET_MODE_SIZE (move_mode
) > GET_MODE_SIZE (word_mode
))
22644 int nunits
= GET_MODE_SIZE (move_mode
) / GET_MODE_SIZE (word_mode
);
22645 move_mode
= mode_for_vector (word_mode
, nunits
);
22646 code
= optab_handler (mov_optab
, move_mode
);
22647 if (code
== CODE_FOR_nothing
)
22649 move_mode
= word_mode
;
22650 piece_size
= GET_MODE_SIZE (move_mode
);
22651 code
= optab_handler (mov_optab
, move_mode
);
22654 gcc_assert (code
!= CODE_FOR_nothing
);
22656 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
, 0);
22657 src
= adjust_automodify_address_nv (src
, move_mode
, srcptr
, 0);
22659 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
22660 gcc_assert (size_to_move
% piece_size
== 0);
22661 adjust
= GEN_INT (piece_size
);
22662 for (i
= 0; i
< size_to_move
; i
+= piece_size
)
22664 /* We move from memory to memory, so we'll need to do it via
22665 a temporary register. */
22666 tempreg
= gen_reg_rtx (move_mode
);
22667 emit_insn (GEN_FCN (code
) (tempreg
, src
));
22668 emit_insn (GEN_FCN (code
) (dst
, tempreg
));
22670 emit_move_insn (destptr
,
22671 gen_rtx_PLUS (Pmode
, copy_rtx (destptr
), adjust
));
22672 emit_move_insn (srcptr
,
22673 gen_rtx_PLUS (Pmode
, copy_rtx (srcptr
), adjust
));
22675 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
,
22677 src
= adjust_automodify_address_nv (src
, move_mode
, srcptr
,
22681 /* Update DST and SRC rtx. */
22686 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
22688 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
22689 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
22692 if (CONST_INT_P (count
))
22694 HOST_WIDE_INT countval
= INTVAL (count
);
22695 HOST_WIDE_INT epilogue_size
= countval
% max_size
;
22698 /* For now MAX_SIZE should be a power of 2. This assert could be
22699 relaxed, but it'll require a bit more complicated epilogue
22701 gcc_assert ((max_size
& (max_size
- 1)) == 0);
22702 for (i
= max_size
; i
>= 1; i
>>= 1)
22704 if (epilogue_size
& i
)
22705 destmem
= emit_memmov (destmem
, &srcmem
, destptr
, srcptr
, i
);
22711 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
22712 count
, 1, OPTAB_DIRECT
);
22713 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
22714 count
, QImode
, 1, 4, false);
22718 /* When there are stringops, we can cheaply increase dest and src pointers.
22719 Otherwise we save code size by maintaining offset (zero is readily
22720 available from preceding rep operation) and using x86 addressing modes.
22722 if (TARGET_SINGLE_STRINGOP
)
22726 rtx label
= ix86_expand_aligntest (count
, 4, true);
22727 src
= change_address (srcmem
, SImode
, srcptr
);
22728 dest
= change_address (destmem
, SImode
, destptr
);
22729 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22730 emit_label (label
);
22731 LABEL_NUSES (label
) = 1;
22735 rtx label
= ix86_expand_aligntest (count
, 2, true);
22736 src
= change_address (srcmem
, HImode
, srcptr
);
22737 dest
= change_address (destmem
, HImode
, destptr
);
22738 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22739 emit_label (label
);
22740 LABEL_NUSES (label
) = 1;
22744 rtx label
= ix86_expand_aligntest (count
, 1, true);
22745 src
= change_address (srcmem
, QImode
, srcptr
);
22746 dest
= change_address (destmem
, QImode
, destptr
);
22747 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22748 emit_label (label
);
22749 LABEL_NUSES (label
) = 1;
22754 rtx offset
= force_reg (Pmode
, const0_rtx
);
22759 rtx label
= ix86_expand_aligntest (count
, 4, true);
22760 src
= change_address (srcmem
, SImode
, srcptr
);
22761 dest
= change_address (destmem
, SImode
, destptr
);
22762 emit_move_insn (dest
, src
);
22763 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
22764 true, OPTAB_LIB_WIDEN
);
22766 emit_move_insn (offset
, tmp
);
22767 emit_label (label
);
22768 LABEL_NUSES (label
) = 1;
22772 rtx label
= ix86_expand_aligntest (count
, 2, true);
22773 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
22774 src
= change_address (srcmem
, HImode
, tmp
);
22775 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
22776 dest
= change_address (destmem
, HImode
, tmp
);
22777 emit_move_insn (dest
, src
);
22778 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
22779 true, OPTAB_LIB_WIDEN
);
22781 emit_move_insn (offset
, tmp
);
22782 emit_label (label
);
22783 LABEL_NUSES (label
) = 1;
22787 rtx label
= ix86_expand_aligntest (count
, 1, true);
22788 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
22789 src
= change_address (srcmem
, QImode
, tmp
);
22790 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
22791 dest
= change_address (destmem
, QImode
, tmp
);
22792 emit_move_insn (dest
, src
);
22793 emit_label (label
);
22794 LABEL_NUSES (label
) = 1;
/* NOTE(review): extraction artifact — original GCC line numbers are fused into
   the text below and interior lines (braces, blank lines, the final
   `return dst;`) were dropped by the extraction.  Code tokens are left
   byte-identical; only comments are added.  */
/* Emits stores filling SIZE_TO_MOVE bytes at DSTMEM with PROMOTED_VAL,
   advancing DESTPTR past each piece; returns the updated DST mem rtx
   (the `return dst;` line is missing from this fragment).  */
22799 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
22800 with value PROMOTED_VAL.
22801 SRC is passed by pointer to be updated on return.
22802 Return value is updated DST. */
22804 emit_memset (rtx destmem
, rtx destptr
, rtx promoted_val
,
22805 HOST_WIDE_INT size_to_move
)
22807 rtx dst
= destmem
, adjust
;
22808 enum insn_code code
;
22809 enum machine_mode move_mode
;
/* Widest mode usable for the stores: start from the mode of the promoted
   value, fall back to QImode for VOIDmode, and narrow when SIZE_TO_MOVE
   is smaller than the value's mode.  */
22812 /* Find the widest mode in which we could perform moves.
22813 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
22814 it until move of such size is supported. */
22815 move_mode
= GET_MODE (promoted_val
);
22816 if (move_mode
== VOIDmode
)
22817 move_mode
= QImode
;
22818 if (size_to_move
< GET_MODE_SIZE (move_mode
))
22820 move_mode
= mode_for_size (size_to_move
* BITS_PER_UNIT
, MODE_INT
, 0);
22821 promoted_val
= gen_lowpart (move_mode
, promoted_val
);
22823 piece_size
= GET_MODE_SIZE (move_mode
);
22824 code
= optab_handler (mov_optab
, move_mode
);
22825 gcc_assert (code
!= CODE_FOR_nothing
&& promoted_val
!= NULL_RTX
);
22827 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
, 0);
22829 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
22830 gcc_assert (size_to_move
% piece_size
== 0);
22831 adjust
= GEN_INT (piece_size
);
22832 for (i
= 0; i
< size_to_move
; i
+= piece_size
)
/* For word-sized-or-smaller pieces use the strset pattern (which also
   advances DESTPTR); wider (vector) pieces use a plain mov and bump
   DESTPTR by hand with the PLUS below.  */
22834 if (piece_size
<= GET_MODE_SIZE (word_mode
))
22836 emit_insn (gen_strset (destptr
, dst
, promoted_val
));
22840 emit_insn (GEN_FCN (code
) (dst
, promoted_val
));
22842 emit_move_insn (destptr
,
22843 gen_rtx_PLUS (Pmode
, copy_rtx (destptr
), adjust
));
22845 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
,
22849 /* Update DST rtx. */
/* NOTE(review): extraction artifact — fused line numbers and dropped
   braces; code tokens left byte-identical, comments only added.  */
/* Masks COUNT down to count & (max_size - 1) and emits a byte-granular
   set loop for the remainder via expand_set_or_movmem_via_loop
   (srcmem/srcptr are NULL because this is the memset path).  */
22852 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
22854 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
22855 rtx count
, int max_size
)
22858 expand_simple_binop (counter_mode (count
), AND
, count
,
22859 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
22860 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
22861 gen_lowpart (QImode
, value
), count
, QImode
,
22862 1, max_size
/ 2, true);
22865 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
22867 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx vec_value
,
22868 rtx count
, int max_size
)
22872 if (CONST_INT_P (count
))
22874 HOST_WIDE_INT countval
= INTVAL (count
);
22875 HOST_WIDE_INT epilogue_size
= countval
% max_size
;
22878 /* For now MAX_SIZE should be a power of 2. This assert could be
22879 relaxed, but it'll require a bit more complicated epilogue
22881 gcc_assert ((max_size
& (max_size
- 1)) == 0);
22882 for (i
= max_size
; i
>= 1; i
>>= 1)
22884 if (epilogue_size
& i
)
22886 if (vec_value
&& i
> GET_MODE_SIZE (GET_MODE (value
)))
22887 destmem
= emit_memset (destmem
, destptr
, vec_value
, i
);
22889 destmem
= emit_memset (destmem
, destptr
, value
, i
);
22896 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
22901 rtx label
= ix86_expand_aligntest (count
, 16, true);
22904 dest
= change_address (destmem
, DImode
, destptr
);
22905 emit_insn (gen_strset (destptr
, dest
, value
));
22906 emit_insn (gen_strset (destptr
, dest
, value
));
22910 dest
= change_address (destmem
, SImode
, destptr
);
22911 emit_insn (gen_strset (destptr
, dest
, value
));
22912 emit_insn (gen_strset (destptr
, dest
, value
));
22913 emit_insn (gen_strset (destptr
, dest
, value
));
22914 emit_insn (gen_strset (destptr
, dest
, value
));
22916 emit_label (label
);
22917 LABEL_NUSES (label
) = 1;
22921 rtx label
= ix86_expand_aligntest (count
, 8, true);
22924 dest
= change_address (destmem
, DImode
, destptr
);
22925 emit_insn (gen_strset (destptr
, dest
, value
));
22929 dest
= change_address (destmem
, SImode
, destptr
);
22930 emit_insn (gen_strset (destptr
, dest
, value
));
22931 emit_insn (gen_strset (destptr
, dest
, value
));
22933 emit_label (label
);
22934 LABEL_NUSES (label
) = 1;
22938 rtx label
= ix86_expand_aligntest (count
, 4, true);
22939 dest
= change_address (destmem
, SImode
, destptr
);
22940 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
22941 emit_label (label
);
22942 LABEL_NUSES (label
) = 1;
22946 rtx label
= ix86_expand_aligntest (count
, 2, true);
22947 dest
= change_address (destmem
, HImode
, destptr
);
22948 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
22949 emit_label (label
);
22950 LABEL_NUSES (label
) = 1;
22954 rtx label
= ix86_expand_aligntest (count
, 1, true);
22955 dest
= change_address (destmem
, QImode
, destptr
);
22956 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
22957 emit_label (label
);
22958 LABEL_NUSES (label
) = 1;
22962 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
22963 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
22964 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
22966 Return value is updated DESTMEM. */
22968 expand_set_or_movmem_prologue (rtx destmem
, rtx srcmem
,
22969 rtx destptr
, rtx srcptr
, rtx value
,
22970 rtx vec_value
, rtx count
, int align
,
22971 int desired_alignment
, bool issetmem
)
22974 for (i
= 1; i
< desired_alignment
; i
<<= 1)
22978 rtx label
= ix86_expand_aligntest (destptr
, i
, false);
22981 if (vec_value
&& i
> GET_MODE_SIZE (GET_MODE (value
)))
22982 destmem
= emit_memset (destmem
, destptr
, vec_value
, i
);
22984 destmem
= emit_memset (destmem
, destptr
, value
, i
);
22987 destmem
= emit_memmov (destmem
, &srcmem
, destptr
, srcptr
, i
);
22988 ix86_adjust_counter (count
, i
);
22989 emit_label (label
);
22990 LABEL_NUSES (label
) = 1;
22991 set_mem_align (destmem
, i
* 2 * BITS_PER_UNIT
);
22997 /* Test if COUNT&SIZE is nonzero and if so, expand movme
22998 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
22999 and jump to DONE_LABEL. */
23001 expand_small_movmem_or_setmem (rtx destmem
, rtx srcmem
,
23002 rtx destptr
, rtx srcptr
,
23003 rtx value
, rtx vec_value
,
23004 rtx count
, int size
,
23005 rtx done_label
, bool issetmem
)
23007 rtx label
= ix86_expand_aligntest (count
, size
, false);
23008 enum machine_mode mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 1);
23012 /* If we do not have vector value to copy, we must reduce size. */
23017 if (GET_MODE (value
) == VOIDmode
&& size
> 8)
23019 else if (GET_MODE_SIZE (mode
) > GET_MODE_SIZE (GET_MODE (value
)))
23020 mode
= GET_MODE (value
);
23023 mode
= GET_MODE (vec_value
), value
= vec_value
;
23027 /* Choose appropriate vector mode. */
23029 mode
= TARGET_AVX
? V32QImode
: TARGET_SSE
? V16QImode
: DImode
;
23030 else if (size
>= 16)
23031 mode
= TARGET_SSE
? V16QImode
: DImode
;
23032 srcmem
= change_address (srcmem
, mode
, srcptr
);
23034 destmem
= change_address (destmem
, mode
, destptr
);
23035 modesize
= GEN_INT (GET_MODE_SIZE (mode
));
23036 gcc_assert (GET_MODE_SIZE (mode
) <= size
);
23037 for (n
= 0; n
* GET_MODE_SIZE (mode
) < size
; n
++)
23040 emit_move_insn (destmem
, gen_lowpart (mode
, value
));
23043 emit_move_insn (destmem
, srcmem
);
23044 srcmem
= offset_address (srcmem
, modesize
, GET_MODE_SIZE (mode
));
23046 destmem
= offset_address (destmem
, modesize
, GET_MODE_SIZE (mode
));
23049 destmem
= offset_address (destmem
, count
, 1);
23050 destmem
= offset_address (destmem
, GEN_INT (-size
- GET_MODE_SIZE (mode
)),
23051 GET_MODE_SIZE (mode
));
23053 emit_move_insn (destmem
, gen_lowpart (mode
, value
));
23056 srcmem
= offset_address (srcmem
, count
, 1);
23057 srcmem
= offset_address (srcmem
, GEN_INT (-size
- GET_MODE_SIZE (mode
)),
23058 GET_MODE_SIZE (mode
));
23059 emit_move_insn (destmem
, srcmem
);
23061 emit_jump_insn (gen_jump (done_label
));
23064 emit_label (label
);
23065 LABEL_NUSES (label
) = 1;
23068 /* Handle small memcpy (up to SIZE that is supposed to be small power of 2.
23069 and get ready for the main memcpy loop by copying iniital DESIRED_ALIGN-ALIGN
23070 bytes and last SIZE bytes adjusitng DESTPTR/SRCPTR/COUNT in a way we can
23071 proceed with an loop copying SIZE bytes at once. Do moves in MODE.
23072 DONE_LABEL is a label after the whole copying sequence. The label is created
23073 on demand if *DONE_LABEL is NULL.
23074 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
23075 bounds after the initial copies.
23077 DESTMEM/SRCMEM are memory expressions pointing to the copies block,
23078 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether
23079 we will dispatch to a library call for large blocks.
23081 In pseudocode we do:
23085 Assume that SIZE is 4. Bigger sizes are handled analogously
23088 copy 4 bytes from SRCPTR to DESTPTR
23089 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23094 copy 1 byte from SRCPTR to DESTPTR
23097 copy 2 bytes from SRCPTR to DESTPTR
23098 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23103 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23104 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
23106 OLD_DESPTR = DESTPTR;
23107 Align DESTPTR up to DESIRED_ALIGN
23108 SRCPTR += DESTPTR - OLD_DESTPTR
23109 COUNT -= DEST_PTR - OLD_DESTPTR
23111 Round COUNT down to multiple of SIZE
23112 << optional caller supplied zero size guard is here >>
23113 << optional caller suppplied dynamic check is here >>
23114 << caller supplied main copy loop is here >>
23119 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem
, rtx srcmem
,
23120 rtx
*destptr
, rtx
*srcptr
,
23121 enum machine_mode mode
,
23122 rtx value
, rtx vec_value
,
23128 unsigned HOST_WIDE_INT
*min_size
,
23129 bool dynamic_check
,
23132 rtx loop_label
= NULL
, label
;
23135 int prolog_size
= 0;
23138 /* Chose proper value to copy. */
23139 if (issetmem
&& VECTOR_MODE_P (mode
))
23140 mode_value
= vec_value
;
23142 mode_value
= value
;
23143 gcc_assert (GET_MODE_SIZE (mode
) <= size
);
23145 /* See if block is big or small, handle small blocks. */
23146 if (!CONST_INT_P (*count
) && *min_size
< (unsigned HOST_WIDE_INT
)size
)
23149 loop_label
= gen_label_rtx ();
23152 *done_label
= gen_label_rtx ();
23154 emit_cmp_and_jump_insns (*count
, GEN_INT (size2
), GE
, 0, GET_MODE (*count
),
23158 /* Handle sizes > 3. */
23159 for (;size2
> 2; size2
>>= 1)
23160 expand_small_movmem_or_setmem (destmem
, srcmem
,
23164 size2
, *done_label
, issetmem
);
23165 /* Nothing to copy? Jump to DONE_LABEL if so */
23166 emit_cmp_and_jump_insns (*count
, const0_rtx
, EQ
, 0, GET_MODE (*count
),
23169 /* Do a byte copy. */
23170 destmem
= change_address (destmem
, QImode
, *destptr
);
23172 emit_move_insn (destmem
, gen_lowpart (QImode
, value
));
23175 srcmem
= change_address (srcmem
, QImode
, *srcptr
);
23176 emit_move_insn (destmem
, srcmem
);
23179 /* Handle sizes 2 and 3. */
23180 label
= ix86_expand_aligntest (*count
, 2, false);
23181 destmem
= change_address (destmem
, HImode
, *destptr
);
23182 destmem
= offset_address (destmem
, *count
, 1);
23183 destmem
= offset_address (destmem
, GEN_INT (-2), 2);
23185 emit_move_insn (destmem
, gen_lowpart (HImode
, value
));
23188 srcmem
= change_address (srcmem
, HImode
, *srcptr
);
23189 srcmem
= offset_address (srcmem
, *count
, 1);
23190 srcmem
= offset_address (srcmem
, GEN_INT (-2), 2);
23191 emit_move_insn (destmem
, srcmem
);
23194 emit_label (label
);
23195 LABEL_NUSES (label
) = 1;
23196 emit_jump_insn (gen_jump (*done_label
));
23200 gcc_assert (*min_size
>= (unsigned HOST_WIDE_INT
)size
23201 || UINTVAL (*count
) >= (unsigned HOST_WIDE_INT
)size
);
23203 /* Start memcpy for COUNT >= SIZE. */
23206 emit_label (loop_label
);
23207 LABEL_NUSES (loop_label
) = 1;
23210 /* Copy first desired_align bytes. */
23212 srcmem
= change_address (srcmem
, mode
, *srcptr
);
23213 destmem
= change_address (destmem
, mode
, *destptr
);
23214 modesize
= GEN_INT (GET_MODE_SIZE (mode
));
23215 for (n
= 0; prolog_size
< desired_align
- align
; n
++)
23218 emit_move_insn (destmem
, mode_value
);
23221 emit_move_insn (destmem
, srcmem
);
23222 srcmem
= offset_address (srcmem
, modesize
, GET_MODE_SIZE (mode
));
23224 destmem
= offset_address (destmem
, modesize
, GET_MODE_SIZE (mode
));
23225 prolog_size
+= GET_MODE_SIZE (mode
);
23229 /* Copy last SIZE bytes. */
23230 destmem
= offset_address (destmem
, *count
, 1);
23231 destmem
= offset_address (destmem
,
23232 GEN_INT (-size
- prolog_size
),
23235 emit_move_insn (destmem
, mode_value
);
23238 srcmem
= offset_address (srcmem
, *count
, 1);
23239 srcmem
= offset_address (srcmem
,
23240 GEN_INT (-size
- prolog_size
),
23242 emit_move_insn (destmem
, srcmem
);
23244 for (n
= 1; n
* GET_MODE_SIZE (mode
) < size
; n
++)
23246 destmem
= offset_address (destmem
, modesize
, 1);
23248 emit_move_insn (destmem
, mode_value
);
23251 srcmem
= offset_address (srcmem
, modesize
, 1);
23252 emit_move_insn (destmem
, srcmem
);
23256 /* Align destination. */
23257 if (desired_align
> 1 && desired_align
> align
)
23259 rtx saveddest
= *destptr
;
23261 gcc_assert (desired_align
<= size
);
23262 /* Align destptr up, place it to new register. */
23263 *destptr
= expand_simple_binop (GET_MODE (*destptr
), PLUS
, *destptr
,
23264 GEN_INT (prolog_size
),
23265 NULL_RTX
, 1, OPTAB_DIRECT
);
23266 *destptr
= expand_simple_binop (GET_MODE (*destptr
), AND
, *destptr
,
23267 GEN_INT (-desired_align
),
23268 *destptr
, 1, OPTAB_DIRECT
);
23269 /* See how many bytes we skipped. */
23270 saveddest
= expand_simple_binop (GET_MODE (*destptr
), MINUS
, saveddest
,
23272 saveddest
, 1, OPTAB_DIRECT
);
23273 /* Adjust srcptr and count. */
23275 *srcptr
= expand_simple_binop (GET_MODE (*srcptr
), MINUS
, *srcptr
, saveddest
,
23276 *srcptr
, 1, OPTAB_DIRECT
);
23277 *count
= expand_simple_binop (GET_MODE (*count
), PLUS
, *count
,
23278 saveddest
, *count
, 1, OPTAB_DIRECT
);
23279 /* We copied at most size + prolog_size. */
23280 if (*min_size
> (unsigned HOST_WIDE_INT
)(size
+ prolog_size
))
23281 *min_size
= (*min_size
- size
) & ~(unsigned HOST_WIDE_INT
)(size
- 1);
23285 /* Our loops always round down the bock size, but for dispatch to library
23286 we need precise value. */
23288 *count
= expand_simple_binop (GET_MODE (*count
), AND
, *count
,
23289 GEN_INT (-size
), *count
, 1, OPTAB_DIRECT
);
23293 gcc_assert (prolog_size
== 0);
23294 /* Decrease count, so we won't end up copying last word twice. */
23295 if (!CONST_INT_P (*count
))
23296 *count
= expand_simple_binop (GET_MODE (*count
), PLUS
, *count
,
23297 constm1_rtx
, *count
, 1, OPTAB_DIRECT
);
23299 *count
= GEN_INT ((UINTVAL (*count
) - 1) & ~(unsigned HOST_WIDE_INT
)(size
- 1));
23301 *min_size
= (*min_size
- 1) & ~(unsigned HOST_WIDE_INT
)(size
- 1);
23306 /* This function is like the previous one, except here we know how many bytes
23307 need to be copied. That allows us to update alignment not only of DST, which
23308 is returned, but also of SRC, which is passed as a pointer for that
23311 expand_set_or_movmem_constant_prologue (rtx dst
, rtx
*srcp
, rtx destreg
,
23312 rtx srcreg
, rtx value
, rtx vec_value
,
23313 int desired_align
, int align_bytes
,
23317 rtx orig_dst
= dst
;
23318 rtx orig_src
= NULL
;
23319 int piece_size
= 1;
23320 int copied_bytes
= 0;
23324 gcc_assert (srcp
!= NULL
);
23329 for (piece_size
= 1;
23330 piece_size
<= desired_align
&& copied_bytes
< align_bytes
;
23333 if (align_bytes
& piece_size
)
23337 if (vec_value
&& piece_size
> GET_MODE_SIZE (GET_MODE (value
)))
23338 dst
= emit_memset (dst
, destreg
, vec_value
, piece_size
);
23340 dst
= emit_memset (dst
, destreg
, value
, piece_size
);
23343 dst
= emit_memmov (dst
, &src
, destreg
, srcreg
, piece_size
);
23344 copied_bytes
+= piece_size
;
23347 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
23348 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
23349 if (MEM_SIZE_KNOWN_P (orig_dst
))
23350 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
23354 int src_align_bytes
= get_mem_align_offset (src
, desired_align
23356 if (src_align_bytes
>= 0)
23357 src_align_bytes
= desired_align
- src_align_bytes
;
23358 if (src_align_bytes
>= 0)
23360 unsigned int src_align
;
23361 for (src_align
= desired_align
; src_align
>= 2; src_align
>>= 1)
23363 if ((src_align_bytes
& (src_align
- 1))
23364 == (align_bytes
& (src_align
- 1)))
23367 if (src_align
> (unsigned int) desired_align
)
23368 src_align
= desired_align
;
23369 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
23370 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
23372 if (MEM_SIZE_KNOWN_P (orig_src
))
23373 set_mem_size (src
, MEM_SIZE (orig_src
) - align_bytes
);
/* NOTE(review): extraction artifact — fused line numbers; the bare
   `return false;` / `return true;` lines of this predicate were dropped
   by the extraction.  Code tokens left byte-identical; comments added.  */
/* Predicate: can stringop algorithm ALG be used in the current function?
   vector_loop needs SSE/AVX; the rep-prefix variants need ecx/edi (plus
   eax for memset, esi for memcpy) not to be user-fixed registers.  */
23380 /* Return true if ALG can be used in current context.
23381 Assume we expand memset if MEMSET is true. */
23383 alg_usable_p (enum stringop_alg alg
, bool memset
)
23385 if (alg
== no_stringop
)
23387 if (alg
== vector_loop
)
23388 return TARGET_SSE
|| TARGET_AVX
;
23389 /* Algorithms using the rep prefix want at least edi and ecx;
23390 additionally, memset wants eax and memcpy wants esi. Don't
23391 consider such algorithms if the user has appropriated those
23392 registers for their own purposes. */
23393 if (alg
== rep_prefix_1_byte
23394 || alg
== rep_prefix_4_byte
23395 || alg
== rep_prefix_8_byte
)
23396 return !(fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
]
23397 || (memset
? fixed_regs
[AX_REG
] : fixed_regs
[SI_REG
]));
23401 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
23402 static enum stringop_alg
23403 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
,
23404 unsigned HOST_WIDE_INT min_size
, unsigned HOST_WIDE_INT max_size
,
23405 bool memset
, bool zero_memset
, int *dynamic_check
, bool *noalign
)
23407 const struct stringop_algs
* algs
;
23408 bool optimize_for_speed
;
23410 const struct processor_costs
*cost
;
23412 bool any_alg_usable_p
= false;
23415 *dynamic_check
= -1;
23417 /* Even if the string operation call is cold, we still might spend a lot
23418 of time processing large blocks. */
23419 if (optimize_function_for_size_p (cfun
)
23420 || (optimize_insn_for_size_p ()
23422 || (expected_size
!= -1 && expected_size
< 256))))
23423 optimize_for_speed
= false;
23425 optimize_for_speed
= true;
23427 cost
= optimize_for_speed
? ix86_cost
: &ix86_size_cost
;
23429 algs
= &cost
->memset
[TARGET_64BIT
!= 0];
23431 algs
= &cost
->memcpy
[TARGET_64BIT
!= 0];
23433 /* See maximal size for user defined algorithm. */
23434 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
23436 enum stringop_alg candidate
= algs
->size
[i
].alg
;
23437 bool usable
= alg_usable_p (candidate
, memset
);
23438 any_alg_usable_p
|= usable
;
23440 if (candidate
!= libcall
&& candidate
&& usable
)
23441 max
= algs
->size
[i
].max
;
23444 /* If expected size is not known but max size is small enough
23445 so inline version is a win, set expected size into
23447 if (max
> 1 && (unsigned HOST_WIDE_INT
)max
>= max_size
&& expected_size
== -1)
23448 expected_size
= min_size
/ 2 + max_size
/ 2;
23450 /* If user specified the algorithm, honnor it if possible. */
23451 if (ix86_stringop_alg
!= no_stringop
23452 && alg_usable_p (ix86_stringop_alg
, memset
))
23453 return ix86_stringop_alg
;
23454 /* rep; movq or rep; movl is the smallest variant. */
23455 else if (!optimize_for_speed
)
23458 if (!count
|| (count
& 3) || (memset
&& !zero_memset
))
23459 return alg_usable_p (rep_prefix_1_byte
, memset
)
23460 ? rep_prefix_1_byte
: loop_1_byte
;
23462 return alg_usable_p (rep_prefix_4_byte
, memset
)
23463 ? rep_prefix_4_byte
: loop
;
23465 /* Very tiny blocks are best handled via the loop, REP is expensive to
23467 else if (expected_size
!= -1 && expected_size
< 4)
23468 return loop_1_byte
;
23469 else if (expected_size
!= -1)
23471 enum stringop_alg alg
= libcall
;
23472 bool alg_noalign
= false;
23473 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
23475 /* We get here if the algorithms that were not libcall-based
23476 were rep-prefix based and we are unable to use rep prefixes
23477 based on global register usage. Break out of the loop and
23478 use the heuristic below. */
23479 if (algs
->size
[i
].max
== 0)
23481 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
23483 enum stringop_alg candidate
= algs
->size
[i
].alg
;
23485 if (candidate
!= libcall
&& alg_usable_p (candidate
, memset
))
23488 alg_noalign
= algs
->size
[i
].noalign
;
23490 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23491 last non-libcall inline algorithm. */
23492 if (TARGET_INLINE_ALL_STRINGOPS
)
23494 /* When the current size is best to be copied by a libcall,
23495 but we are still forced to inline, run the heuristic below
23496 that will pick code for medium sized blocks. */
23497 if (alg
!= libcall
)
23499 *noalign
= alg_noalign
;
23504 else if (alg_usable_p (candidate
, memset
))
23506 *noalign
= algs
->size
[i
].noalign
;
23512 /* When asked to inline the call anyway, try to pick meaningful choice.
23513 We look for maximal size of block that is faster to copy by hand and
23514 take blocks of at most of that size guessing that average size will
23515 be roughly half of the block.
23517 If this turns out to be bad, we might simply specify the preferred
23518 choice in ix86_costs. */
23519 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
23520 && (algs
->unknown_size
== libcall
23521 || !alg_usable_p (algs
->unknown_size
, memset
)))
23523 enum stringop_alg alg
;
23525 /* If there aren't any usable algorithms, then recursing on
23526 smaller sizes isn't going to find anything. Just return the
23527 simple byte-at-a-time copy loop. */
23528 if (!any_alg_usable_p
)
23530 /* Pick something reasonable. */
23531 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
23532 *dynamic_check
= 128;
23533 return loop_1_byte
;
23537 alg
= decide_alg (count
, max
/ 2, min_size
, max_size
, memset
,
23538 zero_memset
, dynamic_check
, noalign
);
23539 gcc_assert (*dynamic_check
== -1);
23540 gcc_assert (alg
!= libcall
);
23541 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
23542 *dynamic_check
= max
;
23545 return (alg_usable_p (algs
->unknown_size
, memset
)
23546 ? algs
->unknown_size
: libcall
);
/* NOTE(review): extraction artifact — fused line numbers; interior lines
   (braces, the libcall/VOIDmode branch bodies, the PentiumPro
   `desired_align = 8;` line) were dropped.  Code tokens left
   byte-identical; comments only added.  */
/* Computes the destination alignment to aim for given the chosen
   algorithm's move mode; never goes below the known ALIGN, and skips
   alignment work when the expected block is tiny (< 4 bytes).  */
23549 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23550 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
23552 decide_alignment (int align
,
23553 enum stringop_alg alg
,
23555 enum machine_mode move_mode
)
23557 int desired_align
= 0;
23559 gcc_assert (alg
!= no_stringop
);
23561 if (alg
== libcall
)
23563 if (move_mode
== VOIDmode
)
/* Default: align to the move mode's size.  */
23566 desired_align
= GET_MODE_SIZE (move_mode
);
23567 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
23568 copying whole cacheline at once. */
23569 if (TARGET_PENTIUMPRO
23570 && (alg
== rep_prefix_4_byte
|| alg
== rep_prefix_1_byte
))
23575 if (desired_align
< align
)
23576 desired_align
= align
;
/* For very small expected blocks the prologue cost outweighs the gain.  */
23577 if (expected_size
!= -1 && expected_size
< 4)
23578 desired_align
= align
;
23580 return desired_align
;
23584 /* Helper function for memcpy. For QImode value 0xXY produce
23585 0xXYXYXYXY of wide specified by MODE. This is essentially
23586 a * 0x10101010, but we can do slightly better than
23587 synth_mult by unwinding the sequence by hand on CPUs with
23590 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
23592 enum machine_mode valmode
= GET_MODE (val
);
23594 int nops
= mode
== DImode
? 3 : 2;
23596 gcc_assert (mode
== SImode
|| mode
== DImode
|| val
== const0_rtx
);
23597 if (val
== const0_rtx
)
23598 return copy_to_mode_reg (mode
, CONST0_RTX (mode
));
23599 if (CONST_INT_P (val
))
23601 HOST_WIDE_INT v
= INTVAL (val
) & 255;
23605 if (mode
== DImode
)
23606 v
|= (v
<< 16) << 16;
23607 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
23610 if (valmode
== VOIDmode
)
23612 if (valmode
!= QImode
)
23613 val
= gen_lowpart (QImode
, val
);
23614 if (mode
== QImode
)
23616 if (!TARGET_PARTIAL_REG_STALL
)
23618 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
23619 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
23620 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
23621 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
23623 rtx reg
= convert_modes (mode
, QImode
, val
, true);
23624 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
23625 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
23630 rtx reg
= convert_modes (mode
, QImode
, val
, true);
23632 if (!TARGET_PARTIAL_REG_STALL
)
23633 if (mode
== SImode
)
23634 emit_insn (gen_movsi_insv_1 (reg
, reg
));
23636 emit_insn (gen_movdi_insv_1 (reg
, reg
));
23639 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
23640 NULL
, 1, OPTAB_DIRECT
);
23642 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23644 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
23645 NULL
, 1, OPTAB_DIRECT
);
23646 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23647 if (mode
== SImode
)
23649 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
23650 NULL
, 1, OPTAB_DIRECT
);
23651 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
/* NOTE(review): extraction artifact — fused line numbers; the trailing
   parameter list line(s), braces and the leading `if (TARGET_64BIT ...`
   condition line were dropped by the extraction.  Code tokens left
   byte-identical; comments only added.  */
/* Picks the widest promotion (DI/SI/HI, else VAL unchanged) that the
   main copy loop (SIZE_NEEDED-byte chunks) and the alignment prologue
   (ALIGN -> DESIRED_ALIGN) will need, and returns the promoted value.  */
23656 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
23657 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
23658 alignment from ALIGN to DESIRED_ALIGN. */
23660 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
,
23666 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
23667 promoted_val
= promote_duplicated_reg (DImode
, val
);
23668 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
23669 promoted_val
= promote_duplicated_reg (SImode
, val
);
23670 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
23671 promoted_val
= promote_duplicated_reg (HImode
, val
);
23673 promoted_val
= val
;
23675 return promoted_val
;
23678 /* Expand string move (memcpy) ot store (memset) operation. Use i386 string
23679 operations when profitable. The code depends upon architecture, block size
23680 and alignment, but always has one of the following overall structures:
23682 Aligned move sequence:
23684 1) Prologue guard: Conditional that jumps up to epilogues for small
23685 blocks that can be handled by epilogue alone. This is faster
23686 but also needed for correctness, since prologue assume the block
23687 is larger than the desired alignment.
23689 Optional dynamic check for size and libcall for large
23690 blocks is emitted here too, with -minline-stringops-dynamically.
23692 2) Prologue: copy first few bytes in order to get destination
23693 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
23694 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
23695 copied. We emit either a jump tree on power of two sized
23696 blocks, or a byte loop.
23698 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
23699 with specified algorithm.
23701 4) Epilogue: code copying tail of the block that is too small to be
23702 handled by main body (or up to size guarded by prologue guard).
23704 Misaligned move sequence
23706 1) missaligned move prologue/epilogue containing:
23707 a) Prologue handling small memory blocks and jumping to done_label
23708 (skipped if blocks are known to be large enough)
23709 b) Signle move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
23710 needed by single possibly misaligned move
23711 (skipped if alignment is not needed)
23712 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
23714 2) Zero size guard dispatching to done_label, if needed
23716 3) dispatch to library call, if needed,
23718 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
23719 with specified algorithm. */
23721 ix86_expand_set_or_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx val_exp
,
23722 rtx align_exp
, rtx expected_align_exp
,
23723 rtx expected_size_exp
, rtx min_size_exp
,
23724 rtx max_size_exp
, rtx probable_max_size_exp
,
23731 rtx jump_around_label
= NULL
;
23732 HOST_WIDE_INT align
= 1;
23733 unsigned HOST_WIDE_INT count
= 0;
23734 HOST_WIDE_INT expected_size
= -1;
23735 int size_needed
= 0, epilogue_size_needed
;
23736 int desired_align
= 0, align_bytes
= 0;
23737 enum stringop_alg alg
;
23738 rtx promoted_val
= NULL
;
23739 rtx vec_promoted_val
= NULL
;
23740 bool force_loopy_epilogue
= false;
23742 bool need_zero_guard
= false;
23744 enum machine_mode move_mode
= VOIDmode
;
23745 int unroll_factor
= 1;
23746 /* TODO: Once vlaue ranges are available, fill in proper data. */
23747 unsigned HOST_WIDE_INT min_size
= 0;
23748 unsigned HOST_WIDE_INT max_size
= -1;
23749 unsigned HOST_WIDE_INT probable_max_size
= -1;
23750 bool misaligned_prologue_used
= false;
23752 if (CONST_INT_P (align_exp
))
23753 align
= INTVAL (align_exp
);
23754 /* i386 can do misaligned access on reasonably increased cost. */
23755 if (CONST_INT_P (expected_align_exp
)
23756 && INTVAL (expected_align_exp
) > align
)
23757 align
= INTVAL (expected_align_exp
);
23758 /* ALIGN is the minimum of destination and source alignment, but we care here
23759 just about destination alignment. */
23761 && MEM_ALIGN (dst
) > (unsigned HOST_WIDE_INT
) align
* BITS_PER_UNIT
)
23762 align
= MEM_ALIGN (dst
) / BITS_PER_UNIT
;
23764 if (CONST_INT_P (count_exp
))
23765 min_size
= max_size
= probable_max_size
= count
= expected_size
23766 = INTVAL (count_exp
);
23770 min_size
= INTVAL (min_size_exp
);
23772 max_size
= INTVAL (max_size_exp
);
23773 if (probable_max_size_exp
)
23774 probable_max_size
= INTVAL (probable_max_size_exp
);
23775 if (CONST_INT_P (expected_size_exp
) && count
== 0)
23776 expected_size
= INTVAL (expected_size_exp
);
23779 /* Make sure we don't need to care about overflow later on. */
23780 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
23783 /* Step 0: Decide on preferred algorithm, desired alignment and
23784 size of chunks to be copied by main loop. */
23785 alg
= decide_alg (count
, expected_size
, min_size
, probable_max_size
,
23787 issetmem
&& val_exp
== const0_rtx
,
23788 &dynamic_check
, &noalign
);
23789 if (alg
== libcall
)
23791 gcc_assert (alg
!= no_stringop
);
23793 /* For now vector-version of memset is generated only for memory zeroing, as
23794 creating of promoted vector value is very cheap in this case. */
23795 if (issetmem
&& alg
== vector_loop
&& val_exp
!= const0_rtx
)
23796 alg
= unrolled_loop
;
23799 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
23800 destreg
= ix86_copy_addr_to_reg (XEXP (dst
, 0));
23802 srcreg
= ix86_copy_addr_to_reg (XEXP (src
, 0));
23805 move_mode
= word_mode
;
23811 gcc_unreachable ();
23813 need_zero_guard
= true;
23814 move_mode
= QImode
;
23817 need_zero_guard
= true;
23819 case unrolled_loop
:
23820 need_zero_guard
= true;
23821 unroll_factor
= (TARGET_64BIT
? 4 : 2);
23824 need_zero_guard
= true;
23826 /* Find the widest supported mode. */
23827 move_mode
= word_mode
;
23828 while (optab_handler (mov_optab
, GET_MODE_WIDER_MODE (move_mode
))
23829 != CODE_FOR_nothing
)
23830 move_mode
= GET_MODE_WIDER_MODE (move_mode
);
23832 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23833 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23834 if (GET_MODE_SIZE (move_mode
) > GET_MODE_SIZE (word_mode
))
23836 int nunits
= GET_MODE_SIZE (move_mode
) / GET_MODE_SIZE (word_mode
);
23837 move_mode
= mode_for_vector (word_mode
, nunits
);
23838 if (optab_handler (mov_optab
, move_mode
) == CODE_FOR_nothing
)
23839 move_mode
= word_mode
;
23841 gcc_assert (optab_handler (mov_optab
, move_mode
) != CODE_FOR_nothing
);
23843 case rep_prefix_8_byte
:
23844 move_mode
= DImode
;
23846 case rep_prefix_4_byte
:
23847 move_mode
= SImode
;
23849 case rep_prefix_1_byte
:
23850 move_mode
= QImode
;
23853 size_needed
= GET_MODE_SIZE (move_mode
) * unroll_factor
;
23854 epilogue_size_needed
= size_needed
;
23856 desired_align
= decide_alignment (align
, alg
, expected_size
, move_mode
);
23857 if (!TARGET_ALIGN_STRINGOPS
|| noalign
)
23858 align
= desired_align
;
23860 /* Step 1: Prologue guard. */
23862 /* Alignment code needs count to be in register. */
23863 if (CONST_INT_P (count_exp
) && desired_align
> align
)
23865 if (INTVAL (count_exp
) > desired_align
23866 && INTVAL (count_exp
) > size_needed
)
23869 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
23870 if (align_bytes
<= 0)
23873 align_bytes
= desired_align
- align_bytes
;
23875 if (align_bytes
== 0)
23876 count_exp
= force_reg (counter_mode (count_exp
), count_exp
);
23878 gcc_assert (desired_align
>= 1 && align
>= 1);
23880 /* Misaligned move sequences handle both prologue and epilogue at once.
23881 Default code generation results in a smaller code for large alignments
23882 and also avoids redundant job when sizes are known precisely. */
23883 misaligned_prologue_used
23884 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
23885 && MAX (desired_align
, epilogue_size_needed
) <= 32
23886 && desired_align
<= epilogue_size_needed
23887 && ((desired_align
> align
&& !align_bytes
)
23888 || (!count
&& epilogue_size_needed
> 1)));
23890 /* Do the cheap promotion to allow better CSE across the
23891 main loop and epilogue (ie one load of the big constant in the
23893 For now the misaligned move sequences do not have fast path
23894 without broadcasting. */
23895 if (issetmem
&& ((CONST_INT_P (val_exp
) || misaligned_prologue_used
)))
23897 if (alg
== vector_loop
)
23899 gcc_assert (val_exp
== const0_rtx
);
23900 vec_promoted_val
= promote_duplicated_reg (move_mode
, val_exp
);
23901 promoted_val
= promote_duplicated_reg_to_size (val_exp
,
23902 GET_MODE_SIZE (word_mode
),
23903 desired_align
, align
);
23907 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
23908 desired_align
, align
);
23911 /* Misaligned move sequences handles both prologues and epilogues at once.
23912 Default code generation results in smaller code for large alignments and
23913 also avoids redundant job when sizes are known precisely. */
23914 if (misaligned_prologue_used
)
23916 /* Misaligned move prologue handled small blocks by itself. */
23917 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
23918 (dst
, src
, &destreg
, &srcreg
,
23919 move_mode
, promoted_val
, vec_promoted_val
,
23921 &jump_around_label
,
23922 desired_align
< align
23923 ? MAX (desired_align
, epilogue_size_needed
) : epilogue_size_needed
,
23924 desired_align
, align
, &min_size
, dynamic_check
, issetmem
);
23926 src
= change_address (src
, BLKmode
, srcreg
);
23927 dst
= change_address (dst
, BLKmode
, destreg
);
23928 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
23929 epilogue_size_needed
= 0;
23930 if (need_zero_guard
&& !min_size
)
23932 /* It is possible that we copied enough so the main loop will not
23934 gcc_assert (size_needed
> 1);
23935 if (jump_around_label
== NULL_RTX
)
23936 jump_around_label
= gen_label_rtx ();
23937 emit_cmp_and_jump_insns (count_exp
,
23938 GEN_INT (size_needed
),
23939 LTU
, 0, counter_mode (count_exp
), 1, jump_around_label
);
23940 if (expected_size
== -1
23941 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
23942 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23944 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23947 /* Ensure that alignment prologue won't copy past end of block. */
23948 else if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
23950 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
23951 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
23952 Make sure it is power of 2. */
23953 epilogue_size_needed
= 1 << (floor_log2 (epilogue_size_needed
) + 1);
23955 /* To improve performance of small blocks, we jump around the VAL
23956 promoting mode. This mean that if the promoted VAL is not constant,
23957 we might not use it in the epilogue and have to use byte
23959 if (issetmem
&& epilogue_size_needed
> 2 && !promoted_val
)
23960 force_loopy_epilogue
= true;
23963 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
23965 /* If main algorithm works on QImode, no epilogue is needed.
23966 For small sizes just don't align anything. */
23967 if (size_needed
== 1)
23968 desired_align
= align
;
23973 else if (min_size
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
23975 gcc_assert (max_size
>= (unsigned HOST_WIDE_INT
)epilogue_size_needed
);
23976 label
= gen_label_rtx ();
23977 emit_cmp_and_jump_insns (count_exp
,
23978 GEN_INT (epilogue_size_needed
),
23979 LTU
, 0, counter_mode (count_exp
), 1, label
);
23980 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
23981 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23983 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23987 /* Emit code to decide on runtime whether library call or inline should be
23989 if (dynamic_check
!= -1)
23991 if (!issetmem
&& CONST_INT_P (count_exp
))
23993 if (UINTVAL (count_exp
) >= (unsigned HOST_WIDE_INT
)dynamic_check
)
23995 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
23996 count_exp
= const0_rtx
;
24002 rtx hot_label
= gen_label_rtx ();
24003 jump_around_label
= gen_label_rtx ();
24004 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
24005 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
24006 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
24008 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
24010 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
24011 emit_jump (jump_around_label
);
24012 emit_label (hot_label
);
24016 /* Step 2: Alignment prologue. */
24017 /* Do the expensive promotion once we branched off the small blocks. */
24018 if (issetmem
&& !promoted_val
)
24019 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
24020 desired_align
, align
);
24022 if (desired_align
> align
&& !misaligned_prologue_used
)
24024 if (align_bytes
== 0)
24026 /* Except for the first move in prologue, we no longer know
24027 constant offset in aliasing info. It does not seem worth
24028 the pain to maintain it for the first move, so throw away
24030 dst
= change_address (dst
, BLKmode
, destreg
);
24032 src
= change_address (src
, BLKmode
, srcreg
);
24033 dst
= expand_set_or_movmem_prologue (dst
, src
, destreg
, srcreg
,
24034 promoted_val
, vec_promoted_val
,
24035 count_exp
, align
, desired_align
,
24037 /* At most desired_align - align bytes are copied. */
24038 if (min_size
< (unsigned)(desired_align
- align
))
24041 min_size
-= desired_align
- align
;
24045 /* If we know how many bytes need to be stored before dst is
24046 sufficiently aligned, maintain aliasing info accurately. */
24047 dst
= expand_set_or_movmem_constant_prologue (dst
, &src
, destreg
,
24055 count_exp
= plus_constant (counter_mode (count_exp
),
24056 count_exp
, -align_bytes
);
24057 count
-= align_bytes
;
24058 min_size
-= align_bytes
;
24059 max_size
-= align_bytes
;
24061 if (need_zero_guard
24063 && (count
< (unsigned HOST_WIDE_INT
) size_needed
24064 || (align_bytes
== 0
24065 && count
< ((unsigned HOST_WIDE_INT
) size_needed
24066 + desired_align
- align
))))
24068 /* It is possible that we copied enough so the main loop will not
24070 gcc_assert (size_needed
> 1);
24071 if (label
== NULL_RTX
)
24072 label
= gen_label_rtx ();
24073 emit_cmp_and_jump_insns (count_exp
,
24074 GEN_INT (size_needed
),
24075 LTU
, 0, counter_mode (count_exp
), 1, label
);
24076 if (expected_size
== -1
24077 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
24078 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
24080 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
24083 if (label
&& size_needed
== 1)
24085 emit_label (label
);
24086 LABEL_NUSES (label
) = 1;
24088 epilogue_size_needed
= 1;
24090 promoted_val
= val_exp
;
24092 else if (label
== NULL_RTX
&& !misaligned_prologue_used
)
24093 epilogue_size_needed
= size_needed
;
24095 /* Step 3: Main loop. */
24102 gcc_unreachable ();
24105 case unrolled_loop
:
24106 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, promoted_val
,
24107 count_exp
, move_mode
, unroll_factor
,
24108 expected_size
, issetmem
);
24111 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
,
24112 vec_promoted_val
, count_exp
, move_mode
,
24113 unroll_factor
, expected_size
, issetmem
);
24115 case rep_prefix_8_byte
:
24116 case rep_prefix_4_byte
:
24117 case rep_prefix_1_byte
:
24118 expand_set_or_movmem_via_rep (dst
, src
, destreg
, srcreg
, promoted_val
,
24119 val_exp
, count_exp
, move_mode
, issetmem
);
24122 /* Adjust properly the offset of src and dest memory for aliasing. */
24123 if (CONST_INT_P (count_exp
))
24126 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
24127 (count
/ size_needed
) * size_needed
);
24128 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
24129 (count
/ size_needed
) * size_needed
);
24134 src
= change_address (src
, BLKmode
, srcreg
);
24135 dst
= change_address (dst
, BLKmode
, destreg
);
24138 /* Step 4: Epilogue to copy the remaining bytes. */
24142 /* When the main loop is done, COUNT_EXP might hold original count,
24143 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
24144 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
24145 bytes. Compensate if needed. */
24147 if (size_needed
< epilogue_size_needed
)
24150 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
24151 GEN_INT (size_needed
- 1), count_exp
, 1,
24153 if (tmp
!= count_exp
)
24154 emit_move_insn (count_exp
, tmp
);
24156 emit_label (label
);
24157 LABEL_NUSES (label
) = 1;
24160 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
24162 if (force_loopy_epilogue
)
24163 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
24164 epilogue_size_needed
);
24168 expand_setmem_epilogue (dst
, destreg
, promoted_val
,
24169 vec_promoted_val
, count_exp
,
24170 epilogue_size_needed
);
24172 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
24173 epilogue_size_needed
);
24176 if (jump_around_label
)
24177 emit_label (jump_around_label
);
24182 /* Expand the appropriate insns for doing strlen if not just doing
24185 out = result, initialized with the start address
24186 align_rtx = alignment of the address.
24187 scratch = scratch register, initialized with the startaddress when
24188 not aligned, otherwise undefined
24190 This is just the body. It needs the initializations mentioned above and
24191 some address computing at the end. These things are done in i386.md. */
24194 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
24198 rtx align_2_label
= NULL_RTX
;
24199 rtx align_3_label
= NULL_RTX
;
24200 rtx align_4_label
= gen_label_rtx ();
24201 rtx end_0_label
= gen_label_rtx ();
24203 rtx tmpreg
= gen_reg_rtx (SImode
);
24204 rtx scratch
= gen_reg_rtx (SImode
);
24208 if (CONST_INT_P (align_rtx
))
24209 align
= INTVAL (align_rtx
);
24211 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24213 /* Is there a known alignment and is it less than 4? */
24216 rtx scratch1
= gen_reg_rtx (Pmode
);
24217 emit_move_insn (scratch1
, out
);
24218 /* Is there a known alignment and is it not 2? */
24221 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
24222 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
24224 /* Leave just the 3 lower bits. */
24225 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
24226 NULL_RTX
, 0, OPTAB_WIDEN
);
24228 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
24229 Pmode
, 1, align_4_label
);
24230 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
24231 Pmode
, 1, align_2_label
);
24232 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
24233 Pmode
, 1, align_3_label
);
24237 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24238 check if is aligned to 4 - byte. */
24240 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
24241 NULL_RTX
, 0, OPTAB_WIDEN
);
24243 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
24244 Pmode
, 1, align_4_label
);
24247 mem
= change_address (src
, QImode
, out
);
24249 /* Now compare the bytes. */
24251 /* Compare the first n unaligned byte on a byte per byte basis. */
24252 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
24253 QImode
, 1, end_0_label
);
24255 /* Increment the address. */
24256 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
24258 /* Not needed with an alignment of 2 */
24261 emit_label (align_2_label
);
24263 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
24266 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
24268 emit_label (align_3_label
);
24271 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
24274 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
24277 /* Generate loop to check 4 bytes at a time. It is not a good idea to
24278 align this loop. It gives only huge programs, but does not help to
24280 emit_label (align_4_label
);
24282 mem
= change_address (src
, SImode
, out
);
24283 emit_move_insn (scratch
, mem
);
24284 emit_insn (ix86_gen_add3 (out
, out
, GEN_INT (4)));
24286 /* This formula yields a nonzero result iff one of the bytes is zero.
24287 This saves three branches inside loop and many cycles. */
24289 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
24290 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
24291 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
24292 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
24293 gen_int_mode (0x80808080, SImode
)));
24294 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
24299 rtx reg
= gen_reg_rtx (SImode
);
24300 rtx reg2
= gen_reg_rtx (Pmode
);
24301 emit_move_insn (reg
, tmpreg
);
24302 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
24304 /* If zero is not in the first two bytes, move two bytes forward. */
24305 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
24306 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
24307 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
24308 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
24309 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
24312 /* Emit lea manually to avoid clobbering of flags. */
24313 emit_insn (gen_rtx_SET (SImode
, reg2
,
24314 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
24316 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
24317 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
24318 emit_insn (gen_rtx_SET (VOIDmode
, out
,
24319 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
24325 rtx end_2_label
= gen_label_rtx ();
24326 /* Is zero in the first two bytes? */
24328 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
24329 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
24330 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
24331 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
24332 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
24334 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
24335 JUMP_LABEL (tmp
) = end_2_label
;
24337 /* Not in the first two. Move two bytes forward. */
24338 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
24339 emit_insn (ix86_gen_add3 (out
, out
, const2_rtx
));
24341 emit_label (end_2_label
);
24345 /* Avoid branch in fixing the byte. */
24346 tmpreg
= gen_lowpart (QImode
, tmpreg
);
24347 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
24348 tmp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
24349 cmp
= gen_rtx_LTU (VOIDmode
, tmp
, const0_rtx
);
24350 emit_insn (ix86_gen_sub3_carry (out
, out
, GEN_INT (3), tmp
, cmp
));
24352 emit_label (end_0_label
);
24355 /* Expand strlen. */
24358 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
24360 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
24362 /* The generic case of strlen expander is long. Avoid it's
24363 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
24365 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
24366 && !TARGET_INLINE_ALL_STRINGOPS
24367 && !optimize_insn_for_size_p ()
24368 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
24371 addr
= force_reg (Pmode
, XEXP (src
, 0));
24372 scratch1
= gen_reg_rtx (Pmode
);
24374 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
24375 && !optimize_insn_for_size_p ())
24377 /* Well it seems that some optimizer does not combine a call like
24378 foo(strlen(bar), strlen(bar));
24379 when the move and the subtraction is done here. It does calculate
24380 the length just once when these instructions are done inside of
24381 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
24382 often used and I use one fewer register for the lifetime of
24383 output_strlen_unroll() this is better. */
24385 emit_move_insn (out
, addr
);
24387 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
24389 /* strlensi_unroll_1 returns the address of the zero at the end of
24390 the string, like memchr(), so compute the length by subtracting
24391 the start address. */
24392 emit_insn (ix86_gen_sub3 (out
, out
, addr
));
24398 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24399 if (fixed_regs
[AX_REG
] || fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
])
24402 scratch2
= gen_reg_rtx (Pmode
);
24403 scratch3
= gen_reg_rtx (Pmode
);
24404 scratch4
= force_reg (Pmode
, constm1_rtx
);
24406 emit_move_insn (scratch3
, addr
);
24407 eoschar
= force_reg (QImode
, eoschar
);
24409 src
= replace_equiv_address_nv (src
, scratch3
);
24411 /* If .md starts supporting :P, this can be done in .md. */
24412 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
24413 scratch4
), UNSPEC_SCAS
);
24414 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
24415 emit_insn (ix86_gen_one_cmpl2 (scratch2
, scratch1
));
24416 emit_insn (ix86_gen_add3 (out
, scratch2
, constm1_rtx
));
24421 /* For given symbol (function) construct code to compute address of it's PLT
24422 entry in large x86-64 PIC model. */
24424 construct_plt_address (rtx symbol
)
/* Only valid for a SYMBOL_REF under the 64-bit large PIC code model
   (Pmode == DImode, not PE/COFF); enforced by the asserts.  */
24428 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
24429 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
&& !TARGET_PECOFF
);
24430 gcc_assert (Pmode
== DImode
);
/* tmp = @PLTOFF(symbol) + PIC register, i.e. the address of the PLT
   entry.  NOTE(review): the `return tmp;' line is not present in this
   excerpt — confirm against upstream.  */
24432 tmp
= gen_reg_rtx (Pmode
);
24433 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
24435 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
24436 emit_insn (ix86_gen_add3 (tmp
, tmp
, pic_offset_table_rtx
));
24441 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
24443 rtx pop
, bool sibcall
)
24445 unsigned int const cregs_size
24446 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers
);
24447 rtx vec
[3 + cregs_size
];
24448 rtx use
= NULL
, call
;
24449 unsigned int vec_len
= 0;
24451 if (pop
== const0_rtx
)
24453 gcc_assert (!TARGET_64BIT
|| !pop
);
24455 if (TARGET_MACHO
&& !TARGET_64BIT
)
24458 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
24459 fnaddr
= machopic_indirect_call_target (fnaddr
);
24464 /* Static functions and indirect calls don't need the pic register. */
24467 || (ix86_cmodel
== CM_LARGE_PIC
24468 && DEFAULT_ABI
!= MS_ABI
))
24469 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
24470 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
24471 use_reg (&use
, pic_offset_table_rtx
);
24474 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
24476 rtx al
= gen_rtx_REG (QImode
, AX_REG
);
24477 emit_move_insn (al
, callarg2
);
24478 use_reg (&use
, al
);
24481 if (ix86_cmodel
== CM_LARGE_PIC
24484 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
24485 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
24486 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
24488 ? !sibcall_insn_operand (XEXP (fnaddr
, 0), word_mode
)
24489 : !call_insn_operand (XEXP (fnaddr
, 0), word_mode
))
24491 fnaddr
= convert_to_mode (word_mode
, XEXP (fnaddr
, 0), 1);
24492 fnaddr
= gen_rtx_MEM (QImode
, copy_to_mode_reg (word_mode
, fnaddr
));
24495 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
24497 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
24498 vec
[vec_len
++] = call
;
24502 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
24503 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
24504 vec
[vec_len
++] = pop
;
24507 if (TARGET_64BIT_MS_ABI
24508 && (!callarg2
|| INTVAL (callarg2
) != -2))
24512 vec
[vec_len
++] = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
),
24513 UNSPEC_MS_TO_SYSV_CALL
);
24515 for (i
= 0; i
< cregs_size
; i
++)
24517 int regno
= x86_64_ms_sysv_extra_clobbered_registers
[i
];
24518 enum machine_mode mode
= SSE_REGNO_P (regno
) ? TImode
: DImode
;
24521 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (mode
, regno
));
24526 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (vec_len
, vec
));
24527 call
= emit_call_insn (call
);
24529 CALL_INSN_FUNCTION_USAGE (call
) = use
;
24534 /* Output the assembly for a call instruction. */
/* Return the assembler template for call/sibcall INSN with call
   target operand CALL_OP.  Chooses direct (%P0) vs indirect (%A0)
   templates and, for SEH targets, decides whether a trailing nop is
   required.  NOTE(review): several interior lines (braces, returns,
   the code consuming `seh_nop_p') are missing from this excerpt.  */
24537 ix86_output_call_insn (rtx insn
, rtx call_op
)
24539 bool direct_p
= constant_call_address_operand (call_op
, VOIDmode
);
24540 bool seh_nop_p
= false;
/* Sibling calls are emitted as jumps.  */
24543 if (SIBLING_CALL_P (insn
))
24546 xasm
= "%!jmp\t%P0";
24547 /* SEH epilogue detection requires the indirect branch case
24548 to include REX.W. */
24549 else if (TARGET_SEH
)
24550 xasm
= "%!rex.W jmp %A0";
24552 xasm
= "%!jmp\t%A0";
24554 output_asm_insn (xasm
, &call_op
);
24558 /* SEH unwinding can require an extra nop to be emitted in several
24559 circumstances. Determine if we have one of those. */
/* Scan forward from the call for a real insn or the epilogue note.  */
24564 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
24566 /* If we get to another real insn, we don't need the nop. */
24570 /* If we get to the epilogue note, prevent a catch region from
24571 being adjacent to the standard epilogue sequence. If non-
24572 call-exceptions, we'll have done this during epilogue emission. */
24573 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
24574 && !flag_non_call_exceptions
24575 && !can_throw_internal (insn
))
24582 /* If we didn't find a real insn following the call, prevent the
24583 unwinder from looking into the next function. */
/* Normal (non-sibling) call templates.  */
24589 xasm
= "%!call\t%P0";
24591 xasm
= "%!call\t%A0";
24593 output_asm_insn (xasm
, &call_op
);
24601 /* Clear stack slot assignments remembered from previous functions.
24602 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocate and initialize the per-function machine_function record
   (GC-allocated and zero-cleared).  NOTE(review): the `return f;' line
   is missing from this excerpt.  */
24605 static struct machine_function
*
24606 ix86_init_machine_status (void)
24608 struct machine_function
*f
;
24610 f
= ggc_alloc_cleared_machine_function ();
/* -1 means "not yet computed" for the fast prologue/epilogue cache.  */
24611 f
->use_fast_prologue_epilogue_nregs
= -1;
24612 f
->call_abi
= ix86_abi
;
24617 /* Return a MEM corresponding to a stack slot with mode MODE.
24618 Allocate a new slot if necessary.
24620 The RTL for a function can have several slots available: N is
24621 which slot to use. */
/* Return a MEM for stack slot N of mode MODE, reusing a previously
   allocated slot when one with the same (mode, n) exists, otherwise
   allocating a new one and caching it on ix86_stack_locals.  */
24624 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
24626 struct stack_local_entry
*s
;
24628 gcc_assert (n
< MAX_386_STACK_LOCALS
);
/* Look for an existing slot with matching mode and slot number.  */
24630 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
24631 if (s
->mode
== mode
&& s
->n
== n
)
24632 return validize_mem (copy_rtx (s
->rtl
));
/* None found: allocate a fresh entry and push it on the list.  */
24634 s
= ggc_alloc_stack_local_entry ();
24637 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
24639 s
->next
= ix86_stack_locals
;
24640 ix86_stack_locals
= s
;
24641 return validize_mem (s
->rtl
);
/* Instantiate the virtual registers in every cached stack-local RTL
   expression (see assign_386_stack_local).  */
24645 ix86_instantiate_decls (void)
24647 struct stack_local_entry
*s
;
24649 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
24650 if (s
->rtl
!= NULL_RTX
)
24651 instantiate_decl_rtl (s
->rtl
);
24654 /* Check whether x86 address PARTS is a pc-relative address. */
/* Return whether decomposed address PARTS is a pc-relative (RIP)
   address: displacement only, no base and no index, where the
   displacement is a label, a non-TLS symbol, or one of the
   GOTPCREL/PCREL/GOTNTPOFF unspecs.  NOTE(review): the declaration of
   `symbol' and the return statements are missing from this excerpt.  */
24657 rip_relative_addr_p (struct ix86_address
*parts
)
24659 rtx base
, index
, disp
;
24661 base
= parts
->base
;
24662 index
= parts
->index
;
24663 disp
= parts
->disp
;
/* RIP-relative addressing admits a displacement only.  */
24665 if (disp
&& !base
&& !index
)
/* Strip a CONST wrapper and an added integer offset, if any.  */
24671 if (GET_CODE (disp
) == CONST
)
24672 symbol
= XEXP (disp
, 0);
24673 if (GET_CODE (symbol
) == PLUS
24674 && CONST_INT_P (XEXP (symbol
, 1)))
24675 symbol
= XEXP (symbol
, 0);
24677 if (GET_CODE (symbol
) == LABEL_REF
24678 || (GET_CODE (symbol
) == SYMBOL_REF
24679 && SYMBOL_REF_TLS_MODEL (symbol
) == 0)
24680 || (GET_CODE (symbol
) == UNSPEC
24681 && (XINT (symbol
, 1) == UNSPEC_GOTPCREL
24682 || XINT (symbol
, 1) == UNSPEC_PCREL
24683 || XINT (symbol
, 1) == UNSPEC_GOTNTPOFF
)))
24690 /* Calculate the length of the memory address in the instruction encoding.
24691 Includes addr32 prefix, does not include the one-byte modrm, opcode,
24692 or other prefixes. We never generate addr32 prefix for LEA insn. */
24695 memory_address_length (rtx addr
, bool lea
)
24697 struct ix86_address parts
;
24698 rtx base
, index
, disp
;
24702 if (GET_CODE (addr
) == PRE_DEC
24703 || GET_CODE (addr
) == POST_INC
24704 || GET_CODE (addr
) == PRE_MODIFY
24705 || GET_CODE (addr
) == POST_MODIFY
)
24708 ok
= ix86_decompose_address (addr
, &parts
);
24711 len
= (parts
.seg
== SEG_DEFAULT
) ? 0 : 1;
24713 /* If this is not LEA instruction, add the length of addr32 prefix. */
24714 if (TARGET_64BIT
&& !lea
24715 && (SImode_address_operand (addr
, VOIDmode
)
24716 || (parts
.base
&& GET_MODE (parts
.base
) == SImode
)
24717 || (parts
.index
&& GET_MODE (parts
.index
) == SImode
)))
24721 index
= parts
.index
;
24724 if (base
&& GET_CODE (base
) == SUBREG
)
24725 base
= SUBREG_REG (base
);
24726 if (index
&& GET_CODE (index
) == SUBREG
)
24727 index
= SUBREG_REG (index
);
24729 gcc_assert (base
== NULL_RTX
|| REG_P (base
));
24730 gcc_assert (index
== NULL_RTX
|| REG_P (index
));
24733 - esp as the base always wants an index,
24734 - ebp as the base always wants a displacement,
24735 - r12 as the base always wants an index,
24736 - r13 as the base always wants a displacement. */
24738 /* Register Indirect. */
24739 if (base
&& !index
&& !disp
)
24741 /* esp (for its index) and ebp (for its displacement) need
24742 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
24744 if (base
== arg_pointer_rtx
24745 || base
== frame_pointer_rtx
24746 || REGNO (base
) == SP_REG
24747 || REGNO (base
) == BP_REG
24748 || REGNO (base
) == R12_REG
24749 || REGNO (base
) == R13_REG
)
24753 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
24754 is not disp32, but disp32(%rip), so for disp32
24755 SIB byte is needed, unless print_operand_address
24756 optimizes it into disp32(%rip) or (%rip) is implied
24758 else if (disp
&& !base
&& !index
)
24761 if (rip_relative_addr_p (&parts
))
24766 /* Find the length of the displacement constant. */
24769 if (base
&& satisfies_constraint_K (disp
))
24774 /* ebp always wants a displacement. Similarly r13. */
24775 else if (base
&& (REGNO (base
) == BP_REG
|| REGNO (base
) == R13_REG
))
24778 /* An index requires the two-byte modrm form.... */
24780 /* ...like esp (or r12), which always wants an index. */
24781 || base
== arg_pointer_rtx
24782 || base
== frame_pointer_rtx
24783 || (base
&& (REGNO (base
) == SP_REG
|| REGNO (base
) == R12_REG
)))
24790 /* Compute default value for "length_immediate" attribute. When SHORTFORM
24791 is set, expect that insn have 8bit immediate alternative. */
/* Compute the default "length_immediate" attribute for INSN: the byte
   length of its constant operand's encoding.  When SHORTFORM, the insn
   has an 8-bit immediate alternative, so values fitting in a signed
   byte (after truncation to the operand mode) encode in one byte.
   NOTE(review): many interior lines (the switch on `mode', the return
   statements) are missing from this excerpt.  */
24793 ix86_attr_length_immediate_default (rtx insn
, bool shortform
)
24797 extract_insn_cached (insn
);
/* Scan operands for the (at most one counted) constant.  */
24798 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24799 if (CONSTANT_P (recog_data
.operand
[i
]))
24801 enum attr_mode mode
= get_attr_mode (insn
);
24804 if (shortform
&& CONST_INT_P (recog_data
.operand
[i
]))
24806 HOST_WIDE_INT ival
= INTVAL (recog_data
.operand
[i
]);
/* Truncate to the operand mode before the signed-byte range test.  */
24813 ival
= trunc_int_for_mode (ival
, HImode
);
24816 ival
= trunc_int_for_mode (ival
, SImode
);
24821 if (IN_RANGE (ival
, -128, 127))
24838 /* Immediates for DImode instructions are encoded
24839 as 32bit sign extended values. */
24844 fatal_insn ("unknown insn mode", insn
);
24850 /* Compute default value for "length_address" attribute. */
/* Compute the default "length_address" attribute for INSN: the encoded
   length of its (first constrained, non-ignored) memory operand's
   address, via memory_address_length.  LEA insns measure the SET_SRC
   address directly.  NOTE(review): braces, the fallback return and the
   constraint-skipping loop bodies are partly missing from this
   excerpt.  */
24852 ix86_attr_length_address_default (rtx insn
)
24856 if (get_attr_type (insn
) == TYPE_LEA
)
24858 rtx set
= PATTERN (insn
), addr
;
/* A LEA may be wrapped in a PARALLEL (e.g. with a clobber).  */
24860 if (GET_CODE (set
) == PARALLEL
)
24861 set
= XVECEXP (set
, 0, 0);
24863 gcc_assert (GET_CODE (set
) == SET
);
24865 addr
= SET_SRC (set
);
24867 return memory_address_length (addr
, true);
24870 extract_insn_cached (insn
);
24871 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24872 if (MEM_P (recog_data
.operand
[i
]))
24874 constrain_operands_cached (reload_completed
);
24875 if (which_alternative
!= -1)
24877 const char *constraints
= recog_data
.constraints
[i
];
24878 int alt
= which_alternative
;
/* Skip modifier characters, then skip to the chosen alternative.  */
24880 while (*constraints
== '=' || *constraints
== '+')
24883 while (*constraints
++ != ',')
24885 /* Skip ignored operands. */
24886 if (*constraints
== 'X')
24889 return memory_address_length (XEXP (recog_data
.operand
[i
], 0), false);
24894 /* Compute default value for "length_vex" attribute. It includes
24895 2 or 3 byte VEX prefix and 1 opcode byte. */
/* Compute the default "length_vex" attribute for INSN: the VEX prefix
   (2 or 3 bytes) plus one opcode byte.  A 3-byte prefix is required by
   a non-0f opcode map, VEX.W, REX.W (DImode general register operand),
   or REX.X/REX.B (extended register in a memory operand).
   NOTE(review): the return statements are missing from this excerpt.  */
24898 ix86_attr_length_vex_default (rtx insn
, bool has_0f_opcode
, bool has_vex_w
)
24902 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
24903 byte VEX prefix. */
24904 if (!has_0f_opcode
|| has_vex_w
)
24907 /* We can always use 2 byte VEX prefix in 32bit. */
24911 extract_insn_cached (insn
);
24913 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24914 if (REG_P (recog_data
.operand
[i
]))
24916 /* REX.W bit uses 3 byte VEX prefix. */
24917 if (GET_MODE (recog_data
.operand
[i
]) == DImode
24918 && GENERAL_REG_P (recog_data
.operand
[i
]))
24923 /* REX.X or REX.B bits use 3 byte VEX prefix. */
24924 if (MEM_P (recog_data
.operand
[i
])
24925 && x86_extended_reg_mentioned_p (recog_data
.operand
[i
]))
24932 /* Return the maximum number of instructions a cpu can issue. */
24935 ix86_issue_rate (void)
24939 case PROCESSOR_PENTIUM
:
24940 case PROCESSOR_ATOM
:
24941 case PROCESSOR_SLM
:
24943 case PROCESSOR_BTVER2
:
24944 case PROCESSOR_PENTIUM4
:
24945 case PROCESSOR_NOCONA
:
24948 case PROCESSOR_PENTIUMPRO
:
24949 case PROCESSOR_ATHLON
:
24951 case PROCESSOR_AMDFAM10
:
24952 case PROCESSOR_GENERIC
:
24953 case PROCESSOR_BTVER1
:
24956 case PROCESSOR_BDVER1
:
24957 case PROCESSOR_BDVER2
:
24958 case PROCESSOR_BDVER3
:
24959 case PROCESSOR_BDVER4
:
24960 case PROCESSOR_CORE2
:
24961 case PROCESSOR_COREI7
:
24962 case PROCESSOR_COREI7_AVX
:
24963 case PROCESSOR_HASWELL
:
24971 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
24972 by DEP_INSN and nothing set by DEP_INSN. */
24975 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
24979 /* Simplify the test for uninteresting insns. */
24980 if (insn_type
!= TYPE_SETCC
24981 && insn_type
!= TYPE_ICMOV
24982 && insn_type
!= TYPE_FCMOV
24983 && insn_type
!= TYPE_IBR
)
24986 if ((set
= single_set (dep_insn
)) != 0)
24988 set
= SET_DEST (set
);
24991 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
24992 && XVECLEN (PATTERN (dep_insn
), 0) == 2
24993 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
24994 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
24996 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
24997 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
25002 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
25005 /* This test is true if the dependent insn reads the flags but
25006 not any other potentially set register. */
25007 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
25010 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
25016 /* Return true iff USE_INSN has a memory address with operands set by
25020 ix86_agi_dependent (rtx set_insn
, rtx use_insn
)
25023 extract_insn_cached (use_insn
);
25024 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
25025 if (MEM_P (recog_data
.operand
[i
]))
25027 rtx addr
= XEXP (recog_data
.operand
[i
], 0);
25028 return modified_in_p (addr
, set_insn
) != 0;
25033 /* Helper function for exact_store_load_dependency.
25034 Return true if addr is found in insn. */
25036 exact_dependency_1 (rtx addr
, rtx insn
)
25038 enum rtx_code code
;
25039 const char *format_ptr
;
25042 code
= GET_CODE (insn
);
25046 if (rtx_equal_p (addr
, insn
))
25061 format_ptr
= GET_RTX_FORMAT (code
);
25062 for (i
= 0; i
< GET_RTX_LENGTH (code
); i
++)
25064 switch (*format_ptr
++)
25067 if (exact_dependency_1 (addr
, XEXP (insn
, i
)))
25071 for (j
= 0; j
< XVECLEN (insn
, i
); j
++)
25072 if (exact_dependency_1 (addr
, XVECEXP (insn
, i
, j
)))
25080 /* Return true if there exists exact dependency for store & load, i.e.
25081 the same memory address is used in them. */
25083 exact_store_load_dependency (rtx store
, rtx load
)
25087 set1
= single_set (store
);
25090 if (!MEM_P (SET_DEST (set1
)))
25092 set2
= single_set (load
);
25095 if (exact_dependency_1 (SET_DEST (set1
), SET_SRC (set2
)))
25101 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
25103 enum attr_type insn_type
, dep_insn_type
;
25104 enum attr_memory memory
;
25106 int dep_insn_code_number
;
25108 /* Anti and output dependencies have zero cost on all CPUs. */
25109 if (REG_NOTE_KIND (link
) != 0)
25112 dep_insn_code_number
= recog_memoized (dep_insn
);
25114 /* If we can't recognize the insns, we can't really do anything. */
25115 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
25118 insn_type
= get_attr_type (insn
);
25119 dep_insn_type
= get_attr_type (dep_insn
);
25123 case PROCESSOR_PENTIUM
:
25124 /* Address Generation Interlock adds a cycle of latency. */
25125 if (insn_type
== TYPE_LEA
)
25127 rtx addr
= PATTERN (insn
);
25129 if (GET_CODE (addr
) == PARALLEL
)
25130 addr
= XVECEXP (addr
, 0, 0);
25132 gcc_assert (GET_CODE (addr
) == SET
);
25134 addr
= SET_SRC (addr
);
25135 if (modified_in_p (addr
, dep_insn
))
25138 else if (ix86_agi_dependent (dep_insn
, insn
))
25141 /* ??? Compares pair with jump/setcc. */
25142 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
25145 /* Floating point stores require value to be ready one cycle earlier. */
25146 if (insn_type
== TYPE_FMOV
25147 && get_attr_memory (insn
) == MEMORY_STORE
25148 && !ix86_agi_dependent (dep_insn
, insn
))
25152 case PROCESSOR_PENTIUMPRO
:
25153 memory
= get_attr_memory (insn
);
25155 /* INT->FP conversion is expensive. */
25156 if (get_attr_fp_int_src (dep_insn
))
25159 /* There is one cycle extra latency between an FP op and a store. */
25160 if (insn_type
== TYPE_FMOV
25161 && (set
= single_set (dep_insn
)) != NULL_RTX
25162 && (set2
= single_set (insn
)) != NULL_RTX
25163 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
25164 && MEM_P (SET_DEST (set2
)))
25167 /* Show ability of reorder buffer to hide latency of load by executing
25168 in parallel with previous instruction in case
25169 previous instruction is not needed to compute the address. */
25170 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
25171 && !ix86_agi_dependent (dep_insn
, insn
))
25173 /* Claim moves to take one cycle, as core can issue one load
25174 at time and the next load can start cycle later. */
25175 if (dep_insn_type
== TYPE_IMOV
25176 || dep_insn_type
== TYPE_FMOV
)
25184 memory
= get_attr_memory (insn
);
25186 /* The esp dependency is resolved before the instruction is really
25188 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
25189 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
25192 /* INT->FP conversion is expensive. */
25193 if (get_attr_fp_int_src (dep_insn
))
25196 /* Show ability of reorder buffer to hide latency of load by executing
25197 in parallel with previous instruction in case
25198 previous instruction is not needed to compute the address. */
25199 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
25200 && !ix86_agi_dependent (dep_insn
, insn
))
25202 /* Claim moves to take one cycle, as core can issue one load
25203 at time and the next load can start cycle later. */
25204 if (dep_insn_type
== TYPE_IMOV
25205 || dep_insn_type
== TYPE_FMOV
)
25214 case PROCESSOR_ATHLON
:
25216 case PROCESSOR_AMDFAM10
:
25217 case PROCESSOR_BDVER1
:
25218 case PROCESSOR_BDVER2
:
25219 case PROCESSOR_BDVER3
:
25220 case PROCESSOR_BDVER4
:
25221 case PROCESSOR_BTVER1
:
25222 case PROCESSOR_BTVER2
:
25223 case PROCESSOR_GENERIC
:
25224 memory
= get_attr_memory (insn
);
25226 /* Stack engine allows to execute push&pop instructions in parall. */
25227 if (((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
25228 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
25229 && (ix86_tune
!= PROCESSOR_ATHLON
&& ix86_tune
!= PROCESSOR_K8
))
25232 /* Show ability of reorder buffer to hide latency of load by executing
25233 in parallel with previous instruction in case
25234 previous instruction is not needed to compute the address. */
25235 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
25236 && !ix86_agi_dependent (dep_insn
, insn
))
25238 enum attr_unit unit
= get_attr_unit (insn
);
25241 /* Because of the difference between the length of integer and
25242 floating unit pipeline preparation stages, the memory operands
25243 for floating point are cheaper.
25245 ??? For Athlon it the difference is most probably 2. */
25246 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
25249 loadcost
= TARGET_ATHLON
? 2 : 0;
25251 if (cost
>= loadcost
)
25258 case PROCESSOR_CORE2
:
25259 case PROCESSOR_COREI7
:
25260 case PROCESSOR_COREI7_AVX
:
25261 case PROCESSOR_HASWELL
:
25262 memory
= get_attr_memory (insn
);
25264 /* Stack engine allows to execute push&pop instructions in parall. */
25265 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
25266 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
25269 /* Show ability of reorder buffer to hide latency of load by executing
25270 in parallel with previous instruction in case
25271 previous instruction is not needed to compute the address. */
25272 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
25273 && !ix86_agi_dependent (dep_insn
, insn
))
25282 case PROCESSOR_SLM
:
25283 if (!reload_completed
)
25286 /* Increase cost of integer loads. */
25287 memory
= get_attr_memory (dep_insn
);
25288 if (memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
25290 enum attr_unit unit
= get_attr_unit (dep_insn
);
25291 if (unit
== UNIT_INTEGER
&& cost
== 1)
25293 if (memory
== MEMORY_LOAD
)
25297 /* Increase cost of ld/st for short int types only
25298 because of store forwarding issue. */
25299 rtx set
= single_set (dep_insn
);
25300 if (set
&& (GET_MODE (SET_DEST (set
)) == QImode
25301 || GET_MODE (SET_DEST (set
)) == HImode
))
25303 /* Increase cost of store/load insn if exact
25304 dependence exists and it is load insn. */
25305 enum attr_memory insn_memory
= get_attr_memory (insn
);
25306 if (insn_memory
== MEMORY_LOAD
25307 && exact_store_load_dependency (dep_insn
, insn
))
25321 /* How many alternative schedules to try. This should be as wide as the
25322 scheduling freedom in the DFA, but no wider. Making this value too
25323 large results extra work for the scheduler. */
25326 ia32_multipass_dfa_lookahead (void)
25330 case PROCESSOR_PENTIUM
:
25333 case PROCESSOR_PENTIUMPRO
:
25337 case PROCESSOR_BDVER1
:
25338 case PROCESSOR_BDVER2
:
25339 case PROCESSOR_BDVER3
:
25340 case PROCESSOR_BDVER4
:
25341 /* We use lookahead value 4 for BD both before and after reload
25342 schedules. Plan is to have value 8 included for O3. */
25345 case PROCESSOR_CORE2
:
25346 case PROCESSOR_COREI7
:
25347 case PROCESSOR_COREI7_AVX
:
25348 case PROCESSOR_HASWELL
:
25349 case PROCESSOR_ATOM
:
25350 case PROCESSOR_SLM
:
25351 /* Generally, we want haifa-sched:max_issue() to look ahead as far
25352 as many instructions can be executed on a cycle, i.e.,
25353 issue_rate. I wonder why tuning for many CPUs does not do this. */
25354 if (reload_completed
)
25355 return ix86_issue_rate ();
25356 /* Don't use lookahead for pre-reload schedule to save compile time. */
25364 /* Return true if target platform supports macro-fusion. */
25367 ix86_macro_fusion_p ()
25369 return TARGET_FUSE_CMP_AND_BRANCH
;
25372 /* Check whether current microarchitecture support macro fusion
25373 for insn pair "CONDGEN + CONDJMP". Refer to
25374 "Intel Architectures Optimization Reference Manual". */
25377 ix86_macro_fusion_pair_p (rtx condgen
, rtx condjmp
)
25380 rtx single_set
= single_set (condgen
);
25381 enum rtx_code ccode
;
25382 rtx compare_set
= NULL_RTX
, test_if
, cond
;
25383 rtx alu_set
= NULL_RTX
, addr
= NULL_RTX
;
25385 if (get_attr_type (condgen
) != TYPE_TEST
25386 && get_attr_type (condgen
) != TYPE_ICMP
25387 && get_attr_type (condgen
) != TYPE_INCDEC
25388 && get_attr_type (condgen
) != TYPE_ALU
)
25391 if (single_set
== NULL_RTX
25392 && !TARGET_FUSE_ALU_AND_BRANCH
)
25395 if (single_set
!= NULL_RTX
)
25396 compare_set
= single_set
;
25400 rtx pat
= PATTERN (condgen
);
25401 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
25402 if (GET_CODE (XVECEXP (pat
, 0, i
)) == SET
)
25404 rtx set_src
= SET_SRC (XVECEXP (pat
, 0, i
));
25405 if (GET_CODE (set_src
) == COMPARE
)
25406 compare_set
= XVECEXP (pat
, 0, i
);
25408 alu_set
= XVECEXP (pat
, 0, i
);
25411 if (compare_set
== NULL_RTX
)
25413 src
= SET_SRC (compare_set
);
25414 if (GET_CODE (src
) != COMPARE
)
25417 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25419 if ((MEM_P (XEXP (src
, 0))
25420 && CONST_INT_P (XEXP (src
, 1)))
25421 || (MEM_P (XEXP (src
, 1))
25422 && CONST_INT_P (XEXP (src
, 0))))
25425 /* No fusion for RIP-relative address. */
25426 if (MEM_P (XEXP (src
, 0)))
25427 addr
= XEXP (XEXP (src
, 0), 0);
25428 else if (MEM_P (XEXP (src
, 1)))
25429 addr
= XEXP (XEXP (src
, 1), 0);
25432 ix86_address parts
;
25433 int ok
= ix86_decompose_address (addr
, &parts
);
25436 if (rip_relative_addr_p (&parts
))
25440 test_if
= SET_SRC (pc_set (condjmp
));
25441 cond
= XEXP (test_if
, 0);
25442 ccode
= GET_CODE (cond
);
25443 /* Check whether conditional jump use Sign or Overflow Flags. */
25444 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25451 /* Return true for TYPE_TEST and TYPE_ICMP. */
25452 if (get_attr_type (condgen
) == TYPE_TEST
25453 || get_attr_type (condgen
) == TYPE_ICMP
)
25456 /* The following is the case that macro-fusion for alu + jmp. */
25457 if (!TARGET_FUSE_ALU_AND_BRANCH
|| !alu_set
)
25460 /* No fusion for alu op with memory destination operand. */
25461 dest
= SET_DEST (alu_set
);
25465 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25467 if (get_attr_type (condgen
) == TYPE_INCDEC
25477 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
25478 execution. It is applied if
25479 (1) IMUL instruction is on the top of list;
25480 (2) There exists the only producer of independent IMUL instruction in
25482 Return index of IMUL producer if it was found and -1 otherwise. */
25484 do_reorder_for_imul (rtx
*ready
, int n_ready
)
25486 rtx insn
, set
, insn1
, insn2
;
25487 sd_iterator_def sd_it
;
25492 if (ix86_tune
!= PROCESSOR_ATOM
)
25495 /* Check that IMUL instruction is on the top of ready list. */
25496 insn
= ready
[n_ready
- 1];
25497 set
= single_set (insn
);
25500 if (!(GET_CODE (SET_SRC (set
)) == MULT
25501 && GET_MODE (SET_SRC (set
)) == SImode
))
25504 /* Search for producer of independent IMUL instruction. */
25505 for (i
= n_ready
- 2; i
>= 0; i
--)
25508 if (!NONDEBUG_INSN_P (insn
))
25510 /* Skip IMUL instruction. */
25511 insn2
= PATTERN (insn
);
25512 if (GET_CODE (insn2
) == PARALLEL
)
25513 insn2
= XVECEXP (insn2
, 0, 0);
25514 if (GET_CODE (insn2
) == SET
25515 && GET_CODE (SET_SRC (insn2
)) == MULT
25516 && GET_MODE (SET_SRC (insn2
)) == SImode
)
25519 FOR_EACH_DEP (insn
, SD_LIST_FORW
, sd_it
, dep
)
25522 con
= DEP_CON (dep
);
25523 if (!NONDEBUG_INSN_P (con
))
25525 insn1
= PATTERN (con
);
25526 if (GET_CODE (insn1
) == PARALLEL
)
25527 insn1
= XVECEXP (insn1
, 0, 0);
25529 if (GET_CODE (insn1
) == SET
25530 && GET_CODE (SET_SRC (insn1
)) == MULT
25531 && GET_MODE (SET_SRC (insn1
)) == SImode
)
25533 sd_iterator_def sd_it1
;
25535 /* Check if there is no other dependee for IMUL. */
25537 FOR_EACH_DEP (con
, SD_LIST_BACK
, sd_it1
, dep1
)
25540 pro
= DEP_PRO (dep1
);
25541 if (!NONDEBUG_INSN_P (pro
))
25556 /* Try to find the best candidate on the top of ready list if two insns
25557 have the same priority - candidate is best if its dependees were
25558 scheduled earlier. Applied for Silvermont only.
25559 Return true if top 2 insns must be interchanged. */
25561 swap_top_of_ready_list (rtx
*ready
, int n_ready
)
25563 rtx top
= ready
[n_ready
- 1];
25564 rtx next
= ready
[n_ready
- 2];
25566 sd_iterator_def sd_it
;
25570 #define INSN_TICK(INSN) (HID (INSN)->tick)
25572 if (ix86_tune
!= PROCESSOR_SLM
)
25575 if (!NONDEBUG_INSN_P (top
))
25577 if (!NONJUMP_INSN_P (top
))
25579 if (!NONDEBUG_INSN_P (next
))
25581 if (!NONJUMP_INSN_P (next
))
25583 set
= single_set (top
);
25586 set
= single_set (next
);
25590 if (INSN_PRIORITY_KNOWN (top
) && INSN_PRIORITY_KNOWN (next
))
25592 if (INSN_PRIORITY (top
) != INSN_PRIORITY (next
))
25594 /* Determine winner more precise. */
25595 FOR_EACH_DEP (top
, SD_LIST_RES_BACK
, sd_it
, dep
)
25598 pro
= DEP_PRO (dep
);
25599 if (!NONDEBUG_INSN_P (pro
))
25601 if (INSN_TICK (pro
) > clock1
)
25602 clock1
= INSN_TICK (pro
);
25604 FOR_EACH_DEP (next
, SD_LIST_RES_BACK
, sd_it
, dep
)
25607 pro
= DEP_PRO (dep
);
25608 if (!NONDEBUG_INSN_P (pro
))
25610 if (INSN_TICK (pro
) > clock2
)
25611 clock2
= INSN_TICK (pro
);
25614 if (clock1
== clock2
)
25616 /* Determine winner - load must win. */
25617 enum attr_memory memory1
, memory2
;
25618 memory1
= get_attr_memory (top
);
25619 memory2
= get_attr_memory (next
);
25620 if (memory2
== MEMORY_LOAD
&& memory1
!= MEMORY_LOAD
)
25623 return (bool) (clock2
< clock1
);
25629 /* Perform possible reodering of ready list for Atom/Silvermont only.
25630 Return issue rate. */
25632 ix86_sched_reorder (FILE *dump
, int sched_verbose
, rtx
*ready
, int *pn_ready
,
25635 int issue_rate
= -1;
25636 int n_ready
= *pn_ready
;
25641 /* Set up issue rate. */
25642 issue_rate
= ix86_issue_rate ();
25644 /* Do reodering for Atom/SLM only. */
25645 if (ix86_tune
!= PROCESSOR_ATOM
&& ix86_tune
!= PROCESSOR_SLM
)
25648 /* Nothing to do if ready list contains only 1 instruction. */
25652 /* Do reodering for post-reload scheduler only. */
25653 if (!reload_completed
)
25656 if ((index
= do_reorder_for_imul (ready
, n_ready
)) >= 0)
25658 if (sched_verbose
> 1)
25659 fprintf (dump
, ";;\tatom sched_reorder: put %d insn on top\n",
25660 INSN_UID (ready
[index
]));
25662 /* Put IMUL producer (ready[index]) at the top of ready list. */
25663 insn
= ready
[index
];
25664 for (i
= index
; i
< n_ready
- 1; i
++)
25665 ready
[i
] = ready
[i
+ 1];
25666 ready
[n_ready
- 1] = insn
;
25669 if (clock_var
!= 0 && swap_top_of_ready_list (ready
, n_ready
))
25671 if (sched_verbose
> 1)
25672 fprintf (dump
, ";;\tslm sched_reorder: swap %d and %d insns\n",
25673 INSN_UID (ready
[n_ready
- 1]), INSN_UID (ready
[n_ready
- 2]));
25674 /* Swap 2 top elements of ready list. */
25675 insn
= ready
[n_ready
- 1];
25676 ready
[n_ready
- 1] = ready
[n_ready
- 2];
25677 ready
[n_ready
- 2] = insn
;
25683 ix86_class_likely_spilled_p (reg_class_t
);
25685 /* Returns true if lhs of insn is HW function argument register and set up
25686 is_spilled to true if it is likely spilled HW register. */
25688 insn_is_function_arg (rtx insn
, bool* is_spilled
)
25692 if (!NONDEBUG_INSN_P (insn
))
25694 /* Call instructions are not movable, ignore it. */
25697 insn
= PATTERN (insn
);
25698 if (GET_CODE (insn
) == PARALLEL
)
25699 insn
= XVECEXP (insn
, 0, 0);
25700 if (GET_CODE (insn
) != SET
)
25702 dst
= SET_DEST (insn
);
25703 if (REG_P (dst
) && HARD_REGISTER_P (dst
)
25704 && ix86_function_arg_regno_p (REGNO (dst
)))
25706 /* Is it likely spilled HW register? */
25707 if (!TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (dst
))
25708 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst
))))
25709 *is_spilled
= true;
25715 /* Add output dependencies for chain of function adjacent arguments if only
25716 there is a move to likely spilled HW register. Return first argument
25717 if at least one dependence was added or NULL otherwise. */
25719 add_parameter_dependencies (rtx call
, rtx head
)
25723 rtx first_arg
= NULL
;
25724 bool is_spilled
= false;
25726 head
= PREV_INSN (head
);
25728 /* Find nearest to call argument passing instruction. */
25731 last
= PREV_INSN (last
);
25734 if (!NONDEBUG_INSN_P (last
))
25736 if (insn_is_function_arg (last
, &is_spilled
))
25744 insn
= PREV_INSN (last
);
25745 if (!INSN_P (insn
))
25749 if (!NONDEBUG_INSN_P (insn
))
25754 if (insn_is_function_arg (insn
, &is_spilled
))
25756 /* Add output depdendence between two function arguments if chain
25757 of output arguments contains likely spilled HW registers. */
25759 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
25760 first_arg
= last
= insn
;
25770 /* Add output or anti dependency from insn to first_arg to restrict its code
25773 avoid_func_arg_motion (rtx first_arg
, rtx insn
)
25778 set
= single_set (insn
);
25781 tmp
= SET_DEST (set
);
25784 /* Add output dependency to the first function argument. */
25785 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
25788 /* Add anti dependency. */
25789 add_dependence (first_arg
, insn
, REG_DEP_ANTI
);
25792 /* Avoid cross block motion of function argument through adding dependency
25793 from the first non-jump instruction in bb. */
25795 add_dependee_for_func_arg (rtx arg
, basic_block bb
)
25797 rtx insn
= BB_END (bb
);
25801 if (NONDEBUG_INSN_P (insn
) && NONJUMP_INSN_P (insn
))
25803 rtx set
= single_set (insn
);
25806 avoid_func_arg_motion (arg
, insn
);
25810 if (insn
== BB_HEAD (bb
))
25812 insn
= PREV_INSN (insn
);
25816 /* Hook for pre-reload schedule - avoid motion of function arguments
25817 passed in likely spilled HW registers. */
25819 ix86_dependencies_evaluation_hook (rtx head
, rtx tail
)
25822 rtx first_arg
= NULL
;
25823 if (reload_completed
)
25825 while (head
!= tail
&& DEBUG_INSN_P (head
))
25826 head
= NEXT_INSN (head
);
25827 for (insn
= tail
; insn
!= head
; insn
= PREV_INSN (insn
))
25828 if (INSN_P (insn
) && CALL_P (insn
))
25830 first_arg
= add_parameter_dependencies (insn
, head
);
25833 /* Add dependee for first argument to predecessors if only
25834 region contains more than one block. */
25835 basic_block bb
= BLOCK_FOR_INSN (insn
);
25836 int rgn
= CONTAINING_RGN (bb
->index
);
25837 int nr_blks
= RGN_NR_BLOCKS (rgn
);
25838 /* Skip trivial regions and region head blocks that can have
25839 predecessors outside of region. */
25840 if (nr_blks
> 1 && BLOCK_TO_BB (bb
->index
) != 0)
25844 /* Assume that region is SCC, i.e. all immediate predecessors
25845 of non-head block are in the same region. */
25846 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
25848 /* Avoid creating of loop-carried dependencies through
25849 using topological odering in region. */
25850 if (BLOCK_TO_BB (bb
->index
) > BLOCK_TO_BB (e
->src
->index
))
25851 add_dependee_for_func_arg (first_arg
, e
->src
);
25859 else if (first_arg
)
25860 avoid_func_arg_motion (first_arg
, insn
);
25863 /* Hook for pre-reload schedule - set priority of moves from likely spilled
25864 HW registers to maximum, to schedule them at soon as possible. These are
25865 moves from function argument registers at the top of the function entry
25866 and moves from function return value registers after call. */
25868 ix86_adjust_priority (rtx insn
, int priority
)
25872 if (reload_completed
)
25875 if (!NONDEBUG_INSN_P (insn
))
25878 set
= single_set (insn
);
25881 rtx tmp
= SET_SRC (set
);
25883 && HARD_REGISTER_P (tmp
)
25884 && !TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (tmp
))
25885 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp
))))
25886 return current_sched_info
->sched_max_insns_priority
;
25892 /* Model decoder of Core 2/i7.
25893 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
25894 track the instruction fetch block boundaries and make sure that long
25895 (9+ bytes) instructions are assigned to D0. */
25897 /* Maximum length of an insn that can be handled by
25898 a secondary decoder unit. '8' for Core 2/i7. */
25899 static int core2i7_secondary_decoder_max_insn_size
;
25901 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
25902 '16' for Core 2/i7. */
25903 static int core2i7_ifetch_block_size
;
25905 /* Maximum number of instructions decoder can handle per cycle.
25906 '6' for Core 2/i7. */
25907 static int core2i7_ifetch_block_max_insns
;
25909 typedef struct ix86_first_cycle_multipass_data_
*
25910 ix86_first_cycle_multipass_data_t
;
25911 typedef const struct ix86_first_cycle_multipass_data_
*
25912 const_ix86_first_cycle_multipass_data_t
;
25914 /* A variable to store target state across calls to max_issue within
25916 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data
,
25917 *ix86_first_cycle_multipass_data
= &_ix86_first_cycle_multipass_data
;
25919 /* Initialize DATA. */
25921 core2i7_first_cycle_multipass_init (void *_data
)
25923 ix86_first_cycle_multipass_data_t data
25924 = (ix86_first_cycle_multipass_data_t
) _data
;
25926 data
->ifetch_block_len
= 0;
25927 data
->ifetch_block_n_insns
= 0;
25928 data
->ready_try_change
= NULL
;
25929 data
->ready_try_change_size
= 0;
25932 /* Advancing the cycle; reset ifetch block counts. */
25934 core2i7_dfa_post_advance_cycle (void)
25936 ix86_first_cycle_multipass_data_t data
= ix86_first_cycle_multipass_data
;
25938 gcc_assert (data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
25940 data
->ifetch_block_len
= 0;
25941 data
->ifetch_block_n_insns
= 0;
25944 static int min_insn_size (rtx
);
25946 /* Filter out insns from ready_try that the core will not be able to issue
25947 on current cycle due to decoder. */
25949 core2i7_first_cycle_multipass_filter_ready_try
25950 (const_ix86_first_cycle_multipass_data_t data
,
25951 char *ready_try
, int n_ready
, bool first_cycle_insn_p
)
25958 if (ready_try
[n_ready
])
25961 insn
= get_ready_element (n_ready
);
25962 insn_size
= min_insn_size (insn
);
25964 if (/* If this is a too long an insn for a secondary decoder ... */
25965 (!first_cycle_insn_p
25966 && insn_size
> core2i7_secondary_decoder_max_insn_size
)
25967 /* ... or it would not fit into the ifetch block ... */
25968 || data
->ifetch_block_len
+ insn_size
> core2i7_ifetch_block_size
25969 /* ... or the decoder is full already ... */
25970 || data
->ifetch_block_n_insns
+ 1 > core2i7_ifetch_block_max_insns
)
25971 /* ... mask the insn out. */
25973 ready_try
[n_ready
] = 1;
25975 if (data
->ready_try_change
)
25976 bitmap_set_bit (data
->ready_try_change
, n_ready
);
25981 /* Prepare for a new round of multipass lookahead scheduling. */
25983 core2i7_first_cycle_multipass_begin (void *_data
, char *ready_try
, int n_ready
,
25984 bool first_cycle_insn_p
)
25986 ix86_first_cycle_multipass_data_t data
25987 = (ix86_first_cycle_multipass_data_t
) _data
;
25988 const_ix86_first_cycle_multipass_data_t prev_data
25989 = ix86_first_cycle_multipass_data
;
25991 /* Restore the state from the end of the previous round. */
25992 data
->ifetch_block_len
= prev_data
->ifetch_block_len
;
25993 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
;
25995 /* Filter instructions that cannot be issued on current cycle due to
25996 decoder restrictions. */
25997 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
25998 first_cycle_insn_p
);
26001 /* INSN is being issued in current solution. Account for its impact on
26002 the decoder model. */
26004 core2i7_first_cycle_multipass_issue (void *_data
, char *ready_try
, int n_ready
,
26005 rtx insn
, const void *_prev_data
)
26007 ix86_first_cycle_multipass_data_t data
26008 = (ix86_first_cycle_multipass_data_t
) _data
;
26009 const_ix86_first_cycle_multipass_data_t prev_data
26010 = (const_ix86_first_cycle_multipass_data_t
) _prev_data
;
26012 int insn_size
= min_insn_size (insn
);
26014 data
->ifetch_block_len
= prev_data
->ifetch_block_len
+ insn_size
;
26015 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
+ 1;
26016 gcc_assert (data
->ifetch_block_len
<= core2i7_ifetch_block_size
26017 && data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
26019 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26020 if (!data
->ready_try_change
)
26022 data
->ready_try_change
= sbitmap_alloc (n_ready
);
26023 data
->ready_try_change_size
= n_ready
;
26025 else if (data
->ready_try_change_size
< n_ready
)
26027 data
->ready_try_change
= sbitmap_resize (data
->ready_try_change
,
26029 data
->ready_try_change_size
= n_ready
;
26031 bitmap_clear (data
->ready_try_change
);
26033 /* Filter out insns from ready_try that the core will not be able to issue
26034 on current cycle due to decoder. */
26035 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
26039 /* Revert the effect on ready_try. */
26041 core2i7_first_cycle_multipass_backtrack (const void *_data
,
26043 int n_ready ATTRIBUTE_UNUSED
)
26045 const_ix86_first_cycle_multipass_data_t data
26046 = (const_ix86_first_cycle_multipass_data_t
) _data
;
26047 unsigned int i
= 0;
26048 sbitmap_iterator sbi
;
26050 gcc_assert (bitmap_last_set_bit (data
->ready_try_change
) < n_ready
);
26051 EXECUTE_IF_SET_IN_BITMAP (data
->ready_try_change
, 0, i
, sbi
)
26057 /* Save the result of multipass lookahead scheduling for the next round. */
26059 core2i7_first_cycle_multipass_end (const void *_data
)
26061 const_ix86_first_cycle_multipass_data_t data
26062 = (const_ix86_first_cycle_multipass_data_t
) _data
;
26063 ix86_first_cycle_multipass_data_t next_data
26064 = ix86_first_cycle_multipass_data
;
26068 next_data
->ifetch_block_len
= data
->ifetch_block_len
;
26069 next_data
->ifetch_block_n_insns
= data
->ifetch_block_n_insns
;
26073 /* Deallocate target data. */
26075 core2i7_first_cycle_multipass_fini (void *_data
)
26077 ix86_first_cycle_multipass_data_t data
26078 = (ix86_first_cycle_multipass_data_t
) _data
;
26080 if (data
->ready_try_change
)
26082 sbitmap_free (data
->ready_try_change
);
26083 data
->ready_try_change
= NULL
;
26084 data
->ready_try_change_size
= 0;
26088 /* Prepare for scheduling pass. */
26090 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED
,
26091 int verbose ATTRIBUTE_UNUSED
,
26092 int max_uid ATTRIBUTE_UNUSED
)
26094 /* Install scheduling hooks for current CPU. Some of these hooks are used
26095 in time-critical parts of the scheduler, so we only set them up when
26096 they are actually used. */
26099 case PROCESSOR_CORE2
:
26100 case PROCESSOR_COREI7
:
26101 case PROCESSOR_COREI7_AVX
:
26102 case PROCESSOR_HASWELL
:
26103 /* Do not perform multipass scheduling for pre-reload schedule
26104 to save compile time. */
26105 if (reload_completed
)
26107 targetm
.sched
.dfa_post_advance_cycle
26108 = core2i7_dfa_post_advance_cycle
;
26109 targetm
.sched
.first_cycle_multipass_init
26110 = core2i7_first_cycle_multipass_init
;
26111 targetm
.sched
.first_cycle_multipass_begin
26112 = core2i7_first_cycle_multipass_begin
;
26113 targetm
.sched
.first_cycle_multipass_issue
26114 = core2i7_first_cycle_multipass_issue
;
26115 targetm
.sched
.first_cycle_multipass_backtrack
26116 = core2i7_first_cycle_multipass_backtrack
;
26117 targetm
.sched
.first_cycle_multipass_end
26118 = core2i7_first_cycle_multipass_end
;
26119 targetm
.sched
.first_cycle_multipass_fini
26120 = core2i7_first_cycle_multipass_fini
;
26122 /* Set decoder parameters. */
26123 core2i7_secondary_decoder_max_insn_size
= 8;
26124 core2i7_ifetch_block_size
= 16;
26125 core2i7_ifetch_block_max_insns
= 6;
26128 /* ... Fall through ... */
26130 targetm
.sched
.dfa_post_advance_cycle
= NULL
;
26131 targetm
.sched
.first_cycle_multipass_init
= NULL
;
26132 targetm
.sched
.first_cycle_multipass_begin
= NULL
;
26133 targetm
.sched
.first_cycle_multipass_issue
= NULL
;
26134 targetm
.sched
.first_cycle_multipass_backtrack
= NULL
;
26135 targetm
.sched
.first_cycle_multipass_end
= NULL
;
26136 targetm
.sched
.first_cycle_multipass_fini
= NULL
;
26142 /* Compute the alignment given to a constant that is being placed in memory.
26143 EXP is the constant and ALIGN is the alignment that the object would
26145 The value of this function is used instead of that alignment to align
26149 ix86_constant_alignment (tree exp
, int align
)
26151 if (TREE_CODE (exp
) == REAL_CST
|| TREE_CODE (exp
) == VECTOR_CST
26152 || TREE_CODE (exp
) == INTEGER_CST
)
26154 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
26156 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
26159 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
26160 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
26161 return BITS_PER_WORD
;
26166 /* Compute the alignment for a static variable.
26167 TYPE is the data type, and ALIGN is the alignment that
26168 the object would ordinarily have. The value of this function is used
26169 instead of that alignment to align the object. */
26172 ix86_data_alignment (tree type
, int align
, bool opt
)
26174 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
26177 && AGGREGATE_TYPE_P (type
)
26178 && TYPE_SIZE (type
)
26179 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
26180 && wi::geu_p (TYPE_SIZE (type
), max_align
)
26181 && align
< max_align
)
26184 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
26185 to 16byte boundary. */
26188 if ((opt
? AGGREGATE_TYPE_P (type
) : TREE_CODE (type
) == ARRAY_TYPE
)
26189 && TYPE_SIZE (type
)
26190 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
26191 && wi::geu_p (TYPE_SIZE (type
), 128)
26199 if (TREE_CODE (type
) == ARRAY_TYPE
)
26201 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
26203 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
26206 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
26209 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
26211 if ((TYPE_MODE (type
) == XCmode
26212 || TYPE_MODE (type
) == TCmode
) && align
< 128)
26215 else if ((TREE_CODE (type
) == RECORD_TYPE
26216 || TREE_CODE (type
) == UNION_TYPE
26217 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
26218 && TYPE_FIELDS (type
))
26220 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
26222 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
26225 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
26226 || TREE_CODE (type
) == INTEGER_TYPE
)
26228 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
26230 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
26237 /* Compute the alignment for a local variable or a stack slot. EXP is
26238 the data type or decl itself, MODE is the widest mode available and
26239 ALIGN is the alignment that the object would ordinarily have. The
26240 value of this macro is used instead of that alignment to align the
26244 ix86_local_alignment (tree exp
, enum machine_mode mode
,
26245 unsigned int align
)
26249 if (exp
&& DECL_P (exp
))
26251 type
= TREE_TYPE (exp
);
26260 /* Don't do dynamic stack realignment for long long objects with
26261 -mpreferred-stack-boundary=2. */
26264 && ix86_preferred_stack_boundary
< 64
26265 && (mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
26266 && (!type
|| !TYPE_USER_ALIGN (type
))
26267 && (!decl
|| !DECL_USER_ALIGN (decl
)))
26270 /* If TYPE is NULL, we are allocating a stack slot for caller-save
26271 register in MODE. We will return the largest alignment of XF
26275 if (mode
== XFmode
&& align
< GET_MODE_ALIGNMENT (DFmode
))
26276 align
= GET_MODE_ALIGNMENT (DFmode
);
26280 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
26281 to 16byte boundary. Exact wording is:
26283 An array uses the same alignment as its elements, except that a local or
26284 global array variable of length at least 16 bytes or
26285 a C99 variable-length array variable always has alignment of at least 16 bytes.
26287 This was added to allow use of aligned SSE instructions at arrays. This
26288 rule is meant for static storage (where compiler can not do the analysis
26289 by itself). We follow it for automatic variables only when convenient.
26290 We fully control everything in the function compiled and functions from
26291 other unit can not rely on the alignment.
26293 Exclude va_list type. It is the common case of local array where
26294 we can not benefit from the alignment.
26296 TODO: Probably one should optimize for size only when var is not escaping. */
26297 if (TARGET_64BIT
&& optimize_function_for_speed_p (cfun
)
26300 if (AGGREGATE_TYPE_P (type
)
26301 && (va_list_type_node
== NULL_TREE
26302 || (TYPE_MAIN_VARIANT (type
)
26303 != TYPE_MAIN_VARIANT (va_list_type_node
)))
26304 && TYPE_SIZE (type
)
26305 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
26306 && wi::geu_p (TYPE_SIZE (type
), 16)
26310 if (TREE_CODE (type
) == ARRAY_TYPE
)
26312 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
26314 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
26317 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
26319 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
26321 if ((TYPE_MODE (type
) == XCmode
26322 || TYPE_MODE (type
) == TCmode
) && align
< 128)
26325 else if ((TREE_CODE (type
) == RECORD_TYPE
26326 || TREE_CODE (type
) == UNION_TYPE
26327 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
26328 && TYPE_FIELDS (type
))
26330 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
26332 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
26335 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
26336 || TREE_CODE (type
) == INTEGER_TYPE
)
26339 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
26341 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
26347 /* Compute the minimum required alignment for dynamic stack realignment
26348 purposes for a local variable, parameter or a stack slot. EXP is
26349 the data type or decl itself, MODE is its mode and ALIGN is the
26350 alignment that the object would ordinarily have. */
26353 ix86_minimum_alignment (tree exp
, enum machine_mode mode
,
26354 unsigned int align
)
26358 if (exp
&& DECL_P (exp
))
26360 type
= TREE_TYPE (exp
);
26369 if (TARGET_64BIT
|| align
!= 64 || ix86_preferred_stack_boundary
>= 64)
26372 /* Don't do dynamic stack realignment for long long objects with
26373 -mpreferred-stack-boundary=2. */
26374 if ((mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
26375 && (!type
|| !TYPE_USER_ALIGN (type
))
26376 && (!decl
|| !DECL_USER_ALIGN (decl
)))
26382 /* Find a location for the static chain incoming to a nested function.
26383 This is a register, unless all free registers are used by arguments. */
26386 ix86_static_chain (const_tree fndecl
, bool incoming_p
)
26390 if (!DECL_STATIC_CHAIN (fndecl
))
26395 /* We always use R10 in 64-bit mode. */
26403 /* By default in 32-bit mode we use ECX to pass the static chain. */
26406 fntype
= TREE_TYPE (fndecl
);
26407 ccvt
= ix86_get_callcvt (fntype
);
26408 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
26410 /* Fastcall functions use ecx/edx for arguments, which leaves
26411 us with EAX for the static chain.
26412 Thiscall functions use ecx for arguments, which also
26413 leaves us with EAX for the static chain. */
26416 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
26418 /* Thiscall functions use ecx for arguments, which leaves
26419 us with EAX and EDX for the static chain.
26420 We are using for abi-compatibility EAX. */
26423 else if (ix86_function_regparm (fntype
, fndecl
) == 3)
26425 /* For regparm 3, we have no free call-clobbered registers in
26426 which to store the static chain. In order to implement this,
26427 we have the trampoline push the static chain to the stack.
26428 However, we can't push a value below the return address when
26429 we call the nested function directly, so we have to use an
26430 alternate entry point. For this we use ESI, and have the
26431 alternate entry point push ESI, so that things appear the
26432 same once we're executing the nested function. */
26435 if (fndecl
== current_function_decl
)
26436 ix86_static_chain_on_stack
= true;
26437 return gen_frame_mem (SImode
,
26438 plus_constant (Pmode
,
26439 arg_pointer_rtx
, -8));
26445 return gen_rtx_REG (Pmode
, regno
);
26448 /* Emit RTL insns to initialize the variable parts of a trampoline.
26449 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26450 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26451 to be passed to the target function. */
26454 ix86_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
26460 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
26466 /* Load the function address to r11. Try to load address using
26467 the shorter movl instead of movabs. We may want to support
26468 movq for kernel mode, but kernel does not use trampolines at
26469 the moment. FNADDR is a 32bit address and may not be in
26470 DImode when ptr_mode == SImode. Always use movl in this
26472 if (ptr_mode
== SImode
26473 || x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
26475 fnaddr
= copy_addr_to_reg (fnaddr
);
26477 mem
= adjust_address (m_tramp
, HImode
, offset
);
26478 emit_move_insn (mem
, gen_int_mode (0xbb41, HImode
));
26480 mem
= adjust_address (m_tramp
, SImode
, offset
+ 2);
26481 emit_move_insn (mem
, gen_lowpart (SImode
, fnaddr
));
26486 mem
= adjust_address (m_tramp
, HImode
, offset
);
26487 emit_move_insn (mem
, gen_int_mode (0xbb49, HImode
));
26489 mem
= adjust_address (m_tramp
, DImode
, offset
+ 2);
26490 emit_move_insn (mem
, fnaddr
);
26494 /* Load static chain using movabs to r10. Use the shorter movl
26495 instead of movabs when ptr_mode == SImode. */
26496 if (ptr_mode
== SImode
)
26507 mem
= adjust_address (m_tramp
, HImode
, offset
);
26508 emit_move_insn (mem
, gen_int_mode (opcode
, HImode
));
26510 mem
= adjust_address (m_tramp
, ptr_mode
, offset
+ 2);
26511 emit_move_insn (mem
, chain_value
);
26514 /* Jump to r11; the last (unused) byte is a nop, only there to
26515 pad the write out to a single 32-bit store. */
26516 mem
= adjust_address (m_tramp
, SImode
, offset
);
26517 emit_move_insn (mem
, gen_int_mode (0x90e3ff49, SImode
));
26524 /* Depending on the static chain location, either load a register
26525 with a constant, or push the constant to the stack. All of the
26526 instructions are the same size. */
26527 chain
= ix86_static_chain (fndecl
, true);
26530 switch (REGNO (chain
))
26533 opcode
= 0xb8; break;
26535 opcode
= 0xb9; break;
26537 gcc_unreachable ();
26543 mem
= adjust_address (m_tramp
, QImode
, offset
);
26544 emit_move_insn (mem
, gen_int_mode (opcode
, QImode
));
26546 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
26547 emit_move_insn (mem
, chain_value
);
26550 mem
= adjust_address (m_tramp
, QImode
, offset
);
26551 emit_move_insn (mem
, gen_int_mode (0xe9, QImode
));
26553 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
26555 /* Compute offset from the end of the jmp to the target function.
26556 In the case in which the trampoline stores the static chain on
26557 the stack, we need to skip the first insn which pushes the
26558 (call-saved) register static chain; this push is 1 byte. */
26560 disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
26561 plus_constant (Pmode
, XEXP (m_tramp
, 0),
26562 offset
- (MEM_P (chain
) ? 1 : 0)),
26563 NULL_RTX
, 1, OPTAB_DIRECT
);
26564 emit_move_insn (mem
, disp
);
26567 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
26569 #ifdef HAVE_ENABLE_EXECUTE_STACK
26570 #ifdef CHECK_EXECUTE_STACK_ENABLED
26571 if (CHECK_EXECUTE_STACK_ENABLED
)
26573 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
26574 LCT_NORMAL
, VOIDmode
, 1, XEXP (m_tramp
, 0), Pmode
);
26578 /* The following file contains several enumerations and data structures
26579 built from the definitions in i386-builtin-types.def. */
26581 #include "i386-builtin-types.inc"
26583 /* Table for the ix86 builtin non-function types. */
26584 static GTY(()) tree ix86_builtin_type_tab
[(int) IX86_BT_LAST_CPTR
+ 1];
26586 /* Retrieve an element from the above table, building some of
26587 the types lazily. */
26590 ix86_get_builtin_type (enum ix86_builtin_type tcode
)
26592 unsigned int index
;
26595 gcc_assert ((unsigned)tcode
< ARRAY_SIZE(ix86_builtin_type_tab
));
26597 type
= ix86_builtin_type_tab
[(int) tcode
];
26601 gcc_assert (tcode
> IX86_BT_LAST_PRIM
);
26602 if (tcode
<= IX86_BT_LAST_VECT
)
26604 enum machine_mode mode
;
26606 index
= tcode
- IX86_BT_LAST_PRIM
- 1;
26607 itype
= ix86_get_builtin_type (ix86_builtin_type_vect_base
[index
]);
26608 mode
= ix86_builtin_type_vect_mode
[index
];
26610 type
= build_vector_type_for_mode (itype
, mode
);
26616 index
= tcode
- IX86_BT_LAST_VECT
- 1;
26617 if (tcode
<= IX86_BT_LAST_PTR
)
26618 quals
= TYPE_UNQUALIFIED
;
26620 quals
= TYPE_QUAL_CONST
;
26622 itype
= ix86_get_builtin_type (ix86_builtin_type_ptr_base
[index
]);
26623 if (quals
!= TYPE_UNQUALIFIED
)
26624 itype
= build_qualified_type (itype
, quals
);
26626 type
= build_pointer_type (itype
);
26629 ix86_builtin_type_tab
[(int) tcode
] = type
;
26633 /* Table for the ix86 builtin function types. */
26634 static GTY(()) tree ix86_builtin_func_type_tab
[(int) IX86_BT_LAST_ALIAS
+ 1];
26636 /* Retrieve an element from the above table, building some of
26637 the types lazily. */
26640 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode
)
26644 gcc_assert ((unsigned)tcode
< ARRAY_SIZE (ix86_builtin_func_type_tab
));
26646 type
= ix86_builtin_func_type_tab
[(int) tcode
];
26650 if (tcode
<= IX86_BT_LAST_FUNC
)
26652 unsigned start
= ix86_builtin_func_start
[(int) tcode
];
26653 unsigned after
= ix86_builtin_func_start
[(int) tcode
+ 1];
26654 tree rtype
, atype
, args
= void_list_node
;
26657 rtype
= ix86_get_builtin_type (ix86_builtin_func_args
[start
]);
26658 for (i
= after
- 1; i
> start
; --i
)
26660 atype
= ix86_get_builtin_type (ix86_builtin_func_args
[i
]);
26661 args
= tree_cons (NULL
, atype
, args
);
26664 type
= build_function_type (rtype
, args
);
26668 unsigned index
= tcode
- IX86_BT_LAST_FUNC
- 1;
26669 enum ix86_builtin_func_type icode
;
26671 icode
= ix86_builtin_func_alias_base
[index
];
26672 type
= ix86_get_builtin_func_type (icode
);
26675 ix86_builtin_func_type_tab
[(int) tcode
] = type
;
26680 /* Codes for all the SSE/MMX builtins. */
26683 IX86_BUILTIN_ADDPS
,
26684 IX86_BUILTIN_ADDSS
,
26685 IX86_BUILTIN_DIVPS
,
26686 IX86_BUILTIN_DIVSS
,
26687 IX86_BUILTIN_MULPS
,
26688 IX86_BUILTIN_MULSS
,
26689 IX86_BUILTIN_SUBPS
,
26690 IX86_BUILTIN_SUBSS
,
26692 IX86_BUILTIN_CMPEQPS
,
26693 IX86_BUILTIN_CMPLTPS
,
26694 IX86_BUILTIN_CMPLEPS
,
26695 IX86_BUILTIN_CMPGTPS
,
26696 IX86_BUILTIN_CMPGEPS
,
26697 IX86_BUILTIN_CMPNEQPS
,
26698 IX86_BUILTIN_CMPNLTPS
,
26699 IX86_BUILTIN_CMPNLEPS
,
26700 IX86_BUILTIN_CMPNGTPS
,
26701 IX86_BUILTIN_CMPNGEPS
,
26702 IX86_BUILTIN_CMPORDPS
,
26703 IX86_BUILTIN_CMPUNORDPS
,
26704 IX86_BUILTIN_CMPEQSS
,
26705 IX86_BUILTIN_CMPLTSS
,
26706 IX86_BUILTIN_CMPLESS
,
26707 IX86_BUILTIN_CMPNEQSS
,
26708 IX86_BUILTIN_CMPNLTSS
,
26709 IX86_BUILTIN_CMPNLESS
,
26710 IX86_BUILTIN_CMPORDSS
,
26711 IX86_BUILTIN_CMPUNORDSS
,
26713 IX86_BUILTIN_COMIEQSS
,
26714 IX86_BUILTIN_COMILTSS
,
26715 IX86_BUILTIN_COMILESS
,
26716 IX86_BUILTIN_COMIGTSS
,
26717 IX86_BUILTIN_COMIGESS
,
26718 IX86_BUILTIN_COMINEQSS
,
26719 IX86_BUILTIN_UCOMIEQSS
,
26720 IX86_BUILTIN_UCOMILTSS
,
26721 IX86_BUILTIN_UCOMILESS
,
26722 IX86_BUILTIN_UCOMIGTSS
,
26723 IX86_BUILTIN_UCOMIGESS
,
26724 IX86_BUILTIN_UCOMINEQSS
,
26726 IX86_BUILTIN_CVTPI2PS
,
26727 IX86_BUILTIN_CVTPS2PI
,
26728 IX86_BUILTIN_CVTSI2SS
,
26729 IX86_BUILTIN_CVTSI642SS
,
26730 IX86_BUILTIN_CVTSS2SI
,
26731 IX86_BUILTIN_CVTSS2SI64
,
26732 IX86_BUILTIN_CVTTPS2PI
,
26733 IX86_BUILTIN_CVTTSS2SI
,
26734 IX86_BUILTIN_CVTTSS2SI64
,
26736 IX86_BUILTIN_MAXPS
,
26737 IX86_BUILTIN_MAXSS
,
26738 IX86_BUILTIN_MINPS
,
26739 IX86_BUILTIN_MINSS
,
26741 IX86_BUILTIN_LOADUPS
,
26742 IX86_BUILTIN_STOREUPS
,
26743 IX86_BUILTIN_MOVSS
,
26745 IX86_BUILTIN_MOVHLPS
,
26746 IX86_BUILTIN_MOVLHPS
,
26747 IX86_BUILTIN_LOADHPS
,
26748 IX86_BUILTIN_LOADLPS
,
26749 IX86_BUILTIN_STOREHPS
,
26750 IX86_BUILTIN_STORELPS
,
26752 IX86_BUILTIN_MASKMOVQ
,
26753 IX86_BUILTIN_MOVMSKPS
,
26754 IX86_BUILTIN_PMOVMSKB
,
26756 IX86_BUILTIN_MOVNTPS
,
26757 IX86_BUILTIN_MOVNTQ
,
26759 IX86_BUILTIN_LOADDQU
,
26760 IX86_BUILTIN_STOREDQU
,
26762 IX86_BUILTIN_PACKSSWB
,
26763 IX86_BUILTIN_PACKSSDW
,
26764 IX86_BUILTIN_PACKUSWB
,
26766 IX86_BUILTIN_PADDB
,
26767 IX86_BUILTIN_PADDW
,
26768 IX86_BUILTIN_PADDD
,
26769 IX86_BUILTIN_PADDQ
,
26770 IX86_BUILTIN_PADDSB
,
26771 IX86_BUILTIN_PADDSW
,
26772 IX86_BUILTIN_PADDUSB
,
26773 IX86_BUILTIN_PADDUSW
,
26774 IX86_BUILTIN_PSUBB
,
26775 IX86_BUILTIN_PSUBW
,
26776 IX86_BUILTIN_PSUBD
,
26777 IX86_BUILTIN_PSUBQ
,
26778 IX86_BUILTIN_PSUBSB
,
26779 IX86_BUILTIN_PSUBSW
,
26780 IX86_BUILTIN_PSUBUSB
,
26781 IX86_BUILTIN_PSUBUSW
,
26784 IX86_BUILTIN_PANDN
,
26788 IX86_BUILTIN_PAVGB
,
26789 IX86_BUILTIN_PAVGW
,
26791 IX86_BUILTIN_PCMPEQB
,
26792 IX86_BUILTIN_PCMPEQW
,
26793 IX86_BUILTIN_PCMPEQD
,
26794 IX86_BUILTIN_PCMPGTB
,
26795 IX86_BUILTIN_PCMPGTW
,
26796 IX86_BUILTIN_PCMPGTD
,
26798 IX86_BUILTIN_PMADDWD
,
26800 IX86_BUILTIN_PMAXSW
,
26801 IX86_BUILTIN_PMAXUB
,
26802 IX86_BUILTIN_PMINSW
,
26803 IX86_BUILTIN_PMINUB
,
26805 IX86_BUILTIN_PMULHUW
,
26806 IX86_BUILTIN_PMULHW
,
26807 IX86_BUILTIN_PMULLW
,
26809 IX86_BUILTIN_PSADBW
,
26810 IX86_BUILTIN_PSHUFW
,
26812 IX86_BUILTIN_PSLLW
,
26813 IX86_BUILTIN_PSLLD
,
26814 IX86_BUILTIN_PSLLQ
,
26815 IX86_BUILTIN_PSRAW
,
26816 IX86_BUILTIN_PSRAD
,
26817 IX86_BUILTIN_PSRLW
,
26818 IX86_BUILTIN_PSRLD
,
26819 IX86_BUILTIN_PSRLQ
,
26820 IX86_BUILTIN_PSLLWI
,
26821 IX86_BUILTIN_PSLLDI
,
26822 IX86_BUILTIN_PSLLQI
,
26823 IX86_BUILTIN_PSRAWI
,
26824 IX86_BUILTIN_PSRADI
,
26825 IX86_BUILTIN_PSRLWI
,
26826 IX86_BUILTIN_PSRLDI
,
26827 IX86_BUILTIN_PSRLQI
,
26829 IX86_BUILTIN_PUNPCKHBW
,
26830 IX86_BUILTIN_PUNPCKHWD
,
26831 IX86_BUILTIN_PUNPCKHDQ
,
26832 IX86_BUILTIN_PUNPCKLBW
,
26833 IX86_BUILTIN_PUNPCKLWD
,
26834 IX86_BUILTIN_PUNPCKLDQ
,
26836 IX86_BUILTIN_SHUFPS
,
26838 IX86_BUILTIN_RCPPS
,
26839 IX86_BUILTIN_RCPSS
,
26840 IX86_BUILTIN_RSQRTPS
,
26841 IX86_BUILTIN_RSQRTPS_NR
,
26842 IX86_BUILTIN_RSQRTSS
,
26843 IX86_BUILTIN_RSQRTF
,
26844 IX86_BUILTIN_SQRTPS
,
26845 IX86_BUILTIN_SQRTPS_NR
,
26846 IX86_BUILTIN_SQRTSS
,
26848 IX86_BUILTIN_UNPCKHPS
,
26849 IX86_BUILTIN_UNPCKLPS
,
26851 IX86_BUILTIN_ANDPS
,
26852 IX86_BUILTIN_ANDNPS
,
26854 IX86_BUILTIN_XORPS
,
26857 IX86_BUILTIN_LDMXCSR
,
26858 IX86_BUILTIN_STMXCSR
,
26859 IX86_BUILTIN_SFENCE
,
26861 IX86_BUILTIN_FXSAVE
,
26862 IX86_BUILTIN_FXRSTOR
,
26863 IX86_BUILTIN_FXSAVE64
,
26864 IX86_BUILTIN_FXRSTOR64
,
26866 IX86_BUILTIN_XSAVE
,
26867 IX86_BUILTIN_XRSTOR
,
26868 IX86_BUILTIN_XSAVE64
,
26869 IX86_BUILTIN_XRSTOR64
,
26871 IX86_BUILTIN_XSAVEOPT
,
26872 IX86_BUILTIN_XSAVEOPT64
,
26874 /* 3DNow! Original */
26875 IX86_BUILTIN_FEMMS
,
26876 IX86_BUILTIN_PAVGUSB
,
26877 IX86_BUILTIN_PF2ID
,
26878 IX86_BUILTIN_PFACC
,
26879 IX86_BUILTIN_PFADD
,
26880 IX86_BUILTIN_PFCMPEQ
,
26881 IX86_BUILTIN_PFCMPGE
,
26882 IX86_BUILTIN_PFCMPGT
,
26883 IX86_BUILTIN_PFMAX
,
26884 IX86_BUILTIN_PFMIN
,
26885 IX86_BUILTIN_PFMUL
,
26886 IX86_BUILTIN_PFRCP
,
26887 IX86_BUILTIN_PFRCPIT1
,
26888 IX86_BUILTIN_PFRCPIT2
,
26889 IX86_BUILTIN_PFRSQIT1
,
26890 IX86_BUILTIN_PFRSQRT
,
26891 IX86_BUILTIN_PFSUB
,
26892 IX86_BUILTIN_PFSUBR
,
26893 IX86_BUILTIN_PI2FD
,
26894 IX86_BUILTIN_PMULHRW
,
26896 /* 3DNow! Athlon Extensions */
26897 IX86_BUILTIN_PF2IW
,
26898 IX86_BUILTIN_PFNACC
,
26899 IX86_BUILTIN_PFPNACC
,
26900 IX86_BUILTIN_PI2FW
,
26901 IX86_BUILTIN_PSWAPDSI
,
26902 IX86_BUILTIN_PSWAPDSF
,
26905 IX86_BUILTIN_ADDPD
,
26906 IX86_BUILTIN_ADDSD
,
26907 IX86_BUILTIN_DIVPD
,
26908 IX86_BUILTIN_DIVSD
,
26909 IX86_BUILTIN_MULPD
,
26910 IX86_BUILTIN_MULSD
,
26911 IX86_BUILTIN_SUBPD
,
26912 IX86_BUILTIN_SUBSD
,
26914 IX86_BUILTIN_CMPEQPD
,
26915 IX86_BUILTIN_CMPLTPD
,
26916 IX86_BUILTIN_CMPLEPD
,
26917 IX86_BUILTIN_CMPGTPD
,
26918 IX86_BUILTIN_CMPGEPD
,
26919 IX86_BUILTIN_CMPNEQPD
,
26920 IX86_BUILTIN_CMPNLTPD
,
26921 IX86_BUILTIN_CMPNLEPD
,
26922 IX86_BUILTIN_CMPNGTPD
,
26923 IX86_BUILTIN_CMPNGEPD
,
26924 IX86_BUILTIN_CMPORDPD
,
26925 IX86_BUILTIN_CMPUNORDPD
,
26926 IX86_BUILTIN_CMPEQSD
,
26927 IX86_BUILTIN_CMPLTSD
,
26928 IX86_BUILTIN_CMPLESD
,
26929 IX86_BUILTIN_CMPNEQSD
,
26930 IX86_BUILTIN_CMPNLTSD
,
26931 IX86_BUILTIN_CMPNLESD
,
26932 IX86_BUILTIN_CMPORDSD
,
26933 IX86_BUILTIN_CMPUNORDSD
,
26935 IX86_BUILTIN_COMIEQSD
,
26936 IX86_BUILTIN_COMILTSD
,
26937 IX86_BUILTIN_COMILESD
,
26938 IX86_BUILTIN_COMIGTSD
,
26939 IX86_BUILTIN_COMIGESD
,
26940 IX86_BUILTIN_COMINEQSD
,
26941 IX86_BUILTIN_UCOMIEQSD
,
26942 IX86_BUILTIN_UCOMILTSD
,
26943 IX86_BUILTIN_UCOMILESD
,
26944 IX86_BUILTIN_UCOMIGTSD
,
26945 IX86_BUILTIN_UCOMIGESD
,
26946 IX86_BUILTIN_UCOMINEQSD
,
26948 IX86_BUILTIN_MAXPD
,
26949 IX86_BUILTIN_MAXSD
,
26950 IX86_BUILTIN_MINPD
,
26951 IX86_BUILTIN_MINSD
,
26953 IX86_BUILTIN_ANDPD
,
26954 IX86_BUILTIN_ANDNPD
,
26956 IX86_BUILTIN_XORPD
,
26958 IX86_BUILTIN_SQRTPD
,
26959 IX86_BUILTIN_SQRTSD
,
26961 IX86_BUILTIN_UNPCKHPD
,
26962 IX86_BUILTIN_UNPCKLPD
,
26964 IX86_BUILTIN_SHUFPD
,
26966 IX86_BUILTIN_LOADUPD
,
26967 IX86_BUILTIN_STOREUPD
,
26968 IX86_BUILTIN_MOVSD
,
26970 IX86_BUILTIN_LOADHPD
,
26971 IX86_BUILTIN_LOADLPD
,
26973 IX86_BUILTIN_CVTDQ2PD
,
26974 IX86_BUILTIN_CVTDQ2PS
,
26976 IX86_BUILTIN_CVTPD2DQ
,
26977 IX86_BUILTIN_CVTPD2PI
,
26978 IX86_BUILTIN_CVTPD2PS
,
26979 IX86_BUILTIN_CVTTPD2DQ
,
26980 IX86_BUILTIN_CVTTPD2PI
,
26982 IX86_BUILTIN_CVTPI2PD
,
26983 IX86_BUILTIN_CVTSI2SD
,
26984 IX86_BUILTIN_CVTSI642SD
,
26986 IX86_BUILTIN_CVTSD2SI
,
26987 IX86_BUILTIN_CVTSD2SI64
,
26988 IX86_BUILTIN_CVTSD2SS
,
26989 IX86_BUILTIN_CVTSS2SD
,
26990 IX86_BUILTIN_CVTTSD2SI
,
26991 IX86_BUILTIN_CVTTSD2SI64
,
26993 IX86_BUILTIN_CVTPS2DQ
,
26994 IX86_BUILTIN_CVTPS2PD
,
26995 IX86_BUILTIN_CVTTPS2DQ
,
26997 IX86_BUILTIN_MOVNTI
,
26998 IX86_BUILTIN_MOVNTI64
,
26999 IX86_BUILTIN_MOVNTPD
,
27000 IX86_BUILTIN_MOVNTDQ
,
27002 IX86_BUILTIN_MOVQ128
,
27005 IX86_BUILTIN_MASKMOVDQU
,
27006 IX86_BUILTIN_MOVMSKPD
,
27007 IX86_BUILTIN_PMOVMSKB128
,
27009 IX86_BUILTIN_PACKSSWB128
,
27010 IX86_BUILTIN_PACKSSDW128
,
27011 IX86_BUILTIN_PACKUSWB128
,
27013 IX86_BUILTIN_PADDB128
,
27014 IX86_BUILTIN_PADDW128
,
27015 IX86_BUILTIN_PADDD128
,
27016 IX86_BUILTIN_PADDQ128
,
27017 IX86_BUILTIN_PADDSB128
,
27018 IX86_BUILTIN_PADDSW128
,
27019 IX86_BUILTIN_PADDUSB128
,
27020 IX86_BUILTIN_PADDUSW128
,
27021 IX86_BUILTIN_PSUBB128
,
27022 IX86_BUILTIN_PSUBW128
,
27023 IX86_BUILTIN_PSUBD128
,
27024 IX86_BUILTIN_PSUBQ128
,
27025 IX86_BUILTIN_PSUBSB128
,
27026 IX86_BUILTIN_PSUBSW128
,
27027 IX86_BUILTIN_PSUBUSB128
,
27028 IX86_BUILTIN_PSUBUSW128
,
27030 IX86_BUILTIN_PAND128
,
27031 IX86_BUILTIN_PANDN128
,
27032 IX86_BUILTIN_POR128
,
27033 IX86_BUILTIN_PXOR128
,
27035 IX86_BUILTIN_PAVGB128
,
27036 IX86_BUILTIN_PAVGW128
,
27038 IX86_BUILTIN_PCMPEQB128
,
27039 IX86_BUILTIN_PCMPEQW128
,
27040 IX86_BUILTIN_PCMPEQD128
,
27041 IX86_BUILTIN_PCMPGTB128
,
27042 IX86_BUILTIN_PCMPGTW128
,
27043 IX86_BUILTIN_PCMPGTD128
,
27045 IX86_BUILTIN_PMADDWD128
,
27047 IX86_BUILTIN_PMAXSW128
,
27048 IX86_BUILTIN_PMAXUB128
,
27049 IX86_BUILTIN_PMINSW128
,
27050 IX86_BUILTIN_PMINUB128
,
27052 IX86_BUILTIN_PMULUDQ
,
27053 IX86_BUILTIN_PMULUDQ128
,
27054 IX86_BUILTIN_PMULHUW128
,
27055 IX86_BUILTIN_PMULHW128
,
27056 IX86_BUILTIN_PMULLW128
,
27058 IX86_BUILTIN_PSADBW128
,
27059 IX86_BUILTIN_PSHUFHW
,
27060 IX86_BUILTIN_PSHUFLW
,
27061 IX86_BUILTIN_PSHUFD
,
27063 IX86_BUILTIN_PSLLDQI128
,
27064 IX86_BUILTIN_PSLLWI128
,
27065 IX86_BUILTIN_PSLLDI128
,
27066 IX86_BUILTIN_PSLLQI128
,
27067 IX86_BUILTIN_PSRAWI128
,
27068 IX86_BUILTIN_PSRADI128
,
27069 IX86_BUILTIN_PSRLDQI128
,
27070 IX86_BUILTIN_PSRLWI128
,
27071 IX86_BUILTIN_PSRLDI128
,
27072 IX86_BUILTIN_PSRLQI128
,
27074 IX86_BUILTIN_PSLLDQ128
,
27075 IX86_BUILTIN_PSLLW128
,
27076 IX86_BUILTIN_PSLLD128
,
27077 IX86_BUILTIN_PSLLQ128
,
27078 IX86_BUILTIN_PSRAW128
,
27079 IX86_BUILTIN_PSRAD128
,
27080 IX86_BUILTIN_PSRLW128
,
27081 IX86_BUILTIN_PSRLD128
,
27082 IX86_BUILTIN_PSRLQ128
,
27084 IX86_BUILTIN_PUNPCKHBW128
,
27085 IX86_BUILTIN_PUNPCKHWD128
,
27086 IX86_BUILTIN_PUNPCKHDQ128
,
27087 IX86_BUILTIN_PUNPCKHQDQ128
,
27088 IX86_BUILTIN_PUNPCKLBW128
,
27089 IX86_BUILTIN_PUNPCKLWD128
,
27090 IX86_BUILTIN_PUNPCKLDQ128
,
27091 IX86_BUILTIN_PUNPCKLQDQ128
,
27093 IX86_BUILTIN_CLFLUSH
,
27094 IX86_BUILTIN_MFENCE
,
27095 IX86_BUILTIN_LFENCE
,
27096 IX86_BUILTIN_PAUSE
,
27098 IX86_BUILTIN_FNSTENV
,
27099 IX86_BUILTIN_FLDENV
,
27100 IX86_BUILTIN_FNSTSW
,
27101 IX86_BUILTIN_FNCLEX
,
27103 IX86_BUILTIN_BSRSI
,
27104 IX86_BUILTIN_BSRDI
,
27105 IX86_BUILTIN_RDPMC
,
27106 IX86_BUILTIN_RDTSC
,
27107 IX86_BUILTIN_RDTSCP
,
27108 IX86_BUILTIN_ROLQI
,
27109 IX86_BUILTIN_ROLHI
,
27110 IX86_BUILTIN_RORQI
,
27111 IX86_BUILTIN_RORHI
,
27114 IX86_BUILTIN_ADDSUBPS
,
27115 IX86_BUILTIN_HADDPS
,
27116 IX86_BUILTIN_HSUBPS
,
27117 IX86_BUILTIN_MOVSHDUP
,
27118 IX86_BUILTIN_MOVSLDUP
,
27119 IX86_BUILTIN_ADDSUBPD
,
27120 IX86_BUILTIN_HADDPD
,
27121 IX86_BUILTIN_HSUBPD
,
27122 IX86_BUILTIN_LDDQU
,
27124 IX86_BUILTIN_MONITOR
,
27125 IX86_BUILTIN_MWAIT
,
27128 IX86_BUILTIN_PHADDW
,
27129 IX86_BUILTIN_PHADDD
,
27130 IX86_BUILTIN_PHADDSW
,
27131 IX86_BUILTIN_PHSUBW
,
27132 IX86_BUILTIN_PHSUBD
,
27133 IX86_BUILTIN_PHSUBSW
,
27134 IX86_BUILTIN_PMADDUBSW
,
27135 IX86_BUILTIN_PMULHRSW
,
27136 IX86_BUILTIN_PSHUFB
,
27137 IX86_BUILTIN_PSIGNB
,
27138 IX86_BUILTIN_PSIGNW
,
27139 IX86_BUILTIN_PSIGND
,
27140 IX86_BUILTIN_PALIGNR
,
27141 IX86_BUILTIN_PABSB
,
27142 IX86_BUILTIN_PABSW
,
27143 IX86_BUILTIN_PABSD
,
27145 IX86_BUILTIN_PHADDW128
,
27146 IX86_BUILTIN_PHADDD128
,
27147 IX86_BUILTIN_PHADDSW128
,
27148 IX86_BUILTIN_PHSUBW128
,
27149 IX86_BUILTIN_PHSUBD128
,
27150 IX86_BUILTIN_PHSUBSW128
,
27151 IX86_BUILTIN_PMADDUBSW128
,
27152 IX86_BUILTIN_PMULHRSW128
,
27153 IX86_BUILTIN_PSHUFB128
,
27154 IX86_BUILTIN_PSIGNB128
,
27155 IX86_BUILTIN_PSIGNW128
,
27156 IX86_BUILTIN_PSIGND128
,
27157 IX86_BUILTIN_PALIGNR128
,
27158 IX86_BUILTIN_PABSB128
,
27159 IX86_BUILTIN_PABSW128
,
27160 IX86_BUILTIN_PABSD128
,
27162 /* AMDFAM10 - SSE4A New Instructions. */
27163 IX86_BUILTIN_MOVNTSD
,
27164 IX86_BUILTIN_MOVNTSS
,
27165 IX86_BUILTIN_EXTRQI
,
27166 IX86_BUILTIN_EXTRQ
,
27167 IX86_BUILTIN_INSERTQI
,
27168 IX86_BUILTIN_INSERTQ
,
27171 IX86_BUILTIN_BLENDPD
,
27172 IX86_BUILTIN_BLENDPS
,
27173 IX86_BUILTIN_BLENDVPD
,
27174 IX86_BUILTIN_BLENDVPS
,
27175 IX86_BUILTIN_PBLENDVB128
,
27176 IX86_BUILTIN_PBLENDW128
,
27181 IX86_BUILTIN_INSERTPS128
,
27183 IX86_BUILTIN_MOVNTDQA
,
27184 IX86_BUILTIN_MPSADBW128
,
27185 IX86_BUILTIN_PACKUSDW128
,
27186 IX86_BUILTIN_PCMPEQQ
,
27187 IX86_BUILTIN_PHMINPOSUW128
,
27189 IX86_BUILTIN_PMAXSB128
,
27190 IX86_BUILTIN_PMAXSD128
,
27191 IX86_BUILTIN_PMAXUD128
,
27192 IX86_BUILTIN_PMAXUW128
,
27194 IX86_BUILTIN_PMINSB128
,
27195 IX86_BUILTIN_PMINSD128
,
27196 IX86_BUILTIN_PMINUD128
,
27197 IX86_BUILTIN_PMINUW128
,
27199 IX86_BUILTIN_PMOVSXBW128
,
27200 IX86_BUILTIN_PMOVSXBD128
,
27201 IX86_BUILTIN_PMOVSXBQ128
,
27202 IX86_BUILTIN_PMOVSXWD128
,
27203 IX86_BUILTIN_PMOVSXWQ128
,
27204 IX86_BUILTIN_PMOVSXDQ128
,
27206 IX86_BUILTIN_PMOVZXBW128
,
27207 IX86_BUILTIN_PMOVZXBD128
,
27208 IX86_BUILTIN_PMOVZXBQ128
,
27209 IX86_BUILTIN_PMOVZXWD128
,
27210 IX86_BUILTIN_PMOVZXWQ128
,
27211 IX86_BUILTIN_PMOVZXDQ128
,
27213 IX86_BUILTIN_PMULDQ128
,
27214 IX86_BUILTIN_PMULLD128
,
27216 IX86_BUILTIN_ROUNDSD
,
27217 IX86_BUILTIN_ROUNDSS
,
27219 IX86_BUILTIN_ROUNDPD
,
27220 IX86_BUILTIN_ROUNDPS
,
27222 IX86_BUILTIN_FLOORPD
,
27223 IX86_BUILTIN_CEILPD
,
27224 IX86_BUILTIN_TRUNCPD
,
27225 IX86_BUILTIN_RINTPD
,
27226 IX86_BUILTIN_ROUNDPD_AZ
,
27228 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
,
27229 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
,
27230 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
,
27232 IX86_BUILTIN_FLOORPS
,
27233 IX86_BUILTIN_CEILPS
,
27234 IX86_BUILTIN_TRUNCPS
,
27235 IX86_BUILTIN_RINTPS
,
27236 IX86_BUILTIN_ROUNDPS_AZ
,
27238 IX86_BUILTIN_FLOORPS_SFIX
,
27239 IX86_BUILTIN_CEILPS_SFIX
,
27240 IX86_BUILTIN_ROUNDPS_AZ_SFIX
,
27242 IX86_BUILTIN_PTESTZ
,
27243 IX86_BUILTIN_PTESTC
,
27244 IX86_BUILTIN_PTESTNZC
,
27246 IX86_BUILTIN_VEC_INIT_V2SI
,
27247 IX86_BUILTIN_VEC_INIT_V4HI
,
27248 IX86_BUILTIN_VEC_INIT_V8QI
,
27249 IX86_BUILTIN_VEC_EXT_V2DF
,
27250 IX86_BUILTIN_VEC_EXT_V2DI
,
27251 IX86_BUILTIN_VEC_EXT_V4SF
,
27252 IX86_BUILTIN_VEC_EXT_V4SI
,
27253 IX86_BUILTIN_VEC_EXT_V8HI
,
27254 IX86_BUILTIN_VEC_EXT_V2SI
,
27255 IX86_BUILTIN_VEC_EXT_V4HI
,
27256 IX86_BUILTIN_VEC_EXT_V16QI
,
27257 IX86_BUILTIN_VEC_SET_V2DI
,
27258 IX86_BUILTIN_VEC_SET_V4SF
,
27259 IX86_BUILTIN_VEC_SET_V4SI
,
27260 IX86_BUILTIN_VEC_SET_V8HI
,
27261 IX86_BUILTIN_VEC_SET_V4HI
,
27262 IX86_BUILTIN_VEC_SET_V16QI
,
27264 IX86_BUILTIN_VEC_PACK_SFIX
,
27265 IX86_BUILTIN_VEC_PACK_SFIX256
,
27268 IX86_BUILTIN_CRC32QI
,
27269 IX86_BUILTIN_CRC32HI
,
27270 IX86_BUILTIN_CRC32SI
,
27271 IX86_BUILTIN_CRC32DI
,
27273 IX86_BUILTIN_PCMPESTRI128
,
27274 IX86_BUILTIN_PCMPESTRM128
,
27275 IX86_BUILTIN_PCMPESTRA128
,
27276 IX86_BUILTIN_PCMPESTRC128
,
27277 IX86_BUILTIN_PCMPESTRO128
,
27278 IX86_BUILTIN_PCMPESTRS128
,
27279 IX86_BUILTIN_PCMPESTRZ128
,
27280 IX86_BUILTIN_PCMPISTRI128
,
27281 IX86_BUILTIN_PCMPISTRM128
,
27282 IX86_BUILTIN_PCMPISTRA128
,
27283 IX86_BUILTIN_PCMPISTRC128
,
27284 IX86_BUILTIN_PCMPISTRO128
,
27285 IX86_BUILTIN_PCMPISTRS128
,
27286 IX86_BUILTIN_PCMPISTRZ128
,
27288 IX86_BUILTIN_PCMPGTQ
,
27290 /* AES instructions */
27291 IX86_BUILTIN_AESENC128
,
27292 IX86_BUILTIN_AESENCLAST128
,
27293 IX86_BUILTIN_AESDEC128
,
27294 IX86_BUILTIN_AESDECLAST128
,
27295 IX86_BUILTIN_AESIMC128
,
27296 IX86_BUILTIN_AESKEYGENASSIST128
,
27298 /* PCLMUL instruction */
27299 IX86_BUILTIN_PCLMULQDQ128
,
27302 IX86_BUILTIN_ADDPD256
,
27303 IX86_BUILTIN_ADDPS256
,
27304 IX86_BUILTIN_ADDSUBPD256
,
27305 IX86_BUILTIN_ADDSUBPS256
,
27306 IX86_BUILTIN_ANDPD256
,
27307 IX86_BUILTIN_ANDPS256
,
27308 IX86_BUILTIN_ANDNPD256
,
27309 IX86_BUILTIN_ANDNPS256
,
27310 IX86_BUILTIN_BLENDPD256
,
27311 IX86_BUILTIN_BLENDPS256
,
27312 IX86_BUILTIN_BLENDVPD256
,
27313 IX86_BUILTIN_BLENDVPS256
,
27314 IX86_BUILTIN_DIVPD256
,
27315 IX86_BUILTIN_DIVPS256
,
27316 IX86_BUILTIN_DPPS256
,
27317 IX86_BUILTIN_HADDPD256
,
27318 IX86_BUILTIN_HADDPS256
,
27319 IX86_BUILTIN_HSUBPD256
,
27320 IX86_BUILTIN_HSUBPS256
,
27321 IX86_BUILTIN_MAXPD256
,
27322 IX86_BUILTIN_MAXPS256
,
27323 IX86_BUILTIN_MINPD256
,
27324 IX86_BUILTIN_MINPS256
,
27325 IX86_BUILTIN_MULPD256
,
27326 IX86_BUILTIN_MULPS256
,
27327 IX86_BUILTIN_ORPD256
,
27328 IX86_BUILTIN_ORPS256
,
27329 IX86_BUILTIN_SHUFPD256
,
27330 IX86_BUILTIN_SHUFPS256
,
27331 IX86_BUILTIN_SUBPD256
,
27332 IX86_BUILTIN_SUBPS256
,
27333 IX86_BUILTIN_XORPD256
,
27334 IX86_BUILTIN_XORPS256
,
27335 IX86_BUILTIN_CMPSD
,
27336 IX86_BUILTIN_CMPSS
,
27337 IX86_BUILTIN_CMPPD
,
27338 IX86_BUILTIN_CMPPS
,
27339 IX86_BUILTIN_CMPPD256
,
27340 IX86_BUILTIN_CMPPS256
,
27341 IX86_BUILTIN_CVTDQ2PD256
,
27342 IX86_BUILTIN_CVTDQ2PS256
,
27343 IX86_BUILTIN_CVTPD2PS256
,
27344 IX86_BUILTIN_CVTPS2DQ256
,
27345 IX86_BUILTIN_CVTPS2PD256
,
27346 IX86_BUILTIN_CVTTPD2DQ256
,
27347 IX86_BUILTIN_CVTPD2DQ256
,
27348 IX86_BUILTIN_CVTTPS2DQ256
,
27349 IX86_BUILTIN_EXTRACTF128PD256
,
27350 IX86_BUILTIN_EXTRACTF128PS256
,
27351 IX86_BUILTIN_EXTRACTF128SI256
,
27352 IX86_BUILTIN_VZEROALL
,
27353 IX86_BUILTIN_VZEROUPPER
,
27354 IX86_BUILTIN_VPERMILVARPD
,
27355 IX86_BUILTIN_VPERMILVARPS
,
27356 IX86_BUILTIN_VPERMILVARPD256
,
27357 IX86_BUILTIN_VPERMILVARPS256
,
27358 IX86_BUILTIN_VPERMILPD
,
27359 IX86_BUILTIN_VPERMILPS
,
27360 IX86_BUILTIN_VPERMILPD256
,
27361 IX86_BUILTIN_VPERMILPS256
,
27362 IX86_BUILTIN_VPERMIL2PD
,
27363 IX86_BUILTIN_VPERMIL2PS
,
27364 IX86_BUILTIN_VPERMIL2PD256
,
27365 IX86_BUILTIN_VPERMIL2PS256
,
27366 IX86_BUILTIN_VPERM2F128PD256
,
27367 IX86_BUILTIN_VPERM2F128PS256
,
27368 IX86_BUILTIN_VPERM2F128SI256
,
27369 IX86_BUILTIN_VBROADCASTSS
,
27370 IX86_BUILTIN_VBROADCASTSD256
,
27371 IX86_BUILTIN_VBROADCASTSS256
,
27372 IX86_BUILTIN_VBROADCASTPD256
,
27373 IX86_BUILTIN_VBROADCASTPS256
,
27374 IX86_BUILTIN_VINSERTF128PD256
,
27375 IX86_BUILTIN_VINSERTF128PS256
,
27376 IX86_BUILTIN_VINSERTF128SI256
,
27377 IX86_BUILTIN_LOADUPD256
,
27378 IX86_BUILTIN_LOADUPS256
,
27379 IX86_BUILTIN_STOREUPD256
,
27380 IX86_BUILTIN_STOREUPS256
,
27381 IX86_BUILTIN_LDDQU256
,
27382 IX86_BUILTIN_MOVNTDQ256
,
27383 IX86_BUILTIN_MOVNTPD256
,
27384 IX86_BUILTIN_MOVNTPS256
,
27385 IX86_BUILTIN_LOADDQU256
,
27386 IX86_BUILTIN_STOREDQU256
,
27387 IX86_BUILTIN_MASKLOADPD
,
27388 IX86_BUILTIN_MASKLOADPS
,
27389 IX86_BUILTIN_MASKSTOREPD
,
27390 IX86_BUILTIN_MASKSTOREPS
,
27391 IX86_BUILTIN_MASKLOADPD256
,
27392 IX86_BUILTIN_MASKLOADPS256
,
27393 IX86_BUILTIN_MASKSTOREPD256
,
27394 IX86_BUILTIN_MASKSTOREPS256
,
27395 IX86_BUILTIN_MOVSHDUP256
,
27396 IX86_BUILTIN_MOVSLDUP256
,
27397 IX86_BUILTIN_MOVDDUP256
,
27399 IX86_BUILTIN_SQRTPD256
,
27400 IX86_BUILTIN_SQRTPS256
,
27401 IX86_BUILTIN_SQRTPS_NR256
,
27402 IX86_BUILTIN_RSQRTPS256
,
27403 IX86_BUILTIN_RSQRTPS_NR256
,
27405 IX86_BUILTIN_RCPPS256
,
27407 IX86_BUILTIN_ROUNDPD256
,
27408 IX86_BUILTIN_ROUNDPS256
,
27410 IX86_BUILTIN_FLOORPD256
,
27411 IX86_BUILTIN_CEILPD256
,
27412 IX86_BUILTIN_TRUNCPD256
,
27413 IX86_BUILTIN_RINTPD256
,
27414 IX86_BUILTIN_ROUNDPD_AZ256
,
27416 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
,
27417 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
,
27418 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
,
27420 IX86_BUILTIN_FLOORPS256
,
27421 IX86_BUILTIN_CEILPS256
,
27422 IX86_BUILTIN_TRUNCPS256
,
27423 IX86_BUILTIN_RINTPS256
,
27424 IX86_BUILTIN_ROUNDPS_AZ256
,
27426 IX86_BUILTIN_FLOORPS_SFIX256
,
27427 IX86_BUILTIN_CEILPS_SFIX256
,
27428 IX86_BUILTIN_ROUNDPS_AZ_SFIX256
,
27430 IX86_BUILTIN_UNPCKHPD256
,
27431 IX86_BUILTIN_UNPCKLPD256
,
27432 IX86_BUILTIN_UNPCKHPS256
,
27433 IX86_BUILTIN_UNPCKLPS256
,
27435 IX86_BUILTIN_SI256_SI
,
27436 IX86_BUILTIN_PS256_PS
,
27437 IX86_BUILTIN_PD256_PD
,
27438 IX86_BUILTIN_SI_SI256
,
27439 IX86_BUILTIN_PS_PS256
,
27440 IX86_BUILTIN_PD_PD256
,
27442 IX86_BUILTIN_VTESTZPD
,
27443 IX86_BUILTIN_VTESTCPD
,
27444 IX86_BUILTIN_VTESTNZCPD
,
27445 IX86_BUILTIN_VTESTZPS
,
27446 IX86_BUILTIN_VTESTCPS
,
27447 IX86_BUILTIN_VTESTNZCPS
,
27448 IX86_BUILTIN_VTESTZPD256
,
27449 IX86_BUILTIN_VTESTCPD256
,
27450 IX86_BUILTIN_VTESTNZCPD256
,
27451 IX86_BUILTIN_VTESTZPS256
,
27452 IX86_BUILTIN_VTESTCPS256
,
27453 IX86_BUILTIN_VTESTNZCPS256
,
27454 IX86_BUILTIN_PTESTZ256
,
27455 IX86_BUILTIN_PTESTC256
,
27456 IX86_BUILTIN_PTESTNZC256
,
27458 IX86_BUILTIN_MOVMSKPD256
,
27459 IX86_BUILTIN_MOVMSKPS256
,
27462 IX86_BUILTIN_MPSADBW256
,
27463 IX86_BUILTIN_PABSB256
,
27464 IX86_BUILTIN_PABSW256
,
27465 IX86_BUILTIN_PABSD256
,
27466 IX86_BUILTIN_PACKSSDW256
,
27467 IX86_BUILTIN_PACKSSWB256
,
27468 IX86_BUILTIN_PACKUSDW256
,
27469 IX86_BUILTIN_PACKUSWB256
,
27470 IX86_BUILTIN_PADDB256
,
27471 IX86_BUILTIN_PADDW256
,
27472 IX86_BUILTIN_PADDD256
,
27473 IX86_BUILTIN_PADDQ256
,
27474 IX86_BUILTIN_PADDSB256
,
27475 IX86_BUILTIN_PADDSW256
,
27476 IX86_BUILTIN_PADDUSB256
,
27477 IX86_BUILTIN_PADDUSW256
,
27478 IX86_BUILTIN_PALIGNR256
,
27479 IX86_BUILTIN_AND256I
,
27480 IX86_BUILTIN_ANDNOT256I
,
27481 IX86_BUILTIN_PAVGB256
,
27482 IX86_BUILTIN_PAVGW256
,
27483 IX86_BUILTIN_PBLENDVB256
,
27484 IX86_BUILTIN_PBLENDVW256
,
27485 IX86_BUILTIN_PCMPEQB256
,
27486 IX86_BUILTIN_PCMPEQW256
,
27487 IX86_BUILTIN_PCMPEQD256
,
27488 IX86_BUILTIN_PCMPEQQ256
,
27489 IX86_BUILTIN_PCMPGTB256
,
27490 IX86_BUILTIN_PCMPGTW256
,
27491 IX86_BUILTIN_PCMPGTD256
,
27492 IX86_BUILTIN_PCMPGTQ256
,
27493 IX86_BUILTIN_PHADDW256
,
27494 IX86_BUILTIN_PHADDD256
,
27495 IX86_BUILTIN_PHADDSW256
,
27496 IX86_BUILTIN_PHSUBW256
,
27497 IX86_BUILTIN_PHSUBD256
,
27498 IX86_BUILTIN_PHSUBSW256
,
27499 IX86_BUILTIN_PMADDUBSW256
,
27500 IX86_BUILTIN_PMADDWD256
,
27501 IX86_BUILTIN_PMAXSB256
,
27502 IX86_BUILTIN_PMAXSW256
,
27503 IX86_BUILTIN_PMAXSD256
,
27504 IX86_BUILTIN_PMAXUB256
,
27505 IX86_BUILTIN_PMAXUW256
,
27506 IX86_BUILTIN_PMAXUD256
,
27507 IX86_BUILTIN_PMINSB256
,
27508 IX86_BUILTIN_PMINSW256
,
27509 IX86_BUILTIN_PMINSD256
,
27510 IX86_BUILTIN_PMINUB256
,
27511 IX86_BUILTIN_PMINUW256
,
27512 IX86_BUILTIN_PMINUD256
,
27513 IX86_BUILTIN_PMOVMSKB256
,
27514 IX86_BUILTIN_PMOVSXBW256
,
27515 IX86_BUILTIN_PMOVSXBD256
,
27516 IX86_BUILTIN_PMOVSXBQ256
,
27517 IX86_BUILTIN_PMOVSXWD256
,
27518 IX86_BUILTIN_PMOVSXWQ256
,
27519 IX86_BUILTIN_PMOVSXDQ256
,
27520 IX86_BUILTIN_PMOVZXBW256
,
27521 IX86_BUILTIN_PMOVZXBD256
,
27522 IX86_BUILTIN_PMOVZXBQ256
,
27523 IX86_BUILTIN_PMOVZXWD256
,
27524 IX86_BUILTIN_PMOVZXWQ256
,
27525 IX86_BUILTIN_PMOVZXDQ256
,
27526 IX86_BUILTIN_PMULDQ256
,
27527 IX86_BUILTIN_PMULHRSW256
,
27528 IX86_BUILTIN_PMULHUW256
,
27529 IX86_BUILTIN_PMULHW256
,
27530 IX86_BUILTIN_PMULLW256
,
27531 IX86_BUILTIN_PMULLD256
,
27532 IX86_BUILTIN_PMULUDQ256
,
27533 IX86_BUILTIN_POR256
,
27534 IX86_BUILTIN_PSADBW256
,
27535 IX86_BUILTIN_PSHUFB256
,
27536 IX86_BUILTIN_PSHUFD256
,
27537 IX86_BUILTIN_PSHUFHW256
,
27538 IX86_BUILTIN_PSHUFLW256
,
27539 IX86_BUILTIN_PSIGNB256
,
27540 IX86_BUILTIN_PSIGNW256
,
27541 IX86_BUILTIN_PSIGND256
,
27542 IX86_BUILTIN_PSLLDQI256
,
27543 IX86_BUILTIN_PSLLWI256
,
27544 IX86_BUILTIN_PSLLW256
,
27545 IX86_BUILTIN_PSLLDI256
,
27546 IX86_BUILTIN_PSLLD256
,
27547 IX86_BUILTIN_PSLLQI256
,
27548 IX86_BUILTIN_PSLLQ256
,
27549 IX86_BUILTIN_PSRAWI256
,
27550 IX86_BUILTIN_PSRAW256
,
27551 IX86_BUILTIN_PSRADI256
,
27552 IX86_BUILTIN_PSRAD256
,
27553 IX86_BUILTIN_PSRLDQI256
,
27554 IX86_BUILTIN_PSRLWI256
,
27555 IX86_BUILTIN_PSRLW256
,
27556 IX86_BUILTIN_PSRLDI256
,
27557 IX86_BUILTIN_PSRLD256
,
27558 IX86_BUILTIN_PSRLQI256
,
27559 IX86_BUILTIN_PSRLQ256
,
27560 IX86_BUILTIN_PSUBB256
,
27561 IX86_BUILTIN_PSUBW256
,
27562 IX86_BUILTIN_PSUBD256
,
27563 IX86_BUILTIN_PSUBQ256
,
27564 IX86_BUILTIN_PSUBSB256
,
27565 IX86_BUILTIN_PSUBSW256
,
27566 IX86_BUILTIN_PSUBUSB256
,
27567 IX86_BUILTIN_PSUBUSW256
,
27568 IX86_BUILTIN_PUNPCKHBW256
,
27569 IX86_BUILTIN_PUNPCKHWD256
,
27570 IX86_BUILTIN_PUNPCKHDQ256
,
27571 IX86_BUILTIN_PUNPCKHQDQ256
,
27572 IX86_BUILTIN_PUNPCKLBW256
,
27573 IX86_BUILTIN_PUNPCKLWD256
,
27574 IX86_BUILTIN_PUNPCKLDQ256
,
27575 IX86_BUILTIN_PUNPCKLQDQ256
,
27576 IX86_BUILTIN_PXOR256
,
27577 IX86_BUILTIN_MOVNTDQA256
,
27578 IX86_BUILTIN_VBROADCASTSS_PS
,
27579 IX86_BUILTIN_VBROADCASTSS_PS256
,
27580 IX86_BUILTIN_VBROADCASTSD_PD256
,
27581 IX86_BUILTIN_VBROADCASTSI256
,
27582 IX86_BUILTIN_PBLENDD256
,
27583 IX86_BUILTIN_PBLENDD128
,
27584 IX86_BUILTIN_PBROADCASTB256
,
27585 IX86_BUILTIN_PBROADCASTW256
,
27586 IX86_BUILTIN_PBROADCASTD256
,
27587 IX86_BUILTIN_PBROADCASTQ256
,
27588 IX86_BUILTIN_PBROADCASTB128
,
27589 IX86_BUILTIN_PBROADCASTW128
,
27590 IX86_BUILTIN_PBROADCASTD128
,
27591 IX86_BUILTIN_PBROADCASTQ128
,
27592 IX86_BUILTIN_VPERMVARSI256
,
27593 IX86_BUILTIN_VPERMDF256
,
27594 IX86_BUILTIN_VPERMVARSF256
,
27595 IX86_BUILTIN_VPERMDI256
,
27596 IX86_BUILTIN_VPERMTI256
,
27597 IX86_BUILTIN_VEXTRACT128I256
,
27598 IX86_BUILTIN_VINSERT128I256
,
27599 IX86_BUILTIN_MASKLOADD
,
27600 IX86_BUILTIN_MASKLOADQ
,
27601 IX86_BUILTIN_MASKLOADD256
,
27602 IX86_BUILTIN_MASKLOADQ256
,
27603 IX86_BUILTIN_MASKSTORED
,
27604 IX86_BUILTIN_MASKSTOREQ
,
27605 IX86_BUILTIN_MASKSTORED256
,
27606 IX86_BUILTIN_MASKSTOREQ256
,
27607 IX86_BUILTIN_PSLLVV4DI
,
27608 IX86_BUILTIN_PSLLVV2DI
,
27609 IX86_BUILTIN_PSLLVV8SI
,
27610 IX86_BUILTIN_PSLLVV4SI
,
27611 IX86_BUILTIN_PSRAVV8SI
,
27612 IX86_BUILTIN_PSRAVV4SI
,
27613 IX86_BUILTIN_PSRLVV4DI
,
27614 IX86_BUILTIN_PSRLVV2DI
,
27615 IX86_BUILTIN_PSRLVV8SI
,
27616 IX86_BUILTIN_PSRLVV4SI
,
27618 IX86_BUILTIN_GATHERSIV2DF
,
27619 IX86_BUILTIN_GATHERSIV4DF
,
27620 IX86_BUILTIN_GATHERDIV2DF
,
27621 IX86_BUILTIN_GATHERDIV4DF
,
27622 IX86_BUILTIN_GATHERSIV4SF
,
27623 IX86_BUILTIN_GATHERSIV8SF
,
27624 IX86_BUILTIN_GATHERDIV4SF
,
27625 IX86_BUILTIN_GATHERDIV8SF
,
27626 IX86_BUILTIN_GATHERSIV2DI
,
27627 IX86_BUILTIN_GATHERSIV4DI
,
27628 IX86_BUILTIN_GATHERDIV2DI
,
27629 IX86_BUILTIN_GATHERDIV4DI
,
27630 IX86_BUILTIN_GATHERSIV4SI
,
27631 IX86_BUILTIN_GATHERSIV8SI
,
27632 IX86_BUILTIN_GATHERDIV4SI
,
27633 IX86_BUILTIN_GATHERDIV8SI
,
27635 /* Alternate 4 element gather for the vectorizer where
27636 all operands are 32-byte wide. */
27637 IX86_BUILTIN_GATHERALTSIV4DF
,
27638 IX86_BUILTIN_GATHERALTDIV8SF
,
27639 IX86_BUILTIN_GATHERALTSIV4DI
,
27640 IX86_BUILTIN_GATHERALTDIV8SI
,
27642 /* TFmode support builtins. */
27644 IX86_BUILTIN_HUGE_VALQ
,
27645 IX86_BUILTIN_FABSQ
,
27646 IX86_BUILTIN_COPYSIGNQ
,
27648 /* Vectorizer support builtins. */
27649 IX86_BUILTIN_CPYSGNPS
,
27650 IX86_BUILTIN_CPYSGNPD
,
27651 IX86_BUILTIN_CPYSGNPS256
,
27652 IX86_BUILTIN_CPYSGNPD256
,
27654 /* FMA4 instructions. */
27655 IX86_BUILTIN_VFMADDSS
,
27656 IX86_BUILTIN_VFMADDSD
,
27657 IX86_BUILTIN_VFMADDPS
,
27658 IX86_BUILTIN_VFMADDPD
,
27659 IX86_BUILTIN_VFMADDPS256
,
27660 IX86_BUILTIN_VFMADDPD256
,
27661 IX86_BUILTIN_VFMADDSUBPS
,
27662 IX86_BUILTIN_VFMADDSUBPD
,
27663 IX86_BUILTIN_VFMADDSUBPS256
,
27664 IX86_BUILTIN_VFMADDSUBPD256
,
27666 /* FMA3 instructions. */
27667 IX86_BUILTIN_VFMADDSS3
,
27668 IX86_BUILTIN_VFMADDSD3
,
27670 /* XOP instructions. */
27671 IX86_BUILTIN_VPCMOV
,
27672 IX86_BUILTIN_VPCMOV_V2DI
,
27673 IX86_BUILTIN_VPCMOV_V4SI
,
27674 IX86_BUILTIN_VPCMOV_V8HI
,
27675 IX86_BUILTIN_VPCMOV_V16QI
,
27676 IX86_BUILTIN_VPCMOV_V4SF
,
27677 IX86_BUILTIN_VPCMOV_V2DF
,
27678 IX86_BUILTIN_VPCMOV256
,
27679 IX86_BUILTIN_VPCMOV_V4DI256
,
27680 IX86_BUILTIN_VPCMOV_V8SI256
,
27681 IX86_BUILTIN_VPCMOV_V16HI256
,
27682 IX86_BUILTIN_VPCMOV_V32QI256
,
27683 IX86_BUILTIN_VPCMOV_V8SF256
,
27684 IX86_BUILTIN_VPCMOV_V4DF256
,
27686 IX86_BUILTIN_VPPERM
,
27688 IX86_BUILTIN_VPMACSSWW
,
27689 IX86_BUILTIN_VPMACSWW
,
27690 IX86_BUILTIN_VPMACSSWD
,
27691 IX86_BUILTIN_VPMACSWD
,
27692 IX86_BUILTIN_VPMACSSDD
,
27693 IX86_BUILTIN_VPMACSDD
,
27694 IX86_BUILTIN_VPMACSSDQL
,
27695 IX86_BUILTIN_VPMACSSDQH
,
27696 IX86_BUILTIN_VPMACSDQL
,
27697 IX86_BUILTIN_VPMACSDQH
,
27698 IX86_BUILTIN_VPMADCSSWD
,
27699 IX86_BUILTIN_VPMADCSWD
,
27701 IX86_BUILTIN_VPHADDBW
,
27702 IX86_BUILTIN_VPHADDBD
,
27703 IX86_BUILTIN_VPHADDBQ
,
27704 IX86_BUILTIN_VPHADDWD
,
27705 IX86_BUILTIN_VPHADDWQ
,
27706 IX86_BUILTIN_VPHADDDQ
,
27707 IX86_BUILTIN_VPHADDUBW
,
27708 IX86_BUILTIN_VPHADDUBD
,
27709 IX86_BUILTIN_VPHADDUBQ
,
27710 IX86_BUILTIN_VPHADDUWD
,
27711 IX86_BUILTIN_VPHADDUWQ
,
27712 IX86_BUILTIN_VPHADDUDQ
,
27713 IX86_BUILTIN_VPHSUBBW
,
27714 IX86_BUILTIN_VPHSUBWD
,
27715 IX86_BUILTIN_VPHSUBDQ
,
27717 IX86_BUILTIN_VPROTB
,
27718 IX86_BUILTIN_VPROTW
,
27719 IX86_BUILTIN_VPROTD
,
27720 IX86_BUILTIN_VPROTQ
,
27721 IX86_BUILTIN_VPROTB_IMM
,
27722 IX86_BUILTIN_VPROTW_IMM
,
27723 IX86_BUILTIN_VPROTD_IMM
,
27724 IX86_BUILTIN_VPROTQ_IMM
,
27726 IX86_BUILTIN_VPSHLB
,
27727 IX86_BUILTIN_VPSHLW
,
27728 IX86_BUILTIN_VPSHLD
,
27729 IX86_BUILTIN_VPSHLQ
,
27730 IX86_BUILTIN_VPSHAB
,
27731 IX86_BUILTIN_VPSHAW
,
27732 IX86_BUILTIN_VPSHAD
,
27733 IX86_BUILTIN_VPSHAQ
,
27735 IX86_BUILTIN_VFRCZSS
,
27736 IX86_BUILTIN_VFRCZSD
,
27737 IX86_BUILTIN_VFRCZPS
,
27738 IX86_BUILTIN_VFRCZPD
,
27739 IX86_BUILTIN_VFRCZPS256
,
27740 IX86_BUILTIN_VFRCZPD256
,
27742 IX86_BUILTIN_VPCOMEQUB
,
27743 IX86_BUILTIN_VPCOMNEUB
,
27744 IX86_BUILTIN_VPCOMLTUB
,
27745 IX86_BUILTIN_VPCOMLEUB
,
27746 IX86_BUILTIN_VPCOMGTUB
,
27747 IX86_BUILTIN_VPCOMGEUB
,
27748 IX86_BUILTIN_VPCOMFALSEUB
,
27749 IX86_BUILTIN_VPCOMTRUEUB
,
27751 IX86_BUILTIN_VPCOMEQUW
,
27752 IX86_BUILTIN_VPCOMNEUW
,
27753 IX86_BUILTIN_VPCOMLTUW
,
27754 IX86_BUILTIN_VPCOMLEUW
,
27755 IX86_BUILTIN_VPCOMGTUW
,
27756 IX86_BUILTIN_VPCOMGEUW
,
27757 IX86_BUILTIN_VPCOMFALSEUW
,
27758 IX86_BUILTIN_VPCOMTRUEUW
,
27760 IX86_BUILTIN_VPCOMEQUD
,
27761 IX86_BUILTIN_VPCOMNEUD
,
27762 IX86_BUILTIN_VPCOMLTUD
,
27763 IX86_BUILTIN_VPCOMLEUD
,
27764 IX86_BUILTIN_VPCOMGTUD
,
27765 IX86_BUILTIN_VPCOMGEUD
,
27766 IX86_BUILTIN_VPCOMFALSEUD
,
27767 IX86_BUILTIN_VPCOMTRUEUD
,
27769 IX86_BUILTIN_VPCOMEQUQ
,
27770 IX86_BUILTIN_VPCOMNEUQ
,
27771 IX86_BUILTIN_VPCOMLTUQ
,
27772 IX86_BUILTIN_VPCOMLEUQ
,
27773 IX86_BUILTIN_VPCOMGTUQ
,
27774 IX86_BUILTIN_VPCOMGEUQ
,
27775 IX86_BUILTIN_VPCOMFALSEUQ
,
27776 IX86_BUILTIN_VPCOMTRUEUQ
,
27778 IX86_BUILTIN_VPCOMEQB
,
27779 IX86_BUILTIN_VPCOMNEB
,
27780 IX86_BUILTIN_VPCOMLTB
,
27781 IX86_BUILTIN_VPCOMLEB
,
27782 IX86_BUILTIN_VPCOMGTB
,
27783 IX86_BUILTIN_VPCOMGEB
,
27784 IX86_BUILTIN_VPCOMFALSEB
,
27785 IX86_BUILTIN_VPCOMTRUEB
,
27787 IX86_BUILTIN_VPCOMEQW
,
27788 IX86_BUILTIN_VPCOMNEW
,
27789 IX86_BUILTIN_VPCOMLTW
,
27790 IX86_BUILTIN_VPCOMLEW
,
27791 IX86_BUILTIN_VPCOMGTW
,
27792 IX86_BUILTIN_VPCOMGEW
,
27793 IX86_BUILTIN_VPCOMFALSEW
,
27794 IX86_BUILTIN_VPCOMTRUEW
,
27796 IX86_BUILTIN_VPCOMEQD
,
27797 IX86_BUILTIN_VPCOMNED
,
27798 IX86_BUILTIN_VPCOMLTD
,
27799 IX86_BUILTIN_VPCOMLED
,
27800 IX86_BUILTIN_VPCOMGTD
,
27801 IX86_BUILTIN_VPCOMGED
,
27802 IX86_BUILTIN_VPCOMFALSED
,
27803 IX86_BUILTIN_VPCOMTRUED
,
27805 IX86_BUILTIN_VPCOMEQQ
,
27806 IX86_BUILTIN_VPCOMNEQ
,
27807 IX86_BUILTIN_VPCOMLTQ
,
27808 IX86_BUILTIN_VPCOMLEQ
,
27809 IX86_BUILTIN_VPCOMGTQ
,
27810 IX86_BUILTIN_VPCOMGEQ
,
27811 IX86_BUILTIN_VPCOMFALSEQ
,
27812 IX86_BUILTIN_VPCOMTRUEQ
,
27814 /* LWP instructions. */
27815 IX86_BUILTIN_LLWPCB
,
27816 IX86_BUILTIN_SLWPCB
,
27817 IX86_BUILTIN_LWPVAL32
,
27818 IX86_BUILTIN_LWPVAL64
,
27819 IX86_BUILTIN_LWPINS32
,
27820 IX86_BUILTIN_LWPINS64
,
27825 IX86_BUILTIN_XBEGIN
,
27827 IX86_BUILTIN_XABORT
,
27828 IX86_BUILTIN_XTEST
,
27830 /* BMI instructions. */
27831 IX86_BUILTIN_BEXTR32
,
27832 IX86_BUILTIN_BEXTR64
,
27835 /* TBM instructions. */
27836 IX86_BUILTIN_BEXTRI32
,
27837 IX86_BUILTIN_BEXTRI64
,
27839 /* BMI2 instructions. */
27840 IX86_BUILTIN_BZHI32
,
27841 IX86_BUILTIN_BZHI64
,
27842 IX86_BUILTIN_PDEP32
,
27843 IX86_BUILTIN_PDEP64
,
27844 IX86_BUILTIN_PEXT32
,
27845 IX86_BUILTIN_PEXT64
,
27847 /* ADX instructions. */
27848 IX86_BUILTIN_ADDCARRYX32
,
27849 IX86_BUILTIN_ADDCARRYX64
,
27851 /* FSGSBASE instructions. */
27852 IX86_BUILTIN_RDFSBASE32
,
27853 IX86_BUILTIN_RDFSBASE64
,
27854 IX86_BUILTIN_RDGSBASE32
,
27855 IX86_BUILTIN_RDGSBASE64
,
27856 IX86_BUILTIN_WRFSBASE32
,
27857 IX86_BUILTIN_WRFSBASE64
,
27858 IX86_BUILTIN_WRGSBASE32
,
27859 IX86_BUILTIN_WRGSBASE64
,
27861 /* RDRND instructions. */
27862 IX86_BUILTIN_RDRAND16_STEP
,
27863 IX86_BUILTIN_RDRAND32_STEP
,
27864 IX86_BUILTIN_RDRAND64_STEP
,
27866 /* RDSEED instructions. */
27867 IX86_BUILTIN_RDSEED16_STEP
,
27868 IX86_BUILTIN_RDSEED32_STEP
,
27869 IX86_BUILTIN_RDSEED64_STEP
,
27871 /* F16C instructions. */
27872 IX86_BUILTIN_CVTPH2PS
,
27873 IX86_BUILTIN_CVTPH2PS256
,
27874 IX86_BUILTIN_CVTPS2PH
,
27875 IX86_BUILTIN_CVTPS2PH256
,
27877 /* CFString built-in for darwin */
27878 IX86_BUILTIN_CFSTRING
,
27880 /* Builtins to get CPU type and supported features. */
27881 IX86_BUILTIN_CPU_INIT
,
27882 IX86_BUILTIN_CPU_IS
,
27883 IX86_BUILTIN_CPU_SUPPORTS
,
27888 /* Table for the ix86 builtin decls. */
27889 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
27891 /* Table of all of the builtin functions that are possible with different ISA's
27892 but are waiting to be built until a function is declared to use that
27894 struct builtin_isa
{
27895 const char *name
; /* function name */
27896 enum ix86_builtin_func_type tcode
; /* type to use in the declaration */
27897 HOST_WIDE_INT isa
; /* isa_flags this builtin is defined for */
27898 bool const_p
; /* true if the declaration is constant */
27899 bool set_and_not_built_p
;
27902 static struct builtin_isa ix86_builtins_isa
[(int) IX86_BUILTIN_MAX
];
27905 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
27906 of which isa_flags to use in the ix86_builtins_isa array. Stores the
27907 function decl in the ix86_builtins array. Returns the function decl or
27908 NULL_TREE, if the builtin was not added.
27910 If the front end has a special hook for builtin functions, delay adding
27911 builtin functions that aren't in the current ISA until the ISA is changed
27912 with function specific optimization. Doing so, can save about 300K for the
27913 default compiler. When the builtin is expanded, check at that time whether
27916 If the front end doesn't have a special hook, record all builtins, even if
27917 it isn't an instruction set in the current ISA in case the user uses
27918 function specific options for a different ISA, so that we don't get scope
27919 errors if a builtin is added in the middle of a function scope. */
27922 def_builtin (HOST_WIDE_INT mask
, const char *name
,
27923 enum ix86_builtin_func_type tcode
,
27924 enum ix86_builtins code
)
27926 tree decl
= NULL_TREE
;
27928 if (!(mask
& OPTION_MASK_ISA_64BIT
) || TARGET_64BIT
)
27930 ix86_builtins_isa
[(int) code
].isa
= mask
;
27932 mask
&= ~OPTION_MASK_ISA_64BIT
;
27934 || (mask
& ix86_isa_flags
) != 0
27935 || (lang_hooks
.builtin_function
27936 == lang_hooks
.builtin_function_ext_scope
))
27939 tree type
= ix86_get_builtin_func_type (tcode
);
27940 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
27942 ix86_builtins
[(int) code
] = decl
;
27943 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= false;
27947 ix86_builtins
[(int) code
] = NULL_TREE
;
27948 ix86_builtins_isa
[(int) code
].tcode
= tcode
;
27949 ix86_builtins_isa
[(int) code
].name
= name
;
27950 ix86_builtins_isa
[(int) code
].const_p
= false;
27951 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= true;
27958 /* Like def_builtin, but also marks the function decl "const". */
27961 def_builtin_const (HOST_WIDE_INT mask
, const char *name
,
27962 enum ix86_builtin_func_type tcode
, enum ix86_builtins code
)
27964 tree decl
= def_builtin (mask
, name
, tcode
, code
);
27966 TREE_READONLY (decl
) = 1;
27968 ix86_builtins_isa
[(int) code
].const_p
= true;
27973 /* Add any new builtin functions for a given ISA that may not have been
27974 declared. This saves a bit of space compared to adding all of the
27975 declarations to the tree, even if we didn't use them. */
27978 ix86_add_new_builtins (HOST_WIDE_INT isa
)
27982 for (i
= 0; i
< (int)IX86_BUILTIN_MAX
; i
++)
27984 if ((ix86_builtins_isa
[i
].isa
& isa
) != 0
27985 && ix86_builtins_isa
[i
].set_and_not_built_p
)
27989 /* Don't define the builtin again. */
27990 ix86_builtins_isa
[i
].set_and_not_built_p
= false;
27992 type
= ix86_get_builtin_func_type (ix86_builtins_isa
[i
].tcode
);
27993 decl
= add_builtin_function_ext_scope (ix86_builtins_isa
[i
].name
,
27994 type
, i
, BUILT_IN_MD
, NULL
,
27997 ix86_builtins
[i
] = decl
;
27998 if (ix86_builtins_isa
[i
].const_p
)
27999 TREE_READONLY (decl
) = 1;
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1
28010 struct builtin_description
28012 const HOST_WIDE_INT mask
;
28013 const enum insn_code icode
;
28014 const char *const name
;
28015 const enum ix86_builtins code
;
28016 const enum rtx_code comparison
;
28020 static const struct builtin_description bdesc_comi
[] =
28022 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
28023 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
28024 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
28025 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
28026 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
28027 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
28028 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
28029 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
28030 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
28031 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
28032 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
28033 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
28034 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
28035 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
28036 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
28037 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
28038 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
28039 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
28040 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
28041 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
28042 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
28043 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
28044 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
28045 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
28048 static const struct builtin_description bdesc_pcmpestr
[] =
28051 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128
, UNKNOWN
, 0 },
28052 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128
, UNKNOWN
, 0 },
28053 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128
, UNKNOWN
, (int) CCAmode
},
28054 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128
, UNKNOWN
, (int) CCCmode
},
28055 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128
, UNKNOWN
, (int) CCOmode
},
28056 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128
, UNKNOWN
, (int) CCSmode
},
28057 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128
, UNKNOWN
, (int) CCZmode
},
28060 static const struct builtin_description bdesc_pcmpistr
[] =
28063 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128
, UNKNOWN
, 0 },
28064 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128
, UNKNOWN
, 0 },
28065 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128
, UNKNOWN
, (int) CCAmode
},
28066 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128
, UNKNOWN
, (int) CCCmode
},
28067 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128
, UNKNOWN
, (int) CCOmode
},
28068 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128
, UNKNOWN
, (int) CCSmode
},
28069 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128
, UNKNOWN
, (int) CCZmode
},
28072 /* Special builtins with variable number of arguments. */
28073 static const struct builtin_description bdesc_special_args
[] =
28075 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
28076 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP
, UNKNOWN
, (int) UINT64_FTYPE_PUNSIGNED
},
28077 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_pause
, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28079 /* 80387 (for use internally for atomic compound assignment). */
28080 { 0, CODE_FOR_fnstenv
, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
28081 { 0, CODE_FOR_fldenv
, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV
, UNKNOWN
, (int) VOID_FTYPE_PCVOID
},
28082 { 0, CODE_FOR_fnstsw
, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW
, UNKNOWN
, (int) VOID_FTYPE_PUSHORT
},
28083 { 0, CODE_FOR_fnclex
, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28086 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_emms
, "__builtin_ia32_emms", IX86_BUILTIN_EMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28089 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_femms
, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28091 /* FXSR, XSAVE and XSAVEOPT */
28092 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
28093 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
28094 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
28095 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
28096 { OPTION_MASK_ISA_XSAVEOPT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
28098 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
28099 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
28100 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
28101 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
28102 { OPTION_MASK_ISA_XSAVEOPT
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
28105 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storeups
, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
28106 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movntv4sf
, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
28107 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadups
, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
28109 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadhps_exp
, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
28110 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadlps_exp
, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
28111 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storehps
, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
28112 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storelps
, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
28114 /* SSE or 3DNow!A */
28115 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_sfence
, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28116 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_movntq
, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ
, UNKNOWN
, (int) VOID_FTYPE_PULONGLONG_ULONGLONG
},
28119 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lfence
, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28120 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_mfence
, 0, IX86_BUILTIN_MFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28121 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storeupd
, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
28122 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storedquv16qi
, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V16QI
},
28123 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2df
, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
28124 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2di
, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI
},
28125 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntisi
, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI
, UNKNOWN
, (int) VOID_FTYPE_PINT_INT
},
28126 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_movntidi
, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64
, UNKNOWN
, (int) VOID_FTYPE_PLONGLONG_LONGLONG
},
28127 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadupd
, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD
, UNKNOWN
, (int) V2DF_FTYPE_PCDOUBLE
},
28128 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loaddquv16qi
, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
28130 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadhpd_exp
, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
28131 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadlpd_exp
, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
28134 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_lddqu
, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
28137 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_movntdqa
, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA
, UNKNOWN
, (int) V2DI_FTYPE_PV2DI
},
28140 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv2df
, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
28141 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv4sf
, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
28144 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroall
, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28145 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroupper
, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28147 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4sf
, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
28148 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4df
, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
28149 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv8sf
, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
28150 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v4df
, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV2DF
},
28151 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v8sf
, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV4SF
},
28153 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadupd256
, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
28154 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadups256
, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
28155 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeupd256
, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
28156 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeups256
, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
28157 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loaddquv32qi
, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
28158 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storedquv32qi
, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V32QI
},
28159 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_lddqu256
, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
28161 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4di
, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI
},
28162 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4df
, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
28163 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv8sf
, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
28165 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd
, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD
, UNKNOWN
, (int) V2DF_FTYPE_PCV2DF_V2DI
},
28166 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps
, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS
, UNKNOWN
, (int) V4SF_FTYPE_PCV4SF_V4SI
},
28167 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd256
, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV4DF_V4DI
},
28168 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps256
, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV8SF_V8SI
},
28169 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd
, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD
, UNKNOWN
, (int) VOID_FTYPE_PV2DF_V2DI_V2DF
},
28170 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps
, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS
, UNKNOWN
, (int) VOID_FTYPE_PV4SF_V4SI_V4SF
},
28171 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd256
, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256
, UNKNOWN
, (int) VOID_FTYPE_PV4DF_V4DI_V4DF
},
28172 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps256
, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256
, UNKNOWN
, (int) VOID_FTYPE_PV8SF_V8SI_V8SF
},
28175 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_movntdqa
, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256
, UNKNOWN
, (int) V4DI_FTYPE_PV4DI
},
28176 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd
, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD
, UNKNOWN
, (int) V4SI_FTYPE_PCV4SI_V4SI
},
28177 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq
, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ
, UNKNOWN
, (int) V2DI_FTYPE_PCV2DI_V2DI
},
28178 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd256
, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256
, UNKNOWN
, (int) V8SI_FTYPE_PCV8SI_V8SI
},
28179 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq256
, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256
, UNKNOWN
, (int) V4DI_FTYPE_PCV4DI_V4DI
},
28180 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored
, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED
, UNKNOWN
, (int) VOID_FTYPE_PV4SI_V4SI_V4SI
},
28181 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq
, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI_V2DI
},
28182 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored256
, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256
, UNKNOWN
, (int) VOID_FTYPE_PV8SI_V8SI_V8SI
},
28183 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq256
, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI_V4DI
},
28185 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_llwpcb
, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
28186 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_slwpcb
, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB
, UNKNOWN
, (int) PVOID_FTYPE_VOID
},
28187 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvalsi3
, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32
, UNKNOWN
, (int) VOID_FTYPE_UINT_UINT_UINT
},
28188 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvaldi3
, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64
, UNKNOWN
, (int) VOID_FTYPE_UINT64_UINT_UINT
},
28189 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinssi3
, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32
, UNKNOWN
, (int) UCHAR_FTYPE_UINT_UINT_UINT
},
28190 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinsdi3
, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64
, UNKNOWN
, (int) UCHAR_FTYPE_UINT64_UINT_UINT
},
28193 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasesi
, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
28194 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasedi
, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
28195 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasesi
, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
28196 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasedi
, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
28197 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasesi
, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
28198 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasedi
, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
28199 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasesi
, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
28200 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasedi
, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
28203 { OPTION_MASK_ISA_RTM
, CODE_FOR_xbegin
, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
28204 { OPTION_MASK_ISA_RTM
, CODE_FOR_xend
, "__builtin_ia32_xend", IX86_BUILTIN_XEND
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28205 { OPTION_MASK_ISA_RTM
, CODE_FOR_xtest
, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST
, UNKNOWN
, (int) INT_FTYPE_VOID
},
28208 /* Builtins with variable number of arguments. */
28209 static const struct builtin_description bdesc_args
[] =
28211 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr
, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI
, UNKNOWN
, (int) INT_FTYPE_INT
},
28212 { OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr_rex64
, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI
, UNKNOWN
, (int) INT64_FTYPE_INT64
},
28213 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC
, UNKNOWN
, (int) UINT64_FTYPE_INT
},
28214 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlqi3
, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
28215 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlhi3
, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
28216 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrqi3
, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
28217 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrhi3
, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
28220 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28221 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28222 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28223 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28224 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28225 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28227 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28228 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28229 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28230 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28231 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28232 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28233 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28234 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28236 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28237 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28239 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28240 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andnotv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28241 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28242 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28244 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28245 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28246 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28247 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28248 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28249 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28251 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28252 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28253 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28254 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28255 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28256 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28258 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
28259 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW
, UNKNOWN
, (int) V4HI_FTYPE_V2SI_V2SI
},
28260 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
28262 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD
, UNKNOWN
, (int) V2SI_FTYPE_V4HI_V4HI
},
28264 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
28265 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
28266 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
28267 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
28268 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
28269 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
28271 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
28272 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
28273 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
28274 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
28275 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
28276 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
28278 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
28279 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
28280 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
28281 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
28284 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pf2id
, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
28285 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_floatv2si2
, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
28286 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpv2sf2
, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
28287 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqrtv2sf2
, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
28289 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28290 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_haddv2sf3
, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28291 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_addv2sf3
, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28292 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_eqv2sf3
, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
28293 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gev2sf3
, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
28294 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gtv2sf3
, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
28295 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_smaxv2sf3
, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28296 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_sminv2sf3
, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28297 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_mulv2sf3
, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28298 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit1v2sf3
, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28299 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit2v2sf3
, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28300 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqit1v2sf3
, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28301 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subv2sf3
, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28302 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subrv2sf3
, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28303 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pmulhrwv4hi3
, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28306 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pf2iw
, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
28307 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pi2fw
, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
28308 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2si2
, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
28309 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2sf2
, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
28310 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_hsubv2sf3
, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28311 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_addsubv2sf3
, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28314 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
28315 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_sqrtv4sf2
, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28316 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28317 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28318 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtv4sf2
, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28319 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28320 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
28321 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
28322 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
28323 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
28324 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
28325 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
28327 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_shufps
, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28329 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28330 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28331 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28332 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28333 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28334 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28335 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28336 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28338 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
28339 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
28340 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
28341 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
28342 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
28343 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
28344 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
28345 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
28346 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
28347 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
28348 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
28349 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
28350 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
28351 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
28352 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
28353 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
28354 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
28355 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
28356 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
28357 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
28359 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28360 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28361 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28362 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28364 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28365 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_andnotv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28366 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28367 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28369 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysignv4sf3
, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28371 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28372 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps_exp
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28373 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps_exp
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28374 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_highv4sf
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28375 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_lowv4sf
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28377 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2SI
},
28378 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_SI
},
28379 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, V4SF_FTYPE_V4SF_DI
},
28381 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtsf2
, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF
, UNKNOWN
, (int) FLOAT_FTYPE_FLOAT
},
28383 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsqrtv4sf2
, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
28384 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrsqrtv4sf2
, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
28385 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrcpv4sf2
, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
28387 { OPTION_MASK_ISA_SSE
, CODE_FOR_abstf2
, 0, IX86_BUILTIN_FABSQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128
},
28388 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysigntf3
, 0, IX86_BUILTIN_COPYSIGNQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128
},
28390 /* SSE MMX or 3Dnow!A */
28391 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28392 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28393 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28395 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28396 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28397 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28398 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28400 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW
, UNKNOWN
, (int) V1DI_FTYPE_V8QI_V8QI
},
28401 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, (int) INT_FTYPE_V8QI
},
28403 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pshufw
, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_INT
},
28406 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_shufpd
, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28408 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
28409 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, (int) INT_FTYPE_V16QI
},
28410 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
28411 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SI
},
28412 { OPTION_MASK_ISA_SSE2
, CODE_FOR_floatv4siv4sf2
, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
28414 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
28415 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
28416 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, (int) V4SF_FTYPE_V2DF
},
28417 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
28418 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
28420 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, (int) V2DF_FTYPE_V2SI
},
28422 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
28423 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
28424 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
28425 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
28427 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_fix_notruncv4sfv4si
, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
28428 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SF
},
28429 { OPTION_MASK_ISA_SSE2
, CODE_FOR_fix_truncv4sfv4si2
, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
28431 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28432 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28433 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28434 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28435 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28436 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28437 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28438 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28440 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
28441 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
28442 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
28443 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
28444 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
28445 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
28446 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
28447 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
28448 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
28449 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
28450 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
28451 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
28452 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
28453 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
28454 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
28455 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
28456 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
28457 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
28458 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
28459 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
28461 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28462 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28463 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28464 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28466 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28467 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28468 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28469 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28471 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysignv2df3
, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28473 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28474 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2df
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28475 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2df
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28477 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
28479 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28480 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28481 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28482 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28483 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28484 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28485 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28486 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28488 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28489 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28490 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28491 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28492 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28493 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28494 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28495 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28497 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28498 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
,(int) V8HI_FTYPE_V8HI_V8HI
},
28500 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28501 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28502 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28503 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28505 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28506 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28508 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28509 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28510 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28511 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28512 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28513 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28515 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28516 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28517 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28518 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28520 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv16qi
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28521 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv8hi
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28522 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv4si
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28523 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2di
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28524 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv16qi
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28525 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv8hi
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28526 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv4si
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28527 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2di
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28529 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
28530 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
28531 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
28533 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28534 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI_V16QI
},
28536 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv1siv1di3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, UNKNOWN
, (int) V1DI_FTYPE_V2SI_V2SI
},
28537 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_widen_umult_even_v4si
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
28539 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI_V8HI
},
28541 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_SI
},
28542 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_DI
},
28543 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2DF
},
28544 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V4SF
},
28546 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ashlv1ti3
, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
28547 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
28548 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
28549 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
28550 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
28551 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
28552 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
28554 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lshrv1ti3
, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
28555 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
28556 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
28557 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
28558 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
28559 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
28560 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
28562 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
28563 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
28564 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
28565 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
28567 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufd
, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_INT
},
28568 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshuflw
, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
28569 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufhw
, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
28571 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsqrtv2df2
, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_VEC_MERGE
},
28573 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse2_movq128
, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
28576 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_addv1di3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
28577 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_subv1di3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
28580 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28581 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28583 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28584 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28585 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28586 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28587 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28588 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28591 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
28592 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI
},
28593 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
28594 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI
},
28595 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
28596 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
28598 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28599 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28600 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28601 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28602 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28603 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28604 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28605 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28606 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28607 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28608 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28609 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28610 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw128
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI_V16QI
},
28611 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V8QI_V8QI
},
28612 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28613 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28614 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28615 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28616 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28617 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28618 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28619 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28620 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28621 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28624 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrti
, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
},
28625 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrdi
, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
},
28628 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendpd
, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28629 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendps
, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28630 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvpd
, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_V2DF
},
28631 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvps
, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_V4SF
},
28632 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dppd
, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28633 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dpps
, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28634 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_insertps
, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28635 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mpsadbw
, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_INT
},
28636 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendvb
, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_V16QI
},
28637 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendw
, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_INT
},
28639 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv8qiv8hi2
, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
28640 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4qiv4si2
, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
28641 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2qiv2di2
, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
28642 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4hiv4si2
, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
28643 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2hiv2di2
, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
28644 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2siv2di2
, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
28645 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv8qiv8hi2
, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
28646 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4qiv4si2
, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
28647 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2qiv2di2
, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
28648 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4hiv4si2
, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
28649 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2hiv2di2
, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
28650 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2siv2di2
, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
28651 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_phminposuw
, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
28653 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_packusdw
, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
28654 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_eqv2di3
, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28655 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv16qi3
, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28656 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv4si3
, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28657 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv4si3
, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28658 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv8hi3
, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28659 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv16qi3
, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28660 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv4si3
, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28661 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv4si3
, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28662 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv8hi3
, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28663 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mulv2siv2di3
, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
28664 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_mulv4si3
, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28667 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
28668 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
28669 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundsd
, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28670 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundss
, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28672 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD
, (enum rtx_code
) ROUND_FLOOR
, (int) V2DF_FTYPE_V2DF_ROUND
},
28673 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD
, (enum rtx_code
) ROUND_CEIL
, (int) V2DF_FTYPE_V2DF_ROUND
},
28674 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD
, (enum rtx_code
) ROUND_TRUNC
, (int) V2DF_FTYPE_V2DF_ROUND
},
28675 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD
, (enum rtx_code
) ROUND_MXCSR
, (int) V2DF_FTYPE_V2DF_ROUND
},
28677 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
28678 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
28680 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2
, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
28681 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
28683 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SF_FTYPE_V4SF_ROUND
},
28684 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS
, (enum rtx_code
) ROUND_CEIL
, (int) V4SF_FTYPE_V4SF_ROUND
},
28685 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS
, (enum rtx_code
) ROUND_TRUNC
, (int) V4SF_FTYPE_V4SF_ROUND
},
28686 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS
, (enum rtx_code
) ROUND_MXCSR
, (int) V4SF_FTYPE_V4SF_ROUND
},
28688 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V4SF_ROUND
},
28689 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V4SF_ROUND
},
28691 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2
, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28692 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2_sfix
, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
28694 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ
, EQ
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
28695 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC
, LTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
28696 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC
, GTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
28699 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_gtv2di3
, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28700 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32qi
, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UCHAR
},
28701 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32hi
, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI
, UNKNOWN
, (int) UINT_FTYPE_UINT_USHORT
},
28702 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32si
, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
28703 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse4_2_crc32di
, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
28706 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrqi
, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_UINT_UINT
},
28707 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrq
, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V16QI
},
28708 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertqi
, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT
},
28709 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertq
, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28712 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aeskeygenassist
, 0, IX86_BUILTIN_AESKEYGENASSIST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT
},
28713 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesimc
, 0, IX86_BUILTIN_AESIMC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
28715 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenc
, 0, IX86_BUILTIN_AESENC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28716 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenclast
, 0, IX86_BUILTIN_AESENCLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28717 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdec
, 0, IX86_BUILTIN_AESDEC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28718 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdeclast
, 0, IX86_BUILTIN_AESDECLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28721 { OPTION_MASK_ISA_SSE2
, CODE_FOR_pclmulqdq
, 0, IX86_BUILTIN_PCLMULQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT
},
28724 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv4df3
, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28725 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv8sf3
, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28726 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv4df3
, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28727 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv8sf3
, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28728 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv4df3
, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28729 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv8sf3
, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28730 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv4df3
, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28731 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv8sf3
, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28732 { OPTION_MASK_ISA_AVX
, CODE_FOR_divv4df3
, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28733 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_divv8sf3
, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28734 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv4df3
, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28735 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv8sf3
, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28736 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv4df3
, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28737 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv8sf3
, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28738 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv4df3
, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28739 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv8sf3
, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28740 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv4df3
, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28741 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv8sf3
, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28742 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv4df3
, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28743 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv8sf3
, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28744 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv4df3
, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28745 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv8sf3
, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28746 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv4df3
, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28747 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv8sf3
, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28748 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv4df3
, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28749 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv8sf3
, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28751 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv2df3
, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DI
},
28752 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4sf3
, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SI
},
28753 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4df3
, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DI
},
28754 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv8sf3
, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
28756 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendpd256
, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28757 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendps256
, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28758 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvpd256
, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_V4DF
},
28759 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvps256
, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_V8SF
},
28760 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_dpps256
, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28761 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufpd256
, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28762 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufps256
, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28763 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv2df3
, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28764 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv4sf3
, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28765 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv2df3
, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28766 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4sf3
, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28767 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4df3
, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28768 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv8sf3
, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28769 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v4df
, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF_INT
},
28770 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8sf
, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF_INT
},
28771 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8si
, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI_INT
},
28772 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv4siv4df2
, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SI
},
28773 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv8siv8sf2
, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SI
},
28774 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2ps256
, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256
, UNKNOWN
, (int) V4SF_FTYPE_V4DF
},
28775 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_fix_notruncv8sfv8si
, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
28776 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2pd256
, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SF
},
28777 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv4dfv4si2
, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
28778 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2dq256
, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
28779 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv8sfv8si2
, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
28780 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v4df3
, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28781 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8sf3
, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28782 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8si3
, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
28783 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv2df
, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
28784 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4sf
, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
28785 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4df
, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
28786 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv8sf
, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
28787 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v4df
, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V2DF_INT
},
28788 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8sf
, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V4SF_INT
},
28789 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8si
, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_INT
},
28791 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movshdup256
, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28792 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movsldup256
, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28793 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movddup256
, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
28795 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv4df2
, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
28796 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_sqrtv8sf2
, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28797 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv8sf2
, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28798 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rsqrtv8sf2
, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28799 { OPTION_MASK_ISA_AVX
, CODE_FOR_rsqrtv8sf2
, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28801 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rcpv8sf2
, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28803 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
28804 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
28806 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256
, (enum rtx_code
) ROUND_FLOOR
, (int) V4DF_FTYPE_V4DF_ROUND
},
28807 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256
, (enum rtx_code
) ROUND_CEIL
, (int) V4DF_FTYPE_V4DF_ROUND
},
28808 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256
, (enum rtx_code
) ROUND_TRUNC
, (int) V4DF_FTYPE_V4DF_ROUND
},
28809 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256
, (enum rtx_code
) ROUND_MXCSR
, (int) V4DF_FTYPE_V4DF_ROUND
},
28811 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2
, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
28812 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
28814 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
28815 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
28817 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SF_FTYPE_V8SF_ROUND
},
28818 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SF_FTYPE_V8SF_ROUND
},
28819 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256
, (enum rtx_code
) ROUND_TRUNC
, (int) V8SF_FTYPE_V8SF_ROUND
},
28820 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256
, (enum rtx_code
) ROUND_MXCSR
, (int) V8SF_FTYPE_V8SF_ROUND
},
28822 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V8SF_ROUND
},
28823 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V8SF_ROUND
},
28825 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2
, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28826 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2_sfix
, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
28828 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhpd256
, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28829 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklpd256
, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28830 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhps256
, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28831 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklps256
, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28833 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_si256_si
, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
28834 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ps256_ps
, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
28835 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_pd256_pd
, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
28836 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8si
, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI
},
28837 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8sf
, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF
},
28838 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v4df
, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF
},
28840 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD
, EQ
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
28841 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD
, LTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
28842 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD
, GTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
28843 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS
, EQ
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
28844 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS
, LTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
28845 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS
, GTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
28846 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256
, EQ
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
28847 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256
, LTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
28848 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256
, GTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
28849 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256
, EQ
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
28850 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256
, LTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
28851 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256
, GTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
28852 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256
, EQ
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
28853 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256
, LTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
28854 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256
, GTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
28856 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskpd256
, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256
, UNKNOWN
, (int) INT_FTYPE_V4DF
},
28857 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskps256
, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256
, UNKNOWN
, (int) INT_FTYPE_V8SF
},
28859 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv8sf3
, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28860 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv4df3
, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28862 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_pack_sfix_v4df
, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
28865 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mpsadbw
, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_INT
},
28866 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv32qi2
, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI
},
28867 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv16hi2
, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI
},
28868 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv8si2
, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI
},
28869 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packssdw
, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
28870 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packsswb
, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
28871 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packusdw
, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
28872 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packuswb
, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
28873 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv32qi3
, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28874 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv16hi3
, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28875 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv8si3
, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28876 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv4di3
, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28877 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv32qi3
, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28878 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv16hi3
, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28879 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv32qi3
, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28880 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv16hi3
, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28881 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_palignrv2ti
, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
},
28882 { OPTION_MASK_ISA_AVX2
, CODE_FOR_andv4di3
, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28883 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_andnotv4di3
, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28884 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv32qi3
, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28885 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv16hi3
, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28886 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendvb
, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_V32QI
},
28887 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendw
, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI_INT
},
28888 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv32qi3
, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28889 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv16hi3
, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28890 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv8si3
, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28891 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv4di3
, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28892 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv32qi3
, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28893 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv16hi3
, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28894 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv8si3
, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28895 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv4di3
, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28896 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddwv16hi3
, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28897 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phadddv8si3
, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28898 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddswv16hi3
, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28899 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubwv16hi3
, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28900 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubdv8si3
, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28901 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubswv16hi3
, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28902 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddubsw256
, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
28903 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddwd
, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256
, UNKNOWN
, (int) V8SI_FTYPE_V16HI_V16HI
},
28904 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv32qi3
, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28905 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv16hi3
, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28906 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv8si3
, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28907 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv32qi3
, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28908 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv16hi3
, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28909 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv8si3
, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28910 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv32qi3
, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28911 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv16hi3
, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28912 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv8si3
, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28913 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv32qi3
, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28914 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv16hi3
, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28915 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv8si3
, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28916 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmovmskb
, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256
, UNKNOWN
, (int) INT_FTYPE_V32QI
},
28917 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv16qiv16hi2
, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
28918 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8qiv8si2
, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
28919 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4qiv4di2
, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
28920 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8hiv8si2
, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
28921 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4hiv4di2
, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
28922 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4siv4di2
, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
28923 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv16qiv16hi2
, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
28924 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8qiv8si2
, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
28925 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4qiv4di2
, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
28926 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8hiv8si2
, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
28927 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4hiv4di2
, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
28928 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4siv4di2
, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
28929 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_smult_even_v8si
, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
28930 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmulhrswv16hi3
, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28931 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umulv16hi3_highpart
, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28932 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smulv16hi3_highpart
, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28933 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv16hi3
, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28934 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv8si3
, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28935 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_umult_even_v8si
, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
28936 { OPTION_MASK_ISA_AVX2
, CODE_FOR_iorv4di3
, "__builtin_ia32_por256", IX86_BUILTIN_POR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28937 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psadbw
, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
28938 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufbv32qi3
, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28939 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufdv3
, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_INT
},
28940 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufhwv3
, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
28941 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshuflwv3
, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
28942 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv32qi3
, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28943 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv16hi3
, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28944 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv8si3
, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28945 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlv2ti3
, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
28946 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
28947 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
28948 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
28949 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
28950 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
28951 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
28952 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
28953 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
28954 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
28955 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
28956 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrv2ti3
, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
28957 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
28958 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
28959 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
28960 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
28961 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
28962 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
28963 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv32qi3
, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28964 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv16hi3
, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28965 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv8si3
, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28966 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv4di3
, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28967 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv32qi3
, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28968 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv16hi3
, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28969 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv32qi3
, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28970 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv16hi3
, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28971 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv32qi
, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28972 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv16hi
, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28973 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv8si
, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28974 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv4di
, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28975 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv32qi
, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28976 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv16hi
, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28977 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv8si
, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28978 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv4di
, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28979 { OPTION_MASK_ISA_AVX2
, CODE_FOR_xorv4di3
, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28980 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4sf
, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28981 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv8sf
, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
28982 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4df
, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
28983 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vbroadcasti128_v4di
, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
28984 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv4si
, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_INT
},
28985 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv8si
, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
28986 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv32qi
, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256
, UNKNOWN
, (int) V32QI_FTYPE_V16QI
},
28987 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16hi
, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256
, UNKNOWN
, (int) V16HI_FTYPE_V8HI
},
28988 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8si
, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
28989 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4di
, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
28990 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16qi
, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
28991 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8hi
, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
28992 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4si
, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
28993 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv2di
, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
28994 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8si
, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28995 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8sf
, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
28996 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4df
, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
28997 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4di
, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT
},
28998 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv2ti
, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT
},
28999 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_extracti128
, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256
, UNKNOWN
, (int) V2DI_FTYPE_V4DI_INT
},
29000 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_inserti128
, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_INT
},
29001 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4di
, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
29002 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv2di
, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
29003 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv8si
, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
29004 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4si
, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
29005 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv8si
, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
29006 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv4si
, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
29007 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4di
, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
29008 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv2di
, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
29009 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv8si
, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
29010 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4si
, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
29012 { OPTION_MASK_ISA_LZCNT
, CODE_FOR_clzhi2_lzcnt
, "__builtin_clzs", IX86_BUILTIN_CLZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
29015 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_si
, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
29016 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_di
, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
29017 { OPTION_MASK_ISA_BMI
, CODE_FOR_ctzhi2
, "__builtin_ctzs", IX86_BUILTIN_CTZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
29020 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_si
, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
29021 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_di
, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
29024 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps
, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS
, UNKNOWN
, (int) V4SF_FTYPE_V8HI
},
29025 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps256
, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8HI
},
29026 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph
, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH
, UNKNOWN
, (int) V8HI_FTYPE_V4SF_INT
},
29027 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph256
, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256
, UNKNOWN
, (int) V8HI_FTYPE_V8SF_INT
},
29030 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_si3
, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
29031 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_di3
, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
29032 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_si3
, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
29033 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_di3
, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
29034 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_si3
, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
29035 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_di3
, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
/* FMA4 and XOP.  */

/* Shorthand names for the ix86 builtin function-type enumerators used by
   the bdesc_multi_arg table below.  The naming scheme is
   MULTI_ARG_<nargs>_<element kind>[suffix]:
     - SF/DF     : 128-bit float/double vectors (V4SF / V2DF)
     - SF2/DF2   : 256-bit float/double vectors (V8SF / V4DF)
     - QI/HI/SI/DI : 128-bit integer vectors by element width
     - QI2/HI2/SI2/DI2 : 256-bit integer vectors by element width
     - _IMM      : second operand is an immediate shift/rotate count
     - _CMP      : comparison form (condition code carried in the table)
     - _TF       : test form used by the VPCOM* builtins
     - mixed names such as SI_DI denote widening operations.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
29092 static const struct builtin_description bdesc_multi_arg
[] =
29094 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v4sf
,
29095 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS
,
29096 UNKNOWN
, (int)MULTI_ARG_3_SF
},
29097 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v2df
,
29098 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD
,
29099 UNKNOWN
, (int)MULTI_ARG_3_DF
},
29101 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v4sf
,
29102 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3
,
29103 UNKNOWN
, (int)MULTI_ARG_3_SF
},
29104 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v2df
,
29105 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3
,
29106 UNKNOWN
, (int)MULTI_ARG_3_DF
},
29108 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4sf
,
29109 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS
,
29110 UNKNOWN
, (int)MULTI_ARG_3_SF
},
29111 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v2df
,
29112 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD
,
29113 UNKNOWN
, (int)MULTI_ARG_3_DF
},
29114 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v8sf
,
29115 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256
,
29116 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
29117 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4df
,
29118 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256
,
29119 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
29121 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4sf
,
29122 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS
,
29123 UNKNOWN
, (int)MULTI_ARG_3_SF
},
29124 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v2df
,
29125 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD
,
29126 UNKNOWN
, (int)MULTI_ARG_3_DF
},
29127 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v8sf
,
29128 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256
,
29129 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
29130 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4df
,
29131 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256
,
29132 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
29134 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
29135 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
29136 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4si
, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
29137 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8hi
, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
29138 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16qi
, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI
,UNKNOWN
, (int)MULTI_ARG_3_QI
},
29139 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2df
, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
29140 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4sf
, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
29142 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
29143 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
29144 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8si256
, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256
, UNKNOWN
, (int)MULTI_ARG_3_SI2
},
29145 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16hi256
, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256
, UNKNOWN
, (int)MULTI_ARG_3_HI2
},
29146 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v32qi256
, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256
, UNKNOWN
, (int)MULTI_ARG_3_QI2
},
29147 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4df256
, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256
, UNKNOWN
, (int)MULTI_ARG_3_DF2
},
29148 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8sf256
, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256
, UNKNOWN
, (int)MULTI_ARG_3_SF2
},
29150 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pperm
, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM
, UNKNOWN
, (int)MULTI_ARG_3_QI
},
29152 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssww
, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
29153 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsww
, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
29154 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsswd
, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
29155 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacswd
, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
29156 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdd
, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
29157 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdd
, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
29158 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdql
, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
29159 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdqh
, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
29160 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdql
, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
29161 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdqh
, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
29162 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcsswd
, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
29163 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcswd
, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
29165 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv2di3
, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
29166 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv4si3
, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
29167 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv8hi3
, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
29168 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv16qi3
, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
29169 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv2di3
, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM
, UNKNOWN
, (int)MULTI_ARG_2_DI_IMM
},
29170 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv4si3
, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM
, UNKNOWN
, (int)MULTI_ARG_2_SI_IMM
},
29171 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv8hi3
, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM
, UNKNOWN
, (int)MULTI_ARG_2_HI_IMM
},
29172 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv16qi3
, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM
, UNKNOWN
, (int)MULTI_ARG_2_QI_IMM
},
29173 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav2di3
, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
29174 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav4si3
, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
29175 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav8hi3
, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
29176 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav16qi3
, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
29177 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv2di3
, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
29178 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv4si3
, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
29179 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv8hi3
, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
29180 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv16qi3
, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
29182 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv4sf2
, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS
, UNKNOWN
, (int)MULTI_ARG_2_SF
},
29183 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv2df2
, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD
, UNKNOWN
, (int)MULTI_ARG_2_DF
},
29184 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4sf2
, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS
, UNKNOWN
, (int)MULTI_ARG_1_SF
},
29185 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv2df2
, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD
, UNKNOWN
, (int)MULTI_ARG_1_DF
},
29186 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv8sf2
, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256
, UNKNOWN
, (int)MULTI_ARG_1_SF2
},
29187 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4df2
, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256
, UNKNOWN
, (int)MULTI_ARG_1_DF2
},
29189 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbw
, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
29190 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbd
, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
29191 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbq
, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
29192 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwd
, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
29193 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwq
, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
29194 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadddq
, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
29195 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubw
, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
29196 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubd
, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
29197 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubq
, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
29198 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwd
, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
29199 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwq
, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
29200 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddudq
, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
29201 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubbw
, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
29202 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubwd
, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
29203 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubdq
, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
29205 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
29206 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
29207 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
29208 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB
, LT
, (int)MULTI_ARG_2_QI_CMP
},
29209 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB
, LE
, (int)MULTI_ARG_2_QI_CMP
},
29210 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB
, GT
, (int)MULTI_ARG_2_QI_CMP
},
29211 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB
, GE
, (int)MULTI_ARG_2_QI_CMP
},
29213 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
29214 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
29215 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
29216 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW
, LT
, (int)MULTI_ARG_2_HI_CMP
},
29217 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW
, LE
, (int)MULTI_ARG_2_HI_CMP
},
29218 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW
, GT
, (int)MULTI_ARG_2_HI_CMP
},
29219 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW
, GE
, (int)MULTI_ARG_2_HI_CMP
},
29221 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
29222 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
29223 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
29224 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD
, LT
, (int)MULTI_ARG_2_SI_CMP
},
29225 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED
, LE
, (int)MULTI_ARG_2_SI_CMP
},
29226 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD
, GT
, (int)MULTI_ARG_2_SI_CMP
},
29227 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED
, GE
, (int)MULTI_ARG_2_SI_CMP
},
29229 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
29230 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
29231 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
29232 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ
, LT
, (int)MULTI_ARG_2_DI_CMP
},
29233 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ
, LE
, (int)MULTI_ARG_2_DI_CMP
},
29234 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ
, GT
, (int)MULTI_ARG_2_DI_CMP
},
29235 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ
, GE
, (int)MULTI_ARG_2_DI_CMP
},
29237 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
29238 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
29239 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
29240 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB
, LTU
, (int)MULTI_ARG_2_QI_CMP
},
29241 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB
, LEU
, (int)MULTI_ARG_2_QI_CMP
},
29242 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB
, GTU
, (int)MULTI_ARG_2_QI_CMP
},
29243 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB
, GEU
, (int)MULTI_ARG_2_QI_CMP
},
29245 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
29246 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
29247 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
29248 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW
, LTU
, (int)MULTI_ARG_2_HI_CMP
},
29249 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW
, LEU
, (int)MULTI_ARG_2_HI_CMP
},
29250 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW
, GTU
, (int)MULTI_ARG_2_HI_CMP
},
29251 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW
, GEU
, (int)MULTI_ARG_2_HI_CMP
},
29253 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
29254 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
29255 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
29256 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD
, LTU
, (int)MULTI_ARG_2_SI_CMP
},
29257 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD
, LEU
, (int)MULTI_ARG_2_SI_CMP
},
29258 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD
, GTU
, (int)MULTI_ARG_2_SI_CMP
},
29259 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD
, GEU
, (int)MULTI_ARG_2_SI_CMP
},
29261 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
29262 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
29263 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
29264 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ
, LTU
, (int)MULTI_ARG_2_DI_CMP
},
29265 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ
, LEU
, (int)MULTI_ARG_2_DI_CMP
},
29266 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ
, GTU
, (int)MULTI_ARG_2_DI_CMP
},
29267 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ
, GEU
, (int)MULTI_ARG_2_DI_CMP
},
29269 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
29270 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
29271 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
29272 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
29273 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
29274 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
29275 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
29276 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
29278 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
29279 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
29280 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
29281 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
29282 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
29283 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
29284 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
29285 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
29287 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v2df3
, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I
},
29288 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4sf3
, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I
},
29289 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4df3
, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I1
},
29290 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v8sf3
, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I1
},
/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' because
   we're lazy.  Add casts to make them fit.  */
29298 static const struct builtin_description bdesc_tm
[] =
29300 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
29301 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
29302 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
29303 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
29304 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
29305 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
29306 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
29308 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
29309 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaRM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
29310 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaWM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
29311 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
29312 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaRM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
29313 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
29314 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RfWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
29316 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
29317 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
29318 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
29319 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
29320 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
29321 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
29322 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
29324 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_LM64", (enum ix86_builtins
) BUILT_IN_TM_LOG_M64
, UNKNOWN
, VOID_FTYPE_PCVOID
},
29325 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_LM128", (enum ix86_builtins
) BUILT_IN_TM_LOG_M128
, UNKNOWN
, VOID_FTYPE_PCVOID
},
29326 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_LM256", (enum ix86_builtins
) BUILT_IN_TM_LOG_M256
, UNKNOWN
, VOID_FTYPE_PCVOID
},
/* TM callbacks.  */
29331 /* Return the builtin decl needed to load a vector of TYPE. */
29334 ix86_builtin_tm_load (tree type
)
29336 if (TREE_CODE (type
) == VECTOR_TYPE
)
29338 switch (tree_to_uhwi (TYPE_SIZE (type
)))
29341 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64
);
29343 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128
);
29345 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256
);
29351 /* Return the builtin decl needed to store a vector of TYPE. */
29354 ix86_builtin_tm_store (tree type
)
29356 if (TREE_CODE (type
) == VECTOR_TYPE
)
29358 switch (tree_to_uhwi (TYPE_SIZE (type
)))
29361 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64
);
29363 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128
);
29365 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256
);
29371 /* Initialize the transactional memory vector load/store builtins. */
29374 ix86_init_tm_builtins (void)
29376 enum ix86_builtin_func_type ftype
;
29377 const struct builtin_description
*d
;
29380 tree attrs_load
, attrs_type_load
, attrs_store
, attrs_type_store
;
29381 tree attrs_log
, attrs_type_log
;
29386 /* If there are no builtins defined, we must be compiling in a
29387 language without trans-mem support. */
29388 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1
))
29391 /* Use whatever attributes a normal TM load has. */
29392 decl
= builtin_decl_explicit (BUILT_IN_TM_LOAD_1
);
29393 attrs_load
= DECL_ATTRIBUTES (decl
);
29394 attrs_type_load
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
29395 /* Use whatever attributes a normal TM store has. */
29396 decl
= builtin_decl_explicit (BUILT_IN_TM_STORE_1
);
29397 attrs_store
= DECL_ATTRIBUTES (decl
);
29398 attrs_type_store
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
29399 /* Use whatever attributes a normal TM log has. */
29400 decl
= builtin_decl_explicit (BUILT_IN_TM_LOG
);
29401 attrs_log
= DECL_ATTRIBUTES (decl
);
29402 attrs_type_log
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
29404 for (i
= 0, d
= bdesc_tm
;
29405 i
< ARRAY_SIZE (bdesc_tm
);
29408 if ((d
->mask
& ix86_isa_flags
) != 0
29409 || (lang_hooks
.builtin_function
29410 == lang_hooks
.builtin_function_ext_scope
))
29412 tree type
, attrs
, attrs_type
;
29413 enum built_in_function code
= (enum built_in_function
) d
->code
;
29415 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
29416 type
= ix86_get_builtin_func_type (ftype
);
29418 if (BUILTIN_TM_LOAD_P (code
))
29420 attrs
= attrs_load
;
29421 attrs_type
= attrs_type_load
;
29423 else if (BUILTIN_TM_STORE_P (code
))
29425 attrs
= attrs_store
;
29426 attrs_type
= attrs_type_store
;
29431 attrs_type
= attrs_type_log
;
29433 decl
= add_builtin_function (d
->name
, type
, code
, BUILT_IN_NORMAL
,
29434 /* The builtin without the prefix for
29435 calling it directly. */
29436 d
->name
+ strlen ("__builtin_"),
29438 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
29439 set the TYPE_ATTRIBUTES. */
29440 decl_attributes (&TREE_TYPE (decl
), attrs_type
, ATTR_FLAG_BUILT_IN
);
29442 set_builtin_decl (code
, decl
, false);
29447 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
29448 in the current target ISA to allow the user to compile particular modules
29449 with different target specific options that differ from the command line
29452 ix86_init_mmx_sse_builtins (void)
29454 const struct builtin_description
* d
;
29455 enum ix86_builtin_func_type ftype
;
29458 /* Add all special builtins with variable number of operands. */
29459 for (i
= 0, d
= bdesc_special_args
;
29460 i
< ARRAY_SIZE (bdesc_special_args
);
29466 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
29467 def_builtin (d
->mask
, d
->name
, ftype
, d
->code
);
29470 /* Add all builtins with variable number of operands. */
29471 for (i
= 0, d
= bdesc_args
;
29472 i
< ARRAY_SIZE (bdesc_args
);
29478 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
29479 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
29482 /* pcmpestr[im] insns. */
29483 for (i
= 0, d
= bdesc_pcmpestr
;
29484 i
< ARRAY_SIZE (bdesc_pcmpestr
);
29487 if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
29488 ftype
= V16QI_FTYPE_V16QI_INT_V16QI_INT_INT
;
29490 ftype
= INT_FTYPE_V16QI_INT_V16QI_INT_INT
;
29491 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
29494 /* pcmpistr[im] insns. */
29495 for (i
= 0, d
= bdesc_pcmpistr
;
29496 i
< ARRAY_SIZE (bdesc_pcmpistr
);
29499 if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
29500 ftype
= V16QI_FTYPE_V16QI_V16QI_INT
;
29502 ftype
= INT_FTYPE_V16QI_V16QI_INT
;
29503 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
29506 /* comi/ucomi insns. */
29507 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
29509 if (d
->mask
== OPTION_MASK_ISA_SSE2
)
29510 ftype
= INT_FTYPE_V2DF_V2DF
;
29512 ftype
= INT_FTYPE_V4SF_V4SF
;
29513 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
29517 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_ldmxcsr",
29518 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_LDMXCSR
);
29519 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_stmxcsr",
29520 UNSIGNED_FTYPE_VOID
, IX86_BUILTIN_STMXCSR
);
29522 /* SSE or 3DNow!A */
29523 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
29524 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR
,
29525 IX86_BUILTIN_MASKMOVQ
);
29528 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_maskmovdqu",
29529 VOID_FTYPE_V16QI_V16QI_PCHAR
, IX86_BUILTIN_MASKMOVDQU
);
29531 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_clflush",
29532 VOID_FTYPE_PCVOID
, IX86_BUILTIN_CLFLUSH
);
29533 x86_mfence
= def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_mfence",
29534 VOID_FTYPE_VOID
, IX86_BUILTIN_MFENCE
);
29537 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor",
29538 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MONITOR
);
29539 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait",
29540 VOID_FTYPE_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MWAIT
);
29543 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenc128",
29544 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENC128
);
29545 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenclast128",
29546 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENCLAST128
);
29547 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdec128",
29548 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDEC128
);
29549 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdeclast128",
29550 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDECLAST128
);
29551 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesimc128",
29552 V2DI_FTYPE_V2DI
, IX86_BUILTIN_AESIMC128
);
29553 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aeskeygenassist128",
29554 V2DI_FTYPE_V2DI_INT
, IX86_BUILTIN_AESKEYGENASSIST128
);
29557 def_builtin_const (OPTION_MASK_ISA_PCLMUL
, "__builtin_ia32_pclmulqdq128",
29558 V2DI_FTYPE_V2DI_V2DI_INT
, IX86_BUILTIN_PCLMULQDQ128
);
29561 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand16_step",
29562 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDRAND16_STEP
);
29563 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand32_step",
29564 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDRAND32_STEP
);
29565 def_builtin (OPTION_MASK_ISA_RDRND
| OPTION_MASK_ISA_64BIT
,
29566 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG
,
29567 IX86_BUILTIN_RDRAND64_STEP
);
29570 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2df",
29571 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT
,
29572 IX86_BUILTIN_GATHERSIV2DF
);
29574 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4df",
29575 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT
,
29576 IX86_BUILTIN_GATHERSIV4DF
);
29578 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2df",
29579 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT
,
29580 IX86_BUILTIN_GATHERDIV2DF
);
29582 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4df",
29583 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT
,
29584 IX86_BUILTIN_GATHERDIV4DF
);
29586 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4sf",
29587 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT
,
29588 IX86_BUILTIN_GATHERSIV4SF
);
29590 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8sf",
29591 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT
,
29592 IX86_BUILTIN_GATHERSIV8SF
);
29594 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf",
29595 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT
,
29596 IX86_BUILTIN_GATHERDIV4SF
);
29598 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf256",
29599 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT
,
29600 IX86_BUILTIN_GATHERDIV8SF
);
29602 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2di",
29603 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT
,
29604 IX86_BUILTIN_GATHERSIV2DI
);
29606 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4di",
29607 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT
,
29608 IX86_BUILTIN_GATHERSIV4DI
);
29610 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2di",
29611 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT
,
29612 IX86_BUILTIN_GATHERDIV2DI
);
29614 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4di",
29615 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT
,
29616 IX86_BUILTIN_GATHERDIV4DI
);
29618 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4si",
29619 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT
,
29620 IX86_BUILTIN_GATHERSIV4SI
);
29622 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8si",
29623 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT
,
29624 IX86_BUILTIN_GATHERSIV8SI
);
29626 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si",
29627 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT
,
29628 IX86_BUILTIN_GATHERDIV4SI
);
29630 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si256",
29631 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT
,
29632 IX86_BUILTIN_GATHERDIV8SI
);
29634 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4df ",
29635 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT
,
29636 IX86_BUILTIN_GATHERALTSIV4DF
);
29638 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4sf256 ",
29639 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT
,
29640 IX86_BUILTIN_GATHERALTDIV8SF
);
29642 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4di ",
29643 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT
,
29644 IX86_BUILTIN_GATHERALTSIV4DI
);
29646 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4si256 ",
29647 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT
,
29648 IX86_BUILTIN_GATHERALTDIV8SI
);
29651 def_builtin (OPTION_MASK_ISA_RTM
, "__builtin_ia32_xabort",
29652 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_XABORT
);
29654 /* MMX access to the vec_init patterns. */
29655 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si",
29656 V2SI_FTYPE_INT_INT
, IX86_BUILTIN_VEC_INIT_V2SI
);
29658 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi",
29659 V4HI_FTYPE_HI_HI_HI_HI
,
29660 IX86_BUILTIN_VEC_INIT_V4HI
);
29662 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi",
29663 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI
,
29664 IX86_BUILTIN_VEC_INIT_V8QI
);
29666 /* Access to the vec_extract patterns. */
29667 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df",
29668 DOUBLE_FTYPE_V2DF_INT
, IX86_BUILTIN_VEC_EXT_V2DF
);
29669 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di",
29670 DI_FTYPE_V2DI_INT
, IX86_BUILTIN_VEC_EXT_V2DI
);
29671 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf",
29672 FLOAT_FTYPE_V4SF_INT
, IX86_BUILTIN_VEC_EXT_V4SF
);
29673 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si",
29674 SI_FTYPE_V4SI_INT
, IX86_BUILTIN_VEC_EXT_V4SI
);
29675 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi",
29676 HI_FTYPE_V8HI_INT
, IX86_BUILTIN_VEC_EXT_V8HI
);
29678 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
29679 "__builtin_ia32_vec_ext_v4hi",
29680 HI_FTYPE_V4HI_INT
, IX86_BUILTIN_VEC_EXT_V4HI
);
29682 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si",
29683 SI_FTYPE_V2SI_INT
, IX86_BUILTIN_VEC_EXT_V2SI
);
29685 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi",
29686 QI_FTYPE_V16QI_INT
, IX86_BUILTIN_VEC_EXT_V16QI
);
29688 /* Access to the vec_set patterns. */
29689 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
,
29690 "__builtin_ia32_vec_set_v2di",
29691 V2DI_FTYPE_V2DI_DI_INT
, IX86_BUILTIN_VEC_SET_V2DI
);
29693 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf",
29694 V4SF_FTYPE_V4SF_FLOAT_INT
, IX86_BUILTIN_VEC_SET_V4SF
);
29696 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si",
29697 V4SI_FTYPE_V4SI_SI_INT
, IX86_BUILTIN_VEC_SET_V4SI
);
29699 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi",
29700 V8HI_FTYPE_V8HI_HI_INT
, IX86_BUILTIN_VEC_SET_V8HI
);
29702 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
29703 "__builtin_ia32_vec_set_v4hi",
29704 V4HI_FTYPE_V4HI_HI_INT
, IX86_BUILTIN_VEC_SET_V4HI
);
29706 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi",
29707 V16QI_FTYPE_V16QI_QI_INT
, IX86_BUILTIN_VEC_SET_V16QI
);
29710 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_hi_step",
29711 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDSEED16_STEP
);
29712 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_si_step",
29713 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDSEED32_STEP
);
29714 def_builtin (OPTION_MASK_ISA_RDSEED
| OPTION_MASK_ISA_64BIT
,
29715 "__builtin_ia32_rdseed_di_step",
29716 INT_FTYPE_PULONGLONG
, IX86_BUILTIN_RDSEED64_STEP
);
29719 def_builtin (0, "__builtin_ia32_addcarryx_u32",
29720 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
, IX86_BUILTIN_ADDCARRYX32
);
29721 def_builtin (OPTION_MASK_ISA_64BIT
,
29722 "__builtin_ia32_addcarryx_u64",
29723 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
,
29724 IX86_BUILTIN_ADDCARRYX64
);
29726 /* Add FMA4 multi-arg argument instructions */
29727 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
29732 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
29733 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
29737 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
29738 to return a pointer to VERSION_DECL if the outcome of the expression
29739 formed by PREDICATE_CHAIN is true. This function will be called during
29740 version dispatch to decide which function version to execute. It returns
29741 the basic block at the end, to which more conditions can be added. */
29744 add_condition_to_bb (tree function_decl
, tree version_decl
,
29745 tree predicate_chain
, basic_block new_bb
)
29747 gimple return_stmt
;
29748 tree convert_expr
, result_var
;
29749 gimple convert_stmt
;
29750 gimple call_cond_stmt
;
29751 gimple if_else_stmt
;
29753 basic_block bb1
, bb2
, bb3
;
29756 tree cond_var
, and_expr_var
= NULL_TREE
;
29759 tree predicate_decl
, predicate_arg
;
29761 push_cfun (DECL_STRUCT_FUNCTION (function_decl
));
29763 gcc_assert (new_bb
!= NULL
);
29764 gseq
= bb_seq (new_bb
);
29767 convert_expr
= build1 (CONVERT_EXPR
, ptr_type_node
,
29768 build_fold_addr_expr (version_decl
));
29769 result_var
= create_tmp_var (ptr_type_node
, NULL
);
29770 convert_stmt
= gimple_build_assign (result_var
, convert_expr
);
29771 return_stmt
= gimple_build_return (result_var
);
29773 if (predicate_chain
== NULL_TREE
)
29775 gimple_seq_add_stmt (&gseq
, convert_stmt
);
29776 gimple_seq_add_stmt (&gseq
, return_stmt
);
29777 set_bb_seq (new_bb
, gseq
);
29778 gimple_set_bb (convert_stmt
, new_bb
);
29779 gimple_set_bb (return_stmt
, new_bb
);
29784 while (predicate_chain
!= NULL
)
29786 cond_var
= create_tmp_var (integer_type_node
, NULL
);
29787 predicate_decl
= TREE_PURPOSE (predicate_chain
);
29788 predicate_arg
= TREE_VALUE (predicate_chain
);
29789 call_cond_stmt
= gimple_build_call (predicate_decl
, 1, predicate_arg
);
29790 gimple_call_set_lhs (call_cond_stmt
, cond_var
);
29792 gimple_set_block (call_cond_stmt
, DECL_INITIAL (function_decl
));
29793 gimple_set_bb (call_cond_stmt
, new_bb
);
29794 gimple_seq_add_stmt (&gseq
, call_cond_stmt
);
29796 predicate_chain
= TREE_CHAIN (predicate_chain
);
29798 if (and_expr_var
== NULL
)
29799 and_expr_var
= cond_var
;
29802 gimple assign_stmt
;
29803 /* Use MIN_EXPR to check if any integer is zero?.
29804 and_expr_var = min_expr <cond_var, and_expr_var> */
29805 assign_stmt
= gimple_build_assign (and_expr_var
,
29806 build2 (MIN_EXPR
, integer_type_node
,
29807 cond_var
, and_expr_var
));
29809 gimple_set_block (assign_stmt
, DECL_INITIAL (function_decl
));
29810 gimple_set_bb (assign_stmt
, new_bb
);
29811 gimple_seq_add_stmt (&gseq
, assign_stmt
);
29815 if_else_stmt
= gimple_build_cond (GT_EXPR
, and_expr_var
,
29817 NULL_TREE
, NULL_TREE
);
29818 gimple_set_block (if_else_stmt
, DECL_INITIAL (function_decl
));
29819 gimple_set_bb (if_else_stmt
, new_bb
);
29820 gimple_seq_add_stmt (&gseq
, if_else_stmt
);
29822 gimple_seq_add_stmt (&gseq
, convert_stmt
);
29823 gimple_seq_add_stmt (&gseq
, return_stmt
);
29824 set_bb_seq (new_bb
, gseq
);
29827 e12
= split_block (bb1
, if_else_stmt
);
29829 e12
->flags
&= ~EDGE_FALLTHRU
;
29830 e12
->flags
|= EDGE_TRUE_VALUE
;
29832 e23
= split_block (bb2
, return_stmt
);
29834 gimple_set_bb (convert_stmt
, bb2
);
29835 gimple_set_bb (return_stmt
, bb2
);
29838 make_edge (bb1
, bb3
, EDGE_FALSE_VALUE
);
29841 make_edge (bb2
, EXIT_BLOCK_PTR_FOR_FN (cfun
), 0);
29848 /* This parses the attribute arguments to target in DECL and determines
29849 the right builtin to use to match the platform specification.
29850 It returns the priority value for this version decl. If PREDICATE_LIST
29851 is not NULL, it stores the list of cpu features that need to be checked
29852 before dispatching this function. */
29854 static unsigned int
29855 get_builtin_code_for_version (tree decl
, tree
*predicate_list
)
29858 struct cl_target_option cur_target
;
29860 struct cl_target_option
*new_target
;
29861 const char *arg_str
= NULL
;
29862 const char *attrs_str
= NULL
;
29863 char *tok_str
= NULL
;
29866 /* Priority of i386 features, greater value is higher priority. This is
29867 used to decide the order in which function dispatch must happen. For
29868 instance, a version specialized for SSE4.2 should be checked for dispatch
29869 before a version for SSE3, as SSE4.2 implies SSE3. */
29870 enum feature_priority
29891 enum feature_priority priority
= P_ZERO
;
29893 /* These are the target attribute strings for which a dispatcher is
29894 available, from fold_builtin_cpu. */
29896 static struct _feature_list
29898 const char *const name
;
29899 const enum feature_priority priority
;
29901 const feature_list
[] =
29907 {"ssse3", P_SSSE3
},
29908 {"sse4.1", P_SSE4_1
},
29909 {"sse4.2", P_SSE4_2
},
29910 {"popcnt", P_POPCNT
},
29916 static unsigned int NUM_FEATURES
29917 = sizeof (feature_list
) / sizeof (struct _feature_list
);
29921 tree predicate_chain
= NULL_TREE
;
29922 tree predicate_decl
, predicate_arg
;
29924 attrs
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
29925 gcc_assert (attrs
!= NULL
);
29927 attrs
= TREE_VALUE (TREE_VALUE (attrs
));
29929 gcc_assert (TREE_CODE (attrs
) == STRING_CST
);
29930 attrs_str
= TREE_STRING_POINTER (attrs
);
29932 /* Return priority zero for default function. */
29933 if (strcmp (attrs_str
, "default") == 0)
29936 /* Handle arch= if specified. For priority, set it to be 1 more than
29937 the best instruction set the processor can handle. For instance, if
29938 there is a version for atom and a version for ssse3 (the highest ISA
29939 priority for atom), the atom version must be checked for dispatch
29940 before the ssse3 version. */
29941 if (strstr (attrs_str
, "arch=") != NULL
)
29943 cl_target_option_save (&cur_target
, &global_options
);
29944 target_node
= ix86_valid_target_attribute_tree (attrs
, &global_options
,
29945 &global_options_set
);
29947 gcc_assert (target_node
);
29948 new_target
= TREE_TARGET_OPTION (target_node
);
29949 gcc_assert (new_target
);
29951 if (new_target
->arch_specified
&& new_target
->arch
> 0)
29953 switch (new_target
->arch
)
29955 case PROCESSOR_CORE2
:
29957 priority
= P_PROC_SSSE3
;
29959 case PROCESSOR_COREI7
:
29960 arg_str
= "corei7";
29961 priority
= P_PROC_SSE4_2
;
29963 case PROCESSOR_COREI7_AVX
:
29964 arg_str
= "corei7-avx";
29965 priority
= P_PROC_SSE4_2
;
29967 case PROCESSOR_ATOM
:
29969 priority
= P_PROC_SSSE3
;
29971 case PROCESSOR_AMDFAM10
:
29972 arg_str
= "amdfam10h";
29973 priority
= P_PROC_SSE4_a
;
29975 case PROCESSOR_BDVER1
:
29976 arg_str
= "bdver1";
29977 priority
= P_PROC_FMA
;
29979 case PROCESSOR_BDVER2
:
29980 arg_str
= "bdver2";
29981 priority
= P_PROC_FMA
;
29986 cl_target_option_restore (&global_options
, &cur_target
);
29988 if (predicate_list
&& arg_str
== NULL
)
29990 error_at (DECL_SOURCE_LOCATION (decl
),
29991 "No dispatcher found for the versioning attributes");
29995 if (predicate_list
)
29997 predicate_decl
= ix86_builtins
[(int) IX86_BUILTIN_CPU_IS
];
29998 /* For a C string literal the length includes the trailing NULL. */
29999 predicate_arg
= build_string_literal (strlen (arg_str
) + 1, arg_str
);
30000 predicate_chain
= tree_cons (predicate_decl
, predicate_arg
,
30005 /* Process feature name. */
30006 tok_str
= (char *) xmalloc (strlen (attrs_str
) + 1);
30007 strcpy (tok_str
, attrs_str
);
30008 token
= strtok (tok_str
, ",");
30009 predicate_decl
= ix86_builtins
[(int) IX86_BUILTIN_CPU_SUPPORTS
];
30011 while (token
!= NULL
)
30013 /* Do not process "arch=" */
30014 if (strncmp (token
, "arch=", 5) == 0)
30016 token
= strtok (NULL
, ",");
30019 for (i
= 0; i
< NUM_FEATURES
; ++i
)
30021 if (strcmp (token
, feature_list
[i
].name
) == 0)
30023 if (predicate_list
)
30025 predicate_arg
= build_string_literal (
30026 strlen (feature_list
[i
].name
) + 1,
30027 feature_list
[i
].name
);
30028 predicate_chain
= tree_cons (predicate_decl
, predicate_arg
,
30031 /* Find the maximum priority feature. */
30032 if (feature_list
[i
].priority
> priority
)
30033 priority
= feature_list
[i
].priority
;
30038 if (predicate_list
&& i
== NUM_FEATURES
)
30040 error_at (DECL_SOURCE_LOCATION (decl
),
30041 "No dispatcher found for %s", token
);
30044 token
= strtok (NULL
, ",");
30048 if (predicate_list
&& predicate_chain
== NULL_TREE
)
30050 error_at (DECL_SOURCE_LOCATION (decl
),
30051 "No dispatcher found for the versioning attributes : %s",
30055 else if (predicate_list
)
30057 predicate_chain
= nreverse (predicate_chain
);
30058 *predicate_list
= predicate_chain
;
30064 /* This compares the priority of target features in function DECL1
30065 and DECL2. It returns positive value if DECL1 is higher priority,
30066 negative value if DECL2 is higher priority and 0 if they are the
30070 ix86_compare_version_priority (tree decl1
, tree decl2
)
30072 unsigned int priority1
= get_builtin_code_for_version (decl1
, NULL
);
30073 unsigned int priority2
= get_builtin_code_for_version (decl2
, NULL
);
30075 return (int)priority1
- (int)priority2
;
30078 /* V1 and V2 point to function versions with different priorities
30079 based on the target ISA. This function compares their priorities. */
30082 feature_compare (const void *v1
, const void *v2
)
30084 typedef struct _function_version_info
30087 tree predicate_chain
;
30088 unsigned int dispatch_priority
;
30089 } function_version_info
;
30091 const function_version_info c1
= *(const function_version_info
*)v1
;
30092 const function_version_info c2
= *(const function_version_info
*)v2
;
30093 return (c2
.dispatch_priority
- c1
.dispatch_priority
);
30096 /* This function generates the dispatch function for
30097 multi-versioned functions. DISPATCH_DECL is the function which will
30098 contain the dispatch logic. FNDECLS are the function choices for
30099 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
30100 in DISPATCH_DECL in which the dispatch code is generated. */
30103 dispatch_function_versions (tree dispatch_decl
,
30105 basic_block
*empty_bb
)
30108 gimple ifunc_cpu_init_stmt
;
30112 vec
<tree
> *fndecls
;
30113 unsigned int num_versions
= 0;
30114 unsigned int actual_versions
= 0;
30117 struct _function_version_info
30120 tree predicate_chain
;
30121 unsigned int dispatch_priority
;
30122 }*function_version_info
;
30124 gcc_assert (dispatch_decl
!= NULL
30125 && fndecls_p
!= NULL
30126 && empty_bb
!= NULL
);
30128 /*fndecls_p is actually a vector. */
30129 fndecls
= static_cast<vec
<tree
> *> (fndecls_p
);
30131 /* At least one more version other than the default. */
30132 num_versions
= fndecls
->length ();
30133 gcc_assert (num_versions
>= 2);
30135 function_version_info
= (struct _function_version_info
*)
30136 XNEWVEC (struct _function_version_info
, (num_versions
- 1));
30138 /* The first version in the vector is the default decl. */
30139 default_decl
= (*fndecls
)[0];
30141 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl
));
30143 gseq
= bb_seq (*empty_bb
);
30144 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
30145 constructors, so explicity call __builtin_cpu_init here. */
30146 ifunc_cpu_init_stmt
= gimple_build_call_vec (
30147 ix86_builtins
[(int) IX86_BUILTIN_CPU_INIT
], vNULL
);
30148 gimple_seq_add_stmt (&gseq
, ifunc_cpu_init_stmt
);
30149 gimple_set_bb (ifunc_cpu_init_stmt
, *empty_bb
);
30150 set_bb_seq (*empty_bb
, gseq
);
30155 for (ix
= 1; fndecls
->iterate (ix
, &ele
); ++ix
)
30157 tree version_decl
= ele
;
30158 tree predicate_chain
= NULL_TREE
;
30159 unsigned int priority
;
30160 /* Get attribute string, parse it and find the right predicate decl.
30161 The predicate function could be a lengthy combination of many
30162 features, like arch-type and various isa-variants. */
30163 priority
= get_builtin_code_for_version (version_decl
,
30166 if (predicate_chain
== NULL_TREE
)
30169 function_version_info
[actual_versions
].version_decl
= version_decl
;
30170 function_version_info
[actual_versions
].predicate_chain
30172 function_version_info
[actual_versions
].dispatch_priority
= priority
;
30176 /* Sort the versions according to descending order of dispatch priority. The
30177 priority is based on the ISA. This is not a perfect solution. There
30178 could still be ambiguity. If more than one function version is suitable
30179 to execute, which one should be dispatched? In future, allow the user
30180 to specify a dispatch priority next to the version. */
30181 qsort (function_version_info
, actual_versions
,
30182 sizeof (struct _function_version_info
), feature_compare
);
30184 for (i
= 0; i
< actual_versions
; ++i
)
30185 *empty_bb
= add_condition_to_bb (dispatch_decl
,
30186 function_version_info
[i
].version_decl
,
30187 function_version_info
[i
].predicate_chain
,
30190 /* dispatch default version at the end. */
30191 *empty_bb
= add_condition_to_bb (dispatch_decl
, default_decl
,
30194 free (function_version_info
);
/* Comparator function to be used in qsort routine to sort attribute
   specification strings to "target".  V1 and V2 point to elements of a
   char * array, so one extra level of indirection is involved.  */

static int
attr_strcmp (const void *v1, const void *v2)
{
  const char *c1 = *(char *const*)v1;
  const char *c2 = *(char *const*)v2;
  return strcmp (c1, c2);
}
30209 /* ARGLIST is the argument to target attribute. This function tokenizes
30210 the comma separated arguments, sorts them and returns a string which
30211 is a unique identifier for the comma separated arguments. It also
30212 replaces non-identifier characters "=,-" with "_". */
30215 sorted_attr_string (tree arglist
)
30218 size_t str_len_sum
= 0;
30219 char **args
= NULL
;
30220 char *attr_str
, *ret_str
;
30222 unsigned int argnum
= 1;
30225 for (arg
= arglist
; arg
; arg
= TREE_CHAIN (arg
))
30227 const char *str
= TREE_STRING_POINTER (TREE_VALUE (arg
));
30228 size_t len
= strlen (str
);
30229 str_len_sum
+= len
+ 1;
30230 if (arg
!= arglist
)
30232 for (i
= 0; i
< strlen (str
); i
++)
30237 attr_str
= XNEWVEC (char, str_len_sum
);
30239 for (arg
= arglist
; arg
; arg
= TREE_CHAIN (arg
))
30241 const char *str
= TREE_STRING_POINTER (TREE_VALUE (arg
));
30242 size_t len
= strlen (str
);
30243 memcpy (attr_str
+ str_len_sum
, str
, len
);
30244 attr_str
[str_len_sum
+ len
] = TREE_CHAIN (arg
) ? ',' : '\0';
30245 str_len_sum
+= len
+ 1;
30248 /* Replace "=,-" with "_". */
30249 for (i
= 0; i
< strlen (attr_str
); i
++)
30250 if (attr_str
[i
] == '=' || attr_str
[i
]== '-')
30256 args
= XNEWVEC (char *, argnum
);
30259 attr
= strtok (attr_str
, ",");
30260 while (attr
!= NULL
)
30264 attr
= strtok (NULL
, ",");
30267 qsort (args
, argnum
, sizeof (char *), attr_strcmp
);
30269 ret_str
= XNEWVEC (char, str_len_sum
);
30271 for (i
= 0; i
< argnum
; i
++)
30273 size_t len
= strlen (args
[i
]);
30274 memcpy (ret_str
+ str_len_sum
, args
[i
], len
);
30275 ret_str
[str_len_sum
+ len
] = i
< argnum
- 1 ? '_' : '\0';
30276 str_len_sum
+= len
+ 1;
30280 XDELETEVEC (attr_str
);
30284 /* This function changes the assembler name for functions that are
30285 versions. If DECL is a function version and has a "target"
30286 attribute, it appends the attribute string to its assembler name. */
30289 ix86_mangle_function_version_assembler_name (tree decl
, tree id
)
30292 const char *orig_name
, *version_string
;
30293 char *attr_str
, *assembler_name
;
30295 if (DECL_DECLARED_INLINE_P (decl
)
30296 && lookup_attribute ("gnu_inline",
30297 DECL_ATTRIBUTES (decl
)))
30298 error_at (DECL_SOURCE_LOCATION (decl
),
30299 "Function versions cannot be marked as gnu_inline,"
30300 " bodies have to be generated");
30302 if (DECL_VIRTUAL_P (decl
)
30303 || DECL_VINDEX (decl
))
30304 sorry ("Virtual function multiversioning not supported");
30306 version_attr
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
30308 /* target attribute string cannot be NULL. */
30309 gcc_assert (version_attr
!= NULL_TREE
);
30311 orig_name
= IDENTIFIER_POINTER (id
);
30313 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr
)));
30315 if (strcmp (version_string
, "default") == 0)
30318 attr_str
= sorted_attr_string (TREE_VALUE (version_attr
));
30319 assembler_name
= XNEWVEC (char, strlen (orig_name
) + strlen (attr_str
) + 2);
30321 sprintf (assembler_name
, "%s.%s", orig_name
, attr_str
);
30323 /* Allow assembler name to be modified if already set. */
30324 if (DECL_ASSEMBLER_NAME_SET_P (decl
))
30325 SET_DECL_RTL (decl
, NULL
);
30327 tree ret
= get_identifier (assembler_name
);
30328 XDELETEVEC (attr_str
);
30329 XDELETEVEC (assembler_name
);
30333 /* This function returns true if FN1 and FN2 are versions of the same function,
30334 that is, the target strings of the function decls are different. This assumes
30335 that FN1 and FN2 have the same signature. */
30338 ix86_function_versions (tree fn1
, tree fn2
)
30341 char *target1
, *target2
;
30344 if (TREE_CODE (fn1
) != FUNCTION_DECL
30345 || TREE_CODE (fn2
) != FUNCTION_DECL
)
30348 attr1
= lookup_attribute ("target", DECL_ATTRIBUTES (fn1
));
30349 attr2
= lookup_attribute ("target", DECL_ATTRIBUTES (fn2
));
30351 /* At least one function decl should have the target attribute specified. */
30352 if (attr1
== NULL_TREE
&& attr2
== NULL_TREE
)
30355 /* Diagnose missing target attribute if one of the decls is already
30356 multi-versioned. */
30357 if (attr1
== NULL_TREE
|| attr2
== NULL_TREE
)
30359 if (DECL_FUNCTION_VERSIONED (fn1
) || DECL_FUNCTION_VERSIONED (fn2
))
30361 if (attr2
!= NULL_TREE
)
30368 error_at (DECL_SOURCE_LOCATION (fn2
),
30369 "missing %<target%> attribute for multi-versioned %D",
30371 inform (DECL_SOURCE_LOCATION (fn1
),
30372 "previous declaration of %D", fn1
);
30373 /* Prevent diagnosing of the same error multiple times. */
30374 DECL_ATTRIBUTES (fn2
)
30375 = tree_cons (get_identifier ("target"),
30376 copy_node (TREE_VALUE (attr1
)),
30377 DECL_ATTRIBUTES (fn2
));
30382 target1
= sorted_attr_string (TREE_VALUE (attr1
));
30383 target2
= sorted_attr_string (TREE_VALUE (attr2
));
30385 /* The sorted target strings must be different for fn1 and fn2
30387 if (strcmp (target1
, target2
) == 0)
30392 XDELETEVEC (target1
);
30393 XDELETEVEC (target2
);
30399 ix86_mangle_decl_assembler_name (tree decl
, tree id
)
30401 /* For function version, add the target suffix to the assembler name. */
30402 if (TREE_CODE (decl
) == FUNCTION_DECL
30403 && DECL_FUNCTION_VERSIONED (decl
))
30404 id
= ix86_mangle_function_version_assembler_name (decl
, id
);
30405 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
30406 id
= SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl
, id
);
30412 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
30413 is true, append the full path name of the source file. */
30416 make_name (tree decl
, const char *suffix
, bool make_unique
)
30418 char *global_var_name
;
30421 const char *unique_name
= NULL
;
30423 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
30425 /* Get a unique name that can be used globally without any chances
30426 of collision at link time. */
30428 unique_name
= IDENTIFIER_POINTER (get_file_function_name ("\0"));
30430 name_len
= strlen (name
) + strlen (suffix
) + 2;
30433 name_len
+= strlen (unique_name
) + 1;
30434 global_var_name
= XNEWVEC (char, name_len
);
30436 /* Use '.' to concatenate names as it is demangler friendly. */
30438 snprintf (global_var_name
, name_len
, "%s.%s.%s", name
, unique_name
,
30441 snprintf (global_var_name
, name_len
, "%s.%s", name
, suffix
);
30443 return global_var_name
;
#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)

/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Return the decl created.  */

static tree
make_dispatcher_decl (const tree decl)
{
  tree func_decl;
  char *func_name;
  tree fn_type, func_type;
  bool is_uniq = false;

  /* Non-public versions get a unique (file-qualified) dispatcher name
     since the IFUNC itself must be externally visible.  */
  if (TREE_PUBLIC (decl) == 0)
    is_uniq = true;

  func_name = make_name (decl, "ifunc", is_uniq);

  fn_type = TREE_TYPE (decl);
  func_type = build_function_type (TREE_TYPE (fn_type),
				   TYPE_ARG_TYPES (fn_type));

  func_decl = build_fn_decl (func_name, func_type);
  XDELETEVEC (func_name);
  TREE_USED (func_decl) = 1;
  DECL_CONTEXT (func_decl) = NULL_TREE;
  DECL_INITIAL (func_decl) = error_mark_node;
  DECL_ARTIFICIAL (func_decl) = 1;
  /* Mark this func as external, the resolver will flip it again if
     it gets generated.  */
  DECL_EXTERNAL (func_decl) = 1;
  /* This will be of type IFUNCs have to be externally visible.  */
  TREE_PUBLIC (func_decl) = 1;

  return func_decl;
}

#endif
30486 /* Returns true if decl is multi-versioned and DECL is the default function,
30487 that is it is not tagged with target specific optimization. */
30490 is_function_default_version (const tree decl
)
30492 if (TREE_CODE (decl
) != FUNCTION_DECL
30493 || !DECL_FUNCTION_VERSIONED (decl
))
30495 tree attr
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
30497 attr
= TREE_VALUE (TREE_VALUE (attr
));
30498 return (TREE_CODE (attr
) == STRING_CST
30499 && strcmp (TREE_STRING_POINTER (attr
), "default") == 0);
30502 /* Make a dispatcher declaration for the multi-versioned function DECL.
30503 Calls to DECL function will be replaced with calls to the dispatcher
30504 by the front-end. Returns the decl of the dispatcher function. */
30507 ix86_get_function_versions_dispatcher (void *decl
)
30509 tree fn
= (tree
) decl
;
30510 struct cgraph_node
*node
= NULL
;
30511 struct cgraph_node
*default_node
= NULL
;
30512 struct cgraph_function_version_info
*node_v
= NULL
;
30513 struct cgraph_function_version_info
*first_v
= NULL
;
30515 tree dispatch_decl
= NULL
;
30517 struct cgraph_function_version_info
*default_version_info
= NULL
;
30519 gcc_assert (fn
!= NULL
&& DECL_FUNCTION_VERSIONED (fn
));
30521 node
= cgraph_get_node (fn
);
30522 gcc_assert (node
!= NULL
);
30524 node_v
= get_cgraph_node_version (node
);
30525 gcc_assert (node_v
!= NULL
);
30527 if (node_v
->dispatcher_resolver
!= NULL
)
30528 return node_v
->dispatcher_resolver
;
30530 /* Find the default version and make it the first node. */
30532 /* Go to the beginning of the chain. */
30533 while (first_v
->prev
!= NULL
)
30534 first_v
= first_v
->prev
;
30535 default_version_info
= first_v
;
30536 while (default_version_info
!= NULL
)
30538 if (is_function_default_version
30539 (default_version_info
->this_node
->decl
))
30541 default_version_info
= default_version_info
->next
;
30544 /* If there is no default node, just return NULL. */
30545 if (default_version_info
== NULL
)
30548 /* Make default info the first node. */
30549 if (first_v
!= default_version_info
)
30551 default_version_info
->prev
->next
= default_version_info
->next
;
30552 if (default_version_info
->next
)
30553 default_version_info
->next
->prev
= default_version_info
->prev
;
30554 first_v
->prev
= default_version_info
;
30555 default_version_info
->next
= first_v
;
30556 default_version_info
->prev
= NULL
;
30559 default_node
= default_version_info
->this_node
;
30561 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
30562 if (targetm
.has_ifunc_p ())
30564 struct cgraph_function_version_info
*it_v
= NULL
;
30565 struct cgraph_node
*dispatcher_node
= NULL
;
30566 struct cgraph_function_version_info
*dispatcher_version_info
= NULL
;
30568 /* Right now, the dispatching is done via ifunc. */
30569 dispatch_decl
= make_dispatcher_decl (default_node
->decl
);
30571 dispatcher_node
= cgraph_get_create_node (dispatch_decl
);
30572 gcc_assert (dispatcher_node
!= NULL
);
30573 dispatcher_node
->dispatcher_function
= 1;
30574 dispatcher_version_info
30575 = insert_new_cgraph_node_version (dispatcher_node
);
30576 dispatcher_version_info
->next
= default_version_info
;
30577 dispatcher_node
->definition
= 1;
30579 /* Set the dispatcher for all the versions. */
30580 it_v
= default_version_info
;
30581 while (it_v
!= NULL
)
30583 it_v
->dispatcher_resolver
= dispatch_decl
;
30590 error_at (DECL_SOURCE_LOCATION (default_node
->decl
),
30591 "multiversioning needs ifunc which is not supported "
30595 return dispatch_decl
;
30598 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
30602 make_attribute (const char *name
, const char *arg_name
, tree chain
)
30605 tree attr_arg_name
;
30609 attr_name
= get_identifier (name
);
30610 attr_arg_name
= build_string (strlen (arg_name
), arg_name
);
30611 attr_args
= tree_cons (NULL_TREE
, attr_arg_name
, NULL_TREE
);
30612 attr
= tree_cons (attr_name
, attr_args
, chain
);
30616 /* Make the resolver function decl to dispatch the versions of
30617 a multi-versioned function, DEFAULT_DECL. Create an
30618 empty basic block in the resolver and store the pointer in
30619 EMPTY_BB. Return the decl of the resolver function. */
30622 make_resolver_func (const tree default_decl
,
30623 const tree dispatch_decl
,
30624 basic_block
*empty_bb
)
30626 char *resolver_name
;
30627 tree decl
, type
, decl_name
, t
;
30628 bool is_uniq
= false;
30630 /* IFUNC's have to be globally visible. So, if the default_decl is
30631 not, then the name of the IFUNC should be made unique. */
30632 if (TREE_PUBLIC (default_decl
) == 0)
30635 /* Append the filename to the resolver function if the versions are
30636 not externally visible. This is because the resolver function has
30637 to be externally visible for the loader to find it. So, appending
30638 the filename will prevent conflicts with a resolver function from
30639 another module which is based on the same version name. */
30640 resolver_name
= make_name (default_decl
, "resolver", is_uniq
);
30642 /* The resolver function should return a (void *). */
30643 type
= build_function_type_list (ptr_type_node
, NULL_TREE
);
30645 decl
= build_fn_decl (resolver_name
, type
);
30646 decl_name
= get_identifier (resolver_name
);
30647 SET_DECL_ASSEMBLER_NAME (decl
, decl_name
);
30649 DECL_NAME (decl
) = decl_name
;
30650 TREE_USED (decl
) = 1;
30651 DECL_ARTIFICIAL (decl
) = 1;
30652 DECL_IGNORED_P (decl
) = 0;
30653 /* IFUNC resolvers have to be externally visible. */
30654 TREE_PUBLIC (decl
) = 1;
30655 DECL_UNINLINABLE (decl
) = 1;
30657 /* Resolver is not external, body is generated. */
30658 DECL_EXTERNAL (decl
) = 0;
30659 DECL_EXTERNAL (dispatch_decl
) = 0;
30661 DECL_CONTEXT (decl
) = NULL_TREE
;
30662 DECL_INITIAL (decl
) = make_node (BLOCK
);
30663 DECL_STATIC_CONSTRUCTOR (decl
) = 0;
30665 if (DECL_COMDAT_GROUP (default_decl
)
30666 || TREE_PUBLIC (default_decl
))
30668 /* In this case, each translation unit with a call to this
30669 versioned function will put out a resolver. Ensure it
30670 is comdat to keep just one copy. */
30671 DECL_COMDAT (decl
) = 1;
30672 make_decl_one_only (decl
, DECL_ASSEMBLER_NAME (decl
));
30674 /* Build result decl and add to function_decl. */
30675 t
= build_decl (UNKNOWN_LOCATION
, RESULT_DECL
, NULL_TREE
, ptr_type_node
);
30676 DECL_ARTIFICIAL (t
) = 1;
30677 DECL_IGNORED_P (t
) = 1;
30678 DECL_RESULT (decl
) = t
;
30680 gimplify_function_tree (decl
);
30681 push_cfun (DECL_STRUCT_FUNCTION (decl
));
30682 *empty_bb
= init_lowered_empty_function (decl
, false);
30684 cgraph_add_new_function (decl
, true);
30685 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl
));
30689 gcc_assert (dispatch_decl
!= NULL
);
30690 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
30691 DECL_ATTRIBUTES (dispatch_decl
)
30692 = make_attribute ("ifunc", resolver_name
, DECL_ATTRIBUTES (dispatch_decl
));
30694 /* Create the alias for dispatch to resolver here. */
30695 /*cgraph_create_function_alias (dispatch_decl, decl);*/
30696 cgraph_same_body_alias (NULL
, dispatch_decl
, decl
);
30697 XDELETEVEC (resolver_name
);
30701 /* Generate the dispatching code body to dispatch multi-versioned function
30702 DECL. The target hook is called to process the "target" attributes and
30703 provide the code to dispatch the right function at run-time. NODE points
30704 to the dispatcher decl whose body will be created. */
30707 ix86_generate_version_dispatcher_body (void *node_p
)
30709 tree resolver_decl
;
30710 basic_block empty_bb
;
30711 vec
<tree
> fn_ver_vec
= vNULL
;
30712 tree default_ver_decl
;
30713 struct cgraph_node
*versn
;
30714 struct cgraph_node
*node
;
30716 struct cgraph_function_version_info
*node_version_info
= NULL
;
30717 struct cgraph_function_version_info
*versn_info
= NULL
;
30719 node
= (cgraph_node
*)node_p
;
30721 node_version_info
= get_cgraph_node_version (node
);
30722 gcc_assert (node
->dispatcher_function
30723 && node_version_info
!= NULL
);
30725 if (node_version_info
->dispatcher_resolver
)
30726 return node_version_info
->dispatcher_resolver
;
30728 /* The first version in the chain corresponds to the default version. */
30729 default_ver_decl
= node_version_info
->next
->this_node
->decl
;
30731 /* node is going to be an alias, so remove the finalized bit. */
30732 node
->definition
= false;
30734 resolver_decl
= make_resolver_func (default_ver_decl
,
30735 node
->decl
, &empty_bb
);
30737 node_version_info
->dispatcher_resolver
= resolver_decl
;
30739 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl
));
30741 fn_ver_vec
.create (2);
30743 for (versn_info
= node_version_info
->next
; versn_info
;
30744 versn_info
= versn_info
->next
)
30746 versn
= versn_info
->this_node
;
30747 /* Check for virtual functions here again, as by this time it should
30748 have been determined if this function needs a vtable index or
30749 not. This happens for methods in derived classes that override
30750 virtual methods in base classes but are not explicitly marked as
30752 if (DECL_VINDEX (versn
->decl
))
30753 sorry ("Virtual function multiversioning not supported");
30755 fn_ver_vec
.safe_push (versn
->decl
);
30758 dispatch_function_versions (resolver_decl
, &fn_ver_vec
, &empty_bb
);
30759 fn_ver_vec
.release ();
30760 rebuild_cgraph_edges ();
30762 return resolver_decl
;
30764 /* This builds the processor_model struct type defined in
30765 libgcc/config/i386/cpuinfo.c */
30768 build_processor_model_struct (void)
30770 const char *field_name
[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
30772 tree field
= NULL_TREE
, field_chain
= NULL_TREE
;
30774 tree type
= make_node (RECORD_TYPE
);
30776 /* The first 3 fields are unsigned int. */
30777 for (i
= 0; i
< 3; ++i
)
30779 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
30780 get_identifier (field_name
[i
]), unsigned_type_node
);
30781 if (field_chain
!= NULL_TREE
)
30782 DECL_CHAIN (field
) = field_chain
;
30783 field_chain
= field
;
30786 /* The last field is an array of unsigned integers of size one. */
30787 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
30788 get_identifier (field_name
[3]),
30789 build_array_type (unsigned_type_node
,
30790 build_index_type (size_one_node
)));
30791 if (field_chain
!= NULL_TREE
)
30792 DECL_CHAIN (field
) = field_chain
;
30793 field_chain
= field
;
30795 finish_builtin_struct (type
, "__processor_model", field_chain
, NULL_TREE
);
30799 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
30802 make_var_decl (tree type
, const char *name
)
30806 new_decl
= build_decl (UNKNOWN_LOCATION
,
30808 get_identifier(name
),
30811 DECL_EXTERNAL (new_decl
) = 1;
30812 TREE_STATIC (new_decl
) = 1;
30813 TREE_PUBLIC (new_decl
) = 1;
30814 DECL_INITIAL (new_decl
) = 0;
30815 DECL_ARTIFICIAL (new_decl
) = 0;
30816 DECL_PRESERVE_P (new_decl
) = 1;
30818 make_decl_one_only (new_decl
, DECL_ASSEMBLER_NAME (new_decl
));
30819 assemble_variable (new_decl
, 0, 0, 0);
30824 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
30825 into an integer defined in libgcc/config/i386/cpuinfo.c */
30828 fold_builtin_cpu (tree fndecl
, tree
*args
)
30831 enum ix86_builtins fn_code
= (enum ix86_builtins
)
30832 DECL_FUNCTION_CODE (fndecl
);
30833 tree param_string_cst
= NULL
;
30835 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
30836 enum processor_features
30852 /* These are the values for vendor types and cpu types and subtypes
30853 in cpuinfo.c. Cpu types and subtypes should be subtracted by
30854 the corresponding start value. */
30855 enum processor_model
30866 M_CPU_SUBTYPE_START
,
30867 M_INTEL_COREI7_NEHALEM
,
30868 M_INTEL_COREI7_WESTMERE
,
30869 M_INTEL_COREI7_SANDYBRIDGE
,
30870 M_AMDFAM10H_BARCELONA
,
30871 M_AMDFAM10H_SHANGHAI
,
30872 M_AMDFAM10H_ISTANBUL
,
30873 M_AMDFAM15H_BDVER1
,
30874 M_AMDFAM15H_BDVER2
,
30875 M_AMDFAM15H_BDVER3
,
30879 static struct _arch_names_table
30881 const char *const name
;
30882 const enum processor_model model
;
30884 const arch_names_table
[] =
30887 {"intel", M_INTEL
},
30888 {"atom", M_INTEL_ATOM
},
30889 {"slm", M_INTEL_SLM
},
30890 {"core2", M_INTEL_CORE2
},
30891 {"corei7", M_INTEL_COREI7
},
30892 {"nehalem", M_INTEL_COREI7_NEHALEM
},
30893 {"westmere", M_INTEL_COREI7_WESTMERE
},
30894 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE
},
30895 {"amdfam10h", M_AMDFAM10H
},
30896 {"barcelona", M_AMDFAM10H_BARCELONA
},
30897 {"shanghai", M_AMDFAM10H_SHANGHAI
},
30898 {"istanbul", M_AMDFAM10H_ISTANBUL
},
30899 {"amdfam15h", M_AMDFAM15H
},
30900 {"bdver1", M_AMDFAM15H_BDVER1
},
30901 {"bdver2", M_AMDFAM15H_BDVER2
},
30902 {"bdver3", M_AMDFAM15H_BDVER3
},
30903 {"bdver4", M_AMDFAM15H_BDVER4
},
30906 static struct _isa_names_table
30908 const char *const name
;
30909 const enum processor_features feature
;
30911 const isa_names_table
[] =
30915 {"popcnt", F_POPCNT
},
30919 {"ssse3", F_SSSE3
},
30920 {"sse4.1", F_SSE4_1
},
30921 {"sse4.2", F_SSE4_2
},
30926 tree __processor_model_type
= build_processor_model_struct ();
30927 tree __cpu_model_var
= make_var_decl (__processor_model_type
,
30931 varpool_add_new_variable (__cpu_model_var
);
30933 gcc_assert ((args
!= NULL
) && (*args
!= NULL
));
30935 param_string_cst
= *args
;
30936 while (param_string_cst
30937 && TREE_CODE (param_string_cst
) != STRING_CST
)
30939 /* *args must be a expr that can contain other EXPRS leading to a
30941 if (!EXPR_P (param_string_cst
))
30943 error ("Parameter to builtin must be a string constant or literal");
30944 return integer_zero_node
;
30946 param_string_cst
= TREE_OPERAND (EXPR_CHECK (param_string_cst
), 0);
30949 gcc_assert (param_string_cst
);
30951 if (fn_code
== IX86_BUILTIN_CPU_IS
)
30957 unsigned int field_val
= 0;
30958 unsigned int NUM_ARCH_NAMES
30959 = sizeof (arch_names_table
) / sizeof (struct _arch_names_table
);
30961 for (i
= 0; i
< NUM_ARCH_NAMES
; i
++)
30962 if (strcmp (arch_names_table
[i
].name
,
30963 TREE_STRING_POINTER (param_string_cst
)) == 0)
30966 if (i
== NUM_ARCH_NAMES
)
30968 error ("Parameter to builtin not valid: %s",
30969 TREE_STRING_POINTER (param_string_cst
));
30970 return integer_zero_node
;
30973 field
= TYPE_FIELDS (__processor_model_type
);
30974 field_val
= arch_names_table
[i
].model
;
30976 /* CPU types are stored in the next field. */
30977 if (field_val
> M_CPU_TYPE_START
30978 && field_val
< M_CPU_SUBTYPE_START
)
30980 field
= DECL_CHAIN (field
);
30981 field_val
-= M_CPU_TYPE_START
;
30984 /* CPU subtypes are stored in the next field. */
30985 if (field_val
> M_CPU_SUBTYPE_START
)
30987 field
= DECL_CHAIN ( DECL_CHAIN (field
));
30988 field_val
-= M_CPU_SUBTYPE_START
;
30991 /* Get the appropriate field in __cpu_model. */
30992 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
30995 /* Check the value. */
30996 final
= build2 (EQ_EXPR
, unsigned_type_node
, ref
,
30997 build_int_cstu (unsigned_type_node
, field_val
));
30998 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
31000 else if (fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
31007 unsigned int field_val
= 0;
31008 unsigned int NUM_ISA_NAMES
31009 = sizeof (isa_names_table
) / sizeof (struct _isa_names_table
);
31011 for (i
= 0; i
< NUM_ISA_NAMES
; i
++)
31012 if (strcmp (isa_names_table
[i
].name
,
31013 TREE_STRING_POINTER (param_string_cst
)) == 0)
31016 if (i
== NUM_ISA_NAMES
)
31018 error ("Parameter to builtin not valid: %s",
31019 TREE_STRING_POINTER (param_string_cst
));
31020 return integer_zero_node
;
31023 field
= TYPE_FIELDS (__processor_model_type
);
31024 /* Get the last field, which is __cpu_features. */
31025 while (DECL_CHAIN (field
))
31026 field
= DECL_CHAIN (field
);
31028 /* Get the appropriate field: __cpu_model.__cpu_features */
31029 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
31032 /* Access the 0th element of __cpu_features array. */
31033 array_elt
= build4 (ARRAY_REF
, unsigned_type_node
, ref
,
31034 integer_zero_node
, NULL_TREE
, NULL_TREE
);
31036 field_val
= (1 << isa_names_table
[i
].feature
);
31037 /* Return __cpu_model.__cpu_features[0] & field_val */
31038 final
= build2 (BIT_AND_EXPR
, unsigned_type_node
, array_elt
,
31039 build_int_cstu (unsigned_type_node
, field_val
));
31040 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
31042 gcc_unreachable ();
31046 ix86_fold_builtin (tree fndecl
, int n_args
,
31047 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
31049 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
31051 enum ix86_builtins fn_code
= (enum ix86_builtins
)
31052 DECL_FUNCTION_CODE (fndecl
);
31053 if (fn_code
== IX86_BUILTIN_CPU_IS
31054 || fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
31056 gcc_assert (n_args
== 1);
31057 return fold_builtin_cpu (fndecl
, args
);
31061 #ifdef SUBTARGET_FOLD_BUILTIN
31062 return SUBTARGET_FOLD_BUILTIN (fndecl
, n_args
, args
, ignore
);
31068 /* Make builtins to detect cpu type and features supported. NAME is
31069 the builtin name, CODE is the builtin code, and FTYPE is the function
31070 type of the builtin. */
31073 make_cpu_type_builtin (const char* name
, int code
,
31074 enum ix86_builtin_func_type ftype
, bool is_const
)
31079 type
= ix86_get_builtin_func_type (ftype
);
31080 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
31082 gcc_assert (decl
!= NULL_TREE
);
31083 ix86_builtins
[(int) code
] = decl
;
31084 TREE_READONLY (decl
) = is_const
;
31087 /* Make builtins to get CPU type and features supported. The created
31090 __builtin_cpu_init (), to detect cpu type and features,
31091 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
31092 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
31096 ix86_init_platform_type_builtins (void)
31098 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT
,
31099 INT_FTYPE_VOID
, false);
31100 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS
,
31101 INT_FTYPE_PCCHAR
, true);
31102 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS
,
31103 INT_FTYPE_PCCHAR
, true);
31106 /* Internal method for ix86_init_builtins. */
31109 ix86_init_builtins_va_builtins_abi (void)
31111 tree ms_va_ref
, sysv_va_ref
;
31112 tree fnvoid_va_end_ms
, fnvoid_va_end_sysv
;
31113 tree fnvoid_va_start_ms
, fnvoid_va_start_sysv
;
31114 tree fnvoid_va_copy_ms
, fnvoid_va_copy_sysv
;
31115 tree fnattr_ms
= NULL_TREE
, fnattr_sysv
= NULL_TREE
;
31119 fnattr_ms
= build_tree_list (get_identifier ("ms_abi"), NULL_TREE
);
31120 fnattr_sysv
= build_tree_list (get_identifier ("sysv_abi"), NULL_TREE
);
31121 ms_va_ref
= build_reference_type (ms_va_list_type_node
);
31123 build_pointer_type (TREE_TYPE (sysv_va_list_type_node
));
31126 build_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
31127 fnvoid_va_start_ms
=
31128 build_varargs_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
31129 fnvoid_va_end_sysv
=
31130 build_function_type_list (void_type_node
, sysv_va_ref
, NULL_TREE
);
31131 fnvoid_va_start_sysv
=
31132 build_varargs_function_type_list (void_type_node
, sysv_va_ref
,
31134 fnvoid_va_copy_ms
=
31135 build_function_type_list (void_type_node
, ms_va_ref
, ms_va_list_type_node
,
31137 fnvoid_va_copy_sysv
=
31138 build_function_type_list (void_type_node
, sysv_va_ref
,
31139 sysv_va_ref
, NULL_TREE
);
31141 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms
,
31142 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
31143 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms
,
31144 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
31145 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms
,
31146 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
31147 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv
,
31148 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
31149 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv
,
31150 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
31151 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv
,
31152 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
31156 ix86_init_builtin_types (void)
31158 tree float128_type_node
, float80_type_node
;
31160 /* The __float80 type. */
31161 float80_type_node
= long_double_type_node
;
31162 if (TYPE_MODE (float80_type_node
) != XFmode
)
31164 /* The __float80 type. */
31165 float80_type_node
= make_node (REAL_TYPE
);
31167 TYPE_PRECISION (float80_type_node
) = 80;
31168 layout_type (float80_type_node
);
31170 lang_hooks
.types
.register_builtin_type (float80_type_node
, "__float80");
31172 /* The __float128 type. */
31173 float128_type_node
= make_node (REAL_TYPE
);
31174 TYPE_PRECISION (float128_type_node
) = 128;
31175 layout_type (float128_type_node
);
31176 lang_hooks
.types
.register_builtin_type (float128_type_node
, "__float128");
31178 /* This macro is built by i386-builtin-types.awk. */
31179 DEFINE_BUILTIN_PRIMITIVE_TYPES
;
31183 ix86_init_builtins (void)
31187 ix86_init_builtin_types ();
31189 /* Builtins to get CPU type and features. */
31190 ix86_init_platform_type_builtins ();
31192 /* TFmode support builtins. */
31193 def_builtin_const (0, "__builtin_infq",
31194 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_INFQ
);
31195 def_builtin_const (0, "__builtin_huge_valq",
31196 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_HUGE_VALQ
);
31198 /* We will expand them to normal call if SSE isn't available since
31199 they are used by libgcc. */
31200 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128
);
31201 t
= add_builtin_function ("__builtin_fabsq", t
, IX86_BUILTIN_FABSQ
,
31202 BUILT_IN_MD
, "__fabstf2", NULL_TREE
);
31203 TREE_READONLY (t
) = 1;
31204 ix86_builtins
[(int) IX86_BUILTIN_FABSQ
] = t
;
31206 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128
);
31207 t
= add_builtin_function ("__builtin_copysignq", t
, IX86_BUILTIN_COPYSIGNQ
,
31208 BUILT_IN_MD
, "__copysigntf3", NULL_TREE
);
31209 TREE_READONLY (t
) = 1;
31210 ix86_builtins
[(int) IX86_BUILTIN_COPYSIGNQ
] = t
;
31212 ix86_init_tm_builtins ();
31213 ix86_init_mmx_sse_builtins ();
31216 ix86_init_builtins_va_builtins_abi ();
31218 #ifdef SUBTARGET_INIT_BUILTINS
31219 SUBTARGET_INIT_BUILTINS
;
31223 /* Return the ix86 builtin for CODE. */
31226 ix86_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
31228 if (code
>= IX86_BUILTIN_MAX
)
31229 return error_mark_node
;
31231 return ix86_builtins
[code
];
31234 /* Errors in the source file can cause expand_expr to return const0_rtx
31235 where we expect a vector. To avoid crashing, use one of the vector
31236 clear instructions. */
31238 safe_vector_operand (rtx x
, enum machine_mode mode
)
31240 if (x
== const0_rtx
)
31241 x
= CONST0_RTX (mode
);
31245 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
31248 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
31251 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31252 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31253 rtx op0
= expand_normal (arg0
);
31254 rtx op1
= expand_normal (arg1
);
31255 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
31256 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
31257 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
31259 if (VECTOR_MODE_P (mode0
))
31260 op0
= safe_vector_operand (op0
, mode0
);
31261 if (VECTOR_MODE_P (mode1
))
31262 op1
= safe_vector_operand (op1
, mode1
);
31264 if (optimize
|| !target
31265 || GET_MODE (target
) != tmode
31266 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
31267 target
= gen_reg_rtx (tmode
);
31269 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
31271 rtx x
= gen_reg_rtx (V4SImode
);
31272 emit_insn (gen_sse2_loadd (x
, op1
));
31273 op1
= gen_lowpart (TImode
, x
);
31276 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
31277 op0
= copy_to_mode_reg (mode0
, op0
);
31278 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode1
))
31279 op1
= copy_to_mode_reg (mode1
, op1
);
31281 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
31290 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
31293 ix86_expand_multi_arg_builtin (enum insn_code icode
, tree exp
, rtx target
,
31294 enum ix86_builtin_func_type m_type
,
31295 enum rtx_code sub_code
)
31300 bool comparison_p
= false;
31302 bool last_arg_constant
= false;
31303 int num_memory
= 0;
31306 enum machine_mode mode
;
31309 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
31313 case MULTI_ARG_4_DF2_DI_I
:
31314 case MULTI_ARG_4_DF2_DI_I1
:
31315 case MULTI_ARG_4_SF2_SI_I
:
31316 case MULTI_ARG_4_SF2_SI_I1
:
31318 last_arg_constant
= true;
31321 case MULTI_ARG_3_SF
:
31322 case MULTI_ARG_3_DF
:
31323 case MULTI_ARG_3_SF2
:
31324 case MULTI_ARG_3_DF2
:
31325 case MULTI_ARG_3_DI
:
31326 case MULTI_ARG_3_SI
:
31327 case MULTI_ARG_3_SI_DI
:
31328 case MULTI_ARG_3_HI
:
31329 case MULTI_ARG_3_HI_SI
:
31330 case MULTI_ARG_3_QI
:
31331 case MULTI_ARG_3_DI2
:
31332 case MULTI_ARG_3_SI2
:
31333 case MULTI_ARG_3_HI2
:
31334 case MULTI_ARG_3_QI2
:
31338 case MULTI_ARG_2_SF
:
31339 case MULTI_ARG_2_DF
:
31340 case MULTI_ARG_2_DI
:
31341 case MULTI_ARG_2_SI
:
31342 case MULTI_ARG_2_HI
:
31343 case MULTI_ARG_2_QI
:
31347 case MULTI_ARG_2_DI_IMM
:
31348 case MULTI_ARG_2_SI_IMM
:
31349 case MULTI_ARG_2_HI_IMM
:
31350 case MULTI_ARG_2_QI_IMM
:
31352 last_arg_constant
= true;
31355 case MULTI_ARG_1_SF
:
31356 case MULTI_ARG_1_DF
:
31357 case MULTI_ARG_1_SF2
:
31358 case MULTI_ARG_1_DF2
:
31359 case MULTI_ARG_1_DI
:
31360 case MULTI_ARG_1_SI
:
31361 case MULTI_ARG_1_HI
:
31362 case MULTI_ARG_1_QI
:
31363 case MULTI_ARG_1_SI_DI
:
31364 case MULTI_ARG_1_HI_DI
:
31365 case MULTI_ARG_1_HI_SI
:
31366 case MULTI_ARG_1_QI_DI
:
31367 case MULTI_ARG_1_QI_SI
:
31368 case MULTI_ARG_1_QI_HI
:
31372 case MULTI_ARG_2_DI_CMP
:
31373 case MULTI_ARG_2_SI_CMP
:
31374 case MULTI_ARG_2_HI_CMP
:
31375 case MULTI_ARG_2_QI_CMP
:
31377 comparison_p
= true;
31380 case MULTI_ARG_2_SF_TF
:
31381 case MULTI_ARG_2_DF_TF
:
31382 case MULTI_ARG_2_DI_TF
:
31383 case MULTI_ARG_2_SI_TF
:
31384 case MULTI_ARG_2_HI_TF
:
31385 case MULTI_ARG_2_QI_TF
:
31391 gcc_unreachable ();
31394 if (optimize
|| !target
31395 || GET_MODE (target
) != tmode
31396 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
31397 target
= gen_reg_rtx (tmode
);
31399 gcc_assert (nargs
<= 4);
31401 for (i
= 0; i
< nargs
; i
++)
31403 tree arg
= CALL_EXPR_ARG (exp
, i
);
31404 rtx op
= expand_normal (arg
);
31405 int adjust
= (comparison_p
) ? 1 : 0;
31406 enum machine_mode mode
= insn_data
[icode
].operand
[i
+adjust
+1].mode
;
31408 if (last_arg_constant
&& i
== nargs
- 1)
31410 if (!insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
))
31412 enum insn_code new_icode
= icode
;
31415 case CODE_FOR_xop_vpermil2v2df3
:
31416 case CODE_FOR_xop_vpermil2v4sf3
:
31417 case CODE_FOR_xop_vpermil2v4df3
:
31418 case CODE_FOR_xop_vpermil2v8sf3
:
31419 error ("the last argument must be a 2-bit immediate");
31420 return gen_reg_rtx (tmode
);
31421 case CODE_FOR_xop_rotlv2di3
:
31422 new_icode
= CODE_FOR_rotlv2di3
;
31424 case CODE_FOR_xop_rotlv4si3
:
31425 new_icode
= CODE_FOR_rotlv4si3
;
31427 case CODE_FOR_xop_rotlv8hi3
:
31428 new_icode
= CODE_FOR_rotlv8hi3
;
31430 case CODE_FOR_xop_rotlv16qi3
:
31431 new_icode
= CODE_FOR_rotlv16qi3
;
31433 if (CONST_INT_P (op
))
31435 int mask
= GET_MODE_BITSIZE (GET_MODE_INNER (tmode
)) - 1;
31436 op
= GEN_INT (INTVAL (op
) & mask
);
31437 gcc_checking_assert
31438 (insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
));
31442 gcc_checking_assert
31444 && insn_data
[new_icode
].operand
[0].mode
== tmode
31445 && insn_data
[new_icode
].operand
[1].mode
== tmode
31446 && insn_data
[new_icode
].operand
[2].mode
== mode
31447 && insn_data
[new_icode
].operand
[0].predicate
31448 == insn_data
[icode
].operand
[0].predicate
31449 && insn_data
[new_icode
].operand
[1].predicate
31450 == insn_data
[icode
].operand
[1].predicate
);
31456 gcc_unreachable ();
31463 if (VECTOR_MODE_P (mode
))
31464 op
= safe_vector_operand (op
, mode
);
31466 /* If we aren't optimizing, only allow one memory operand to be
31468 if (memory_operand (op
, mode
))
31471 gcc_assert (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
);
31474 || !insn_data
[icode
].operand
[i
+adjust
+1].predicate (op
, mode
)
31476 op
= force_reg (mode
, op
);
31480 args
[i
].mode
= mode
;
31486 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
31491 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
,
31492 GEN_INT ((int)sub_code
));
31493 else if (! comparison_p
)
31494 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
31497 rtx cmp_op
= gen_rtx_fmt_ee (sub_code
, GET_MODE (target
),
31501 pat
= GEN_FCN (icode
) (target
, cmp_op
, args
[0].op
, args
[1].op
);
31506 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
31510 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
, args
[3].op
);
31514 gcc_unreachable ();
31524 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
31525 insns with vec_merge. */
31528 ix86_expand_unop_vec_merge_builtin (enum insn_code icode
, tree exp
,
31532 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31533 rtx op1
, op0
= expand_normal (arg0
);
31534 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
31535 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
31537 if (optimize
|| !target
31538 || GET_MODE (target
) != tmode
31539 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
31540 target
= gen_reg_rtx (tmode
);
31542 if (VECTOR_MODE_P (mode0
))
31543 op0
= safe_vector_operand (op0
, mode0
);
31545 if ((optimize
&& !register_operand (op0
, mode0
))
31546 || !insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
31547 op0
= copy_to_mode_reg (mode0
, op0
);
31550 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode0
))
31551 op1
= copy_to_mode_reg (mode0
, op1
);
31553 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
31560 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
31563 ix86_expand_sse_compare (const struct builtin_description
*d
,
31564 tree exp
, rtx target
, bool swap
)
31567 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31568 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31569 rtx op0
= expand_normal (arg0
);
31570 rtx op1
= expand_normal (arg1
);
31572 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
31573 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
31574 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
31575 enum rtx_code comparison
= d
->comparison
;
31577 if (VECTOR_MODE_P (mode0
))
31578 op0
= safe_vector_operand (op0
, mode0
);
31579 if (VECTOR_MODE_P (mode1
))
31580 op1
= safe_vector_operand (op1
, mode1
);
31582 /* Swap operands if we have a comparison that isn't available in
31586 rtx tmp
= gen_reg_rtx (mode1
);
31587 emit_move_insn (tmp
, op1
);
31592 if (optimize
|| !target
31593 || GET_MODE (target
) != tmode
31594 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
31595 target
= gen_reg_rtx (tmode
);
31597 if ((optimize
&& !register_operand (op0
, mode0
))
31598 || !insn_data
[d
->icode
].operand
[1].predicate (op0
, mode0
))
31599 op0
= copy_to_mode_reg (mode0
, op0
);
31600 if ((optimize
&& !register_operand (op1
, mode1
))
31601 || !insn_data
[d
->icode
].operand
[2].predicate (op1
, mode1
))
31602 op1
= copy_to_mode_reg (mode1
, op1
);
31604 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
31605 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
31612 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
31615 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
31619 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31620 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31621 rtx op0
= expand_normal (arg0
);
31622 rtx op1
= expand_normal (arg1
);
31623 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
31624 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
31625 enum rtx_code comparison
= d
->comparison
;
31627 if (VECTOR_MODE_P (mode0
))
31628 op0
= safe_vector_operand (op0
, mode0
);
31629 if (VECTOR_MODE_P (mode1
))
31630 op1
= safe_vector_operand (op1
, mode1
);
31632 /* Swap operands if we have a comparison that isn't available in
31634 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
31641 target
= gen_reg_rtx (SImode
);
31642 emit_move_insn (target
, const0_rtx
);
31643 target
= gen_rtx_SUBREG (QImode
, target
, 0);
31645 if ((optimize
&& !register_operand (op0
, mode0
))
31646 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
31647 op0
= copy_to_mode_reg (mode0
, op0
);
31648 if ((optimize
&& !register_operand (op1
, mode1
))
31649 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
31650 op1
= copy_to_mode_reg (mode1
, op1
);
31652 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
31656 emit_insn (gen_rtx_SET (VOIDmode
,
31657 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
31658 gen_rtx_fmt_ee (comparison
, QImode
,
31662 return SUBREG_REG (target
);
31665 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
31668 ix86_expand_sse_round (const struct builtin_description
*d
, tree exp
,
31672 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31673 rtx op1
, op0
= expand_normal (arg0
);
31674 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
31675 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
31677 if (optimize
|| target
== 0
31678 || GET_MODE (target
) != tmode
31679 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
31680 target
= gen_reg_rtx (tmode
);
31682 if (VECTOR_MODE_P (mode0
))
31683 op0
= safe_vector_operand (op0
, mode0
);
31685 if ((optimize
&& !register_operand (op0
, mode0
))
31686 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
31687 op0
= copy_to_mode_reg (mode0
, op0
);
31689 op1
= GEN_INT (d
->comparison
);
31691 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
);
31699 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description
*d
,
31700 tree exp
, rtx target
)
31703 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31704 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31705 rtx op0
= expand_normal (arg0
);
31706 rtx op1
= expand_normal (arg1
);
31708 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
31709 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
31710 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
31712 if (optimize
|| target
== 0
31713 || GET_MODE (target
) != tmode
31714 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
31715 target
= gen_reg_rtx (tmode
);
31717 op0
= safe_vector_operand (op0
, mode0
);
31718 op1
= safe_vector_operand (op1
, mode1
);
31720 if ((optimize
&& !register_operand (op0
, mode0
))
31721 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
31722 op0
= copy_to_mode_reg (mode0
, op0
);
31723 if ((optimize
&& !register_operand (op1
, mode1
))
31724 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
31725 op1
= copy_to_mode_reg (mode1
, op1
);
31727 op2
= GEN_INT (d
->comparison
);
31729 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
31736 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
31739 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
31743 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31744 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31745 rtx op0
= expand_normal (arg0
);
31746 rtx op1
= expand_normal (arg1
);
31747 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
31748 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
31749 enum rtx_code comparison
= d
->comparison
;
31751 if (VECTOR_MODE_P (mode0
))
31752 op0
= safe_vector_operand (op0
, mode0
);
31753 if (VECTOR_MODE_P (mode1
))
31754 op1
= safe_vector_operand (op1
, mode1
);
31756 target
= gen_reg_rtx (SImode
);
31757 emit_move_insn (target
, const0_rtx
);
31758 target
= gen_rtx_SUBREG (QImode
, target
, 0);
31760 if ((optimize
&& !register_operand (op0
, mode0
))
31761 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
31762 op0
= copy_to_mode_reg (mode0
, op0
);
31763 if ((optimize
&& !register_operand (op1
, mode1
))
31764 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
31765 op1
= copy_to_mode_reg (mode1
, op1
);
31767 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
31771 emit_insn (gen_rtx_SET (VOIDmode
,
31772 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
31773 gen_rtx_fmt_ee (comparison
, QImode
,
31777 return SUBREG_REG (target
);
31780 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
31783 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
31784 tree exp
, rtx target
)
31787 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31788 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31789 tree arg2
= CALL_EXPR_ARG (exp
, 2);
31790 tree arg3
= CALL_EXPR_ARG (exp
, 3);
31791 tree arg4
= CALL_EXPR_ARG (exp
, 4);
31792 rtx scratch0
, scratch1
;
31793 rtx op0
= expand_normal (arg0
);
31794 rtx op1
= expand_normal (arg1
);
31795 rtx op2
= expand_normal (arg2
);
31796 rtx op3
= expand_normal (arg3
);
31797 rtx op4
= expand_normal (arg4
);
31798 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
31800 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
31801 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
31802 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
31803 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
31804 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
31805 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
31806 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
31808 if (VECTOR_MODE_P (modev2
))
31809 op0
= safe_vector_operand (op0
, modev2
);
31810 if (VECTOR_MODE_P (modev4
))
31811 op2
= safe_vector_operand (op2
, modev4
);
31813 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
31814 op0
= copy_to_mode_reg (modev2
, op0
);
31815 if (!insn_data
[d
->icode
].operand
[3].predicate (op1
, modei3
))
31816 op1
= copy_to_mode_reg (modei3
, op1
);
31817 if ((optimize
&& !register_operand (op2
, modev4
))
31818 || !insn_data
[d
->icode
].operand
[4].predicate (op2
, modev4
))
31819 op2
= copy_to_mode_reg (modev4
, op2
);
31820 if (!insn_data
[d
->icode
].operand
[5].predicate (op3
, modei5
))
31821 op3
= copy_to_mode_reg (modei5
, op3
);
31823 if (!insn_data
[d
->icode
].operand
[6].predicate (op4
, modeimm
))
31825 error ("the fifth argument must be an 8-bit immediate");
31829 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
31831 if (optimize
|| !target
31832 || GET_MODE (target
) != tmode0
31833 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
31834 target
= gen_reg_rtx (tmode0
);
31836 scratch1
= gen_reg_rtx (tmode1
);
31838 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
31840 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
31842 if (optimize
|| !target
31843 || GET_MODE (target
) != tmode1
31844 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
31845 target
= gen_reg_rtx (tmode1
);
31847 scratch0
= gen_reg_rtx (tmode0
);
31849 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
31853 gcc_assert (d
->flag
);
31855 scratch0
= gen_reg_rtx (tmode0
);
31856 scratch1
= gen_reg_rtx (tmode1
);
31858 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
31868 target
= gen_reg_rtx (SImode
);
31869 emit_move_insn (target
, const0_rtx
);
31870 target
= gen_rtx_SUBREG (QImode
, target
, 0);
31873 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
31874 gen_rtx_fmt_ee (EQ
, QImode
,
31875 gen_rtx_REG ((enum machine_mode
) d
->flag
,
31878 return SUBREG_REG (target
);
31885 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
31888 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
31889 tree exp
, rtx target
)
31892 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31893 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31894 tree arg2
= CALL_EXPR_ARG (exp
, 2);
31895 rtx scratch0
, scratch1
;
31896 rtx op0
= expand_normal (arg0
);
31897 rtx op1
= expand_normal (arg1
);
31898 rtx op2
= expand_normal (arg2
);
31899 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
31901 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
31902 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
31903 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
31904 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
31905 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
31907 if (VECTOR_MODE_P (modev2
))
31908 op0
= safe_vector_operand (op0
, modev2
);
31909 if (VECTOR_MODE_P (modev3
))
31910 op1
= safe_vector_operand (op1
, modev3
);
31912 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
31913 op0
= copy_to_mode_reg (modev2
, op0
);
31914 if ((optimize
&& !register_operand (op1
, modev3
))
31915 || !insn_data
[d
->icode
].operand
[3].predicate (op1
, modev3
))
31916 op1
= copy_to_mode_reg (modev3
, op1
);
31918 if (!insn_data
[d
->icode
].operand
[4].predicate (op2
, modeimm
))
31920 error ("the third argument must be an 8-bit immediate");
31924 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
31926 if (optimize
|| !target
31927 || GET_MODE (target
) != tmode0
31928 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
31929 target
= gen_reg_rtx (tmode0
);
31931 scratch1
= gen_reg_rtx (tmode1
);
31933 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
31935 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
31937 if (optimize
|| !target
31938 || GET_MODE (target
) != tmode1
31939 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
31940 target
= gen_reg_rtx (tmode1
);
31942 scratch0
= gen_reg_rtx (tmode0
);
31944 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
31948 gcc_assert (d
->flag
);
31950 scratch0
= gen_reg_rtx (tmode0
);
31951 scratch1
= gen_reg_rtx (tmode1
);
31953 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
31963 target
= gen_reg_rtx (SImode
);
31964 emit_move_insn (target
, const0_rtx
);
31965 target
= gen_rtx_SUBREG (QImode
, target
, 0);
31968 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
31969 gen_rtx_fmt_ee (EQ
, QImode
,
31970 gen_rtx_REG ((enum machine_mode
) d
->flag
,
31973 return SUBREG_REG (target
);
31979 /* Subroutine of ix86_expand_builtin to take care of insns with
31980 variable number of operands. */
31983 ix86_expand_args_builtin (const struct builtin_description
*d
,
31984 tree exp
, rtx target
)
31986 rtx pat
, real_target
;
31987 unsigned int i
, nargs
;
31988 unsigned int nargs_constant
= 0;
31989 int num_memory
= 0;
31993 enum machine_mode mode
;
31995 bool last_arg_count
= false;
31996 enum insn_code icode
= d
->icode
;
31997 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
31998 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
31999 enum machine_mode rmode
= VOIDmode
;
32001 enum rtx_code comparison
= d
->comparison
;
32003 switch ((enum ix86_builtin_func_type
) d
->flag
)
32005 case V2DF_FTYPE_V2DF_ROUND
:
32006 case V4DF_FTYPE_V4DF_ROUND
:
32007 case V4SF_FTYPE_V4SF_ROUND
:
32008 case V8SF_FTYPE_V8SF_ROUND
:
32009 case V4SI_FTYPE_V4SF_ROUND
:
32010 case V8SI_FTYPE_V8SF_ROUND
:
32011 return ix86_expand_sse_round (d
, exp
, target
);
32012 case V4SI_FTYPE_V2DF_V2DF_ROUND
:
32013 case V8SI_FTYPE_V4DF_V4DF_ROUND
:
32014 return ix86_expand_sse_round_vec_pack_sfix (d
, exp
, target
);
32015 case INT_FTYPE_V8SF_V8SF_PTEST
:
32016 case INT_FTYPE_V4DI_V4DI_PTEST
:
32017 case INT_FTYPE_V4DF_V4DF_PTEST
:
32018 case INT_FTYPE_V4SF_V4SF_PTEST
:
32019 case INT_FTYPE_V2DI_V2DI_PTEST
:
32020 case INT_FTYPE_V2DF_V2DF_PTEST
:
32021 return ix86_expand_sse_ptest (d
, exp
, target
);
32022 case FLOAT128_FTYPE_FLOAT128
:
32023 case FLOAT_FTYPE_FLOAT
:
32024 case INT_FTYPE_INT
:
32025 case UINT64_FTYPE_INT
:
32026 case UINT16_FTYPE_UINT16
:
32027 case INT64_FTYPE_INT64
:
32028 case INT64_FTYPE_V4SF
:
32029 case INT64_FTYPE_V2DF
:
32030 case INT_FTYPE_V16QI
:
32031 case INT_FTYPE_V8QI
:
32032 case INT_FTYPE_V8SF
:
32033 case INT_FTYPE_V4DF
:
32034 case INT_FTYPE_V4SF
:
32035 case INT_FTYPE_V2DF
:
32036 case INT_FTYPE_V32QI
:
32037 case V16QI_FTYPE_V16QI
:
32038 case V8SI_FTYPE_V8SF
:
32039 case V8SI_FTYPE_V4SI
:
32040 case V8HI_FTYPE_V8HI
:
32041 case V8HI_FTYPE_V16QI
:
32042 case V8QI_FTYPE_V8QI
:
32043 case V8SF_FTYPE_V8SF
:
32044 case V8SF_FTYPE_V8SI
:
32045 case V8SF_FTYPE_V4SF
:
32046 case V8SF_FTYPE_V8HI
:
32047 case V4SI_FTYPE_V4SI
:
32048 case V4SI_FTYPE_V16QI
:
32049 case V4SI_FTYPE_V4SF
:
32050 case V4SI_FTYPE_V8SI
:
32051 case V4SI_FTYPE_V8HI
:
32052 case V4SI_FTYPE_V4DF
:
32053 case V4SI_FTYPE_V2DF
:
32054 case V4HI_FTYPE_V4HI
:
32055 case V4DF_FTYPE_V4DF
:
32056 case V4DF_FTYPE_V4SI
:
32057 case V4DF_FTYPE_V4SF
:
32058 case V4DF_FTYPE_V2DF
:
32059 case V4SF_FTYPE_V4SF
:
32060 case V4SF_FTYPE_V4SI
:
32061 case V4SF_FTYPE_V8SF
:
32062 case V4SF_FTYPE_V4DF
:
32063 case V4SF_FTYPE_V8HI
:
32064 case V4SF_FTYPE_V2DF
:
32065 case V2DI_FTYPE_V2DI
:
32066 case V2DI_FTYPE_V16QI
:
32067 case V2DI_FTYPE_V8HI
:
32068 case V2DI_FTYPE_V4SI
:
32069 case V2DF_FTYPE_V2DF
:
32070 case V2DF_FTYPE_V4SI
:
32071 case V2DF_FTYPE_V4DF
:
32072 case V2DF_FTYPE_V4SF
:
32073 case V2DF_FTYPE_V2SI
:
32074 case V2SI_FTYPE_V2SI
:
32075 case V2SI_FTYPE_V4SF
:
32076 case V2SI_FTYPE_V2SF
:
32077 case V2SI_FTYPE_V2DF
:
32078 case V2SF_FTYPE_V2SF
:
32079 case V2SF_FTYPE_V2SI
:
32080 case V32QI_FTYPE_V32QI
:
32081 case V32QI_FTYPE_V16QI
:
32082 case V16HI_FTYPE_V16HI
:
32083 case V16HI_FTYPE_V8HI
:
32084 case V8SI_FTYPE_V8SI
:
32085 case V16HI_FTYPE_V16QI
:
32086 case V8SI_FTYPE_V16QI
:
32087 case V4DI_FTYPE_V16QI
:
32088 case V8SI_FTYPE_V8HI
:
32089 case V4DI_FTYPE_V8HI
:
32090 case V4DI_FTYPE_V4SI
:
32091 case V4DI_FTYPE_V2DI
:
32094 case V4SF_FTYPE_V4SF_VEC_MERGE
:
32095 case V2DF_FTYPE_V2DF_VEC_MERGE
:
32096 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
32097 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
32098 case V16QI_FTYPE_V16QI_V16QI
:
32099 case V16QI_FTYPE_V8HI_V8HI
:
32100 case V8QI_FTYPE_V8QI_V8QI
:
32101 case V8QI_FTYPE_V4HI_V4HI
:
32102 case V8HI_FTYPE_V8HI_V8HI
:
32103 case V8HI_FTYPE_V16QI_V16QI
:
32104 case V8HI_FTYPE_V4SI_V4SI
:
32105 case V8SF_FTYPE_V8SF_V8SF
:
32106 case V8SF_FTYPE_V8SF_V8SI
:
32107 case V4SI_FTYPE_V4SI_V4SI
:
32108 case V4SI_FTYPE_V8HI_V8HI
:
32109 case V4SI_FTYPE_V4SF_V4SF
:
32110 case V4SI_FTYPE_V2DF_V2DF
:
32111 case V4HI_FTYPE_V4HI_V4HI
:
32112 case V4HI_FTYPE_V8QI_V8QI
:
32113 case V4HI_FTYPE_V2SI_V2SI
:
32114 case V4DF_FTYPE_V4DF_V4DF
:
32115 case V4DF_FTYPE_V4DF_V4DI
:
32116 case V4SF_FTYPE_V4SF_V4SF
:
32117 case V4SF_FTYPE_V4SF_V4SI
:
32118 case V4SF_FTYPE_V4SF_V2SI
:
32119 case V4SF_FTYPE_V4SF_V2DF
:
32120 case V4SF_FTYPE_V4SF_DI
:
32121 case V4SF_FTYPE_V4SF_SI
:
32122 case V2DI_FTYPE_V2DI_V2DI
:
32123 case V2DI_FTYPE_V16QI_V16QI
:
32124 case V2DI_FTYPE_V4SI_V4SI
:
32125 case V2UDI_FTYPE_V4USI_V4USI
:
32126 case V2DI_FTYPE_V2DI_V16QI
:
32127 case V2DI_FTYPE_V2DF_V2DF
:
32128 case V2SI_FTYPE_V2SI_V2SI
:
32129 case V2SI_FTYPE_V4HI_V4HI
:
32130 case V2SI_FTYPE_V2SF_V2SF
:
32131 case V2DF_FTYPE_V2DF_V2DF
:
32132 case V2DF_FTYPE_V2DF_V4SF
:
32133 case V2DF_FTYPE_V2DF_V2DI
:
32134 case V2DF_FTYPE_V2DF_DI
:
32135 case V2DF_FTYPE_V2DF_SI
:
32136 case V2SF_FTYPE_V2SF_V2SF
:
32137 case V1DI_FTYPE_V1DI_V1DI
:
32138 case V1DI_FTYPE_V8QI_V8QI
:
32139 case V1DI_FTYPE_V2SI_V2SI
:
32140 case V32QI_FTYPE_V16HI_V16HI
:
32141 case V16HI_FTYPE_V8SI_V8SI
:
32142 case V32QI_FTYPE_V32QI_V32QI
:
32143 case V16HI_FTYPE_V32QI_V32QI
:
32144 case V16HI_FTYPE_V16HI_V16HI
:
32145 case V8SI_FTYPE_V4DF_V4DF
:
32146 case V8SI_FTYPE_V8SI_V8SI
:
32147 case V8SI_FTYPE_V16HI_V16HI
:
32148 case V4DI_FTYPE_V4DI_V4DI
:
32149 case V4DI_FTYPE_V8SI_V8SI
:
32150 case V4UDI_FTYPE_V8USI_V8USI
:
32151 if (comparison
== UNKNOWN
)
32152 return ix86_expand_binop_builtin (icode
, exp
, target
);
32155 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
32156 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
32157 gcc_assert (comparison
!= UNKNOWN
);
32161 case V16HI_FTYPE_V16HI_V8HI_COUNT
:
32162 case V16HI_FTYPE_V16HI_SI_COUNT
:
32163 case V8SI_FTYPE_V8SI_V4SI_COUNT
:
32164 case V8SI_FTYPE_V8SI_SI_COUNT
:
32165 case V4DI_FTYPE_V4DI_V2DI_COUNT
:
32166 case V4DI_FTYPE_V4DI_INT_COUNT
:
32167 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
32168 case V8HI_FTYPE_V8HI_SI_COUNT
:
32169 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
32170 case V4SI_FTYPE_V4SI_SI_COUNT
:
32171 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
32172 case V4HI_FTYPE_V4HI_SI_COUNT
:
32173 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
32174 case V2DI_FTYPE_V2DI_SI_COUNT
:
32175 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
32176 case V2SI_FTYPE_V2SI_SI_COUNT
:
32177 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
32178 case V1DI_FTYPE_V1DI_SI_COUNT
:
32180 last_arg_count
= true;
32182 case UINT64_FTYPE_UINT64_UINT64
:
32183 case UINT_FTYPE_UINT_UINT
:
32184 case UINT_FTYPE_UINT_USHORT
:
32185 case UINT_FTYPE_UINT_UCHAR
:
32186 case UINT16_FTYPE_UINT16_INT
:
32187 case UINT8_FTYPE_UINT8_INT
:
32190 case V2DI_FTYPE_V2DI_INT_CONVERT
:
32193 nargs_constant
= 1;
32195 case V4DI_FTYPE_V4DI_INT_CONVERT
:
32198 nargs_constant
= 1;
32200 case V8HI_FTYPE_V8HI_INT
:
32201 case V8HI_FTYPE_V8SF_INT
:
32202 case V8HI_FTYPE_V4SF_INT
:
32203 case V8SF_FTYPE_V8SF_INT
:
32204 case V4SI_FTYPE_V4SI_INT
:
32205 case V4SI_FTYPE_V8SI_INT
:
32206 case V4HI_FTYPE_V4HI_INT
:
32207 case V4DF_FTYPE_V4DF_INT
:
32208 case V4SF_FTYPE_V4SF_INT
:
32209 case V4SF_FTYPE_V8SF_INT
:
32210 case V2DI_FTYPE_V2DI_INT
:
32211 case V2DF_FTYPE_V2DF_INT
:
32212 case V2DF_FTYPE_V4DF_INT
:
32213 case V16HI_FTYPE_V16HI_INT
:
32214 case V8SI_FTYPE_V8SI_INT
:
32215 case V4DI_FTYPE_V4DI_INT
:
32216 case V2DI_FTYPE_V4DI_INT
:
32218 nargs_constant
= 1;
32220 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
32221 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
32222 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
32223 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
32224 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
32225 case V32QI_FTYPE_V32QI_V32QI_V32QI
:
32228 case V32QI_FTYPE_V32QI_V32QI_INT
:
32229 case V16HI_FTYPE_V16HI_V16HI_INT
:
32230 case V16QI_FTYPE_V16QI_V16QI_INT
:
32231 case V4DI_FTYPE_V4DI_V4DI_INT
:
32232 case V8HI_FTYPE_V8HI_V8HI_INT
:
32233 case V8SI_FTYPE_V8SI_V8SI_INT
:
32234 case V8SI_FTYPE_V8SI_V4SI_INT
:
32235 case V8SF_FTYPE_V8SF_V8SF_INT
:
32236 case V8SF_FTYPE_V8SF_V4SF_INT
:
32237 case V4SI_FTYPE_V4SI_V4SI_INT
:
32238 case V4DF_FTYPE_V4DF_V4DF_INT
:
32239 case V4DF_FTYPE_V4DF_V2DF_INT
:
32240 case V4SF_FTYPE_V4SF_V4SF_INT
:
32241 case V2DI_FTYPE_V2DI_V2DI_INT
:
32242 case V4DI_FTYPE_V4DI_V2DI_INT
:
32243 case V2DF_FTYPE_V2DF_V2DF_INT
:
32245 nargs_constant
= 1;
32247 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
:
32250 nargs_constant
= 1;
32252 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
:
32255 nargs_constant
= 1;
32257 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
:
32260 nargs_constant
= 1;
32262 case V2DI_FTYPE_V2DI_UINT_UINT
:
32264 nargs_constant
= 2;
32266 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT
:
32267 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT
:
32268 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT
:
32269 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT
:
32271 nargs_constant
= 1;
32273 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
32275 nargs_constant
= 2;
32277 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
:
32278 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
:
32282 gcc_unreachable ();
32285 gcc_assert (nargs
<= ARRAY_SIZE (args
));
32287 if (comparison
!= UNKNOWN
)
32289 gcc_assert (nargs
== 2);
32290 return ix86_expand_sse_compare (d
, exp
, target
, swap
);
32293 if (rmode
== VOIDmode
|| rmode
== tmode
)
32297 || GET_MODE (target
) != tmode
32298 || !insn_p
->operand
[0].predicate (target
, tmode
))
32299 target
= gen_reg_rtx (tmode
);
32300 real_target
= target
;
32304 real_target
= gen_reg_rtx (tmode
);
32305 target
= simplify_gen_subreg (rmode
, real_target
, tmode
, 0);
32308 for (i
= 0; i
< nargs
; i
++)
32310 tree arg
= CALL_EXPR_ARG (exp
, i
);
32311 rtx op
= expand_normal (arg
);
32312 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
32313 bool match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
32315 if (last_arg_count
&& (i
+ 1) == nargs
)
32317 /* SIMD shift insns take either an 8-bit immediate or
32318 register as count. But builtin functions take int as
32319 count. If count doesn't match, we put it in register. */
32322 op
= simplify_gen_subreg (SImode
, op
, GET_MODE (op
), 0);
32323 if (!insn_p
->operand
[i
+ 1].predicate (op
, mode
))
32324 op
= copy_to_reg (op
);
32327 else if ((nargs
- i
) <= nargs_constant
)
32332 case CODE_FOR_avx2_inserti128
:
32333 case CODE_FOR_avx2_extracti128
:
32334 error ("the last argument must be an 1-bit immediate");
32337 case CODE_FOR_sse4_1_roundsd
:
32338 case CODE_FOR_sse4_1_roundss
:
32340 case CODE_FOR_sse4_1_roundpd
:
32341 case CODE_FOR_sse4_1_roundps
:
32342 case CODE_FOR_avx_roundpd256
:
32343 case CODE_FOR_avx_roundps256
:
32345 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix
:
32346 case CODE_FOR_sse4_1_roundps_sfix
:
32347 case CODE_FOR_avx_roundpd_vec_pack_sfix256
:
32348 case CODE_FOR_avx_roundps_sfix256
:
32350 case CODE_FOR_sse4_1_blendps
:
32351 case CODE_FOR_avx_blendpd256
:
32352 case CODE_FOR_avx_vpermilv4df
:
32353 error ("the last argument must be a 4-bit immediate");
32356 case CODE_FOR_sse4_1_blendpd
:
32357 case CODE_FOR_avx_vpermilv2df
:
32358 case CODE_FOR_xop_vpermil2v2df3
:
32359 case CODE_FOR_xop_vpermil2v4sf3
:
32360 case CODE_FOR_xop_vpermil2v4df3
:
32361 case CODE_FOR_xop_vpermil2v8sf3
:
32362 error ("the last argument must be a 2-bit immediate");
32365 case CODE_FOR_avx_vextractf128v4df
:
32366 case CODE_FOR_avx_vextractf128v8sf
:
32367 case CODE_FOR_avx_vextractf128v8si
:
32368 case CODE_FOR_avx_vinsertf128v4df
:
32369 case CODE_FOR_avx_vinsertf128v8sf
:
32370 case CODE_FOR_avx_vinsertf128v8si
:
32371 error ("the last argument must be a 1-bit immediate");
32374 case CODE_FOR_avx_vmcmpv2df3
:
32375 case CODE_FOR_avx_vmcmpv4sf3
:
32376 case CODE_FOR_avx_cmpv2df3
:
32377 case CODE_FOR_avx_cmpv4sf3
:
32378 case CODE_FOR_avx_cmpv4df3
:
32379 case CODE_FOR_avx_cmpv8sf3
:
32380 error ("the last argument must be a 5-bit immediate");
32384 switch (nargs_constant
)
32387 if ((nargs
- i
) == nargs_constant
)
32389 error ("the next to last argument must be an 8-bit immediate");
32393 error ("the last argument must be an 8-bit immediate");
32396 gcc_unreachable ();
32403 if (VECTOR_MODE_P (mode
))
32404 op
= safe_vector_operand (op
, mode
);
32406 /* If we aren't optimizing, only allow one memory operand to
32408 if (memory_operand (op
, mode
))
32411 if (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
)
32413 if (optimize
|| !match
|| num_memory
> 1)
32414 op
= copy_to_mode_reg (mode
, op
);
32418 op
= copy_to_reg (op
);
32419 op
= simplify_gen_subreg (mode
, op
, GET_MODE (op
), 0);
32424 args
[i
].mode
= mode
;
32430 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
);
32433 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
);
32436 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
32440 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
32441 args
[2].op
, args
[3].op
);
32444 gcc_unreachable ();
32454 /* Subroutine of ix86_expand_builtin to take care of special insns
32455 with variable number of operands. */
32458 ix86_expand_special_args_builtin (const struct builtin_description
*d
,
32459 tree exp
, rtx target
)
32463 unsigned int i
, nargs
, arg_adjust
, memory
;
32467 enum machine_mode mode
;
32469 enum insn_code icode
= d
->icode
;
32470 bool last_arg_constant
= false;
32471 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
32472 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
32473 enum { load
, store
} klass
;
32475 switch ((enum ix86_builtin_func_type
) d
->flag
)
32477 case VOID_FTYPE_VOID
:
32478 emit_insn (GEN_FCN (icode
) (target
));
32480 case VOID_FTYPE_UINT64
:
32481 case VOID_FTYPE_UNSIGNED
:
32487 case INT_FTYPE_VOID
:
32488 case UINT64_FTYPE_VOID
:
32489 case UNSIGNED_FTYPE_VOID
:
32494 case UINT64_FTYPE_PUNSIGNED
:
32495 case V2DI_FTYPE_PV2DI
:
32496 case V4DI_FTYPE_PV4DI
:
32497 case V32QI_FTYPE_PCCHAR
:
32498 case V16QI_FTYPE_PCCHAR
:
32499 case V8SF_FTYPE_PCV4SF
:
32500 case V8SF_FTYPE_PCFLOAT
:
32501 case V4SF_FTYPE_PCFLOAT
:
32502 case V4DF_FTYPE_PCV2DF
:
32503 case V4DF_FTYPE_PCDOUBLE
:
32504 case V2DF_FTYPE_PCDOUBLE
:
32505 case VOID_FTYPE_PVOID
:
32510 case VOID_FTYPE_PV2SF_V4SF
:
32511 case VOID_FTYPE_PV4DI_V4DI
:
32512 case VOID_FTYPE_PV2DI_V2DI
:
32513 case VOID_FTYPE_PCHAR_V32QI
:
32514 case VOID_FTYPE_PCHAR_V16QI
:
32515 case VOID_FTYPE_PFLOAT_V8SF
:
32516 case VOID_FTYPE_PFLOAT_V4SF
:
32517 case VOID_FTYPE_PDOUBLE_V4DF
:
32518 case VOID_FTYPE_PDOUBLE_V2DF
:
32519 case VOID_FTYPE_PLONGLONG_LONGLONG
:
32520 case VOID_FTYPE_PULONGLONG_ULONGLONG
:
32521 case VOID_FTYPE_PINT_INT
:
32524 /* Reserve memory operand for target. */
32525 memory
= ARRAY_SIZE (args
);
32527 case V4SF_FTYPE_V4SF_PCV2SF
:
32528 case V2DF_FTYPE_V2DF_PCDOUBLE
:
32533 case V8SF_FTYPE_PCV8SF_V8SI
:
32534 case V4DF_FTYPE_PCV4DF_V4DI
:
32535 case V4SF_FTYPE_PCV4SF_V4SI
:
32536 case V2DF_FTYPE_PCV2DF_V2DI
:
32537 case V8SI_FTYPE_PCV8SI_V8SI
:
32538 case V4DI_FTYPE_PCV4DI_V4DI
:
32539 case V4SI_FTYPE_PCV4SI_V4SI
:
32540 case V2DI_FTYPE_PCV2DI_V2DI
:
32545 case VOID_FTYPE_PV8SF_V8SI_V8SF
:
32546 case VOID_FTYPE_PV4DF_V4DI_V4DF
:
32547 case VOID_FTYPE_PV4SF_V4SI_V4SF
:
32548 case VOID_FTYPE_PV2DF_V2DI_V2DF
:
32549 case VOID_FTYPE_PV8SI_V8SI_V8SI
:
32550 case VOID_FTYPE_PV4DI_V4DI_V4DI
:
32551 case VOID_FTYPE_PV4SI_V4SI_V4SI
:
32552 case VOID_FTYPE_PV2DI_V2DI_V2DI
:
32555 /* Reserve memory operand for target. */
32556 memory
= ARRAY_SIZE (args
);
32558 case VOID_FTYPE_UINT_UINT_UINT
:
32559 case VOID_FTYPE_UINT64_UINT_UINT
:
32560 case UCHAR_FTYPE_UINT_UINT_UINT
:
32561 case UCHAR_FTYPE_UINT64_UINT_UINT
:
32564 memory
= ARRAY_SIZE (args
);
32565 last_arg_constant
= true;
32568 gcc_unreachable ();
32571 gcc_assert (nargs
<= ARRAY_SIZE (args
));
32573 if (klass
== store
)
32575 arg
= CALL_EXPR_ARG (exp
, 0);
32576 op
= expand_normal (arg
);
32577 gcc_assert (target
== 0);
32580 op
= force_reg (Pmode
, convert_to_mode (Pmode
, op
, 1));
32581 target
= gen_rtx_MEM (tmode
, op
);
32584 target
= force_reg (tmode
, op
);
32592 || !register_operand (target
, tmode
)
32593 || GET_MODE (target
) != tmode
)
32594 target
= gen_reg_rtx (tmode
);
32597 for (i
= 0; i
< nargs
; i
++)
32599 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
32602 arg
= CALL_EXPR_ARG (exp
, i
+ arg_adjust
);
32603 op
= expand_normal (arg
);
32604 match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
32606 if (last_arg_constant
&& (i
+ 1) == nargs
)
32610 if (icode
== CODE_FOR_lwp_lwpvalsi3
32611 || icode
== CODE_FOR_lwp_lwpinssi3
32612 || icode
== CODE_FOR_lwp_lwpvaldi3
32613 || icode
== CODE_FOR_lwp_lwpinsdi3
)
32614 error ("the last argument must be a 32-bit immediate");
32616 error ("the last argument must be an 8-bit immediate");
32624 /* This must be the memory operand. */
32625 op
= force_reg (Pmode
, convert_to_mode (Pmode
, op
, 1));
32626 op
= gen_rtx_MEM (mode
, op
);
32627 gcc_assert (GET_MODE (op
) == mode
32628 || GET_MODE (op
) == VOIDmode
);
32632 /* This must be register. */
32633 if (VECTOR_MODE_P (mode
))
32634 op
= safe_vector_operand (op
, mode
);
32636 gcc_assert (GET_MODE (op
) == mode
32637 || GET_MODE (op
) == VOIDmode
);
32638 op
= copy_to_mode_reg (mode
, op
);
32643 args
[i
].mode
= mode
;
32649 pat
= GEN_FCN (icode
) (target
);
32652 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
32655 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
32658 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
32661 gcc_unreachable ();
32667 return klass
== store
? 0 : target
;
32670 /* Return the integer constant in ARG. Constrain it to be in the range
32671 of the subparts of VEC_TYPE; issue an error if not. */
32674 get_element_number (tree vec_type
, tree arg
)
32676 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
32678 if (!tree_fits_uhwi_p (arg
)
32679 || (elt
= tree_to_uhwi (arg
), elt
> max
))
32681 error ("selector must be an integer constant in the range 0..%wi", max
);
32688 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
32689 ix86_expand_vector_init. We DO have language-level syntax for this, in
32690 the form of (type){ init-list }. Except that since we can't place emms
32691 instructions from inside the compiler, we can't allow the use of MMX
32692 registers unless the user explicitly asks for it. So we do *not* define
32693 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
32694 we have builtins invoked by mmintrin.h that gives us license to emit
32695 these sorts of instructions. */
32698 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
32700 enum machine_mode tmode
= TYPE_MODE (type
);
32701 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
32702 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
32703 rtvec v
= rtvec_alloc (n_elt
);
32705 gcc_assert (VECTOR_MODE_P (tmode
));
32706 gcc_assert (call_expr_nargs (exp
) == n_elt
);
32708 for (i
= 0; i
< n_elt
; ++i
)
32710 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
32711 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
32714 if (!target
|| !register_operand (target
, tmode
))
32715 target
= gen_reg_rtx (tmode
);
32717 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
32721 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
32722 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
32723 had a language-level syntax for referencing vector elements. */
32726 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
32728 enum machine_mode tmode
, mode0
;
32733 arg0
= CALL_EXPR_ARG (exp
, 0);
32734 arg1
= CALL_EXPR_ARG (exp
, 1);
32736 op0
= expand_normal (arg0
);
32737 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
32739 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
32740 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
32741 gcc_assert (VECTOR_MODE_P (mode0
));
32743 op0
= force_reg (mode0
, op0
);
32745 if (optimize
|| !target
|| !register_operand (target
, tmode
))
32746 target
= gen_reg_rtx (tmode
);
32748 ix86_expand_vector_extract (true, target
, op0
, elt
);
32753 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
32754 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
32755 a language-level syntax for referencing vector elements. */
32758 ix86_expand_vec_set_builtin (tree exp
)
32760 enum machine_mode tmode
, mode1
;
32761 tree arg0
, arg1
, arg2
;
32763 rtx op0
, op1
, target
;
32765 arg0
= CALL_EXPR_ARG (exp
, 0);
32766 arg1
= CALL_EXPR_ARG (exp
, 1);
32767 arg2
= CALL_EXPR_ARG (exp
, 2);
32769 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
32770 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
32771 gcc_assert (VECTOR_MODE_P (tmode
));
32773 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
32774 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
32775 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
32777 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
32778 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
32780 op0
= force_reg (tmode
, op0
);
32781 op1
= force_reg (mode1
, op1
);
32783 /* OP0 is the source of these builtin functions and shouldn't be
32784 modified. Create a copy, use it and return it as target. */
32785 target
= gen_reg_rtx (tmode
);
32786 emit_move_insn (target
, op0
);
32787 ix86_expand_vector_set (true, target
, op1
, elt
);
32792 /* Expand an expression EXP that calls a built-in function,
32793 with result going to TARGET if that's convenient
32794 (and in mode MODE if that's convenient).
32795 SUBTARGET may be used as the target for computing one of EXP's operands.
32796 IGNORE is nonzero if the value is to be ignored. */
32799 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget
,
32800 enum machine_mode mode
, int ignore
)
32802 const struct builtin_description
*d
;
32804 enum insn_code icode
;
32805 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
32806 tree arg0
, arg1
, arg2
, arg3
, arg4
;
32807 rtx op0
, op1
, op2
, op3
, op4
, pat
, insn
;
32808 enum machine_mode mode0
, mode1
, mode2
, mode3
, mode4
;
32809 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
32811 /* For CPU builtins that can be folded, fold first and expand the fold. */
32814 case IX86_BUILTIN_CPU_INIT
:
32816 /* Make it call __cpu_indicator_init in libgcc. */
32817 tree call_expr
, fndecl
, type
;
32818 type
= build_function_type_list (integer_type_node
, NULL_TREE
);
32819 fndecl
= build_fn_decl ("__cpu_indicator_init", type
);
32820 call_expr
= build_call_expr (fndecl
, 0);
32821 return expand_expr (call_expr
, target
, mode
, EXPAND_NORMAL
);
32823 case IX86_BUILTIN_CPU_IS
:
32824 case IX86_BUILTIN_CPU_SUPPORTS
:
32826 tree arg0
= CALL_EXPR_ARG (exp
, 0);
32827 tree fold_expr
= fold_builtin_cpu (fndecl
, &arg0
);
32828 gcc_assert (fold_expr
!= NULL_TREE
);
32829 return expand_expr (fold_expr
, target
, mode
, EXPAND_NORMAL
);
32833 /* Determine whether the builtin function is available under the current ISA.
32834 Originally the builtin was not created if it wasn't applicable to the
32835 current ISA based on the command line switches. With function specific
32836 options, we need to check in the context of the function making the call
32837 whether it is supported. */
32838 if (ix86_builtins_isa
[fcode
].isa
32839 && !(ix86_builtins_isa
[fcode
].isa
& ix86_isa_flags
))
32841 char *opts
= ix86_target_string (ix86_builtins_isa
[fcode
].isa
, 0, NULL
,
32842 NULL
, (enum fpmath_unit
) 0, false);
32845 error ("%qE needs unknown isa option", fndecl
);
32848 gcc_assert (opts
!= NULL
);
32849 error ("%qE needs isa option %s", fndecl
, opts
);
32857 case IX86_BUILTIN_MASKMOVQ
:
32858 case IX86_BUILTIN_MASKMOVDQU
:
32859 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
32860 ? CODE_FOR_mmx_maskmovq
32861 : CODE_FOR_sse2_maskmovdqu
);
32862 /* Note the arg order is different from the operand order. */
32863 arg1
= CALL_EXPR_ARG (exp
, 0);
32864 arg2
= CALL_EXPR_ARG (exp
, 1);
32865 arg0
= CALL_EXPR_ARG (exp
, 2);
32866 op0
= expand_normal (arg0
);
32867 op1
= expand_normal (arg1
);
32868 op2
= expand_normal (arg2
);
32869 mode0
= insn_data
[icode
].operand
[0].mode
;
32870 mode1
= insn_data
[icode
].operand
[1].mode
;
32871 mode2
= insn_data
[icode
].operand
[2].mode
;
32873 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
32874 op0
= gen_rtx_MEM (mode1
, op0
);
32876 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
32877 op0
= copy_to_mode_reg (mode0
, op0
);
32878 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
32879 op1
= copy_to_mode_reg (mode1
, op1
);
32880 if (!insn_data
[icode
].operand
[2].predicate (op2
, mode2
))
32881 op2
= copy_to_mode_reg (mode2
, op2
);
32882 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
32888 case IX86_BUILTIN_LDMXCSR
:
32889 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
32890 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
32891 emit_move_insn (target
, op0
);
32892 emit_insn (gen_sse_ldmxcsr (target
));
32895 case IX86_BUILTIN_STMXCSR
:
32896 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
32897 emit_insn (gen_sse_stmxcsr (target
));
32898 return copy_to_mode_reg (SImode
, target
);
32900 case IX86_BUILTIN_CLFLUSH
:
32901 arg0
= CALL_EXPR_ARG (exp
, 0);
32902 op0
= expand_normal (arg0
);
32903 icode
= CODE_FOR_sse2_clflush
;
32904 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
32905 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
32907 emit_insn (gen_sse2_clflush (op0
));
32910 case IX86_BUILTIN_MONITOR
:
32911 arg0
= CALL_EXPR_ARG (exp
, 0);
32912 arg1
= CALL_EXPR_ARG (exp
, 1);
32913 arg2
= CALL_EXPR_ARG (exp
, 2);
32914 op0
= expand_normal (arg0
);
32915 op1
= expand_normal (arg1
);
32916 op2
= expand_normal (arg2
);
32918 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
32920 op1
= copy_to_mode_reg (SImode
, op1
);
32922 op2
= copy_to_mode_reg (SImode
, op2
);
32923 emit_insn (ix86_gen_monitor (op0
, op1
, op2
));
32926 case IX86_BUILTIN_MWAIT
:
32927 arg0
= CALL_EXPR_ARG (exp
, 0);
32928 arg1
= CALL_EXPR_ARG (exp
, 1);
32929 op0
= expand_normal (arg0
);
32930 op1
= expand_normal (arg1
);
32932 op0
= copy_to_mode_reg (SImode
, op0
);
32934 op1
= copy_to_mode_reg (SImode
, op1
);
32935 emit_insn (gen_sse3_mwait (op0
, op1
));
32938 case IX86_BUILTIN_VEC_INIT_V2SI
:
32939 case IX86_BUILTIN_VEC_INIT_V4HI
:
32940 case IX86_BUILTIN_VEC_INIT_V8QI
:
32941 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
32943 case IX86_BUILTIN_VEC_EXT_V2DF
:
32944 case IX86_BUILTIN_VEC_EXT_V2DI
:
32945 case IX86_BUILTIN_VEC_EXT_V4SF
:
32946 case IX86_BUILTIN_VEC_EXT_V4SI
:
32947 case IX86_BUILTIN_VEC_EXT_V8HI
:
32948 case IX86_BUILTIN_VEC_EXT_V2SI
:
32949 case IX86_BUILTIN_VEC_EXT_V4HI
:
32950 case IX86_BUILTIN_VEC_EXT_V16QI
:
32951 return ix86_expand_vec_ext_builtin (exp
, target
);
32953 case IX86_BUILTIN_VEC_SET_V2DI
:
32954 case IX86_BUILTIN_VEC_SET_V4SF
:
32955 case IX86_BUILTIN_VEC_SET_V4SI
:
32956 case IX86_BUILTIN_VEC_SET_V8HI
:
32957 case IX86_BUILTIN_VEC_SET_V4HI
:
32958 case IX86_BUILTIN_VEC_SET_V16QI
:
32959 return ix86_expand_vec_set_builtin (exp
);
32961 case IX86_BUILTIN_INFQ
:
32962 case IX86_BUILTIN_HUGE_VALQ
:
32964 REAL_VALUE_TYPE inf
;
32968 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
32970 tmp
= validize_mem (force_const_mem (mode
, tmp
));
32973 target
= gen_reg_rtx (mode
);
32975 emit_move_insn (target
, tmp
);
32979 case IX86_BUILTIN_RDPMC
:
32980 case IX86_BUILTIN_RDTSC
:
32981 case IX86_BUILTIN_RDTSCP
:
32983 op0
= gen_reg_rtx (DImode
);
32984 op1
= gen_reg_rtx (DImode
);
32986 if (fcode
== IX86_BUILTIN_RDPMC
)
32988 arg0
= CALL_EXPR_ARG (exp
, 0);
32989 op2
= expand_normal (arg0
);
32990 if (!register_operand (op2
, SImode
))
32991 op2
= copy_to_mode_reg (SImode
, op2
);
32993 insn
= (TARGET_64BIT
32994 ? gen_rdpmc_rex64 (op0
, op1
, op2
)
32995 : gen_rdpmc (op0
, op2
));
32998 else if (fcode
== IX86_BUILTIN_RDTSC
)
33000 insn
= (TARGET_64BIT
33001 ? gen_rdtsc_rex64 (op0
, op1
)
33002 : gen_rdtsc (op0
));
33007 op2
= gen_reg_rtx (SImode
);
33009 insn
= (TARGET_64BIT
33010 ? gen_rdtscp_rex64 (op0
, op1
, op2
)
33011 : gen_rdtscp (op0
, op2
));
33014 arg0
= CALL_EXPR_ARG (exp
, 0);
33015 op4
= expand_normal (arg0
);
33016 if (!address_operand (op4
, VOIDmode
))
33018 op4
= convert_memory_address (Pmode
, op4
);
33019 op4
= copy_addr_to_reg (op4
);
33021 emit_move_insn (gen_rtx_MEM (SImode
, op4
), op2
);
33026 /* mode is VOIDmode if __builtin_rd* has been called
33028 if (mode
== VOIDmode
)
33030 target
= gen_reg_rtx (mode
);
33035 op1
= expand_simple_binop (DImode
, ASHIFT
, op1
, GEN_INT (32),
33036 op1
, 1, OPTAB_DIRECT
);
33037 op0
= expand_simple_binop (DImode
, IOR
, op0
, op1
,
33038 op0
, 1, OPTAB_DIRECT
);
33041 emit_move_insn (target
, op0
);
33044 case IX86_BUILTIN_FXSAVE
:
33045 case IX86_BUILTIN_FXRSTOR
:
33046 case IX86_BUILTIN_FXSAVE64
:
33047 case IX86_BUILTIN_FXRSTOR64
:
33048 case IX86_BUILTIN_FNSTENV
:
33049 case IX86_BUILTIN_FLDENV
:
33050 case IX86_BUILTIN_FNSTSW
:
33054 case IX86_BUILTIN_FXSAVE
:
33055 icode
= CODE_FOR_fxsave
;
33057 case IX86_BUILTIN_FXRSTOR
:
33058 icode
= CODE_FOR_fxrstor
;
33060 case IX86_BUILTIN_FXSAVE64
:
33061 icode
= CODE_FOR_fxsave64
;
33063 case IX86_BUILTIN_FXRSTOR64
:
33064 icode
= CODE_FOR_fxrstor64
;
33066 case IX86_BUILTIN_FNSTENV
:
33067 icode
= CODE_FOR_fnstenv
;
33069 case IX86_BUILTIN_FLDENV
:
33070 icode
= CODE_FOR_fldenv
;
33072 case IX86_BUILTIN_FNSTSW
:
33073 icode
= CODE_FOR_fnstsw
;
33077 gcc_unreachable ();
33080 arg0
= CALL_EXPR_ARG (exp
, 0);
33081 op0
= expand_normal (arg0
);
33083 if (!address_operand (op0
, VOIDmode
))
33085 op0
= convert_memory_address (Pmode
, op0
);
33086 op0
= copy_addr_to_reg (op0
);
33088 op0
= gen_rtx_MEM (mode0
, op0
);
33090 pat
= GEN_FCN (icode
) (op0
);
33095 case IX86_BUILTIN_XSAVE
:
33096 case IX86_BUILTIN_XRSTOR
:
33097 case IX86_BUILTIN_XSAVE64
:
33098 case IX86_BUILTIN_XRSTOR64
:
33099 case IX86_BUILTIN_XSAVEOPT
:
33100 case IX86_BUILTIN_XSAVEOPT64
:
33101 arg0
= CALL_EXPR_ARG (exp
, 0);
33102 arg1
= CALL_EXPR_ARG (exp
, 1);
33103 op0
= expand_normal (arg0
);
33104 op1
= expand_normal (arg1
);
33106 if (!address_operand (op0
, VOIDmode
))
33108 op0
= convert_memory_address (Pmode
, op0
);
33109 op0
= copy_addr_to_reg (op0
);
33111 op0
= gen_rtx_MEM (BLKmode
, op0
);
33113 op1
= force_reg (DImode
, op1
);
33117 op2
= expand_simple_binop (DImode
, LSHIFTRT
, op1
, GEN_INT (32),
33118 NULL
, 1, OPTAB_DIRECT
);
33121 case IX86_BUILTIN_XSAVE
:
33122 icode
= CODE_FOR_xsave_rex64
;
33124 case IX86_BUILTIN_XRSTOR
:
33125 icode
= CODE_FOR_xrstor_rex64
;
33127 case IX86_BUILTIN_XSAVE64
:
33128 icode
= CODE_FOR_xsave64
;
33130 case IX86_BUILTIN_XRSTOR64
:
33131 icode
= CODE_FOR_xrstor64
;
33133 case IX86_BUILTIN_XSAVEOPT
:
33134 icode
= CODE_FOR_xsaveopt_rex64
;
33136 case IX86_BUILTIN_XSAVEOPT64
:
33137 icode
= CODE_FOR_xsaveopt64
;
33140 gcc_unreachable ();
33143 op2
= gen_lowpart (SImode
, op2
);
33144 op1
= gen_lowpart (SImode
, op1
);
33145 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
33151 case IX86_BUILTIN_XSAVE
:
33152 icode
= CODE_FOR_xsave
;
33154 case IX86_BUILTIN_XRSTOR
:
33155 icode
= CODE_FOR_xrstor
;
33157 case IX86_BUILTIN_XSAVEOPT
:
33158 icode
= CODE_FOR_xsaveopt
;
33161 gcc_unreachable ();
33163 pat
= GEN_FCN (icode
) (op0
, op1
);
33170 case IX86_BUILTIN_LLWPCB
:
33171 arg0
= CALL_EXPR_ARG (exp
, 0);
33172 op0
= expand_normal (arg0
);
33173 icode
= CODE_FOR_lwp_llwpcb
;
33174 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
33175 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
33176 emit_insn (gen_lwp_llwpcb (op0
));
33179 case IX86_BUILTIN_SLWPCB
:
33180 icode
= CODE_FOR_lwp_slwpcb
;
33182 || !insn_data
[icode
].operand
[0].predicate (target
, Pmode
))
33183 target
= gen_reg_rtx (Pmode
);
33184 emit_insn (gen_lwp_slwpcb (target
));
33187 case IX86_BUILTIN_BEXTRI32
:
33188 case IX86_BUILTIN_BEXTRI64
:
33189 arg0
= CALL_EXPR_ARG (exp
, 0);
33190 arg1
= CALL_EXPR_ARG (exp
, 1);
33191 op0
= expand_normal (arg0
);
33192 op1
= expand_normal (arg1
);
33193 icode
= (fcode
== IX86_BUILTIN_BEXTRI32
33194 ? CODE_FOR_tbm_bextri_si
33195 : CODE_FOR_tbm_bextri_di
);
33196 if (!CONST_INT_P (op1
))
33198 error ("last argument must be an immediate");
33203 unsigned char length
= (INTVAL (op1
) >> 8) & 0xFF;
33204 unsigned char lsb_index
= INTVAL (op1
) & 0xFF;
33205 op1
= GEN_INT (length
);
33206 op2
= GEN_INT (lsb_index
);
33207 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
33213 case IX86_BUILTIN_RDRAND16_STEP
:
33214 icode
= CODE_FOR_rdrandhi_1
;
33218 case IX86_BUILTIN_RDRAND32_STEP
:
33219 icode
= CODE_FOR_rdrandsi_1
;
33223 case IX86_BUILTIN_RDRAND64_STEP
:
33224 icode
= CODE_FOR_rdranddi_1
;
33228 op0
= gen_reg_rtx (mode0
);
33229 emit_insn (GEN_FCN (icode
) (op0
));
33231 arg0
= CALL_EXPR_ARG (exp
, 0);
33232 op1
= expand_normal (arg0
);
33233 if (!address_operand (op1
, VOIDmode
))
33235 op1
= convert_memory_address (Pmode
, op1
);
33236 op1
= copy_addr_to_reg (op1
);
33238 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
33240 op1
= gen_reg_rtx (SImode
);
33241 emit_move_insn (op1
, CONST1_RTX (SImode
));
33243 /* Emit SImode conditional move. */
33244 if (mode0
== HImode
)
33246 op2
= gen_reg_rtx (SImode
);
33247 emit_insn (gen_zero_extendhisi2 (op2
, op0
));
33249 else if (mode0
== SImode
)
33252 op2
= gen_rtx_SUBREG (SImode
, op0
, 0);
33255 target
= gen_reg_rtx (SImode
);
33257 pat
= gen_rtx_GEU (VOIDmode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
33259 emit_insn (gen_rtx_SET (VOIDmode
, target
,
33260 gen_rtx_IF_THEN_ELSE (SImode
, pat
, op2
, op1
)));
33263 case IX86_BUILTIN_RDSEED16_STEP
:
33264 icode
= CODE_FOR_rdseedhi_1
;
33268 case IX86_BUILTIN_RDSEED32_STEP
:
33269 icode
= CODE_FOR_rdseedsi_1
;
33273 case IX86_BUILTIN_RDSEED64_STEP
:
33274 icode
= CODE_FOR_rdseeddi_1
;
33278 op0
= gen_reg_rtx (mode0
);
33279 emit_insn (GEN_FCN (icode
) (op0
));
33281 arg0
= CALL_EXPR_ARG (exp
, 0);
33282 op1
= expand_normal (arg0
);
33283 if (!address_operand (op1
, VOIDmode
))
33285 op1
= convert_memory_address (Pmode
, op1
);
33286 op1
= copy_addr_to_reg (op1
);
33288 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
33290 op2
= gen_reg_rtx (QImode
);
33292 pat
= gen_rtx_LTU (QImode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
33294 emit_insn (gen_rtx_SET (VOIDmode
, op2
, pat
));
33297 target
= gen_reg_rtx (SImode
);
33299 emit_insn (gen_zero_extendqisi2 (target
, op2
));
33302 case IX86_BUILTIN_ADDCARRYX32
:
33303 icode
= TARGET_ADX
? CODE_FOR_adcxsi3
: CODE_FOR_addsi3_carry
;
33307 case IX86_BUILTIN_ADDCARRYX64
:
33308 icode
= TARGET_ADX
? CODE_FOR_adcxdi3
: CODE_FOR_adddi3_carry
;
33312 arg0
= CALL_EXPR_ARG (exp
, 0); /* unsigned char c_in. */
33313 arg1
= CALL_EXPR_ARG (exp
, 1); /* unsigned int src1. */
33314 arg2
= CALL_EXPR_ARG (exp
, 2); /* unsigned int src2. */
33315 arg3
= CALL_EXPR_ARG (exp
, 3); /* unsigned int *sum_out. */
33317 op0
= gen_reg_rtx (QImode
);
33319 /* Generate CF from input operand. */
33320 op1
= expand_normal (arg0
);
33321 op1
= copy_to_mode_reg (QImode
, convert_to_mode (QImode
, op1
, 1));
33322 emit_insn (gen_addqi3_cc (op0
, op1
, constm1_rtx
));
33324 /* Gen ADCX instruction to compute X+Y+CF. */
33325 op2
= expand_normal (arg1
);
33326 op3
= expand_normal (arg2
);
33329 op2
= copy_to_mode_reg (mode0
, op2
);
33331 op3
= copy_to_mode_reg (mode0
, op3
);
33333 op0
= gen_reg_rtx (mode0
);
33335 op4
= gen_rtx_REG (CCCmode
, FLAGS_REG
);
33336 pat
= gen_rtx_LTU (VOIDmode
, op4
, const0_rtx
);
33337 emit_insn (GEN_FCN (icode
) (op0
, op2
, op3
, op4
, pat
));
33339 /* Store the result. */
33340 op4
= expand_normal (arg3
);
33341 if (!address_operand (op4
, VOIDmode
))
33343 op4
= convert_memory_address (Pmode
, op4
);
33344 op4
= copy_addr_to_reg (op4
);
33346 emit_move_insn (gen_rtx_MEM (mode0
, op4
), op0
);
33348 /* Return current CF value. */
33350 target
= gen_reg_rtx (QImode
);
33352 PUT_MODE (pat
, QImode
);
33353 emit_insn (gen_rtx_SET (VOIDmode
, target
, pat
));
33356 case IX86_BUILTIN_GATHERSIV2DF
:
33357 icode
= CODE_FOR_avx2_gathersiv2df
;
33359 case IX86_BUILTIN_GATHERSIV4DF
:
33360 icode
= CODE_FOR_avx2_gathersiv4df
;
33362 case IX86_BUILTIN_GATHERDIV2DF
:
33363 icode
= CODE_FOR_avx2_gatherdiv2df
;
33365 case IX86_BUILTIN_GATHERDIV4DF
:
33366 icode
= CODE_FOR_avx2_gatherdiv4df
;
33368 case IX86_BUILTIN_GATHERSIV4SF
:
33369 icode
= CODE_FOR_avx2_gathersiv4sf
;
33371 case IX86_BUILTIN_GATHERSIV8SF
:
33372 icode
= CODE_FOR_avx2_gathersiv8sf
;
33374 case IX86_BUILTIN_GATHERDIV4SF
:
33375 icode
= CODE_FOR_avx2_gatherdiv4sf
;
33377 case IX86_BUILTIN_GATHERDIV8SF
:
33378 icode
= CODE_FOR_avx2_gatherdiv8sf
;
33380 case IX86_BUILTIN_GATHERSIV2DI
:
33381 icode
= CODE_FOR_avx2_gathersiv2di
;
33383 case IX86_BUILTIN_GATHERSIV4DI
:
33384 icode
= CODE_FOR_avx2_gathersiv4di
;
33386 case IX86_BUILTIN_GATHERDIV2DI
:
33387 icode
= CODE_FOR_avx2_gatherdiv2di
;
33389 case IX86_BUILTIN_GATHERDIV4DI
:
33390 icode
= CODE_FOR_avx2_gatherdiv4di
;
33392 case IX86_BUILTIN_GATHERSIV4SI
:
33393 icode
= CODE_FOR_avx2_gathersiv4si
;
33395 case IX86_BUILTIN_GATHERSIV8SI
:
33396 icode
= CODE_FOR_avx2_gathersiv8si
;
33398 case IX86_BUILTIN_GATHERDIV4SI
:
33399 icode
= CODE_FOR_avx2_gatherdiv4si
;
33401 case IX86_BUILTIN_GATHERDIV8SI
:
33402 icode
= CODE_FOR_avx2_gatherdiv8si
;
33404 case IX86_BUILTIN_GATHERALTSIV4DF
:
33405 icode
= CODE_FOR_avx2_gathersiv4df
;
33407 case IX86_BUILTIN_GATHERALTDIV8SF
:
33408 icode
= CODE_FOR_avx2_gatherdiv8sf
;
33410 case IX86_BUILTIN_GATHERALTSIV4DI
:
33411 icode
= CODE_FOR_avx2_gathersiv4di
;
33413 case IX86_BUILTIN_GATHERALTDIV8SI
:
33414 icode
= CODE_FOR_avx2_gatherdiv8si
;
33418 arg0
= CALL_EXPR_ARG (exp
, 0);
33419 arg1
= CALL_EXPR_ARG (exp
, 1);
33420 arg2
= CALL_EXPR_ARG (exp
, 2);
33421 arg3
= CALL_EXPR_ARG (exp
, 3);
33422 arg4
= CALL_EXPR_ARG (exp
, 4);
33423 op0
= expand_normal (arg0
);
33424 op1
= expand_normal (arg1
);
33425 op2
= expand_normal (arg2
);
33426 op3
= expand_normal (arg3
);
33427 op4
= expand_normal (arg4
);
33428 /* Note the arg order is different from the operand order. */
33429 mode0
= insn_data
[icode
].operand
[1].mode
;
33430 mode2
= insn_data
[icode
].operand
[3].mode
;
33431 mode3
= insn_data
[icode
].operand
[4].mode
;
33432 mode4
= insn_data
[icode
].operand
[5].mode
;
33434 if (target
== NULL_RTX
33435 || GET_MODE (target
) != insn_data
[icode
].operand
[0].mode
)
33436 subtarget
= gen_reg_rtx (insn_data
[icode
].operand
[0].mode
);
33438 subtarget
= target
;
33440 if (fcode
== IX86_BUILTIN_GATHERALTSIV4DF
33441 || fcode
== IX86_BUILTIN_GATHERALTSIV4DI
)
33443 rtx half
= gen_reg_rtx (V4SImode
);
33444 if (!nonimmediate_operand (op2
, V8SImode
))
33445 op2
= copy_to_mode_reg (V8SImode
, op2
);
33446 emit_insn (gen_vec_extract_lo_v8si (half
, op2
));
33449 else if (fcode
== IX86_BUILTIN_GATHERALTDIV8SF
33450 || fcode
== IX86_BUILTIN_GATHERALTDIV8SI
)
33452 rtx (*gen
) (rtx
, rtx
);
33453 rtx half
= gen_reg_rtx (mode0
);
33454 if (mode0
== V4SFmode
)
33455 gen
= gen_vec_extract_lo_v8sf
;
33457 gen
= gen_vec_extract_lo_v8si
;
33458 if (!nonimmediate_operand (op0
, GET_MODE (op0
)))
33459 op0
= copy_to_mode_reg (GET_MODE (op0
), op0
);
33460 emit_insn (gen (half
, op0
));
33462 if (!nonimmediate_operand (op3
, GET_MODE (op3
)))
33463 op3
= copy_to_mode_reg (GET_MODE (op3
), op3
);
33464 emit_insn (gen (half
, op3
));
33468 /* Force memory operand only with base register here. But we
33469 don't want to do it on memory operand for other builtin
33471 op1
= force_reg (Pmode
, convert_to_mode (Pmode
, op1
, 1));
33473 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
33474 op0
= copy_to_mode_reg (mode0
, op0
);
33475 if (!insn_data
[icode
].operand
[2].predicate (op1
, Pmode
))
33476 op1
= copy_to_mode_reg (Pmode
, op1
);
33477 if (!insn_data
[icode
].operand
[3].predicate (op2
, mode2
))
33478 op2
= copy_to_mode_reg (mode2
, op2
);
33479 if (!insn_data
[icode
].operand
[4].predicate (op3
, mode3
))
33480 op3
= copy_to_mode_reg (mode3
, op3
);
33481 if (!insn_data
[icode
].operand
[5].predicate (op4
, mode4
))
33483 error ("last argument must be scale 1, 2, 4, 8");
33487 /* Optimize. If mask is known to have all high bits set,
33488 replace op0 with pc_rtx to signal that the instruction
33489 overwrites the whole destination and doesn't use its
33490 previous contents. */
33493 if (TREE_CODE (arg3
) == VECTOR_CST
)
33495 unsigned int negative
= 0;
33496 for (i
= 0; i
< VECTOR_CST_NELTS (arg3
); ++i
)
33498 tree cst
= VECTOR_CST_ELT (arg3
, i
);
33499 if (TREE_CODE (cst
) == INTEGER_CST
33500 && tree_int_cst_sign_bit (cst
))
33502 else if (TREE_CODE (cst
) == REAL_CST
33503 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst
)))
33506 if (negative
== TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3
)))
33509 else if (TREE_CODE (arg3
) == SSA_NAME
)
33511 /* Recognize also when mask is like:
33512 __v2df src = _mm_setzero_pd ();
33513 __v2df mask = _mm_cmpeq_pd (src, src);
33515 __v8sf src = _mm256_setzero_ps ();
33516 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
33517 as that is a cheaper way to load all ones into
33518 a register than having to load a constant from
33520 gimple def_stmt
= SSA_NAME_DEF_STMT (arg3
);
33521 if (is_gimple_call (def_stmt
))
33523 tree fndecl
= gimple_call_fndecl (def_stmt
);
33525 && DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
33526 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl
))
33528 case IX86_BUILTIN_CMPPD
:
33529 case IX86_BUILTIN_CMPPS
:
33530 case IX86_BUILTIN_CMPPD256
:
33531 case IX86_BUILTIN_CMPPS256
:
33532 if (!integer_zerop (gimple_call_arg (def_stmt
, 2)))
33535 case IX86_BUILTIN_CMPEQPD
:
33536 case IX86_BUILTIN_CMPEQPS
:
33537 if (initializer_zerop (gimple_call_arg (def_stmt
, 0))
33538 && initializer_zerop (gimple_call_arg (def_stmt
,
33549 pat
= GEN_FCN (icode
) (subtarget
, op0
, op1
, op2
, op3
, op4
);
33554 if (fcode
== IX86_BUILTIN_GATHERDIV8SF
33555 || fcode
== IX86_BUILTIN_GATHERDIV8SI
)
33557 enum machine_mode tmode
= GET_MODE (subtarget
) == V8SFmode
33558 ? V4SFmode
: V4SImode
;
33559 if (target
== NULL_RTX
)
33560 target
= gen_reg_rtx (tmode
);
33561 if (tmode
== V4SFmode
)
33562 emit_insn (gen_vec_extract_lo_v8sf (target
, subtarget
));
33564 emit_insn (gen_vec_extract_lo_v8si (target
, subtarget
));
33567 target
= subtarget
;
33571 case IX86_BUILTIN_XABORT
:
33572 icode
= CODE_FOR_xabort
;
33573 arg0
= CALL_EXPR_ARG (exp
, 0);
33574 op0
= expand_normal (arg0
);
33575 mode0
= insn_data
[icode
].operand
[0].mode
;
33576 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
33578 error ("the xabort's argument must be an 8-bit immediate");
33581 emit_insn (gen_xabort (op0
));
33588 for (i
= 0, d
= bdesc_special_args
;
33589 i
< ARRAY_SIZE (bdesc_special_args
);
33591 if (d
->code
== fcode
)
33592 return ix86_expand_special_args_builtin (d
, exp
, target
);
33594 for (i
= 0, d
= bdesc_args
;
33595 i
< ARRAY_SIZE (bdesc_args
);
33597 if (d
->code
== fcode
)
33600 case IX86_BUILTIN_FABSQ
:
33601 case IX86_BUILTIN_COPYSIGNQ
:
33603 /* Emit a normal call if SSE isn't available. */
33604 return expand_call (exp
, target
, ignore
);
33606 return ix86_expand_args_builtin (d
, exp
, target
);
33609 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
33610 if (d
->code
== fcode
)
33611 return ix86_expand_sse_comi (d
, exp
, target
);
33613 for (i
= 0, d
= bdesc_pcmpestr
;
33614 i
< ARRAY_SIZE (bdesc_pcmpestr
);
33616 if (d
->code
== fcode
)
33617 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
33619 for (i
= 0, d
= bdesc_pcmpistr
;
33620 i
< ARRAY_SIZE (bdesc_pcmpistr
);
33622 if (d
->code
== fcode
)
33623 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
33625 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
33626 if (d
->code
== fcode
)
33627 return ix86_expand_multi_arg_builtin (d
->icode
, exp
, target
,
33628 (enum ix86_builtin_func_type
)
33629 d
->flag
, d
->comparison
);
33631 gcc_unreachable ();
33634 /* Returns a function decl for a vectorized version of the builtin function
33635 with builtin function code FN and the result vector type TYPE, or NULL_TREE
33636 if it is not available. */
33639 ix86_builtin_vectorized_function (tree fndecl
, tree type_out
,
33642 enum machine_mode in_mode
, out_mode
;
33644 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
33646 if (TREE_CODE (type_out
) != VECTOR_TYPE
33647 || TREE_CODE (type_in
) != VECTOR_TYPE
33648 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
33651 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
33652 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
33653 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
33654 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
33658 case BUILT_IN_SQRT
:
33659 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33661 if (out_n
== 2 && in_n
== 2)
33662 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
33663 else if (out_n
== 4 && in_n
== 4)
33664 return ix86_builtins
[IX86_BUILTIN_SQRTPD256
];
33668 case BUILT_IN_SQRTF
:
33669 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33671 if (out_n
== 4 && in_n
== 4)
33672 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR
];
33673 else if (out_n
== 8 && in_n
== 8)
33674 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR256
];
33678 case BUILT_IN_IFLOOR
:
33679 case BUILT_IN_LFLOOR
:
33680 case BUILT_IN_LLFLOOR
:
33681 /* The round insn does not trap on denormals. */
33682 if (flag_trapping_math
|| !TARGET_ROUND
)
33685 if (out_mode
== SImode
&& in_mode
== DFmode
)
33687 if (out_n
== 4 && in_n
== 2)
33688 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
];
33689 else if (out_n
== 8 && in_n
== 4)
33690 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
];
33694 case BUILT_IN_IFLOORF
:
33695 case BUILT_IN_LFLOORF
:
33696 case BUILT_IN_LLFLOORF
:
33697 /* The round insn does not trap on denormals. */
33698 if (flag_trapping_math
|| !TARGET_ROUND
)
33701 if (out_mode
== SImode
&& in_mode
== SFmode
)
33703 if (out_n
== 4 && in_n
== 4)
33704 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX
];
33705 else if (out_n
== 8 && in_n
== 8)
33706 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX256
];
33710 case BUILT_IN_ICEIL
:
33711 case BUILT_IN_LCEIL
:
33712 case BUILT_IN_LLCEIL
:
33713 /* The round insn does not trap on denormals. */
33714 if (flag_trapping_math
|| !TARGET_ROUND
)
33717 if (out_mode
== SImode
&& in_mode
== DFmode
)
33719 if (out_n
== 4 && in_n
== 2)
33720 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
];
33721 else if (out_n
== 8 && in_n
== 4)
33722 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
];
33726 case BUILT_IN_ICEILF
:
33727 case BUILT_IN_LCEILF
:
33728 case BUILT_IN_LLCEILF
:
33729 /* The round insn does not trap on denormals. */
33730 if (flag_trapping_math
|| !TARGET_ROUND
)
33733 if (out_mode
== SImode
&& in_mode
== SFmode
)
33735 if (out_n
== 4 && in_n
== 4)
33736 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX
];
33737 else if (out_n
== 8 && in_n
== 8)
33738 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX256
];
33742 case BUILT_IN_IRINT
:
33743 case BUILT_IN_LRINT
:
33744 case BUILT_IN_LLRINT
:
33745 if (out_mode
== SImode
&& in_mode
== DFmode
)
33747 if (out_n
== 4 && in_n
== 2)
33748 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX
];
33749 else if (out_n
== 8 && in_n
== 4)
33750 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX256
];
33754 case BUILT_IN_IRINTF
:
33755 case BUILT_IN_LRINTF
:
33756 case BUILT_IN_LLRINTF
:
33757 if (out_mode
== SImode
&& in_mode
== SFmode
)
33759 if (out_n
== 4 && in_n
== 4)
33760 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
33761 else if (out_n
== 8 && in_n
== 8)
33762 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ256
];
33766 case BUILT_IN_IROUND
:
33767 case BUILT_IN_LROUND
:
33768 case BUILT_IN_LLROUND
:
33769 /* The round insn does not trap on denormals. */
33770 if (flag_trapping_math
|| !TARGET_ROUND
)
33773 if (out_mode
== SImode
&& in_mode
== DFmode
)
33775 if (out_n
== 4 && in_n
== 2)
33776 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
];
33777 else if (out_n
== 8 && in_n
== 4)
33778 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
];
33782 case BUILT_IN_IROUNDF
:
33783 case BUILT_IN_LROUNDF
:
33784 case BUILT_IN_LLROUNDF
:
33785 /* The round insn does not trap on denormals. */
33786 if (flag_trapping_math
|| !TARGET_ROUND
)
33789 if (out_mode
== SImode
&& in_mode
== SFmode
)
33791 if (out_n
== 4 && in_n
== 4)
33792 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX
];
33793 else if (out_n
== 8 && in_n
== 8)
33794 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX256
];
33798 case BUILT_IN_COPYSIGN
:
33799 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33801 if (out_n
== 2 && in_n
== 2)
33802 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD
];
33803 else if (out_n
== 4 && in_n
== 4)
33804 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD256
];
33808 case BUILT_IN_COPYSIGNF
:
33809 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33811 if (out_n
== 4 && in_n
== 4)
33812 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS
];
33813 else if (out_n
== 8 && in_n
== 8)
33814 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS256
];
33818 case BUILT_IN_FLOOR
:
33819 /* The round insn does not trap on denormals. */
33820 if (flag_trapping_math
|| !TARGET_ROUND
)
33823 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33825 if (out_n
== 2 && in_n
== 2)
33826 return ix86_builtins
[IX86_BUILTIN_FLOORPD
];
33827 else if (out_n
== 4 && in_n
== 4)
33828 return ix86_builtins
[IX86_BUILTIN_FLOORPD256
];
33832 case BUILT_IN_FLOORF
:
33833 /* The round insn does not trap on denormals. */
33834 if (flag_trapping_math
|| !TARGET_ROUND
)
33837 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33839 if (out_n
== 4 && in_n
== 4)
33840 return ix86_builtins
[IX86_BUILTIN_FLOORPS
];
33841 else if (out_n
== 8 && in_n
== 8)
33842 return ix86_builtins
[IX86_BUILTIN_FLOORPS256
];
33846 case BUILT_IN_CEIL
:
33847 /* The round insn does not trap on denormals. */
33848 if (flag_trapping_math
|| !TARGET_ROUND
)
33851 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33853 if (out_n
== 2 && in_n
== 2)
33854 return ix86_builtins
[IX86_BUILTIN_CEILPD
];
33855 else if (out_n
== 4 && in_n
== 4)
33856 return ix86_builtins
[IX86_BUILTIN_CEILPD256
];
33860 case BUILT_IN_CEILF
:
33861 /* The round insn does not trap on denormals. */
33862 if (flag_trapping_math
|| !TARGET_ROUND
)
33865 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33867 if (out_n
== 4 && in_n
== 4)
33868 return ix86_builtins
[IX86_BUILTIN_CEILPS
];
33869 else if (out_n
== 8 && in_n
== 8)
33870 return ix86_builtins
[IX86_BUILTIN_CEILPS256
];
33874 case BUILT_IN_TRUNC
:
33875 /* The round insn does not trap on denormals. */
33876 if (flag_trapping_math
|| !TARGET_ROUND
)
33879 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33881 if (out_n
== 2 && in_n
== 2)
33882 return ix86_builtins
[IX86_BUILTIN_TRUNCPD
];
33883 else if (out_n
== 4 && in_n
== 4)
33884 return ix86_builtins
[IX86_BUILTIN_TRUNCPD256
];
33888 case BUILT_IN_TRUNCF
:
33889 /* The round insn does not trap on denormals. */
33890 if (flag_trapping_math
|| !TARGET_ROUND
)
33893 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33895 if (out_n
== 4 && in_n
== 4)
33896 return ix86_builtins
[IX86_BUILTIN_TRUNCPS
];
33897 else if (out_n
== 8 && in_n
== 8)
33898 return ix86_builtins
[IX86_BUILTIN_TRUNCPS256
];
33902 case BUILT_IN_RINT
:
33903 /* The round insn does not trap on denormals. */
33904 if (flag_trapping_math
|| !TARGET_ROUND
)
33907 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33909 if (out_n
== 2 && in_n
== 2)
33910 return ix86_builtins
[IX86_BUILTIN_RINTPD
];
33911 else if (out_n
== 4 && in_n
== 4)
33912 return ix86_builtins
[IX86_BUILTIN_RINTPD256
];
33916 case BUILT_IN_RINTF
:
33917 /* The round insn does not trap on denormals. */
33918 if (flag_trapping_math
|| !TARGET_ROUND
)
33921 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33923 if (out_n
== 4 && in_n
== 4)
33924 return ix86_builtins
[IX86_BUILTIN_RINTPS
];
33925 else if (out_n
== 8 && in_n
== 8)
33926 return ix86_builtins
[IX86_BUILTIN_RINTPS256
];
33930 case BUILT_IN_ROUND
:
33931 /* The round insn does not trap on denormals. */
33932 if (flag_trapping_math
|| !TARGET_ROUND
)
33935 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33937 if (out_n
== 2 && in_n
== 2)
33938 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ
];
33939 else if (out_n
== 4 && in_n
== 4)
33940 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ256
];
33944 case BUILT_IN_ROUNDF
:
33945 /* The round insn does not trap on denormals. */
33946 if (flag_trapping_math
|| !TARGET_ROUND
)
33949 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33951 if (out_n
== 4 && in_n
== 4)
33952 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ
];
33953 else if (out_n
== 8 && in_n
== 8)
33954 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ256
];
33959 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33961 if (out_n
== 2 && in_n
== 2)
33962 return ix86_builtins
[IX86_BUILTIN_VFMADDPD
];
33963 if (out_n
== 4 && in_n
== 4)
33964 return ix86_builtins
[IX86_BUILTIN_VFMADDPD256
];
33968 case BUILT_IN_FMAF
:
33969 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33971 if (out_n
== 4 && in_n
== 4)
33972 return ix86_builtins
[IX86_BUILTIN_VFMADDPS
];
33973 if (out_n
== 8 && in_n
== 8)
33974 return ix86_builtins
[IX86_BUILTIN_VFMADDPS256
];
33982 /* Dispatch to a handler for a vectorization library. */
33983 if (ix86_veclib_handler
)
33984 return ix86_veclib_handler ((enum built_in_function
) fn
, type_out
,
33990 /* Handler for an SVML-style interface to
33991 a library with vectorized intrinsics. */
33994 ix86_veclibabi_svml (enum built_in_function fn
, tree type_out
, tree type_in
)
33997 tree fntype
, new_fndecl
, args
;
34000 enum machine_mode el_mode
, in_mode
;
34003 /* The SVML is suitable for unsafe math only. */
34004 if (!flag_unsafe_math_optimizations
)
34007 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
34008 n
= TYPE_VECTOR_SUBPARTS (type_out
);
34009 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
34010 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
34011 if (el_mode
!= in_mode
34019 case BUILT_IN_LOG10
:
34021 case BUILT_IN_TANH
:
34023 case BUILT_IN_ATAN
:
34024 case BUILT_IN_ATAN2
:
34025 case BUILT_IN_ATANH
:
34026 case BUILT_IN_CBRT
:
34027 case BUILT_IN_SINH
:
34029 case BUILT_IN_ASINH
:
34030 case BUILT_IN_ASIN
:
34031 case BUILT_IN_COSH
:
34033 case BUILT_IN_ACOSH
:
34034 case BUILT_IN_ACOS
:
34035 if (el_mode
!= DFmode
|| n
!= 2)
34039 case BUILT_IN_EXPF
:
34040 case BUILT_IN_LOGF
:
34041 case BUILT_IN_LOG10F
:
34042 case BUILT_IN_POWF
:
34043 case BUILT_IN_TANHF
:
34044 case BUILT_IN_TANF
:
34045 case BUILT_IN_ATANF
:
34046 case BUILT_IN_ATAN2F
:
34047 case BUILT_IN_ATANHF
:
34048 case BUILT_IN_CBRTF
:
34049 case BUILT_IN_SINHF
:
34050 case BUILT_IN_SINF
:
34051 case BUILT_IN_ASINHF
:
34052 case BUILT_IN_ASINF
:
34053 case BUILT_IN_COSHF
:
34054 case BUILT_IN_COSF
:
34055 case BUILT_IN_ACOSHF
:
34056 case BUILT_IN_ACOSF
:
34057 if (el_mode
!= SFmode
|| n
!= 4)
34065 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
34067 if (fn
== BUILT_IN_LOGF
)
34068 strcpy (name
, "vmlsLn4");
34069 else if (fn
== BUILT_IN_LOG
)
34070 strcpy (name
, "vmldLn2");
34073 sprintf (name
, "vmls%s", bname
+10);
34074 name
[strlen (name
)-1] = '4';
34077 sprintf (name
, "vmld%s2", bname
+10);
34079 /* Convert to uppercase. */
34083 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
34085 args
= TREE_CHAIN (args
))
34089 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
34091 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
34093 /* Build a function declaration for the vectorized function. */
34094 new_fndecl
= build_decl (BUILTINS_LOCATION
,
34095 FUNCTION_DECL
, get_identifier (name
), fntype
);
34096 TREE_PUBLIC (new_fndecl
) = 1;
34097 DECL_EXTERNAL (new_fndecl
) = 1;
34098 DECL_IS_NOVOPS (new_fndecl
) = 1;
34099 TREE_READONLY (new_fndecl
) = 1;
34104 /* Handler for an ACML-style interface to
34105 a library with vectorized intrinsics. */
34108 ix86_veclibabi_acml (enum built_in_function fn
, tree type_out
, tree type_in
)
34110 char name
[20] = "__vr.._";
34111 tree fntype
, new_fndecl
, args
;
34114 enum machine_mode el_mode
, in_mode
;
34117 /* The ACML is 64bits only and suitable for unsafe math only as
34118 it does not correctly support parts of IEEE with the required
34119 precision such as denormals. */
34121 || !flag_unsafe_math_optimizations
)
34124 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
34125 n
= TYPE_VECTOR_SUBPARTS (type_out
);
34126 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
34127 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
34128 if (el_mode
!= in_mode
34138 case BUILT_IN_LOG2
:
34139 case BUILT_IN_LOG10
:
34142 if (el_mode
!= DFmode
34147 case BUILT_IN_SINF
:
34148 case BUILT_IN_COSF
:
34149 case BUILT_IN_EXPF
:
34150 case BUILT_IN_POWF
:
34151 case BUILT_IN_LOGF
:
34152 case BUILT_IN_LOG2F
:
34153 case BUILT_IN_LOG10F
:
34156 if (el_mode
!= SFmode
34165 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
34166 sprintf (name
+ 7, "%s", bname
+10);
34169 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
34171 args
= TREE_CHAIN (args
))
34175 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
34177 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
34179 /* Build a function declaration for the vectorized function. */
34180 new_fndecl
= build_decl (BUILTINS_LOCATION
,
34181 FUNCTION_DECL
, get_identifier (name
), fntype
);
34182 TREE_PUBLIC (new_fndecl
) = 1;
34183 DECL_EXTERNAL (new_fndecl
) = 1;
34184 DECL_IS_NOVOPS (new_fndecl
) = 1;
34185 TREE_READONLY (new_fndecl
) = 1;
34190 /* Returns a decl of a function that implements gather load with
34191 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
34192 Return NULL_TREE if it is not available. */
34195 ix86_vectorize_builtin_gather (const_tree mem_vectype
,
34196 const_tree index_type
, int scale
)
34199 enum ix86_builtins code
;
34204 if ((TREE_CODE (index_type
) != INTEGER_TYPE
34205 && !POINTER_TYPE_P (index_type
))
34206 || (TYPE_MODE (index_type
) != SImode
34207 && TYPE_MODE (index_type
) != DImode
))
34210 if (TYPE_PRECISION (index_type
) > POINTER_SIZE
)
34213 /* v*gather* insn sign extends index to pointer mode. */
34214 if (TYPE_PRECISION (index_type
) < POINTER_SIZE
34215 && TYPE_UNSIGNED (index_type
))
34220 || (scale
& (scale
- 1)) != 0)
34223 si
= TYPE_MODE (index_type
) == SImode
;
34224 switch (TYPE_MODE (mem_vectype
))
34227 code
= si
? IX86_BUILTIN_GATHERSIV2DF
: IX86_BUILTIN_GATHERDIV2DF
;
34230 code
= si
? IX86_BUILTIN_GATHERALTSIV4DF
: IX86_BUILTIN_GATHERDIV4DF
;
34233 code
= si
? IX86_BUILTIN_GATHERSIV2DI
: IX86_BUILTIN_GATHERDIV2DI
;
34236 code
= si
? IX86_BUILTIN_GATHERALTSIV4DI
: IX86_BUILTIN_GATHERDIV4DI
;
34239 code
= si
? IX86_BUILTIN_GATHERSIV4SF
: IX86_BUILTIN_GATHERDIV4SF
;
34242 code
= si
? IX86_BUILTIN_GATHERSIV8SF
: IX86_BUILTIN_GATHERALTDIV8SF
;
34245 code
= si
? IX86_BUILTIN_GATHERSIV4SI
: IX86_BUILTIN_GATHERDIV4SI
;
34248 code
= si
? IX86_BUILTIN_GATHERSIV8SI
: IX86_BUILTIN_GATHERALTDIV8SI
;
34254 return ix86_builtins
[code
];
34257 /* Returns a code for a target-specific builtin that implements
34258 reciprocal of the function, or NULL_TREE if not available. */
34261 ix86_builtin_reciprocal (unsigned int fn
, bool md_fn
,
34262 bool sqrt ATTRIBUTE_UNUSED
)
34264 if (! (TARGET_SSE_MATH
&& !optimize_insn_for_size_p ()
34265 && flag_finite_math_only
&& !flag_trapping_math
34266 && flag_unsafe_math_optimizations
))
34270 /* Machine dependent builtins. */
34273 /* Vectorized version of sqrt to rsqrt conversion. */
34274 case IX86_BUILTIN_SQRTPS_NR
:
34275 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR
];
34277 case IX86_BUILTIN_SQRTPS_NR256
:
34278 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR256
];
34284 /* Normal builtins. */
34287 /* Sqrt to rsqrt conversion. */
34288 case BUILT_IN_SQRTF
:
34289 return ix86_builtins
[IX86_BUILTIN_RSQRTF
];
34296 /* Helper for avx_vpermilps256_operand et al. This is also used by
34297 the expansion functions to turn the parallel back into a mask.
34298 The return value is 0 for no match and the imm8+1 for a match. */
34301 avx_vpermilp_parallel (rtx par
, enum machine_mode mode
)
34303 unsigned i
, nelt
= GET_MODE_NUNITS (mode
);
34305 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
34307 if (XVECLEN (par
, 0) != (int) nelt
)
34310 /* Validate that all of the elements are constants, and not totally
34311 out of range. Copy the data into an integral array to make the
34312 subsequent checks easier. */
34313 for (i
= 0; i
< nelt
; ++i
)
34315 rtx er
= XVECEXP (par
, 0, i
);
34316 unsigned HOST_WIDE_INT ei
;
34318 if (!CONST_INT_P (er
))
34329 /* In the 256-bit DFmode case, we can only move elements within
34331 for (i
= 0; i
< 2; ++i
)
34335 mask
|= ipar
[i
] << i
;
34337 for (i
= 2; i
< 4; ++i
)
34341 mask
|= (ipar
[i
] - 2) << i
;
34346 /* In the 256-bit SFmode case, we have full freedom of movement
34347 within the low 128-bit lane, but the high 128-bit lane must
34348 mirror the exact same pattern. */
34349 for (i
= 0; i
< 4; ++i
)
34350 if (ipar
[i
] + 4 != ipar
[i
+ 4])
34357 /* In the 128-bit case, we've full freedom in the placement of
34358 the elements from the source operand. */
34359 for (i
= 0; i
< nelt
; ++i
)
34360 mask
|= ipar
[i
] << (i
* (nelt
/ 2));
34364 gcc_unreachable ();
34367 /* Make sure success has a non-zero value by adding one. */
34371 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
34372 the expansion functions to turn the parallel back into a mask.
34373 The return value is 0 for no match and the imm8+1 for a match. */
34376 avx_vperm2f128_parallel (rtx par
, enum machine_mode mode
)
34378 unsigned i
, nelt
= GET_MODE_NUNITS (mode
), nelt2
= nelt
/ 2;
34380 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
34382 if (XVECLEN (par
, 0) != (int) nelt
)
34385 /* Validate that all of the elements are constants, and not totally
34386 out of range. Copy the data into an integral array to make the
34387 subsequent checks easier. */
34388 for (i
= 0; i
< nelt
; ++i
)
34390 rtx er
= XVECEXP (par
, 0, i
);
34391 unsigned HOST_WIDE_INT ei
;
34393 if (!CONST_INT_P (er
))
34396 if (ei
>= 2 * nelt
)
34401 /* Validate that the halves of the permute are halves. */
34402 for (i
= 0; i
< nelt2
- 1; ++i
)
34403 if (ipar
[i
] + 1 != ipar
[i
+ 1])
34405 for (i
= nelt2
; i
< nelt
- 1; ++i
)
34406 if (ipar
[i
] + 1 != ipar
[i
+ 1])
34409 /* Reconstruct the mask. */
34410 for (i
= 0; i
< 2; ++i
)
34412 unsigned e
= ipar
[i
* nelt2
];
34416 mask
|= e
<< (i
* 4);
34419 /* Make sure success has a non-zero value by adding one. */
34423 /* Store OPERAND to the memory after reload is completed. This means
34424 that we can't easily use assign_stack_local. */
34426 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
34430 gcc_assert (reload_completed
);
34431 if (ix86_using_red_zone ())
34433 result
= gen_rtx_MEM (mode
,
34434 gen_rtx_PLUS (Pmode
,
34436 GEN_INT (-RED_ZONE_SIZE
)));
34437 emit_move_insn (result
, operand
);
34439 else if (TARGET_64BIT
)
34445 operand
= gen_lowpart (DImode
, operand
);
34449 gen_rtx_SET (VOIDmode
,
34450 gen_rtx_MEM (DImode
,
34451 gen_rtx_PRE_DEC (DImode
,
34452 stack_pointer_rtx
)),
34456 gcc_unreachable ();
34458 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
34467 split_double_mode (mode
, &operand
, 1, operands
, operands
+ 1);
34469 gen_rtx_SET (VOIDmode
,
34470 gen_rtx_MEM (SImode
,
34471 gen_rtx_PRE_DEC (Pmode
,
34472 stack_pointer_rtx
)),
34475 gen_rtx_SET (VOIDmode
,
34476 gen_rtx_MEM (SImode
,
34477 gen_rtx_PRE_DEC (Pmode
,
34478 stack_pointer_rtx
)),
34483 /* Store HImodes as SImodes. */
34484 operand
= gen_lowpart (SImode
, operand
);
34488 gen_rtx_SET (VOIDmode
,
34489 gen_rtx_MEM (GET_MODE (operand
),
34490 gen_rtx_PRE_DEC (SImode
,
34491 stack_pointer_rtx
)),
34495 gcc_unreachable ();
34497 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
34502 /* Free operand from the memory. */
34504 ix86_free_from_memory (enum machine_mode mode
)
34506 if (!ix86_using_red_zone ())
34510 if (mode
== DImode
|| TARGET_64BIT
)
34514 /* Use LEA to deallocate stack space. In peephole2 it will be converted
34515 to pop or add instruction if registers are available. */
34516 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
34517 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
34522 /* Return a register priority for hard reg REGNO. */
34524 ix86_register_priority (int hard_regno
)
34526 /* ebp and r13 as the base always wants a displacement, r12 as the
34527 base always wants an index. So discourage their usage in an
34529 if (hard_regno
== R12_REG
|| hard_regno
== R13_REG
)
34531 if (hard_regno
== BP_REG
)
34533 /* New x86-64 int registers result in bigger code size. Discourage
34535 if (FIRST_REX_INT_REG
<= hard_regno
&& hard_regno
<= LAST_REX_INT_REG
)
34537 /* New x86-64 SSE registers result in bigger code size. Discourage
34539 if (FIRST_REX_SSE_REG
<= hard_regno
&& hard_regno
<= LAST_REX_SSE_REG
)
34541 /* Usage of AX register results in smaller code. Prefer it. */
34542 if (hard_regno
== 0)
34547 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
34549 Put float CONST_DOUBLE in the constant pool instead of fp regs.
34550 QImode must go into class Q_REGS.
34551 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
34552 movdf to do mem-to-mem moves through integer regs. */
34555 ix86_preferred_reload_class (rtx x
, reg_class_t regclass
)
34557 enum machine_mode mode
= GET_MODE (x
);
34559 /* We're only allowed to return a subclass of CLASS. Many of the
34560 following checks fail for NO_REGS, so eliminate that early. */
34561 if (regclass
== NO_REGS
)
34564 /* All classes can load zeros. */
34565 if (x
== CONST0_RTX (mode
))
34568 /* Force constants into memory if we are loading a (nonzero) constant into
34569 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
34570 instructions to load from a constant. */
34572 && (MAYBE_MMX_CLASS_P (regclass
)
34573 || MAYBE_SSE_CLASS_P (regclass
)
34574 || MAYBE_MASK_CLASS_P (regclass
)))
34577 /* Prefer SSE regs only, if we can use them for math. */
34578 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
34579 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
34581 /* Floating-point constants need more complex checks. */
34582 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
34584 /* General regs can load everything. */
34585 if (reg_class_subset_p (regclass
, GENERAL_REGS
))
34588 /* Floats can load 0 and 1 plus some others. Note that we eliminated
34589 zero above. We only want to wind up preferring 80387 registers if
34590 we plan on doing computation with them. */
34592 && standard_80387_constant_p (x
) > 0)
34594 /* Limit class to non-sse. */
34595 if (regclass
== FLOAT_SSE_REGS
)
34597 if (regclass
== FP_TOP_SSE_REGS
)
34599 if (regclass
== FP_SECOND_SSE_REGS
)
34600 return FP_SECOND_REG
;
34601 if (regclass
== FLOAT_INT_REGS
|| regclass
== FLOAT_REGS
)
34608 /* Generally when we see PLUS here, it's the function invariant
34609 (plus soft-fp const_int). Which can only be computed into general
34611 if (GET_CODE (x
) == PLUS
)
34612 return reg_class_subset_p (regclass
, GENERAL_REGS
) ? regclass
: NO_REGS
;
34614 /* QImode constants are easy to load, but non-constant QImode data
34615 must go into Q_REGS. */
34616 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
34618 if (reg_class_subset_p (regclass
, Q_REGS
))
34620 if (reg_class_subset_p (Q_REGS
, regclass
))
34628 /* Discourage putting floating-point values in SSE registers unless
34629 SSE math is being used, and likewise for the 387 registers. */
34631 ix86_preferred_output_reload_class (rtx x
, reg_class_t regclass
)
34633 enum machine_mode mode
= GET_MODE (x
);
34635 /* Restrict the output reload class to the register bank that we are doing
34636 math on. If we would like not to return a subset of CLASS, reject this
34637 alternative: if reload cannot do this, it will still use its choice. */
34638 mode
= GET_MODE (x
);
34639 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
34640 return MAYBE_SSE_CLASS_P (regclass
) ? ALL_SSE_REGS
: NO_REGS
;
34642 if (X87_FLOAT_MODE_P (mode
))
34644 if (regclass
== FP_TOP_SSE_REGS
)
34646 else if (regclass
== FP_SECOND_SSE_REGS
)
34647 return FP_SECOND_REG
;
34649 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
34656 ix86_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass
,
34657 enum machine_mode mode
, secondary_reload_info
*sri
)
34659 /* Double-word spills from general registers to non-offsettable memory
34660 references (zero-extended addresses) require special handling. */
34663 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
34664 && INTEGER_CLASS_P (rclass
)
34665 && !offsettable_memref_p (x
))
34668 ? CODE_FOR_reload_noff_load
34669 : CODE_FOR_reload_noff_store
);
34670 /* Add the cost of moving address to a temporary. */
34671 sri
->extra_cost
= 1;
34676 /* QImode spills from non-QI registers require
34677 intermediate register on 32bit targets. */
34679 && (MAYBE_MASK_CLASS_P (rclass
)
34680 || (!TARGET_64BIT
&& !in_p
34681 && INTEGER_CLASS_P (rclass
)
34682 && MAYBE_NON_Q_CLASS_P (rclass
))))
34691 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
34692 regno
= true_regnum (x
);
34694 /* Return Q_REGS if the operand is in memory. */
34699 /* This condition handles corner case where an expression involving
34700 pointers gets vectorized. We're trying to use the address of a
34701 stack slot as a vector initializer.
34703 (set (reg:V2DI 74 [ vect_cst_.2 ])
34704 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
34706 Eventually frame gets turned into sp+offset like this:
34708 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
34709 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
34710 (const_int 392 [0x188]))))
34712 That later gets turned into:
34714 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
34715 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
34716 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
34718 We'll have the following reload recorded:
34720 Reload 0: reload_in (DI) =
34721 (plus:DI (reg/f:DI 7 sp)
34722 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
34723 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
34724 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
34725 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
34726 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
34727 reload_reg_rtx: (reg:V2DI 22 xmm1)
34729 Which isn't going to work since SSE instructions can't handle scalar
34730 additions. Returning GENERAL_REGS forces the addition into integer
34731 register and reload can handle subsequent reloads without problems. */
34733 if (in_p
&& GET_CODE (x
) == PLUS
34734 && SSE_CLASS_P (rclass
)
34735 && SCALAR_INT_MODE_P (mode
))
34736 return GENERAL_REGS
;
34741 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
34744 ix86_class_likely_spilled_p (reg_class_t rclass
)
34755 case SSE_FIRST_REG
:
34757 case FP_SECOND_REG
:
34768 /* If we are copying between general and FP registers, we need a memory
34769 location. The same is true for SSE and MMX registers.
34771 To optimize register_move_cost performance, allow inline variant.
34773 The macro can't work reliably when one of the CLASSES is class containing
34774 registers from multiple units (SSE, MMX, integer). We avoid this by never
34775 combining those units in single alternative in the machine description.
34776 Ensure that this constraint holds to avoid unexpected surprises.
34778 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
34779 enforce these sanity checks. */
34782 inline_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
34783 enum machine_mode mode
, int strict
)
34785 if (lra_in_progress
&& (class1
== NO_REGS
|| class2
== NO_REGS
))
34787 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
34788 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
34789 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
34790 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
34791 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
34792 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
34794 gcc_assert (!strict
|| lra_in_progress
);
34798 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
34801 /* ??? This is a lie. We do have moves between mmx/general, and for
34802 mmx/sse2. But by saying we need secondary memory we discourage the
34803 register allocator from using the mmx registers unless needed. */
34804 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
34807 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
34809 /* SSE1 doesn't have any direct moves from other classes. */
34813 /* If the target says that inter-unit moves are more expensive
34814 than moving through memory, then don't generate them. */
34815 if ((SSE_CLASS_P (class1
) && !TARGET_INTER_UNIT_MOVES_FROM_VEC
)
34816 || (SSE_CLASS_P (class2
) && !TARGET_INTER_UNIT_MOVES_TO_VEC
))
34819 /* Between SSE and general, we have moves no larger than word size. */
34820 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34828 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
34829 enum machine_mode mode
, int strict
)
34831 return inline_secondary_memory_needed (class1
, class2
, mode
, strict
);
34834 /* Implement the TARGET_CLASS_MAX_NREGS hook.
34836 On the 80386, this is the size of MODE in words,
34837 except in the FP regs, where a single reg is always enough. */
34839 static unsigned char
34840 ix86_class_max_nregs (reg_class_t rclass
, enum machine_mode mode
)
34842 if (MAYBE_INTEGER_CLASS_P (rclass
))
34844 if (mode
== XFmode
)
34845 return (TARGET_64BIT
? 2 : 3);
34846 else if (mode
== XCmode
)
34847 return (TARGET_64BIT
? 4 : 6);
34849 return ((GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
);
34853 if (COMPLEX_MODE_P (mode
))
34860 /* Return true if the registers in CLASS cannot represent the change from
34861 modes FROM to TO. */
34864 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
34865 enum reg_class regclass
)
34870 /* x87 registers can't do subreg at all, as all values are reformatted
34871 to extended precision. */
34872 if (MAYBE_FLOAT_CLASS_P (regclass
))
34875 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
34877 /* Vector registers do not support QI or HImode loads. If we don't
34878 disallow a change to these modes, reload will assume it's ok to
34879 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
34880 the vec_dupv4hi pattern. */
34881 if (GET_MODE_SIZE (from
) < 4)
34884 /* Vector registers do not support subreg with nonzero offsets, which
34885 are otherwise valid for integer registers. Since we can't see
34886 whether we have a nonzero offset from here, prohibit all
34887 nonparadoxical subregs changing size. */
34888 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
34895 /* Return the cost of moving data of mode M between a
34896 register and memory. A value of 2 is the default; this cost is
34897 relative to those in `REGISTER_MOVE_COST'.
34899 This function is used extensively by register_move_cost that is used to
34900 build tables at startup. Make it inline in this case.
34901 When IN is 2, return maximum of in and out move cost.
34903 If moving between registers and memory is more expensive than
34904 between two registers, you should define this macro to express the
34907 Model also increased moving costs of QImode registers in non
34911 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
34915 if (FLOAT_CLASS_P (regclass
))
34933 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
34934 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
34936 if (SSE_CLASS_P (regclass
))
34939 switch (GET_MODE_SIZE (mode
))
34954 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
34955 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
34957 if (MMX_CLASS_P (regclass
))
34960 switch (GET_MODE_SIZE (mode
))
34972 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
34973 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
34975 switch (GET_MODE_SIZE (mode
))
34978 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
34981 return ix86_cost
->int_store
[0];
34982 if (TARGET_PARTIAL_REG_DEPENDENCY
34983 && optimize_function_for_speed_p (cfun
))
34984 cost
= ix86_cost
->movzbl_load
;
34986 cost
= ix86_cost
->int_load
[0];
34988 return MAX (cost
, ix86_cost
->int_store
[0]);
34994 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
34996 return ix86_cost
->movzbl_load
;
34998 return ix86_cost
->int_store
[0] + 4;
35003 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
35004 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
35006 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
35007 if (mode
== TFmode
)
35010 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
35012 cost
= ix86_cost
->int_load
[2];
35014 cost
= ix86_cost
->int_store
[2];
35015 return (cost
* (((int) GET_MODE_SIZE (mode
)
35016 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
35021 ix86_memory_move_cost (enum machine_mode mode
, reg_class_t regclass
,
35024 return inline_memory_move_cost (mode
, (enum reg_class
) regclass
, in
? 1 : 0);
35028 /* Return the cost of moving data from a register in class CLASS1 to
35029 one in class CLASS2.
35031 It is not required that the cost always equal 2 when FROM is the same as TO;
35032 on some machines it is expensive to move between registers if they are not
35033 general registers. */
35036 ix86_register_move_cost (enum machine_mode mode
, reg_class_t class1_i
,
35037 reg_class_t class2_i
)
35039 enum reg_class class1
= (enum reg_class
) class1_i
;
35040 enum reg_class class2
= (enum reg_class
) class2_i
;
35042 /* In case we require secondary memory, compute cost of the store followed
35043 by load. In order to avoid bad register allocation choices, we need
35044 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
35046 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
35050 cost
+= inline_memory_move_cost (mode
, class1
, 2);
35051 cost
+= inline_memory_move_cost (mode
, class2
, 2);
35053 /* In case of copying from general_purpose_register we may emit multiple
35054 stores followed by single load causing memory size mismatch stall.
35055 Count this as arbitrarily high cost of 20. */
35056 if (targetm
.class_max_nregs (class1
, mode
)
35057 > targetm
.class_max_nregs (class2
, mode
))
35060 /* In the case of FP/MMX moves, the registers actually overlap, and we
35061 have to switch modes in order to treat them differently. */
35062 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
35063 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
35069 /* Moves between SSE/MMX and integer unit are expensive. */
35070 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
35071 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
35073 /* ??? By keeping returned value relatively high, we limit the number
35074 of moves between integer and MMX/SSE registers for all targets.
35075 Additionally, high value prevents problem with x86_modes_tieable_p(),
35076 where integer modes in MMX/SSE registers are not tieable
35077 because of missing QImode and HImode moves to, from or between
35078 MMX/SSE registers. */
35079 return MAX (8, ix86_cost
->mmxsse_to_integer
);
35081 if (MAYBE_FLOAT_CLASS_P (class1
))
35082 return ix86_cost
->fp_move
;
35083 if (MAYBE_SSE_CLASS_P (class1
))
35084 return ix86_cost
->sse_move
;
35085 if (MAYBE_MMX_CLASS_P (class1
))
35086 return ix86_cost
->mmx_move
;
35090 /* Return TRUE if hard register REGNO can hold a value of machine-mode
35094 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
35096 /* Flags and only flags can only hold CCmode values. */
35097 if (CC_REGNO_P (regno
))
35098 return GET_MODE_CLASS (mode
) == MODE_CC
;
35099 if (GET_MODE_CLASS (mode
) == MODE_CC
35100 || GET_MODE_CLASS (mode
) == MODE_RANDOM
35101 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
35103 if (STACK_REGNO_P (regno
))
35104 return VALID_FP_MODE_P (mode
);
35105 if (MASK_REGNO_P (regno
))
35106 return VALID_MASK_REG_MODE (mode
);
35107 if (BND_REGNO_P (regno
))
35108 return VALID_BND_REG_MODE (mode
);
35109 if (SSE_REGNO_P (regno
))
35111 /* We implement the move patterns for all vector modes into and
35112 out of SSE registers, even when no operation instructions
35115 /* For AVX-512 we allow, regardless of regno:
35117 - any of 512-bit wide vector mode
35118 - any scalar mode. */
35121 || VALID_AVX512F_REG_MODE (mode
)
35122 || VALID_AVX512F_SCALAR_MODE (mode
)))
35125 /* xmm16-xmm31 are only available for AVX-512. */
35126 if (EXT_REX_SSE_REGNO_P (regno
))
35129 /* OImode move is available only when AVX is enabled. */
35130 return ((TARGET_AVX
&& mode
== OImode
)
35131 || (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
35132 || VALID_SSE_REG_MODE (mode
)
35133 || VALID_SSE2_REG_MODE (mode
)
35134 || VALID_MMX_REG_MODE (mode
)
35135 || VALID_MMX_REG_MODE_3DNOW (mode
));
35137 if (MMX_REGNO_P (regno
))
35139 /* We implement the move patterns for 3DNOW modes even in MMX mode,
35140 so if the register is available at all, then we can move data of
35141 the given mode into or out of it. */
35142 return (VALID_MMX_REG_MODE (mode
)
35143 || VALID_MMX_REG_MODE_3DNOW (mode
));
35146 if (mode
== QImode
)
35148 /* Take care for QImode values - they can be in non-QI regs,
35149 but then they do cause partial register stalls. */
35150 if (ANY_QI_REGNO_P (regno
))
35152 if (!TARGET_PARTIAL_REG_STALL
)
35154 /* LRA checks if the hard register is OK for the given mode.
35155 QImode values can live in non-QI regs, so we allow all
35157 if (lra_in_progress
)
35159 return !can_create_pseudo_p ();
35161 /* We handle both integer and floats in the general purpose registers. */
35162 else if (VALID_INT_MODE_P (mode
))
35164 else if (VALID_FP_MODE_P (mode
))
35166 else if (VALID_DFP_MODE_P (mode
))
35168 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
35169 on to use that value in smaller contexts, this can easily force a
35170 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
35171 supporting DImode, allow it. */
35172 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
35178 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
35179 tieable integer mode. */
35182 ix86_tieable_integer_mode_p (enum machine_mode mode
)
35191 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
35194 return TARGET_64BIT
;
35201 /* Return true if MODE1 is accessible in a register that can hold MODE2
35202 without copying. That is, all register classes that can hold MODE2
35203 can also hold MODE1. */
35206 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
35208 if (mode1
== mode2
)
35211 if (ix86_tieable_integer_mode_p (mode1
)
35212 && ix86_tieable_integer_mode_p (mode2
))
35215 /* MODE2 being XFmode implies fp stack or general regs, which means we
35216 can tie any smaller floating point modes to it. Note that we do not
35217 tie this with TFmode. */
35218 if (mode2
== XFmode
)
35219 return mode1
== SFmode
|| mode1
== DFmode
;
35221 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
35222 that we can tie it with SFmode. */
35223 if (mode2
== DFmode
)
35224 return mode1
== SFmode
;
35226 /* If MODE2 is only appropriate for an SSE register, then tie with
35227 any other mode acceptable to SSE registers. */
35228 if (GET_MODE_SIZE (mode2
) == 32
35229 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
35230 return (GET_MODE_SIZE (mode1
) == 32
35231 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
35232 if (GET_MODE_SIZE (mode2
) == 16
35233 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
35234 return (GET_MODE_SIZE (mode1
) == 16
35235 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
35237 /* If MODE2 is appropriate for an MMX register, then tie
35238 with any other mode acceptable to MMX registers. */
35239 if (GET_MODE_SIZE (mode2
) == 8
35240 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
35241 return (GET_MODE_SIZE (mode1
) == 8
35242 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
35247 /* Return the cost of moving between two registers of mode MODE. */
35250 ix86_set_reg_reg_cost (enum machine_mode mode
)
35252 unsigned int units
= UNITS_PER_WORD
;
35254 switch (GET_MODE_CLASS (mode
))
35260 units
= GET_MODE_SIZE (CCmode
);
35264 if ((TARGET_SSE
&& mode
== TFmode
)
35265 || (TARGET_80387
&& mode
== XFmode
)
35266 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DFmode
)
35267 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SFmode
))
35268 units
= GET_MODE_SIZE (mode
);
35271 case MODE_COMPLEX_FLOAT
:
35272 if ((TARGET_SSE
&& mode
== TCmode
)
35273 || (TARGET_80387
&& mode
== XCmode
)
35274 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DCmode
)
35275 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SCmode
))
35276 units
= GET_MODE_SIZE (mode
);
35279 case MODE_VECTOR_INT
:
35280 case MODE_VECTOR_FLOAT
:
35281 if ((TARGET_AVX512F
&& VALID_AVX512F_REG_MODE (mode
))
35282 || (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
35283 || (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
35284 || (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
35285 || (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
)))
35286 units
= GET_MODE_SIZE (mode
);
35289 /* Return the cost of moving between two registers of mode MODE,
35290 assuming that the move will be in pieces of at most UNITS bytes. */
35291 return COSTS_N_INSNS ((GET_MODE_SIZE (mode
) + units
- 1) / units
);
35294 /* Compute a (partial) cost for rtx X. Return true if the complete
35295 cost has been computed, and false if subexpressions should be
35296 scanned. In either case, *TOTAL contains the cost result. */
35299 ix86_rtx_costs (rtx x
, int code_i
, int outer_code_i
, int opno
, int *total
,
35302 enum rtx_code code
= (enum rtx_code
) code_i
;
35303 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
35304 enum machine_mode mode
= GET_MODE (x
);
35305 const struct processor_costs
*cost
= speed
? ix86_cost
: &ix86_size_cost
;
35310 if (register_operand (SET_DEST (x
), VOIDmode
)
35311 && reg_or_0_operand (SET_SRC (x
), VOIDmode
))
35313 *total
= ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x
)));
35322 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
35324 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
35326 else if (flag_pic
&& SYMBOLIC_CONST (x
)
35328 || (!GET_CODE (x
) != LABEL_REF
35329 && (GET_CODE (x
) != SYMBOL_REF
35330 || !SYMBOL_REF_LOCAL_P (x
)))))
35337 if (mode
== VOIDmode
)
35342 switch (standard_80387_constant_p (x
))
35347 default: /* Other constants */
35354 if (SSE_FLOAT_MODE_P (mode
))
35357 switch (standard_sse_constant_p (x
))
35361 case 1: /* 0: xor eliminates false dependency */
35364 default: /* -1: cmp contains false dependency */
35369 /* Fall back to (MEM (SYMBOL_REF)), since that's where
35370 it'll probably end up. Add a penalty for size. */
35371 *total
= (COSTS_N_INSNS (1)
35372 + (flag_pic
!= 0 && !TARGET_64BIT
)
35373 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
35377 /* The zero extensions is often completely free on x86_64, so make
35378 it as cheap as possible. */
35379 if (TARGET_64BIT
&& mode
== DImode
35380 && GET_MODE (XEXP (x
, 0)) == SImode
)
35382 else if (TARGET_ZERO_EXTEND_WITH_AND
)
35383 *total
= cost
->add
;
35385 *total
= cost
->movzx
;
35389 *total
= cost
->movsx
;
35393 if (SCALAR_INT_MODE_P (mode
)
35394 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
35395 && CONST_INT_P (XEXP (x
, 1)))
35397 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
35400 *total
= cost
->add
;
35403 if ((value
== 2 || value
== 3)
35404 && cost
->lea
<= cost
->shift_const
)
35406 *total
= cost
->lea
;
35416 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
35418 /* ??? Should be SSE vector operation cost. */
35419 /* At least for published AMD latencies, this really is the same
35420 as the latency for a simple fpu operation like fabs. */
35421 /* V*QImode is emulated with 1-11 insns. */
35422 if (mode
== V16QImode
|| mode
== V32QImode
)
35425 if (TARGET_XOP
&& mode
== V16QImode
)
35427 /* For XOP we use vpshab, which requires a broadcast of the
35428 value to the variable shift insn. For constants this
35429 means a V16Q const in mem; even when we can perform the
35430 shift with one insn set the cost to prefer paddb. */
35431 if (CONSTANT_P (XEXP (x
, 1)))
35433 *total
= (cost
->fabs
35434 + rtx_cost (XEXP (x
, 0), code
, 0, speed
)
35435 + (speed
? 2 : COSTS_N_BYTES (16)));
35440 else if (TARGET_SSSE3
)
35442 *total
= cost
->fabs
* count
;
35445 *total
= cost
->fabs
;
35447 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
35449 if (CONST_INT_P (XEXP (x
, 1)))
35451 if (INTVAL (XEXP (x
, 1)) > 32)
35452 *total
= cost
->shift_const
+ COSTS_N_INSNS (2);
35454 *total
= cost
->shift_const
* 2;
35458 if (GET_CODE (XEXP (x
, 1)) == AND
)
35459 *total
= cost
->shift_var
* 2;
35461 *total
= cost
->shift_var
* 6 + COSTS_N_INSNS (2);
35466 if (CONST_INT_P (XEXP (x
, 1)))
35467 *total
= cost
->shift_const
;
35468 else if (GET_CODE (XEXP (x
, 1)) == SUBREG
35469 && GET_CODE (XEXP (XEXP (x
, 1), 0)) == AND
)
35471 /* Return the cost after shift-and truncation. */
35472 *total
= cost
->shift_var
;
35476 *total
= cost
->shift_var
;
35484 gcc_assert (FLOAT_MODE_P (mode
));
35485 gcc_assert (TARGET_FMA
|| TARGET_FMA4
|| TARGET_AVX512F
);
35487 /* ??? SSE scalar/vector cost should be used here. */
35488 /* ??? Bald assumption that fma has the same cost as fmul. */
35489 *total
= cost
->fmul
;
35490 *total
+= rtx_cost (XEXP (x
, 1), FMA
, 1, speed
);
35492 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
35494 if (GET_CODE (sub
) == NEG
)
35495 sub
= XEXP (sub
, 0);
35496 *total
+= rtx_cost (sub
, FMA
, 0, speed
);
35499 if (GET_CODE (sub
) == NEG
)
35500 sub
= XEXP (sub
, 0);
35501 *total
+= rtx_cost (sub
, FMA
, 2, speed
);
35506 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35508 /* ??? SSE scalar cost should be used here. */
35509 *total
= cost
->fmul
;
35512 else if (X87_FLOAT_MODE_P (mode
))
35514 *total
= cost
->fmul
;
35517 else if (FLOAT_MODE_P (mode
))
35519 /* ??? SSE vector cost should be used here. */
35520 *total
= cost
->fmul
;
35523 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
35525 /* V*QImode is emulated with 7-13 insns. */
35526 if (mode
== V16QImode
|| mode
== V32QImode
)
35529 if (TARGET_XOP
&& mode
== V16QImode
)
35531 else if (TARGET_SSSE3
)
35533 *total
= cost
->fmul
* 2 + cost
->fabs
* extra
;
35535 /* V*DImode is emulated with 5-8 insns. */
35536 else if (mode
== V2DImode
|| mode
== V4DImode
)
35538 if (TARGET_XOP
&& mode
== V2DImode
)
35539 *total
= cost
->fmul
* 2 + cost
->fabs
* 3;
35541 *total
= cost
->fmul
* 3 + cost
->fabs
* 5;
35543 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
35544 insns, including two PMULUDQ. */
35545 else if (mode
== V4SImode
&& !(TARGET_SSE4_1
|| TARGET_AVX
))
35546 *total
= cost
->fmul
* 2 + cost
->fabs
* 5;
35548 *total
= cost
->fmul
;
35553 rtx op0
= XEXP (x
, 0);
35554 rtx op1
= XEXP (x
, 1);
35556 if (CONST_INT_P (XEXP (x
, 1)))
35558 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
35559 for (nbits
= 0; value
!= 0; value
&= value
- 1)
35563 /* This is arbitrary. */
35566 /* Compute costs correctly for widening multiplication. */
35567 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
35568 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
35569 == GET_MODE_SIZE (mode
))
35571 int is_mulwiden
= 0;
35572 enum machine_mode inner_mode
= GET_MODE (op0
);
35574 if (GET_CODE (op0
) == GET_CODE (op1
))
35575 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
35576 else if (CONST_INT_P (op1
))
35578 if (GET_CODE (op0
) == SIGN_EXTEND
)
35579 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
35582 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
35586 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
35589 *total
= (cost
->mult_init
[MODE_INDEX (mode
)]
35590 + nbits
* cost
->mult_bit
35591 + rtx_cost (op0
, outer_code
, opno
, speed
)
35592 + rtx_cost (op1
, outer_code
, opno
, speed
));
35601 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35602 /* ??? SSE cost should be used here. */
35603 *total
= cost
->fdiv
;
35604 else if (X87_FLOAT_MODE_P (mode
))
35605 *total
= cost
->fdiv
;
35606 else if (FLOAT_MODE_P (mode
))
35607 /* ??? SSE vector cost should be used here. */
35608 *total
= cost
->fdiv
;
35610 *total
= cost
->divide
[MODE_INDEX (mode
)];
35614 if (GET_MODE_CLASS (mode
) == MODE_INT
35615 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
)
35617 if (GET_CODE (XEXP (x
, 0)) == PLUS
35618 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
35619 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
35620 && CONSTANT_P (XEXP (x
, 1)))
35622 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
35623 if (val
== 2 || val
== 4 || val
== 8)
35625 *total
= cost
->lea
;
35626 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
35627 outer_code
, opno
, speed
);
35628 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
35629 outer_code
, opno
, speed
);
35630 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
35634 else if (GET_CODE (XEXP (x
, 0)) == MULT
35635 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
35637 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
35638 if (val
== 2 || val
== 4 || val
== 8)
35640 *total
= cost
->lea
;
35641 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
35642 outer_code
, opno
, speed
);
35643 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
35647 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
35649 *total
= cost
->lea
;
35650 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
35651 outer_code
, opno
, speed
);
35652 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
35653 outer_code
, opno
, speed
);
35654 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
35661 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35663 /* ??? SSE cost should be used here. */
35664 *total
= cost
->fadd
;
35667 else if (X87_FLOAT_MODE_P (mode
))
35669 *total
= cost
->fadd
;
35672 else if (FLOAT_MODE_P (mode
))
35674 /* ??? SSE vector cost should be used here. */
35675 *total
= cost
->fadd
;
35683 if (GET_MODE_CLASS (mode
) == MODE_INT
35684 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
35686 *total
= (cost
->add
* 2
35687 + (rtx_cost (XEXP (x
, 0), outer_code
, opno
, speed
)
35688 << (GET_MODE (XEXP (x
, 0)) != DImode
))
35689 + (rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
)
35690 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
35696 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35698 /* ??? SSE cost should be used here. */
35699 *total
= cost
->fchs
;
35702 else if (X87_FLOAT_MODE_P (mode
))
35704 *total
= cost
->fchs
;
35707 else if (FLOAT_MODE_P (mode
))
35709 /* ??? SSE vector cost should be used here. */
35710 *total
= cost
->fchs
;
35716 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
35718 /* ??? Should be SSE vector operation cost. */
35719 /* At least for published AMD latencies, this really is the same
35720 as the latency for a simple fpu operation like fabs. */
35721 *total
= cost
->fabs
;
35723 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
35724 *total
= cost
->add
* 2;
35726 *total
= cost
->add
;
35730 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
35731 && XEXP (XEXP (x
, 0), 1) == const1_rtx
35732 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
35733 && XEXP (x
, 1) == const0_rtx
)
35735 /* This kind of construct is implemented using test[bwl].
35736 Treat it as if we had an AND. */
35737 *total
= (cost
->add
35738 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, opno
, speed
)
35739 + rtx_cost (const1_rtx
, outer_code
, opno
, speed
));
35745 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
35750 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35751 /* ??? SSE cost should be used here. */
35752 *total
= cost
->fabs
;
35753 else if (X87_FLOAT_MODE_P (mode
))
35754 *total
= cost
->fabs
;
35755 else if (FLOAT_MODE_P (mode
))
35756 /* ??? SSE vector cost should be used here. */
35757 *total
= cost
->fabs
;
35761 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35762 /* ??? SSE cost should be used here. */
35763 *total
= cost
->fsqrt
;
35764 else if (X87_FLOAT_MODE_P (mode
))
35765 *total
= cost
->fsqrt
;
35766 else if (FLOAT_MODE_P (mode
))
35767 /* ??? SSE vector cost should be used here. */
35768 *total
= cost
->fsqrt
;
35772 if (XINT (x
, 1) == UNSPEC_TP
)
35779 case VEC_DUPLICATE
:
35780 /* ??? Assume all of these vector manipulation patterns are
35781 recognizable. In which case they all pretty much have the
35783 *total
= cost
->fabs
;
35793 static int current_machopic_label_num
;
35795 /* Given a symbol name and its associated stub, write out the
35796 definition of the stub. */
35799 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
35801 unsigned int length
;
35802 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
35803 int label
= ++current_machopic_label_num
;
35805 /* For 64-bit we shouldn't get here. */
35806 gcc_assert (!TARGET_64BIT
);
35808 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
35809 symb
= targetm
.strip_name_encoding (symb
);
35811 length
= strlen (stub
);
35812 binder_name
= XALLOCAVEC (char, length
+ 32);
35813 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
35815 length
= strlen (symb
);
35816 symbol_name
= XALLOCAVEC (char, length
+ 32);
35817 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
35819 sprintf (lazy_ptr_name
, "L%d$lz", label
);
35821 if (MACHOPIC_ATT_STUB
)
35822 switch_to_section (darwin_sections
[machopic_picsymbol_stub3_section
]);
35823 else if (MACHOPIC_PURE
)
35824 switch_to_section (darwin_sections
[machopic_picsymbol_stub2_section
]);
35826 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
35828 fprintf (file
, "%s:\n", stub
);
35829 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
35831 if (MACHOPIC_ATT_STUB
)
35833 fprintf (file
, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
35835 else if (MACHOPIC_PURE
)
35838 /* 25-byte PIC stub using "CALL get_pc_thunk". */
35839 rtx tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
35840 output_set_got (tmp
, NULL_RTX
); /* "CALL ___<cpu>.get_pc_thunk.cx". */
35841 fprintf (file
, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
35842 label
, lazy_ptr_name
, label
);
35843 fprintf (file
, "\tjmp\t*%%ecx\n");
35846 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
35848 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
35849 it needs no stub-binding-helper. */
35850 if (MACHOPIC_ATT_STUB
)
35853 fprintf (file
, "%s:\n", binder_name
);
35857 fprintf (file
, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name
, binder_name
);
35858 fprintf (file
, "\tpushl\t%%ecx\n");
35861 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
35863 fputs ("\tjmp\tdyld_stub_binding_helper\n", file
);
35865 /* N.B. Keep the correspondence of these
35866 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
35867 old-pic/new-pic/non-pic stubs; altering this will break
35868 compatibility with existing dylibs. */
35871 /* 25-byte PIC stub using "CALL get_pc_thunk". */
35872 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr2_section
]);
35875 /* 16-byte -mdynamic-no-pic stub. */
35876 switch_to_section(darwin_sections
[machopic_lazy_symbol_ptr3_section
]);
35878 fprintf (file
, "%s:\n", lazy_ptr_name
);
35879 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
35880 fprintf (file
, ASM_LONG
"%s\n", binder_name
);
35882 #endif /* TARGET_MACHO */
35884 /* Order the registers for register allocator. */
35887 x86_order_regs_for_local_alloc (void)
35892 /* First allocate the local general purpose registers. */
35893 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
35894 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
35895 reg_alloc_order
[pos
++] = i
;
35897 /* Global general purpose registers. */
35898 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
35899 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
35900 reg_alloc_order
[pos
++] = i
;
35902 /* x87 registers come first in case we are doing FP math
35904 if (!TARGET_SSE_MATH
)
35905 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
35906 reg_alloc_order
[pos
++] = i
;
35908 /* SSE registers. */
35909 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
35910 reg_alloc_order
[pos
++] = i
;
35911 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
35912 reg_alloc_order
[pos
++] = i
;
35914 /* Extended REX SSE registers. */
35915 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
35916 reg_alloc_order
[pos
++] = i
;
35918 /* Mask register. */
35919 for (i
= FIRST_MASK_REG
; i
<= LAST_MASK_REG
; i
++)
35920 reg_alloc_order
[pos
++] = i
;
35922 /* MPX bound registers. */
35923 for (i
= FIRST_BND_REG
; i
<= LAST_BND_REG
; i
++)
35924 reg_alloc_order
[pos
++] = i
;
35926 /* x87 registers. */
35927 if (TARGET_SSE_MATH
)
35928 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
35929 reg_alloc_order
[pos
++] = i
;
35931 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
35932 reg_alloc_order
[pos
++] = i
;
35934 /* Initialize the rest of array as we do not allocate some registers
35936 while (pos
< FIRST_PSEUDO_REGISTER
)
35937 reg_alloc_order
[pos
++] = 0;
35940 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
35941 in struct attribute_spec handler. */
35943 ix86_handle_callee_pop_aggregate_return (tree
*node
, tree name
,
35945 int flags ATTRIBUTE_UNUSED
,
35946 bool *no_add_attrs
)
35948 if (TREE_CODE (*node
) != FUNCTION_TYPE
35949 && TREE_CODE (*node
) != METHOD_TYPE
35950 && TREE_CODE (*node
) != FIELD_DECL
35951 && TREE_CODE (*node
) != TYPE_DECL
)
35953 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
35955 *no_add_attrs
= true;
35960 warning (OPT_Wattributes
, "%qE attribute only available for 32-bit",
35962 *no_add_attrs
= true;
35965 if (is_attribute_p ("callee_pop_aggregate_return", name
))
35969 cst
= TREE_VALUE (args
);
35970 if (TREE_CODE (cst
) != INTEGER_CST
)
35972 warning (OPT_Wattributes
,
35973 "%qE attribute requires an integer constant argument",
35975 *no_add_attrs
= true;
35977 else if (compare_tree_int (cst
, 0) != 0
35978 && compare_tree_int (cst
, 1) != 0)
35980 warning (OPT_Wattributes
,
35981 "argument to %qE attribute is neither zero, nor one",
35983 *no_add_attrs
= true;
35992 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
35993 struct attribute_spec.handler. */
35995 ix86_handle_abi_attribute (tree
*node
, tree name
,
35996 tree args ATTRIBUTE_UNUSED
,
35997 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
35999 if (TREE_CODE (*node
) != FUNCTION_TYPE
36000 && TREE_CODE (*node
) != METHOD_TYPE
36001 && TREE_CODE (*node
) != FIELD_DECL
36002 && TREE_CODE (*node
) != TYPE_DECL
)
36004 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
36006 *no_add_attrs
= true;
36010 /* Can combine regparm with all attributes but fastcall. */
36011 if (is_attribute_p ("ms_abi", name
))
36013 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node
)))
36015 error ("ms_abi and sysv_abi attributes are not compatible");
36020 else if (is_attribute_p ("sysv_abi", name
))
36022 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node
)))
36024 error ("ms_abi and sysv_abi attributes are not compatible");
/* NOTE(review): lossily extracted text; fused line numbers and missing
   interior lines (braces, returns).  */
36033 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
36034 struct attribute_spec.handler. */
36036 ix86_handle_struct_attribute (tree
*node
, tree name
,
36037 tree args ATTRIBUTE_UNUSED
,
36038 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
/* If applied to a TYPE_DECL, the attribute really targets the decl's type.  */
36041 if (DECL_P (*node
))
36043 if (TREE_CODE (*node
) == TYPE_DECL
)
36044 type
= &TREE_TYPE (*node
);
/* The attribute is only meaningful on struct/union types; otherwise warn
   and drop it.  */
36049 if (!(type
&& RECORD_OR_UNION_TYPE_P (*type
)))
36051 warning (OPT_Wattributes
, "%qE attribute ignored",
36053 *no_add_attrs
= true;
/* ms_struct and gcc_struct are mutually exclusive: if the opposite layout
   attribute is already present, warn and ignore the new one.  */
36056 else if ((is_attribute_p ("ms_struct", name
)
36057 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
36058 || ((is_attribute_p ("gcc_struct", name
)
36059 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
36061 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
36063 *no_add_attrs
= true;
/* Attribute handler for attributes that may only appear on FUNCTION_DECLs;
   warns and sets *NO_ADD_ATTRS on anything else.
   NOTE(review): lossily extracted text; fused line numbers and missing
   interior lines.  */
36070 ix86_handle_fndecl_attribute (tree
*node
, tree name
,
36071 tree args ATTRIBUTE_UNUSED
,
36072 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
36074 if (TREE_CODE (*node
) != FUNCTION_DECL
)
36076 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
36078 *no_add_attrs
= true;
/* Return true if RECORD_TYPE should use the MS bitfield layout: either the
   target defaults to MS layout (TARGET_MS_BITFIELD_LAYOUT) and the type is
   not marked "gcc_struct", or the type is explicitly marked "ms_struct".
   NOTE(review): lossily extracted text; braces/qualifiers missing.  */
36084 ix86_ms_bitfield_layout_p (const_tree record_type
)
36086 return ((TARGET_MS_BITFIELD_LAYOUT
36087 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
36088 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
)));
36091 /* Returns an expression indicating where the this parameter is
36092 located on entry to the FUNCTION. */
36095 x86_this_parameter (tree function
)
36097 tree type
= TREE_TYPE (function
);
36098 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
36103 const int *parm_regs
;
36105 if (ix86_function_type_abi (type
) == MS_ABI
)
36106 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
36108 parm_regs
= x86_64_int_parameter_registers
;
36109 return gen_rtx_REG (Pmode
, parm_regs
[aggr
]);
36112 nregs
= ix86_function_regparm (type
, function
);
36114 if (nregs
> 0 && !stdarg_p (type
))
36117 unsigned int ccvt
= ix86_get_callcvt (type
);
36119 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
36120 regno
= aggr
? DX_REG
: CX_REG
;
36121 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
36125 return gen_rtx_MEM (SImode
,
36126 plus_constant (Pmode
, stack_pointer_rtx
, 4));
36135 return gen_rtx_MEM (SImode
,
36136 plus_constant (Pmode
,
36137 stack_pointer_rtx
, 4));
36140 return gen_rtx_REG (SImode
, regno
);
36143 return gen_rtx_MEM (SImode
, plus_constant (Pmode
, stack_pointer_rtx
,
/* NOTE(review): lossily extracted; the return statements for each branch
   were dropped by the extraction, only the conditions and the original
   comments remain.  */
36147 /* Determine whether x86_output_mi_thunk can succeed. */
36150 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
36151 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
36152 HOST_WIDE_INT vcall_offset
, const_tree function
)
36154 /* 64-bit can handle anything. */
36158 /* For 32-bit, everything's fine if we have one free register. */
/* Fewer than 3 regparm registers means at least one scratch register is
   available for the thunk.  */
36159 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
36162 /* Need a free register for vcall_offset. */
36166 /* Need a free register for GOT references. */
36167 if (flag_pic
&& !targetm
.binds_local_p (function
))
36170 /* Otherwise ok. */
36174 /* Output the assembler code for a thunk function. THUNK_DECL is the
36175 declaration for the thunk function itself, FUNCTION is the decl for
36176 the target function. DELTA is an immediate constant offset to be
36177 added to THIS. If VCALL_OFFSET is nonzero, the word at
36178 *(*this + vcall_offset) should be added to THIS. */
36181 x86_output_mi_thunk (FILE *file
,
36182 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
36183 HOST_WIDE_INT vcall_offset
, tree function
)
36185 rtx this_param
= x86_this_parameter (function
);
36186 rtx this_reg
, tmp
, fnaddr
;
36187 unsigned int tmp_regno
;
36190 tmp_regno
= R10_REG
;
36193 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (function
));
36194 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
36195 tmp_regno
= AX_REG
;
36196 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
36197 tmp_regno
= DX_REG
;
36199 tmp_regno
= CX_REG
;
36202 emit_note (NOTE_INSN_PROLOGUE_END
);
36204 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
36205 pull it in now and let DELTA benefit. */
36206 if (REG_P (this_param
))
36207 this_reg
= this_param
;
36208 else if (vcall_offset
)
36210 /* Put the this parameter into %eax. */
36211 this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
36212 emit_move_insn (this_reg
, this_param
);
36215 this_reg
= NULL_RTX
;
36217 /* Adjust the this parameter by a fixed constant. */
36220 rtx delta_rtx
= GEN_INT (delta
);
36221 rtx delta_dst
= this_reg
? this_reg
: this_param
;
36225 if (!x86_64_general_operand (delta_rtx
, Pmode
))
36227 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
36228 emit_move_insn (tmp
, delta_rtx
);
36233 ix86_emit_binop (PLUS
, Pmode
, delta_dst
, delta_rtx
);
36236 /* Adjust the this parameter by a value stored in the vtable. */
36239 rtx vcall_addr
, vcall_mem
, this_mem
;
36241 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
36243 this_mem
= gen_rtx_MEM (ptr_mode
, this_reg
);
36244 if (Pmode
!= ptr_mode
)
36245 this_mem
= gen_rtx_ZERO_EXTEND (Pmode
, this_mem
);
36246 emit_move_insn (tmp
, this_mem
);
36248 /* Adjust the this parameter. */
36249 vcall_addr
= plus_constant (Pmode
, tmp
, vcall_offset
);
36251 && !ix86_legitimate_address_p (ptr_mode
, vcall_addr
, true))
36253 rtx tmp2
= gen_rtx_REG (Pmode
, R11_REG
);
36254 emit_move_insn (tmp2
, GEN_INT (vcall_offset
));
36255 vcall_addr
= gen_rtx_PLUS (Pmode
, tmp
, tmp2
);
36258 vcall_mem
= gen_rtx_MEM (ptr_mode
, vcall_addr
);
36259 if (Pmode
!= ptr_mode
)
36260 emit_insn (gen_addsi_1_zext (this_reg
,
36261 gen_rtx_REG (ptr_mode
,
36265 ix86_emit_binop (PLUS
, Pmode
, this_reg
, vcall_mem
);
36268 /* If necessary, drop THIS back to its stack slot. */
36269 if (this_reg
&& this_reg
!= this_param
)
36270 emit_move_insn (this_param
, this_reg
);
36272 fnaddr
= XEXP (DECL_RTL (function
), 0);
36275 if (!flag_pic
|| targetm
.binds_local_p (function
)
36280 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOTPCREL
);
36281 tmp
= gen_rtx_CONST (Pmode
, tmp
);
36282 fnaddr
= gen_rtx_MEM (Pmode
, tmp
);
36287 if (!flag_pic
|| targetm
.binds_local_p (function
))
36290 else if (TARGET_MACHO
)
36292 fnaddr
= machopic_indirect_call_target (DECL_RTL (function
));
36293 fnaddr
= XEXP (fnaddr
, 0);
36295 #endif /* TARGET_MACHO */
36298 tmp
= gen_rtx_REG (Pmode
, CX_REG
);
36299 output_set_got (tmp
, NULL_RTX
);
36301 fnaddr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOT
);
36302 fnaddr
= gen_rtx_PLUS (Pmode
, fnaddr
, tmp
);
36303 fnaddr
= gen_rtx_MEM (Pmode
, fnaddr
);
36307 /* Our sibling call patterns do not allow memories, because we have no
36308 predicate that can distinguish between frame and non-frame memory.
36309 For our purposes here, we can get away with (ab)using a jump pattern,
36310 because we're going to do no optimization. */
36311 if (MEM_P (fnaddr
))
36312 emit_jump_insn (gen_indirect_jump (fnaddr
));
36315 if (ix86_cmodel
== CM_LARGE_PIC
&& SYMBOLIC_CONST (fnaddr
))
36316 fnaddr
= legitimize_pic_address (fnaddr
,
36317 gen_rtx_REG (Pmode
, tmp_regno
));
36319 if (!sibcall_insn_operand (fnaddr
, word_mode
))
36321 tmp
= gen_rtx_REG (word_mode
, tmp_regno
);
36322 if (GET_MODE (fnaddr
) != word_mode
)
36323 fnaddr
= gen_rtx_ZERO_EXTEND (word_mode
, fnaddr
);
36324 emit_move_insn (tmp
, fnaddr
);
36328 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
36329 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
36330 tmp
= emit_call_insn (tmp
);
36331 SIBLING_CALL_P (tmp
) = 1;
36335 /* Emit just enough of rest_of_compilation to get the insns emitted.
36336 Note that use_thunk calls assemble_start_function et al. */
36337 tmp
= get_insns ();
36338 shorten_branches (tmp
);
36339 final_start_function (tmp
, file
, 1);
36340 final (tmp
, file
, 1);
36341 final_end_function ();
/* Emit per-file assembly preamble: delegates to default_file_start (and
   darwin_file_start — presumably under a TARGET_MACHO guard dropped by the
   extraction; TODO confirm), then emits optional .version / __fltused /
   .intel_syntax directives depending on target configuration.
   NOTE(review): lossily extracted; fused line numbers, missing braces.  */
36345 x86_file_start (void)
36347 default_file_start ();
36349 darwin_file_start ();
36351 if (X86_FILE_START_VERSION_DIRECTIVE
)
36352 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
36353 if (X86_FILE_START_FLTUSED
)
36354 fputs ("\t.global\t__fltused\n", asm_out_file
);
/* Switch the whole output file to Intel syntax when requested.  */
36355 if (ix86_asm_dialect
== ASM_INTEL
)
36356 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
/* Compute the alignment (in bits) to use for FIELD given the alignment
   COMPUTED by generic code.  On 32-bit non-ALIGN_DOUBLE targets, doubles,
   complex doubles and (complex) integer modes are capped at 32-bit
   alignment for i386 ABI compatibility.
   NOTE(review): lossily extracted; the early-return for
   TARGET_64BIT/TARGET_ALIGN_DOUBLE and the final return were dropped.  */
36360 x86_field_alignment (tree field
, int computed
)
36362 enum machine_mode mode
;
36363 tree type
= TREE_TYPE (field
);
36365 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
/* Look through array types so an array of doubles is capped like a
   scalar double.  */
36367 mode
= TYPE_MODE (strip_array_types (type
));
36368 if (mode
== DFmode
|| mode
== DCmode
36369 || GET_MODE_CLASS (mode
) == MODE_INT
36370 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
36371 return MIN (32, computed
);
/* NOTE(review): lossily extracted; the 64-bit/PIC/non-PIC branch structure
   (#else/#endif lines and some fprintf arguments) was dropped, leaving only
   the emitted assembly strings.  */
36375 /* Output assembler code to FILE to increment profiler label # LABELNO
36376 for profiling a function entry. */
36378 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
/* flag_fentry selects the before-prologue mcount entry point.  */
36380 const char *mcount_name
= (flag_fentry
? MCOUNT_NAME_BEFORE_PROLOGUE
/* 64-bit: load the profile counter address into %r11, then call mcount
   (through the GOT when PIC).  */
36385 #ifndef NO_PROFILE_COUNTERS
36386 fprintf (file
, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX
, labelno
);
36389 if (!TARGET_PECOFF
&& flag_pic
)
36390 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name
);
36392 fprintf (file
, "\tcall\t%s\n", mcount_name
);
/* 32-bit PIC variant: counter address via @GOTOFF(%ebx), call via @GOT.  */
36396 #ifndef NO_PROFILE_COUNTERS
36397 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER
"\n",
36400 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name
);
/* 32-bit non-PIC variant: absolute counter address, direct call.  */
36404 #ifndef NO_PROFILE_COUNTERS
36405 fprintf (file
, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER
"\n",
36408 fprintf (file
, "\tcall\t%s\n", mcount_name
);
36412 /* We don't have exact information about the insn sizes, but we may assume
36413 quite safely that we are informed about all 1 byte insns and memory
36414 address sizes. This is enough to eliminate unnecessary padding in
36418 min_insn_size (rtx insn
)
36422 if (!INSN_P (insn
) || !active_insn_p (insn
))
36425 /* Discard alignments we've emit and jump instructions. */
36426 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
36427 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
36430 /* Important case - calls are always 5 bytes.
36431 It is common to have many calls in the row. */
36433 && symbolic_reference_mentioned_p (PATTERN (insn
))
36434 && !SIBLING_CALL_P (insn
))
36436 len
= get_attr_length (insn
);
36440 /* For normal instructions we rely on get_attr_length being exact,
36441 with a few exceptions. */
36442 if (!JUMP_P (insn
))
36444 enum attr_type type
= get_attr_type (insn
);
36449 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
36450 || asm_noperands (PATTERN (insn
)) >= 0)
36457 /* Otherwise trust get_attr_length. */
36461 l
= get_attr_length_address (insn
);
36462 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
36471 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
36473 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
36477 ix86_avoid_jump_mispredicts (void)
36479 rtx insn
, start
= get_insns ();
36480 int nbytes
= 0, njumps
= 0;
36483 /* Look for all minimal intervals of instructions containing 4 jumps.
36484 The intervals are bounded by START and INSN. NBYTES is the total
36485 size of instructions in the interval including INSN and not including
36486 START. When the NBYTES is smaller than 16 bytes, it is possible
36487 that the end of START and INSN ends up in the same 16byte page.
36489 The smallest offset in the page INSN can start is the case where START
36490 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
36491 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
36493 for (insn
= start
; insn
; insn
= NEXT_INSN (insn
))
36497 if (LABEL_P (insn
))
36499 int align
= label_to_alignment (insn
);
36500 int max_skip
= label_to_max_skip (insn
);
36504 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
36505 already in the current 16 byte page, because otherwise
36506 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
36507 bytes to reach 16 byte boundary. */
36509 || (align
<= 3 && max_skip
!= (1 << align
) - 1))
36512 fprintf (dump_file
, "Label %i with max_skip %i\n",
36513 INSN_UID (insn
), max_skip
);
36516 while (nbytes
+ max_skip
>= 16)
36518 start
= NEXT_INSN (start
);
36519 if (JUMP_P (start
) || CALL_P (start
))
36520 njumps
--, isjump
= 1;
36523 nbytes
-= min_insn_size (start
);
36529 min_size
= min_insn_size (insn
);
36530 nbytes
+= min_size
;
36532 fprintf (dump_file
, "Insn %i estimated to %i bytes\n",
36533 INSN_UID (insn
), min_size
);
36534 if (JUMP_P (insn
) || CALL_P (insn
))
36541 start
= NEXT_INSN (start
);
36542 if (JUMP_P (start
) || CALL_P (start
))
36543 njumps
--, isjump
= 1;
36546 nbytes
-= min_insn_size (start
);
36548 gcc_assert (njumps
>= 0);
36550 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
36551 INSN_UID (start
), INSN_UID (insn
), nbytes
);
36553 if (njumps
== 3 && isjump
&& nbytes
< 16)
36555 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
36558 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
36559 INSN_UID (insn
), padsize
);
36560 emit_insn_before (gen_pad (GEN_INT (padsize
)), insn
);
36566 /* AMD Athlon works faster
36567 when RET is not destination of conditional jump or directly preceded
36568 by other jump instruction. We avoid the penalty by inserting NOP just
36569 before the RET instructions in such cases. */
36571 ix86_pad_returns (void)
36576 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
36578 basic_block bb
= e
->src
;
36579 rtx ret
= BB_END (bb
);
36581 bool replace
= false;
36583 if (!JUMP_P (ret
) || !ANY_RETURN_P (PATTERN (ret
))
36584 || optimize_bb_for_size_p (bb
))
36586 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
36587 if (active_insn_p (prev
) || LABEL_P (prev
))
36589 if (prev
&& LABEL_P (prev
))
36594 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
36595 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
36596 && !(e
->flags
& EDGE_FALLTHRU
))
36604 prev
= prev_active_insn (ret
);
36606 && ((JUMP_P (prev
) && any_condjump_p (prev
))
36609 /* Empty functions get branch mispredict even when
36610 the jump destination is not visible to us. */
36611 if (!prev
&& !optimize_function_for_size_p (cfun
))
36616 emit_jump_insn_before (gen_simple_return_internal_long (), ret
);
/* NOTE(review): lossily extracted; the returns and the insn_count increment
   were dropped by the extraction.  */
36622 /* Count the minimum number of instructions in BB. Return 4 if the
36623 number of instructions >= 4. */
36626 ix86_count_insn_bb (basic_block bb
)
36629 int insn_count
= 0;
36631 /* Count number of instructions in this block. Return 4 if the number
36632 of instructions >= 4. */
36633 FOR_BB_INSNS (bb
, insn
)
36635 /* Only happen in exit blocks. */
/* Return insns terminate the count early (condition's first operand was
   dropped by the extraction — presumably JUMP_P; TODO confirm).  */
36637 && ANY_RETURN_P (PATTERN (insn
)))
/* USE and CLOBBER patterns are bookkeeping, not real instructions.  */
36640 if (NONDEBUG_INSN_P (insn
)
36641 && GET_CODE (PATTERN (insn
)) != USE
36642 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
/* Saturate at 4 — callers only care about "fewer than 4".  */
36645 if (insn_count
>= 4)
/* NOTE(review): lossily extracted; fused line numbers and missing braces.  */
36654 /* Count the minimum number of instructions in code path in BB.
36655 Return 4 if the number of instructions >= 4. */
36658 ix86_count_insn (basic_block bb
)
36662 int min_prev_count
;
36664 /* Only bother counting instructions along paths with no
36665 more than 2 basic blocks between entry and exit. Given
36666 that BB has an edge to exit, determine if a predecessor
36667 of BB has an edge from entry. If so, compute the number
36668 of instructions in the predecessor block. If there
36669 happen to be multiple such blocks, compute the minimum. */
/* Start saturated at 4; only shortened when an entry-adjacent path is
   found.  */
36670 min_prev_count
= 4;
36671 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
36674 edge_iterator prev_ei
;
/* BB directly follows the entry block: no predecessor instructions.  */
36676 if (e
->src
== ENTRY_BLOCK_PTR_FOR_FN (cfun
))
36678 min_prev_count
= 0;
/* Otherwise, if a predecessor of E->src is the entry block, E->src is one
   hop from entry; take the minimum count over all such predecessors.  */
36681 FOR_EACH_EDGE (prev_e
, prev_ei
, e
->src
->preds
)
36683 if (prev_e
->src
== ENTRY_BLOCK_PTR_FOR_FN (cfun
))
36685 int count
= ix86_count_insn_bb (e
->src
);
36686 if (count
< min_prev_count
)
36687 min_prev_count
= count
;
/* Add BB's own instructions unless already saturated.  */
36693 if (min_prev_count
< 4)
36694 min_prev_count
+= ix86_count_insn_bb (bb
);
36696 return min_prev_count
;
/* NOTE(review): lossily extracted; declarations of e/ei/insn and some loop
   heads were dropped by the extraction.  */
36699 /* Pad short function to 4 instructions. */
36702 ix86_pad_short_function (void)
/* Walk each predecessor of the exit block, i.e. each block that returns.  */
36707 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
36709 rtx ret
= BB_END (e
->src
);
36710 if (JUMP_P (ret
) && ANY_RETURN_P (PATTERN (ret
)))
36712 int insn_count
= ix86_count_insn (e
->src
);
36714 /* Pad short function. */
36715 if (insn_count
< 4)
36719 /* Find epilogue. */
/* Walk backwards to the NOTE_INSN_EPILOGUE_BEG note (loop head partially
   dropped by the extraction).  */
36722 || NOTE_KIND (insn
) != NOTE_INSN_EPILOGUE_BEG
))
36723 insn
= PREV_INSN (insn
);
36728 /* Two NOPs count as one instruction. */
36729 insn_count
= 2 * (4 - insn_count
);
/* Insert the padding NOPs just before the epilogue.  */
36730 emit_insn_before (gen_nops (GEN_INT (insn_count
)), insn
);
/* NOTE(review): lossily extracted; fused line numbers and missing braces /
   declarations.  */
36736 /* Fix up a Windows system unwinder issue. If an EH region falls through into
36737 the epilogue, the Windows system unwinder will apply epilogue logic and
36738 produce incorrect offsets. This can be avoided by adding a nop between
36739 the last insn that can throw and the first insn of the epilogue. */
36742 ix86_seh_fixup_eh_fallthru (void)
/* Check every block that flows into the function exit.  */
36747 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
36751 /* Find the beginning of the epilogue. */
36752 for (insn
= BB_END (e
->src
); insn
!= NULL
; insn
= PREV_INSN (insn
))
36753 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_EPILOGUE_BEG
)
36758 /* We only care about preceding insns that can throw. */
36759 insn
= prev_active_insn (insn
);
36760 if (insn
== NULL
|| !can_throw_internal (insn
))
36763 /* Do not separate calls from their debug information. */
/* Skip forward past var-location / call-arg-location notes so the nop
   lands after the debug notes attached to the throwing insn.  */
36764 for (next
= NEXT_INSN (insn
); next
!= NULL
; next
= NEXT_INSN (next
))
36766 && (NOTE_KIND (next
) == NOTE_INSN_VAR_LOCATION
36767 || NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
))
/* One nop separates the throwing insn from the epilogue.  */
36772 emit_insn_after (gen_nops (const1_rtx
), insn
);
/* NOTE(review): lossily extracted; the function header itself (presumably
   static void ix86_reorg (void) — TODO confirm) was dropped along with
   braces and the closing #endif.  This is the machine-dependent reorg
   pass dispatcher.  */
36776 /* Implement machine specific optimizations. We implement padding of returns
36777 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
36781 /* We are freeing block_for_insn in the toplev to keep compatibility
36782 with old MDEP_REORGS that are not CFG based. Recompute it now. */
36783 compute_bb_for_insn ();
/* SEH targets need the nop-before-epilogue fix for EH fallthrough.  */
36785 if (TARGET_SEH
&& current_function_has_exception_handlers ())
36786 ix86_seh_fixup_eh_fallthru ();
/* The padding transformations only pay off when optimizing for speed.  */
36788 if (optimize
&& optimize_function_for_speed_p (cfun
))
36790 if (TARGET_PAD_SHORT_FUNCTION
)
36791 ix86_pad_short_function ();
36792 else if (TARGET_PAD_RETURNS
)
36793 ix86_pad_returns ();
36794 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
36795 if (TARGET_FOUR_JUMP_LIMIT
)
36796 ix86_avoid_jump_mispredicts ();
/* NOTE(review): lossily extracted; the rest of the header comment, the
   declaration of i, and the returns were dropped.  Scans INSN's operands
   for a general register whose QImode form needs a REX prefix.  */
36801 /* Return nonzero when QImode register that must be represented via REX prefix
36804 x86_extended_QIreg_mentioned_p (rtx insn
)
/* Fill recog_data with INSN's operands.  */
36807 extract_insn_cached (insn
);
36808 for (i
= 0; i
< recog_data
.n_operands
; i
++)
/* A general reg that is not addressable as a QImode low byte without
   REX (i.e. not AX/BX/CX/DX) matches.  */
36809 if (GENERAL_REG_P (recog_data
.operand
[i
])
36810 && !QI_REGNO_P (REGNO (recog_data
.operand
[i
])))
/* NOTE(review): lossily extracted; the REG_P guard on *p was dropped by the
   extraction.  for_each_rtx callback: nonzero iff *P is a register that
   needs a REX prefix (r8–r15 or xmm8–xmm15).  */
36815 /* Return nonzero when P points to register encoded via REX prefix.
36816 Called via for_each_rtx. */
36818 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
36820 unsigned int regno
;
36823 regno
= REGNO (*p
);
36824 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
/* NOTE(review): lossily extracted; remainder of the header comment dropped.
   Walks INSN's pattern (or INSN itself when it is a bare rtx) with
   extended_reg_mentioned_1 to detect any REX-encoded register.  */
36827 /* Return true when INSN mentions register that must be encoded using REX
36830 x86_extended_reg_mentioned_p (rtx insn
)
36832 return for_each_rtx (INSN_P (insn
) ? &PATTERN (insn
) : &insn
,
36833 extended_reg_mentioned_1
, NULL
);
/* NOTE(review): lossily extracted; the switch over modes, the early returns,
   and the declaration of val were dropped by the extraction.  */
36836 /* If profitable, negate (without causing overflow) integer constant
36837 of mode MODE at location LOC. Return true in this case. */
36839 x86_maybe_negate_const_int (rtx
*loc
, enum machine_mode mode
)
/* Only CONST_INTs are candidates.  */
36843 if (!CONST_INT_P (*loc
))
36849 /* DImode x86_64 constants must fit in 32 bits. */
36850 gcc_assert (x86_64_immediate_operand (*loc
, mode
));
36861 gcc_unreachable ();
36864 /* Avoid overflows. */
/* Negating the mode's sign bit would overflow, so refuse.  */
36865 if (mode_signbit_p (mode
, *loc
))
36868 val
= INTVAL (*loc
);
36870 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
36871 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
36872 if ((val
< 0 && val
!= -128)
/* Replace the constant in place with its negation.  */
36875 *loc
= GEN_INT (-val
);
/* NOTE(review): lossily extracted; braces, the assignment of out, and the
   trailing arguments of two expand_simple_binop calls were dropped.  */
36882 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
36883 optabs would emit if we didn't have TFmode patterns. */
36886 x86_emit_floatuns (rtx operands
[2])
36888 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
36889 enum machine_mode mode
, inmode
;
36891 inmode
= GET_MODE (operands
[1]);
36892 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
36895 in
= force_reg (inmode
, operands
[1]);
36896 mode
= GET_MODE (out
);
36897 neglab
= gen_label_rtx ();
36898 donelab
= gen_label_rtx ();
36899 f0
= gen_reg_rtx (mode
);
/* Non-negative inputs convert directly via the signed path.  */
36901 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
36903 expand_float (out
, in
, 0);
36905 emit_jump_insn (gen_jump (donelab
));
36908 emit_label (neglab
);
/* Negative (i.e. large unsigned) inputs: halve with a rounding-to-odd
   trick — i0 = (in >> 1) | (in & 1) — convert, then double.  */
36910 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
36912 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
36914 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
36916 expand_float (f0
, i0
, 0);
/* out = f0 + f0 undoes the halving in the FP domain.  */
36918 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
36920 emit_label (donelab
);
36923 /* AVX512F does support 64-byte integer vector operations,
36924 thus the longest vector we are faced with is V64QImode. */
36925 #define MAX_VECT_LEN 64
/* Descriptor for a vector permutation expansion: destination, the two
   source operands, the permutation indices (perm, nelt valid entries),
   the vector mode, and whether both operands are the same (one_operand_p).
   NOTE(review): lossily extracted; braces and one trailing field
   (presumably a testing_p flag — TODO confirm) are missing.  */
36927 struct expand_vec_perm_d
36929 rtx target
, op0
, op1
;
36930 unsigned char perm
[MAX_VECT_LEN
];
36931 enum machine_mode vmode
;
36932 unsigned char nelt
;
36933 bool one_operand_p
;
36937 static bool canonicalize_perm (struct expand_vec_perm_d
*d
);
36938 static bool expand_vec_perm_1 (struct expand_vec_perm_d
*d
);
36939 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
);
/* NOTE(review): lossily extracted; the trailing return n; was dropped.
   The asserts verify the invariant: same total size, half the element
   count (so elements are twice as wide).  */
36941 /* Get a vector mode of the same size as the original but with elements
36942 twice as wide. This is only guaranteed to apply to integral vectors. */
36944 static inline enum machine_mode
36945 get_mode_wider_vector (enum machine_mode o
)
36947 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
36948 enum machine_mode n
= GET_MODE_WIDER_MODE (o
);
36949 gcc_assert (GET_MODE_NUNITS (o
) == GET_MODE_NUNITS (n
) * 2);
36950 gcc_assert (GET_MODE_SIZE (o
) == GET_MODE_SIZE (n
));
36954 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
36955 with all elements equal to VAR. Return true if successful. */
36958 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
36959 rtx target
, rtx val
)
36982 /* First attempt to recognize VAL as-is. */
36983 dup
= gen_rtx_VEC_DUPLICATE (mode
, val
);
36984 insn
= emit_insn (gen_rtx_SET (VOIDmode
, target
, dup
));
36985 if (recog_memoized (insn
) < 0)
36988 /* If that fails, force VAL into a register. */
36991 XEXP (dup
, 0) = force_reg (GET_MODE_INNER (mode
), val
);
36992 seq
= get_insns ();
36995 emit_insn_before (seq
, insn
);
36997 ok
= recog_memoized (insn
) >= 0;
37006 if (TARGET_SSE
|| TARGET_3DNOW_A
)
37010 val
= gen_lowpart (SImode
, val
);
37011 x
= gen_rtx_TRUNCATE (HImode
, val
);
37012 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
37013 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
37026 struct expand_vec_perm_d dperm
;
37030 memset (&dperm
, 0, sizeof (dperm
));
37031 dperm
.target
= target
;
37032 dperm
.vmode
= mode
;
37033 dperm
.nelt
= GET_MODE_NUNITS (mode
);
37034 dperm
.op0
= dperm
.op1
= gen_reg_rtx (mode
);
37035 dperm
.one_operand_p
= true;
37037 /* Extend to SImode using a paradoxical SUBREG. */
37038 tmp1
= gen_reg_rtx (SImode
);
37039 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
37041 /* Insert the SImode value as low element of a V4SImode vector. */
37042 tmp2
= gen_reg_rtx (V4SImode
);
37043 emit_insn (gen_vec_setv4si_0 (tmp2
, CONST0_RTX (V4SImode
), tmp1
));
37044 emit_move_insn (dperm
.op0
, gen_lowpart (mode
, tmp2
));
37046 ok
= (expand_vec_perm_1 (&dperm
)
37047 || expand_vec_perm_broadcast_1 (&dperm
));
37059 /* Replicate the value once into the next wider mode and recurse. */
37061 enum machine_mode smode
, wsmode
, wvmode
;
37064 smode
= GET_MODE_INNER (mode
);
37065 wvmode
= get_mode_wider_vector (mode
);
37066 wsmode
= GET_MODE_INNER (wvmode
);
37068 val
= convert_modes (wsmode
, smode
, val
, true);
37069 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
37070 GEN_INT (GET_MODE_BITSIZE (smode
)),
37071 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
37072 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
37074 x
= gen_reg_rtx (wvmode
);
37075 ok
= ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
);
37077 emit_move_insn (target
, gen_lowpart (GET_MODE (target
), x
));
37084 enum machine_mode hvmode
= (mode
== V16HImode
? V8HImode
: V16QImode
);
37085 rtx x
= gen_reg_rtx (hvmode
);
37087 ok
= ix86_expand_vector_init_duplicate (false, hvmode
, x
, val
);
37090 x
= gen_rtx_VEC_CONCAT (mode
, x
, x
);
37091 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
37100 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
37101 whose ONE_VAR element is VAR, and other elements are zero. Return true
37105 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
37106 rtx target
, rtx var
, int one_var
)
37108 enum machine_mode vsimode
;
37111 bool use_vector_set
= false;
37116 /* For SSE4.1, we normally use vector set. But if the second
37117 element is zero and inter-unit moves are OK, we use movq
37119 use_vector_set
= (TARGET_64BIT
&& TARGET_SSE4_1
37120 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
37126 use_vector_set
= TARGET_SSE4_1
;
37129 use_vector_set
= TARGET_SSE2
;
37132 use_vector_set
= TARGET_SSE
|| TARGET_3DNOW_A
;
37139 use_vector_set
= TARGET_AVX
;
37142 /* Use ix86_expand_vector_set in 64bit mode only. */
37143 use_vector_set
= TARGET_AVX
&& TARGET_64BIT
;
37149 if (use_vector_set
)
37151 emit_insn (gen_rtx_SET (VOIDmode
, target
, CONST0_RTX (mode
)));
37152 var
= force_reg (GET_MODE_INNER (mode
), var
);
37153 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
37169 var
= force_reg (GET_MODE_INNER (mode
), var
);
37170 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
37171 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
37176 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
37177 new_target
= gen_reg_rtx (mode
);
37179 new_target
= target
;
37180 var
= force_reg (GET_MODE_INNER (mode
), var
);
37181 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
37182 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
37183 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
37186 /* We need to shuffle the value to the correct position, so
37187 create a new pseudo to store the intermediate result. */
37189 /* With SSE2, we can use the integer shuffle insns. */
37190 if (mode
!= V4SFmode
&& TARGET_SSE2
)
37192 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
37194 GEN_INT (one_var
== 1 ? 0 : 1),
37195 GEN_INT (one_var
== 2 ? 0 : 1),
37196 GEN_INT (one_var
== 3 ? 0 : 1)));
37197 if (target
!= new_target
)
37198 emit_move_insn (target
, new_target
);
37202 /* Otherwise convert the intermediate result to V4SFmode and
37203 use the SSE1 shuffle instructions. */
37204 if (mode
!= V4SFmode
)
37206 tmp
= gen_reg_rtx (V4SFmode
);
37207 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
37212 emit_insn (gen_sse_shufps_v4sf (tmp
, tmp
, tmp
,
37214 GEN_INT (one_var
== 1 ? 0 : 1),
37215 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
37216 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
37218 if (mode
!= V4SFmode
)
37219 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
37220 else if (tmp
!= target
)
37221 emit_move_insn (target
, tmp
);
37223 else if (target
!= new_target
)
37224 emit_move_insn (target
, new_target
);
37229 vsimode
= V4SImode
;
37235 vsimode
= V2SImode
;
37241 /* Zero extend the variable element to SImode and recurse. */
37242 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
37244 x
= gen_reg_rtx (vsimode
);
37245 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
37247 gcc_unreachable ();
37249 emit_move_insn (target
, gen_lowpart (mode
, x
));
37257 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
37258 consisting of the values in VALS. It is known that all elements
37259 except ONE_VAR are constants. Return true if successful. */
37262 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
37263 rtx target
, rtx vals
, int one_var
)
37265 rtx var
= XVECEXP (vals
, 0, one_var
);
37266 enum machine_mode wmode
;
37269 const_vec
= copy_rtx (vals
);
37270 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
37271 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
37279 /* For the two element vectors, it's just as easy to use
37280 the general case. */
37284 /* Use ix86_expand_vector_set in 64bit mode only. */
37307 /* There's no way to set one QImode entry easily. Combine
37308 the variable value with its adjacent constant value, and
37309 promote to an HImode set. */
37310 x
= XVECEXP (vals
, 0, one_var
^ 1);
37313 var
= convert_modes (HImode
, QImode
, var
, true);
37314 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
37315 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
37316 x
= GEN_INT (INTVAL (x
) & 0xff);
37320 var
= convert_modes (HImode
, QImode
, var
, true);
37321 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
37323 if (x
!= const0_rtx
)
37324 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
37325 1, OPTAB_LIB_WIDEN
);
37327 x
= gen_reg_rtx (wmode
);
37328 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
37329 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
37331 emit_move_insn (target
, gen_lowpart (mode
, x
));
37338 emit_move_insn (target
, const_vec
);
37339 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
37343 /* A subroutine of ix86_expand_vector_init_general. Use vector
37344 concatenate to handle the most general case: all values variable,
37345 and none identical. */
37348 ix86_expand_vector_init_concat (enum machine_mode mode
,
37349 rtx target
, rtx
*ops
, int n
)
37351 enum machine_mode cmode
, hmode
= VOIDmode
;
37352 rtx first
[8], second
[4];
37392 gcc_unreachable ();
37395 if (!register_operand (ops
[1], cmode
))
37396 ops
[1] = force_reg (cmode
, ops
[1]);
37397 if (!register_operand (ops
[0], cmode
))
37398 ops
[0] = force_reg (cmode
, ops
[0]);
37399 emit_insn (gen_rtx_SET (VOIDmode
, target
,
37400 gen_rtx_VEC_CONCAT (mode
, ops
[0],
37420 gcc_unreachable ();
37436 gcc_unreachable ();
37441 /* FIXME: We process inputs backward to help RA. PR 36222. */
37444 for (; i
> 0; i
-= 2, j
--)
37446 first
[j
] = gen_reg_rtx (cmode
);
37447 v
= gen_rtvec (2, ops
[i
- 1], ops
[i
]);
37448 ix86_expand_vector_init (false, first
[j
],
37449 gen_rtx_PARALLEL (cmode
, v
));
37455 gcc_assert (hmode
!= VOIDmode
);
37456 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
37458 second
[j
] = gen_reg_rtx (hmode
);
37459 ix86_expand_vector_init_concat (hmode
, second
[j
],
37463 ix86_expand_vector_init_concat (mode
, target
, second
, n
);
37466 ix86_expand_vector_init_concat (mode
, target
, first
, n
);
37470 gcc_unreachable ();
37474 /* A subroutine of ix86_expand_vector_init_general. Use vector
37475 interleave to handle the most general case: all values variable,
37476 and none identical. */
37479 ix86_expand_vector_init_interleave (enum machine_mode mode
,
37480 rtx target
, rtx
*ops
, int n
)
37482 enum machine_mode first_imode
, second_imode
, third_imode
, inner_mode
;
37485 rtx (*gen_load_even
) (rtx
, rtx
, rtx
);
37486 rtx (*gen_interleave_first_low
) (rtx
, rtx
, rtx
);
37487 rtx (*gen_interleave_second_low
) (rtx
, rtx
, rtx
);
37492 gen_load_even
= gen_vec_setv8hi
;
37493 gen_interleave_first_low
= gen_vec_interleave_lowv4si
;
37494 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
37495 inner_mode
= HImode
;
37496 first_imode
= V4SImode
;
37497 second_imode
= V2DImode
;
37498 third_imode
= VOIDmode
;
37501 gen_load_even
= gen_vec_setv16qi
;
37502 gen_interleave_first_low
= gen_vec_interleave_lowv8hi
;
37503 gen_interleave_second_low
= gen_vec_interleave_lowv4si
;
37504 inner_mode
= QImode
;
37505 first_imode
= V8HImode
;
37506 second_imode
= V4SImode
;
37507 third_imode
= V2DImode
;
37510 gcc_unreachable ();
37513 for (i
= 0; i
< n
; i
++)
37515 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
37516 op0
= gen_reg_rtx (SImode
);
37517 emit_move_insn (op0
, gen_lowpart (SImode
, ops
[i
+ i
]));
37519 /* Insert the SImode value as low element of V4SImode vector. */
37520 op1
= gen_reg_rtx (V4SImode
);
37521 op0
= gen_rtx_VEC_MERGE (V4SImode
,
37522 gen_rtx_VEC_DUPLICATE (V4SImode
,
37524 CONST0_RTX (V4SImode
),
37526 emit_insn (gen_rtx_SET (VOIDmode
, op1
, op0
));
37528 /* Cast the V4SImode vector back to a vector in orignal mode. */
37529 op0
= gen_reg_rtx (mode
);
37530 emit_move_insn (op0
, gen_lowpart (mode
, op1
));
37532 /* Load even elements into the second position. */
37533 emit_insn (gen_load_even (op0
,
37534 force_reg (inner_mode
,
37538 /* Cast vector to FIRST_IMODE vector. */
37539 ops
[i
] = gen_reg_rtx (first_imode
);
37540 emit_move_insn (ops
[i
], gen_lowpart (first_imode
, op0
));
37543 /* Interleave low FIRST_IMODE vectors. */
37544 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
37546 op0
= gen_reg_rtx (first_imode
);
37547 emit_insn (gen_interleave_first_low (op0
, ops
[i
], ops
[i
+ 1]));
37549 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
37550 ops
[j
] = gen_reg_rtx (second_imode
);
37551 emit_move_insn (ops
[j
], gen_lowpart (second_imode
, op0
));
37554 /* Interleave low SECOND_IMODE vectors. */
37555 switch (second_imode
)
37558 for (i
= j
= 0; i
< n
/ 2; i
+= 2, j
++)
37560 op0
= gen_reg_rtx (second_imode
);
37561 emit_insn (gen_interleave_second_low (op0
, ops
[i
],
37564 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
37566 ops
[j
] = gen_reg_rtx (third_imode
);
37567 emit_move_insn (ops
[j
], gen_lowpart (third_imode
, op0
));
37569 second_imode
= V2DImode
;
37570 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
37574 op0
= gen_reg_rtx (second_imode
);
37575 emit_insn (gen_interleave_second_low (op0
, ops
[0],
37578 /* Cast the SECOND_IMODE vector back to a vector on original
37580 emit_insn (gen_rtx_SET (VOIDmode
, target
,
37581 gen_lowpart (mode
, op0
)));
37585 gcc_unreachable ();
37589 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
37590 all values variable, and none identical. */
37593 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
37594 rtx target
, rtx vals
)
37596 rtx ops
[32], op0
, op1
;
37597 enum machine_mode half_mode
= VOIDmode
;
37604 if (!mmx_ok
&& !TARGET_SSE
)
37616 n
= GET_MODE_NUNITS (mode
);
37617 for (i
= 0; i
< n
; i
++)
37618 ops
[i
] = XVECEXP (vals
, 0, i
);
37619 ix86_expand_vector_init_concat (mode
, target
, ops
, n
);
37623 half_mode
= V16QImode
;
37627 half_mode
= V8HImode
;
37631 n
= GET_MODE_NUNITS (mode
);
37632 for (i
= 0; i
< n
; i
++)
37633 ops
[i
] = XVECEXP (vals
, 0, i
);
37634 op0
= gen_reg_rtx (half_mode
);
37635 op1
= gen_reg_rtx (half_mode
);
37636 ix86_expand_vector_init_interleave (half_mode
, op0
, ops
,
37638 ix86_expand_vector_init_interleave (half_mode
, op1
,
37639 &ops
[n
>> 1], n
>> 2);
37640 emit_insn (gen_rtx_SET (VOIDmode
, target
,
37641 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
37645 if (!TARGET_SSE4_1
)
37653 /* Don't use ix86_expand_vector_init_interleave if we can't
37654 move from GPR to SSE register directly. */
37655 if (!TARGET_INTER_UNIT_MOVES_TO_VEC
)
37658 n
= GET_MODE_NUNITS (mode
);
37659 for (i
= 0; i
< n
; i
++)
37660 ops
[i
] = XVECEXP (vals
, 0, i
);
37661 ix86_expand_vector_init_interleave (mode
, target
, ops
, n
>> 1);
37669 gcc_unreachable ();
37673 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
37674 enum machine_mode inner_mode
;
37675 rtx words
[4], shift
;
37677 inner_mode
= GET_MODE_INNER (mode
);
37678 n_elts
= GET_MODE_NUNITS (mode
);
37679 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
37680 n_elt_per_word
= n_elts
/ n_words
;
37681 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
37683 for (i
= 0; i
< n_words
; ++i
)
37685 rtx word
= NULL_RTX
;
37687 for (j
= 0; j
< n_elt_per_word
; ++j
)
37689 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
37690 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
37696 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
37697 word
, 1, OPTAB_LIB_WIDEN
);
37698 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
37699 word
, 1, OPTAB_LIB_WIDEN
);
37707 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
37708 else if (n_words
== 2)
37710 rtx tmp
= gen_reg_rtx (mode
);
37711 emit_clobber (tmp
);
37712 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
37713 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
37714 emit_move_insn (target
, tmp
);
37716 else if (n_words
== 4)
37718 rtx tmp
= gen_reg_rtx (V4SImode
);
37719 gcc_assert (word_mode
== SImode
);
37720 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
37721 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
37722 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
37725 gcc_unreachable ();
37729 /* Initialize vector TARGET via VALS. Suppress the use of MMX
37730 instructions unless MMX_OK is true. */
37733 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
37735 enum machine_mode mode
= GET_MODE (target
);
37736 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
37737 int n_elts
= GET_MODE_NUNITS (mode
);
37738 int n_var
= 0, one_var
= -1;
37739 bool all_same
= true, all_const_zero
= true;
37743 for (i
= 0; i
< n_elts
; ++i
)
37745 x
= XVECEXP (vals
, 0, i
);
37746 if (!(CONST_INT_P (x
)
37747 || GET_CODE (x
) == CONST_DOUBLE
37748 || GET_CODE (x
) == CONST_FIXED
))
37749 n_var
++, one_var
= i
;
37750 else if (x
!= CONST0_RTX (inner_mode
))
37751 all_const_zero
= false;
37752 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
37756 /* Constants are best loaded from the constant pool. */
37759 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
37763 /* If all values are identical, broadcast the value. */
37765 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
37766 XVECEXP (vals
, 0, 0)))
37769 /* Values where only one field is non-constant are best loaded from
37770 the pool and overwritten via move later. */
37774 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
37775 XVECEXP (vals
, 0, one_var
),
37779 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
37783 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
37787 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
37789 enum machine_mode mode
= GET_MODE (target
);
37790 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
37791 enum machine_mode half_mode
;
37792 bool use_vec_merge
= false;
37794 static rtx (*gen_extract
[6][2]) (rtx
, rtx
)
37796 { gen_vec_extract_lo_v32qi
, gen_vec_extract_hi_v32qi
},
37797 { gen_vec_extract_lo_v16hi
, gen_vec_extract_hi_v16hi
},
37798 { gen_vec_extract_lo_v8si
, gen_vec_extract_hi_v8si
},
37799 { gen_vec_extract_lo_v4di
, gen_vec_extract_hi_v4di
},
37800 { gen_vec_extract_lo_v8sf
, gen_vec_extract_hi_v8sf
},
37801 { gen_vec_extract_lo_v4df
, gen_vec_extract_hi_v4df
}
37803 static rtx (*gen_insert
[6][2]) (rtx
, rtx
, rtx
)
37805 { gen_vec_set_lo_v32qi
, gen_vec_set_hi_v32qi
},
37806 { gen_vec_set_lo_v16hi
, gen_vec_set_hi_v16hi
},
37807 { gen_vec_set_lo_v8si
, gen_vec_set_hi_v8si
},
37808 { gen_vec_set_lo_v4di
, gen_vec_set_hi_v4di
},
37809 { gen_vec_set_lo_v8sf
, gen_vec_set_hi_v8sf
},
37810 { gen_vec_set_lo_v4df
, gen_vec_set_hi_v4df
}
37820 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
37821 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
37823 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
37825 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
37826 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
37832 use_vec_merge
= TARGET_SSE4_1
&& TARGET_64BIT
;
37836 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
37837 ix86_expand_vector_extract (false, tmp
, target
, 1 - elt
);
37839 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
37841 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
37842 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
37849 /* For the two element vectors, we implement a VEC_CONCAT with
37850 the extraction of the other element. */
37852 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
37853 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
37856 op0
= val
, op1
= tmp
;
37858 op0
= tmp
, op1
= val
;
37860 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
37861 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
37866 use_vec_merge
= TARGET_SSE4_1
;
37873 use_vec_merge
= true;
37877 /* tmp = target = A B C D */
37878 tmp
= copy_to_reg (target
);
37879 /* target = A A B B */
37880 emit_insn (gen_vec_interleave_lowv4sf (target
, target
, target
));
37881 /* target = X A B B */
37882 ix86_expand_vector_set (false, target
, val
, 0);
37883 /* target = A X C D */
37884 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
37885 const1_rtx
, const0_rtx
,
37886 GEN_INT (2+4), GEN_INT (3+4)));
37890 /* tmp = target = A B C D */
37891 tmp
= copy_to_reg (target
);
37892 /* tmp = X B C D */
37893 ix86_expand_vector_set (false, tmp
, val
, 0);
37894 /* target = A B X D */
37895 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
37896 const0_rtx
, const1_rtx
,
37897 GEN_INT (0+4), GEN_INT (3+4)));
37901 /* tmp = target = A B C D */
37902 tmp
= copy_to_reg (target
);
37903 /* tmp = X B C D */
37904 ix86_expand_vector_set (false, tmp
, val
, 0);
37905 /* target = A B X D */
37906 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
37907 const0_rtx
, const1_rtx
,
37908 GEN_INT (2+4), GEN_INT (0+4)));
37912 gcc_unreachable ();
37917 use_vec_merge
= TARGET_SSE4_1
;
37921 /* Element 0 handled by vec_merge below. */
37924 use_vec_merge
= true;
37930 /* With SSE2, use integer shuffles to swap element 0 and ELT,
37931 store into element 0, then shuffle them back. */
37935 order
[0] = GEN_INT (elt
);
37936 order
[1] = const1_rtx
;
37937 order
[2] = const2_rtx
;
37938 order
[3] = GEN_INT (3);
37939 order
[elt
] = const0_rtx
;
37941 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
37942 order
[1], order
[2], order
[3]));
37944 ix86_expand_vector_set (false, target
, val
, 0);
37946 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
37947 order
[1], order
[2], order
[3]));
37951 /* For SSE1, we have to reuse the V4SF code. */
37952 rtx t
= gen_reg_rtx (V4SFmode
);
37953 ix86_expand_vector_set (false, t
, gen_lowpart (SFmode
, val
), elt
);
37954 emit_move_insn (target
, gen_lowpart (mode
, t
));
37959 use_vec_merge
= TARGET_SSE2
;
37962 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
37966 use_vec_merge
= TARGET_SSE4_1
;
37973 half_mode
= V16QImode
;
37979 half_mode
= V8HImode
;
37985 half_mode
= V4SImode
;
37991 half_mode
= V2DImode
;
37997 half_mode
= V4SFmode
;
38003 half_mode
= V2DFmode
;
38009 /* Compute offset. */
38013 gcc_assert (i
<= 1);
38015 /* Extract the half. */
38016 tmp
= gen_reg_rtx (half_mode
);
38017 emit_insn (gen_extract
[j
][i
] (tmp
, target
));
38019 /* Put val in tmp at elt. */
38020 ix86_expand_vector_set (false, tmp
, val
, elt
);
38023 emit_insn (gen_insert
[j
][i
] (target
, target
, tmp
));
38032 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
38033 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
38034 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
38038 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
38040 emit_move_insn (mem
, target
);
38042 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
38043 emit_move_insn (tmp
, val
);
38045 emit_move_insn (target
, mem
);
38050 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
38052 enum machine_mode mode
= GET_MODE (vec
);
38053 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
38054 bool use_vec_extr
= false;
38067 use_vec_extr
= true;
38071 use_vec_extr
= TARGET_SSE4_1
;
38083 tmp
= gen_reg_rtx (mode
);
38084 emit_insn (gen_sse_shufps_v4sf (tmp
, vec
, vec
,
38085 GEN_INT (elt
), GEN_INT (elt
),
38086 GEN_INT (elt
+4), GEN_INT (elt
+4)));
38090 tmp
= gen_reg_rtx (mode
);
38091 emit_insn (gen_vec_interleave_highv4sf (tmp
, vec
, vec
));
38095 gcc_unreachable ();
38098 use_vec_extr
= true;
38103 use_vec_extr
= TARGET_SSE4_1
;
38117 tmp
= gen_reg_rtx (mode
);
38118 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
38119 GEN_INT (elt
), GEN_INT (elt
),
38120 GEN_INT (elt
), GEN_INT (elt
)));
38124 tmp
= gen_reg_rtx (mode
);
38125 emit_insn (gen_vec_interleave_highv4si (tmp
, vec
, vec
));
38129 gcc_unreachable ();
38132 use_vec_extr
= true;
38137 /* For SSE1, we have to reuse the V4SF code. */
38138 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
38139 gen_lowpart (V4SFmode
, vec
), elt
);
38145 use_vec_extr
= TARGET_SSE2
;
38148 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
38152 use_vec_extr
= TARGET_SSE4_1
;
38158 tmp
= gen_reg_rtx (V4SFmode
);
38160 emit_insn (gen_vec_extract_lo_v8sf (tmp
, vec
));
38162 emit_insn (gen_vec_extract_hi_v8sf (tmp
, vec
));
38163 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
38171 tmp
= gen_reg_rtx (V2DFmode
);
38173 emit_insn (gen_vec_extract_lo_v4df (tmp
, vec
));
38175 emit_insn (gen_vec_extract_hi_v4df (tmp
, vec
));
38176 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
38184 tmp
= gen_reg_rtx (V16QImode
);
38186 emit_insn (gen_vec_extract_lo_v32qi (tmp
, vec
));
38188 emit_insn (gen_vec_extract_hi_v32qi (tmp
, vec
));
38189 ix86_expand_vector_extract (false, target
, tmp
, elt
& 15);
38197 tmp
= gen_reg_rtx (V8HImode
);
38199 emit_insn (gen_vec_extract_lo_v16hi (tmp
, vec
));
38201 emit_insn (gen_vec_extract_hi_v16hi (tmp
, vec
));
38202 ix86_expand_vector_extract (false, target
, tmp
, elt
& 7);
38210 tmp
= gen_reg_rtx (V4SImode
);
38212 emit_insn (gen_vec_extract_lo_v8si (tmp
, vec
));
38214 emit_insn (gen_vec_extract_hi_v8si (tmp
, vec
));
38215 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
38223 tmp
= gen_reg_rtx (V2DImode
);
38225 emit_insn (gen_vec_extract_lo_v4di (tmp
, vec
));
38227 emit_insn (gen_vec_extract_hi_v4di (tmp
, vec
));
38228 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
38234 /* ??? Could extract the appropriate HImode element and shift. */
38241 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
38242 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
38244 /* Let the rtl optimizers know about the zero extension performed. */
38245 if (inner_mode
== QImode
|| inner_mode
== HImode
)
38247 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
38248 target
= gen_lowpart (SImode
, target
);
38251 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
38255 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
38257 emit_move_insn (mem
, vec
);
38259 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
38260 emit_move_insn (target
, tmp
);
38264 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
38265 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
38266 The upper bits of DEST are undefined, though they shouldn't cause
38267 exceptions (some bits from src or all zeros are ok). */
38270 emit_reduc_half (rtx dest
, rtx src
, int i
)
38273 switch (GET_MODE (src
))
38277 tem
= gen_sse_movhlps (dest
, src
, src
);
38279 tem
= gen_sse_shufps_v4sf (dest
, src
, src
, const1_rtx
, const1_rtx
,
38280 GEN_INT (1 + 4), GEN_INT (1 + 4));
38283 tem
= gen_vec_interleave_highv2df (dest
, src
, src
);
38289 d
= gen_reg_rtx (V1TImode
);
38290 tem
= gen_sse2_lshrv1ti3 (d
, gen_lowpart (V1TImode
, src
),
38295 tem
= gen_avx_vperm2f128v8sf3 (dest
, src
, src
, const1_rtx
);
38297 tem
= gen_avx_shufps256 (dest
, src
, src
,
38298 GEN_INT (i
== 128 ? 2 + (3 << 2) : 1));
38302 tem
= gen_avx_vperm2f128v4df3 (dest
, src
, src
, const1_rtx
);
38304 tem
= gen_avx_shufpd256 (dest
, src
, src
, const1_rtx
);
38312 if (GET_MODE (dest
) != V4DImode
)
38313 d
= gen_reg_rtx (V4DImode
);
38314 tem
= gen_avx2_permv2ti (d
, gen_lowpart (V4DImode
, src
),
38315 gen_lowpart (V4DImode
, src
),
38320 d
= gen_reg_rtx (V2TImode
);
38321 tem
= gen_avx2_lshrv2ti3 (d
, gen_lowpart (V2TImode
, src
),
38326 gcc_unreachable ();
38330 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), d
));
38333 /* Expand a vector reduction. FN is the binary pattern to reduce;
38334 DEST is the destination; IN is the input vector. */
38337 ix86_expand_reduc (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
38339 rtx half
, dst
, vec
= in
;
38340 enum machine_mode mode
= GET_MODE (in
);
38343 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
38345 && mode
== V8HImode
38346 && fn
== gen_uminv8hi3
)
38348 emit_insn (gen_sse4_1_phminposuw (dest
, in
));
38352 for (i
= GET_MODE_BITSIZE (mode
);
38353 i
> GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
38356 half
= gen_reg_rtx (mode
);
38357 emit_reduc_half (half
, vec
, i
);
38358 if (i
== GET_MODE_BITSIZE (GET_MODE_INNER (mode
)) * 2)
38361 dst
= gen_reg_rtx (mode
);
38362 emit_insn (fn (dst
, half
, vec
));
38367 /* Target hook for scalar_mode_supported_p. */
38369 ix86_scalar_mode_supported_p (enum machine_mode mode
)
38371 if (DECIMAL_FLOAT_MODE_P (mode
))
38372 return default_decimal_float_supported_p ();
38373 else if (mode
== TFmode
)
38376 return default_scalar_mode_supported_p (mode
);
38379 /* Implements target hook vector_mode_supported_p. */
38381 ix86_vector_mode_supported_p (enum machine_mode mode
)
38383 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
38385 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
38387 if (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
38389 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
38391 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
38396 /* Target hook for c_mode_for_suffix. */
38397 static enum machine_mode
38398 ix86_c_mode_for_suffix (char suffix
)
38408 /* Worker function for TARGET_MD_ASM_CLOBBERS.
38410 We do this in the new i386 backend to maintain source compatibility
38411 with the old cc0-based compiler. */
38414 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
38415 tree inputs ATTRIBUTE_UNUSED
,
38418 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
38420 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
38425 /* Implements target vector targetm.asm.encode_section_info. */
38427 static void ATTRIBUTE_UNUSED
38428 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
38430 default_encode_section_info (decl
, rtl
, first
);
38432 if (TREE_CODE (decl
) == VAR_DECL
38433 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
38434 && ix86_in_large_data_p (decl
))
38435 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
38438 /* Worker function for REVERSE_CONDITION. */
38441 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
38443 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
38444 ? reverse_condition (code
)
38445 : reverse_condition_maybe_unordered (code
));
38448 /* Output code to perform an x87 FP register move, from OPERANDS[1]
38452 output_387_reg_move (rtx insn
, rtx
*operands
)
38454 if (REG_P (operands
[0]))
38456 if (REG_P (operands
[1])
38457 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
38459 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
38460 return output_387_ffreep (operands
, 0);
38461 return "fstp\t%y0";
38463 if (STACK_TOP_P (operands
[0]))
38464 return "fld%Z1\t%y1";
38467 else if (MEM_P (operands
[0]))
38469 gcc_assert (REG_P (operands
[1]));
38470 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
38471 return "fstp%Z0\t%y0";
38474 /* There is no non-popping store to memory for XFmode.
38475 So if we need one, follow the store with a load. */
38476 if (GET_MODE (operands
[0]) == XFmode
)
38477 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
38479 return "fst%Z0\t%y0";
38486 /* Output code to perform a conditional jump to LABEL, if C2 flag in
38487 FP status register is set. */
38490 ix86_emit_fp_unordered_jump (rtx label
)
38492 rtx reg
= gen_reg_rtx (HImode
);
38495 emit_insn (gen_x86_fnstsw_1 (reg
));
38497 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
38499 emit_insn (gen_x86_sahf_1 (reg
));
38501 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
38502 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
38506 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
38508 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
38509 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
38512 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
38513 gen_rtx_LABEL_REF (VOIDmode
, label
),
38515 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
38517 emit_jump_insn (temp
);
38518 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
38521 /* Output code to perform a log1p XFmode calculation. */
38523 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
38525 rtx label1
= gen_label_rtx ();
38526 rtx label2
= gen_label_rtx ();
38528 rtx tmp
= gen_reg_rtx (XFmode
);
38529 rtx tmp2
= gen_reg_rtx (XFmode
);
38532 emit_insn (gen_absxf2 (tmp
, op1
));
38533 test
= gen_rtx_GE (VOIDmode
, tmp
,
38534 CONST_DOUBLE_FROM_REAL_VALUE (
38535 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
38537 emit_jump_insn (gen_cbranchxf4 (test
, XEXP (test
, 0), XEXP (test
, 1), label1
));
38539 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
38540 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
38541 emit_jump (label2
);
38543 emit_label (label1
);
38544 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
38545 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
38546 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
38547 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
38549 emit_label (label2
);
38552 /* Emit code for round calculation. */
38553 void ix86_emit_i387_round (rtx op0
, rtx op1
)
38555 enum machine_mode inmode
= GET_MODE (op1
);
38556 enum machine_mode outmode
= GET_MODE (op0
);
38557 rtx e1
, e2
, res
, tmp
, tmp1
, half
;
38558 rtx scratch
= gen_reg_rtx (HImode
);
38559 rtx flags
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
38560 rtx jump_label
= gen_label_rtx ();
38562 rtx (*gen_abs
) (rtx
, rtx
);
38563 rtx (*gen_neg
) (rtx
, rtx
);
38568 gen_abs
= gen_abssf2
;
38571 gen_abs
= gen_absdf2
;
38574 gen_abs
= gen_absxf2
;
38577 gcc_unreachable ();
38583 gen_neg
= gen_negsf2
;
38586 gen_neg
= gen_negdf2
;
38589 gen_neg
= gen_negxf2
;
38592 gen_neg
= gen_neghi2
;
38595 gen_neg
= gen_negsi2
;
38598 gen_neg
= gen_negdi2
;
38601 gcc_unreachable ();
38604 e1
= gen_reg_rtx (inmode
);
38605 e2
= gen_reg_rtx (inmode
);
38606 res
= gen_reg_rtx (outmode
);
38608 half
= CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf
, inmode
);
38610 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
38612 /* scratch = fxam(op1) */
38613 emit_insn (gen_rtx_SET (VOIDmode
, scratch
,
38614 gen_rtx_UNSPEC (HImode
, gen_rtvec (1, op1
),
38616 /* e1 = fabs(op1) */
38617 emit_insn (gen_abs (e1
, op1
));
38619 /* e2 = e1 + 0.5 */
38620 half
= force_reg (inmode
, half
);
38621 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
38622 gen_rtx_PLUS (inmode
, e1
, half
)));
38624 /* res = floor(e2) */
38625 if (inmode
!= XFmode
)
38627 tmp1
= gen_reg_rtx (XFmode
);
38629 emit_insn (gen_rtx_SET (VOIDmode
, tmp1
,
38630 gen_rtx_FLOAT_EXTEND (XFmode
, e2
)));
38640 rtx tmp0
= gen_reg_rtx (XFmode
);
38642 emit_insn (gen_frndintxf2_floor (tmp0
, tmp1
));
38644 emit_insn (gen_rtx_SET (VOIDmode
, res
,
38645 gen_rtx_UNSPEC (outmode
, gen_rtvec (1, tmp0
),
38646 UNSPEC_TRUNC_NOOP
)));
38650 emit_insn (gen_frndintxf2_floor (res
, tmp1
));
38653 emit_insn (gen_lfloorxfhi2 (res
, tmp1
));
38656 emit_insn (gen_lfloorxfsi2 (res
, tmp1
));
38659 emit_insn (gen_lfloorxfdi2 (res
, tmp1
));
38662 gcc_unreachable ();
38665 /* flags = signbit(a) */
38666 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x02)));
38668 /* if (flags) then res = -res */
38669 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
38670 gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
),
38671 gen_rtx_LABEL_REF (VOIDmode
, jump_label
),
38673 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
38674 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
38675 JUMP_LABEL (insn
) = jump_label
;
38677 emit_insn (gen_neg (res
, res
));
38679 emit_label (jump_label
);
38680 LABEL_NUSES (jump_label
) = 1;
38682 emit_move_insn (op0
, res
);
38685 /* Output code to perform a Newton-Rhapson approximation of a single precision
38686 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
38688 void ix86_emit_swdivsf (rtx res
, rtx a
, rtx b
, enum machine_mode mode
)
38690 rtx x0
, x1
, e0
, e1
;
38692 x0
= gen_reg_rtx (mode
);
38693 e0
= gen_reg_rtx (mode
);
38694 e1
= gen_reg_rtx (mode
);
38695 x1
= gen_reg_rtx (mode
);
38697 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
38699 b
= force_reg (mode
, b
);
38701 /* x0 = rcp(b) estimate */
38702 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
38703 gen_rtx_UNSPEC (mode
, gen_rtvec (1, b
),
38706 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
38707 gen_rtx_MULT (mode
, x0
, b
)));
38710 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
38711 gen_rtx_MULT (mode
, x0
, e0
)));
38714 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
38715 gen_rtx_PLUS (mode
, x0
, x0
)));
38718 emit_insn (gen_rtx_SET (VOIDmode
, x1
,
38719 gen_rtx_MINUS (mode
, e1
, e0
)));
38722 emit_insn (gen_rtx_SET (VOIDmode
, res
,
38723 gen_rtx_MULT (mode
, a
, x1
)));
38726 /* Output code to perform a Newton-Rhapson approximation of a
38727 single precision floating point [reciprocal] square root. */
38729 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
38732 rtx x0
, e0
, e1
, e2
, e3
, mthree
, mhalf
;
38735 x0
= gen_reg_rtx (mode
);
38736 e0
= gen_reg_rtx (mode
);
38737 e1
= gen_reg_rtx (mode
);
38738 e2
= gen_reg_rtx (mode
);
38739 e3
= gen_reg_rtx (mode
);
38741 real_from_integer (&r
, VOIDmode
, -3, SIGNED
);
38742 mthree
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
38744 real_arithmetic (&r
, NEGATE_EXPR
, &dconsthalf
, NULL
);
38745 mhalf
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
38747 if (VECTOR_MODE_P (mode
))
38749 mthree
= ix86_build_const_vector (mode
, true, mthree
);
38750 mhalf
= ix86_build_const_vector (mode
, true, mhalf
);
38753 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
38754 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
38756 a
= force_reg (mode
, a
);
38758 /* x0 = rsqrt(a) estimate */
38759 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
38760 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
38763 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
38768 zero
= gen_reg_rtx (mode
);
38769 mask
= gen_reg_rtx (mode
);
38771 zero
= force_reg (mode
, CONST0_RTX(mode
));
38772 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
38773 gen_rtx_NE (mode
, zero
, a
)));
38775 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
38776 gen_rtx_AND (mode
, x0
, mask
)));
38780 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
38781 gen_rtx_MULT (mode
, x0
, a
)));
38783 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
38784 gen_rtx_MULT (mode
, e0
, x0
)));
38787 mthree
= force_reg (mode
, mthree
);
38788 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
38789 gen_rtx_PLUS (mode
, e1
, mthree
)));
38791 mhalf
= force_reg (mode
, mhalf
);
38793 /* e3 = -.5 * x0 */
38794 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
38795 gen_rtx_MULT (mode
, x0
, mhalf
)));
38797 /* e3 = -.5 * e0 */
38798 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
38799 gen_rtx_MULT (mode
, e0
, mhalf
)));
38800 /* ret = e2 * e3 */
38801 emit_insn (gen_rtx_SET (VOIDmode
, res
,
38802 gen_rtx_MULT (mode
, e2
, e3
)));
38805 #ifdef TARGET_SOLARIS
38806 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
38809 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
38812 /* With Binutils 2.15, the "@unwind" marker must be specified on
38813 every occurrence of the ".eh_frame" section, not just the first
38816 && strcmp (name
, ".eh_frame") == 0)
38818 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
38819 flags
& SECTION_WRITE
? "aw" : "a");
38824 if (HAVE_COMDAT_GROUP
&& flags
& SECTION_LINKONCE
)
38826 solaris_elf_asm_comdat_section (name
, flags
, decl
);
38831 default_elf_asm_named_section (name
, flags
, decl
);
38833 #endif /* TARGET_SOLARIS */
38835 /* Return the mangling of TYPE if it is an extended fundamental type. */
38837 static const char *
38838 ix86_mangle_type (const_tree type
)
38840 type
= TYPE_MAIN_VARIANT (type
);
38842 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
38843 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
38846 switch (TYPE_MODE (type
))
38849 /* __float128 is "g". */
38852 /* "long double" or __float80 is "e". */
38859 /* For 32-bit code we can save PIC register setup by using
38860 __stack_chk_fail_local hidden function instead of calling
38861 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
38862 register, so it is better to call __stack_chk_fail directly. */
38864 static tree ATTRIBUTE_UNUSED
38865 ix86_stack_protect_fail (void)
38867 return TARGET_64BIT
38868 ? default_external_stack_protect_fail ()
38869 : default_hidden_stack_protect_fail ();
38872 /* Select a format to encode pointers in exception handling data. CODE
38873 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
38874 true if the symbol may be affected by dynamic relocations.
38876 ??? All x86 object file formats are capable of representing this.
38877 After all, the relocation needed is the same as for the call insn.
38878 Whether or not a particular assembler allows us to enter such, I
38879 guess we'll have to see. */
38881 asm_preferred_eh_data_format (int code
, int global
)
38885 int type
= DW_EH_PE_sdata8
;
38887 || ix86_cmodel
== CM_SMALL_PIC
38888 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
38889 type
= DW_EH_PE_sdata4
;
38890 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
38892 if (ix86_cmodel
== CM_SMALL
38893 || (ix86_cmodel
== CM_MEDIUM
&& code
))
38894 return DW_EH_PE_udata4
;
38895 return DW_EH_PE_absptr
;
38898 /* Expand copysign from SIGN to the positive value ABS_VALUE
38899 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
38902 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
38904 enum machine_mode mode
= GET_MODE (sign
);
38905 rtx sgn
= gen_reg_rtx (mode
);
38906 if (mask
== NULL_RTX
)
38908 enum machine_mode vmode
;
38910 if (mode
== SFmode
)
38912 else if (mode
== DFmode
)
38917 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), false);
38918 if (!VECTOR_MODE_P (mode
))
38920 /* We need to generate a scalar mode mask in this case. */
38921 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
38922 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
38923 mask
= gen_reg_rtx (mode
);
38924 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
38928 mask
= gen_rtx_NOT (mode
, mask
);
38929 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
38930 gen_rtx_AND (mode
, mask
, sign
)));
38931 emit_insn (gen_rtx_SET (VOIDmode
, result
,
38932 gen_rtx_IOR (mode
, abs_value
, sgn
)));
38935 /* Expand fabs (OP0) and return a new rtx that holds the result. The
38936 mask for masking out the sign-bit is stored in *SMASK, if that is
38939 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
38941 enum machine_mode vmode
, mode
= GET_MODE (op0
);
38944 xa
= gen_reg_rtx (mode
);
38945 if (mode
== SFmode
)
38947 else if (mode
== DFmode
)
38951 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), true);
38952 if (!VECTOR_MODE_P (mode
))
38954 /* We need to generate a scalar mode mask in this case. */
38955 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
38956 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
38957 mask
= gen_reg_rtx (mode
);
38958 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
38960 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
38961 gen_rtx_AND (mode
, op0
, mask
)));
38969 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
38970 swapping the operands if SWAP_OPERANDS is true. The expanded
38971 code is a forward jump to a newly created label in case the
38972 comparison is true. The generated label rtx is returned. */
38974 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
38975 bool swap_operands
)
38977 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
38987 label
= gen_label_rtx ();
38988 tmp
= gen_rtx_REG (fpcmp_mode
, FLAGS_REG
);
38989 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
38990 gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
)));
38991 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
38992 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
38993 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
38994 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
38995 JUMP_LABEL (tmp
) = label
;
39000 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
39001 using comparison code CODE. Operands are swapped for the comparison if
39002 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
39004 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
39005 bool swap_operands
)
39007 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
);
39008 enum machine_mode mode
= GET_MODE (op0
);
39009 rtx mask
= gen_reg_rtx (mode
);
39018 insn
= mode
== DFmode
? gen_setcc_df_sse
: gen_setcc_sf_sse
;
39020 emit_insn (insn (mask
, op0
, op1
,
39021 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
39025 /* Generate and return a rtx of mode MODE for 2**n where n is the number
39026 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
39028 ix86_gen_TWO52 (enum machine_mode mode
)
39030 REAL_VALUE_TYPE TWO52r
;
39033 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
39034 TWO52
= const_double_from_real_value (TWO52r
, mode
);
39035 TWO52
= force_reg (mode
, TWO52
);
39040 /* Expand SSE sequence for computing lround from OP1 storing
39043 ix86_expand_lround (rtx op0
, rtx op1
)
39045 /* C code for the stuff we're doing below:
39046 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
39049 enum machine_mode mode
= GET_MODE (op1
);
39050 const struct real_format
*fmt
;
39051 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
39054 /* load nextafter (0.5, 0.0) */
39055 fmt
= REAL_MODE_FORMAT (mode
);
39056 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
39057 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
39059 /* adj = copysign (0.5, op1) */
39060 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
39061 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
39063 /* adj = op1 + adj */
39064 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
39066 /* op0 = (imode)adj */
39067 expand_fix (op0
, adj
, 0);
39070 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
39073 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
39075 /* C code for the stuff we're doing below (for do_floor):
39077 xi -= (double)xi > op1 ? 1 : 0;
39080 enum machine_mode fmode
= GET_MODE (op1
);
39081 enum machine_mode imode
= GET_MODE (op0
);
39082 rtx ireg
, freg
, label
, tmp
;
39084 /* reg = (long)op1 */
39085 ireg
= gen_reg_rtx (imode
);
39086 expand_fix (ireg
, op1
, 0);
39088 /* freg = (double)reg */
39089 freg
= gen_reg_rtx (fmode
);
39090 expand_float (freg
, ireg
, 0);
39092 /* ireg = (freg > op1) ? ireg - 1 : ireg */
39093 label
= ix86_expand_sse_compare_and_jump (UNLE
,
39094 freg
, op1
, !do_floor
);
39095 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
39096 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
39097 emit_move_insn (ireg
, tmp
);
39099 emit_label (label
);
39100 LABEL_NUSES (label
) = 1;
39102 emit_move_insn (op0
, ireg
);
39105 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
39106 result in OPERAND0. */
39108 ix86_expand_rint (rtx operand0
, rtx operand1
)
39110 /* C code for the stuff we're doing below:
39111 xa = fabs (operand1);
39112 if (!isless (xa, 2**52))
39114 xa = xa + 2**52 - 2**52;
39115 return copysign (xa, operand1);
39117 enum machine_mode mode
= GET_MODE (operand0
);
39118 rtx res
, xa
, label
, TWO52
, mask
;
39120 res
= gen_reg_rtx (mode
);
39121 emit_move_insn (res
, operand1
);
39123 /* xa = abs (operand1) */
39124 xa
= ix86_expand_sse_fabs (res
, &mask
);
39126 /* if (!isless (xa, TWO52)) goto label; */
39127 TWO52
= ix86_gen_TWO52 (mode
);
39128 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
39130 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
39131 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
39133 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
39135 emit_label (label
);
39136 LABEL_NUSES (label
) = 1;
39138 emit_move_insn (operand0
, res
);
39141 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
39144 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
39146 /* C code for the stuff we expand below.
39147 double xa = fabs (x), x2;
39148 if (!isless (xa, TWO52))
39150 xa = xa + TWO52 - TWO52;
39151 x2 = copysign (xa, x);
39160 enum machine_mode mode
= GET_MODE (operand0
);
39161 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
39163 TWO52
= ix86_gen_TWO52 (mode
);
39165 /* Temporary for holding the result, initialized to the input
39166 operand to ease control flow. */
39167 res
= gen_reg_rtx (mode
);
39168 emit_move_insn (res
, operand1
);
39170 /* xa = abs (operand1) */
39171 xa
= ix86_expand_sse_fabs (res
, &mask
);
39173 /* if (!isless (xa, TWO52)) goto label; */
39174 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
39176 /* xa = xa + TWO52 - TWO52; */
39177 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
39178 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
39180 /* xa = copysign (xa, operand1) */
39181 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
39183 /* generate 1.0 or -1.0 */
39184 one
= force_reg (mode
,
39185 const_double_from_real_value (do_floor
39186 ? dconst1
: dconstm1
, mode
));
39188 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
39189 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
39190 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
39191 gen_rtx_AND (mode
, one
, tmp
)));
39192 /* We always need to subtract here to preserve signed zero. */
39193 tmp
= expand_simple_binop (mode
, MINUS
,
39194 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
39195 emit_move_insn (res
, tmp
);
39197 emit_label (label
);
39198 LABEL_NUSES (label
) = 1;
39200 emit_move_insn (operand0
, res
);
39203 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
39206 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
39208 /* C code for the stuff we expand below.
39209 double xa = fabs (x), x2;
39210 if (!isless (xa, TWO52))
39212 x2 = (double)(long)x;
39219 if (HONOR_SIGNED_ZEROS (mode))
39220 return copysign (x2, x);
39223 enum machine_mode mode
= GET_MODE (operand0
);
39224 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
39226 TWO52
= ix86_gen_TWO52 (mode
);
39228 /* Temporary for holding the result, initialized to the input
39229 operand to ease control flow. */
39230 res
= gen_reg_rtx (mode
);
39231 emit_move_insn (res
, operand1
);
39233 /* xa = abs (operand1) */
39234 xa
= ix86_expand_sse_fabs (res
, &mask
);
39236 /* if (!isless (xa, TWO52)) goto label; */
39237 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
39239 /* xa = (double)(long)x */
39240 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
39241 expand_fix (xi
, res
, 0);
39242 expand_float (xa
, xi
, 0);
39245 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
39247 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
39248 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
39249 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
39250 gen_rtx_AND (mode
, one
, tmp
)));
39251 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
39252 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
39253 emit_move_insn (res
, tmp
);
39255 if (HONOR_SIGNED_ZEROS (mode
))
39256 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
39258 emit_label (label
);
39259 LABEL_NUSES (label
) = 1;
39261 emit_move_insn (operand0
, res
);
39264 /* Expand SSE sequence for computing round from OPERAND1 storing
39265 into OPERAND0. Sequence that works without relying on DImode truncation
39266 via cvttsd2siq that is only available on 64bit targets. */
39268 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
39270 /* C code for the stuff we expand below.
39271 double xa = fabs (x), xa2, x2;
39272 if (!isless (xa, TWO52))
39274 Using the absolute value and copying back sign makes
39275 -0.0 -> -0.0 correct.
39276 xa2 = xa + TWO52 - TWO52;
39281 else if (dxa > 0.5)
39283 x2 = copysign (xa2, x);
39286 enum machine_mode mode
= GET_MODE (operand0
);
39287 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
39289 TWO52
= ix86_gen_TWO52 (mode
);
39291 /* Temporary for holding the result, initialized to the input
39292 operand to ease control flow. */
39293 res
= gen_reg_rtx (mode
);
39294 emit_move_insn (res
, operand1
);
39296 /* xa = abs (operand1) */
39297 xa
= ix86_expand_sse_fabs (res
, &mask
);
39299 /* if (!isless (xa, TWO52)) goto label; */
39300 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
39302 /* xa2 = xa + TWO52 - TWO52; */
39303 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
39304 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
39306 /* dxa = xa2 - xa; */
39307 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
39309 /* generate 0.5, 1.0 and -0.5 */
39310 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
39311 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
39312 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
39316 tmp
= gen_reg_rtx (mode
);
39317 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
39318 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
39319 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
39320 gen_rtx_AND (mode
, one
, tmp
)));
39321 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
39322 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
39323 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
39324 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
39325 gen_rtx_AND (mode
, one
, tmp
)));
39326 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
39328 /* res = copysign (xa2, operand1) */
39329 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
39331 emit_label (label
);
39332 LABEL_NUSES (label
) = 1;
39334 emit_move_insn (operand0
, res
);
39337 /* Expand SSE sequence for computing trunc from OPERAND1 storing
39340 ix86_expand_trunc (rtx operand0
, rtx operand1
)
39342 /* C code for SSE variant we expand below.
39343 double xa = fabs (x), x2;
39344 if (!isless (xa, TWO52))
39346 x2 = (double)(long)x;
39347 if (HONOR_SIGNED_ZEROS (mode))
39348 return copysign (x2, x);
39351 enum machine_mode mode
= GET_MODE (operand0
);
39352 rtx xa
, xi
, TWO52
, label
, res
, mask
;
39354 TWO52
= ix86_gen_TWO52 (mode
);
39356 /* Temporary for holding the result, initialized to the input
39357 operand to ease control flow. */
39358 res
= gen_reg_rtx (mode
);
39359 emit_move_insn (res
, operand1
);
39361 /* xa = abs (operand1) */
39362 xa
= ix86_expand_sse_fabs (res
, &mask
);
39364 /* if (!isless (xa, TWO52)) goto label; */
39365 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
39367 /* x = (double)(long)x */
39368 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
39369 expand_fix (xi
, res
, 0);
39370 expand_float (res
, xi
, 0);
39372 if (HONOR_SIGNED_ZEROS (mode
))
39373 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
39375 emit_label (label
);
39376 LABEL_NUSES (label
) = 1;
39378 emit_move_insn (operand0
, res
);
39381 /* Expand SSE sequence for computing trunc from OPERAND1 storing
39384 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
39386 enum machine_mode mode
= GET_MODE (operand0
);
39387 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
39389 /* C code for SSE variant we expand below.
39390 double xa = fabs (x), x2;
39391 if (!isless (xa, TWO52))
39393 xa2 = xa + TWO52 - TWO52;
39397 x2 = copysign (xa2, x);
39401 TWO52
= ix86_gen_TWO52 (mode
);
39403 /* Temporary for holding the result, initialized to the input
39404 operand to ease control flow. */
39405 res
= gen_reg_rtx (mode
);
39406 emit_move_insn (res
, operand1
);
39408 /* xa = abs (operand1) */
39409 xa
= ix86_expand_sse_fabs (res
, &smask
);
39411 /* if (!isless (xa, TWO52)) goto label; */
39412 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
39414 /* res = xa + TWO52 - TWO52; */
39415 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
39416 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
39417 emit_move_insn (res
, tmp
);
39420 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
39422 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
39423 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
39424 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
39425 gen_rtx_AND (mode
, mask
, one
)));
39426 tmp
= expand_simple_binop (mode
, MINUS
,
39427 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
39428 emit_move_insn (res
, tmp
);
39430 /* res = copysign (res, operand1) */
39431 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
39433 emit_label (label
);
39434 LABEL_NUSES (label
) = 1;
39436 emit_move_insn (operand0
, res
);
39439 /* Expand SSE sequence for computing round from OPERAND1 storing
39442 ix86_expand_round (rtx operand0
, rtx operand1
)
39444 /* C code for the stuff we're doing below:
39445 double xa = fabs (x);
39446 if (!isless (xa, TWO52))
39448 xa = (double)(long)(xa + nextafter (0.5, 0.0));
39449 return copysign (xa, x);
39451 enum machine_mode mode
= GET_MODE (operand0
);
39452 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
39453 const struct real_format
*fmt
;
39454 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
39456 /* Temporary for holding the result, initialized to the input
39457 operand to ease control flow. */
39458 res
= gen_reg_rtx (mode
);
39459 emit_move_insn (res
, operand1
);
39461 TWO52
= ix86_gen_TWO52 (mode
);
39462 xa
= ix86_expand_sse_fabs (res
, &mask
);
39463 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
39465 /* load nextafter (0.5, 0.0) */
39466 fmt
= REAL_MODE_FORMAT (mode
);
39467 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
39468 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
39470 /* xa = xa + 0.5 */
39471 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
39472 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
39474 /* xa = (double)(int64_t)xa */
39475 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
39476 expand_fix (xi
, xa
, 0);
39477 expand_float (xa
, xi
, 0);
39479 /* res = copysign (xa, operand1) */
39480 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
39482 emit_label (label
);
39483 LABEL_NUSES (label
) = 1;
39485 emit_move_insn (operand0
, res
);
39488 /* Expand SSE sequence for computing round
39489 from OP1 storing into OP0 using sse4 round insn. */
39491 ix86_expand_round_sse4 (rtx op0
, rtx op1
)
39493 enum machine_mode mode
= GET_MODE (op0
);
39494 rtx e1
, e2
, res
, half
;
39495 const struct real_format
*fmt
;
39496 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
39497 rtx (*gen_copysign
) (rtx
, rtx
, rtx
);
39498 rtx (*gen_round
) (rtx
, rtx
, rtx
);
39503 gen_copysign
= gen_copysignsf3
;
39504 gen_round
= gen_sse4_1_roundsf2
;
39507 gen_copysign
= gen_copysigndf3
;
39508 gen_round
= gen_sse4_1_rounddf2
;
39511 gcc_unreachable ();
39514 /* round (a) = trunc (a + copysign (0.5, a)) */
39516 /* load nextafter (0.5, 0.0) */
39517 fmt
= REAL_MODE_FORMAT (mode
);
39518 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
39519 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
39520 half
= const_double_from_real_value (pred_half
, mode
);
39522 /* e1 = copysign (0.5, op1) */
39523 e1
= gen_reg_rtx (mode
);
39524 emit_insn (gen_copysign (e1
, half
, op1
));
39526 /* e2 = op1 + e1 */
39527 e2
= expand_simple_binop (mode
, PLUS
, op1
, e1
, NULL_RTX
, 0, OPTAB_DIRECT
);
39529 /* res = trunc (e2) */
39530 res
= gen_reg_rtx (mode
);
39531 emit_insn (gen_round (res
, e2
, GEN_INT (ROUND_TRUNC
)));
39533 emit_move_insn (op0
, res
);
39537 /* Table of valid machine attributes. */
39538 static const struct attribute_spec ix86_attribute_table
[] =
39540 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
39541 affects_type_identity } */
39542 /* Stdcall attribute says callee is responsible for popping arguments
39543 if they are not variable. */
39544 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
39546 /* Fastcall attribute says callee is responsible for popping arguments
39547 if they are not variable. */
39548 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
39550 /* Thiscall attribute says callee is responsible for popping arguments
39551 if they are not variable. */
39552 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
39554 /* Cdecl attribute says the callee is a normal C declaration */
39555 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
39557 /* Regparm attribute specifies how many integer arguments are to be
39558 passed in registers. */
39559 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
,
39561 /* Sseregparm attribute says we are using x86_64 calling conventions
39562 for FP arguments. */
39563 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
39565 /* The transactional memory builtins are implicitly regparm or fastcall
39566 depending on the ABI. Override the generic do-nothing attribute that
39567 these builtins were declared with. */
39568 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute
,
39570 /* force_align_arg_pointer says this function realigns the stack at entry. */
39571 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
39572 false, true, true, ix86_handle_cconv_attribute
, false },
39573 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
39574 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
39575 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
39576 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
,
39579 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
39581 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
39583 #ifdef SUBTARGET_ATTRIBUTE_TABLE
39584 SUBTARGET_ATTRIBUTE_TABLE
,
39586 /* ms_abi and sysv_abi calling convention function attributes. */
39587 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
39588 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
39589 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute
,
39591 { "callee_pop_aggregate_return", 1, 1, false, true, true,
39592 ix86_handle_callee_pop_aggregate_return
, true },
39594 { NULL
, 0, 0, false, false, false, NULL
, false }
39597 /* Implement targetm.vectorize.builtin_vectorization_cost. */
39599 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
39601 int misalign ATTRIBUTE_UNUSED
)
39605 switch (type_of_cost
)
39608 return ix86_cost
->scalar_stmt_cost
;
39611 return ix86_cost
->scalar_load_cost
;
39614 return ix86_cost
->scalar_store_cost
;
39617 return ix86_cost
->vec_stmt_cost
;
39620 return ix86_cost
->vec_align_load_cost
;
39623 return ix86_cost
->vec_store_cost
;
39625 case vec_to_scalar
:
39626 return ix86_cost
->vec_to_scalar_cost
;
39628 case scalar_to_vec
:
39629 return ix86_cost
->scalar_to_vec_cost
;
39631 case unaligned_load
:
39632 case unaligned_store
:
39633 return ix86_cost
->vec_unalign_load_cost
;
39635 case cond_branch_taken
:
39636 return ix86_cost
->cond_taken_branch_cost
;
39638 case cond_branch_not_taken
:
39639 return ix86_cost
->cond_not_taken_branch_cost
;
39642 case vec_promote_demote
:
39643 return ix86_cost
->vec_stmt_cost
;
39645 case vec_construct
:
39646 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
39647 return elements
/ 2 + 1;
39650 gcc_unreachable ();
39654 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
39655 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
39656 insn every time. */
39658 static GTY(()) rtx vselect_insn
;
39660 /* Initialize vselect_insn. */
39663 init_vselect_insn (void)
39668 x
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (MAX_VECT_LEN
));
39669 for (i
= 0; i
< MAX_VECT_LEN
; ++i
)
39670 XVECEXP (x
, 0, i
) = const0_rtx
;
39671 x
= gen_rtx_VEC_SELECT (V2DFmode
, gen_rtx_VEC_CONCAT (V4DFmode
, const0_rtx
,
39673 x
= gen_rtx_SET (VOIDmode
, const0_rtx
, x
);
39675 vselect_insn
= emit_insn (x
);
39679 /* Construct (set target (vec_select op0 (parallel perm))) and
39680 return true if that's a valid instruction in the active ISA. */
39683 expand_vselect (rtx target
, rtx op0
, const unsigned char *perm
,
39684 unsigned nelt
, bool testing_p
)
39687 rtx x
, save_vconcat
;
39690 if (vselect_insn
== NULL_RTX
)
39691 init_vselect_insn ();
39693 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 1);
39694 PUT_NUM_ELEM (XVEC (x
, 0), nelt
);
39695 for (i
= 0; i
< nelt
; ++i
)
39696 XVECEXP (x
, 0, i
) = GEN_INT (perm
[i
]);
39697 save_vconcat
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
39698 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = op0
;
39699 PUT_MODE (SET_SRC (PATTERN (vselect_insn
)), GET_MODE (target
));
39700 SET_DEST (PATTERN (vselect_insn
)) = target
;
39701 icode
= recog_memoized (vselect_insn
);
39703 if (icode
>= 0 && !testing_p
)
39704 emit_insn (copy_rtx (PATTERN (vselect_insn
)));
39706 SET_DEST (PATTERN (vselect_insn
)) = const0_rtx
;
39707 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = save_vconcat
;
39708 INSN_CODE (vselect_insn
) = -1;
39713 /* Similar, but generate a vec_concat from op0 and op1 as well. */
39716 expand_vselect_vconcat (rtx target
, rtx op0
, rtx op1
,
39717 const unsigned char *perm
, unsigned nelt
,
39720 enum machine_mode v2mode
;
39724 if (vselect_insn
== NULL_RTX
)
39725 init_vselect_insn ();
39727 v2mode
= GET_MODE_2XWIDER_MODE (GET_MODE (op0
));
39728 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
39729 PUT_MODE (x
, v2mode
);
39732 ok
= expand_vselect (target
, x
, perm
, nelt
, testing_p
);
39733 XEXP (x
, 0) = const0_rtx
;
39734 XEXP (x
, 1) = const0_rtx
;
39738 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
39739 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
39742 expand_vec_perm_blend (struct expand_vec_perm_d
*d
)
39744 enum machine_mode vmode
= d
->vmode
;
39745 unsigned i
, mask
, nelt
= d
->nelt
;
39746 rtx target
, op0
, op1
, x
;
39747 rtx rperm
[32], vperm
;
39749 if (d
->one_operand_p
)
39751 if (TARGET_AVX2
&& GET_MODE_SIZE (vmode
) == 32)
39753 else if (TARGET_AVX
&& (vmode
== V4DFmode
|| vmode
== V8SFmode
))
39755 else if (TARGET_SSE4_1
&& GET_MODE_SIZE (vmode
) == 16)
39760 /* This is a blend, not a permute. Elements must stay in their
39761 respective lanes. */
39762 for (i
= 0; i
< nelt
; ++i
)
39764 unsigned e
= d
->perm
[i
];
39765 if (!(e
== i
|| e
== i
+ nelt
))
39772 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
39773 decision should be extracted elsewhere, so that we only try that
39774 sequence once all budget==3 options have been tried. */
39775 target
= d
->target
;
39788 for (i
= 0; i
< nelt
; ++i
)
39789 mask
|= (d
->perm
[i
] >= nelt
) << i
;
39793 for (i
= 0; i
< 2; ++i
)
39794 mask
|= (d
->perm
[i
] >= 2 ? 15 : 0) << (i
* 4);
39799 for (i
= 0; i
< 4; ++i
)
39800 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
39805 /* See if bytes move in pairs so we can use pblendw with
39806 an immediate argument, rather than pblendvb with a vector
39808 for (i
= 0; i
< 16; i
+= 2)
39809 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
39812 for (i
= 0; i
< nelt
; ++i
)
39813 rperm
[i
] = (d
->perm
[i
] < nelt
? const0_rtx
: constm1_rtx
);
39816 vperm
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
39817 vperm
= force_reg (vmode
, vperm
);
39819 if (GET_MODE_SIZE (vmode
) == 16)
39820 emit_insn (gen_sse4_1_pblendvb (target
, op0
, op1
, vperm
));
39822 emit_insn (gen_avx2_pblendvb (target
, op0
, op1
, vperm
));
39823 if (target
!= d
->target
)
39824 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, target
));
39828 for (i
= 0; i
< 8; ++i
)
39829 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
39834 target
= gen_reg_rtx (vmode
);
39835 op0
= gen_lowpart (vmode
, op0
);
39836 op1
= gen_lowpart (vmode
, op1
);
39840 /* See if bytes move in pairs. If not, vpblendvb must be used. */
39841 for (i
= 0; i
< 32; i
+= 2)
39842 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
39844 /* See if bytes move in quadruplets. If yes, vpblendd
39845 with immediate can be used. */
39846 for (i
= 0; i
< 32; i
+= 4)
39847 if (d
->perm
[i
] + 2 != d
->perm
[i
+ 2])
39851 /* See if bytes move the same in both lanes. If yes,
39852 vpblendw with immediate can be used. */
39853 for (i
= 0; i
< 16; i
+= 2)
39854 if (d
->perm
[i
] + 16 != d
->perm
[i
+ 16])
39857 /* Use vpblendw. */
39858 for (i
= 0; i
< 16; ++i
)
39859 mask
|= (d
->perm
[i
* 2] >= 32) << i
;
39864 /* Use vpblendd. */
39865 for (i
= 0; i
< 8; ++i
)
39866 mask
|= (d
->perm
[i
* 4] >= 32) << i
;
39871 /* See if words move in pairs. If yes, vpblendd can be used. */
39872 for (i
= 0; i
< 16; i
+= 2)
39873 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
39877 /* See if words move the same in both lanes. If not,
39878 vpblendvb must be used. */
39879 for (i
= 0; i
< 8; i
++)
39880 if (d
->perm
[i
] + 8 != d
->perm
[i
+ 8])
39882 /* Use vpblendvb. */
39883 for (i
= 0; i
< 32; ++i
)
39884 rperm
[i
] = (d
->perm
[i
/ 2] < 16 ? const0_rtx
: constm1_rtx
);
39888 target
= gen_reg_rtx (vmode
);
39889 op0
= gen_lowpart (vmode
, op0
);
39890 op1
= gen_lowpart (vmode
, op1
);
39891 goto finish_pblendvb
;
39894 /* Use vpblendw. */
39895 for (i
= 0; i
< 16; ++i
)
39896 mask
|= (d
->perm
[i
] >= 16) << i
;
39900 /* Use vpblendd. */
39901 for (i
= 0; i
< 8; ++i
)
39902 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
39907 /* Use vpblendd. */
39908 for (i
= 0; i
< 4; ++i
)
39909 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
39914 gcc_unreachable ();
39917 /* This matches five different patterns with the different modes. */
39918 x
= gen_rtx_VEC_MERGE (vmode
, op1
, op0
, GEN_INT (mask
));
39919 x
= gen_rtx_SET (VOIDmode
, target
, x
);
39921 if (target
!= d
->target
)
39922 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, target
));
39927 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
39928 in terms of the variable form of vpermilps.
39930 Note that we will have already failed the immediate input vpermilps,
39931 which requires that the high and low part shuffle be identical; the
39932 variable form doesn't require that. */
39935 expand_vec_perm_vpermil (struct expand_vec_perm_d
*d
)
39937 rtx rperm
[8], vperm
;
39940 if (!TARGET_AVX
|| d
->vmode
!= V8SFmode
|| !d
->one_operand_p
)
39943 /* We can only permute within the 128-bit lane. */
39944 for (i
= 0; i
< 8; ++i
)
39946 unsigned e
= d
->perm
[i
];
39947 if (i
< 4 ? e
>= 4 : e
< 4)
39954 for (i
= 0; i
< 8; ++i
)
39956 unsigned e
= d
->perm
[i
];
39958 /* Within each 128-bit lane, the elements of op0 are numbered
39959 from 0 and the elements of op1 are numbered from 4. */
39965 rperm
[i
] = GEN_INT (e
);
39968 vperm
= gen_rtx_CONST_VECTOR (V8SImode
, gen_rtvec_v (8, rperm
));
39969 vperm
= force_reg (V8SImode
, vperm
);
39970 emit_insn (gen_avx_vpermilvarv8sf3 (d
->target
, d
->op0
, vperm
));
39975 /* Return true if permutation D can be performed as VMODE permutation
39979 valid_perm_using_mode_p (enum machine_mode vmode
, struct expand_vec_perm_d
*d
)
39981 unsigned int i
, j
, chunk
;
39983 if (GET_MODE_CLASS (vmode
) != MODE_VECTOR_INT
39984 || GET_MODE_CLASS (d
->vmode
) != MODE_VECTOR_INT
39985 || GET_MODE_SIZE (vmode
) != GET_MODE_SIZE (d
->vmode
))
39988 if (GET_MODE_NUNITS (vmode
) >= d
->nelt
)
39991 chunk
= d
->nelt
/ GET_MODE_NUNITS (vmode
);
39992 for (i
= 0; i
< d
->nelt
; i
+= chunk
)
39993 if (d
->perm
[i
] & (chunk
- 1))
39996 for (j
= 1; j
< chunk
; ++j
)
39997 if (d
->perm
[i
] + j
!= d
->perm
[i
+ j
])
40003 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
40004 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
40007 expand_vec_perm_pshufb (struct expand_vec_perm_d
*d
)
40009 unsigned i
, nelt
, eltsz
, mask
;
40010 unsigned char perm
[32];
40011 enum machine_mode vmode
= V16QImode
;
40012 rtx rperm
[32], vperm
, target
, op0
, op1
;
40016 if (!d
->one_operand_p
)
40018 if (!TARGET_XOP
|| GET_MODE_SIZE (d
->vmode
) != 16)
40021 && valid_perm_using_mode_p (V2TImode
, d
))
40026 /* Use vperm2i128 insn. The pattern uses
40027 V4DImode instead of V2TImode. */
40028 target
= d
->target
;
40029 if (d
->vmode
!= V4DImode
)
40030 target
= gen_reg_rtx (V4DImode
);
40031 op0
= gen_lowpart (V4DImode
, d
->op0
);
40032 op1
= gen_lowpart (V4DImode
, d
->op1
);
40034 = GEN_INT (((d
->perm
[0] & (nelt
/ 2)) ? 1 : 0)
40035 || ((d
->perm
[nelt
/ 2] & (nelt
/ 2)) ? 2 : 0));
40036 emit_insn (gen_avx2_permv2ti (target
, op0
, op1
, rperm
[0]));
40037 if (target
!= d
->target
)
40038 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, target
));
40046 if (GET_MODE_SIZE (d
->vmode
) == 16)
40051 else if (GET_MODE_SIZE (d
->vmode
) == 32)
40056 /* V4DImode should be already handled through
40057 expand_vselect by vpermq instruction. */
40058 gcc_assert (d
->vmode
!= V4DImode
);
40061 if (d
->vmode
== V8SImode
40062 || d
->vmode
== V16HImode
40063 || d
->vmode
== V32QImode
)
40065 /* First see if vpermq can be used for
40066 V8SImode/V16HImode/V32QImode. */
40067 if (valid_perm_using_mode_p (V4DImode
, d
))
40069 for (i
= 0; i
< 4; i
++)
40070 perm
[i
] = (d
->perm
[i
* nelt
/ 4] * 4 / nelt
) & 3;
40073 target
= gen_reg_rtx (V4DImode
);
40074 if (expand_vselect (target
, gen_lowpart (V4DImode
, d
->op0
),
40077 emit_move_insn (d
->target
,
40078 gen_lowpart (d
->vmode
, target
));
40084 /* Next see if vpermd can be used. */
40085 if (valid_perm_using_mode_p (V8SImode
, d
))
40088 /* Or if vpermps can be used. */
40089 else if (d
->vmode
== V8SFmode
)
40092 if (vmode
== V32QImode
)
40094 /* vpshufb only works intra lanes, it is not
40095 possible to shuffle bytes in between the lanes. */
40096 for (i
= 0; i
< nelt
; ++i
)
40097 if ((d
->perm
[i
] ^ i
) & (nelt
/ 2))
40108 if (vmode
== V8SImode
)
40109 for (i
= 0; i
< 8; ++i
)
40110 rperm
[i
] = GEN_INT ((d
->perm
[i
* nelt
/ 8] * 8 / nelt
) & 7);
40113 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
40114 if (!d
->one_operand_p
)
40115 mask
= 2 * nelt
- 1;
40116 else if (vmode
== V16QImode
)
40119 mask
= nelt
/ 2 - 1;
40121 for (i
= 0; i
< nelt
; ++i
)
40123 unsigned j
, e
= d
->perm
[i
] & mask
;
40124 for (j
= 0; j
< eltsz
; ++j
)
40125 rperm
[i
* eltsz
+ j
] = GEN_INT (e
* eltsz
+ j
);
40129 vperm
= gen_rtx_CONST_VECTOR (vmode
,
40130 gen_rtvec_v (GET_MODE_NUNITS (vmode
), rperm
));
40131 vperm
= force_reg (vmode
, vperm
);
40133 target
= d
->target
;
40134 if (d
->vmode
!= vmode
)
40135 target
= gen_reg_rtx (vmode
);
40136 op0
= gen_lowpart (vmode
, d
->op0
);
40137 if (d
->one_operand_p
)
40139 if (vmode
== V16QImode
)
40140 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, vperm
));
40141 else if (vmode
== V32QImode
)
40142 emit_insn (gen_avx2_pshufbv32qi3 (target
, op0
, vperm
));
40143 else if (vmode
== V8SFmode
)
40144 emit_insn (gen_avx2_permvarv8sf (target
, op0
, vperm
));
40146 emit_insn (gen_avx2_permvarv8si (target
, op0
, vperm
));
40150 op1
= gen_lowpart (vmode
, d
->op1
);
40151 emit_insn (gen_xop_pperm (target
, op0
, op1
, vperm
));
40153 if (target
!= d
->target
)
40154 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, target
));
40159 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
40160 in a single instruction. */
40163 expand_vec_perm_1 (struct expand_vec_perm_d
*d
)
40165 unsigned i
, nelt
= d
->nelt
;
40166 unsigned char perm2
[MAX_VECT_LEN
];
40168 /* Check plain VEC_SELECT first, because AVX has instructions that could
40169 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
40170 input where SEL+CONCAT may not. */
40171 if (d
->one_operand_p
)
40173 int mask
= nelt
- 1;
40174 bool identity_perm
= true;
40175 bool broadcast_perm
= true;
40177 for (i
= 0; i
< nelt
; i
++)
40179 perm2
[i
] = d
->perm
[i
] & mask
;
40181 identity_perm
= false;
40183 broadcast_perm
= false;
40189 emit_move_insn (d
->target
, d
->op0
);
40192 else if (broadcast_perm
&& TARGET_AVX2
)
40194 /* Use vpbroadcast{b,w,d}. */
40195 rtx (*gen
) (rtx
, rtx
) = NULL
;
40199 gen
= gen_avx2_pbroadcastv32qi_1
;
40202 gen
= gen_avx2_pbroadcastv16hi_1
;
40205 gen
= gen_avx2_pbroadcastv8si_1
;
40208 gen
= gen_avx2_pbroadcastv16qi
;
40211 gen
= gen_avx2_pbroadcastv8hi
;
40214 gen
= gen_avx2_vec_dupv8sf_1
;
40216 /* For other modes prefer other shuffles this function creates. */
40222 emit_insn (gen (d
->target
, d
->op0
));
40227 if (expand_vselect (d
->target
, d
->op0
, perm2
, nelt
, d
->testing_p
))
40230 /* There are plenty of patterns in sse.md that are written for
40231 SEL+CONCAT and are not replicated for a single op. Perhaps
40232 that should be changed, to avoid the nastiness here. */
40234 /* Recognize interleave style patterns, which means incrementing
40235 every other permutation operand. */
40236 for (i
= 0; i
< nelt
; i
+= 2)
40238 perm2
[i
] = d
->perm
[i
] & mask
;
40239 perm2
[i
+ 1] = (d
->perm
[i
+ 1] & mask
) + nelt
;
40241 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
40245 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
40248 for (i
= 0; i
< nelt
; i
+= 4)
40250 perm2
[i
+ 0] = d
->perm
[i
+ 0] & mask
;
40251 perm2
[i
+ 1] = d
->perm
[i
+ 1] & mask
;
40252 perm2
[i
+ 2] = (d
->perm
[i
+ 2] & mask
) + nelt
;
40253 perm2
[i
+ 3] = (d
->perm
[i
+ 3] & mask
) + nelt
;
40256 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
40262 /* Finally, try the fully general two operand permute. */
40263 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op1
, d
->perm
, nelt
,
40267 /* Recognize interleave style patterns with reversed operands. */
40268 if (!d
->one_operand_p
)
40270 for (i
= 0; i
< nelt
; ++i
)
40272 unsigned e
= d
->perm
[i
];
40280 if (expand_vselect_vconcat (d
->target
, d
->op1
, d
->op0
, perm2
, nelt
,
40285 /* Try the SSE4.1 blend variable merge instructions. */
40286 if (expand_vec_perm_blend (d
))
40289 /* Try one of the AVX vpermil variable permutations. */
40290 if (expand_vec_perm_vpermil (d
))
40293 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
40294 vpshufb, vpermd, vpermps or vpermq variable permutation. */
40295 if (expand_vec_perm_pshufb (d
))
40301 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
40302 in terms of a pair of pshuflw + pshufhw instructions. */
40305 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d
*d
)
40307 unsigned char perm2
[MAX_VECT_LEN
];
40311 if (d
->vmode
!= V8HImode
|| !d
->one_operand_p
)
40314 /* The two permutations only operate in 64-bit lanes. */
40315 for (i
= 0; i
< 4; ++i
)
40316 if (d
->perm
[i
] >= 4)
40318 for (i
= 4; i
< 8; ++i
)
40319 if (d
->perm
[i
] < 4)
40325 /* Emit the pshuflw. */
40326 memcpy (perm2
, d
->perm
, 4);
40327 for (i
= 4; i
< 8; ++i
)
40329 ok
= expand_vselect (d
->target
, d
->op0
, perm2
, 8, d
->testing_p
);
40332 /* Emit the pshufhw. */
40333 memcpy (perm2
+ 4, d
->perm
+ 4, 4);
40334 for (i
= 0; i
< 4; ++i
)
40336 ok
= expand_vselect (d
->target
, d
->target
, perm2
, 8, d
->testing_p
);
40342 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
40343 the permutation using the SSSE3 palignr instruction. This succeeds
40344 when all of the elements in PERM fit within one vector and we merely
40345 need to shift them down so that a single vector permutation has a
40346 chance to succeed. */
40349 expand_vec_perm_palignr (struct expand_vec_perm_d
*d
)
40351 unsigned i
, nelt
= d
->nelt
;
40355 struct expand_vec_perm_d dcopy
;
40357 /* Even with AVX, palignr only operates on 128-bit vectors. */
40358 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
40361 min
= nelt
, max
= 0;
40362 for (i
= 0; i
< nelt
; ++i
)
40364 unsigned e
= d
->perm
[i
];
40370 if (min
== 0 || max
- min
>= nelt
)
40373 /* Given that we have SSSE3, we know we'll be able to implement the
40374 single operand permutation after the palignr with pshufb. */
40379 shift
= GEN_INT (min
* GET_MODE_BITSIZE (GET_MODE_INNER (d
->vmode
)));
40380 target
= gen_reg_rtx (TImode
);
40381 emit_insn (gen_ssse3_palignrti (target
, gen_lowpart (TImode
, d
->op1
),
40382 gen_lowpart (TImode
, d
->op0
), shift
));
40384 dcopy
.op0
= dcopy
.op1
= gen_lowpart (d
->vmode
, target
);
40385 dcopy
.one_operand_p
= true;
40388 for (i
= 0; i
< nelt
; ++i
)
40390 unsigned e
= dcopy
.perm
[i
] - min
;
40396 /* Test for the degenerate case where the alignment by itself
40397 produces the desired permutation. */
40400 emit_move_insn (d
->target
, dcopy
.op0
);
40404 ok
= expand_vec_perm_1 (&dcopy
);
40410 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
);
40412 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
40413 a two vector permutation into a single vector permutation by using
40414 an interleave operation to merge the vectors. */
40417 expand_vec_perm_interleave2 (struct expand_vec_perm_d
*d
)
40419 struct expand_vec_perm_d dremap
, dfinal
;
40420 unsigned i
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
40421 unsigned HOST_WIDE_INT contents
;
40422 unsigned char remap
[2 * MAX_VECT_LEN
];
40424 bool ok
, same_halves
= false;
40426 if (GET_MODE_SIZE (d
->vmode
) == 16)
40428 if (d
->one_operand_p
)
40431 else if (GET_MODE_SIZE (d
->vmode
) == 32)
40435 /* For 32-byte modes allow even d->one_operand_p.
40436 The lack of cross-lane shuffling in some instructions
40437 might prevent a single insn shuffle. */
40439 dfinal
.testing_p
= true;
40440 /* If expand_vec_perm_interleave3 can expand this into
40441 a 3 insn sequence, give up and let it be expanded as
40442 3 insn sequence. While that is one insn longer,
40443 it doesn't need a memory operand and in the common
40444 case that both interleave low and high permutations
40445 with the same operands are adjacent needs 4 insns
40446 for both after CSE. */
40447 if (expand_vec_perm_interleave3 (&dfinal
))
40453 /* Examine from whence the elements come. */
40455 for (i
= 0; i
< nelt
; ++i
)
40456 contents
|= ((unsigned HOST_WIDE_INT
) 1) << d
->perm
[i
];
40458 memset (remap
, 0xff, sizeof (remap
));
40461 if (GET_MODE_SIZE (d
->vmode
) == 16)
40463 unsigned HOST_WIDE_INT h1
, h2
, h3
, h4
;
40465 /* Split the two input vectors into 4 halves. */
40466 h1
= (((unsigned HOST_WIDE_INT
) 1) << nelt2
) - 1;
40471 /* If the elements from the low halves use interleave low, and similarly
40472 for interleave high. If the elements are from mis-matched halves, we
40473 can use shufps for V4SF/V4SI or do a DImode shuffle. */
40474 if ((contents
& (h1
| h3
)) == contents
)
40477 for (i
= 0; i
< nelt2
; ++i
)
40480 remap
[i
+ nelt
] = i
* 2 + 1;
40481 dremap
.perm
[i
* 2] = i
;
40482 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
40484 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
40485 dremap
.vmode
= V4SFmode
;
40487 else if ((contents
& (h2
| h4
)) == contents
)
40490 for (i
= 0; i
< nelt2
; ++i
)
40492 remap
[i
+ nelt2
] = i
* 2;
40493 remap
[i
+ nelt
+ nelt2
] = i
* 2 + 1;
40494 dremap
.perm
[i
* 2] = i
+ nelt2
;
40495 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt2
;
40497 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
40498 dremap
.vmode
= V4SFmode
;
40500 else if ((contents
& (h1
| h4
)) == contents
)
40503 for (i
= 0; i
< nelt2
; ++i
)
40506 remap
[i
+ nelt
+ nelt2
] = i
+ nelt2
;
40507 dremap
.perm
[i
] = i
;
40508 dremap
.perm
[i
+ nelt2
] = i
+ nelt
+ nelt2
;
40513 dremap
.vmode
= V2DImode
;
40515 dremap
.perm
[0] = 0;
40516 dremap
.perm
[1] = 3;
40519 else if ((contents
& (h2
| h3
)) == contents
)
40522 for (i
= 0; i
< nelt2
; ++i
)
40524 remap
[i
+ nelt2
] = i
;
40525 remap
[i
+ nelt
] = i
+ nelt2
;
40526 dremap
.perm
[i
] = i
+ nelt2
;
40527 dremap
.perm
[i
+ nelt2
] = i
+ nelt
;
40532 dremap
.vmode
= V2DImode
;
40534 dremap
.perm
[0] = 1;
40535 dremap
.perm
[1] = 2;
40543 unsigned int nelt4
= nelt
/ 4, nzcnt
= 0;
40544 unsigned HOST_WIDE_INT q
[8];
40545 unsigned int nonzero_halves
[4];
40547 /* Split the two input vectors into 8 quarters. */
40548 q
[0] = (((unsigned HOST_WIDE_INT
) 1) << nelt4
) - 1;
40549 for (i
= 1; i
< 8; ++i
)
40550 q
[i
] = q
[0] << (nelt4
* i
);
40551 for (i
= 0; i
< 4; ++i
)
40552 if (((q
[2 * i
] | q
[2 * i
+ 1]) & contents
) != 0)
40554 nonzero_halves
[nzcnt
] = i
;
40560 gcc_assert (d
->one_operand_p
);
40561 nonzero_halves
[1] = nonzero_halves
[0];
40562 same_halves
= true;
40564 else if (d
->one_operand_p
)
40566 gcc_assert (nonzero_halves
[0] == 0);
40567 gcc_assert (nonzero_halves
[1] == 1);
40572 if (d
->perm
[0] / nelt2
== nonzero_halves
[1])
40574 /* Attempt to increase the likelihood that dfinal
40575 shuffle will be intra-lane. */
40576 char tmph
= nonzero_halves
[0];
40577 nonzero_halves
[0] = nonzero_halves
[1];
40578 nonzero_halves
[1] = tmph
;
40581 /* vperm2f128 or vperm2i128. */
40582 for (i
= 0; i
< nelt2
; ++i
)
40584 remap
[i
+ nonzero_halves
[1] * nelt2
] = i
+ nelt2
;
40585 remap
[i
+ nonzero_halves
[0] * nelt2
] = i
;
40586 dremap
.perm
[i
+ nelt2
] = i
+ nonzero_halves
[1] * nelt2
;
40587 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * nelt2
;
40590 if (d
->vmode
!= V8SFmode
40591 && d
->vmode
!= V4DFmode
40592 && d
->vmode
!= V8SImode
)
40594 dremap
.vmode
= V8SImode
;
40596 for (i
= 0; i
< 4; ++i
)
40598 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * 4;
40599 dremap
.perm
[i
+ 4] = i
+ nonzero_halves
[1] * 4;
40603 else if (d
->one_operand_p
)
40605 else if (TARGET_AVX2
40606 && (contents
& (q
[0] | q
[2] | q
[4] | q
[6])) == contents
)
40609 for (i
= 0; i
< nelt4
; ++i
)
40612 remap
[i
+ nelt
] = i
* 2 + 1;
40613 remap
[i
+ nelt2
] = i
* 2 + nelt2
;
40614 remap
[i
+ nelt
+ nelt2
] = i
* 2 + nelt2
+ 1;
40615 dremap
.perm
[i
* 2] = i
;
40616 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
40617 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
;
40618 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
;
40621 else if (TARGET_AVX2
40622 && (contents
& (q
[1] | q
[3] | q
[5] | q
[7])) == contents
)
40625 for (i
= 0; i
< nelt4
; ++i
)
40627 remap
[i
+ nelt4
] = i
* 2;
40628 remap
[i
+ nelt
+ nelt4
] = i
* 2 + 1;
40629 remap
[i
+ nelt2
+ nelt4
] = i
* 2 + nelt2
;
40630 remap
[i
+ nelt
+ nelt2
+ nelt4
] = i
* 2 + nelt2
+ 1;
40631 dremap
.perm
[i
* 2] = i
+ nelt4
;
40632 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt4
;
40633 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
+ nelt4
;
40634 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
+ nelt4
;
40641 /* Use the remapping array set up above to move the elements from their
40642 swizzled locations into their final destinations. */
40644 for (i
= 0; i
< nelt
; ++i
)
40646 unsigned e
= remap
[d
->perm
[i
]];
40647 gcc_assert (e
< nelt
);
40648 /* If same_halves is true, both halves of the remapped vector are the
40649 same. Avoid cross-lane accesses if possible. */
40650 if (same_halves
&& i
>= nelt2
)
40652 gcc_assert (e
< nelt2
);
40653 dfinal
.perm
[i
] = e
+ nelt2
;
40656 dfinal
.perm
[i
] = e
;
40658 dremap
.target
= gen_reg_rtx (dremap
.vmode
);
40659 dfinal
.op0
= gen_lowpart (dfinal
.vmode
, dremap
.target
);
40660 dfinal
.op1
= dfinal
.op0
;
40661 dfinal
.one_operand_p
= true;
40663 /* Test if the final remap can be done with a single insn. For V4SFmode or
40664 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
40666 ok
= expand_vec_perm_1 (&dfinal
);
40667 seq
= get_insns ();
40676 if (dremap
.vmode
!= dfinal
.vmode
)
40678 dremap
.op0
= gen_lowpart (dremap
.vmode
, dremap
.op0
);
40679 dremap
.op1
= gen_lowpart (dremap
.vmode
, dremap
.op1
);
40682 ok
= expand_vec_perm_1 (&dremap
);
40689 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
40690 a single vector cross-lane permutation into vpermq followed
40691 by any of the single insn permutations. */
40694 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d
*d
)
40696 struct expand_vec_perm_d dremap
, dfinal
;
40697 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, nelt4
= nelt
/ 4;
40698 unsigned contents
[2];
40702 && (d
->vmode
== V32QImode
|| d
->vmode
== V16HImode
)
40703 && d
->one_operand_p
))
40708 for (i
= 0; i
< nelt2
; ++i
)
40710 contents
[0] |= 1u << (d
->perm
[i
] / nelt4
);
40711 contents
[1] |= 1u << (d
->perm
[i
+ nelt2
] / nelt4
);
40714 for (i
= 0; i
< 2; ++i
)
40716 unsigned int cnt
= 0;
40717 for (j
= 0; j
< 4; ++j
)
40718 if ((contents
[i
] & (1u << j
)) != 0 && ++cnt
> 2)
40726 dremap
.vmode
= V4DImode
;
40728 dremap
.target
= gen_reg_rtx (V4DImode
);
40729 dremap
.op0
= gen_lowpart (V4DImode
, d
->op0
);
40730 dremap
.op1
= dremap
.op0
;
40731 dremap
.one_operand_p
= true;
40732 for (i
= 0; i
< 2; ++i
)
40734 unsigned int cnt
= 0;
40735 for (j
= 0; j
< 4; ++j
)
40736 if ((contents
[i
] & (1u << j
)) != 0)
40737 dremap
.perm
[2 * i
+ cnt
++] = j
;
40738 for (; cnt
< 2; ++cnt
)
40739 dremap
.perm
[2 * i
+ cnt
] = 0;
40743 dfinal
.op0
= gen_lowpart (dfinal
.vmode
, dremap
.target
);
40744 dfinal
.op1
= dfinal
.op0
;
40745 dfinal
.one_operand_p
= true;
40746 for (i
= 0, j
= 0; i
< nelt
; ++i
)
40750 dfinal
.perm
[i
] = (d
->perm
[i
] & (nelt4
- 1)) | (j
? nelt2
: 0);
40751 if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
])
40753 else if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
+ 1])
40754 dfinal
.perm
[i
] |= nelt4
;
40756 gcc_unreachable ();
40759 ok
= expand_vec_perm_1 (&dremap
);
40762 ok
= expand_vec_perm_1 (&dfinal
);
40768 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
40769 a vector permutation using two instructions, vperm2f128 resp.
40770 vperm2i128 followed by any single in-lane permutation. */
40773 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d
*d
)
40775 struct expand_vec_perm_d dfirst
, dsecond
;
40776 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, perm
;
40780 || GET_MODE_SIZE (d
->vmode
) != 32
40781 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
&& !TARGET_AVX2
))
40785 dsecond
.one_operand_p
= false;
40786 dsecond
.testing_p
= true;
40788 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
40789 immediate. For perm < 16 the second permutation uses
40790 d->op0 as first operand, for perm >= 16 it uses d->op1
40791 as first operand. The second operand is the result of
40793 for (perm
= 0; perm
< 32; perm
++)
40795 /* Ignore permutations which do not move anything cross-lane. */
40798 /* The second shuffle for e.g. V4DFmode has
40799 0123 and ABCD operands.
40800 Ignore AB23, as 23 is already in the second lane
40801 of the first operand. */
40802 if ((perm
& 0xc) == (1 << 2)) continue;
40803 /* And 01CD, as 01 is in the first lane of the first
40805 if ((perm
& 3) == 0) continue;
40806 /* And 4567, as then the vperm2[fi]128 doesn't change
40807 anything on the original 4567 second operand. */
40808 if ((perm
& 0xf) == ((3 << 2) | 2)) continue;
40812 /* The second shuffle for e.g. V4DFmode has
40813 4567 and ABCD operands.
40814 Ignore AB67, as 67 is already in the second lane
40815 of the first operand. */
40816 if ((perm
& 0xc) == (3 << 2)) continue;
40817 /* And 45CD, as 45 is in the first lane of the first
40819 if ((perm
& 3) == 2) continue;
40820 /* And 0123, as then the vperm2[fi]128 doesn't change
40821 anything on the original 0123 first operand. */
40822 if ((perm
& 0xf) == (1 << 2)) continue;
40825 for (i
= 0; i
< nelt
; i
++)
40827 j
= d
->perm
[i
] / nelt2
;
40828 if (j
== ((perm
>> (2 * (i
>= nelt2
))) & 3))
40829 dsecond
.perm
[i
] = nelt
+ (i
& nelt2
) + (d
->perm
[i
] & (nelt2
- 1));
40830 else if (j
== (unsigned) (i
>= nelt2
) + 2 * (perm
>= 16))
40831 dsecond
.perm
[i
] = d
->perm
[i
] & (nelt
- 1);
40839 ok
= expand_vec_perm_1 (&dsecond
);
40850 /* Found a usable second shuffle. dfirst will be
40851 vperm2f128 on d->op0 and d->op1. */
40852 dsecond
.testing_p
= false;
40854 dfirst
.target
= gen_reg_rtx (d
->vmode
);
40855 for (i
= 0; i
< nelt
; i
++)
40856 dfirst
.perm
[i
] = (i
& (nelt2
- 1))
40857 + ((perm
>> (2 * (i
>= nelt2
))) & 3) * nelt2
;
40859 ok
= expand_vec_perm_1 (&dfirst
);
40862 /* And dsecond is some single insn shuffle, taking
40863 d->op0 and result of vperm2f128 (if perm < 16) or
40864 d->op1 and result of vperm2f128 (otherwise). */
40865 dsecond
.op1
= dfirst
.target
;
40867 dsecond
.op0
= dfirst
.op1
;
40869 ok
= expand_vec_perm_1 (&dsecond
);
40875 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
40876 if (d
->one_operand_p
)
40883 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
40884 a two vector permutation using 2 intra-lane interleave insns
40885 and cross-lane shuffle for 32-byte vectors. */
40888 expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
)
40891 rtx (*gen
) (rtx
, rtx
, rtx
);
40893 if (d
->one_operand_p
)
40895 if (TARGET_AVX2
&& GET_MODE_SIZE (d
->vmode
) == 32)
40897 else if (TARGET_AVX
&& (d
->vmode
== V8SFmode
|| d
->vmode
== V4DFmode
))
40903 if (d
->perm
[0] != 0 && d
->perm
[0] != nelt
/ 2)
40905 for (i
= 0; i
< nelt
; i
+= 2)
40906 if (d
->perm
[i
] != d
->perm
[0] + i
/ 2
40907 || d
->perm
[i
+ 1] != d
->perm
[0] + i
/ 2 + nelt
)
40917 gen
= gen_vec_interleave_highv32qi
;
40919 gen
= gen_vec_interleave_lowv32qi
;
40923 gen
= gen_vec_interleave_highv16hi
;
40925 gen
= gen_vec_interleave_lowv16hi
;
40929 gen
= gen_vec_interleave_highv8si
;
40931 gen
= gen_vec_interleave_lowv8si
;
40935 gen
= gen_vec_interleave_highv4di
;
40937 gen
= gen_vec_interleave_lowv4di
;
40941 gen
= gen_vec_interleave_highv8sf
;
40943 gen
= gen_vec_interleave_lowv8sf
;
40947 gen
= gen_vec_interleave_highv4df
;
40949 gen
= gen_vec_interleave_lowv4df
;
40952 gcc_unreachable ();
40955 emit_insn (gen (d
->target
, d
->op0
, d
->op1
));
40959 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
40960 a single vector permutation using a single intra-lane vector
40961 permutation, vperm2f128 swapping the lanes and vblend* insn blending
40962 the non-swapped and swapped vectors together. */
40965 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d
*d
)
40967 struct expand_vec_perm_d dfirst
, dsecond
;
40968 unsigned i
, j
, msk
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
40971 rtx (*blend
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
40975 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
)
40976 || !d
->one_operand_p
)
40980 for (i
= 0; i
< nelt
; i
++)
40981 dfirst
.perm
[i
] = 0xff;
40982 for (i
= 0, msk
= 0; i
< nelt
; i
++)
40984 j
= (d
->perm
[i
] & nelt2
) ? i
| nelt2
: i
& ~nelt2
;
40985 if (dfirst
.perm
[j
] != 0xff && dfirst
.perm
[j
] != d
->perm
[i
])
40987 dfirst
.perm
[j
] = d
->perm
[i
];
40991 for (i
= 0; i
< nelt
; i
++)
40992 if (dfirst
.perm
[i
] == 0xff)
40993 dfirst
.perm
[i
] = i
;
40996 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
40999 ok
= expand_vec_perm_1 (&dfirst
);
41000 seq
= get_insns ();
41012 dsecond
.op0
= dfirst
.target
;
41013 dsecond
.op1
= dfirst
.target
;
41014 dsecond
.one_operand_p
= true;
41015 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
41016 for (i
= 0; i
< nelt
; i
++)
41017 dsecond
.perm
[i
] = i
^ nelt2
;
41019 ok
= expand_vec_perm_1 (&dsecond
);
41022 blend
= d
->vmode
== V8SFmode
? gen_avx_blendps256
: gen_avx_blendpd256
;
41023 emit_insn (blend (d
->target
, dfirst
.target
, dsecond
.target
, GEN_INT (msk
)));
41027 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
41028 permutation using two vperm2f128, followed by a vshufpd insn blending
41029 the two vectors together. */
41032 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d
*d
)
41034 struct expand_vec_perm_d dfirst
, dsecond
, dthird
;
41037 if (!TARGET_AVX
|| (d
->vmode
!= V4DFmode
))
41047 dfirst
.perm
[0] = (d
->perm
[0] & ~1);
41048 dfirst
.perm
[1] = (d
->perm
[0] & ~1) + 1;
41049 dfirst
.perm
[2] = (d
->perm
[2] & ~1);
41050 dfirst
.perm
[3] = (d
->perm
[2] & ~1) + 1;
41051 dsecond
.perm
[0] = (d
->perm
[1] & ~1);
41052 dsecond
.perm
[1] = (d
->perm
[1] & ~1) + 1;
41053 dsecond
.perm
[2] = (d
->perm
[3] & ~1);
41054 dsecond
.perm
[3] = (d
->perm
[3] & ~1) + 1;
41055 dthird
.perm
[0] = (d
->perm
[0] % 2);
41056 dthird
.perm
[1] = (d
->perm
[1] % 2) + 4;
41057 dthird
.perm
[2] = (d
->perm
[2] % 2) + 2;
41058 dthird
.perm
[3] = (d
->perm
[3] % 2) + 6;
41060 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
41061 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
41062 dthird
.op0
= dfirst
.target
;
41063 dthird
.op1
= dsecond
.target
;
41064 dthird
.one_operand_p
= false;
41066 canonicalize_perm (&dfirst
);
41067 canonicalize_perm (&dsecond
);
41069 ok
= expand_vec_perm_1 (&dfirst
)
41070 && expand_vec_perm_1 (&dsecond
)
41071 && expand_vec_perm_1 (&dthird
);
41078 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
41079 permutation with two pshufb insns and an ior. We should have already
41080 failed all two instruction sequences. */
41083 expand_vec_perm_pshufb2 (struct expand_vec_perm_d
*d
)
41085 rtx rperm
[2][16], vperm
, l
, h
, op
, m128
;
41086 unsigned int i
, nelt
, eltsz
;
41088 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
41090 gcc_assert (!d
->one_operand_p
);
41093 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
41095 /* Generate two permutation masks. If the required element is within
41096 the given vector it is shuffled into the proper lane. If the required
41097 element is in the other vector, force a zero into the lane by setting
41098 bit 7 in the permutation mask. */
41099 m128
= GEN_INT (-128);
41100 for (i
= 0; i
< nelt
; ++i
)
41102 unsigned j
, e
= d
->perm
[i
];
41103 unsigned which
= (e
>= nelt
);
41107 for (j
= 0; j
< eltsz
; ++j
)
41109 rperm
[which
][i
*eltsz
+ j
] = GEN_INT (e
*eltsz
+ j
);
41110 rperm
[1-which
][i
*eltsz
+ j
] = m128
;
41114 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[0]));
41115 vperm
= force_reg (V16QImode
, vperm
);
41117 l
= gen_reg_rtx (V16QImode
);
41118 op
= gen_lowpart (V16QImode
, d
->op0
);
41119 emit_insn (gen_ssse3_pshufbv16qi3 (l
, op
, vperm
));
41121 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[1]));
41122 vperm
= force_reg (V16QImode
, vperm
);
41124 h
= gen_reg_rtx (V16QImode
);
41125 op
= gen_lowpart (V16QImode
, d
->op1
);
41126 emit_insn (gen_ssse3_pshufbv16qi3 (h
, op
, vperm
));
41129 if (d
->vmode
!= V16QImode
)
41130 op
= gen_reg_rtx (V16QImode
);
41131 emit_insn (gen_iorv16qi3 (op
, l
, h
));
41132 if (op
!= d
->target
)
41133 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, op
));
41138 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
41139 with two vpshufb insns, vpermq and vpor. We should have already failed
41140 all two or three instruction sequences. */
41143 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d
*d
)
41145 rtx rperm
[2][32], vperm
, l
, h
, hp
, op
, m128
;
41146 unsigned int i
, nelt
, eltsz
;
41149 || !d
->one_operand_p
41150 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
41157 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
41159 /* Generate two permutation masks. If the required element is within
41160 the same lane, it is shuffled in. If the required element from the
41161 other lane, force a zero by setting bit 7 in the permutation mask.
41162 In the other mask the mask has non-negative elements if element
41163 is requested from the other lane, but also moved to the other lane,
41164 so that the result of vpshufb can have the two V2TImode halves
41166 m128
= GEN_INT (-128);
41167 for (i
= 0; i
< nelt
; ++i
)
41169 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
41170 unsigned which
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
41172 for (j
= 0; j
< eltsz
; ++j
)
41174 rperm
[!!which
][(i
* eltsz
+ j
) ^ which
] = GEN_INT (e
* eltsz
+ j
);
41175 rperm
[!which
][(i
* eltsz
+ j
) ^ (which
^ 16)] = m128
;
41179 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
41180 vperm
= force_reg (V32QImode
, vperm
);
41182 h
= gen_reg_rtx (V32QImode
);
41183 op
= gen_lowpart (V32QImode
, d
->op0
);
41184 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
41186 /* Swap the 128-byte lanes of h into hp. */
41187 hp
= gen_reg_rtx (V4DImode
);
41188 op
= gen_lowpart (V4DImode
, h
);
41189 emit_insn (gen_avx2_permv4di_1 (hp
, op
, const2_rtx
, GEN_INT (3), const0_rtx
,
41192 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
41193 vperm
= force_reg (V32QImode
, vperm
);
41195 l
= gen_reg_rtx (V32QImode
);
41196 op
= gen_lowpart (V32QImode
, d
->op0
);
41197 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
41200 if (d
->vmode
!= V32QImode
)
41201 op
= gen_reg_rtx (V32QImode
);
41202 emit_insn (gen_iorv32qi3 (op
, l
, gen_lowpart (V32QImode
, hp
)));
41203 if (op
!= d
->target
)
41204 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, op
));
41209 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
41210 and extract-odd permutations of two V32QImode and V16QImode operand
41211 with two vpshufb insns, vpor and vpermq. We should have already
41212 failed all two or three instruction sequences. */
41215 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d
*d
)
41217 rtx rperm
[2][32], vperm
, l
, h
, ior
, op
, m128
;
41218 unsigned int i
, nelt
, eltsz
;
41221 || d
->one_operand_p
41222 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
41225 for (i
= 0; i
< d
->nelt
; ++i
)
41226 if ((d
->perm
[i
] ^ (i
* 2)) & (3 * d
->nelt
/ 2))
41233 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
41235 /* Generate two permutation masks. In the first permutation mask
41236 the first quarter will contain indexes for the first half
41237 of the op0, the second quarter will contain bit 7 set, third quarter
41238 will contain indexes for the second half of the op0 and the
41239 last quarter bit 7 set. In the second permutation mask
41240 the first quarter will contain bit 7 set, the second quarter
41241 indexes for the first half of the op1, the third quarter bit 7 set
41242 and last quarter indexes for the second half of the op1.
41243 I.e. the first mask e.g. for V32QImode extract even will be:
41244 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
41245 (all values masked with 0xf except for -128) and second mask
41246 for extract even will be
41247 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
41248 m128
= GEN_INT (-128);
41249 for (i
= 0; i
< nelt
; ++i
)
41251 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
41252 unsigned which
= d
->perm
[i
] >= nelt
;
41253 unsigned xorv
= (i
>= nelt
/ 4 && i
< 3 * nelt
/ 4) ? 24 : 0;
41255 for (j
= 0; j
< eltsz
; ++j
)
41257 rperm
[which
][(i
* eltsz
+ j
) ^ xorv
] = GEN_INT (e
* eltsz
+ j
);
41258 rperm
[1 - which
][(i
* eltsz
+ j
) ^ xorv
] = m128
;
41262 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
41263 vperm
= force_reg (V32QImode
, vperm
);
41265 l
= gen_reg_rtx (V32QImode
);
41266 op
= gen_lowpart (V32QImode
, d
->op0
);
41267 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
41269 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
41270 vperm
= force_reg (V32QImode
, vperm
);
41272 h
= gen_reg_rtx (V32QImode
);
41273 op
= gen_lowpart (V32QImode
, d
->op1
);
41274 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
41276 ior
= gen_reg_rtx (V32QImode
);
41277 emit_insn (gen_iorv32qi3 (ior
, l
, h
));
41279 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
41280 op
= gen_reg_rtx (V4DImode
);
41281 ior
= gen_lowpart (V4DImode
, ior
);
41282 emit_insn (gen_avx2_permv4di_1 (op
, ior
, const0_rtx
, const2_rtx
,
41283 const1_rtx
, GEN_INT (3)));
41284 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, op
));
41289 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
41290 and extract-odd permutations. */
41293 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d
*d
, unsigned odd
)
41295 rtx t1
, t2
, t3
, t4
, t5
;
41300 t1
= gen_reg_rtx (V4DFmode
);
41301 t2
= gen_reg_rtx (V4DFmode
);
41303 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
41304 emit_insn (gen_avx_vperm2f128v4df3 (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
41305 emit_insn (gen_avx_vperm2f128v4df3 (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
41307 /* Now an unpck[lh]pd will produce the result required. */
41309 t3
= gen_avx_unpckhpd256 (d
->target
, t1
, t2
);
41311 t3
= gen_avx_unpcklpd256 (d
->target
, t1
, t2
);
41317 int mask
= odd
? 0xdd : 0x88;
41319 t1
= gen_reg_rtx (V8SFmode
);
41320 t2
= gen_reg_rtx (V8SFmode
);
41321 t3
= gen_reg_rtx (V8SFmode
);
41323 /* Shuffle within the 128-bit lanes to produce:
41324 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
41325 emit_insn (gen_avx_shufps256 (t1
, d
->op0
, d
->op1
,
41328 /* Shuffle the lanes around to produce:
41329 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
41330 emit_insn (gen_avx_vperm2f128v8sf3 (t2
, t1
, t1
,
41333 /* Shuffle within the 128-bit lanes to produce:
41334 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
41335 emit_insn (gen_avx_shufps256 (t3
, t1
, t2
, GEN_INT (0x44)));
41337 /* Shuffle within the 128-bit lanes to produce:
41338 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
41339 emit_insn (gen_avx_shufps256 (t2
, t1
, t2
, GEN_INT (0xee)));
41341 /* Shuffle the lanes around to produce:
41342 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
41343 emit_insn (gen_avx_vperm2f128v8sf3 (d
->target
, t3
, t2
,
41352 /* These are always directly implementable by expand_vec_perm_1. */
41353 gcc_unreachable ();
41357 return expand_vec_perm_pshufb2 (d
);
41360 /* We need 2*log2(N)-1 operations to achieve odd/even
41361 with interleave. */
41362 t1
= gen_reg_rtx (V8HImode
);
41363 t2
= gen_reg_rtx (V8HImode
);
41364 emit_insn (gen_vec_interleave_highv8hi (t1
, d
->op0
, d
->op1
));
41365 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->op0
, d
->op1
));
41366 emit_insn (gen_vec_interleave_highv8hi (t2
, d
->target
, t1
));
41367 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t1
));
41369 t3
= gen_vec_interleave_highv8hi (d
->target
, d
->target
, t2
);
41371 t3
= gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t2
);
41378 return expand_vec_perm_pshufb2 (d
);
41381 t1
= gen_reg_rtx (V16QImode
);
41382 t2
= gen_reg_rtx (V16QImode
);
41383 t3
= gen_reg_rtx (V16QImode
);
41384 emit_insn (gen_vec_interleave_highv16qi (t1
, d
->op0
, d
->op1
));
41385 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->op0
, d
->op1
));
41386 emit_insn (gen_vec_interleave_highv16qi (t2
, d
->target
, t1
));
41387 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t1
));
41388 emit_insn (gen_vec_interleave_highv16qi (t3
, d
->target
, t2
));
41389 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t2
));
41391 t3
= gen_vec_interleave_highv16qi (d
->target
, d
->target
, t3
);
41393 t3
= gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t3
);
41400 return expand_vec_perm_vpshufb2_vpermq_even_odd (d
);
41405 struct expand_vec_perm_d d_copy
= *d
;
41406 d_copy
.vmode
= V4DFmode
;
41407 d_copy
.target
= gen_reg_rtx (V4DFmode
);
41408 d_copy
.op0
= gen_lowpart (V4DFmode
, d
->op0
);
41409 d_copy
.op1
= gen_lowpart (V4DFmode
, d
->op1
);
41410 if (expand_vec_perm_even_odd_1 (&d_copy
, odd
))
41413 emit_move_insn (d
->target
,
41414 gen_lowpart (V4DImode
, d_copy
.target
));
41420 t1
= gen_reg_rtx (V4DImode
);
41421 t2
= gen_reg_rtx (V4DImode
);
41423 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
41424 emit_insn (gen_avx2_permv2ti (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
41425 emit_insn (gen_avx2_permv2ti (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
41427 /* Now an vpunpck[lh]qdq will produce the result required. */
41429 t3
= gen_avx2_interleave_highv4di (d
->target
, t1
, t2
);
41431 t3
= gen_avx2_interleave_lowv4di (d
->target
, t1
, t2
);
41438 struct expand_vec_perm_d d_copy
= *d
;
41439 d_copy
.vmode
= V8SFmode
;
41440 d_copy
.target
= gen_reg_rtx (V8SFmode
);
41441 d_copy
.op0
= gen_lowpart (V8SFmode
, d
->op0
);
41442 d_copy
.op1
= gen_lowpart (V8SFmode
, d
->op1
);
41443 if (expand_vec_perm_even_odd_1 (&d_copy
, odd
))
41446 emit_move_insn (d
->target
,
41447 gen_lowpart (V8SImode
, d_copy
.target
));
41453 t1
= gen_reg_rtx (V8SImode
);
41454 t2
= gen_reg_rtx (V8SImode
);
41455 t3
= gen_reg_rtx (V4DImode
);
41456 t4
= gen_reg_rtx (V4DImode
);
41457 t5
= gen_reg_rtx (V4DImode
);
41459 /* Shuffle the lanes around into
41460 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
41461 emit_insn (gen_avx2_permv2ti (t3
, gen_lowpart (V4DImode
, d
->op0
),
41462 gen_lowpart (V4DImode
, d
->op1
),
41464 emit_insn (gen_avx2_permv2ti (t4
, gen_lowpart (V4DImode
, d
->op0
),
41465 gen_lowpart (V4DImode
, d
->op1
),
41468 /* Swap the 2nd and 3rd position in each lane into
41469 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
41470 emit_insn (gen_avx2_pshufdv3 (t1
, gen_lowpart (V8SImode
, t3
),
41471 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
41472 emit_insn (gen_avx2_pshufdv3 (t2
, gen_lowpart (V8SImode
, t4
),
41473 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
41475 /* Now an vpunpck[lh]qdq will produce
41476 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
41478 t3
= gen_avx2_interleave_highv4di (t5
, gen_lowpart (V4DImode
, t1
),
41479 gen_lowpart (V4DImode
, t2
));
41481 t3
= gen_avx2_interleave_lowv4di (t5
, gen_lowpart (V4DImode
, t1
),
41482 gen_lowpart (V4DImode
, t2
));
41484 emit_move_insn (d
->target
, gen_lowpart (V8SImode
, t5
));
41488 gcc_unreachable ();
41494 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
41495 extract-even and extract-odd permutations. */
41498 expand_vec_perm_even_odd (struct expand_vec_perm_d
*d
)
41500 unsigned i
, odd
, nelt
= d
->nelt
;
41503 if (odd
!= 0 && odd
!= 1)
41506 for (i
= 1; i
< nelt
; ++i
)
41507 if (d
->perm
[i
] != 2 * i
+ odd
)
41510 return expand_vec_perm_even_odd_1 (d
, odd
);
41513 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
41514 permutations. We assume that expand_vec_perm_1 has already failed. */
41517 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
)
41519 unsigned elt
= d
->perm
[0], nelt2
= d
->nelt
/ 2;
41520 enum machine_mode vmode
= d
->vmode
;
41521 unsigned char perm2
[4];
41522 rtx op0
= d
->op0
, dest
;
41529 /* These are special-cased in sse.md so that we can optionally
41530 use the vbroadcast instruction. They expand to two insns
41531 if the input happens to be in a register. */
41532 gcc_unreachable ();
41538 /* These are always implementable using standard shuffle patterns. */
41539 gcc_unreachable ();
41543 /* These can be implemented via interleave. We save one insn by
41544 stopping once we have promoted to V4SImode and then use pshufd. */
41548 rtx (*gen
) (rtx
, rtx
, rtx
)
41549 = vmode
== V16QImode
? gen_vec_interleave_lowv16qi
41550 : gen_vec_interleave_lowv8hi
;
41554 gen
= vmode
== V16QImode
? gen_vec_interleave_highv16qi
41555 : gen_vec_interleave_highv8hi
;
41560 dest
= gen_reg_rtx (vmode
);
41561 emit_insn (gen (dest
, op0
, op0
));
41562 vmode
= get_mode_wider_vector (vmode
);
41563 op0
= gen_lowpart (vmode
, dest
);
41565 while (vmode
!= V4SImode
);
41567 memset (perm2
, elt
, 4);
41568 dest
= gen_reg_rtx (V4SImode
);
41569 ok
= expand_vselect (dest
, op0
, perm2
, 4, d
->testing_p
);
41572 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, dest
));
41579 /* For AVX2 broadcasts of the first element vpbroadcast* or
41580 vpermq should be used by expand_vec_perm_1. */
41581 gcc_assert (!TARGET_AVX2
|| d
->perm
[0]);
41585 gcc_unreachable ();
41589 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
41590 broadcast permutations. */
41593 expand_vec_perm_broadcast (struct expand_vec_perm_d
*d
)
41595 unsigned i
, elt
, nelt
= d
->nelt
;
41597 if (!d
->one_operand_p
)
41601 for (i
= 1; i
< nelt
; ++i
)
41602 if (d
->perm
[i
] != elt
)
41605 return expand_vec_perm_broadcast_1 (d
);
41608 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
41609 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
41610 all the shorter instruction sequences. */
41613 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d
*d
)
41615 rtx rperm
[4][32], vperm
, l
[2], h
[2], op
, m128
;
41616 unsigned int i
, nelt
, eltsz
;
41620 || d
->one_operand_p
41621 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
41628 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
41630 /* Generate 4 permutation masks. If the required element is within
41631 the same lane, it is shuffled in. If the required element from the
41632 other lane, force a zero by setting bit 7 in the permutation mask.
41633 In the other mask the mask has non-negative elements if element
41634 is requested from the other lane, but also moved to the other lane,
41635 so that the result of vpshufb can have the two V2TImode halves
41637 m128
= GEN_INT (-128);
41638 for (i
= 0; i
< 32; ++i
)
41640 rperm
[0][i
] = m128
;
41641 rperm
[1][i
] = m128
;
41642 rperm
[2][i
] = m128
;
41643 rperm
[3][i
] = m128
;
41649 for (i
= 0; i
< nelt
; ++i
)
41651 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
41652 unsigned xlane
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
41653 unsigned int which
= ((d
->perm
[i
] & nelt
) ? 2 : 0) + (xlane
? 1 : 0);
41655 for (j
= 0; j
< eltsz
; ++j
)
41656 rperm
[which
][(i
* eltsz
+ j
) ^ xlane
] = GEN_INT (e
* eltsz
+ j
);
41657 used
[which
] = true;
41660 for (i
= 0; i
< 2; ++i
)
41662 if (!used
[2 * i
+ 1])
41667 vperm
= gen_rtx_CONST_VECTOR (V32QImode
,
41668 gen_rtvec_v (32, rperm
[2 * i
+ 1]));
41669 vperm
= force_reg (V32QImode
, vperm
);
41670 h
[i
] = gen_reg_rtx (V32QImode
);
41671 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
41672 emit_insn (gen_avx2_pshufbv32qi3 (h
[i
], op
, vperm
));
41675 /* Swap the 128-byte lanes of h[X]. */
41676 for (i
= 0; i
< 2; ++i
)
41678 if (h
[i
] == NULL_RTX
)
41680 op
= gen_reg_rtx (V4DImode
);
41681 emit_insn (gen_avx2_permv4di_1 (op
, gen_lowpart (V4DImode
, h
[i
]),
41682 const2_rtx
, GEN_INT (3), const0_rtx
,
41684 h
[i
] = gen_lowpart (V32QImode
, op
);
41687 for (i
= 0; i
< 2; ++i
)
41694 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[2 * i
]));
41695 vperm
= force_reg (V32QImode
, vperm
);
41696 l
[i
] = gen_reg_rtx (V32QImode
);
41697 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
41698 emit_insn (gen_avx2_pshufbv32qi3 (l
[i
], op
, vperm
));
41701 for (i
= 0; i
< 2; ++i
)
41705 op
= gen_reg_rtx (V32QImode
);
41706 emit_insn (gen_iorv32qi3 (op
, l
[i
], h
[i
]));
41713 gcc_assert (l
[0] && l
[1]);
41715 if (d
->vmode
!= V32QImode
)
41716 op
= gen_reg_rtx (V32QImode
);
41717 emit_insn (gen_iorv32qi3 (op
, l
[0], l
[1]));
41718 if (op
!= d
->target
)
41719 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, op
));
41723 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
41724 With all of the interface bits taken care of, perform the expansion
41725 in D and return true on success. */
41728 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
41730 /* Try a single instruction expansion. */
41731 if (expand_vec_perm_1 (d
))
41734 /* Try sequences of two instructions. */
41736 if (expand_vec_perm_pshuflw_pshufhw (d
))
41739 if (expand_vec_perm_palignr (d
))
41742 if (expand_vec_perm_interleave2 (d
))
41745 if (expand_vec_perm_broadcast (d
))
41748 if (expand_vec_perm_vpermq_perm_1 (d
))
41751 if (expand_vec_perm_vperm2f128 (d
))
41754 /* Try sequences of three instructions. */
41756 if (expand_vec_perm_2vperm2f128_vshuf (d
))
41759 if (expand_vec_perm_pshufb2 (d
))
41762 if (expand_vec_perm_interleave3 (d
))
41765 if (expand_vec_perm_vperm2f128_vblend (d
))
41768 /* Try sequences of four instructions. */
41770 if (expand_vec_perm_vpshufb2_vpermq (d
))
41773 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d
))
41776 /* ??? Look for narrow permutations whose element orderings would
41777 allow the promotion to a wider mode. */
41779 /* ??? Look for sequences of interleave or a wider permute that place
41780 the data into the correct lanes for a half-vector shuffle like
41781 pshuf[lh]w or vpermilps. */
41783 /* ??? Look for sequences of interleave that produce the desired results.
41784 The combinatorics of punpck[lh] get pretty ugly... */
41786 if (expand_vec_perm_even_odd (d
))
41789 /* Even longer sequences. */
41790 if (expand_vec_perm_vpshufb4_vpermq2 (d
))
41796 /* If a permutation only uses one operand, make it clear. Returns true
41797 if the permutation references both operands. */
41800 canonicalize_perm (struct expand_vec_perm_d
*d
)
41802 int i
, which
, nelt
= d
->nelt
;
41804 for (i
= which
= 0; i
< nelt
; ++i
)
41805 which
|= (d
->perm
[i
] < nelt
? 1 : 2);
41807 d
->one_operand_p
= true;
41814 if (!rtx_equal_p (d
->op0
, d
->op1
))
41816 d
->one_operand_p
= false;
41819 /* The elements of PERM do not suggest that only the first operand
41820 is used, but both operands are identical. Allow easier matching
41821 of the permutation by folding the permutation into the single
41826 for (i
= 0; i
< nelt
; ++i
)
41827 d
->perm
[i
] &= nelt
- 1;
41836 return (which
== 3);
41840 ix86_expand_vec_perm_const (rtx operands
[4])
41842 struct expand_vec_perm_d d
;
41843 unsigned char perm
[MAX_VECT_LEN
];
41848 d
.target
= operands
[0];
41849 d
.op0
= operands
[1];
41850 d
.op1
= operands
[2];
41853 d
.vmode
= GET_MODE (d
.target
);
41854 gcc_assert (VECTOR_MODE_P (d
.vmode
));
41855 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
41856 d
.testing_p
= false;
41858 gcc_assert (GET_CODE (sel
) == CONST_VECTOR
);
41859 gcc_assert (XVECLEN (sel
, 0) == nelt
);
41860 gcc_checking_assert (sizeof (d
.perm
) == sizeof (perm
));
41862 for (i
= 0; i
< nelt
; ++i
)
41864 rtx e
= XVECEXP (sel
, 0, i
);
41865 int ei
= INTVAL (e
) & (2 * nelt
- 1);
41870 two_args
= canonicalize_perm (&d
);
41872 if (ix86_expand_vec_perm_const_1 (&d
))
41875 /* If the selector says both arguments are needed, but the operands are the
41876 same, the above tried to expand with one_operand_p and flattened selector.
41877 If that didn't work, retry without one_operand_p; we succeeded with that
41879 if (two_args
&& d
.one_operand_p
)
41881 d
.one_operand_p
= false;
41882 memcpy (d
.perm
, perm
, sizeof (perm
));
41883 return ix86_expand_vec_perm_const_1 (&d
);
41889 /* Implement targetm.vectorize.vec_perm_const_ok. */
41892 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
41893 const unsigned char *sel
)
41895 struct expand_vec_perm_d d
;
41896 unsigned int i
, nelt
, which
;
41900 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
41901 d
.testing_p
= true;
41903 /* Given sufficient ISA support we can just return true here
41904 for selected vector modes. */
41905 if (GET_MODE_SIZE (d
.vmode
) == 16)
41907 /* All implementable with a single vpperm insn. */
41910 /* All implementable with 2 pshufb + 1 ior. */
41913 /* All implementable with shufpd or unpck[lh]pd. */
41918 /* Extract the values from the vector CST into the permutation
41920 memcpy (d
.perm
, sel
, nelt
);
41921 for (i
= which
= 0; i
< nelt
; ++i
)
41923 unsigned char e
= d
.perm
[i
];
41924 gcc_assert (e
< 2 * nelt
);
41925 which
|= (e
< nelt
? 1 : 2);
41928 /* For all elements from second vector, fold the elements to first. */
41930 for (i
= 0; i
< nelt
; ++i
)
41933 /* Check whether the mask can be applied to the vector type. */
41934 d
.one_operand_p
= (which
!= 3);
41936 /* Implementable with shufps or pshufd. */
41937 if (d
.one_operand_p
&& (d
.vmode
== V4SFmode
|| d
.vmode
== V4SImode
))
41940 /* Otherwise we have to go through the motions and see if we can
41941 figure out how to generate the requested permutation. */
41942 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
41943 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
41944 if (!d
.one_operand_p
)
41945 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
41948 ret
= ix86_expand_vec_perm_const_1 (&d
);
41955 ix86_expand_vec_extract_even_odd (rtx targ
, rtx op0
, rtx op1
, unsigned odd
)
41957 struct expand_vec_perm_d d
;
41963 d
.vmode
= GET_MODE (targ
);
41964 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
41965 d
.one_operand_p
= false;
41966 d
.testing_p
= false;
41968 for (i
= 0; i
< nelt
; ++i
)
41969 d
.perm
[i
] = i
* 2 + odd
;
41971 /* We'll either be able to implement the permutation directly... */
41972 if (expand_vec_perm_1 (&d
))
41975 /* ... or we use the special-case patterns. */
41976 expand_vec_perm_even_odd_1 (&d
, odd
);
41980 ix86_expand_vec_interleave (rtx targ
, rtx op0
, rtx op1
, bool high_p
)
41982 struct expand_vec_perm_d d
;
41983 unsigned i
, nelt
, base
;
41989 d
.vmode
= GET_MODE (targ
);
41990 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
41991 d
.one_operand_p
= false;
41992 d
.testing_p
= false;
41994 base
= high_p
? nelt
/ 2 : 0;
41995 for (i
= 0; i
< nelt
/ 2; ++i
)
41997 d
.perm
[i
* 2] = i
+ base
;
41998 d
.perm
[i
* 2 + 1] = i
+ base
+ nelt
;
42001 /* Note that for AVX this isn't one instruction. */
42002 ok
= ix86_expand_vec_perm_const_1 (&d
);
42007 /* Expand a vector operation CODE for a V*QImode in terms of the
42008 same operation on V*HImode. */
42011 ix86_expand_vecop_qihi (enum rtx_code code
, rtx dest
, rtx op1
, rtx op2
)
42013 enum machine_mode qimode
= GET_MODE (dest
);
42014 enum machine_mode himode
;
42015 rtx (*gen_il
) (rtx
, rtx
, rtx
);
42016 rtx (*gen_ih
) (rtx
, rtx
, rtx
);
42017 rtx op1_l
, op1_h
, op2_l
, op2_h
, res_l
, res_h
;
42018 struct expand_vec_perm_d d
;
42019 bool ok
, full_interleave
;
42020 bool uns_p
= false;
42027 gen_il
= gen_vec_interleave_lowv16qi
;
42028 gen_ih
= gen_vec_interleave_highv16qi
;
42031 himode
= V16HImode
;
42032 gen_il
= gen_avx2_interleave_lowv32qi
;
42033 gen_ih
= gen_avx2_interleave_highv32qi
;
42036 gcc_unreachable ();
42039 op2_l
= op2_h
= op2
;
42043 /* Unpack data such that we've got a source byte in each low byte of
42044 each word. We don't care what goes into the high byte of each word.
42045 Rather than trying to get zero in there, most convenient is to let
42046 it be a copy of the low byte. */
42047 op2_l
= gen_reg_rtx (qimode
);
42048 op2_h
= gen_reg_rtx (qimode
);
42049 emit_insn (gen_il (op2_l
, op2
, op2
));
42050 emit_insn (gen_ih (op2_h
, op2
, op2
));
42053 op1_l
= gen_reg_rtx (qimode
);
42054 op1_h
= gen_reg_rtx (qimode
);
42055 emit_insn (gen_il (op1_l
, op1
, op1
));
42056 emit_insn (gen_ih (op1_h
, op1
, op1
));
42057 full_interleave
= qimode
== V16QImode
;
42065 op1_l
= gen_reg_rtx (himode
);
42066 op1_h
= gen_reg_rtx (himode
);
42067 ix86_expand_sse_unpack (op1_l
, op1
, uns_p
, false);
42068 ix86_expand_sse_unpack (op1_h
, op1
, uns_p
, true);
42069 full_interleave
= true;
42072 gcc_unreachable ();
42075 /* Perform the operation. */
42076 res_l
= expand_simple_binop (himode
, code
, op1_l
, op2_l
, NULL_RTX
,
42078 res_h
= expand_simple_binop (himode
, code
, op1_h
, op2_h
, NULL_RTX
,
42080 gcc_assert (res_l
&& res_h
);
42082 /* Merge the data back into the right place. */
42084 d
.op0
= gen_lowpart (qimode
, res_l
);
42085 d
.op1
= gen_lowpart (qimode
, res_h
);
42087 d
.nelt
= GET_MODE_NUNITS (qimode
);
42088 d
.one_operand_p
= false;
42089 d
.testing_p
= false;
42091 if (full_interleave
)
42093 /* For SSE2, we used an full interleave, so the desired
42094 results are in the even elements. */
42095 for (i
= 0; i
< 32; ++i
)
42100 /* For AVX, the interleave used above was not cross-lane. So the
42101 extraction is evens but with the second and third quarter swapped.
42102 Happily, that is even one insn shorter than even extraction. */
42103 for (i
= 0; i
< 32; ++i
)
42104 d
.perm
[i
] = i
* 2 + ((i
& 24) == 8 ? 16 : (i
& 24) == 16 ? -16 : 0);
42107 ok
= ix86_expand_vec_perm_const_1 (&d
);
42110 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
42111 gen_rtx_fmt_ee (code
, qimode
, op1
, op2
));
42114 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
42115 if op is CONST_VECTOR with all odd elements equal to their
42116 preceding element. */
42119 const_vector_equal_evenodd_p (rtx op
)
42121 enum machine_mode mode
= GET_MODE (op
);
42122 int i
, nunits
= GET_MODE_NUNITS (mode
);
42123 if (GET_CODE (op
) != CONST_VECTOR
42124 || nunits
!= CONST_VECTOR_NUNITS (op
))
42126 for (i
= 0; i
< nunits
; i
+= 2)
42127 if (CONST_VECTOR_ELT (op
, i
) != CONST_VECTOR_ELT (op
, i
+ 1))
42133 ix86_expand_mul_widen_evenodd (rtx dest
, rtx op1
, rtx op2
,
42134 bool uns_p
, bool odd_p
)
42136 enum machine_mode mode
= GET_MODE (op1
);
42137 enum machine_mode wmode
= GET_MODE (dest
);
42139 rtx orig_op1
= op1
, orig_op2
= op2
;
42141 if (!nonimmediate_operand (op1
, mode
))
42142 op1
= force_reg (mode
, op1
);
42143 if (!nonimmediate_operand (op2
, mode
))
42144 op2
= force_reg (mode
, op2
);
42146 /* We only play even/odd games with vectors of SImode. */
42147 gcc_assert (mode
== V4SImode
|| mode
== V8SImode
);
42149 /* If we're looking for the odd results, shift those members down to
42150 the even slots. For some cpus this is faster than a PSHUFD. */
42153 /* For XOP use vpmacsdqh, but only for smult, as it is only
42155 if (TARGET_XOP
&& mode
== V4SImode
&& !uns_p
)
42157 x
= force_reg (wmode
, CONST0_RTX (wmode
));
42158 emit_insn (gen_xop_pmacsdqh (dest
, op1
, op2
, x
));
42162 x
= GEN_INT (GET_MODE_UNIT_BITSIZE (mode
));
42163 if (!const_vector_equal_evenodd_p (orig_op1
))
42164 op1
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op1
),
42165 x
, NULL
, 1, OPTAB_DIRECT
);
42166 if (!const_vector_equal_evenodd_p (orig_op2
))
42167 op2
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op2
),
42168 x
, NULL
, 1, OPTAB_DIRECT
);
42169 op1
= gen_lowpart (mode
, op1
);
42170 op2
= gen_lowpart (mode
, op2
);
42173 if (mode
== V8SImode
)
42176 x
= gen_vec_widen_umult_even_v8si (dest
, op1
, op2
);
42178 x
= gen_vec_widen_smult_even_v8si (dest
, op1
, op2
);
42181 x
= gen_vec_widen_umult_even_v4si (dest
, op1
, op2
);
42182 else if (TARGET_SSE4_1
)
42183 x
= gen_sse4_1_mulv2siv2di3 (dest
, op1
, op2
);
42186 rtx s1
, s2
, t0
, t1
, t2
;
42188 /* The easiest way to implement this without PMULDQ is to go through
42189 the motions as if we are performing a full 64-bit multiply. With
42190 the exception that we need to do less shuffling of the elements. */
42192 /* Compute the sign-extension, aka highparts, of the two operands. */
42193 s1
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
42194 op1
, pc_rtx
, pc_rtx
);
42195 s2
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
42196 op2
, pc_rtx
, pc_rtx
);
42198 /* Multiply LO(A) * HI(B), and vice-versa. */
42199 t1
= gen_reg_rtx (wmode
);
42200 t2
= gen_reg_rtx (wmode
);
42201 emit_insn (gen_vec_widen_umult_even_v4si (t1
, s1
, op2
));
42202 emit_insn (gen_vec_widen_umult_even_v4si (t2
, s2
, op1
));
42204 /* Multiply LO(A) * LO(B). */
42205 t0
= gen_reg_rtx (wmode
);
42206 emit_insn (gen_vec_widen_umult_even_v4si (t0
, op1
, op2
));
42208 /* Combine and shift the highparts into place. */
42209 t1
= expand_binop (wmode
, add_optab
, t1
, t2
, t1
, 1, OPTAB_DIRECT
);
42210 t1
= expand_binop (wmode
, ashl_optab
, t1
, GEN_INT (32), t1
,
42213 /* Combine high and low parts. */
42214 force_expand_binop (wmode
, add_optab
, t0
, t1
, dest
, 1, OPTAB_DIRECT
);
42221 ix86_expand_mul_widen_hilo (rtx dest
, rtx op1
, rtx op2
,
42222 bool uns_p
, bool high_p
)
42224 enum machine_mode wmode
= GET_MODE (dest
);
42225 enum machine_mode mode
= GET_MODE (op1
);
42226 rtx t1
, t2
, t3
, t4
, mask
;
42231 t1
= gen_reg_rtx (mode
);
42232 t2
= gen_reg_rtx (mode
);
42233 if (TARGET_XOP
&& !uns_p
)
42235 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
42236 shuffle the elements once so that all elements are in the right
42237 place for immediate use: { A C B D }. */
42238 emit_insn (gen_sse2_pshufd_1 (t1
, op1
, const0_rtx
, const2_rtx
,
42239 const1_rtx
, GEN_INT (3)));
42240 emit_insn (gen_sse2_pshufd_1 (t2
, op2
, const0_rtx
, const2_rtx
,
42241 const1_rtx
, GEN_INT (3)));
42245 /* Put the elements into place for the multiply. */
42246 ix86_expand_vec_interleave (t1
, op1
, op1
, high_p
);
42247 ix86_expand_vec_interleave (t2
, op2
, op2
, high_p
);
42250 ix86_expand_mul_widen_evenodd (dest
, t1
, t2
, uns_p
, high_p
);
42254 /* Shuffle the elements between the lanes. After this we
42255 have { A B E F | C D G H } for each operand. */
42256 t1
= gen_reg_rtx (V4DImode
);
42257 t2
= gen_reg_rtx (V4DImode
);
42258 emit_insn (gen_avx2_permv4di_1 (t1
, gen_lowpart (V4DImode
, op1
),
42259 const0_rtx
, const2_rtx
,
42260 const1_rtx
, GEN_INT (3)));
42261 emit_insn (gen_avx2_permv4di_1 (t2
, gen_lowpart (V4DImode
, op2
),
42262 const0_rtx
, const2_rtx
,
42263 const1_rtx
, GEN_INT (3)));
42265 /* Shuffle the elements within the lanes. After this we
42266 have { A A B B | C C D D } or { E E F F | G G H H }. */
42267 t3
= gen_reg_rtx (V8SImode
);
42268 t4
= gen_reg_rtx (V8SImode
);
42269 mask
= GEN_INT (high_p
42270 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
42271 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
42272 emit_insn (gen_avx2_pshufdv3 (t3
, gen_lowpart (V8SImode
, t1
), mask
));
42273 emit_insn (gen_avx2_pshufdv3 (t4
, gen_lowpart (V8SImode
, t2
), mask
));
42275 ix86_expand_mul_widen_evenodd (dest
, t3
, t4
, uns_p
, false);
42280 t1
= expand_binop (mode
, smul_optab
, op1
, op2
, NULL_RTX
,
42281 uns_p
, OPTAB_DIRECT
);
42282 t2
= expand_binop (mode
,
42283 uns_p
? umul_highpart_optab
: smul_highpart_optab
,
42284 op1
, op2
, NULL_RTX
, uns_p
, OPTAB_DIRECT
);
42285 gcc_assert (t1
&& t2
);
42287 t3
= gen_reg_rtx (mode
);
42288 ix86_expand_vec_interleave (t3
, t1
, t2
, high_p
);
42289 emit_move_insn (dest
, gen_lowpart (wmode
, t3
));
42294 t1
= gen_reg_rtx (wmode
);
42295 t2
= gen_reg_rtx (wmode
);
42296 ix86_expand_sse_unpack (t1
, op1
, uns_p
, high_p
);
42297 ix86_expand_sse_unpack (t2
, op2
, uns_p
, high_p
);
42299 emit_insn (gen_rtx_SET (VOIDmode
, dest
, gen_rtx_MULT (wmode
, t1
, t2
)));
42303 gcc_unreachable ();
42308 ix86_expand_sse2_mulv4si3 (rtx op0
, rtx op1
, rtx op2
)
42310 rtx res_1
, res_2
, res_3
, res_4
;
42312 res_1
= gen_reg_rtx (V4SImode
);
42313 res_2
= gen_reg_rtx (V4SImode
);
42314 res_3
= gen_reg_rtx (V2DImode
);
42315 res_4
= gen_reg_rtx (V2DImode
);
42316 ix86_expand_mul_widen_evenodd (res_3
, op1
, op2
, true, false);
42317 ix86_expand_mul_widen_evenodd (res_4
, op1
, op2
, true, true);
42319 /* Move the results in element 2 down to element 1; we don't care
42320 what goes in elements 2 and 3. Then we can merge the parts
42321 back together with an interleave.
42323 Note that two other sequences were tried:
42324 (1) Use interleaves at the start instead of psrldq, which allows
42325 us to use a single shufps to merge things back at the end.
42326 (2) Use shufps here to combine the two vectors, then pshufd to
42327 put the elements in the correct order.
42328 In both cases the cost of the reformatting stall was too high
42329 and the overall sequence slower. */
42331 emit_insn (gen_sse2_pshufd_1 (res_1
, gen_lowpart (V4SImode
, res_3
),
42332 const0_rtx
, const2_rtx
,
42333 const0_rtx
, const0_rtx
));
42334 emit_insn (gen_sse2_pshufd_1 (res_2
, gen_lowpart (V4SImode
, res_4
),
42335 const0_rtx
, const2_rtx
,
42336 const0_rtx
, const0_rtx
));
42337 res_1
= emit_insn (gen_vec_interleave_lowv4si (op0
, res_1
, res_2
));
42339 set_unique_reg_note (res_1
, REG_EQUAL
, gen_rtx_MULT (V4SImode
, op1
, op2
));
42343 ix86_expand_sse2_mulvxdi3 (rtx op0
, rtx op1
, rtx op2
)
42345 enum machine_mode mode
= GET_MODE (op0
);
42346 rtx t1
, t2
, t3
, t4
, t5
, t6
;
42348 if (TARGET_XOP
&& mode
== V2DImode
)
42350 /* op1: A,B,C,D, op2: E,F,G,H */
42351 op1
= gen_lowpart (V4SImode
, op1
);
42352 op2
= gen_lowpart (V4SImode
, op2
);
42354 t1
= gen_reg_rtx (V4SImode
);
42355 t2
= gen_reg_rtx (V4SImode
);
42356 t3
= gen_reg_rtx (V2DImode
);
42357 t4
= gen_reg_rtx (V2DImode
);
42360 emit_insn (gen_sse2_pshufd_1 (t1
, op1
,
42366 /* t2: (B*E),(A*F),(D*G),(C*H) */
42367 emit_insn (gen_mulv4si3 (t2
, t1
, op2
));
42369 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
42370 emit_insn (gen_xop_phadddq (t3
, t2
));
42372 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
42373 emit_insn (gen_ashlv2di3 (t4
, t3
, GEN_INT (32)));
42375 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
42376 emit_insn (gen_xop_pmacsdql (op0
, op1
, op2
, t4
));
42380 enum machine_mode nmode
;
42381 rtx (*umul
) (rtx
, rtx
, rtx
);
42383 if (mode
== V2DImode
)
42385 umul
= gen_vec_widen_umult_even_v4si
;
42388 else if (mode
== V4DImode
)
42390 umul
= gen_vec_widen_umult_even_v8si
;
42394 gcc_unreachable ();
42397 /* Multiply low parts. */
42398 t1
= gen_reg_rtx (mode
);
42399 emit_insn (umul (t1
, gen_lowpart (nmode
, op1
), gen_lowpart (nmode
, op2
)));
42401 /* Shift input vectors right 32 bits so we can multiply high parts. */
42403 t2
= expand_binop (mode
, lshr_optab
, op1
, t6
, NULL
, 1, OPTAB_DIRECT
);
42404 t3
= expand_binop (mode
, lshr_optab
, op2
, t6
, NULL
, 1, OPTAB_DIRECT
);
42406 /* Multiply high parts by low parts. */
42407 t4
= gen_reg_rtx (mode
);
42408 t5
= gen_reg_rtx (mode
);
42409 emit_insn (umul (t4
, gen_lowpart (nmode
, t2
), gen_lowpart (nmode
, op2
)));
42410 emit_insn (umul (t5
, gen_lowpart (nmode
, t3
), gen_lowpart (nmode
, op1
)));
42412 /* Combine and shift the highparts back. */
42413 t4
= expand_binop (mode
, add_optab
, t4
, t5
, t4
, 1, OPTAB_DIRECT
);
42414 t4
= expand_binop (mode
, ashl_optab
, t4
, t6
, t4
, 1, OPTAB_DIRECT
);
42416 /* Combine high and low parts. */
42417 force_expand_binop (mode
, add_optab
, t1
, t4
, op0
, 1, OPTAB_DIRECT
);
42420 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
42421 gen_rtx_MULT (mode
, op1
, op2
));
42424 /* Return 1 if control tansfer instruction INSN
42425 should be encoded with bnd prefix.
42426 If insn is NULL then return 1 when control
42427 transfer instructions should be prefixed with
42428 bnd by default for current function. */
42431 ix86_bnd_prefixed_insn_p (rtx insn ATTRIBUTE_UNUSED
)
42436 /* Calculate integer abs() using only SSE2 instructions. */
42439 ix86_expand_sse2_abs (rtx target
, rtx input
)
42441 enum machine_mode mode
= GET_MODE (target
);
42446 /* For 32-bit signed integer X, the best way to calculate the absolute
42447 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
42449 tmp0
= expand_simple_binop (mode
, ASHIFTRT
, input
,
42450 GEN_INT (GET_MODE_BITSIZE
42451 (GET_MODE_INNER (mode
)) - 1),
42452 NULL
, 0, OPTAB_DIRECT
);
42453 tmp1
= expand_simple_binop (mode
, XOR
, tmp0
, input
,
42454 NULL
, 0, OPTAB_DIRECT
);
42455 x
= expand_simple_binop (mode
, MINUS
, tmp1
, tmp0
,
42456 target
, 0, OPTAB_DIRECT
);
42459 /* For 16-bit signed integer X, the best way to calculate the absolute
42460 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
42462 tmp0
= expand_unop (mode
, neg_optab
, input
, NULL_RTX
, 0);
42464 x
= expand_simple_binop (mode
, SMAX
, tmp0
, input
,
42465 target
, 0, OPTAB_DIRECT
);
42468 /* For 8-bit signed integer X, the best way to calculate the absolute
42469 value of X is min ((unsigned char) X, (unsigned char) (-X)),
42470 as SSE2 provides the PMINUB insn. */
42472 tmp0
= expand_unop (mode
, neg_optab
, input
, NULL_RTX
, 0);
42474 x
= expand_simple_binop (V16QImode
, UMIN
, tmp0
, input
,
42475 target
, 0, OPTAB_DIRECT
);
42479 gcc_unreachable ();
42483 emit_move_insn (target
, x
);
42486 /* Expand an insert into a vector register through pinsr insn.
42487 Return true if successful. */
42490 ix86_expand_pinsr (rtx
*operands
)
42492 rtx dst
= operands
[0];
42493 rtx src
= operands
[3];
42495 unsigned int size
= INTVAL (operands
[1]);
42496 unsigned int pos
= INTVAL (operands
[2]);
42498 if (GET_CODE (dst
) == SUBREG
)
42500 pos
+= SUBREG_BYTE (dst
) * BITS_PER_UNIT
;
42501 dst
= SUBREG_REG (dst
);
42504 if (GET_CODE (src
) == SUBREG
)
42505 src
= SUBREG_REG (src
);
42507 switch (GET_MODE (dst
))
42514 enum machine_mode srcmode
, dstmode
;
42515 rtx (*pinsr
)(rtx
, rtx
, rtx
, rtx
);
42517 srcmode
= mode_for_size (size
, MODE_INT
, 0);
42522 if (!TARGET_SSE4_1
)
42524 dstmode
= V16QImode
;
42525 pinsr
= gen_sse4_1_pinsrb
;
42531 dstmode
= V8HImode
;
42532 pinsr
= gen_sse2_pinsrw
;
42536 if (!TARGET_SSE4_1
)
42538 dstmode
= V4SImode
;
42539 pinsr
= gen_sse4_1_pinsrd
;
42543 gcc_assert (TARGET_64BIT
);
42544 if (!TARGET_SSE4_1
)
42546 dstmode
= V2DImode
;
42547 pinsr
= gen_sse4_1_pinsrq
;
42555 if (GET_MODE (dst
) != dstmode
)
42556 d
= gen_reg_rtx (dstmode
);
42557 src
= gen_lowpart (srcmode
, src
);
42561 emit_insn (pinsr (d
, gen_lowpart (dstmode
, dst
), src
,
42562 GEN_INT (1 << pos
)));
42564 emit_move_insn (dst
, gen_lowpart (GET_MODE (dst
), d
));
42573 /* This function returns the calling abi specific va_list type node.
42574 It returns the FNDECL specific va_list type. */
42577 ix86_fn_abi_va_list (tree fndecl
)
42580 return va_list_type_node
;
42581 gcc_assert (fndecl
!= NULL_TREE
);
42583 if (ix86_function_abi ((const_tree
) fndecl
) == MS_ABI
)
42584 return ms_va_list_type_node
;
42586 return sysv_va_list_type_node
;
42589 /* Returns the canonical va_list type specified by TYPE. If there
42590 is no valid TYPE provided, it return NULL_TREE. */
42593 ix86_canonical_va_list_type (tree type
)
42597 /* Resolve references and pointers to va_list type. */
42598 if (TREE_CODE (type
) == MEM_REF
)
42599 type
= TREE_TYPE (type
);
42600 else if (POINTER_TYPE_P (type
) && POINTER_TYPE_P (TREE_TYPE(type
)))
42601 type
= TREE_TYPE (type
);
42602 else if (POINTER_TYPE_P (type
) && TREE_CODE (TREE_TYPE (type
)) == ARRAY_TYPE
)
42603 type
= TREE_TYPE (type
);
42605 if (TARGET_64BIT
&& va_list_type_node
!= NULL_TREE
)
42607 wtype
= va_list_type_node
;
42608 gcc_assert (wtype
!= NULL_TREE
);
42610 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
42612 /* If va_list is an array type, the argument may have decayed
42613 to a pointer type, e.g. by being passed to another function.
42614 In that case, unwrap both types so that we can compare the
42615 underlying records. */
42616 if (TREE_CODE (htype
) == ARRAY_TYPE
42617 || POINTER_TYPE_P (htype
))
42619 wtype
= TREE_TYPE (wtype
);
42620 htype
= TREE_TYPE (htype
);
42623 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
42624 return va_list_type_node
;
42625 wtype
= sysv_va_list_type_node
;
42626 gcc_assert (wtype
!= NULL_TREE
);
42628 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
42630 /* If va_list is an array type, the argument may have decayed
42631 to a pointer type, e.g. by being passed to another function.
42632 In that case, unwrap both types so that we can compare the
42633 underlying records. */
42634 if (TREE_CODE (htype
) == ARRAY_TYPE
42635 || POINTER_TYPE_P (htype
))
42637 wtype
= TREE_TYPE (wtype
);
42638 htype
= TREE_TYPE (htype
);
42641 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
42642 return sysv_va_list_type_node
;
42643 wtype
= ms_va_list_type_node
;
42644 gcc_assert (wtype
!= NULL_TREE
);
42646 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
42648 /* If va_list is an array type, the argument may have decayed
42649 to a pointer type, e.g. by being passed to another function.
42650 In that case, unwrap both types so that we can compare the
42651 underlying records. */
42652 if (TREE_CODE (htype
) == ARRAY_TYPE
42653 || POINTER_TYPE_P (htype
))
42655 wtype
= TREE_TYPE (wtype
);
42656 htype
= TREE_TYPE (htype
);
42659 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
42660 return ms_va_list_type_node
;
42663 return std_canonical_va_list_type (type
);
42666 /* Iterate through the target-specific builtin types for va_list.
42667 IDX denotes the iterator, *PTREE is set to the result type of
42668 the va_list builtin, and *PNAME to its internal type.
42669 Returns zero if there is no element for this index, otherwise
42670 IDX should be increased upon the next call.
42671 Note, do not iterate a base builtin's name like __builtin_va_list.
42672 Used from c_common_nodes_and_builtins. */
42675 ix86_enum_va_list (int idx
, const char **pname
, tree
*ptree
)
42685 *ptree
= ms_va_list_type_node
;
42686 *pname
= "__builtin_ms_va_list";
42690 *ptree
= sysv_va_list_type_node
;
42691 *pname
= "__builtin_sysv_va_list";
42699 #undef TARGET_SCHED_DISPATCH
42700 #define TARGET_SCHED_DISPATCH has_dispatch
42701 #undef TARGET_SCHED_DISPATCH_DO
42702 #define TARGET_SCHED_DISPATCH_DO do_dispatch
42703 #undef TARGET_SCHED_REASSOCIATION_WIDTH
42704 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
42705 #undef TARGET_SCHED_REORDER
42706 #define TARGET_SCHED_REORDER ix86_sched_reorder
42707 #undef TARGET_SCHED_ADJUST_PRIORITY
42708 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
42709 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
42710 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
42711 ix86_dependencies_evaluation_hook
42713 /* The size of the dispatch window is the total number of bytes of
42714 object code allowed in a window. */
42715 #define DISPATCH_WINDOW_SIZE 16
42717 /* Number of dispatch windows considered for scheduling. */
42718 #define MAX_DISPATCH_WINDOWS 3
42720 /* Maximum number of instructions in a window. */
42723 /* Maximum number of immediate operands in a window. */
42726 /* Maximum number of immediate bits allowed in a window. */
42727 #define MAX_IMM_SIZE 128
42729 /* Maximum number of 32 bit immediates allowed in a window. */
42730 #define MAX_IMM_32 4
42732 /* Maximum number of 64 bit immediates allowed in a window. */
42733 #define MAX_IMM_64 2
42735 /* Maximum total of loads or prefetches allowed in a window. */
42738 /* Maximum total of stores allowed in a window. */
42739 #define MAX_STORE 1
42745 /* Dispatch groups. Istructions that affect the mix in a dispatch window. */
42746 enum dispatch_group
{
42761 /* Number of allowable groups in a dispatch window. It is an array
42762 indexed by dispatch_group enum. 100 is used as a big number,
42763 because the number of these kind of operations does not have any
42764 effect in dispatch window, but we need them for other reasons in
42766 static unsigned int num_allowable_groups
[disp_last
] = {
42767 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG
, BIG
42770 char group_name
[disp_last
+ 1][16] = {
42771 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
42772 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
42773 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
42776 /* Instruction path. */
42779 path_single
, /* Single micro op. */
42780 path_double
, /* Double micro op. */
42781 path_multi
, /* Instructions with more than 2 micro op.. */
42785 /* sched_insn_info defines a window to the instructions scheduled in
42786 the basic block. It contains a pointer to the insn_info table and
42787 the instruction scheduled.
42789 Windows are allocated for each basic block and are linked
42791 typedef struct sched_insn_info_s
{
42793 enum dispatch_group group
;
42794 enum insn_path path
;
42799 /* Linked list of dispatch windows. This is a two way list of
42800 dispatch windows of a basic block. It contains information about
42801 the number of uops in the window and the total number of
42802 instructions and of bytes in the object code for this dispatch
42804 typedef struct dispatch_windows_s
{
42805 int num_insn
; /* Number of insn in the window. */
42806 int num_uops
; /* Number of uops in the window. */
42807 int window_size
; /* Number of bytes in the window. */
42808 int window_num
; /* Window number between 0 or 1. */
42809 int num_imm
; /* Number of immediates in an insn. */
42810 int num_imm_32
; /* Number of 32 bit immediates in an insn. */
42811 int num_imm_64
; /* Number of 64 bit immediates in an insn. */
42812 int imm_size
; /* Total immediates in the window. */
42813 int num_loads
; /* Total memory loads in the window. */
42814 int num_stores
; /* Total memory stores in the window. */
42815 int violation
; /* Violation exists in window. */
42816 sched_insn_info
*window
; /* Pointer to the window. */
42817 struct dispatch_windows_s
*next
;
42818 struct dispatch_windows_s
*prev
;
42819 } dispatch_windows
;
42821 /* Immediate valuse used in an insn. */
42822 typedef struct imm_info_s
42829 static dispatch_windows
*dispatch_window_list
;
42830 static dispatch_windows
*dispatch_window_list1
;
42832 /* Get dispatch group of insn. */
42834 static enum dispatch_group
42835 get_mem_group (rtx insn
)
42837 enum attr_memory memory
;
42839 if (INSN_CODE (insn
) < 0)
42840 return disp_no_group
;
42841 memory
= get_attr_memory (insn
);
42842 if (memory
== MEMORY_STORE
)
42845 if (memory
== MEMORY_LOAD
)
42848 if (memory
== MEMORY_BOTH
)
42849 return disp_load_store
;
42851 return disp_no_group
;
42854 /* Return true if insn is a compare instruction. */
42859 enum attr_type type
;
42861 type
= get_attr_type (insn
);
42862 return (type
== TYPE_TEST
42863 || type
== TYPE_ICMP
42864 || type
== TYPE_FCMP
42865 || GET_CODE (PATTERN (insn
)) == COMPARE
);
42868 /* Return true if a dispatch violation encountered. */
42871 dispatch_violation (void)
42873 if (dispatch_window_list
->next
)
42874 return dispatch_window_list
->next
->violation
;
42875 return dispatch_window_list
->violation
;
42878 /* Return true if insn is a branch instruction. */
42881 is_branch (rtx insn
)
42883 return (CALL_P (insn
) || JUMP_P (insn
));
42886 /* Return true if insn is a prefetch instruction. */
42889 is_prefetch (rtx insn
)
42891 return NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == PREFETCH
;
42894 /* This function initializes a dispatch window and the list container holding a
42895 pointer to the window. */
42898 init_window (int window_num
)
42901 dispatch_windows
*new_list
;
42903 if (window_num
== 0)
42904 new_list
= dispatch_window_list
;
42906 new_list
= dispatch_window_list1
;
42908 new_list
->num_insn
= 0;
42909 new_list
->num_uops
= 0;
42910 new_list
->window_size
= 0;
42911 new_list
->next
= NULL
;
42912 new_list
->prev
= NULL
;
42913 new_list
->window_num
= window_num
;
42914 new_list
->num_imm
= 0;
42915 new_list
->num_imm_32
= 0;
42916 new_list
->num_imm_64
= 0;
42917 new_list
->imm_size
= 0;
42918 new_list
->num_loads
= 0;
42919 new_list
->num_stores
= 0;
42920 new_list
->violation
= false;
42922 for (i
= 0; i
< MAX_INSN
; i
++)
42924 new_list
->window
[i
].insn
= NULL
;
42925 new_list
->window
[i
].group
= disp_no_group
;
42926 new_list
->window
[i
].path
= no_path
;
42927 new_list
->window
[i
].byte_len
= 0;
42928 new_list
->window
[i
].imm_bytes
= 0;
42933 /* This function allocates and initializes a dispatch window and the
42934 list container holding a pointer to the window. */
42936 static dispatch_windows
*
42937 allocate_window (void)
42939 dispatch_windows
*new_list
= XNEW (struct dispatch_windows_s
);
42940 new_list
->window
= XNEWVEC (struct sched_insn_info_s
, MAX_INSN
+ 1);
42945 /* This routine initializes the dispatch scheduling information. It
42946 initiates building dispatch scheduler tables and constructs the
42947 first dispatch window. */
42950 init_dispatch_sched (void)
42952 /* Allocate a dispatch list and a window. */
42953 dispatch_window_list
= allocate_window ();
42954 dispatch_window_list1
= allocate_window ();
42959 /* This function returns true if a branch is detected. End of a basic block
42960 does not have to be a branch, but here we assume only branches end a
42964 is_end_basic_block (enum dispatch_group group
)
42966 return group
== disp_branch
;
42969 /* This function is called when the end of a window processing is reached. */
42972 process_end_window (void)
42974 gcc_assert (dispatch_window_list
->num_insn
<= MAX_INSN
);
42975 if (dispatch_window_list
->next
)
42977 gcc_assert (dispatch_window_list1
->num_insn
<= MAX_INSN
);
42978 gcc_assert (dispatch_window_list
->window_size
42979 + dispatch_window_list1
->window_size
<= 48);
42985 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
42986 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
42987 for 48 bytes of instructions. Note that these windows are not dispatch
42988 windows that their sizes are DISPATCH_WINDOW_SIZE. */
42990 static dispatch_windows
*
42991 allocate_next_window (int window_num
)
42993 if (window_num
== 0)
42995 if (dispatch_window_list
->next
)
42998 return dispatch_window_list
;
43001 dispatch_window_list
->next
= dispatch_window_list1
;
43002 dispatch_window_list1
->prev
= dispatch_window_list
;
43004 return dispatch_window_list1
;
43007 /* Increment the number of immediate operands of an instruction. */
43010 find_constant_1 (rtx
*in_rtx
, imm_info
*imm_values
)
43015 switch ( GET_CODE (*in_rtx
))
43020 (imm_values
->imm
)++;
43021 if (x86_64_immediate_operand (*in_rtx
, SImode
))
43022 (imm_values
->imm32
)++;
43024 (imm_values
->imm64
)++;
43028 (imm_values
->imm
)++;
43029 (imm_values
->imm64
)++;
43033 if (LABEL_KIND (*in_rtx
) == LABEL_NORMAL
)
43035 (imm_values
->imm
)++;
43036 (imm_values
->imm32
)++;
43047 /* Compute number of immediate operands of an instruction. */
43050 find_constant (rtx in_rtx
, imm_info
*imm_values
)
43052 for_each_rtx (INSN_P (in_rtx
) ? &PATTERN (in_rtx
) : &in_rtx
,
43053 (rtx_function
) find_constant_1
, (void *) imm_values
);
43056 /* Return total size of immediate operands of an instruction along with number
43057 of corresponding immediate-operands. It initializes its parameters to zero
43058 befor calling FIND_CONSTANT.
43059 INSN is the input instruction. IMM is the total of immediates.
43060 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
43064 get_num_immediates (rtx insn
, int *imm
, int *imm32
, int *imm64
)
43066 imm_info imm_values
= {0, 0, 0};
43068 find_constant (insn
, &imm_values
);
43069 *imm
= imm_values
.imm
;
43070 *imm32
= imm_values
.imm32
;
43071 *imm64
= imm_values
.imm64
;
43072 return imm_values
.imm32
* 4 + imm_values
.imm64
* 8;
43075 /* This function indicates if an operand of an instruction is an
43079 has_immediate (rtx insn
)
43081 int num_imm_operand
;
43082 int num_imm32_operand
;
43083 int num_imm64_operand
;
43086 return get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
43087 &num_imm64_operand
);
43091 /* Return single or double path for instructions. */
43093 static enum insn_path
43094 get_insn_path (rtx insn
)
43096 enum attr_amdfam10_decode path
= get_attr_amdfam10_decode (insn
);
43098 if ((int)path
== 0)
43099 return path_single
;
43101 if ((int)path
== 1)
43102 return path_double
;
43107 /* Return insn dispatch group. */
43109 static enum dispatch_group
43110 get_insn_group (rtx insn
)
43112 enum dispatch_group group
= get_mem_group (insn
);
43116 if (is_branch (insn
))
43117 return disp_branch
;
43122 if (has_immediate (insn
))
43125 if (is_prefetch (insn
))
43126 return disp_prefetch
;
43128 return disp_no_group
;
43131 /* Count number of GROUP restricted instructions in a dispatch
43132 window WINDOW_LIST. */
43135 count_num_restricted (rtx insn
, dispatch_windows
*window_list
)
43137 enum dispatch_group group
= get_insn_group (insn
);
43139 int num_imm_operand
;
43140 int num_imm32_operand
;
43141 int num_imm64_operand
;
43143 if (group
== disp_no_group
)
43146 if (group
== disp_imm
)
43148 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
43149 &num_imm64_operand
);
43150 if (window_list
->imm_size
+ imm_size
> MAX_IMM_SIZE
43151 || num_imm_operand
+ window_list
->num_imm
> MAX_IMM
43152 || (num_imm32_operand
> 0
43153 && (window_list
->num_imm_32
+ num_imm32_operand
> MAX_IMM_32
43154 || window_list
->num_imm_64
* 2 + num_imm32_operand
> MAX_IMM_32
))
43155 || (num_imm64_operand
> 0
43156 && (window_list
->num_imm_64
+ num_imm64_operand
> MAX_IMM_64
43157 || window_list
->num_imm_32
+ num_imm64_operand
* 2 > MAX_IMM_32
))
43158 || (window_list
->imm_size
+ imm_size
== MAX_IMM_SIZE
43159 && num_imm64_operand
> 0
43160 && ((window_list
->num_imm_64
> 0
43161 && window_list
->num_insn
>= 2)
43162 || window_list
->num_insn
>= 3)))
43168 if ((group
== disp_load_store
43169 && (window_list
->num_loads
>= MAX_LOAD
43170 || window_list
->num_stores
>= MAX_STORE
))
43171 || ((group
== disp_load
43172 || group
== disp_prefetch
)
43173 && window_list
->num_loads
>= MAX_LOAD
)
43174 || (group
== disp_store
43175 && window_list
->num_stores
>= MAX_STORE
))
43181 /* This function returns true if insn satisfies dispatch rules on the
43182 last window scheduled. */
43185 fits_dispatch_window (rtx insn
)
43187 dispatch_windows
*window_list
= dispatch_window_list
;
43188 dispatch_windows
*window_list_next
= dispatch_window_list
->next
;
43189 unsigned int num_restrict
;
43190 enum dispatch_group group
= get_insn_group (insn
);
43191 enum insn_path path
= get_insn_path (insn
);
43194 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
43195 instructions should be given the lowest priority in the
43196 scheduling process in Haifa scheduler to make sure they will be
43197 scheduled in the same dispatch window as the reference to them. */
43198 if (group
== disp_jcc
|| group
== disp_cmp
)
43201 /* Check nonrestricted. */
43202 if (group
== disp_no_group
|| group
== disp_branch
)
43205 /* Get last dispatch window. */
43206 if (window_list_next
)
43207 window_list
= window_list_next
;
43209 if (window_list
->window_num
== 1)
43211 sum
= window_list
->prev
->window_size
+ window_list
->window_size
;
43214 || (min_insn_size (insn
) + sum
) >= 48)
43215 /* Window 1 is full. Go for next window. */
43219 num_restrict
= count_num_restricted (insn
, window_list
);
43221 if (num_restrict
> num_allowable_groups
[group
])
43224 /* See if it fits in the first window. */
43225 if (window_list
->window_num
== 0)
43227 /* The first widow should have only single and double path
43229 if (path
== path_double
43230 && (window_list
->num_uops
+ 2) > MAX_INSN
)
43232 else if (path
!= path_single
)
43238 /* Add an instruction INSN with NUM_UOPS micro-operations to the
43239 dispatch window WINDOW_LIST. */
43242 add_insn_window (rtx insn
, dispatch_windows
*window_list
, int num_uops
)
43244 int byte_len
= min_insn_size (insn
);
43245 int num_insn
= window_list
->num_insn
;
43247 sched_insn_info
*window
= window_list
->window
;
43248 enum dispatch_group group
= get_insn_group (insn
);
43249 enum insn_path path
= get_insn_path (insn
);
43250 int num_imm_operand
;
43251 int num_imm32_operand
;
43252 int num_imm64_operand
;
43254 if (!window_list
->violation
&& group
!= disp_cmp
43255 && !fits_dispatch_window (insn
))
43256 window_list
->violation
= true;
43258 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
43259 &num_imm64_operand
);
43261 /* Initialize window with new instruction. */
43262 window
[num_insn
].insn
= insn
;
43263 window
[num_insn
].byte_len
= byte_len
;
43264 window
[num_insn
].group
= group
;
43265 window
[num_insn
].path
= path
;
43266 window
[num_insn
].imm_bytes
= imm_size
;
43268 window_list
->window_size
+= byte_len
;
43269 window_list
->num_insn
= num_insn
+ 1;
43270 window_list
->num_uops
= window_list
->num_uops
+ num_uops
;
43271 window_list
->imm_size
+= imm_size
;
43272 window_list
->num_imm
+= num_imm_operand
;
43273 window_list
->num_imm_32
+= num_imm32_operand
;
43274 window_list
->num_imm_64
+= num_imm64_operand
;
43276 if (group
== disp_store
)
43277 window_list
->num_stores
+= 1;
43278 else if (group
== disp_load
43279 || group
== disp_prefetch
)
43280 window_list
->num_loads
+= 1;
43281 else if (group
== disp_load_store
)
43283 window_list
->num_stores
+= 1;
43284 window_list
->num_loads
+= 1;
43288 /* Adds a scheduled instruction, INSN, to the current dispatch window.
43289 If the total bytes of instructions or the number of instructions in
43290 the window exceed allowable, it allocates a new window. */
43293 add_to_dispatch_window (rtx insn
)
43296 dispatch_windows
*window_list
;
43297 dispatch_windows
*next_list
;
43298 dispatch_windows
*window0_list
;
43299 enum insn_path path
;
43300 enum dispatch_group insn_group
;
43308 if (INSN_CODE (insn
) < 0)
43311 byte_len
= min_insn_size (insn
);
43312 window_list
= dispatch_window_list
;
43313 next_list
= window_list
->next
;
43314 path
= get_insn_path (insn
);
43315 insn_group
= get_insn_group (insn
);
43317 /* Get the last dispatch window. */
43319 window_list
= dispatch_window_list
->next
;
43321 if (path
== path_single
)
43323 else if (path
== path_double
)
43326 insn_num_uops
= (int) path
;
43328 /* If current window is full, get a new window.
43329 Window number zero is full, if MAX_INSN uops are scheduled in it.
43330 Window number one is full, if window zero's bytes plus window
43331 one's bytes is 32, or if the bytes of the new instruction added
43332 to the total makes it greater than 48, or it has already MAX_INSN
43333 instructions in it. */
43334 num_insn
= window_list
->num_insn
;
43335 num_uops
= window_list
->num_uops
;
43336 window_num
= window_list
->window_num
;
43337 insn_fits
= fits_dispatch_window (insn
);
43339 if (num_insn
>= MAX_INSN
43340 || num_uops
+ insn_num_uops
> MAX_INSN
43343 window_num
= ~window_num
& 1;
43344 window_list
= allocate_next_window (window_num
);
43347 if (window_num
== 0)
43349 add_insn_window (insn
, window_list
, insn_num_uops
);
43350 if (window_list
->num_insn
>= MAX_INSN
43351 && insn_group
== disp_branch
)
43353 process_end_window ();
43357 else if (window_num
== 1)
43359 window0_list
= window_list
->prev
;
43360 sum
= window0_list
->window_size
+ window_list
->window_size
;
43362 || (byte_len
+ sum
) >= 48)
43364 process_end_window ();
43365 window_list
= dispatch_window_list
;
43368 add_insn_window (insn
, window_list
, insn_num_uops
);
43371 gcc_unreachable ();
43373 if (is_end_basic_block (insn_group
))
43375 /* End of basic block is reached do end-basic-block process. */
43376 process_end_window ();
43381 /* Print the dispatch window, WINDOW_NUM, to FILE. */
43383 DEBUG_FUNCTION
static void
43384 debug_dispatch_window_file (FILE *file
, int window_num
)
43386 dispatch_windows
*list
;
43389 if (window_num
== 0)
43390 list
= dispatch_window_list
;
43392 list
= dispatch_window_list1
;
43394 fprintf (file
, "Window #%d:\n", list
->window_num
);
43395 fprintf (file
, " num_insn = %d, num_uops = %d, window_size = %d\n",
43396 list
->num_insn
, list
->num_uops
, list
->window_size
);
43397 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
43398 list
->num_imm
, list
->num_imm_32
, list
->num_imm_64
, list
->imm_size
);
43400 fprintf (file
, " num_loads = %d, num_stores = %d\n", list
->num_loads
,
43402 fprintf (file
, " insn info:\n");
43404 for (i
= 0; i
< MAX_INSN
; i
++)
43406 if (!list
->window
[i
].insn
)
43408 fprintf (file
, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
43409 i
, group_name
[list
->window
[i
].group
],
43410 i
, (void *)list
->window
[i
].insn
,
43411 i
, list
->window
[i
].path
,
43412 i
, list
->window
[i
].byte_len
,
43413 i
, list
->window
[i
].imm_bytes
);
43417 /* Print to stdout a dispatch window. */
43419 DEBUG_FUNCTION
void
43420 debug_dispatch_window (int window_num
)
43422 debug_dispatch_window_file (stdout
, window_num
);
43425 /* Print INSN dispatch information to FILE. */
43427 DEBUG_FUNCTION
static void
43428 debug_insn_dispatch_info_file (FILE *file
, rtx insn
)
43431 enum insn_path path
;
43432 enum dispatch_group group
;
43434 int num_imm_operand
;
43435 int num_imm32_operand
;
43436 int num_imm64_operand
;
43438 if (INSN_CODE (insn
) < 0)
43441 byte_len
= min_insn_size (insn
);
43442 path
= get_insn_path (insn
);
43443 group
= get_insn_group (insn
);
43444 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
43445 &num_imm64_operand
);
43447 fprintf (file
, " insn info:\n");
43448 fprintf (file
, " group = %s, path = %d, byte_len = %d\n",
43449 group_name
[group
], path
, byte_len
);
43450 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
43451 num_imm_operand
, num_imm32_operand
, num_imm64_operand
, imm_size
);
43454 /* Print to STDERR the status of the ready list with respect to
43455 dispatch windows. */
43457 DEBUG_FUNCTION
void
43458 debug_ready_dispatch (void)
43461 int no_ready
= number_in_ready ();
43463 fprintf (stdout
, "Number of ready: %d\n", no_ready
);
43465 for (i
= 0; i
< no_ready
; i
++)
43466 debug_insn_dispatch_info_file (stdout
, get_ready_element (i
));
43469 /* This routine is the driver of the dispatch scheduler. */
43472 do_dispatch (rtx insn
, int mode
)
43474 if (mode
== DISPATCH_INIT
)
43475 init_dispatch_sched ();
43476 else if (mode
== ADD_TO_DISPATCH_WINDOW
)
43477 add_to_dispatch_window (insn
);
43480 /* Return TRUE if Dispatch Scheduling is supported. */
43483 has_dispatch (rtx insn
, int action
)
43485 if ((TARGET_BDVER1
|| TARGET_BDVER2
|| TARGET_BDVER3
|| TARGET_BDVER4
)
43486 && flag_dispatch_scheduler
)
43492 case IS_DISPATCH_ON
:
43497 return is_cmp (insn
);
43499 case DISPATCH_VIOLATION
:
43500 return dispatch_violation ();
43502 case FITS_DISPATCH_WINDOW
:
43503 return fits_dispatch_window (insn
);
43509 /* Implementation of reassociation_width target hook used by
43510 reassoc phase to identify parallelism level in reassociated
43511 tree. Statements tree_code is passed in OPC. Arguments type
43514 Currently parallel reassociation is enabled for Atom
43515 processors only and we set reassociation width to be 2
43516 because Atom may issue up to 2 instructions per cycle.
43518 Return value should be fixed if parallel reassociation is
43519 enabled for other processors. */
43522 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
43523 enum machine_mode mode
)
43527 if (INTEGRAL_MODE_P (mode
) && TARGET_REASSOC_INT_TO_PARALLEL
)
43529 else if (FLOAT_MODE_P (mode
) && TARGET_REASSOC_FP_TO_PARALLEL
)
43535 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
43536 place emms and femms instructions. */
43538 static enum machine_mode
43539 ix86_preferred_simd_mode (enum machine_mode mode
)
43547 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V32QImode
: V16QImode
;
43549 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V16HImode
: V8HImode
;
43551 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V8SImode
: V4SImode
;
43553 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V4DImode
: V2DImode
;
43556 if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
43562 if (!TARGET_VECTORIZE_DOUBLE
)
43564 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
43566 else if (TARGET_SSE2
)
43575 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
43578 static unsigned int
43579 ix86_autovectorize_vector_sizes (void)
43581 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? 32 | 16 : 0;
43586 /* Return class of registers which could be used for pseudo of MODE
43587 and of class RCLASS for spilling instead of memory. Return NO_REGS
43588 if it is not possible or non-profitable. */
43590 ix86_spill_class (reg_class_t rclass
, enum machine_mode mode
)
43592 if (TARGET_SSE
&& TARGET_GENERAL_REGS_SSE_SPILL
&& ! TARGET_MMX
43593 && (mode
== SImode
|| (TARGET_64BIT
&& mode
== DImode
))
43594 && INTEGER_CLASS_P (rclass
))
43595 return ALL_SSE_REGS
;
43599 /* Implement targetm.vectorize.init_cost. */
43602 ix86_init_cost (struct loop
*loop_info ATTRIBUTE_UNUSED
)
43604 unsigned *cost
= XNEWVEC (unsigned, 3);
43605 cost
[vect_prologue
] = cost
[vect_body
] = cost
[vect_epilogue
] = 0;
43609 /* Implement targetm.vectorize.add_stmt_cost. */
43612 ix86_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
43613 struct _stmt_vec_info
*stmt_info
, int misalign
,
43614 enum vect_cost_model_location where
)
43616 unsigned *cost
= (unsigned *) data
;
43617 unsigned retval
= 0;
43619 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
43620 int stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
43622 /* Statements in an inner loop relative to the loop being
43623 vectorized are weighted more heavily. The value here is
43624 arbitrary and could potentially be improved with analysis. */
43625 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
43626 count
*= 50; /* FIXME. */
43628 retval
= (unsigned) (count
* stmt_cost
);
43629 cost
[where
] += retval
;
43634 /* Implement targetm.vectorize.finish_cost. */
43637 ix86_finish_cost (void *data
, unsigned *prologue_cost
,
43638 unsigned *body_cost
, unsigned *epilogue_cost
)
43640 unsigned *cost
= (unsigned *) data
;
43641 *prologue_cost
= cost
[vect_prologue
];
43642 *body_cost
= cost
[vect_body
];
43643 *epilogue_cost
= cost
[vect_epilogue
];
43646 /* Implement targetm.vectorize.destroy_cost_data. */
43649 ix86_destroy_cost_data (void *data
)
43654 /* Validate target specific memory model bits in VAL. */
43656 static unsigned HOST_WIDE_INT
43657 ix86_memmodel_check (unsigned HOST_WIDE_INT val
)
43659 unsigned HOST_WIDE_INT model
= val
& MEMMODEL_MASK
;
43662 if (val
& ~(unsigned HOST_WIDE_INT
)(IX86_HLE_ACQUIRE
|IX86_HLE_RELEASE
43664 || ((val
& IX86_HLE_ACQUIRE
) && (val
& IX86_HLE_RELEASE
)))
43666 warning (OPT_Winvalid_memory_model
,
43667 "Unknown architecture specific memory model");
43668 return MEMMODEL_SEQ_CST
;
43670 strong
= (model
== MEMMODEL_ACQ_REL
|| model
== MEMMODEL_SEQ_CST
);
43671 if (val
& IX86_HLE_ACQUIRE
&& !(model
== MEMMODEL_ACQUIRE
|| strong
))
43673 warning (OPT_Winvalid_memory_model
,
43674 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
43675 return MEMMODEL_SEQ_CST
| IX86_HLE_ACQUIRE
;
43677 if (val
& IX86_HLE_RELEASE
&& !(model
== MEMMODEL_RELEASE
|| strong
))
43679 warning (OPT_Winvalid_memory_model
,
43680 "HLE_RELEASE not used with RELEASE or stronger memory model");
43681 return MEMMODEL_SEQ_CST
| IX86_HLE_RELEASE
;
43686 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
43689 ix86_float_exceptions_rounding_supported_p (void)
43691 /* For x87 floating point with standard excess precision handling,
43692 there is no adddf3 pattern (since x87 floating point only has
43693 XFmode operations) so the default hook implementation gets this
43695 return TARGET_80387
|| TARGET_SSE_MATH
;
43698 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
43701 ix86_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
43703 if (!TARGET_80387
&& !TARGET_SSE_MATH
)
43705 tree exceptions_var
= create_tmp_var (integer_type_node
, NULL
);
43708 tree fenv_index_type
= build_index_type (size_int (6));
43709 tree fenv_type
= build_array_type (unsigned_type_node
, fenv_index_type
);
43710 tree fenv_var
= create_tmp_var (fenv_type
, NULL
);
43711 mark_addressable (fenv_var
);
43712 tree fenv_ptr
= build_pointer_type (fenv_type
);
43713 tree fenv_addr
= build1 (ADDR_EXPR
, fenv_ptr
, fenv_var
);
43714 fenv_addr
= fold_convert (ptr_type_node
, fenv_addr
);
43715 tree fnstenv
= ix86_builtins
[IX86_BUILTIN_FNSTENV
];
43716 tree fldenv
= ix86_builtins
[IX86_BUILTIN_FLDENV
];
43717 tree fnstsw
= ix86_builtins
[IX86_BUILTIN_FNSTSW
];
43718 tree fnclex
= ix86_builtins
[IX86_BUILTIN_FNCLEX
];
43719 tree hold_fnstenv
= build_call_expr (fnstenv
, 1, fenv_addr
);
43720 tree hold_fnclex
= build_call_expr (fnclex
, 0);
43721 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, hold_fnstenv
,
43723 *clear
= build_call_expr (fnclex
, 0);
43724 tree sw_var
= create_tmp_var (short_unsigned_type_node
, NULL
);
43725 mark_addressable (sw_var
);
43726 tree su_ptr
= build_pointer_type (short_unsigned_type_node
);
43727 tree sw_addr
= build1 (ADDR_EXPR
, su_ptr
, sw_var
);
43728 tree fnstsw_call
= build_call_expr (fnstsw
, 1, sw_addr
);
43729 tree exceptions_x87
= fold_convert (integer_type_node
, sw_var
);
43730 tree update_mod
= build2 (MODIFY_EXPR
, integer_type_node
,
43731 exceptions_var
, exceptions_x87
);
43732 *update
= build2 (COMPOUND_EXPR
, integer_type_node
,
43733 fnstsw_call
, update_mod
);
43734 tree update_fldenv
= build_call_expr (fldenv
, 1, fenv_addr
);
43735 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
, update_fldenv
);
43737 if (TARGET_SSE_MATH
)
43739 tree mxcsr_orig_var
= create_tmp_var (unsigned_type_node
, NULL
);
43740 tree mxcsr_mod_var
= create_tmp_var (unsigned_type_node
, NULL
);
43741 tree stmxcsr
= ix86_builtins
[IX86_BUILTIN_STMXCSR
];
43742 tree ldmxcsr
= ix86_builtins
[IX86_BUILTIN_LDMXCSR
];
43743 tree stmxcsr_hold_call
= build_call_expr (stmxcsr
, 0);
43744 tree hold_assign_orig
= build2 (MODIFY_EXPR
, unsigned_type_node
,
43745 mxcsr_orig_var
, stmxcsr_hold_call
);
43746 tree hold_mod_val
= build2 (BIT_IOR_EXPR
, unsigned_type_node
,
43748 build_int_cst (unsigned_type_node
, 0x1f80));
43749 hold_mod_val
= build2 (BIT_AND_EXPR
, unsigned_type_node
, hold_mod_val
,
43750 build_int_cst (unsigned_type_node
, 0xffffffc0));
43751 tree hold_assign_mod
= build2 (MODIFY_EXPR
, unsigned_type_node
,
43752 mxcsr_mod_var
, hold_mod_val
);
43753 tree ldmxcsr_hold_call
= build_call_expr (ldmxcsr
, 1, mxcsr_mod_var
);
43754 tree hold_all
= build2 (COMPOUND_EXPR
, unsigned_type_node
,
43755 hold_assign_orig
, hold_assign_mod
);
43756 hold_all
= build2 (COMPOUND_EXPR
, void_type_node
, hold_all
,
43757 ldmxcsr_hold_call
);
43759 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, *hold
, hold_all
);
43762 tree ldmxcsr_clear_call
= build_call_expr (ldmxcsr
, 1, mxcsr_mod_var
);
43764 *clear
= build2 (COMPOUND_EXPR
, void_type_node
, *clear
,
43765 ldmxcsr_clear_call
);
43767 *clear
= ldmxcsr_clear_call
;
43768 tree stxmcsr_update_call
= build_call_expr (stmxcsr
, 0);
43769 tree exceptions_sse
= fold_convert (integer_type_node
,
43770 stxmcsr_update_call
);
43773 tree exceptions_mod
= build2 (BIT_IOR_EXPR
, integer_type_node
,
43774 exceptions_var
, exceptions_sse
);
43775 tree exceptions_assign
= build2 (MODIFY_EXPR
, integer_type_node
,
43776 exceptions_var
, exceptions_mod
);
43777 *update
= build2 (COMPOUND_EXPR
, integer_type_node
, *update
,
43778 exceptions_assign
);
43781 *update
= build2 (MODIFY_EXPR
, integer_type_node
,
43782 exceptions_var
, exceptions_sse
);
43783 tree ldmxcsr_update_call
= build_call_expr (ldmxcsr
, 1, mxcsr_orig_var
);
43784 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
,
43785 ldmxcsr_update_call
);
43787 tree atomic_feraiseexcept
43788 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT
);
43789 tree atomic_feraiseexcept_call
= build_call_expr (atomic_feraiseexcept
,
43790 1, exceptions_var
);
43791 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
,
43792 atomic_feraiseexcept_call
);
/* Initialize the GCC target structure.  Each TARGET_* macro below overrides
   the default target hook before `targetm` is built from TARGET_INITIALIZER.
   NOTE(review): the #if/#ifdef/#else/#endif guard lines in this table were
   reconstructed from the surrounding context (the extraction dropped them,
   leaving the conditionals unbalanced) -- confirm against upstream i386.c.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load

#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

/* Allow a subtarget (e.g. PE/COFF, Darwin) to supply its own
   section-info encoder; fall back to the generic ix86 one.  */
#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name

/* x86 has no alignment penalty for data directives, so unaligned
   emission reuses the aligned pseudo-ops.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead
#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_MEMMODEL_CHECK
#define TARGET_MEMMODEL_CHECK ix86_memmodel_check

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

/* Darwin and PE/COFF each override local-binding rules; when both
   conditions hold the PE/COFF definition (last one) wins.  */
#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p
44178 struct gcc_target targetm
= TARGET_INITIALIZER
;
44180 #include "gt-i386.h"