]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/i386.c
c-format.c (handle_format_attribute): Fix -Wc++-compat and/or -Wcast-qual warnings.
[thirdparty/gcc.git] / gcc / config / i386 / i386.c
CommitLineData
/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
2a2ab3f9 21
2a2ab3f9 22#include "config.h"
bb5177ac 23#include "system.h"
4977bab6
ZW
24#include "coretypes.h"
25#include "tm.h"
2a2ab3f9 26#include "rtl.h"
6baf1cc8
BS
27#include "tree.h"
28#include "tm_p.h"
2a2ab3f9
JVA
29#include "regs.h"
30#include "hard-reg-set.h"
31#include "real.h"
32#include "insn-config.h"
33#include "conditions.h"
2a2ab3f9 34#include "output.h"
8bc527af 35#include "insn-codes.h"
2a2ab3f9 36#include "insn-attr.h"
2a2ab3f9 37#include "flags.h"
c818d019 38#include "c-common.h"
a8ffcc81 39#include "except.h"
ecbc4695 40#include "function.h"
00c79232 41#include "recog.h"
ced8dd8c 42#include "expr.h"
e78d8e51 43#include "optabs.h"
f103890b 44#include "toplev.h"
e075ae69 45#include "basic-block.h"
1526a060 46#include "ggc.h"
672a6f42
NB
47#include "target.h"
48#include "target-def.h"
f1e639b1 49#include "langhooks.h"
dafc5b82 50#include "cgraph.h"
cd3ce9b4 51#include "tree-gimple.h"
72ce3d4a 52#include "dwarf2.h"
6fb5fa3c 53#include "df.h"
279bb624 54#include "tm-constrs.h"
47eb5b32 55#include "params.h"
2a2ab3f9 56
e70444a8 57static int x86_builtin_vectorization_cost (bool);
8502420b 58static rtx legitimize_dllimport_symbol (rtx, bool);
e70444a8 59
/* Default stack-probe limit: -1 means "no limit"; targets may override
   this before we get here, hence the #ifndef guard.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
63
3c50106f
RH
/* Return index of given mode in mult and division cost tables.
   QI..DI map to 0..3; anything else falls into the catch-all slot 4.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
71
/* Processor costs (relative to an add).  */

/* When tuning for size we measure costs in bytes rather than cycles.
   We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes,
   so COSTS_N_BYTES keeps byte costs on a scale comparable to insn costs.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder stringop strategy for table slots that are never consulted.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
77
fce5a9f2 78static const
2a8a8292 79struct processor_costs size_cost = { /* costs for tuning for size */
3dd0df7f
RS
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
2ab0437e 89 0, /* cost of multiply per each bit set */
3dd0df7f
RS
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
2ab0437e
JH
97 0, /* "large" insn */
98 2, /* MOVE_RATIO */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
75bcbcdb
L
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
2ab0437e
JH
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
46cb0441
ZD
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
f4365627
JH
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
3dd0df7f
RS
124 2, /* Branch cost */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
8c996513
JH
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
e70444a8
HJ
134 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
2ab0437e 146};
229b303a 147
32b5b1aa 148/* Processor costs (relative to an add) */
fce5a9f2 149static const
32b5b1aa 150struct processor_costs i386_cost = { /* 386 specific costs */
a9cc9cc6
JH
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
96e7ae40 168 15, /* "large" insn */
e2e52e1b 169 3, /* MOVE_RATIO */
7c6b971d 170 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
0f290768 173 Relative to reg-reg move (2). */
96e7ae40
JH
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
75bcbcdb
L
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
fa79946e
JH
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
46cb0441
ZD
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
f4365627
JH
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
4977bab6 195 1, /* Branch cost */
a9cc9cc6
JH
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
8c996513
JH
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
205 DUMMY_STRINGOP_ALGS},
e70444a8
HJ
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
32b5b1aa
SC
217};
218
fce5a9f2 219static const
32b5b1aa 220struct processor_costs i486_cost = { /* 486 specific costs */
a9cc9cc6
JH
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
32b5b1aa 230 1, /* cost of multiply per each bit set */
a9cc9cc6
JH
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
96e7ae40 238 15, /* "large" insn */
e2e52e1b 239 3, /* MOVE_RATIO */
7c6b971d 240 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
0f290768 243 Relative to reg-reg move (2). */
96e7ae40
JH
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
75bcbcdb
L
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
fa79946e
JH
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
f4365627 260 3, /* MMX or SSE register to integer */
46cb0441
ZD
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
f4365627
JH
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
4977bab6 267 1, /* Branch cost */
a9cc9cc6
JH
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
8c996513
JH
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
e70444a8
HJ
277 DUMMY_STRINGOP_ALGS},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
32b5b1aa
SC
289};
290
fce5a9f2 291static const
e5cb57e8 292struct processor_costs pentium_cost = {
a9cc9cc6
JH
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
856b07a1 302 0, /* cost of multiply per each bit set */
a9cc9cc6
JH
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
96e7ae40 310 8, /* "large" insn */
e2e52e1b 311 6, /* MOVE_RATIO */
7c6b971d 312 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
0f290768 315 Relative to reg-reg move (2). */
96e7ae40
JH
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
75bcbcdb
L
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
fa79946e
JH
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
f4365627 332 3, /* MMX or SSE register to integer */
46cb0441
ZD
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
f4365627
JH
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
4977bab6 337 2, /* Branch cost */
a9cc9cc6
JH
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
8c996513
JH
344 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
345 DUMMY_STRINGOP_ALGS},
346 {{libcall, {{-1, rep_prefix_4_byte}}},
e70444a8
HJ
347 DUMMY_STRINGOP_ALGS},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
32b5b1aa
SC
359};
360
fce5a9f2 361static const
856b07a1 362struct processor_costs pentiumpro_cost = {
a9cc9cc6
JH
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
856b07a1 372 0, /* cost of multiply per each bit set */
a9cc9cc6
JH
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
96e7ae40 380 8, /* "large" insn */
e2e52e1b 381 6, /* MOVE_RATIO */
7c6b971d 382 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
0f290768 385 Relative to reg-reg move (2). */
96e7ae40
JH
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
75bcbcdb
L
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
fa79946e
JH
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
f4365627 402 3, /* MMX or SSE register to integer */
46cb0441
ZD
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
f4365627
JH
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
4977bab6 407 2, /* Branch cost */
a9cc9cc6
JH
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
8c996513
JH
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
415 the alignment). For small blocks inline loop is still a noticeable win, for bigger
416 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
417 more expensive startup time in CPU, but after 4K the difference is down in the noise.
418 */
419 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
421 DUMMY_STRINGOP_ALGS},
422 {{rep_prefix_4_byte, {{1024, unrolled_loop},
423 {8192, rep_prefix_4_byte}, {-1, libcall}}},
e70444a8
HJ
424 DUMMY_STRINGOP_ALGS},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
856b07a1
SC
436};
437
cfe1b18f
VM
438static const
439struct processor_costs geode_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
458 4, /* MOVE_RATIO */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
469
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
46cb0441
ZD
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
cfe1b18f
VM
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
485 1, /* Branch cost */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
8c996513
JH
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
e70444a8
HJ
495 DUMMY_STRINGOP_ALGS},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
cfe1b18f
VM
507};
508
fce5a9f2 509static const
a269a03c 510struct processor_costs k6_cost = {
a9cc9cc6
JH
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
a269a03c 520 0, /* cost of multiply per each bit set */
a9cc9cc6
JH
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
96e7ae40 528 8, /* "large" insn */
e2e52e1b 529 4, /* MOVE_RATIO */
7c6b971d 530 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
0f290768 533 Relative to reg-reg move (2). */
96e7ae40
JH
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
75bcbcdb
L
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
fa79946e
JH
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
f4365627 550 6, /* MMX or SSE register to integer */
46cb0441
ZD
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
f4365627
JH
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
4977bab6 558 1, /* Branch cost */
a9cc9cc6
JH
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
8c996513
JH
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
e70444a8
HJ
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
a269a03c
JC
580};
581
fce5a9f2 582static const
309ada50 583struct processor_costs athlon_cost = {
a9cc9cc6
JH
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
309ada50 593 0, /* cost of multiply per each bit set */
a9cc9cc6
JH
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
309ada50 601 8, /* "large" insn */
e2e52e1b 602 9, /* MOVE_RATIO */
309ada50 603 4, /* cost for loading QImode using movzbl */
b72b1c29 604 {3, 4, 3}, /* cost of loading integer registers
309ada50 605 in QImode, HImode and SImode.
0f290768 606 Relative to reg-reg move (2). */
b72b1c29 607 {3, 4, 3}, /* cost of storing integer registers */
309ada50 608 4, /* cost of reg,reg fld/fst */
b72b1c29 609 {4, 4, 12}, /* cost of loading fp registers
309ada50 610 in SFmode, DFmode and XFmode */
75bcbcdb
L
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
fa79946e 613 2, /* cost of moving MMX register */
b72b1c29 614 {4, 4}, /* cost of loading MMX registers
fa79946e 615 in SImode and DImode */
b72b1c29 616 {4, 4}, /* cost of storing MMX registers
fa79946e
JH
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
b72b1c29 619 {4, 4, 6}, /* cost of loading SSE registers
fa79946e 620 in SImode, DImode and TImode */
b72b1c29 621 {4, 4, 5}, /* cost of storing SSE registers
fa79946e 622 in SImode, DImode and TImode */
b72b1c29 623 5, /* MMX or SSE register to integer */
46cb0441
ZD
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
f4365627
JH
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
8c1e80e9 628 5, /* Branch cost */
a9cc9cc6
JH
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
8c996513 635 /* For some reason, Athlon deals better with REP prefix (relative to loops)
9c134b65 636 compared to K8. Alignment becomes important after 8 bytes for memcpy and
8c996513
JH
637 128 bytes for memset. */
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
e70444a8
HJ
641 DUMMY_STRINGOP_ALGS},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
309ada50
JH
653};
654
4977bab6
ZW
655static const
656struct processor_costs k8_cost = {
a9cc9cc6
JH
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
4977bab6 666 0, /* cost of multiply per each bit set */
a9cc9cc6
JH
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
4977bab6
ZW
674 8, /* "large" insn */
675 9, /* MOVE_RATIO */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
75bcbcdb
L
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
4977bab6
ZW
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
46cb0441
ZD
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
4977bab6 699 64, /* size of prefetch block */
8fbbf354 700 /* New AMD processors never drop prefetches; if they cannot be performed
47eb5b32
ZD
701 immediately, they are queued. We set number of simultaneous prefetches
702 to a large constant to reflect this (it probably is not a good idea not
703 to limit number of prefetches at all, as their execution also takes some
704 time). */
705 100, /* number of parallel prefetches */
cedbd764 706 3, /* Branch cost */
a9cc9cc6
JH
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
8c996513
JH
713 /* K8 has optimized REP instruction for medium sized blocks, but for very small
714 blocks it is better to use loop. For large blocks, libcall can do
715 nontemporary accesses and beat inline considerably. */
716 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 {{libcall, {{8, loop}, {24, unrolled_loop},
719 {2048, rep_prefix_4_byte}, {-1, libcall}}},
e70444a8
HJ
720 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
cedbd764
JS
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
4977bab6
ZW
732};
733
21efb4d4
HJ
734struct processor_costs amdfam10_cost = {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
753 9, /* MOVE_RATIO */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
775 /* On K8
776 MOVD reg64, xmmreg Double FSTORE 4
777 MOVD reg32, xmmreg Double FSTORE 4
778 On AMDFAM10
779 MOVD reg64, xmmreg Double FADD 3
780 1/1 1/1
781 MOVD reg32, xmmreg Double FADD 3
782 1/1 1/1 */
46cb0441
ZD
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
21efb4d4
HJ
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some
790 time). */
791 100, /* number of parallel prefetches */
cedbd764 792 2, /* Branch cost */
21efb4d4
HJ
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
799
800 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
801 very small blocks it is better to use loop. For large blocks, libcall can
802 do nontemporary accesses and beat inline considerably. */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
e70444a8
HJ
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
cedbd764 817 2, /* cond_taken_branch_cost. */
e70444a8 818 1, /* cond_not_taken_branch_cost. */
21efb4d4
HJ
819};
820
fce5a9f2 821static const
b4e89e2d 822struct processor_costs pentium4_cost = {
a9cc9cc6
JH
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
b4e89e2d 832 0, /* cost of multiply per each bit set */
a9cc9cc6
JH
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
b4e89e2d
JH
840 16, /* "large" insn */
841 6, /* MOVE_RATIO */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
75bcbcdb
L
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
b4e89e2d
JH
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
46cb0441
ZD
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
f4365627
JH
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
4977bab6 867 2, /* Branch cost */
a9cc9cc6
JH
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
e850f028 874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
cc0faf9d 875 DUMMY_STRINGOP_ALGS},
e850f028 876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
cc0faf9d
JH
877 {-1, libcall}}},
878 DUMMY_STRINGOP_ALGS},
e70444a8
HJ
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
b4e89e2d
JH
890};
891
89c43c0a
VM
892static const
893struct processor_costs nocona_cost = {
a9cc9cc6
JH
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
89c43c0a 903 0, /* cost of multiply per each bit set */
a9cc9cc6
JH
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
89c43c0a 911 16, /* "large" insn */
ea407814 912 17, /* MOVE_RATIO */
89c43c0a
VM
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
75bcbcdb
L
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
89c43c0a
VM
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
46cb0441
ZD
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
89c43c0a
VM
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
938 1, /* Branch cost */
a9cc9cc6
JH
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
e850f028 945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
8c996513
JH
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
e850f028 948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
cc0faf9d 949 {-1, libcall}}},
8c996513 950 {libcall, {{24, loop}, {64, unrolled_loop},
e70444a8
HJ
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
89c43c0a
VM
963};
964
05f85dbb
VM
965static const
966struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
985 16, /* MOVE_RATIO */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of loading integer registers */
995 2, /* cost of moving MMX register */
996 {6, 6}, /* cost of loading MMX registers
997 in SImode and DImode */
998 {4, 4}, /* cost of storing MMX registers
999 in SImode and DImode */
1000 2, /* cost of moving SSE register */
1001 {6, 6, 6}, /* cost of loading SSE registers
1002 in SImode, DImode and TImode */
1003 {4, 4, 4}, /* cost of storing SSE registers
1004 in SImode, DImode and TImode */
1005 2, /* MMX or SSE register to integer */
46cb0441
ZD
1006 32, /* size of l1 cache. */
1007 2048, /* size of l2 cache. */
05f85dbb
VM
1008 128, /* size of prefetch block */
1009 8, /* number of parallel prefetches */
1010 3, /* Branch cost */
1011 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1012 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1013 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1014 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1015 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1016 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
8c996513
JH
1017 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1018 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1019 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1020 {{libcall, {{8, loop}, {15, unrolled_loop},
1021 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1022 {libcall, {{24, loop}, {32, unrolled_loop},
e70444a8
HJ
1023 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1024 1, /* scalar_stmt_cost. */
1025 1, /* scalar load_cost. */
1026 1, /* scalar_store_cost. */
1027 1, /* vec_stmt_cost. */
1028 1, /* vec_to_scalar_cost. */
1029 1, /* scalar_to_vec_cost. */
1030 1, /* vec_align_load_cost. */
1031 2, /* vec_unalign_load_cost. */
1032 1, /* vec_store_cost. */
1033 3, /* cond_taken_branch_cost. */
1034 1, /* cond_not_taken_branch_cost. */
05f85dbb
VM
1035};
1036
d326eaf0
JH
1037/* Generic64 should produce code tuned for Nocona and K8. */
1038static const
1039struct processor_costs generic64_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 /* On all chips taken into consideration lea is 2 cycles and more. With
1042 this cost however our current implementation of synth_mult results in
6fc0bb99 1043 use of unnecessary temporary registers causing regression on several
d326eaf0
JH
1044 SPECfp benchmarks. */
1045 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1046 COSTS_N_INSNS (1), /* variable shift costs */
1047 COSTS_N_INSNS (1), /* constant shift costs */
1048 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1049 COSTS_N_INSNS (4), /* HI */
1050 COSTS_N_INSNS (3), /* SI */
1051 COSTS_N_INSNS (4), /* DI */
1052 COSTS_N_INSNS (2)}, /* other */
1053 0, /* cost of multiply per each bit set */
1054 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1055 COSTS_N_INSNS (26), /* HI */
1056 COSTS_N_INSNS (42), /* SI */
1057 COSTS_N_INSNS (74), /* DI */
1058 COSTS_N_INSNS (74)}, /* other */
1059 COSTS_N_INSNS (1), /* cost of movsx */
1060 COSTS_N_INSNS (1), /* cost of movzx */
1061 8, /* "large" insn */
1062 17, /* MOVE_RATIO */
1063 4, /* cost for loading QImode using movzbl */
1064 {4, 4, 4}, /* cost of loading integer registers
1065 in QImode, HImode and SImode.
1066 Relative to reg-reg move (2). */
1067 {4, 4, 4}, /* cost of storing integer registers */
1068 4, /* cost of reg,reg fld/fst */
1069 {12, 12, 12}, /* cost of loading fp registers
1070 in SFmode, DFmode and XFmode */
75bcbcdb
L
1071 {6, 6, 8}, /* cost of storing fp registers
1072 in SFmode, DFmode and XFmode */
d326eaf0
JH
1073 2, /* cost of moving MMX register */
1074 {8, 8}, /* cost of loading MMX registers
1075 in SImode and DImode */
1076 {8, 8}, /* cost of storing MMX registers
1077 in SImode and DImode */
1078 2, /* cost of moving SSE register */
1079 {8, 8, 8}, /* cost of loading SSE registers
1080 in SImode, DImode and TImode */
1081 {8, 8, 8}, /* cost of storing SSE registers
1082 in SImode, DImode and TImode */
1083 5, /* MMX or SSE register to integer */
46cb0441
ZD
1084 32, /* size of l1 cache. */
1085 512, /* size of l2 cache. */
d326eaf0
JH
1086 64, /* size of prefetch block */
1087 6, /* number of parallel prefetches */
1088 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1089 is increased to perhaps more appropriate value of 5. */
1090 3, /* Branch cost */
1091 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1092 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1093 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1094 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1095 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1096 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
8c996513
JH
1097 {DUMMY_STRINGOP_ALGS,
1098 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1099 {DUMMY_STRINGOP_ALGS,
e70444a8
HJ
1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 1, /* scalar_stmt_cost. */
1102 1, /* scalar load_cost. */
1103 1, /* scalar_store_cost. */
1104 1, /* vec_stmt_cost. */
1105 1, /* vec_to_scalar_cost. */
1106 1, /* scalar_to_vec_cost. */
1107 1, /* vec_align_load_cost. */
1108 2, /* vec_unalign_load_cost. */
1109 1, /* vec_store_cost. */
1110 3, /* cond_taken_branch_cost. */
1111 1, /* cond_not_taken_branch_cost. */
d326eaf0
JH
1112};
1113
1114/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1115static const
1116struct processor_costs generic32_cost = {
1117 COSTS_N_INSNS (1), /* cost of an add instruction */
1118 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1119 COSTS_N_INSNS (1), /* variable shift costs */
1120 COSTS_N_INSNS (1), /* constant shift costs */
1121 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1122 COSTS_N_INSNS (4), /* HI */
1123 COSTS_N_INSNS (3), /* SI */
1124 COSTS_N_INSNS (4), /* DI */
1125 COSTS_N_INSNS (2)}, /* other */
1126 0, /* cost of multiply per each bit set */
1127 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1128 COSTS_N_INSNS (26), /* HI */
1129 COSTS_N_INSNS (42), /* SI */
1130 COSTS_N_INSNS (74), /* DI */
1131 COSTS_N_INSNS (74)}, /* other */
1132 COSTS_N_INSNS (1), /* cost of movsx */
1133 COSTS_N_INSNS (1), /* cost of movzx */
1134 8, /* "large" insn */
1135 17, /* MOVE_RATIO */
1136 4, /* cost for loading QImode using movzbl */
1137 {4, 4, 4}, /* cost of loading integer registers
1138 in QImode, HImode and SImode.
1139 Relative to reg-reg move (2). */
1140 {4, 4, 4}, /* cost of storing integer registers */
1141 4, /* cost of reg,reg fld/fst */
1142 {12, 12, 12}, /* cost of loading fp registers
1143 in SFmode, DFmode and XFmode */
75bcbcdb
L
1144 {6, 6, 8}, /* cost of storing fp registers
1145 in SFmode, DFmode and XFmode */
d326eaf0
JH
1146 2, /* cost of moving MMX register */
1147 {8, 8}, /* cost of loading MMX registers
1148 in SImode and DImode */
1149 {8, 8}, /* cost of storing MMX registers
1150 in SImode and DImode */
1151 2, /* cost of moving SSE register */
1152 {8, 8, 8}, /* cost of loading SSE registers
1153 in SImode, DImode and TImode */
1154 {8, 8, 8}, /* cost of storing SSE registers
1155 in SImode, DImode and TImode */
1156 5, /* MMX or SSE register to integer */
46cb0441
ZD
1157 32, /* size of l1 cache. */
1158 256, /* size of l2 cache. */
d326eaf0
JH
1159 64, /* size of prefetch block */
1160 6, /* number of parallel prefetches */
1161 3, /* Branch cost */
1162 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
8c996513
JH
1168 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1169 DUMMY_STRINGOP_ALGS},
1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 DUMMY_STRINGOP_ALGS},
e70444a8
HJ
1172 1, /* scalar_stmt_cost. */
1173 1, /* scalar load_cost. */
1174 1, /* scalar_store_cost. */
1175 1, /* vec_stmt_cost. */
1176 1, /* vec_to_scalar_cost. */
1177 1, /* scalar_to_vec_cost. */
1178 1, /* vec_align_load_cost. */
1179 2, /* vec_unalign_load_cost. */
1180 1, /* vec_store_cost. */
1181 3, /* cond_taken_branch_cost. */
1182 1, /* cond_not_taken_branch_cost. */
d326eaf0
JH
1183};
1184
8b60264b 1185const struct processor_costs *ix86_cost = &pentium_cost;
32b5b1aa 1186
a269a03c
JC
1187/* Processor feature/optimization bitmasks. */
1188#define m_386 (1<<PROCESSOR_I386)
1189#define m_486 (1<<PROCESSOR_I486)
1190#define m_PENT (1<<PROCESSOR_PENTIUM)
1191#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
8691cea3
UB
1192#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1193#define m_NOCONA (1<<PROCESSOR_NOCONA)
1194#define m_CORE2 (1<<PROCESSOR_CORE2)
1195
cfe1b18f 1196#define m_GEODE (1<<PROCESSOR_GEODE)
a269a03c 1197#define m_K6 (1<<PROCESSOR_K6)
8691cea3 1198#define m_K6_GEODE (m_K6 | m_GEODE)
4977bab6 1199#define m_K8 (1<<PROCESSOR_K8)
8691cea3 1200#define m_ATHLON (1<<PROCESSOR_ATHLON)
4977bab6 1201#define m_ATHLON_K8 (m_K8 | m_ATHLON)
21efb4d4 1202#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
04e1d06b 1203#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
8691cea3 1204
d326eaf0
JH
1205#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1206#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
d326eaf0
JH
1207
1208/* Generic instruction choice should be common subset of supported CPUs
05f85dbb 1209 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
8691cea3 1210#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
d326eaf0 1211
80fd744f
RH
1212/* Feature tests against the various tunings. */
1213unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1214 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1215 negatively, so enabling for Generic64 seems like good code size
1216 tradeoff. We can't enable it for 32bit generic because it does not
1217 work well with PPro base chips. */
04e1d06b 1218 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
80fd744f
RH
1219
1220 /* X86_TUNE_PUSH_MEMORY */
04e1d06b 1221 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
80fd744f
RH
1222 | m_NOCONA | m_CORE2 | m_GENERIC,
1223
1224 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1225 m_486 | m_PENT,
1226
1227 /* X86_TUNE_USE_BIT_TEST */
1228 m_386,
1229
1230 /* X86_TUNE_UNROLL_STRLEN */
04e1d06b 1231 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
80fd744f
RH
1232
1233 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
04e1d06b 1234 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
80fd744f
RH
1235
1236 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1237 on simulation result. But after P4 was made, no performance benefit
1238 was observed with branch hints. It also increases the code size.
1239 As a result, icc never generates branch hints. */
1240 0,
1241
1242 /* X86_TUNE_DOUBLE_WITH_ADD */
1243 ~m_386,
54a88090 1244
80fd744f 1245 /* X86_TUNE_USE_SAHF */
3c2d980c 1246 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
5be6cb59 1247 | m_NOCONA | m_CORE2 | m_GENERIC,
80fd744f
RH
1248
1249 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
3c2d980c 1250 partial dependencies. */
04e1d06b 1251 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
80fd744f
RH
1252 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1253
1254 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1255 register stalls on Generic32 compilation setting as well. However
1256 in current implementation the partial register stalls are not eliminated
1257 very well - they can be introduced via subregs synthesized by combine
1258 and can happen in caller/callee saving sequences. Because this option
1259 pays back little on PPro based chips and is in conflict with partial reg
1260 dependencies used by Athlon/P4 based chips, it is better to leave it off
1261 for generic32 for now. */
1262 m_PPRO,
1263
1264 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1265 m_CORE2 | m_GENERIC,
54a88090 1266
80fd744f
RH
1267 /* X86_TUNE_USE_HIMODE_FIOP */
1268 m_386 | m_486 | m_K6_GEODE,
1269
1270 /* X86_TUNE_USE_SIMODE_FIOP */
04e1d06b 1271 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
80fd744f
RH
1272
1273 /* X86_TUNE_USE_MOV0 */
1274 m_K6,
54a88090 1275
80fd744f
RH
1276 /* X86_TUNE_USE_CLTD */
1277 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1278
1279 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1280 m_PENT4,
1281
1282 /* X86_TUNE_SPLIT_LONG_MOVES */
1283 m_PPRO,
1284
1285 /* X86_TUNE_READ_MODIFY_WRITE */
1286 ~m_PENT,
1287
1288 /* X86_TUNE_READ_MODIFY */
1289 ~(m_PENT | m_PPRO),
1290
1291 /* X86_TUNE_PROMOTE_QIMODE */
04e1d06b 1292 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
80fd744f
RH
1293 | m_GENERIC /* | m_PENT4 ? */,
1294
1295 /* X86_TUNE_FAST_PREFIX */
1296 ~(m_PENT | m_486 | m_386),
1297
1298 /* X86_TUNE_SINGLE_STRINGOP */
1299 m_386 | m_PENT4 | m_NOCONA,
54a88090 1300
80fd744f
RH
1301 /* X86_TUNE_QIMODE_MATH */
1302 ~0,
54a88090 1303
80fd744f
RH
1304 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1305 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1306 might be considered for Generic32 if our scheme for avoiding partial
1307 stalls was more effective. */
1308 ~m_PPRO,
1309
1310 /* X86_TUNE_PROMOTE_QI_REGS */
1311 0,
1312
1313 /* X86_TUNE_PROMOTE_HI_REGS */
1314 m_PPRO,
1315
1316 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
04e1d06b 1317 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
80fd744f
RH
1318
1319 /* X86_TUNE_ADD_ESP_8 */
04e1d06b 1320 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
80fd744f
RH
1321 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1322
1323 /* X86_TUNE_SUB_ESP_4 */
04e1d06b 1324 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
80fd744f
RH
1325
1326 /* X86_TUNE_SUB_ESP_8 */
04e1d06b 1327 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
80fd744f
RH
1328 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1329
1330 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1331 for DFmode copies */
04e1d06b 1332 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
80fd744f
RH
1333 | m_GENERIC | m_GEODE),
1334
1335 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
04e1d06b 1336 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
80fd744f
RH
1337
1338 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1339 conflict here in between PPro/Pentium4 based chips that thread 128bit
1340 SSE registers as single units versus K8 based chips that divide SSE
1341 registers to two 64bit halves. This knob promotes all store destinations
1342 to be 128bit to allow register renaming on 128bit SSE units, but usually
1343 results in one extra microop on 64bit SSE units. Experimental results
1344 shows that disabling this option on P4 brings over 20% SPECfp regression,
1345 while enabling it on K8 brings roughly 2.4% regression that can be partly
1346 masked by careful scheduling of moves. */
1347 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1348
1349 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1350 m_AMDFAM10,
1351
1352 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1353 are resolved on SSE register parts instead of whole registers, so we may
1354 maintain just lower part of scalar values in proper format leaving the
1355 upper part undefined. */
1356 m_ATHLON_K8,
21efb4d4 1357
80fd744f 1358 /* X86_TUNE_SSE_TYPELESS_STORES */
04e1d06b 1359 m_AMD_MULTIPLE,
21efb4d4 1360
80fd744f
RH
1361 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1362 m_PPRO | m_PENT4 | m_NOCONA,
21efb4d4 1363
80fd744f 1364 /* X86_TUNE_MEMORY_MISMATCH_STALL */
04e1d06b 1365 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
21efb4d4 1366
80fd744f
RH
1367 /* X86_TUNE_PROLOGUE_USING_MOVE */
1368 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1369
1370 /* X86_TUNE_EPILOGUE_USING_MOVE */
1371 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1372
1373 /* X86_TUNE_SHIFT1 */
1374 ~m_486,
1375
1376 /* X86_TUNE_USE_FFREEP */
04e1d06b 1377 m_AMD_MULTIPLE,
80fd744f
RH
1378
1379 /* X86_TUNE_INTER_UNIT_MOVES */
04e1d06b 1380 ~(m_AMD_MULTIPLE | m_GENERIC),
80fd744f 1381
630ecd8d
JH
1382 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1383 ~(m_AMDFAM10),
1384
80fd744f
RH
1385 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1386 than 4 branch instructions in the 16 byte window. */
04e1d06b 1387 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
80fd744f
RH
1388
1389 /* X86_TUNE_SCHEDULE */
04e1d06b 1390 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
80fd744f
RH
1391
1392 /* X86_TUNE_USE_BT */
33ee5810 1393 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
80fd744f
RH
1394
1395 /* X86_TUNE_USE_INCDEC */
77c1632f 1396 ~(m_PENT4 | m_NOCONA | m_GENERIC),
80fd744f
RH
1397
1398 /* X86_TUNE_PAD_RETURNS */
04e1d06b 1399 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
80fd744f
RH
1400
1401 /* X86_TUNE_EXT_80387_CONSTANTS */
ddff69b9
MM
1402 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1403
1404 /* X86_TUNE_SHORTEN_X87_SSE */
1405 ~m_K8,
1406
1407 /* X86_TUNE_AVOID_VECTOR_DECODE */
1408 m_K8 | m_GENERIC64,
1409
a646aded
UB
1410 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1411 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1412 ~(m_386 | m_486),
1413
1414 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1415 vector path on AMD machines. */
ddff69b9
MM
1416 m_K8 | m_GENERIC64 | m_AMDFAM10,
1417
a646aded
UB
1418 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1419 machines. */
ddff69b9
MM
1420 m_K8 | m_GENERIC64 | m_AMDFAM10,
1421
a646aded
UB
1422 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1423 than a MOV. */
ddff69b9
MM
1424 m_PENT,
1425
a646aded
UB
1426 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1427 but one byte longer. */
ddff69b9
MM
1428 m_PENT,
1429
a646aded 1430 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
ddff69b9 1431 operand that cannot be represented using a modRM byte. The XOR
a646aded 1432 replacement is long decoded, so this split helps here as well. */
ddff69b9 1433 m_K6,
4845dbb5 1434
84fbffb2 1435 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
4e9d897d 1436 from integer to FP. */
4845dbb5 1437 m_AMDFAM10,
354f84af
UB
1438
1439 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1440 with a subsequent conditional jump instruction into a single
1441 compare-and-branch uop. */
1442 m_CORE2,
80fd744f
RH
1443};
1444
1445/* Feature tests against the various architecture variations. */
1446unsigned int ix86_arch_features[X86_ARCH_LAST] = {
0a1c5e55
UB
1447 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1448 ~(m_386 | m_486 | m_PENT | m_K6),
80fd744f
RH
1449
1450 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1451 ~m_386,
1452
1453 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1454 ~(m_386 | m_486),
1455
1456 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1457 ~m_386,
1458
1459 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1460 ~m_386,
1461};
1462
1463static const unsigned int x86_accumulate_outgoing_args
04e1d06b 1464 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
80fd744f
RH
1465
1466static const unsigned int x86_arch_always_fancy_math_387
04e1d06b 1467 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
80fd744f 1468 | m_NOCONA | m_CORE2 | m_GENERIC;
a269a03c 1469
8c996513
JH
1470static enum stringop_alg stringop_alg = no_stringop;
1471
d1f87653 1472/* In case the average insn count for single function invocation is
6ab16dd9
JH
1473 lower than this constant, emit fast (but longer) prologue and
1474 epilogue code. */
4977bab6 1475#define FAST_PROLOGUE_INSN_COUNT 20
5bf0ebab 1476
5bf0ebab
RH
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* REX registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
c572e5ba 1511
/* The "default" register map used in 32bit mode.  Maps GCC hard register
   numbers to the numbers emitted in debug info (-1 = no debug number).  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

/* Hard register numbers used to pass integer arguments in 64-bit mode,
   in SysV ABI order: RDI, RSI, RDX, RCX, R8, R9.  */
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

/* Hard register numbers used to pass integer arguments under the
   Microsoft 64-bit ABI: RCX, RDX, R8, R9.  */
static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  2 /*RCX*/, 1 /*RDX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

/* Hard register numbers used for integer return values in 64-bit mode.  */
static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,	/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
1553
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8 for %eip    (no gcc equivalent)
	9 for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
1618
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;	/* first compare operand */
rtx ix86_compare_op1 = NULL_RTX;	/* second compare operand */
rtx ix86_compare_emitted = NULL_RTX;	/* flags rtx if compare already emitted */

/* Size of the register save area (integer regparm slots plus SSE
   regparm slots, 16 bytes each) used by varargs functions.  */
#define X86_64_VARARGS_SIZE (X86_64_REGPARM_MAX * UNITS_PER_WORD + X86_64_SSE_REGPARM_MAX * 16)

/* Define the structure for the machine field in struct function.
   One node of a linked list of cached stack-local slots; a slot is
   identified by its (mode, n) pair.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;			/* machine mode of the slot */
  unsigned short n;			/* slot number within that mode */
  rtx rtl;				/* the MEM rtx for the slot */
  struct stack_local_entry *next;	/* next entry in the list */
};
1638
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					<- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					<- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
		        )
   [va_arg registers]  (
		        > to_allocate	      <- FRAME_POINTER
   [frame]	       (
		        )
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;			/* number of call-saved registers saved */
  int padding1;			/* padding before the va_arg area */
  int va_arg_size;		/* size of the va_arg register save area */
  HOST_WIDE_INT frame;		/* size of the local-variable area */
  int padding2;			/* padding after the frame area */
  int outgoing_arguments_size;
  int red_zone_size;		/* bytes usable below SP without adjusting it */

  /* Total stack space the prologue must allocate (see diagram above).  */
  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
1678
/* Code model option.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialects.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* -mstackrealign option */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";

/* Pointers to insn-generator routines; NOTE(review): presumably set to
   the 32-bit or 64-bit pattern variants elsewhere (override_options) --
   confirm, as the initialization is outside this chunk.  */
static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_pop1) (rtx);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */

int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

/* Fence to use after loop using movnt.  */
tree x86_mfence;
1730
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

/* Printable names for the classes above, indexed by enum value.  */
static const char * const x86_64_reg_class_name[] =
{
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};

/* Maximum number of 64-bit chunks (classes) a single argument may span.  */
#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* Nonzero once ext_80387_constants_table has been filled in.  */
static bool ext_80387_constants_init = 0;
cb1119b7 1763
/* Forward declarations of static helpers defined later in this file.  */
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
						 rtx, rtx, int);

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
1778
/* Bit flags that specify the ISA we are compiling for.  */
int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;

/* A mask of ix86_isa_flags that includes bit X if X
   was set or cleared on the command line.  */
static int ix86_isa_flags_explicit;

/* Define a set of ISAs which are available when a given ISA is
   enabled (i.e. the ISA plus everything it implies).  MMX and SSE
   ISAs are handled separately.  */

#define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
#define OPTION_MASK_ISA_3DNOW_SET \
  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)

#define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
#define OPTION_MASK_ISA_SSE2_SET \
  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
#define OPTION_MASK_ISA_SSE3_SET \
  (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
#define OPTION_MASK_ISA_SSSE3_SET \
  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
#define OPTION_MASK_ISA_SSE4_1_SET \
  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
#define OPTION_MASK_ISA_SSE4_2_SET \
  (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)

/* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same
   as -msse4.2.  */
#define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET

#define OPTION_MASK_ISA_SSE4A_SET \
  (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
#define OPTION_MASK_ISA_SSE5_SET \
  (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)

/* Define a set of ISAs which aren't available when a given ISA is
   disabled (i.e. the ISA plus everything that depends on it).  MMX
   and SSE ISAs are handled separately.  */

#define OPTION_MASK_ISA_MMX_UNSET \
  (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
#define OPTION_MASK_ISA_3DNOW_UNSET \
  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
#define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A

#define OPTION_MASK_ISA_SSE_UNSET \
  (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
#define OPTION_MASK_ISA_SSE2_UNSET \
  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
#define OPTION_MASK_ISA_SSE3_UNSET \
  (OPTION_MASK_ISA_SSE3 \
   | OPTION_MASK_ISA_SSSE3_UNSET \
   | OPTION_MASK_ISA_SSE4A_UNSET )
#define OPTION_MASK_ISA_SSSE3_UNSET \
  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
#define OPTION_MASK_ISA_SSE4_1_UNSET \
  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
#define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4_2

/* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
   as -mno-sse4.1.  */
#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET

#define OPTION_MASK_ISA_SSE4A_UNSET \
  (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)

#define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5

/* Vectorization library interface and handlers.  */
tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
6ac49599
RS
1851/* Implement TARGET_HANDLE_OPTION. */
1852
1853static bool
55bea00a 1854ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
6ac49599
RS
1855{
1856 switch (code)
1857 {
0a1c5e55 1858 case OPT_mmmx:
287a7d41
L
1859 if (value)
1860 {
1861 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
1862 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
1863 }
1864 else
6ac49599 1865 {
3b8dd071
L
1866 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1867 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
6ac49599
RS
1868 }
1869 return true;
1870
0a1c5e55 1871 case OPT_m3dnow:
287a7d41
L
1872 if (value)
1873 {
1874 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
1875 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
1876 }
1877 else
6ac49599 1878 {
3b8dd071
L
1879 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1880 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
6ac49599
RS
1881 }
1882 return true;
1883
0a1c5e55
UB
1884 case OPT_m3dnowa:
1885 return false;
1886
6ac49599 1887 case OPT_msse:
287a7d41
L
1888 if (value)
1889 {
1890 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
1891 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
1892 }
1893 else
6ac49599 1894 {
3b8dd071
L
1895 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1896 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
6ac49599
RS
1897 }
1898 return true;
1899
1900 case OPT_msse2:
287a7d41
L
1901 if (value)
1902 {
1903 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
1904 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
1905 }
1906 else
6ac49599 1907 {
3b8dd071
L
1908 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1909 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
21efb4d4
HJ
1910 }
1911 return true;
1912
1913 case OPT_msse3:
287a7d41
L
1914 if (value)
1915 {
1916 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
1917 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
1918 }
1919 else
594dc048 1920 {
3b8dd071
L
1921 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1922 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
594dc048
L
1923 }
1924 return true;
1925
1926 case OPT_mssse3:
287a7d41
L
1927 if (value)
1928 {
1929 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
1930 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
1931 }
1932 else
9a5cee02 1933 {
3b8dd071
L
1934 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1935 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
9a5cee02
L
1936 }
1937 return true;
1938
1939 case OPT_msse4_1:
287a7d41
L
1940 if (value)
1941 {
1942 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
1943 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
1944 }
1945 else
21efb4d4 1946 {
3b8dd071
L
1947 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1948 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1949 }
1950 return true;
1951
1952 case OPT_msse4_2:
287a7d41
L
1953 if (value)
1954 {
1955 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
1956 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
1957 }
1958 else
3b8dd071
L
1959 {
1960 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1961 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
6ac49599
RS
1962 }
1963 return true;
1964
3b8dd071 1965 case OPT_msse4:
287a7d41
L
1966 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
1967 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
3b8dd071
L
1968 return true;
1969
1970 case OPT_mno_sse4:
1971 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1972 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1973 return true;
1974
9a5cee02 1975 case OPT_msse4a:
287a7d41
L
1976 if (value)
1977 {
1978 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
1979 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
1980 }
1981 else
9a5cee02 1982 {
3b8dd071
L
1983 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1984 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
9a5cee02
L
1985 }
1986 return true;
1987
04e1d06b 1988 case OPT_msse5:
287a7d41
L
1989 if (value)
1990 {
1991 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
1992 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
1993 }
1994 else
04e1d06b
MM
1995 {
1996 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
1997 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
1998 }
1999 return true;
2000
6ac49599
RS
2001 default:
2002 return true;
2003 }
2004}
2005
f5316dfe
MM
2006/* Sometimes certain combinations of command options do not make
2007 sense on a particular target machine. You can define a macro
2008 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2009 defined, is executed once just after all the command options have
2010 been parsed.
2011
2012 Don't use this macro to turn on various extra optimizations for
2013 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2014
2015void
b96a374d 2016override_options (void)
f5316dfe 2017{
400500c4 2018 int i;
3326f410 2019 int ix86_tune_defaulted = 0;
b26f6ed7 2020 int ix86_arch_specified = 0;
80fd744f 2021 unsigned int ix86_arch_mask, ix86_tune_mask;
3326f410 2022
e075ae69
RH
2023 /* Comes from final.c -- no real reason to change it. */
2024#define MAX_CODE_ALIGN 16
f5316dfe 2025
c8c5cb99
SC
2026 static struct ptt
2027 {
8b60264b 2028 const struct processor_costs *cost; /* Processor costs */
8b60264b 2029 const int align_loop; /* Default alignments. */
2cca7283 2030 const int align_loop_max_skip;
8b60264b 2031 const int align_jump;
2cca7283 2032 const int align_jump_max_skip;
8b60264b 2033 const int align_func;
e075ae69 2034 }
0f290768 2035 const processor_target_table[PROCESSOR_max] =
e075ae69 2036 {
0a1c5e55
UB
2037 {&i386_cost, 4, 3, 4, 3, 4},
2038 {&i486_cost, 16, 15, 16, 15, 16},
2039 {&pentium_cost, 16, 7, 16, 7, 16},
461a73b5 2040 {&pentiumpro_cost, 16, 15, 16, 10, 16},
0a1c5e55
UB
2041 {&geode_cost, 0, 0, 0, 0, 0},
2042 {&k6_cost, 32, 7, 32, 7, 32},
2043 {&athlon_cost, 16, 7, 16, 7, 16},
2044 {&pentium4_cost, 0, 0, 0, 0, 0},
2045 {&k8_cost, 16, 7, 16, 7, 16},
2046 {&nocona_cost, 0, 0, 0, 0, 0},
461a73b5 2047 {&core2_cost, 16, 10, 16, 10, 16},
0a1c5e55 2048 {&generic32_cost, 16, 7, 16, 7, 16},
461a73b5 2049 {&generic64_cost, 16, 10, 16, 10, 16},
0a1c5e55 2050 {&amdfam10_cost, 32, 24, 32, 7, 32}
e075ae69
RH
2051 };
2052
c2f17e19
UB
2053 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2054 {
2055 "generic",
2056 "i386",
2057 "i486",
2058 "pentium",
2059 "pentium-mmx",
2060 "pentiumpro",
2061 "pentium2",
2062 "pentium3",
2063 "pentium4",
2064 "pentium-m",
2065 "prescott",
2066 "nocona",
2067 "core2",
2068 "geode",
2069 "k6",
2070 "k6-2",
2071 "k6-3",
2072 "athlon",
2073 "athlon-4",
2074 "k8",
2075 "amdfam10"
2076 };
2077
9415ab7d
TN
2078 enum pta_flags
2079 {
2080 PTA_SSE = 1 << 0,
2081 PTA_SSE2 = 1 << 1,
2082 PTA_SSE3 = 1 << 2,
2083 PTA_MMX = 1 << 3,
2084 PTA_PREFETCH_SSE = 1 << 4,
2085 PTA_3DNOW = 1 << 5,
2086 PTA_3DNOW_A = 1 << 6,
2087 PTA_64BIT = 1 << 7,
2088 PTA_SSSE3 = 1 << 8,
2089 PTA_CX16 = 1 << 9,
2090 PTA_POPCNT = 1 << 10,
2091 PTA_ABM = 1 << 11,
2092 PTA_SSE4A = 1 << 12,
2093 PTA_NO_SAHF = 1 << 13,
2094 PTA_SSE4_1 = 1 << 14,
04e1d06b 2095 PTA_SSE4_2 = 1 << 15,
8b96a312
L
2096 PTA_SSE5 = 1 << 16,
2097 PTA_AES = 1 << 17,
2098 PTA_PCLMUL = 1 << 18
9415ab7d
TN
2099 };
2100
e075ae69
RH
2101 static struct pta
2102 {
8b60264b
KG
2103 const char *const name; /* processor name or nickname. */
2104 const enum processor_type processor;
9415ab7d 2105 const unsigned /*enum pta_flags*/ flags;
e075ae69 2106 }
0f290768 2107 const processor_alias_table[] =
e075ae69 2108 {
0dd0e980
JH
2109 {"i386", PROCESSOR_I386, 0},
2110 {"i486", PROCESSOR_I486, 0},
2111 {"i586", PROCESSOR_PENTIUM, 0},
2112 {"pentium", PROCESSOR_PENTIUM, 0},
2113 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
3780101d
JG
2114 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2115 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2116 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
0a1c5e55 2117 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
0dd0e980
JH
2118 {"i686", PROCESSOR_PENTIUMPRO, 0},
2119 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2120 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
0a1c5e55
UB
2121 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2122 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2123 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2124 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2125 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2126 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
4f3f76e6 2127 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
0a1c5e55
UB
2128 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2129 | PTA_CX16 | PTA_NO_SAHF)},
2130 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2131 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2132 | PTA_SSSE3
2133 | PTA_CX16)},
2134 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2135 |PTA_PREFETCH_SSE)},
0dd0e980
JH
2136 {"k6", PROCESSOR_K6, PTA_MMX},
2137 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2138 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
0a1c5e55
UB
2139 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2140 | PTA_PREFETCH_SSE)},
2141 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2142 | PTA_PREFETCH_SSE)},
2143 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2144 | PTA_SSE)},
2145 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2146 | PTA_SSE)},
2147 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2148 | PTA_SSE)},
2149 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2150 | PTA_MMX | PTA_SSE | PTA_SSE2
2151 | PTA_NO_SAHF)},
2152 {"k8", PROCESSOR_K8, (PTA_64BIT
2153 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2154 | PTA_SSE | PTA_SSE2
2155 | PTA_NO_SAHF)},
2156 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2157 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2158 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2159 | PTA_NO_SAHF)},
2160 {"opteron", PROCESSOR_K8, (PTA_64BIT
2161 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2162 | PTA_SSE | PTA_SSE2
2163 | PTA_NO_SAHF)},
2164 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2165 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2166 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2167 | PTA_NO_SAHF)},
2168 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2169 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2170 | PTA_SSE | PTA_SSE2
2171 | PTA_NO_SAHF)},
2172 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2173 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2174 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2175 | PTA_NO_SAHF)},
2176 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2177 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2178 | PTA_SSE | PTA_SSE2
2179 | PTA_NO_SAHF)},
2180 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2181 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2182 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2183 | PTA_SSE4A
2184 | PTA_CX16 | PTA_ABM)},
2185 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2186 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2187 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2188 | PTA_SSE4A
2189 | PTA_CX16 | PTA_ABM)},
d326eaf0
JH
2190 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2191 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
3af4bd89 2192 };
c8c5cb99 2193
ca7558fc 2194 int const pta_size = ARRAY_SIZE (processor_alias_table);
c8c5cb99 2195
554707bd
DJ
2196#ifdef SUBTARGET_OVERRIDE_OPTIONS
2197 SUBTARGET_OVERRIDE_OPTIONS;
2198#endif
2199
f475fd3c
MS
2200#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2201 SUBSUBTARGET_OVERRIDE_OPTIONS;
2202#endif
2203
f7288899
EC
2204 /* -fPIC is the default for x86_64. */
2205 if (TARGET_MACHO && TARGET_64BIT)
2206 flag_pic = 2;
2207
41ed2237 2208 /* Set the default values for switches whose default depends on TARGET_64BIT
d1f87653 2209 in case they weren't overwritten by command line options. */
55ba61f3
JH
2210 if (TARGET_64BIT)
2211 {
f7288899 2212 /* Mach-O doesn't support omitting the frame pointer for now. */
55ba61f3 2213 if (flag_omit_frame_pointer == 2)
f7288899 2214 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
55ba61f3
JH
2215 if (flag_asynchronous_unwind_tables == 2)
2216 flag_asynchronous_unwind_tables = 1;
2217 if (flag_pcc_struct_return == 2)
2218 flag_pcc_struct_return = 0;
2219 }
2220 else
2221 {
2222 if (flag_omit_frame_pointer == 2)
2223 flag_omit_frame_pointer = 0;
2224 if (flag_asynchronous_unwind_tables == 2)
2225 flag_asynchronous_unwind_tables = 0;
2226 if (flag_pcc_struct_return == 2)
7c712dcc 2227 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
55ba61f3
JH
2228 }
2229
d326eaf0
JH
2230 /* Need to check -mtune=generic first. */
2231 if (ix86_tune_string)
3326f410 2232 {
d326eaf0 2233 if (!strcmp (ix86_tune_string, "generic")
fa959ce4
MM
2234 || !strcmp (ix86_tune_string, "i686")
2235 /* As special support for cross compilers we read -mtune=native
2236 as -mtune=generic. With native compilers we won't see the
2237 -mtune=native, as it was changed by the driver. */
2238 || !strcmp (ix86_tune_string, "native"))
d326eaf0
JH
2239 {
2240 if (TARGET_64BIT)
2241 ix86_tune_string = "generic64";
2242 else
2243 ix86_tune_string = "generic32";
2244 }
2245 else if (!strncmp (ix86_tune_string, "generic", 7))
2246 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
3326f410 2247 }
d326eaf0
JH
2248 else
2249 {
2250 if (ix86_arch_string)
2251 ix86_tune_string = ix86_arch_string;
2252 if (!ix86_tune_string)
2253 {
c2f17e19 2254 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
d326eaf0
JH
2255 ix86_tune_defaulted = 1;
2256 }
2257
2258 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2259 need to use a sensible tune option. */
2260 if (!strcmp (ix86_tune_string, "generic")
2261 || !strcmp (ix86_tune_string, "x86-64")
2262 || !strcmp (ix86_tune_string, "i686"))
2263 {
2264 if (TARGET_64BIT)
2265 ix86_tune_string = "generic64";
2266 else
2267 ix86_tune_string = "generic32";
2268 }
2269 }
8c996513
JH
2270 if (ix86_stringop_string)
2271 {
2272 if (!strcmp (ix86_stringop_string, "rep_byte"))
2273 stringop_alg = rep_prefix_1_byte;
2274 else if (!strcmp (ix86_stringop_string, "libcall"))
2275 stringop_alg = libcall;
2276 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2277 stringop_alg = rep_prefix_4_byte;
2278 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2279 stringop_alg = rep_prefix_8_byte;
2280 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2281 stringop_alg = loop_1_byte;
2282 else if (!strcmp (ix86_stringop_string, "loop"))
2283 stringop_alg = loop;
2284 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2285 stringop_alg = unrolled_loop;
2286 else
2287 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2288 }
d326eaf0
JH
2289 if (!strcmp (ix86_tune_string, "x86-64"))
2290 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2291 "-mtune=generic instead as appropriate.");
2292
f4365627 2293 if (!ix86_arch_string)
3fec9fa9 2294 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
b26f6ed7
EC
2295 else
2296 ix86_arch_specified = 1;
4f3f76e6 2297
d326eaf0
JH
2298 if (!strcmp (ix86_arch_string, "generic"))
2299 error ("generic CPU can be used only for -mtune= switch");
2300 if (!strncmp (ix86_arch_string, "generic", 7))
2301 error ("bad value (%s) for -march= switch", ix86_arch_string);
e075ae69 2302
6189a572
JH
2303 if (ix86_cmodel_string != 0)
2304 {
2305 if (!strcmp (ix86_cmodel_string, "small"))
2306 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
7dcbf659
JH
2307 else if (!strcmp (ix86_cmodel_string, "medium"))
2308 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
dc4d7240
JH
2309 else if (!strcmp (ix86_cmodel_string, "large"))
2310 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
6189a572 2311 else if (flag_pic)
dc4d7240 2312 error ("code model %s does not support PIC mode", ix86_cmodel_string);
6189a572
JH
2313 else if (!strcmp (ix86_cmodel_string, "32"))
2314 ix86_cmodel = CM_32;
2315 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2316 ix86_cmodel = CM_KERNEL;
6189a572
JH
2317 else
2318 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2319 }
2320 else
2321 {
7c800926 2322 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
ccf8e764
RH
2323 use of rip-relative addressing. This eliminates fixups that
2324 would otherwise be needed if this object is to be placed in a
2325 DLL, and is essentially just as efficient as direct addressing. */
7c800926 2326 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
ccf8e764
RH
2327 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2328 else if (TARGET_64BIT)
6189a572 2329 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
ccf8e764
RH
2330 else
2331 ix86_cmodel = CM_32;
6189a572 2332 }
c93e80a5
JH
2333 if (ix86_asm_string != 0)
2334 {
1f4c2c57
MS
2335 if (! TARGET_MACHO
2336 && !strcmp (ix86_asm_string, "intel"))
c93e80a5
JH
2337 ix86_asm_dialect = ASM_INTEL;
2338 else if (!strcmp (ix86_asm_string, "att"))
2339 ix86_asm_dialect = ASM_ATT;
2340 else
2341 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2342 }
6189a572 2343 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
9e637a26 2344 error ("code model %qs not supported in the %s bit mode",
6189a572 2345 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
853a33f3 2346 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
c725bd79 2347 sorry ("%i-bit mode not compiled in",
853a33f3 2348 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
6189a572 2349
f4365627
JH
2350 for (i = 0; i < pta_size; i++)
2351 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2352 {
2353 ix86_arch = processor_alias_table[i].processor;
2354 /* Default cpu tuning to the architecture. */
9e555526 2355 ix86_tune = ix86_arch;
0a1c5e55
UB
2356
2357 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2358 error ("CPU you selected does not support x86-64 "
2359 "instruction set");
2360
f4365627 2361 if (processor_alias_table[i].flags & PTA_MMX
853a33f3
UB
2362 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2363 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
f4365627 2364 if (processor_alias_table[i].flags & PTA_3DNOW
853a33f3
UB
2365 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2366 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
f4365627 2367 if (processor_alias_table[i].flags & PTA_3DNOW_A
853a33f3
UB
2368 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2369 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
f4365627 2370 if (processor_alias_table[i].flags & PTA_SSE
853a33f3
UB
2371 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2372 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
f4365627 2373 if (processor_alias_table[i].flags & PTA_SSE2
853a33f3
UB
2374 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2375 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
5bbeea44 2376 if (processor_alias_table[i].flags & PTA_SSE3
853a33f3
UB
2377 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2378 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
b1875f52 2379 if (processor_alias_table[i].flags & PTA_SSSE3
853a33f3
UB
2380 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2381 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
9a5cee02 2382 if (processor_alias_table[i].flags & PTA_SSE4_1
853a33f3
UB
2383 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2384 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3b8dd071
L
2385 if (processor_alias_table[i].flags & PTA_SSE4_2
2386 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2387 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
0a1c5e55 2388 if (processor_alias_table[i].flags & PTA_SSE4A
853a33f3
UB
2389 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2390 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
04e1d06b
MM
2391 if (processor_alias_table[i].flags & PTA_SSE5
2392 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2393 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
0a1c5e55
UB
2394
2395 if (processor_alias_table[i].flags & PTA_ABM)
2396 x86_abm = true;
15a26abf
JJ
2397 if (processor_alias_table[i].flags & PTA_CX16)
2398 x86_cmpxchg16b = true;
0a1c5e55
UB
2399 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2400 x86_popcnt = true;
2401 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2402 x86_prefetch_sse = true;
9064c533 2403 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
3c2d980c 2404 x86_sahf = true;
8b96a312
L
2405 if (processor_alias_table[i].flags & PTA_AES)
2406 x86_aes = true;
2407 if (processor_alias_table[i].flags & PTA_PCLMUL)
2408 x86_pclmul = true;
0a1c5e55 2409
6716ecbc
JM
2410 break;
2411 }
2412
2413 if (i == pta_size)
2414 error ("bad value (%s) for -march= switch", ix86_arch_string);
2415
80fd744f
RH
2416 ix86_arch_mask = 1u << ix86_arch;
2417 for (i = 0; i < X86_ARCH_LAST; ++i)
2418 ix86_arch_features[i] &= ix86_arch_mask;
2419
6716ecbc
JM
2420 for (i = 0; i < pta_size; i++)
2421 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2422 {
2423 ix86_tune = processor_alias_table[i].processor;
4977bab6 2424 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3326f410
DJ
2425 {
2426 if (ix86_tune_defaulted)
2427 {
2428 ix86_tune_string = "x86-64";
2429 for (i = 0; i < pta_size; i++)
2430 if (! strcmp (ix86_tune_string,
2431 processor_alias_table[i].name))
2432 break;
2433 ix86_tune = processor_alias_table[i].processor;
2434 }
2435 else
2436 error ("CPU you selected does not support x86-64 "
2437 "instruction set");
2438 }
c618c6ec
JJ
2439 /* Intel CPUs have always interpreted SSE prefetch instructions as
2440 NOPs; so, we can enable SSE prefetch instructions even when
2441 -mtune (rather than -march) points us to a processor that has them.
2442 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2443 higher processors. */
0a1c5e55
UB
2444 if (TARGET_CMOVE
2445 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
c618c6ec 2446 x86_prefetch_sse = true;
f4365627
JH
2447 break;
2448 }
f4365627 2449 if (i == pta_size)
9e555526 2450 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
e075ae69 2451
8b96a312
L
2452 /* Enable SSE2 if AES or PCLMUL is enabled. */
2453 if ((x86_aes || x86_pclmul)
2454 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2455 {
2456 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2457 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2458 }
2459
80fd744f
RH
2460 ix86_tune_mask = 1u << ix86_tune;
2461 for (i = 0; i < X86_TUNE_LAST; ++i)
2462 ix86_tune_features[i] &= ix86_tune_mask;
0fa4c370 2463
2ab0437e
JH
2464 if (optimize_size)
2465 ix86_cost = &size_cost;
2466 else
9e555526 2467 ix86_cost = processor_target_table[ix86_tune].cost;
e075ae69 2468
36edd3cc
BS
2469 /* Arrange to set up i386_stack_locals for all functions. */
2470 init_machine_status = ix86_init_machine_status;
fce5a9f2 2471
0f290768 2472 /* Validate -mregparm= value. */
e075ae69 2473 if (ix86_regparm_string)
b08de47e 2474 {
ccf8e764
RH
2475 if (TARGET_64BIT)
2476 warning (0, "-mregparm is ignored in 64-bit mode");
400500c4
RK
2477 i = atoi (ix86_regparm_string);
2478 if (i < 0 || i > REGPARM_MAX)
2479 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2480 else
2481 ix86_regparm = i;
b08de47e 2482 }
ccf8e764
RH
2483 if (TARGET_64BIT)
2484 ix86_regparm = REGPARM_MAX;
b08de47e 2485
3e18fdf6 2486 /* If the user has provided any of the -malign-* options,
a4f31c00 2487 warn and use that value only if -falign-* is not set.
3e18fdf6 2488 Remove this code in GCC 3.2 or later. */
e075ae69 2489 if (ix86_align_loops_string)
b08de47e 2490 {
d4ee4d25 2491 warning (0, "-malign-loops is obsolete, use -falign-loops");
3e18fdf6
GK
2492 if (align_loops == 0)
2493 {
2494 i = atoi (ix86_align_loops_string);
2495 if (i < 0 || i > MAX_CODE_ALIGN)
2496 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2497 else
2498 align_loops = 1 << i;
2499 }
b08de47e 2500 }
3af4bd89 2501
e075ae69 2502 if (ix86_align_jumps_string)
b08de47e 2503 {
d4ee4d25 2504 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
3e18fdf6
GK
2505 if (align_jumps == 0)
2506 {
2507 i = atoi (ix86_align_jumps_string);
2508 if (i < 0 || i > MAX_CODE_ALIGN)
2509 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2510 else
2511 align_jumps = 1 << i;
2512 }
b08de47e 2513 }
b08de47e 2514
e075ae69 2515 if (ix86_align_funcs_string)
b08de47e 2516 {
d4ee4d25 2517 warning (0, "-malign-functions is obsolete, use -falign-functions");
3e18fdf6
GK
2518 if (align_functions == 0)
2519 {
2520 i = atoi (ix86_align_funcs_string);
2521 if (i < 0 || i > MAX_CODE_ALIGN)
2522 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2523 else
2524 align_functions = 1 << i;
2525 }
b08de47e 2526 }
3af4bd89 2527
3e18fdf6 2528 /* Default align_* from the processor table. */
3e18fdf6 2529 if (align_loops == 0)
2cca7283 2530 {
9e555526
RH
2531 align_loops = processor_target_table[ix86_tune].align_loop;
2532 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2cca7283 2533 }
3e18fdf6 2534 if (align_jumps == 0)
2cca7283 2535 {
9e555526
RH
2536 align_jumps = processor_target_table[ix86_tune].align_jump;
2537 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2cca7283 2538 }
3e18fdf6 2539 if (align_functions == 0)
2cca7283 2540 {
9e555526 2541 align_functions = processor_target_table[ix86_tune].align_func;
2cca7283 2542 }
3e18fdf6 2543
0f290768 2544 /* Validate -mbranch-cost= value, or provide default. */
3dd0df7f 2545 ix86_branch_cost = ix86_cost->branch_cost;
e075ae69 2546 if (ix86_branch_cost_string)
804a8ee0 2547 {
400500c4
RK
2548 i = atoi (ix86_branch_cost_string);
2549 if (i < 0 || i > 5)
2550 error ("-mbranch-cost=%d is not between 0 and 5", i);
2551 else
2552 ix86_branch_cost = i;
804a8ee0 2553 }
7dcbf659
JH
2554 if (ix86_section_threshold_string)
2555 {
2556 i = atoi (ix86_section_threshold_string);
2557 if (i < 0)
2558 error ("-mlarge-data-threshold=%d is negative", i);
2559 else
2560 ix86_section_threshold = i;
2561 }
804a8ee0 2562
f996902d
RH
2563 if (ix86_tls_dialect_string)
2564 {
2565 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2566 ix86_tls_dialect = TLS_DIALECT_GNU;
5bf5a10b
AO
2567 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2568 ix86_tls_dialect = TLS_DIALECT_GNU2;
f996902d
RH
2569 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2570 ix86_tls_dialect = TLS_DIALECT_SUN;
2571 else
2572 error ("bad value (%s) for -mtls-dialect= switch",
2573 ix86_tls_dialect_string);
2574 }
2575
577565f9
UB
2576 if (ix87_precision_string)
2577 {
2578 i = atoi (ix87_precision_string);
2579 if (i != 32 && i != 64 && i != 80)
2580 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2581 }
2582
d6b0b376
EC
2583 if (TARGET_64BIT)
2584 {
2585 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2586
2587 /* Enable by default the SSE and MMX builtins. Do allow the user to
2588 explicitly disable any of these. In particular, disabling SSE and
2589 MMX for kernel code is extremely useful. */
b26f6ed7 2590 if (!ix86_arch_specified)
d6b0b376
EC
2591 ix86_isa_flags
2592 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2593 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2594
2595 if (TARGET_RTD)
2596 warning (0, "-mrtd is ignored in 64bit mode");
2597 }
2598 else
2599 {
2600 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2601
b26f6ed7 2602 if (!ix86_arch_specified)
d6b0b376
EC
2603 ix86_isa_flags
2604 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2605
2606 /* i386 ABI does not specify red zone. It still makes sense to use it
2607 when programmer takes care to stack from being destroyed. */
2608 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2609 target_flags |= MASK_NO_RED_ZONE;
2610 }
2611
e9a25f70 2612 /* Keep nonleaf frame pointers. */
14c473b9
RS
2613 if (flag_omit_frame_pointer)
2614 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2615 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 2616 flag_omit_frame_pointer = 1;
e075ae69
RH
2617
2618 /* If we're doing fast math, we don't care about comparison order
2619 wrt NaNs. This lets us use a shorter comparison sequence. */
5a4171a0 2620 if (flag_finite_math_only)
e075ae69
RH
2621 target_flags &= ~MASK_IEEE_FP;
2622
30c99a84
RH
2623 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2624 since the insns won't need emulation. */
e39e8c36 2625 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
30c99a84
RH
2626 target_flags &= ~MASK_NO_FANCY_MATH_387;
2627
ba2baa55 2628 /* Likewise, if the target doesn't have a 387, or we've specified
0fa2e4df 2629 software floating point, don't use 387 inline intrinsics. */
ba2baa55
RS
2630 if (!TARGET_80387)
2631 target_flags |= MASK_NO_FANCY_MATH_387;
2632
a5370cf0
RH
2633 /* Turn on MMX builtins for -msse. */
2634 if (TARGET_SSE)
2635 {
853a33f3 2636 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
a5370cf0
RH
2637 x86_prefetch_sse = true;
2638 }
2639
837a8954
UB
2640 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2641 if (TARGET_SSE4_2 || TARGET_ABM)
0a1c5e55 2642 x86_popcnt = true;
21efb4d4 2643
d0655f33 2644 /* Validate -mpreferred-stack-boundary= value, or provide default.
1395ea39
L
2645 The default of 128 bits is for Pentium III's SSE __m128. We can't
2646 change it because of optimize_size. Otherwise, we can't mix object
2647 files compiled with -Os and -On. */
2648 ix86_preferred_stack_boundary = 128;
d0655f33
JM
2649 if (ix86_preferred_stack_boundary_string)
2650 {
2651 i = atoi (ix86_preferred_stack_boundary_string);
2652 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2653 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2654 TARGET_64BIT ? 4 : 2);
2655 else
2656 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2657 }
2658
1f97667f
RG
2659 /* Accept -msseregparm only if at least SSE support is enabled. */
2660 if (TARGET_SSEREGPARM
2661 && ! TARGET_SSE)
2662 error ("-msseregparm used without SSE enabled");
2663
004d3859 2664 ix86_fpmath = TARGET_FPMATH_DEFAULT;
965f5423
JH
2665 if (ix86_fpmath_string != 0)
2666 {
2667 if (! strcmp (ix86_fpmath_string, "387"))
2668 ix86_fpmath = FPMATH_387;
2669 else if (! strcmp (ix86_fpmath_string, "sse"))
2670 {
2671 if (!TARGET_SSE)
2672 {
d4ee4d25 2673 warning (0, "SSE instruction set disabled, using 387 arithmetics");
965f5423
JH
2674 ix86_fpmath = FPMATH_387;
2675 }
2676 else
2677 ix86_fpmath = FPMATH_SSE;
2678 }
2679 else if (! strcmp (ix86_fpmath_string, "387,sse")
2680 || ! strcmp (ix86_fpmath_string, "sse,387"))
2681 {
2682 if (!TARGET_SSE)
2683 {
d4ee4d25 2684 warning (0, "SSE instruction set disabled, using 387 arithmetics");
965f5423
JH
2685 ix86_fpmath = FPMATH_387;
2686 }
2687 else if (!TARGET_80387)
2688 {
d4ee4d25 2689 warning (0, "387 instruction set disabled, using SSE arithmetics");
965f5423
JH
2690 ix86_fpmath = FPMATH_SSE;
2691 }
2692 else
9415ab7d 2693 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
965f5423 2694 }
fce5a9f2 2695 else
965f5423
JH
2696 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2697 }
14f73b5a 2698
de004e6d
JS
2699 /* If the i387 is disabled, then do not return values in it. */
2700 if (!TARGET_80387)
2701 target_flags &= ~MASK_FLOAT_RETURNS;
2702
a5ea943c
RG
2703 /* Use external vectorized library in vectorizing intrinsics. */
2704 if (ix86_veclibabi_string)
2705 {
9aba5d22
UB
2706 if (strcmp (ix86_veclibabi_string, "svml") == 0)
2707 ix86_veclib_handler = ix86_veclibabi_svml;
2708 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
a5ea943c
RG
2709 ix86_veclib_handler = ix86_veclibabi_acml;
2710 else
2711 error ("unknown vectorization library ABI type (%s) for "
2712 "-mveclibabi= switch", ix86_veclibabi_string);
2713 }
2714
e39e8c36 2715 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
9ef1b13a 2716 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
c6036a37
JH
2717 && !optimize_size)
2718 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
623fe810 2719
d3073c70
RH
2720 /* ??? Unwind info is not correct around the CFG unless either a frame
2721 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2722 unwind info generation to be aware of the CFG and propagating states
2723 around edges. */
2724 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2725 || flag_exceptions || flag_non_call_exceptions)
2726 && flag_omit_frame_pointer
2727 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2728 {
2729 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2730 warning (0, "unwind tables currently require either a frame pointer "
2731 "or -maccumulate-outgoing-args for correctness");
2732 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2733 }
2734
2c7c6f54
JM
2735 /* If stack probes are required, the space used for large function
2736 arguments on the stack must also be probed, so enable
2737 -maccumulate-outgoing-args so this happens in the prologue. */
2738 if (TARGET_STACK_PROBE
2739 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2740 {
2741 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2742 warning (0, "stack probing requires -maccumulate-outgoing-args "
2743 "for correctness");
2744 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2745 }
2746
80fd744f
RH
2747 /* For sane SSE instruction set generation we need fcomi instruction.
2748 It is safe to enable all CMOVE instructions. */
2749 if (TARGET_SSE)
2750 TARGET_CMOVE = 1;
2751
623fe810
RH
2752 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2753 {
2754 char *p;
2755 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2756 p = strchr (internal_label_prefix, 'X');
2757 internal_label_prefix_len = p - internal_label_prefix;
2758 *p = '\0';
2759 }
a5370cf0
RH
2760
2761 /* When scheduling description is not available, disable scheduler pass
2762 so it won't slow down the compilation and make x87 code slower. */
ad7b96a9
JH
2763 if (!TARGET_SCHEDULE)
2764 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
47eb5b32
ZD
2765
2766 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2767 set_param_value ("simultaneous-prefetches",
2768 ix86_cost->simultaneous_prefetches);
2769 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2770 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
46cb0441
ZD
2771 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2772 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2773 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2774 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
d7bd8aeb
JJ
2775
2776 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
7c800926
KT
2777 can be optimized to ap = __builtin_next_arg (0).
2778 For abi switching it should be corrected. */
2779 if (!TARGET_64BIT || DEFAULT_ABI == MS_ABI)
d7bd8aeb 2780 targetm.expand_builtin_va_start = NULL;
922e3e33 2781
999d3194
L
2782 if (TARGET_64BIT)
2783 {
2784 ix86_gen_leave = gen_leave_rex64;
2785 ix86_gen_pop1 = gen_popdi1;
2786 ix86_gen_add3 = gen_adddi3;
2787 ix86_gen_sub3 = gen_subdi3;
2788 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
2789 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
2790 ix86_gen_monitor = gen_sse3_monitor64;
2791 }
2792 else
2793 {
2794 ix86_gen_leave = gen_leave;
2795 ix86_gen_pop1 = gen_popsi1;
2796 ix86_gen_add3 = gen_addsi3;
2797 ix86_gen_sub3 = gen_subsi3;
2798 ix86_gen_sub3_carry = gen_subsi3_carry;
2799 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
2800 ix86_gen_monitor = gen_sse3_monitor;
2801 }
2802
922e3e33
UB
2803#ifdef USE_IX86_CLD
2804 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
2805 if (!TARGET_64BIT)
2806 target_flags |= MASK_CLD & ~target_flags_explicit;
2807#endif
f5316dfe
MM
2808}
2809\f
2ed941ec
RH
2810/* Return true if this goes in large data/bss. */
2811
2812static bool
2813ix86_in_large_data_p (tree exp)
2814{
2815 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2816 return false;
2817
2818 /* Functions are never large data. */
2819 if (TREE_CODE (exp) == FUNCTION_DECL)
2820 return false;
2821
2822 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2823 {
2824 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2825 if (strcmp (section, ".ldata") == 0
2826 || strcmp (section, ".lbss") == 0)
2827 return true;
2828 return false;
2829 }
2830 else
2831 {
2832 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2833
2834 /* If this is an incomplete type with size 0, then we can't put it
2835 in data because it might be too big when completed. */
2836 if (!size || size > ix86_section_threshold)
2837 return true;
2838 }
2839
2840 return false;
2841}
2842
2843/* Switch to the appropriate section for output of DECL.
7dcbf659
JH
2844 DECL is either a `VAR_DECL' node or a constant of some sort.
2845 RELOC indicates whether forming the initial value of DECL requires
2846 link-time relocations. */
2847
2ed941ec
RH
2848static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2849 ATTRIBUTE_UNUSED;
2850
d6b5193b 2851static section *
7dcbf659 2852x86_64_elf_select_section (tree decl, int reloc,
d6b5193b 2853 unsigned HOST_WIDE_INT align)
7dcbf659
JH
2854{
2855 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2856 && ix86_in_large_data_p (decl))
2857 {
2858 const char *sname = NULL;
3b10d286 2859 unsigned int flags = SECTION_WRITE;
9b580a0b 2860 switch (categorize_decl_for_section (decl, reloc))
7dcbf659
JH
2861 {
2862 case SECCAT_DATA:
2863 sname = ".ldata";
2864 break;
2865 case SECCAT_DATA_REL:
2866 sname = ".ldata.rel";
2867 break;
2868 case SECCAT_DATA_REL_LOCAL:
2869 sname = ".ldata.rel.local";
2870 break;
2871 case SECCAT_DATA_REL_RO:
2872 sname = ".ldata.rel.ro";
2873 break;
2874 case SECCAT_DATA_REL_RO_LOCAL:
2875 sname = ".ldata.rel.ro.local";
2876 break;
2877 case SECCAT_BSS:
2878 sname = ".lbss";
3b10d286 2879 flags |= SECTION_BSS;
7dcbf659
JH
2880 break;
2881 case SECCAT_RODATA:
2882 case SECCAT_RODATA_MERGE_STR:
2883 case SECCAT_RODATA_MERGE_STR_INIT:
2884 case SECCAT_RODATA_MERGE_CONST:
2885 sname = ".lrodata";
3b10d286 2886 flags = 0;
7dcbf659
JH
2887 break;
2888 case SECCAT_SRODATA:
2889 case SECCAT_SDATA:
2890 case SECCAT_SBSS:
2891 gcc_unreachable ();
2892 case SECCAT_TEXT:
2893 case SECCAT_TDATA:
2894 case SECCAT_TBSS:
2895 /* We don't split these for medium model. Place them into
2896 default sections and hope for best. */
2897 break;
feb60f03
NS
2898 case SECCAT_EMUTLS_VAR:
2899 case SECCAT_EMUTLS_TMPL:
2900 gcc_unreachable ();
7dcbf659
JH
2901 }
2902 if (sname)
3b10d286
JJ
2903 {
2904 /* We might get called with string constants, but get_named_section
2905 doesn't like them as they are not DECLs. Also, we need to set
2906 flags in that case. */
2907 if (!DECL_P (decl))
2908 return get_section (sname, flags, NULL);
2909 return get_named_section (decl, sname, reloc);
2910 }
7dcbf659 2911 }
d6b5193b 2912 return default_elf_select_section (decl, reloc, align);
7dcbf659
JH
2913}
2914
2915/* Build up a unique section name, expressed as a
2916 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2917 RELOC indicates whether the initial value of EXP requires
2918 link-time relocations. */
2919
2ed941ec 2920static void ATTRIBUTE_UNUSED
7dcbf659
JH
2921x86_64_elf_unique_section (tree decl, int reloc)
2922{
2923 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2924 && ix86_in_large_data_p (decl))
2925 {
2926 const char *prefix = NULL;
2927 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2928 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2929
9b580a0b 2930 switch (categorize_decl_for_section (decl, reloc))
7dcbf659
JH
2931 {
2932 case SECCAT_DATA:
2933 case SECCAT_DATA_REL:
2934 case SECCAT_DATA_REL_LOCAL:
2935 case SECCAT_DATA_REL_RO:
2936 case SECCAT_DATA_REL_RO_LOCAL:
feb60f03 2937 prefix = one_only ? ".ld" : ".ldata";
7dcbf659
JH
2938 break;
2939 case SECCAT_BSS:
feb60f03 2940 prefix = one_only ? ".lb" : ".lbss";
7dcbf659
JH
2941 break;
2942 case SECCAT_RODATA:
2943 case SECCAT_RODATA_MERGE_STR:
2944 case SECCAT_RODATA_MERGE_STR_INIT:
2945 case SECCAT_RODATA_MERGE_CONST:
feb60f03 2946 prefix = one_only ? ".lr" : ".lrodata";
7dcbf659
JH
2947 break;
2948 case SECCAT_SRODATA:
2949 case SECCAT_SDATA:
2950 case SECCAT_SBSS:
2951 gcc_unreachable ();
2952 case SECCAT_TEXT:
2953 case SECCAT_TDATA:
2954 case SECCAT_TBSS:
2955 /* We don't split these for medium model. Place them into
2956 default sections and hope for best. */
2957 break;
feb60f03
NS
2958 case SECCAT_EMUTLS_VAR:
2959 prefix = targetm.emutls.var_section;
2960 break;
2961 case SECCAT_EMUTLS_TMPL:
2962 prefix = targetm.emutls.tmpl_section;
2963 break;
7dcbf659
JH
2964 }
2965 if (prefix)
2966 {
feb60f03 2967 const char *name, *linkonce;
7dcbf659 2968 char *string;
7dcbf659
JH
2969
2970 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2971 name = targetm.strip_name_encoding (name);
feb60f03
NS
2972
2973 /* If we're using one_only, then there needs to be a .gnu.linkonce
2974 prefix to the section name. */
2975 linkonce = one_only ? ".gnu.linkonce" : "";
2976
2977 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
2978
2979 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
7dcbf659
JH
2980 return;
2981 }
2982 }
2983 default_unique_section (decl, reloc);
2984}
2985
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  NAME/SIZE/ALIGN describe the common symbol; ALIGN
   is in bits.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  /* Objects above the -mlarge-data-threshold go into .largecomm so the
     linker can place them out of small-model reach.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fprintf (file, ".largecomm\t");
  else
    fprintf (file, "%s", COMMON_ASM_OP);
  assemble_name (file, name);
  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
2ed941ec 3007
7dcbf659
JH
3008/* Utility function for targets to use in implementing
3009 ASM_OUTPUT_ALIGNED_BSS. */
3010
3011void
3012x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
3013 const char *name, unsigned HOST_WIDE_INT size,
3014 int align)
3015{
3016 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3017 && size > (unsigned int)ix86_section_threshold)
d6b5193b 3018 switch_to_section (get_named_section (decl, ".lbss", 0));
7dcbf659 3019 else
d6b5193b 3020 switch_to_section (bss_section);
7dcbf659
JH
3021 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
3022#ifdef ASM_DECLARE_OBJECT_NAME
3023 last_assemble_variable_decl = decl;
3024 ASM_DECLARE_OBJECT_NAME (file, name, decl);
3025#else
3026 /* Standard thing is just output label for the object. */
3027 ASM_OUTPUT_LABEL (file, name);
3028#endif /* ASM_DECLARE_OBJECT_NAME */
3029 ASM_OUTPUT_SKIP (file, size ? size : 1);
3030}
3031\f
32b5b1aa 3032void
b96a374d 3033optimization_options (int level, int size ATTRIBUTE_UNUSED)
32b5b1aa 3034{
e9a25f70
JL
3035 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
3036 make the problem with not enough registers even worse. */
32b5b1aa
SC
3037#ifdef INSN_SCHEDULING
3038 if (level > 1)
3039 flag_schedule_insns = 0;
3040#endif
55ba61f3 3041
2e3f0db6
DJ
3042 if (TARGET_MACHO)
3043 /* The Darwin libraries never set errno, so we might as well
3044 avoid calling them when that's the only reason we would. */
3045 flag_errno_math = 0;
3046
55ba61f3
JH
3047 /* The default values of these switches depend on the TARGET_64BIT
3048 that is not known at this moment. Mark these values with 2 and
3049 let user the to override these. In case there is no command line option
3050 specifying them, we will set the defaults in override_options. */
3051 if (optimize >= 1)
3052 flag_omit_frame_pointer = 2;
3053 flag_pcc_struct_return = 2;
3054 flag_asynchronous_unwind_tables = 2;
32070c7b 3055 flag_vect_cost_model = 1;
4f514514
JM
3056#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
3057 SUBTARGET_OPTIMIZATION_OPTIONS;
3058#endif
32b5b1aa 3059}
b08de47e 3060\f
5fbf0217
EB
3061/* Decide whether we can make a sibling call to a function. DECL is the
3062 declaration of the function being targeted by the call and EXP is the
3063 CALL_EXPR representing the call. */
4977bab6
ZW
3064
3065static bool
b96a374d 3066ix86_function_ok_for_sibcall (tree decl, tree exp)
4977bab6 3067{
f19e3a64 3068 tree func;
cb1119b7 3069 rtx a, b;
f19e3a64 3070
4977bab6
ZW
3071 /* If we are generating position-independent code, we cannot sibcall
3072 optimize any indirect call, or a direct call to a global function,
3073 as the PLT requires %ebx be live. */
010ef110 3074 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4977bab6
ZW
3075 return false;
3076
f19e3a64
JJ
3077 if (decl)
3078 func = decl;
3079 else
cb1119b7 3080 {
5039610b 3081 func = TREE_TYPE (CALL_EXPR_FN (exp));
cb1119b7
RG
3082 if (POINTER_TYPE_P (func))
3083 func = TREE_TYPE (func);
3084 }
f19e3a64 3085
cb1119b7
RG
3086 /* Check that the return value locations are the same. Like
3087 if we are returning floats on the 80387 register stack, we cannot
4977bab6 3088 make a sibcall from a function that doesn't return a float to a
5fbf0217
EB
3089 function that does or, conversely, from a function that does return
3090 a float to a function that doesn't; the necessary stack adjustment
cb1119b7 3091 would not be executed. This is also the place we notice
cac32996
RG
3092 differences in the return value ABI. Note that it is ok for one
3093 of the functions to have void return type as long as the return
3094 value of the other is passed in a register. */
cb1119b7
RG
3095 a = ix86_function_value (TREE_TYPE (exp), func, false);
3096 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
3097 cfun->decl, false);
5d3018ce
RH
3098 if (STACK_REG_P (a) || STACK_REG_P (b))
3099 {
3100 if (!rtx_equal_p (a, b))
3101 return false;
3102 }
3103 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
3104 ;
3105 else if (!rtx_equal_p (a, b))
4977bab6
ZW
3106 return false;
3107
3108 /* If this call is indirect, we'll need to be able to use a call-clobbered
b96a374d 3109 register for the address of the target function. Make sure that all
4977bab6
ZW
3110 such registers are not used for passing parameters. */
3111 if (!decl && !TARGET_64BIT)
3112 {
e767b5be 3113 tree type;
4977bab6
ZW
3114
3115 /* We're looking at the CALL_EXPR, we need the type of the function. */
5039610b 3116 type = CALL_EXPR_FN (exp); /* pointer expression */
4977bab6
ZW
3117 type = TREE_TYPE (type); /* pointer type */
3118 type = TREE_TYPE (type); /* function type */
3119
e767b5be 3120 if (ix86_function_regparm (type, NULL) >= 3)
4977bab6
ZW
3121 {
3122 /* ??? Need to count the actual number of registers to be used,
3123 not the possible number of registers. Fix later. */
3124 return false;
3125 }
3126 }
3127
6cc37e7e 3128 /* Dllimport'd functions are also called indirectly. */
da489f73
RH
3129 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
3130 && decl && DECL_DLLIMPORT_P (decl)
6cc37e7e
DS
3131 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
3132 return false;
6cc37e7e 3133
150cdc9e
RH
3134 /* If we forced aligned the stack, then sibcalling would unalign the
3135 stack, which may break the called function. */
3136 if (cfun->machine->force_align_arg_pointer)
3137 return false;
3138
4977bab6
ZW
3139 /* Otherwise okay. That also includes certain types of indirect calls. */
3140 return true;
3141}
3142
fa283935
UB
3143/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
3144 calling convention attributes;
91d231cb 3145 arguments as in struct attribute_spec.handler. */
b08de47e 3146
91d231cb 3147static tree
2f84b963
RG
3148ix86_handle_cconv_attribute (tree *node, tree name,
3149 tree args,
3150 int flags ATTRIBUTE_UNUSED,
3151 bool *no_add_attrs)
91d231cb
JM
3152{
3153 if (TREE_CODE (*node) != FUNCTION_TYPE
3154 && TREE_CODE (*node) != METHOD_TYPE
3155 && TREE_CODE (*node) != FIELD_DECL
3156 && TREE_CODE (*node) != TYPE_DECL)
3157 {
5c498b10 3158 warning (OPT_Wattributes, "%qs attribute only applies to functions",
91d231cb
JM
3159 IDENTIFIER_POINTER (name));
3160 *no_add_attrs = true;
2f84b963 3161 return NULL_TREE;
91d231cb 3162 }
2f84b963
RG
3163
3164 /* Can combine regparm with all attributes but fastcall. */
3165 if (is_attribute_p ("regparm", name))
91d231cb
JM
3166 {
3167 tree cst;
b08de47e 3168
2f84b963
RG
3169 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3170 {
3171 error ("fastcall and regparm attributes are not compatible");
3172 }
3173
91d231cb
JM
3174 cst = TREE_VALUE (args);
3175 if (TREE_CODE (cst) != INTEGER_CST)
3176 {
5c498b10
DD
3177 warning (OPT_Wattributes,
3178 "%qs attribute requires an integer constant argument",
91d231cb
JM
3179 IDENTIFIER_POINTER (name));
3180 *no_add_attrs = true;
3181 }
3182 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
3183 {
5c498b10 3184 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
91d231cb
JM
3185 IDENTIFIER_POINTER (name), REGPARM_MAX);
3186 *no_add_attrs = true;
3187 }
e91f04de 3188
33932946
SH
3189 if (!TARGET_64BIT
3190 && lookup_attribute (ix86_force_align_arg_pointer_string,
3191 TYPE_ATTRIBUTES (*node))
3192 && compare_tree_int (cst, REGPARM_MAX-1))
3193 {
3194 error ("%s functions limited to %d register parameters",
3195 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
3196 }
3197
2f84b963
RG
3198 return NULL_TREE;
3199 }
3200
3201 if (TARGET_64BIT)
3202 {
ccf8e764 3203 /* Do not warn when emulating the MS ABI. */
7c800926 3204 if (TREE_CODE (*node) != FUNCTION_TYPE || !ix86_function_type_abi (*node))
ccf8e764
RH
3205 warning (OPT_Wattributes, "%qs attribute ignored",
3206 IDENTIFIER_POINTER (name));
2f84b963
RG
3207 *no_add_attrs = true;
3208 return NULL_TREE;
3209 }
3210
fa283935 3211 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2f84b963
RG
3212 if (is_attribute_p ("fastcall", name))
3213 {
3214 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3215 {
3216 error ("fastcall and cdecl attributes are not compatible");
3217 }
3218 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3219 {
3220 error ("fastcall and stdcall attributes are not compatible");
3221 }
3222 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
3223 {
e767b5be
JH
3224 error ("fastcall and regparm attributes are not compatible");
3225 }
b08de47e
MM
3226 }
3227
fa283935
UB
3228 /* Can combine stdcall with fastcall (redundant), regparm and
3229 sseregparm. */
2f84b963
RG
3230 else if (is_attribute_p ("stdcall", name))
3231 {
3232 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3233 {
3234 error ("stdcall and cdecl attributes are not compatible");
3235 }
3236 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3237 {
3238 error ("stdcall and fastcall attributes are not compatible");
3239 }
3240 }
3241
fa283935 3242 /* Can combine cdecl with regparm and sseregparm. */
2f84b963
RG
3243 else if (is_attribute_p ("cdecl", name))
3244 {
3245 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3246 {
3247 error ("stdcall and cdecl attributes are not compatible");
3248 }
3249 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3250 {
3251 error ("fastcall and cdecl attributes are not compatible");
3252 }
3253 }
3254
fa283935 3255 /* Can combine sseregparm with all attributes. */
2f84b963 3256
91d231cb 3257 return NULL_TREE;
b08de47e
MM
3258}
3259
3260/* Return 0 if the attributes for two types are incompatible, 1 if they
3261 are compatible, and 2 if they are nearly compatible (which causes a
3262 warning to be generated). */
3263
8d8e52be 3264static int
3101faab 3265ix86_comp_type_attributes (const_tree type1, const_tree type2)
b08de47e 3266{
0f290768 3267 /* Check for mismatch of non-default calling convention. */
27c38fbe 3268 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c 3269
18ff3013
DS
3270 if (TREE_CODE (type1) != FUNCTION_TYPE
3271 && TREE_CODE (type1) != METHOD_TYPE)
afcfe58c
MM
3272 return 1;
3273
2f84b963
RG
3274 /* Check for mismatched fastcall/regparm types. */
3275 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
3276 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
3277 || (ix86_function_regparm (type1, NULL)
3278 != ix86_function_regparm (type2, NULL)))
3279 return 0;
3280
3281 /* Check for mismatched sseregparm types. */
3282 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
3283 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
b96a374d 3284 return 0;
e91f04de 3285
afcfe58c 3286 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
3287 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
3288 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
265d94ac 3289 return 0;
2f84b963 3290
b08de47e
MM
3291 return 1;
3292}
b08de47e 3293\f
0fa2e4df 3294/* Return the regparm value for a function with the indicated TYPE and DECL.
e767b5be 3295 DECL may be NULL when calling function indirectly
839a4992 3296 or considering a libcall. */
483ab821
MM
3297
3298static int
3101faab 3299ix86_function_regparm (const_tree type, const_tree decl)
483ab821
MM
3300{
3301 tree attr;
e767b5be 3302 int regparm = ix86_regparm;
483ab821 3303
27183bba
UB
3304 static bool error_issued;
3305
ee2f65b4 3306 if (TARGET_64BIT)
7c800926
KT
3307 {
3308 if (ix86_function_type_abi (type) == DEFAULT_ABI)
3309 return regparm;
3310 return DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
3311 }
ee2f65b4
RH
3312
3313 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3314 if (attr)
27183bba
UB
3315 {
3316 regparm
3317 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3318
3319 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
3320 {
3321 /* We can't use regparm(3) for nested functions because
3322 these pass static chain pointer in %ecx register. */
3323 if (!error_issued && regparm == 3
3324 && decl_function_context (decl)
3325 && !DECL_NO_STATIC_CHAIN (decl))
3326 {
3327 error ("nested functions are limited to 2 register parameters");
3328 error_issued = true;
3329 return 0;
3330 }
3331 }
3332
3333 return regparm;
3334 }
ee2f65b4
RH
3335
3336 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3337 return 2;
3338
3339 /* Use register calling convention for local functions when possible. */
ac97d816
UB
3340 if (decl && TREE_CODE (decl) == FUNCTION_DECL
3341 && flag_unit_at_a_time && !profile_flag)
e767b5be 3342 {
3101faab 3343 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
b1d5455a 3344 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
ee2f65b4 3345 if (i && i->local)
e767b5be 3346 {
ee2f65b4
RH
3347 int local_regparm, globals = 0, regno;
3348 struct function *f;
e767b5be 3349
ee2f65b4 3350 /* Make sure no regparm register is taken by a
ec382b8c
UB
3351 fixed register variable. */
3352 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
3353 if (fixed_regs[local_regparm])
ee2f65b4 3354 break;
e767b5be 3355
ee2f65b4
RH
3356 /* We can't use regparm(3) for nested functions as these use
3357 static chain pointer in third argument. */
3358 if (local_regparm == 3
f2f0a960
HMC
3359 && (decl_function_context (decl)
3360 || ix86_force_align_arg_pointer)
ee2f65b4
RH
3361 && !DECL_NO_STATIC_CHAIN (decl))
3362 local_regparm = 2;
3363
3364 /* If the function realigns its stackpointer, the prologue will
3365 clobber %ecx. If we've already generated code for the callee,
3366 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3367 scanning the attributes for the self-realigning property. */
3368 f = DECL_STRUCT_FUNCTION (decl);
3369 if (local_regparm == 3
3370 && (f ? !!f->machine->force_align_arg_pointer
3371 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3372 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3373 local_regparm = 2;
3374
ec382b8c
UB
3375 /* Each fixed register usage increases register pressure,
3376 so less registers should be used for argument passing.
3377 This functionality can be overriden by an explicit
3378 regparm value. */
3379 for (regno = 0; regno <= DI_REG; regno++)
3380 if (fixed_regs[regno])
ee2f65b4 3381 globals++;
ec382b8c 3382
ee2f65b4
RH
3383 local_regparm
3384 = globals < local_regparm ? local_regparm - globals : 0;
3385
3386 if (local_regparm > regparm)
3387 regparm = local_regparm;
e767b5be
JH
3388 }
3389 }
ee2f65b4 3390
e767b5be 3391 return regparm;
483ab821
MM
3392}
3393
3e0a5abd
UB
3394/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3395 DFmode (2) arguments in SSE registers for a function with the
3396 indicated TYPE and DECL. DECL may be NULL when calling function
3397 indirectly or considering a libcall. Otherwise return 0. */
2f84b963
RG
3398
3399static int
7074bc2e 3400ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
2f84b963 3401{
ee2f65b4
RH
3402 gcc_assert (!TARGET_64BIT);
3403
2f84b963
RG
3404 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3405 by the sseregparm attribute. */
1f97667f 3406 if (TARGET_SSEREGPARM
ee2f65b4 3407 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2f84b963
RG
3408 {
3409 if (!TARGET_SSE)
3410 {
7074bc2e
L
3411 if (warn)
3412 {
3413 if (decl)
3414 error ("Calling %qD with attribute sseregparm without "
3415 "SSE/SSE2 enabled", decl);
3416 else
3417 error ("Calling %qT with attribute sseregparm without "
3418 "SSE/SSE2 enabled", type);
3419 }
2f84b963
RG
3420 return 0;
3421 }
3422
3423 return 2;
3424 }
3425
56829cae 3426 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
ee2f65b4
RH
3427 (and DFmode for SSE2) arguments in SSE registers. */
3428 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2f84b963 3429 {
586de218 3430 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
b1d5455a 3431 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
2f84b963
RG
3432 if (i && i->local)
3433 return TARGET_SSE2 ? 2 : 1;
3434 }
3435
3436 return 0;
3437}
3438
f676971a 3439/* Return true if EAX is live at the start of the function. Used by
fe9f516f
RH
3440 ix86_expand_prologue to determine if we need special help before
3441 calling allocate_stack_worker. */
3442
3443static bool
3444ix86_eax_live_at_start_p (void)
3445{
3446 /* Cheat. Don't bother working forward from ix86_function_regparm
3447 to the function type to whether an actual argument is located in
3448 eax. Instead just look at cfg info, which is still close enough
3449 to correct at this point. This gives false positives for broken
3450 functions that might use uninitialized data that happens to be
3451 allocated in eax, but who cares? */
eaf7f7e7 3452 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
fe9f516f
RH
3453}
3454
b08de47e
MM
3455/* Value is the number of bytes of arguments automatically
3456 popped when returning from a subroutine call.
3457 FUNDECL is the declaration node of the function (as a tree),
3458 FUNTYPE is the data type of the function (as a tree),
3459 or for a library call it is an identifier node for the subroutine name.
3460 SIZE is the number of bytes of arguments passed on the stack.
3461
3462 On the 80386, the RTD insn may be used to pop them if the number
3463 of args is fixed, but if the number is variable then the caller
3464 must pop them all. RTD can't be used for library calls now
3465 because the library is compiled with the Unix compiler.
3466 Use of RTD is a selectable option, since it is incompatible with
3467 standard Unix calling sequences. If the option is not selected,
3468 the caller must always pop the args.
3469
3470 The attribute stdcall is equivalent to RTD on a per module basis. */
3471
3472int
b96a374d 3473ix86_return_pops_args (tree fundecl, tree funtype, int size)
79325812 3474{
ee2f65b4
RH
3475 int rtd;
3476
3477 /* None of the 64-bit ABIs pop arguments. */
3478 if (TARGET_64BIT)
3479 return 0;
3480
3481 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 3482
43f3a59d 3483 /* Cdecl functions override -mrtd, and never pop the stack. */
ee2f65b4
RH
3484 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3485 {
3486 /* Stdcall and fastcall functions will pop the stack if not
3487 variable args. */
3488 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3489 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3490 rtd = 1;
3491
04e1d06b 3492 if (rtd && ! stdarg_p (funtype))
ee2f65b4
RH
3493 return size;
3494 }
79325812 3495
232b8f52 3496 /* Lose any fake structure return argument if it is passed on the stack. */
61f71b34 3497 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
61fec9ff 3498 && !KEEP_AGGREGATE_RETURN_POINTER)
232b8f52 3499 {
e767b5be 3500 int nregs = ix86_function_regparm (funtype, fundecl);
ee2f65b4 3501 if (nregs == 0)
232b8f52
JJ
3502 return GET_MODE_SIZE (Pmode);
3503 }
3504
3505 return 0;
b08de47e 3506}
b08de47e
MM
3507\f
3508/* Argument support functions. */
3509
53c17031
JH
3510/* Return true when register may be used to pass function parameters. */
3511bool
b96a374d 3512ix86_function_arg_regno_p (int regno)
53c17031
JH
3513{
3514 int i;
ccf8e764 3515 const int *parm_regs;
ee2f65b4 3516
53c17031 3517 if (!TARGET_64BIT)
88c6f101
HMC
3518 {
3519 if (TARGET_MACHO)
3520 return (regno < REGPARM_MAX
3521 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3522 else
3523 return (regno < REGPARM_MAX
3524 || (TARGET_MMX && MMX_REGNO_P (regno)
3525 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3526 || (TARGET_SSE && SSE_REGNO_P (regno)
3527 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3528 }
3529
3530 if (TARGET_MACHO)
3531 {
3532 if (SSE_REGNO_P (regno) && TARGET_SSE)
3533 return true;
3534 }
3535 else
3536 {
3537 if (TARGET_SSE && SSE_REGNO_P (regno)
3538 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3539 return true;
3540 }
ee2f65b4 3541
7c800926
KT
3542 /* TODO: The function should depend on current function ABI but
3543 builtins.c would need updating then. Therefore we use the
3544 default ABI. */
3545
53c17031 3546 /* RAX is used as hidden argument to va_arg functions. */
7c800926 3547 if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
53c17031 3548 return true;
ee2f65b4 3549
7c800926 3550 if (DEFAULT_ABI == MS_ABI)
ccf8e764
RH
3551 parm_regs = x86_64_ms_abi_int_parameter_registers;
3552 else
3553 parm_regs = x86_64_int_parameter_registers;
7c800926
KT
3554 for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
3555 : X86_64_REGPARM_MAX); i++)
ccf8e764 3556 if (regno == parm_regs[i])
53c17031
JH
3557 return true;
3558 return false;
3559}
3560
fe984136
RH
3561/* Return if we do not know how to pass TYPE solely in registers. */
3562
3563static bool
586de218 3564ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
fe984136
RH
3565{
3566 if (must_pass_in_stack_var_size_or_pad (mode, type))
3567 return true;
dcbca208
RH
3568
3569 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3570 The layout_type routine is crafty and tries to trick us into passing
3571 currently unsupported vector types on the stack by using TImode. */
3572 return (!TARGET_64BIT && mode == TImode
3573 && type && TREE_CODE (type) != VECTOR_TYPE);
fe984136
RH
3574}
3575
7c800926
KT
3576/* It returns the size, in bytes, of the area reserved for arguments passed
3577 in registers for the function represented by fndecl dependent to the used
3578 abi format. */
8a762fcb 3579int
7c800926
KT
3580ix86_reg_parm_stack_space (const_tree fndecl)
3581{
3582 int call_abi = 0;
3583 /* For libcalls it is possible that there is no fndecl at hand.
3584 Therefore assume for this case the default abi of the target. */
3585 if (!fndecl)
3586 call_abi = DEFAULT_ABI;
3587 else
3588 call_abi = ix86_function_abi (fndecl);
3589 if (call_abi == 1)
3590 return 32;
3591 return 0;
3592}
3593
3594/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
3595 call abi used. */
3596int
3597ix86_function_type_abi (const_tree fntype)
3598{
3599 if (TARGET_64BIT && fntype != NULL)
3600 {
3601 int abi;
3602 if (DEFAULT_ABI == SYSV_ABI)
3603 abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
3604 else
3605 abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
3606
3607 if (DEFAULT_ABI == MS_ABI && abi == SYSV_ABI)
3608 sorry ("using sysv calling convention on target w64 is not supported");
3609
3610 return abi;
3611 }
3612 return DEFAULT_ABI;
3613}
3614
3615int
3616ix86_function_abi (const_tree fndecl)
3617{
3618 if (! fndecl)
3619 return DEFAULT_ABI;
3620 return ix86_function_type_abi (TREE_TYPE (fndecl));
3621}
3622
3623/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
3624 call abi used. */
3625int
3626ix86_cfun_abi (void)
3627{
3628 if (! cfun || ! TARGET_64BIT)
3629 return DEFAULT_ABI;
3630 return cfun->machine->call_abi;
3631}
3632
3633/* regclass.c */
3634extern void init_regs (void);
3635
3636/* Implementation of call abi switching target hook. Specific to FNDECL
3637 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
3638 for more details.
3639 To prevent redudant calls of costy function init_regs (), it checks not to
3640 reset register usage for default abi. */
3641void
3642ix86_call_abi_override (const_tree fndecl)
3643{
3644 if (fndecl == NULL_TREE)
3645 cfun->machine->call_abi = DEFAULT_ABI;
3646 else
3647 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
8a762fcb 3648 if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
7c800926
KT
3649 {
3650 if (call_used_regs[4 /*RSI*/] != 0 || call_used_regs[5 /*RDI*/] != 0)
3651 {
3652 call_used_regs[4 /*RSI*/] = 0;
3653 call_used_regs[5 /*RDI*/] = 0;
3654 init_regs ();
3655 }
3656 }
8a762fcb 3657 else if (TARGET_64BIT)
7c800926
KT
3658 {
3659 if (call_used_regs[4 /*RSI*/] != 1 || call_used_regs[5 /*RDI*/] != 1)
3660 {
3661 call_used_regs[4 /*RSI*/] = 1;
3662 call_used_regs[5 /*RDI*/] = 1;
3663 init_regs ();
3664 }
3665 }
3666}
3667
b08de47e
MM
3668/* Initialize a variable CUM of type CUMULATIVE_ARGS
3669 for a call to a function whose data type is FNTYPE.
3670 For a library call, FNTYPE is 0. */
3671
3672void
b96a374d
AJ
3673init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3674 tree fntype, /* tree ptr for function decl */
3675 rtx libname, /* SYMBOL_REF of library name or 0 */
3676 tree fndecl)
b08de47e 3677{
d6951cae 3678 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
ee2f65b4 3679 memset (cum, 0, sizeof (*cum));
b08de47e 3680
7c800926 3681 cum->call_abi = ix86_function_type_abi (fntype);
b08de47e 3682 /* Set up the number of registers to use for passing arguments. */
2f84b963 3683 cum->nregs = ix86_regparm;
7c800926
KT
3684 if (TARGET_64BIT)
3685 {
3686 if (cum->call_abi != DEFAULT_ABI)
3687 cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
3688 : X64_REGPARM_MAX;
3689 }
78fbfc4b 3690 if (TARGET_SSE)
7c800926
KT
3691 {
3692 cum->sse_nregs = SSE_REGPARM_MAX;
3693 if (TARGET_64BIT)
3694 {
3695 if (cum->call_abi != DEFAULT_ABI)
3696 cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
3697 : X64_SSE_REGPARM_MAX;
3698 }
3699 }
78fbfc4b
JB
3700 if (TARGET_MMX)
3701 cum->mmx_nregs = MMX_REGPARM_MAX;
e1be55d0
JH
3702 cum->warn_sse = true;
3703 cum->warn_mmx = true;
d6951cae
JH
3704
3705 /* Because type might mismatch in between caller and callee, we need to
3706 use actual type of function for local calls.
3707 FIXME: cgraph_analyze can be told to actually record if function uses
3708 va_start so for local functions maybe_vaarg can be made aggressive
3709 helping K&R code.
3710 FIXME: once typesytem is fixed, we won't need this code anymore. */
3711 if (i && i->local)
3712 fntype = TREE_TYPE (fndecl);
f8024378 3713 cum->maybe_vaarg = (fntype
04e1d06b 3714 ? (!prototype_p (fntype) || stdarg_p (fntype))
f8024378 3715 : !libname);
b08de47e 3716
ee2f65b4 3717 if (!TARGET_64BIT)
e91f04de 3718 {
ee2f65b4
RH
3719 /* If there are variable arguments, then we won't pass anything
3720 in registers in 32-bit mode. */
64ceac43 3721 if (stdarg_p (fntype))
e91f04de 3722 {
ee2f65b4
RH
3723 cum->nregs = 0;
3724 cum->sse_nregs = 0;
3725 cum->mmx_nregs = 0;
3726 cum->warn_sse = 0;
3727 cum->warn_mmx = 0;
3728 return;
e91f04de 3729 }
2f84b963 3730
ee2f65b4
RH
3731 /* Use ecx and edx registers if function has fastcall attribute,
3732 else look for regparm information. */
3733 if (fntype)
b08de47e 3734 {
ee2f65b4 3735 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
53c17031 3736 {
ee2f65b4
RH
3737 cum->nregs = 2;
3738 cum->fastcall = 1;
53c17031 3739 }
ee2f65b4
RH
3740 else
3741 cum->nregs = ix86_function_regparm (fntype, fndecl);
b08de47e 3742 }
f19e3a64 3743
ee2f65b4
RH
3744 /* Set up the number of SSE registers used for passing SFmode
3745 and DFmode arguments. Warn for mismatching ABI. */
7074bc2e 3746 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
ee2f65b4 3747 }
b08de47e
MM
3748}
3749
6c4ccfd8
RH
3750/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3751 But in the case of vector types, it is some vector mode.
3752
3753 When we have only some of our vector isa extensions enabled, then there
3754 are some modes for which vector_mode_supported_p is false. For these
3755 modes, the generic vector support in gcc will choose some non-vector mode
5656a184 3756 in order to implement the type. By computing the natural mode, we'll
6c4ccfd8
RH
3757 select the proper ABI location for the operand and not depend on whatever
3758 the middle-end decides to do with these vector types. */
3759
3760static enum machine_mode
586de218 3761type_natural_mode (const_tree type)
6c4ccfd8
RH
3762{
3763 enum machine_mode mode = TYPE_MODE (type);
3764
3765 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3766 {
3767 HOST_WIDE_INT size = int_size_in_bytes (type);
3768 if ((size == 8 || size == 16)
3769 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3770 && TYPE_VECTOR_SUBPARTS (type) > 1)
3771 {
3772 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3773
3774 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3775 mode = MIN_MODE_VECTOR_FLOAT;
3776 else
3777 mode = MIN_MODE_VECTOR_INT;
3778
3779 /* Get the mode which has this inner mode and number of units. */
3780 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3781 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3782 && GET_MODE_INNER (mode) == innermode)
3783 return mode;
3784
d0396b79 3785 gcc_unreachable ();
6c4ccfd8
RH
3786 }
3787 }
3788
3789 return mode;
3790}
3791
3792/* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3793 this may not agree with the mode that the type system has chosen for the
3794 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3795 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3796
3797static rtx
3798gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3799 unsigned int regno)
3800{
3801 rtx tmp;
3802
3803 if (orig_mode != BLKmode)
3804 tmp = gen_rtx_REG (orig_mode, regno);
3805 else
3806 {
3807 tmp = gen_rtx_REG (mode, regno);
3808 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3809 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3810 }
3811
3812 return tmp;
3813}
3814
d1f87653 3815/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
f710504c 3816 of this code is to classify each 8bytes of incoming argument by the register
53c17031
JH
3817 class and assign registers accordingly. */
3818
3819/* Return the union class of CLASS1 and CLASS2.
3820 See the x86-64 PS ABI for details. */
3821
3822static enum x86_64_reg_class
b96a374d 3823merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
53c17031
JH
3824{
3825 /* Rule #1: If both classes are equal, this is the resulting class. */
3826 if (class1 == class2)
3827 return class1;
3828
3829 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3830 the other class. */
3831 if (class1 == X86_64_NO_CLASS)
3832 return class2;
3833 if (class2 == X86_64_NO_CLASS)
3834 return class1;
3835
3836 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3837 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3838 return X86_64_MEMORY_CLASS;
3839
3840 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3841 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3842 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3843 return X86_64_INTEGERSI_CLASS;
3844 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3845 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3846 return X86_64_INTEGER_CLASS;
3847
499accd7
JB
3848 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3849 MEMORY is used. */
3850 if (class1 == X86_64_X87_CLASS
3851 || class1 == X86_64_X87UP_CLASS
3852 || class1 == X86_64_COMPLEX_X87_CLASS
3853 || class2 == X86_64_X87_CLASS
3854 || class2 == X86_64_X87UP_CLASS
3855 || class2 == X86_64_COMPLEX_X87_CLASS)
53c17031
JH
3856 return X86_64_MEMORY_CLASS;
3857
3858 /* Rule #6: Otherwise class SSE is used. */
3859 return X86_64_SSE_CLASS;
3860}
3861
3862/* Classify the argument of type TYPE and mode MODE.
3863 CLASSES will be filled by the register class used to pass each word
3864 of the operand. The number of words is returned. In case the parameter
3865 should be passed in memory, 0 is returned. As a special case for zero
3866 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3867
3868 BIT_OFFSET is used internally for handling records and specifies offset
3869 of the offset in bits modulo 256 to avoid overflow cases.
3870
3871 See the x86-64 PS ABI for details.
3872*/
3873
3874static int
586de218 3875classify_argument (enum machine_mode mode, const_tree type,
b96a374d 3876 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
53c17031 3877{
296e4ae8 3878 HOST_WIDE_INT bytes =
53c17031 3879 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
23327dae 3880 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
53c17031 3881
c60ee6f5
JH
3882 /* Variable sized entities are always passed/returned in memory. */
3883 if (bytes < 0)
3884 return 0;
3885
dafc5b82 3886 if (mode != VOIDmode
fe984136 3887 && targetm.calls.must_pass_in_stack (mode, type))
dafc5b82
JH
3888 return 0;
3889
53c17031
JH
3890 if (type && AGGREGATE_TYPE_P (type))
3891 {
3892 int i;
3893 tree field;
3894 enum x86_64_reg_class subclasses[MAX_CLASSES];
3895
3896 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3897 if (bytes > 16)
3898 return 0;
3899
3900 for (i = 0; i < words; i++)
3901 classes[i] = X86_64_NO_CLASS;
3902
3903 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3904 signalize memory class, so handle it as special case. */
3905 if (!words)
3906 {
3907 classes[0] = X86_64_NO_CLASS;
3908 return 1;
3909 }
3910
3911 /* Classify each field of record and merge classes. */
d0396b79 3912 switch (TREE_CODE (type))
53c17031 3913 {
d0396b79 3914 case RECORD_TYPE:
43f3a59d 3915 /* And now merge the fields of structure. */
53c17031
JH
3916 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3917 {
3918 if (TREE_CODE (field) == FIELD_DECL)
3919 {
3920 int num;
3921
f7360901
VR
3922 if (TREE_TYPE (field) == error_mark_node)
3923 continue;
3924
53c17031
JH
3925 /* Bitfields are always classified as integer. Handle them
3926 early, since later code would consider them to be
3927 misaligned integers. */
3928 if (DECL_BIT_FIELD (field))
3929 {
9286af97
JH
3930 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3931 i < ((int_bit_position (field) + (bit_offset % 64))
53c17031 3932 + tree_low_cst (DECL_SIZE (field), 0)
b96a374d 3933 + 63) / 8 / 8; i++)
53c17031
JH
3934 classes[i] =
3935 merge_classes (X86_64_INTEGER_CLASS,
3936 classes[i]);
3937 }
3938 else
3939 {
3940 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3941 TREE_TYPE (field), subclasses,
3942 (int_bit_position (field)
3943 + bit_offset) % 256);
3944 if (!num)
3945 return 0;
3946 for (i = 0; i < num; i++)
3947 {
3948 int pos =
db01f480 3949 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
53c17031
JH
3950 classes[i + pos] =
3951 merge_classes (subclasses[i], classes[i + pos]);
3952 }
3953 }
3954 }
3955 }
d0396b79 3956 break;
91ea38f9 3957
d0396b79
NS
3958 case ARRAY_TYPE:
3959 /* Arrays are handled as small records. */
3960 {
3961 int num;
3962 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3963 TREE_TYPE (type), subclasses, bit_offset);
3964 if (!num)
3965 return 0;
91ea38f9 3966
d0396b79
NS
3967 /* The partial classes are now full classes. */
3968 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3969 subclasses[0] = X86_64_SSE_CLASS;
3970 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3971 subclasses[0] = X86_64_INTEGER_CLASS;
5656a184 3972
d0396b79
NS
3973 for (i = 0; i < words; i++)
3974 classes[i] = subclasses[i % num];
5656a184 3975
d0396b79
NS
3976 break;
3977 }
3978 case UNION_TYPE:
3979 case QUAL_UNION_TYPE:
3980 /* Unions are similar to RECORD_TYPE but offset is always 0.
3981 */
53c17031
JH
3982 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3983 {
3984 if (TREE_CODE (field) == FIELD_DECL)
3985 {
3986 int num;
118ed72a
VR
3987
3988 if (TREE_TYPE (field) == error_mark_node)
3989 continue;
3990
53c17031
JH
3991 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3992 TREE_TYPE (field), subclasses,
3993 bit_offset);
3994 if (!num)
3995 return 0;
3996 for (i = 0; i < num; i++)
3997 classes[i] = merge_classes (subclasses[i], classes[i]);
3998 }
3999 }
d0396b79
NS
4000 break;
4001
4002 default:
4003 gcc_unreachable ();
53c17031 4004 }
53c17031
JH
4005
4006 /* Final merger cleanup. */
4007 for (i = 0; i < words; i++)
4008 {
4009 /* If one class is MEMORY, everything should be passed in
4010 memory. */
4011 if (classes[i] == X86_64_MEMORY_CLASS)
4012 return 0;
4013
d6a7951f 4014 /* The X86_64_SSEUP_CLASS should be always preceded by
53c17031
JH
4015 X86_64_SSE_CLASS. */
4016 if (classes[i] == X86_64_SSEUP_CLASS
4017 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
4018 classes[i] = X86_64_SSE_CLASS;
4019
d6a7951f 4020 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
4021 if (classes[i] == X86_64_X87UP_CLASS
4022 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
4023 classes[i] = X86_64_SSE_CLASS;
4024 }
4025 return words;
4026 }
4027
4028 /* Compute alignment needed. We align all types to natural boundaries with
4029 exception of XFmode that is aligned to 64bits. */
4030 if (mode != VOIDmode && mode != BLKmode)
4031 {
4032 int mode_alignment = GET_MODE_BITSIZE (mode);
4033
4034 if (mode == XFmode)
4035 mode_alignment = 128;
4036 else if (mode == XCmode)
4037 mode_alignment = 256;
2c6b27c3
JH
4038 if (COMPLEX_MODE_P (mode))
4039 mode_alignment /= 2;
f5143c46 4040 /* Misaligned fields are always returned in memory. */
53c17031
JH
4041 if (bit_offset % mode_alignment)
4042 return 0;
4043 }
4044
9e9fb0ce 4045 /* for V1xx modes, just use the base mode */
10a97ae6 4046 if (VECTOR_MODE_P (mode) && mode != V1DImode
9e9fb0ce
JB
4047 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
4048 mode = GET_MODE_INNER (mode);
4049
53c17031
JH
4050 /* Classification of atomic types. */
4051 switch (mode)
4052 {
a81083b2
BE
4053 case SDmode:
4054 case DDmode:
4055 classes[0] = X86_64_SSE_CLASS;
4056 return 1;
4057 case TDmode:
4058 classes[0] = X86_64_SSE_CLASS;
4059 classes[1] = X86_64_SSEUP_CLASS;
4060 return 2;
53c17031
JH
4061 case DImode:
4062 case SImode:
4063 case HImode:
4064 case QImode:
4065 case CSImode:
4066 case CHImode:
4067 case CQImode:
4068 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
4069 classes[0] = X86_64_INTEGERSI_CLASS;
4070 else
4071 classes[0] = X86_64_INTEGER_CLASS;
4072 return 1;
4073 case CDImode:
4074 case TImode:
4075 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
4076 return 2;
4077 case CTImode:
9e9fb0ce 4078 return 0;
53c17031
JH
4079 case SFmode:
4080 if (!(bit_offset % 64))
4081 classes[0] = X86_64_SSESF_CLASS;
4082 else
4083 classes[0] = X86_64_SSE_CLASS;
4084 return 1;
4085 case DFmode:
4086 classes[0] = X86_64_SSEDF_CLASS;
4087 return 1;
f8a1ebc6 4088 case XFmode:
53c17031
JH
4089 classes[0] = X86_64_X87_CLASS;
4090 classes[1] = X86_64_X87UP_CLASS;
4091 return 2;
f8a1ebc6 4092 case TFmode:
9e9fb0ce
JB
4093 classes[0] = X86_64_SSE_CLASS;
4094 classes[1] = X86_64_SSEUP_CLASS;
53c17031
JH
4095 return 2;
4096 case SCmode:
4097 classes[0] = X86_64_SSE_CLASS;
4098 return 1;
9e9fb0ce
JB
4099 case DCmode:
4100 classes[0] = X86_64_SSEDF_CLASS;
4101 classes[1] = X86_64_SSEDF_CLASS;
4102 return 2;
4103 case XCmode:
499accd7
JB
4104 classes[0] = X86_64_COMPLEX_X87_CLASS;
4105 return 1;
9e9fb0ce 4106 case TCmode:
499accd7 4107 /* This modes is larger than 16 bytes. */
9e9fb0ce 4108 return 0;
e95d6b23
JH
4109 case V4SFmode:
4110 case V4SImode:
495333a6
JH
4111 case V16QImode:
4112 case V8HImode:
4113 case V2DFmode:
4114 case V2DImode:
e95d6b23
JH
4115 classes[0] = X86_64_SSE_CLASS;
4116 classes[1] = X86_64_SSEUP_CLASS;
4117 return 2;
10a97ae6 4118 case V1DImode:
e95d6b23
JH
4119 case V2SFmode:
4120 case V2SImode:
4121 case V4HImode:
4122 case V8QImode:
9e9fb0ce
JB
4123 classes[0] = X86_64_SSE_CLASS;
4124 return 1;
53c17031 4125 case BLKmode:
e95d6b23 4126 case VOIDmode:
53c17031
JH
4127 return 0;
4128 default:
d0396b79 4129 gcc_assert (VECTOR_MODE_P (mode));
5656a184 4130
d0396b79
NS
4131 if (bytes > 16)
4132 return 0;
5656a184 4133
d0396b79 4134 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5656a184 4135
d0396b79
NS
4136 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
4137 classes[0] = X86_64_INTEGERSI_CLASS;
4138 else
4139 classes[0] = X86_64_INTEGER_CLASS;
4140 classes[1] = X86_64_INTEGER_CLASS;
4141 return 1 + (bytes > 8);
53c17031
JH
4142 }
4143}
4144
4145/* Examine the argument and return set number of register required in each
f5143c46 4146 class. Return 0 iff parameter should be passed in memory. */
53c17031 4147static int
586de218 4148examine_argument (enum machine_mode mode, const_tree type, int in_return,
b96a374d 4149 int *int_nregs, int *sse_nregs)
53c17031 4150{
9415ab7d
TN
4151 enum x86_64_reg_class regclass[MAX_CLASSES];
4152 int n = classify_argument (mode, type, regclass, 0);
53c17031
JH
4153
4154 *int_nregs = 0;
4155 *sse_nregs = 0;
4156 if (!n)
4157 return 0;
4158 for (n--; n >= 0; n--)
9415ab7d 4159 switch (regclass[n])
53c17031
JH
4160 {
4161 case X86_64_INTEGER_CLASS:
4162 case X86_64_INTEGERSI_CLASS:
4163 (*int_nregs)++;
4164 break;
4165 case X86_64_SSE_CLASS:
4166 case X86_64_SSESF_CLASS:
4167 case X86_64_SSEDF_CLASS:
4168 (*sse_nregs)++;
4169 break;
4170 case X86_64_NO_CLASS:
4171 case X86_64_SSEUP_CLASS:
4172 break;
4173 case X86_64_X87_CLASS:
4174 case X86_64_X87UP_CLASS:
4175 if (!in_return)
4176 return 0;
4177 break;
499accd7
JB
4178 case X86_64_COMPLEX_X87_CLASS:
4179 return in_return ? 2 : 0;
53c17031 4180 case X86_64_MEMORY_CLASS:
d0396b79 4181 gcc_unreachable ();
53c17031
JH
4182 }
4183 return 1;
4184}
6c4ccfd8 4185
53c17031
JH
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.

   MODE/ORIG_MODE describe the argument, TYPE may be NULL for libcalls.
   IN_RETURN is nonzero when building a return-value container.
   NINTREGS/NSSEREGS are the registers still available; INTREG points at
   the next free integer-register number, SSE_REGNO is the next SSE one.
   Returns NULL when the value must be passed in memory, a plain REG for
   the simple cases, or a PARALLEL describing a multi-register split.  */

static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
		     const_tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state, so each
     diagnostic below is emitted at most once per compilation.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, regclass, 0);
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs,
			 &needed_sseregs))
    return NULL;
  /* Not enough free registers of the required classes: pass in memory.  */
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
	  || regclass[i] == X86_64_X87UP_CLASS
	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (regclass[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  /* A 16-byte SSE value occupies one SSE register as a whole.  */
  if (n == 2 && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));

  if (n == 2
      && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  /* A 16-byte integer value in two consecutive integer registers can be
     represented as a single REG in the wide mode.  */
  if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      switch (regclass[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (regclass[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode, *intreg),
					     GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (SFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (DFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  /* An SSEUP class following means this chunk spans 16 bytes.  */
	  if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
	    tmpmode = TImode;
	  else
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  if (tmpmode == TImode)
	    i++;
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}
4354
ee2f65b4
RH
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  32-bit variant; BYTES/WORDS are the argument's
   size in bytes and in word-sized units.  */

static void
function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  switch (mode)
    {
    default:
      break;

    case BLKmode:
      /* Variable-sized BLKmode arguments don't consume registers.  */
      if (bytes < 0)
	break;
      /* FALLTHRU */

    case DImode:
    case SImode:
    case HImode:
    case QImode:
      /* Integer-like arguments consume WORDS integer registers.  */
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;

      if (cum->nregs <= 0)
	{
	  cum->nregs = 0;
	  cum->regno = 0;
	}
      break;

    case DFmode:
      /* DFmode goes in SSE registers only with -mfpmath=sse for both
	 SFmode and DFmode (float_in_sse == 2); falls through to the
	 SFmode test otherwise.  */
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */

    case TImode:
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      /* SSE-class arguments consume one SSE register, but only when not
	 wrapped in an aggregate.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1DImode:
      /* MMX-class arguments consume one MMX register, same aggregate
	 restriction as above.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      break;
    }
}
4435
ee2f65b4
RH
4436static void
4437function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4438 tree type, HOST_WIDE_INT words)
4439{
4440 int int_nregs, sse_nregs;
4441
4442 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
4443 cum->words += words;
4444 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
4445 {
4446 cum->nregs -= int_nregs;
4447 cum->sse_nregs -= sse_nregs;
4448 cum->regno += int_nregs;
4449 cum->sse_regno += sse_nregs;
4450 }
4451 else
4452 cum->words += words;
4453}
4454
ccf8e764
RH
4455static void
4456function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
4457 HOST_WIDE_INT words)
4458{
4459 /* Otherwise, this should be passed indirect. */
4460 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
4461
4462 cum->words += words;
4463 if (cum->nregs > 0)
4464 {
4465 cum->nregs -= 1;
4466 cum->regno += 1;
4467 }
4468}
4469
ee2f65b4
RH
4470void
4471function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4472 tree type, int named ATTRIBUTE_UNUSED)
4473{
4474 HOST_WIDE_INT bytes, words;
4475
4476 if (mode == BLKmode)
4477 bytes = int_size_in_bytes (type);
4478 else
4479 bytes = GET_MODE_SIZE (mode);
4480 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4481
4482 if (type)
4483 mode = type_natural_mode (type);
4484
7c800926 4485 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
ccf8e764
RH
4486 function_arg_advance_ms_64 (cum, bytes, words);
4487 else if (TARGET_64BIT)
ee2f65b4
RH
4488 function_arg_advance_64 (cum, mode, type, words);
4489 else
4490 function_arg_advance_32 (cum, mode, type, bytes, words);
4491}
4492
b08de47e
MM
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).

   This is the 32-bit variant; BYTES/WORDS give the argument size.  */

static rtx
function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, tree type,
		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  /* Each ABI warning below is emitted at most once per compilation.  */
  static bool warnedsse, warnedmmx;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	{
	  int regno = cum->regno;

	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX if it isn't an
	     aggregate type.  */
	  if (cum->fastcall)
	    {
	      if (mode == BLKmode
		  || mode == DImode
		  || (type && AGGREGATE_TYPE_P (type)))
		break;

	      /* ECX not EAX is the first allocated register.  */
	      if (regno == AX_REG)
		regno = CX_REG;
	    }
	  return gen_rtx_REG (mode, regno);
	}
      break;

    case DFmode:
      /* DFmode reaches the SSE cases only when float_in_sse >= 2;
	 otherwise it is pushed on the stack (falls out of the switch).  */
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */
    case TImode:
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_SSE && !warnedsse && cum->warn_sse)
	    {
	      warnedsse = true;
	      warning (0, "SSE vector argument without SSE enabled "
		       "changes the ABI");
	    }
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector argument without MMX enabled "
		       "changes the ABI");
	    }
	  if (cum->mmx_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->mmx_regno + FIRST_MMX_REG);
	}
      break;
    }

  /* NULL means: push the argument on the stack.  */
  return NULL_RTX;
}
b08de47e 4602
ee2f65b4
RH
4603static rtx
4604function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4605 enum machine_mode orig_mode, tree type)
4606{
4607 /* Handle a hidden AL argument containing number of registers
4608 for varargs x86-64 functions. */
4609 if (mode == VOIDmode)
4610 return GEN_INT (cum->maybe_vaarg
4611 ? (cum->sse_nregs < 0
7c800926
KT
4612 ? (cum->call_abi == DEFAULT_ABI
4613 ? SSE_REGPARM_MAX
4614 : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4615 : X64_SSE_REGPARM_MAX))
4616 : cum->sse_regno)
ee2f65b4
RH
4617 : -1);
4618
4619 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4620 cum->sse_nregs,
4621 &x86_64_int_parameter_registers [cum->regno],
4622 cum->sse_regno);
4623}
b08de47e 4624
ccf8e764
RH
/* Return the register in which to pass an argument under the Microsoft
   x64 ABI, or NULL_RTX to pass it on the stack.  MODE/ORIG_MODE and
   BYTES describe the argument; NAMED is zero for an ellipsis argument.  */
static rtx
function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
		    enum machine_mode orig_mode, int named,
		    HOST_WIDE_INT bytes)
{
  unsigned int regno;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)
    return NULL_RTX;

  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only floating point modes are passed in anything but integer regs.  */
  if (TARGET_SSE && (mode == SFmode || mode == DFmode))
    {
      if (named)
	regno = cum->regno + FIRST_SSE_REG;
      else
	{
	  rtx t1, t2;

	  /* Unnamed floating parameters are passed in both the
	     SSE and integer registers.  */
	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
	  t2 = gen_rtx_REG (mode, regno);
	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
	}
    }
  /* Handle aggregated types passed in register: pick an integer mode
     matching the aggregate's size (1-8 bytes).  */
  if (orig_mode == BLKmode)
    {
      if (bytes > 0 && bytes <= 8)
	mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)
	mode = DImode;
    }

  return gen_reg_or_parallel (mode, orig_mode, regno);
}
4671
ee2f65b4
RH
4672rtx
4673function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
ccf8e764 4674 tree type, int named)
ee2f65b4
RH
4675{
4676 enum machine_mode mode = omode;
4677 HOST_WIDE_INT bytes, words;
4678
4679 if (mode == BLKmode)
4680 bytes = int_size_in_bytes (type);
4681 else
4682 bytes = GET_MODE_SIZE (mode);
4683 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4684
4685 /* To simplify the code below, represent vector types with a vector mode
4686 even if MMX/SSE are not active. */
4687 if (type && TREE_CODE (type) == VECTOR_TYPE)
4688 mode = type_natural_mode (type);
4689
7c800926 4690 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
893125e0 4691 return function_arg_ms_64 (cum, mode, omode, named, bytes);
ccf8e764 4692 else if (TARGET_64BIT)
ee2f65b4
RH
4693 return function_arg_64 (cum, mode, omode, type);
4694 else
4695 return function_arg_32 (cum, mode, omode, type, bytes, words);
b08de47e 4696}
53c17031 4697
09b2e78d
ZD
4698/* A C expression that indicates when an argument must be passed by
4699 reference. If nonzero for an argument, a copy of that argument is
4700 made in memory and a pointer to the argument is passed instead of
4701 the argument itself. The pointer is passed in whatever way is
4702 appropriate for passing a pointer to that type. */
4703
8cd5a4e0
RH
4704static bool
4705ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4706 enum machine_mode mode ATTRIBUTE_UNUSED,
586de218 4707 const_tree type, bool named ATTRIBUTE_UNUSED)
09b2e78d 4708{
893125e0 4709 /* See Windows x64 Software Convention. */
7c800926 4710 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
ccf8e764 4711 {
893125e0 4712 int msize = (int) GET_MODE_SIZE (mode);
ccf8e764
RH
4713 if (type)
4714 {
4715 /* Arrays are passed by reference. */
4716 if (TREE_CODE (type) == ARRAY_TYPE)
4717 return true;
4718
4719 if (AGGREGATE_TYPE_P (type))
4720 {
4721 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4722 are passed by reference. */
893125e0 4723 msize = int_size_in_bytes (type);
ccf8e764
RH
4724 }
4725 }
4726
4727 /* __m128 is passed by reference. */
893125e0
KT
4728 switch (msize) {
4729 case 1: case 2: case 4: case 8:
4730 break;
4731 default:
4732 return true;
4733 }
ccf8e764
RH
4734 }
4735 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
ee2f65b4 4736 return 1;
09b2e78d
ZD
4737
4738 return 0;
4739}
4740
8b978a57 4741/* Return true when TYPE should be 128bit aligned for 32bit argument passing
4317a2fa 4742 ABI. */
8b978a57 4743static bool
4317a2fa 4744contains_aligned_value_p (tree type)
8b978a57
JH
4745{
4746 enum machine_mode mode = TYPE_MODE (type);
4317a2fa 4747 if (((TARGET_SSE && SSE_REG_MODE_P (mode)) || mode == TDmode)
8b978a57
JH
4748 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4749 return true;
4750 if (TYPE_ALIGN (type) < 128)
4751 return false;
4752
4753 if (AGGREGATE_TYPE_P (type))
4754 {
2a43945f 4755 /* Walk the aggregates recursively. */
d0396b79 4756 switch (TREE_CODE (type))
8b978a57 4757 {
d0396b79
NS
4758 case RECORD_TYPE:
4759 case UNION_TYPE:
4760 case QUAL_UNION_TYPE:
4761 {
4762 tree field;
5656a184 4763
1faf92ae 4764 /* Walk all the structure fields. */
d0396b79
NS
4765 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4766 {
4767 if (TREE_CODE (field) == FIELD_DECL
4317a2fa 4768 && contains_aligned_value_p (TREE_TYPE (field)))
fa743e8c 4769 return true;
d0396b79
NS
4770 }
4771 break;
4772 }
4773
4774 case ARRAY_TYPE:
4775 /* Just for use if some languages passes arrays by value. */
4317a2fa 4776 if (contains_aligned_value_p (TREE_TYPE (type)))
8b978a57 4777 return true;
5139c66b 4778 break;
5656a184 4779
d0396b79
NS
4780 default:
4781 gcc_unreachable ();
8b978a57 4782 }
8b978a57
JH
4783 }
4784 return false;
4785}
4786
bb498ea3
AH
4787/* Gives the alignment boundary, in bits, of an argument with the
4788 specified mode and type. */
53c17031
JH
4789
4790int
b96a374d 4791ix86_function_arg_boundary (enum machine_mode mode, tree type)
53c17031
JH
4792{
4793 int align;
53c17031 4794 if (type)
23ac85e7 4795 {
a20007a4
L
4796 /* Since canonical type is used for call, we convert it to
4797 canonical type if needed. */
4798 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
4799 type = TYPE_CANONICAL (type);
4800 align = TYPE_ALIGN (type);
23ac85e7 4801 }
53c17031
JH
4802 else
4803 align = GET_MODE_ALIGNMENT (mode);
4804 if (align < PARM_BOUNDARY)
4805 align = PARM_BOUNDARY;
4317a2fa
L
4806 /* In 32bit, only _Decimal128 is aligned to its natural boundary. */
4807 if (!TARGET_64BIT && mode != TDmode)
8b978a57
JH
4808 {
4809 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4810 make an exception for SSE modes since these require 128bit
b96a374d 4811 alignment.
8b978a57
JH
4812
4813 The handling here differs from field_alignment. ICC aligns MMX
4814 arguments to 4 byte boundaries, while structure fields are aligned
4815 to 8 byte boundaries. */
4317a2fa 4816 if (!type)
8b978a57 4817 {
4317a2fa 4818 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)) && mode != TDmode)
8b978a57
JH
4819 align = PARM_BOUNDARY;
4820 }
4821 else
4822 {
4317a2fa 4823 if (!contains_aligned_value_p (type))
8b978a57
JH
4824 align = PARM_BOUNDARY;
4825 }
8b978a57 4826 }
35dd7cc3
L
4827 if (align > BIGGEST_ALIGNMENT)
4828 align = BIGGEST_ALIGNMENT;
53c17031
JH
4829 return align;
4830}
4831
4832/* Return true if N is a possible register number of function value. */
ee2f65b4 4833
53c17031 4834bool
b96a374d 4835ix86_function_value_regno_p (int regno)
53c17031 4836{
ee2f65b4 4837 switch (regno)
88c6f101 4838 {
ee2f65b4
RH
4839 case 0:
4840 return true;
aa941a60 4841
ee2f65b4 4842 case FIRST_FLOAT_REG:
7c800926
KT
4843 /* TODO: The function should depend on current function ABI but
4844 builtins.c would need updating then. Therefore we use the
4845 default ABI. */
4846 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
ccf8e764 4847 return false;
ee2f65b4 4848 return TARGET_FLOAT_RETURNS_IN_80387;
aa941a60 4849
ee2f65b4
RH
4850 case FIRST_SSE_REG:
4851 return TARGET_SSE;
4852
4853 case FIRST_MMX_REG:
4854 if (TARGET_MACHO || TARGET_64BIT)
4855 return false;
4856 return TARGET_MMX;
88c6f101 4857 }
ee2f65b4
RH
4858
4859 return false;
53c17031
JH
4860}
4861
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.

   32-bit variant: picks the hard register for a return value of mode
   MODE and builds a REG in ORIG_MODE on it.  */

static rtx
function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
		   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = TARGET_MMX ? FIRST_MMX_REG : 0;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = TARGET_SSE ? FIRST_SSE_REG : 0;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax.  */
    regno = AX_REG;

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, fn, false);
      if ((sse_level >= 1 && mode == SFmode)
	  || (sse_level == 2 && mode == DFmode))
	regno = FIRST_SSE_REG;
    }

  return gen_rtx_REG (orig_mode, regno);
}
4905
/* x86-64 SysV variant of function-value lookup: return the REG or
   PARALLEL in which a value of mode MODE/ORIG_MODE and type VALTYPE is
   returned.  VALTYPE is NULL for libcalls.  */
static rtx
function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
		   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
    {
      switch (mode)
	{
	case SFmode:
	case SCmode:
	case DFmode:
	case DCmode:
	case TFmode:
	case SDmode:
	case DDmode:
	case TDmode:
	  return gen_rtx_REG (mode, FIRST_SSE_REG);
	case XFmode:
	case XCmode:
	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
	case TCmode:
	  /* Larger than 16 bytes: no register return.  */
	  return NULL;
	default:
	  return gen_rtx_REG (mode, AX_REG);
	}
    }

  ret = construct_container (mode, orig_mode, valtype, 1,
			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
			     x86_64_int_return_registers, 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}
4947
ccf8e764
RH
4948static rtx
4949function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4950{
29b74761 4951 unsigned int regno = AX_REG;
ccf8e764
RH
4952
4953 if (TARGET_SSE)
4954 {
893125e0
KT
4955 switch (GET_MODE_SIZE (mode))
4956 {
4957 case 16:
4958 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4959 && !COMPLEX_MODE_P (mode))
4960 regno = FIRST_SSE_REG;
4961 break;
4962 case 8:
4963 case 4:
4964 if (mode == SFmode || mode == DFmode)
4965 regno = FIRST_SSE_REG;
4966 break;
4967 default:
4968 break;
4969 }
ccf8e764 4970 }
ccf8e764
RH
4971 return gen_rtx_REG (orig_mode, regno);
4972}
4973
ee2f65b4 4974static rtx
586de218 4975ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
ee2f65b4 4976 enum machine_mode orig_mode, enum machine_mode mode)
53c17031 4977{
586de218 4978 const_tree fn, fntype;
ee2f65b4
RH
4979
4980 fn = NULL_TREE;
4981 if (fntype_or_decl && DECL_P (fntype_or_decl))
4982 fn = fntype_or_decl;
4983 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
a30b6839 4984
7c800926 4985 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
ccf8e764
RH
4986 return function_value_ms_64 (orig_mode, mode);
4987 else if (TARGET_64BIT)
ee2f65b4
RH
4988 return function_value_64 (orig_mode, mode, valtype);
4989 else
4990 return function_value_32 (orig_mode, mode, fntype, fn);
4991}
4992
4993static rtx
586de218 4994ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
ee2f65b4
RH
4995 bool outgoing ATTRIBUTE_UNUSED)
4996{
4997 enum machine_mode mode, orig_mode;
4998
4999 orig_mode = TYPE_MODE (valtype);
5000 mode = type_natural_mode (valtype);
5001 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
5002}
5003
5004rtx
5005ix86_libcall_value (enum machine_mode mode)
5006{
5007 return ix86_function_value_1 (NULL, NULL, mode, mode);
5008}
5009
/* Return true iff type is returned in memory.  32-bit variant.  */

static int ATTRIBUTE_UNUSED
return_in_memory_32 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size;

  if (mode == BLKmode)
    return 1;

  size = int_size_in_bytes (type);

  /* Small aggregates may be returned in registers on targets using the
     MS aggregate-return convention.  */
  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return 0;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
	return 0;

      /* MMX/3dNow values are returned in MM0,
	 except when it doesn't exist.  */
      if (size == 8)
	return (TARGET_MMX ? 0 : 1);

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
	return (TARGET_SSE ? 0 : 1);
    }

  /* long double is returned in %st(0).  */
  if (mode == XFmode)
    return 0;

  /* _Decimal128 is always returned in memory.  */
  if (mode == TDmode)
    return 1;

  if (size > 12)
    return 1;
  return 0;
}
5051
71995c2c 5052static int ATTRIBUTE_UNUSED
586de218 5053return_in_memory_64 (const_tree type, enum machine_mode mode)
ee2f65b4
RH
5054{
5055 int needed_intregs, needed_sseregs;
5056 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
5057}
5058
71995c2c 5059static int ATTRIBUTE_UNUSED
586de218 5060return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
ccf8e764
RH
5061{
5062 HOST_WIDE_INT size = int_size_in_bytes (type);
5063
893125e0
KT
5064 /* __m128 is returned in xmm0. */
5065 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
5066 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
ccf8e764
RH
5067 return 0;
5068
893125e0
KT
5069 /* Otherwise, the size must be exactly in [1248]. */
5070 return (size != 1 && size != 2 && size != 4 && size != 8);
ccf8e764
RH
5071}
5072
f680436b 5073static bool
81464b2c 5074ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
ee2f65b4 5075{
f680436b
KT
5076#ifdef SUBTARGET_RETURN_IN_MEMORY
5077 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
5078#else
5079 const enum machine_mode mode = type_natural_mode (type);
5080
d29899ba 5081 if (TARGET_64BIT_MS_ABI)
f680436b
KT
5082 return return_in_memory_ms_64 (type, mode);
5083 else if (TARGET_64BIT)
5084 return return_in_memory_64 (type, mode);
5085 else
5086 return return_in_memory_32 (type, mode);
5087#endif
ee2f65b4
RH
5088}
5089
29173496
RS
/* Return true iff TYPE is returned in memory.  This version is used
   on Solaris 10.  It is similar to the generic ix86_return_in_memory,
   but differs notably in that when MMX is available, 8-byte vectors
   are returned in memory, rather than in MMX registers.  */

bool
ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  int size;
  enum machine_mode mode = type_natural_mode (type);

  if (TARGET_64BIT)
    return return_in_memory_64 (type, mode);

  if (mode == BLKmode)
    return 1;

  size = int_size_in_bytes (type);

  if (VECTOR_MODE_P (mode))
    {
      /* Return in memory only if MMX registers *are* available.  This
	 seems backwards, but it is consistent with the existing
	 Solaris x86 ABI.  */
      if (size == 8)
	return TARGET_MMX;
      if (size == 16)
	return !TARGET_SSE;
    }
  else if (mode == TImode)
    return !TARGET_SSE;
  else if (mode == XFmode)
    /* long double comes back in %st(0).  */
    return 0;

  return size > 12;
}
5126
0397ac35
RH
/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to TARGET_RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

static rtx
ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
{
  /* Each ABI warning below is emitted at most once per compilation.  */
  static bool warnedsse, warnedmmx;

  if (!TARGET_64BIT && type)
    {
      /* Look at the return type of the function, not the function type.  */
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (!TARGET_SSE && !warnedsse)
	{
	  if (mode == TImode
	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
	    {
	      warnedsse = true;
	      warning (0, "SSE vector return without SSE enabled "
		       "changes the ABI");
	    }
	}

      if (!TARGET_MMX && !warnedmmx)
	{
	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector return without MMX enabled "
		       "changes the ABI");
	    }
	}
    }

  /* Always NULL: the struct-return pointer is pushed/passed normally.  */
  return NULL;
}
5172
ad919812
JH
5173\f
/* Create the va_list data type.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT || ix86_cfun_abi () == MS_ABI)
    return build_pointer_type (char_type_node);

  /* 64-bit SysV: build the four-field __va_list_tag record
     { gp_offset, fp_offset, overflow_arg_area, reg_save_area }.  */
  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  /* Remember the counter fields so the va_list optimizations can
     find them later.  */
  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  /* Chain the fields in declaration order; this order is what
     ix86_va_start and ix86_gimplify_va_arg walk with TREE_CHAIN.  */
  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  TREE_CHAIN (f_gpr) = f_fpr;
  TREE_CHAIN (f_fpr) = f_ovf;
  TREE_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
5217
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

/* 64-bit SysV flavor: spill the still-unused integer argument
   registers into the register save area, and emit the computed-jump
   prologue that conditionally saves the SSE argument registers.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  rtx label;
  rtx label_ref;
  rtx tmp_reg;
  rtx nsse_reg;
  alias_set_type set;
  int i;
  int regparm = ix86_regparm;

  /* Cross-ABI calls use the register count of the callee's ABI, not
     the default one.  */
  if((cum ? cum->call_abi : ix86_cfun_abi ()) != DEFAULT_ABI)
    regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;

  /* Nothing to do if va_list analysis proved no registers are read.  */
  if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
    return;

  /* Indicate to allocate space on the stack for varargs save area.  */
  ix86_save_varrargs_registers = 1;
  /* We need 16-byte stack alignment to save SSE registers.  If user
     asked for lower preferred_stack_boundary, lets just hope that he knows
     what he is doing and won't varargs SSE values.

     We also may end up assuming that only 64bit values are stored in SSE
     register let some floating point program work.  */
  if (ix86_preferred_stack_boundary >= BIGGEST_ALIGNMENT)
    crtl->stack_alignment_needed = BIGGEST_ALIGNMENT;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  /* Store each unused named-parameter GP register to its slot, but no
     more than the va_list analysis says will ever be read.  */
  for (i = cum->regno;
       i < regparm
       && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
       i++)
    {
      mem = gen_rtx_MEM (Pmode,
			 plus_constant (save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
					x86_64_int_parameter_registers[i]));
    }

  if (cum->sse_nregs && cfun->va_list_fpr_size)
    {
      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function.  We use
	 sse_prologue_save insn template that produces computed jump across
	 SSE saves.  We need some preparation work to get this working.  */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute address to jump to :
         label - eax*4 + nnamed_sse_arguments*4 */
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      gen_rtx_MULT (Pmode, nsse_reg,
					    GEN_INT (4))));
      if (cum->sse_regno)
	emit_move_insn
	  (nsse_reg,
	   gen_rtx_CONST (DImode,
			  gen_rtx_PLUS (DImode,
					label_ref,
					GEN_INT (cum->sse_regno * 4))));
      else
	emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute address of memory block we save into.  We always use pointer
	 pointing 127 bytes after first byte to store - this is needed to keep
	 instruction size limited by 4 bytes.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      plus_constant (save_area,
					     8 * X86_64_REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
					GEN_INT (cum->sse_regno), label));
    }
}
5311
ccf8e764
RH
5312static void
5313setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
5314{
4862826d 5315 alias_set_type set = get_varargs_alias_set ();
ccf8e764
RH
5316 int i;
5317
7c800926 5318 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
ccf8e764
RH
5319 {
5320 rtx reg, mem;
5321
5322 mem = gen_rtx_MEM (Pmode,
5323 plus_constant (virtual_incoming_args_rtx,
5324 i * UNITS_PER_WORD));
5325 MEM_NOTRAP_P (mem) = 1;
5326 set_mem_alias_set (mem, set);
5327
5328 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
5329 emit_move_insn (mem, reg);
5330 }
5331}
5332
ee2f65b4
RH
5333static void
5334ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5335 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5336 int no_rtl)
5337{
5338 CUMULATIVE_ARGS next_cum;
5339 tree fntype;
ee2f65b4
RH
5340
5341 /* This argument doesn't appear to be used anymore. Which is good,
5342 because the old code here didn't suppress rtl generation. */
5343 gcc_assert (!no_rtl);
5344
5345 if (!TARGET_64BIT)
5346 return;
5347
5348 fntype = TREE_TYPE (current_function_decl);
ad919812 5349
ee2f65b4
RH
5350 /* For varargs, we do not want to skip the dummy va_dcl argument.
5351 For stdargs, we do want to skip the last named argument. */
5352 next_cum = *cum;
04e1d06b 5353 if (stdarg_p (fntype))
ee2f65b4
RH
5354 function_arg_advance (&next_cum, mode, type, 1);
5355
7c800926 5356 if ((cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
ccf8e764
RH
5357 setup_incoming_varargs_ms_64 (&next_cum);
5358 else
5359 setup_incoming_varargs_64 (&next_cum);
ad919812
JH
5360}
5361
/* Implement va_start.  */

/* Initialize the four va_list fields: the gp/fp offsets into the
   register save area, the overflow (stack) area pointer, and the
   save-area base.  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI)
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  /* Walk the __va_list_tag fields in the order they were chained by
     ix86_build_builtin_va_list.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      /* gp_offset = n_gpr * 8 (bytes into the save area).  */
      type = TREE_TYPE (gpr);
      t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
		  build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (cfun->va_list_fpr_size)
    {
      /* fp_offset starts past the GP slots; each SSE slot is 16 bytes.  */
      type = TREE_TYPE (fpr);
      t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  t = make_tree (type, virtual_incoming_args_rtx);
  if (words != 0)
    t = build2 (POINTER_PLUS_EXPR, type, t,
	        size_int (words * UNITS_PER_WORD));
  t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
    {
      /* Find the register save area.
	 Prologue of the function save it right above stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
5434
/* Implement va_arg.  */

/* Gimplify a VA_ARG_EXPR for the 64-bit SysV ABI: try the register
   save area first (guarded by the gp/fp offset counters), falling back
   to the overflow (stack) area.  PRE_P receives the generated
   statements.  Returns the dereferenced value expression.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI)
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build_va_arg_indirect_ref (valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Arguments passed by reference fetch a pointer and dereference it
     at the end.  */
  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* CONTAINER is non-null when the argument can be passed in registers;
     it describes which registers and at which offsets.  */
  nat_mode = type_natural_mode (type);
  container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
				   X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
				   intreg, 0);

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");
  DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label ();
      lab_over = create_artificial_label ();

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
         on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = 1;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = 1;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	  DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build_int_cst (TREE_TYPE (fpr),
			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + X86_64_REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_convert (sizetype, gpr);
	  t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
	  t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_convert (sizetype, fpr);
	  t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
	  t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (need_temp)
	{
	  int i;
	  tree temp = create_tmp_var (type, "va_arg_tmp");

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
	  gimplify_and_add (t, pre_p);

	  /* Copy the value piecewise out of the save area into TEMP.  */
	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      tree addr_type = build_pointer_type (piece_type);
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
				      size_int (src_offset));
	      src = build_va_arg_indirect_ref (src_addr);

	      dest_addr = fold_convert (addr_type, addr);
	      dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
				       size_int (INTVAL (XEXP (slot, 1))));
	      dest = build_va_arg_indirect_ref (dest_addr);

	      t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
	      gimplify_and_add (t, pre_p);
	    }
	}

      /* Advance the offset counters past the registers we consumed.  */
      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
	  gimplify_and_add (t, pre_p);
	}

      t = build1 (GOTO_EXPR, void_type_node, lab_over);
      gimplify_and_add (t, pre_p);

      t = build1 (LABEL_EXPR, void_type_node, lab_false);
      append_to_statement_list (t, pre_p);
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
      || integer_zerop (TYPE_SIZE (type)))
    t = ovf;
  else
    {
      /* Round OVF up to the argument's alignment.  */
      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
      t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
		  size_int (align - 1));
      t = fold_convert (sizetype, t);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  size_int (-align));
      t = fold_convert (TREE_TYPE (ovf), t);
    }
  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);

  t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
  gimplify_and_add (t2, pre_p);

  /* Bump OVF past the argument just fetched.  */
  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
	      size_int (rsize * UNITS_PER_WORD));
  t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
  gimplify_and_add (t, pre_p);

  if (container)
    {
      t = build1 (LABEL_EXPR, void_type_node, lab_over);
      append_to_statement_list (t, pre_p);
    }

  ptrtype = build_pointer_type (type);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
8fe75e43
RH
5685\f
5686/* Return nonzero if OPNUM's MEM should be matched
5687 in movabs* patterns. */
fee2770d
RS
5688
5689int
8fe75e43 5690ix86_check_movabs (rtx insn, int opnum)
4f2c8ebb 5691{
8fe75e43 5692 rtx set, mem;
e075ae69 5693
8fe75e43
RH
5694 set = PATTERN (insn);
5695 if (GET_CODE (set) == PARALLEL)
5696 set = XVECEXP (set, 0, 0);
d0396b79 5697 gcc_assert (GET_CODE (set) == SET);
8fe75e43
RH
5698 mem = XEXP (set, opnum);
5699 while (GET_CODE (mem) == SUBREG)
5700 mem = SUBREG_REG (mem);
7656aee4 5701 gcc_assert (MEM_P (mem));
8fe75e43 5702 return (volatile_ok || !MEM_VOLATILE_P (mem));
2247f6ed 5703}
e075ae69 5704\f
881b2a96
RS
5705/* Initialize the table of extra 80387 mathematical constants. */
5706
5707static void
b96a374d 5708init_ext_80387_constants (void)
881b2a96
RS
5709{
5710 static const char * cst[5] =
5711 {
5712 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5713 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5714 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5715 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5716 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5717 };
5718 int i;
5719
5720 for (i = 0; i < 5; i++)
5721 {
5722 real_from_string (&ext_80387_constants_table[i], cst[i]);
5723 /* Ensure each constant is rounded to XFmode precision. */
1f48e56d 5724 real_convert (&ext_80387_constants_table[i],
f8a1ebc6 5725 XFmode, &ext_80387_constants_table[i]);
881b2a96
RS
5726 }
5727
5728 ext_80387_constants_init = 1;
5729}
5730
e075ae69 5731/* Return true if the constant is something that can be loaded with
881b2a96 5732 a special instruction. */
57dbca5e
BS
5733
5734int
b96a374d 5735standard_80387_constant_p (rtx x)
57dbca5e 5736{
27ac40e2
UB
5737 enum machine_mode mode = GET_MODE (x);
5738
2e1f15bd
UB
5739 REAL_VALUE_TYPE r;
5740
27ac40e2 5741 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
e075ae69 5742 return -1;
881b2a96 5743
27ac40e2 5744 if (x == CONST0_RTX (mode))
2b04e52b 5745 return 1;
27ac40e2 5746 if (x == CONST1_RTX (mode))
2b04e52b 5747 return 2;
881b2a96 5748
2e1f15bd
UB
5749 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5750
22cc69c4
RS
5751 /* For XFmode constants, try to find a special 80387 instruction when
5752 optimizing for size or on those CPUs that benefit from them. */
27ac40e2 5753 if (mode == XFmode
80fd744f 5754 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
881b2a96 5755 {
881b2a96
RS
5756 int i;
5757
5758 if (! ext_80387_constants_init)
5759 init_ext_80387_constants ();
5760
881b2a96
RS
5761 for (i = 0; i < 5; i++)
5762 if (real_identical (&r, &ext_80387_constants_table[i]))
5763 return i + 3;
5764 }
5765
2e1f15bd
UB
5766 /* Load of the constant -0.0 or -1.0 will be split as
5767 fldz;fchs or fld1;fchs sequence. */
5768 if (real_isnegzero (&r))
5769 return 8;
5770 if (real_identical (&r, &dconstm1))
5771 return 9;
5772
e075ae69 5773 return 0;
57dbca5e
BS
5774}
5775
881b2a96
RS
5776/* Return the opcode of the special instruction to be used to load
5777 the constant X. */
5778
5779const char *
b96a374d 5780standard_80387_constant_opcode (rtx x)
881b2a96
RS
5781{
5782 switch (standard_80387_constant_p (x))
5783 {
b96a374d 5784 case 1:
881b2a96
RS
5785 return "fldz";
5786 case 2:
5787 return "fld1";
b96a374d 5788 case 3:
881b2a96
RS
5789 return "fldlg2";
5790 case 4:
5791 return "fldln2";
b96a374d 5792 case 5:
881b2a96
RS
5793 return "fldl2e";
5794 case 6:
5795 return "fldl2t";
b96a374d 5796 case 7:
881b2a96 5797 return "fldpi";
2e1f15bd
UB
5798 case 8:
5799 case 9:
5800 return "#";
d0396b79
NS
5801 default:
5802 gcc_unreachable ();
881b2a96 5803 }
881b2a96
RS
5804}
5805
5806/* Return the CONST_DOUBLE representing the 80387 constant that is
5807 loaded by the specified special instruction. The argument IDX
5808 matches the return value from standard_80387_constant_p. */
5809
5810rtx
b96a374d 5811standard_80387_constant_rtx (int idx)
881b2a96
RS
5812{
5813 int i;
5814
5815 if (! ext_80387_constants_init)
5816 init_ext_80387_constants ();
5817
5818 switch (idx)
5819 {
5820 case 3:
5821 case 4:
5822 case 5:
5823 case 6:
5824 case 7:
5825 i = idx - 3;
5826 break;
5827
5828 default:
d0396b79 5829 gcc_unreachable ();
881b2a96
RS
5830 }
5831
1f48e56d 5832 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
f8a1ebc6 5833 XFmode);
881b2a96
RS
5834}
5835
5656a184
EC
5836/* Return 1 if mode is a valid mode for sse. */
5837static int
5838standard_sse_mode_p (enum machine_mode mode)
5839{
5840 switch (mode)
5841 {
5842 case V16QImode:
5843 case V8HImode:
5844 case V4SImode:
5845 case V2DImode:
5846 case V4SFmode:
5847 case V2DFmode:
5848 return 1;
5849
5850 default:
5851 return 0;
5852 }
5853}
5854
2b04e52b
JH
5855/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5856 */
5857int
b96a374d 5858standard_sse_constant_p (rtx x)
2b04e52b 5859{
5656a184
EC
5860 enum machine_mode mode = GET_MODE (x);
5861
5862 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
0e67d460 5863 return 1;
5656a184
EC
5864 if (vector_all_ones_operand (x, mode)
5865 && standard_sse_mode_p (mode))
5866 return TARGET_SSE2 ? 2 : -1;
5867
5868 return 0;
5869}
5870
5871/* Return the opcode of the special instruction to be used to load
5872 the constant X. */
5873
5874const char *
5875standard_sse_constant_opcode (rtx insn, rtx x)
5876{
5877 switch (standard_sse_constant_p (x))
5878 {
5879 case 1:
5880 if (get_attr_mode (insn) == MODE_V4SF)
5881 return "xorps\t%0, %0";
5882 else if (get_attr_mode (insn) == MODE_V2DF)
5883 return "xorpd\t%0, %0";
5884 else
5885 return "pxor\t%0, %0";
5886 case 2:
5887 return "pcmpeqd\t%0, %0";
5888 }
5889 gcc_unreachable ();
2b04e52b
JH
5890}
5891
2a2ab3f9
JVA
5892/* Returns 1 if OP contains a symbol reference */
5893
5894int
b96a374d 5895symbolic_reference_mentioned_p (rtx op)
2a2ab3f9 5896{
8d531ab9
KH
5897 const char *fmt;
5898 int i;
2a2ab3f9
JVA
5899
5900 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5901 return 1;
5902
5903 fmt = GET_RTX_FORMAT (GET_CODE (op));
5904 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5905 {
5906 if (fmt[i] == 'E')
5907 {
8d531ab9 5908 int j;
2a2ab3f9
JVA
5909
5910 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5911 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5912 return 1;
5913 }
e9a25f70 5914
2a2ab3f9
JVA
5915 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5916 return 1;
5917 }
5918
5919 return 0;
5920}
e075ae69
RH
5921
5922/* Return 1 if it is appropriate to emit `ret' instructions in the
5923 body of a function. Do this only if the epilogue is simple, needing a
5924 couple of insns. Prior to reloading, we can't tell how many registers
5925 must be saved, so return 0 then. Return 0 if there is no frame
6e14af16 5926 marker to de-allocate. */
32b5b1aa
SC
5927
5928int
b96a374d 5929ix86_can_use_return_insn_p (void)
32b5b1aa 5930{
4dd2ac2c 5931 struct ix86_frame frame;
9a7372d6 5932
9a7372d6
RH
5933 if (! reload_completed || frame_pointer_needed)
5934 return 0;
32b5b1aa 5935
9a7372d6
RH
5936 /* Don't allow more than 32 pop, since that's all we can do
5937 with one instruction. */
38173d38
JH
5938 if (crtl->args.pops_args
5939 && crtl->args.size >= 32768)
e075ae69 5940 return 0;
32b5b1aa 5941
4dd2ac2c
JH
5942 ix86_compute_frame_layout (&frame);
5943 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 5944}
6189a572 5945\f
6fca22eb
RH
5946/* Value should be nonzero if functions must have frame pointers.
5947 Zero means the frame pointer need not be set up (and parms may
5948 be accessed via the stack pointer) in functions that seem suitable. */
5949
5950int
b96a374d 5951ix86_frame_pointer_required (void)
6fca22eb
RH
5952{
5953 /* If we accessed previous frames, then the generated code expects
5954 to be able to access the saved ebp value in our frame. */
5955 if (cfun->machine->accesses_prev_frame)
5956 return 1;
a4f31c00 5957
6fca22eb
RH
5958 /* Several x86 os'es need a frame pointer for other reasons,
5959 usually pertaining to setjmp. */
5960 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5961 return 1;
5962
5963 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5964 the frame pointer by default. Turn it back on now if we've not
5965 got a leaf function. */
a7943381 5966 if (TARGET_OMIT_LEAF_FRAME_POINTER
5bf5a10b
AO
5967 && (!current_function_is_leaf
5968 || ix86_current_function_calls_tls_descriptor))
55ba61f3
JH
5969 return 1;
5970
e3b5732b 5971 if (crtl->profile)
6fca22eb
RH
5972 return 1;
5973
5974 return 0;
5975}
5976
/* Record that the current function accesses previous call frames.
   ix86_frame_pointer_required checks this flag to force a frame
   pointer.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}
e075ae69 5984\f
7d072037 5985#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
145aacc2
RH
5986# define USE_HIDDEN_LINKONCE 1
5987#else
5988# define USE_HIDDEN_LINKONCE 0
5989#endif
5990
bd09bdeb 5991static int pic_labels_used;
e9a25f70 5992
145aacc2
RH
5993/* Fills in the label name that should be used for a pc thunk for
5994 the given register. */
5995
5996static void
b96a374d 5997get_pc_thunk_name (char name[32], unsigned int regno)
145aacc2 5998{
f7288899
EC
5999 gcc_assert (!TARGET_64BIT);
6000
145aacc2
RH
6001 if (USE_HIDDEN_LINKONCE)
6002 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
6003 else
6004 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
6005}
6006
6007
e075ae69
RH
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

/* Emitted at end-of-file: one pc-thunk per register recorded in
   pic_labels_used, plus the executable-stack marker if needed.  */

void
ix86_file_end (void)
{
  rtx xops[2];
  int regno;

  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];

      /* Skip registers for which no thunk was ever requested.  */
      if (! ((pic_labels_used >> regno) & 1))
	continue;

      get_pc_thunk_name (name, regno);

#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  /* Mach-O: emit the thunk as a weak, private-extern coalesced
	     definition.  */
	  switch_to_section (darwin_sections[text_coal_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n", asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}
      else
#endif
      if (USE_HIDDEN_LINKONCE)
	{
	  tree decl;

	  /* ELF: emit the thunk as a hidden one-only function so
	     duplicate copies across objects are merged.  */
	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
			     error_mark_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  DECL_ONE_ONLY (decl) = 1;

	  (*targetm.asm_out.unique_section) (decl, 0);
	  switch_to_section (get_named_section (decl, NULL, 0));

	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  /* Fallback: a plain local label in the text section.  */
	  switch_to_section (text_section);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      /* Thunk body: load the return address (at the top of the stack)
	 into the target register, then return.  */
      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}
32b5b1aa 6073
c8c03509 6074/* Emit code for the SET_GOT patterns. */
32b5b1aa 6075
c8c03509 6076const char *
7d072037 6077output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
c8c03509
RH
6078{
6079 rtx xops[3];
0d7d98ee 6080
c8c03509 6081 xops[0] = dest;
170bdaba
RS
6082
6083 if (TARGET_VXWORKS_RTP && flag_pic)
6084 {
6085 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
6086 xops[2] = gen_rtx_MEM (Pmode,
6087 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
6088 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6089
6090 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
6091 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
6092 an unadorned address. */
6093 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6094 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
6095 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
6096 return "";
6097 }
6098
5fc0e5df 6099 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
32b5b1aa 6100
c8c03509 6101 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
32b5b1aa 6102 {
7d072037 6103 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
c8c03509
RH
6104
6105 if (!flag_pic)
999d3194 6106 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
c8c03509
RH
6107 else
6108 output_asm_insn ("call\t%a2", xops);
6109
b069de3b 6110#if TARGET_MACHO
7d072037
SH
6111 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
6112 is what will be referenced by the Mach-O PIC subsystem. */
6113 if (!label)
6114 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
b069de3b 6115#endif
7d072037 6116
4977bab6 6117 (*targetm.asm_out.internal_label) (asm_out_file, "L",
c8c03509
RH
6118 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
6119
6120 if (flag_pic)
999d3194 6121 output_asm_insn ("pop%z0\t%0", xops);
32b5b1aa 6122 }
e075ae69 6123 else
e5cb57e8 6124 {
145aacc2
RH
6125 char name[32];
6126 get_pc_thunk_name (name, REGNO (dest));
bd09bdeb 6127 pic_labels_used |= 1 << REGNO (dest);
f996902d 6128
145aacc2 6129 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
c8c03509
RH
6130 xops[2] = gen_rtx_MEM (QImode, xops[2]);
6131 output_asm_insn ("call\t%X2", xops);
7d072037
SH
6132 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
6133 is what will be referenced by the Mach-O PIC subsystem. */
6134#if TARGET_MACHO
6135 if (!label)
6136 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
9795d9fd
AP
6137 else
6138 targetm.asm_out.internal_label (asm_out_file, "L",
6139 CODE_LABEL_NUMBER (label));
7d072037 6140#endif
e5cb57e8 6141 }
e5cb57e8 6142
7d072037
SH
6143 if (TARGET_MACHO)
6144 return "";
6145
c8c03509 6146 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
999d3194 6147 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7d072037 6148 else
999d3194 6149 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
79325812 6150
c8c03509 6151 return "";
e9a25f70 6152}
8dfe5673 6153
0d7d98ee 6154/* Generate an "push" pattern for input ARG. */
e9a25f70 6155
e075ae69 6156static rtx
b96a374d 6157gen_push (rtx arg)
e9a25f70 6158{
c5c76735 6159 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
6160 gen_rtx_MEM (Pmode,
6161 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
6162 stack_pointer_rtx)),
6163 arg);
e9a25f70
JL
6164}
6165
bd09bdeb
RH
6166/* Return >= 0 if there is an unused call-clobbered register available
6167 for the entire function. */
6168
6169static unsigned int
b96a374d 6170ix86_select_alt_pic_regnum (void)
bd09bdeb 6171{
e3b5732b 6172 if (current_function_is_leaf && !crtl->profile
5bf5a10b 6173 && !ix86_current_function_calls_tls_descriptor)
bd09bdeb
RH
6174 {
6175 int i;
6176 for (i = 2; i >= 0; --i)
6fb5fa3c 6177 if (!df_regs_ever_live_p (i))
bd09bdeb
RH
6178 return i;
6179 }
6180
6181 return INVALID_REGNUM;
6182}
fce5a9f2 6183
4dd2ac2c
JH
6184/* Return 1 if we need to save REGNO. */
6185static int
b96a374d 6186ix86_save_reg (unsigned int regno, int maybe_eh_return)
1020a5ab 6187{
bd09bdeb
RH
6188 if (pic_offset_table_rtx
6189 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
6fb5fa3c 6190 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
e3b5732b
JH
6191 || crtl->profile
6192 || crtl->calls_eh_return
6193 || crtl->uses_const_pool))
bd09bdeb
RH
6194 {
6195 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
6196 return 0;
6197 return 1;
6198 }
1020a5ab 6199
e3b5732b 6200 if (crtl->calls_eh_return && maybe_eh_return)
1020a5ab
RH
6201 {
6202 unsigned i;
6203 for (i = 0; ; i++)
6204 {
b531087a 6205 unsigned test = EH_RETURN_DATA_REGNO (i);
1020a5ab
RH
6206 if (test == INVALID_REGNUM)
6207 break;
9b690711 6208 if (test == regno)
1020a5ab
RH
6209 return 1;
6210 }
6211 }
4dd2ac2c 6212
150cdc9e
RH
6213 if (cfun->machine->force_align_arg_pointer
6214 && regno == REGNO (cfun->machine->force_align_arg_pointer))
6215 return 1;
6216
6fb5fa3c 6217 return (df_regs_ever_live_p (regno)
1020a5ab
RH
6218 && !call_used_regs[regno]
6219 && !fixed_regs[regno]
6220 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4dd2ac2c
JH
6221}
6222
0903fcab
JH
6223/* Return number of registers to be saved on the stack. */
6224
6225static int
b96a374d 6226ix86_nsaved_regs (void)
0903fcab
JH
6227{
6228 int nregs = 0;
0903fcab
JH
6229 int regno;
6230
4dd2ac2c 6231 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 6232 if (ix86_save_reg (regno, true))
4dd2ac2c 6233 nregs++;
0903fcab
JH
6234 return nregs;
6235}
6236
6237/* Return the offset between two registers, one to be eliminated, and the other
6238 its replacement, at the start of a routine. */
6239
6240HOST_WIDE_INT
b96a374d 6241ix86_initial_elimination_offset (int from, int to)
0903fcab 6242{
4dd2ac2c
JH
6243 struct ix86_frame frame;
6244 ix86_compute_frame_layout (&frame);
564d80f4
JH
6245
6246 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 6247 return frame.hard_frame_pointer_offset;
564d80f4
JH
6248 else if (from == FRAME_POINTER_REGNUM
6249 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 6250 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
6251 else
6252 {
d0396b79
NS
6253 gcc_assert (to == STACK_POINTER_REGNUM);
6254
6255 if (from == ARG_POINTER_REGNUM)
4dd2ac2c 6256 return frame.stack_pointer_offset;
5656a184 6257
d0396b79
NS
6258 gcc_assert (from == FRAME_POINTER_REGNUM);
6259 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
6260 }
6261}
6262
4dd2ac2c 6263/* Fill structure ix86_frame about frame of currently computed function. */
0f290768 6264
4dd2ac2c 6265static void
b96a374d 6266ix86_compute_frame_layout (struct ix86_frame *frame)
65954bd8 6267{
65954bd8 6268 HOST_WIDE_INT total_size;
95899b34 6269 unsigned int stack_alignment_needed;
b19ee4bd 6270 HOST_WIDE_INT offset;
95899b34 6271 unsigned int preferred_alignment;
4dd2ac2c 6272 HOST_WIDE_INT size = get_frame_size ();
65954bd8 6273
4dd2ac2c 6274 frame->nregs = ix86_nsaved_regs ();
564d80f4 6275 total_size = size;
65954bd8 6276
cb91fab0
JH
6277 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6278 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
95899b34 6279
d7394366
JH
6280 /* During reload iteration the amount of registers saved can change.
6281 Recompute the value as needed. Do not recompute when amount of registers
aabcd309 6282 didn't change as reload does multiple calls to the function and does not
d7394366
JH
6283 expect the decision to change within single iteration. */
6284 if (!optimize_size
6285 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
d9b40e8d
JH
6286 {
6287 int count = frame->nregs;
6288
d7394366 6289 cfun->machine->use_fast_prologue_epilogue_nregs = count;
d9b40e8d
JH
6290 /* The fast prologue uses move instead of push to save registers. This
6291 is significantly longer, but also executes faster as modern hardware
6292 can execute the moves in parallel, but can't do that for push/pop.
b96a374d 6293
d9b40e8d
JH
6294 Be careful about choosing what prologue to emit: When function takes
6295 many instructions to execute we may use slow version as well as in
6296 case function is known to be outside hot spot (this is known with
6297 feedback only). Weight the size of function by number of registers
6298 to save as it is cheap to use one or two push instructions but very
6299 slow to use many of them. */
6300 if (count)
6301 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6302 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
6303 || (flag_branch_probabilities
6304 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
6305 cfun->machine->use_fast_prologue_epilogue = false;
6306 else
6307 cfun->machine->use_fast_prologue_epilogue
6308 = !expensive_function_p (count);
6309 }
6310 if (TARGET_PROLOGUE_USING_MOVE
6311 && cfun->machine->use_fast_prologue_epilogue)
6312 frame->save_regs_using_mov = true;
6313 else
6314 frame->save_regs_using_mov = false;
6315
6316
9ba81eaa 6317 /* Skip return address and saved base pointer. */
4dd2ac2c
JH
6318 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
6319
6320 frame->hard_frame_pointer_offset = offset;
564d80f4 6321
fcbfaa65
RK
6322 /* Do some sanity checking of stack_alignment_needed and
6323 preferred_alignment, since i386 port is the only using those features
f710504c 6324 that may break easily. */
564d80f4 6325
d0396b79
NS
6326 gcc_assert (!size || stack_alignment_needed);
6327 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6328 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6329 gcc_assert (stack_alignment_needed
6330 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
564d80f4 6331
4dd2ac2c
JH
6332 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
6333 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 6334
4dd2ac2c
JH
6335 /* Register save area */
6336 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 6337
8362f420
JH
6338 /* Va-arg area */
6339 if (ix86_save_varrargs_registers)
6340 {
6341 offset += X86_64_VARARGS_SIZE;
6342 frame->va_arg_size = X86_64_VARARGS_SIZE;
6343 }
6344 else
6345 frame->va_arg_size = 0;
6346
4dd2ac2c
JH
6347 /* Align start of frame for local function. */
6348 frame->padding1 = ((offset + stack_alignment_needed - 1)
6349 & -stack_alignment_needed) - offset;
f73ad30e 6350
4dd2ac2c 6351 offset += frame->padding1;
65954bd8 6352
4dd2ac2c
JH
6353 /* Frame pointer points here. */
6354 frame->frame_pointer_offset = offset;
54ff41b7 6355
4dd2ac2c 6356 offset += size;
65954bd8 6357
0b7ae565 6358 /* Add outgoing arguments area. Can be skipped if we eliminated
965514bd
JH
6359 all the function calls as dead code.
6360 Skipping is however impossible when function calls alloca. Alloca
38173d38 6361 expander assumes that last crtl->outgoing_args_size
965514bd
JH
6362 of stack frame are unused. */
6363 if (ACCUMULATE_OUTGOING_ARGS
e3b5732b 6364 && (!current_function_is_leaf || cfun->calls_alloca
5bf5a10b 6365 || ix86_current_function_calls_tls_descriptor))
4dd2ac2c 6366 {
38173d38
JH
6367 offset += crtl->outgoing_args_size;
6368 frame->outgoing_arguments_size = crtl->outgoing_args_size;
4dd2ac2c
JH
6369 }
6370 else
6371 frame->outgoing_arguments_size = 0;
564d80f4 6372
002ff5bc
RH
6373 /* Align stack boundary. Only needed if we're calling another function
6374 or using alloca. */
e3b5732b 6375 if (!current_function_is_leaf || cfun->calls_alloca
5bf5a10b 6376 || ix86_current_function_calls_tls_descriptor)
0b7ae565
RH
6377 frame->padding2 = ((offset + preferred_alignment - 1)
6378 & -preferred_alignment) - offset;
6379 else
6380 frame->padding2 = 0;
4dd2ac2c
JH
6381
6382 offset += frame->padding2;
6383
6384 /* We've reached end of stack frame. */
6385 frame->stack_pointer_offset = offset;
6386
6387 /* Size prologue needs to allocate. */
6388 frame->to_allocate =
6389 (size + frame->padding1 + frame->padding2
8362f420 6390 + frame->outgoing_arguments_size + frame->va_arg_size);
4dd2ac2c 6391
b19ee4bd
JJ
6392 if ((!frame->to_allocate && frame->nregs <= 1)
6393 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
d9b40e8d
JH
6394 frame->save_regs_using_mov = false;
6395
394a378c 6396 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
5bf5a10b
AO
6397 && current_function_is_leaf
6398 && !ix86_current_function_calls_tls_descriptor)
8362f420
JH
6399 {
6400 frame->red_zone_size = frame->to_allocate;
d9b40e8d
JH
6401 if (frame->save_regs_using_mov)
6402 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8362f420
JH
6403 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6404 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6405 }
6406 else
6407 frame->red_zone_size = 0;
6408 frame->to_allocate -= frame->red_zone_size;
6409 frame->stack_pointer_offset -= frame->red_zone_size;
4dd2ac2c 6410#if 0
7874f14d
MM
6411 fprintf (stderr, "\n");
6412 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
6413 fprintf (stderr, "size: %ld\n", (long)size);
6414 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
6415 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
6416 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
6417 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
6418 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
6419 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
6420 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
6421 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
6422 (long)frame->hard_frame_pointer_offset);
6423 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
6424 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
e3b5732b 6425 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7874f14d 6426 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
4dd2ac2c 6427#endif
65954bd8
JL
6428}
6429
0903fcab
JH
6430/* Emit code to save registers in the prologue. */
6431
6432static void
b96a374d 6433ix86_emit_save_regs (void)
0903fcab 6434{
150cdc9e 6435 unsigned int regno;
0903fcab 6436 rtx insn;
0903fcab 6437
150cdc9e 6438 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
1020a5ab 6439 if (ix86_save_reg (regno, true))
0903fcab 6440 {
0d7d98ee 6441 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
0903fcab
JH
6442 RTX_FRAME_RELATED_P (insn) = 1;
6443 }
6444}
6445
c6036a37
JH
6446/* Emit code to save registers using MOV insns. First register
6447 is restored from POINTER + OFFSET. */
6448static void
b96a374d 6449ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
c6036a37 6450{
150cdc9e 6451 unsigned int regno;
c6036a37
JH
6452 rtx insn;
6453
6454 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6455 if (ix86_save_reg (regno, true))
6456 {
b72f00af
RK
6457 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
6458 Pmode, offset),
c6036a37
JH
6459 gen_rtx_REG (Pmode, regno));
6460 RTX_FRAME_RELATED_P (insn) = 1;
6461 offset += UNITS_PER_WORD;
6462 }
6463}
6464
839a4992 6465/* Expand prologue or epilogue stack adjustment.
b19ee4bd
JJ
6466 The pattern exist to put a dependency on all ebp-based memory accesses.
6467 STYLE should be negative if instructions should be marked as frame related,
6468 zero if %r11 register is live and cannot be freely used and positive
6469 otherwise. */
6470
6471static void
6472pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
6473{
6474 rtx insn;
6475
6476 if (! TARGET_64BIT)
6477 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
6478 else if (x86_64_immediate_operand (offset, DImode))
6479 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
6480 else
6481 {
6482 rtx r11;
6483 /* r11 is used by indirect sibcall return as well, set before the
6484 epilogue and used after the epilogue. ATM indirect sibcall
6485 shouldn't be used together with huge frame sizes in one
6486 function because of the frame_size check in sibcall.c. */
d0396b79 6487 gcc_assert (style);
3c4ace25 6488 r11 = gen_rtx_REG (DImode, R11_REG);
b19ee4bd
JJ
6489 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
6490 if (style < 0)
6491 RTX_FRAME_RELATED_P (insn) = 1;
6492 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
6493 offset));
6494 }
6495 if (style < 0)
6496 RTX_FRAME_RELATED_P (insn) = 1;
6497}
6498
150cdc9e
RH
6499/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6500
6501static rtx
6502ix86_internal_arg_pointer (void)
6503{
33932946
SH
6504 bool has_force_align_arg_pointer =
6505 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
6506 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
6507 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6508 && DECL_NAME (current_function_decl)
6509 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6510 && DECL_FILE_SCOPE_P (current_function_decl))
6511 || ix86_force_align_arg_pointer
6512 || has_force_align_arg_pointer)
150cdc9e 6513 {
33932946
SH
6514 /* Nested functions can't realign the stack due to a register
6515 conflict. */
6516 if (DECL_CONTEXT (current_function_decl)
6517 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
6518 {
6519 if (ix86_force_align_arg_pointer)
6520 warning (0, "-mstackrealign ignored for nested functions");
6521 if (has_force_align_arg_pointer)
6522 error ("%s not supported for nested functions",
6523 ix86_force_align_arg_pointer_string);
6524 return virtual_incoming_args_rtx;
6525 }
29b74761 6526 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
150cdc9e
RH
6527 return copy_to_reg (cfun->machine->force_align_arg_pointer);
6528 }
6529 else
6530 return virtual_incoming_args_rtx;
6531}
6532
6533/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6534 This is called from dwarf2out.c to emit call frame instructions
6535 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6536static void
6537ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6538{
6539 rtx unspec = SET_SRC (pattern);
6540 gcc_assert (GET_CODE (unspec) == UNSPEC);
6541
6542 switch (index)
6543 {
6544 case UNSPEC_REG_SAVE:
6545 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6546 SET_DEST (pattern));
6547 break;
6548 case UNSPEC_DEF_CFA:
6549 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6550 INTVAL (XVECEXP (unspec, 0, 0)));
6551 break;
6552 default:
6553 gcc_unreachable ();
6554 }
6555}
6556
0f290768 6557/* Expand the prologue into a bunch of separate insns. */
e075ae69
RH
6558
6559void
b96a374d 6560ix86_expand_prologue (void)
2a2ab3f9 6561{
564d80f4 6562 rtx insn;
bd09bdeb 6563 bool pic_reg_used;
4dd2ac2c 6564 struct ix86_frame frame;
c6036a37 6565 HOST_WIDE_INT allocate;
4dd2ac2c 6566
4977bab6 6567 ix86_compute_frame_layout (&frame);
79325812 6568
150cdc9e
RH
6569 if (cfun->machine->force_align_arg_pointer)
6570 {
6571 rtx x, y;
6572
6573 /* Grab the argument pointer. */
6574 x = plus_constant (stack_pointer_rtx, 4);
6575 y = cfun->machine->force_align_arg_pointer;
6576 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6577 RTX_FRAME_RELATED_P (insn) = 1;
6578
6579 /* The unwind info consists of two parts: install the fafp as the cfa,
6580 and record the fafp as the "save register" of the stack pointer.
6581 The later is there in order that the unwinder can see where it
6582 should restore the stack pointer across the and insn. */
6583 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6584 x = gen_rtx_SET (VOIDmode, y, x);
6585 RTX_FRAME_RELATED_P (x) = 1;
6586 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6587 UNSPEC_REG_SAVE);
6588 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6589 RTX_FRAME_RELATED_P (y) = 1;
6590 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6591 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6592 REG_NOTES (insn) = x;
6593
6594 /* Align the stack. */
6595 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6596 GEN_INT (-16)));
6597
6598 /* And here we cheat like madmen with the unwind info. We force the
6599 cfa register back to sp+4, which is exactly what it was at the
6600 start of the function. Re-pushing the return address results in
5656a184 6601 the return at the same spot relative to the cfa, and thus is
150cdc9e
RH
6602 correct wrt the unwind info. */
6603 x = cfun->machine->force_align_arg_pointer;
6604 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6605 insn = emit_insn (gen_push (x));
6606 RTX_FRAME_RELATED_P (insn) = 1;
6607
6608 x = GEN_INT (4);
6609 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6610 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6611 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6612 REG_NOTES (insn) = x;
6613 }
6614
e075ae69
RH
6615 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6616 slower on all targets. Also sdb doesn't like it. */
e9a25f70 6617
2a2ab3f9
JVA
6618 if (frame_pointer_needed)
6619 {
564d80f4 6620 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
e075ae69 6621 RTX_FRAME_RELATED_P (insn) = 1;
e9a25f70 6622
564d80f4 6623 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
e075ae69 6624 RTX_FRAME_RELATED_P (insn) = 1;
2a2ab3f9
JVA
6625 }
6626
c6036a37 6627 allocate = frame.to_allocate;
c6036a37 6628
d9b40e8d 6629 if (!frame.save_regs_using_mov)
c6036a37
JH
6630 ix86_emit_save_regs ();
6631 else
6632 allocate += frame.nregs * UNITS_PER_WORD;
564d80f4 6633
d9b40e8d 6634 /* When using red zone we may start register saving before allocating
6893e828
AN
6635 the stack frame saving one cycle of the prologue. However I will
6636 avoid doing this if I am going to have to probe the stack since
6637 at least on x86_64 the stack probe can turn into a call that clobbers
6638 a red zone location */
394a378c 6639 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
6893e828 6640 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
d9b40e8d
JH
6641 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6642 : stack_pointer_rtx,
6643 -frame.nregs * UNITS_PER_WORD);
6644
c6036a37 6645 if (allocate == 0)
8dfe5673 6646 ;
e323735c 6647 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
b19ee4bd
JJ
6648 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6649 GEN_INT (-allocate), -1);
79325812 6650 else
8dfe5673 6651 {
fe9f516f 6652 /* Only valid for Win32. */
29b74761 6653 rtx eax = gen_rtx_REG (Pmode, AX_REG);
ccf8e764 6654 bool eax_live;
5fc94ac4 6655 rtx t;
e9a25f70 6656
7c800926 6657 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
ccf8e764 6658
7c800926 6659 if (cfun->machine->call_abi == MS_ABI)
ccf8e764
RH
6660 eax_live = false;
6661 else
6662 eax_live = ix86_eax_live_at_start_p ();
e075ae69 6663
fe9f516f
RH
6664 if (eax_live)
6665 {
6666 emit_insn (gen_push (eax));
ccf8e764 6667 allocate -= UNITS_PER_WORD;
fe9f516f
RH
6668 }
6669
5fc94ac4 6670 emit_move_insn (eax, GEN_INT (allocate));
98417968 6671
ccf8e764
RH
6672 if (TARGET_64BIT)
6673 insn = gen_allocate_stack_worker_64 (eax);
6674 else
6675 insn = gen_allocate_stack_worker_32 (eax);
6676 insn = emit_insn (insn);
b1177d69 6677 RTX_FRAME_RELATED_P (insn) = 1;
5fc94ac4
RH
6678 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6679 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6680 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6681 t, REG_NOTES (insn));
fe9f516f
RH
6682
6683 if (eax_live)
6684 {
ea5f7a19
RS
6685 if (frame_pointer_needed)
6686 t = plus_constant (hard_frame_pointer_rtx,
6687 allocate
6688 - frame.to_allocate
6689 - frame.nregs * UNITS_PER_WORD);
6690 else
6691 t = plus_constant (stack_pointer_rtx, allocate);
ccf8e764 6692 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
fe9f516f 6693 }
e075ae69 6694 }
fe9f516f 6695
6893e828 6696 if (frame.save_regs_using_mov
394a378c 6697 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
6893e828 6698 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
c6036a37
JH
6699 {
6700 if (!frame_pointer_needed || !frame.to_allocate)
6701 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6702 else
6703 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6704 -frame.nregs * UNITS_PER_WORD);
6705 }
e9a25f70 6706
bd09bdeb
RH
6707 pic_reg_used = false;
6708 if (pic_offset_table_rtx
6fb5fa3c 6709 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
e3b5732b 6710 || crtl->profile))
bd09bdeb
RH
6711 {
6712 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6713
6714 if (alt_pic_reg_used != INVALID_REGNUM)
6fb5fa3c 6715 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
bd09bdeb
RH
6716
6717 pic_reg_used = true;
6718 }
6719
e9a25f70 6720 if (pic_reg_used)
c8c03509 6721 {
7dcbf659 6722 if (TARGET_64BIT)
dc4d7240
JH
6723 {
6724 if (ix86_cmodel == CM_LARGE_PIC)
6725 {
29b74761 6726 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
dc4d7240
JH
6727 rtx label = gen_label_rtx ();
6728 emit_label (label);
6729 LABEL_PRESERVE_P (label) = 1;
6730 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6731 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
dc4d7240 6732 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
dc4d7240
JH
6733 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6734 pic_offset_table_rtx, tmp_reg));
6735 }
6736 else
6737 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6738 }
7dcbf659
JH
6739 else
6740 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
c8c03509 6741 }
77a989d1 6742
8660aaae 6743 /* Prevent function calls from being scheduled before the call to mcount.
66edd3b4 6744 In the pic_reg_used case, make sure that the got load isn't deleted. */
e3b5732b 6745 if (crtl->profile)
6fb5fa3c
DB
6746 {
6747 if (pic_reg_used)
6748 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6749 emit_insn (gen_blockage ());
6750 }
922e3e33
UB
6751
6752 /* Emit cld instruction if stringops are used in the function. */
6753 if (TARGET_CLD && ix86_current_function_needs_cld)
6754 emit_insn (gen_cld ());
77a989d1
SC
6755}
6756
da2d1d3a
JH
6757/* Emit code to restore saved registers using MOV insns. First register
6758 is restored from POINTER + OFFSET. */
6759static void
72613dfa
JH
6760ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6761 int maybe_eh_return)
da2d1d3a
JH
6762{
6763 int regno;
72613dfa 6764 rtx base_address = gen_rtx_MEM (Pmode, pointer);
da2d1d3a 6765
4dd2ac2c 6766 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 6767 if (ix86_save_reg (regno, maybe_eh_return))
da2d1d3a 6768 {
72613dfa
JH
6769 /* Ensure that adjust_address won't be forced to produce pointer
6770 out of range allowed by x86-64 instruction set. */
6771 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6772 {
6773 rtx r11;
6774
3c4ace25 6775 r11 = gen_rtx_REG (DImode, R11_REG);
72613dfa
JH
6776 emit_move_insn (r11, GEN_INT (offset));
6777 emit_insn (gen_adddi3 (r11, r11, pointer));
6778 base_address = gen_rtx_MEM (Pmode, r11);
6779 offset = 0;
6780 }
4dd2ac2c 6781 emit_move_insn (gen_rtx_REG (Pmode, regno),
72613dfa 6782 adjust_address (base_address, Pmode, offset));
4dd2ac2c 6783 offset += UNITS_PER_WORD;
da2d1d3a
JH
6784 }
6785}
6786
0f290768 6787/* Restore function stack, frame, and registers. */
e9a25f70 6788
2a2ab3f9 6789void
b96a374d 6790ix86_expand_epilogue (int style)
2a2ab3f9 6791{
1c71e60e 6792 int regno;
fdb8a883 6793 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4dd2ac2c 6794 struct ix86_frame frame;
65954bd8 6795 HOST_WIDE_INT offset;
4dd2ac2c
JH
6796
6797 ix86_compute_frame_layout (&frame);
2a2ab3f9 6798
a4f31c00 6799 /* Calculate start of saved registers relative to ebp. Special care
84e306b4
RH
6800 must be taken for the normal return case of a function using
6801 eh_return: the eax and edx registers are marked as saved, but not
6802 restored along this path. */
6803 offset = frame.nregs;
e3b5732b 6804 if (crtl->calls_eh_return && style != 2)
84e306b4
RH
6805 offset -= 2;
6806 offset *= -UNITS_PER_WORD;
2a2ab3f9 6807
fdb8a883
JW
6808 /* If we're only restoring one register and sp is not valid then
6809 using a move instruction to restore the register since it's
0f290768 6810 less work than reloading sp and popping the register.
da2d1d3a
JH
6811
6812 The default code result in stack adjustment using add/lea instruction,
6813 while this code results in LEAVE instruction (or discrete equivalent),
6814 so it is profitable in some other cases as well. Especially when there
6815 are no registers to restore. We also use this code when TARGET_USE_LEAVE
d1f87653 6816 and there is exactly one register to pop. This heuristic may need some
da2d1d3a 6817 tuning in future. */
4dd2ac2c 6818 if ((!sp_valid && frame.nregs <= 1)
2ab0437e 6819 || (TARGET_EPILOGUE_USING_MOVE
d9b40e8d 6820 && cfun->machine->use_fast_prologue_epilogue
c6036a37 6821 && (frame.nregs > 1 || frame.to_allocate))
4dd2ac2c 6822 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2ab0437e 6823 || (frame_pointer_needed && TARGET_USE_LEAVE
d9b40e8d
JH
6824 && cfun->machine->use_fast_prologue_epilogue
6825 && frame.nregs == 1)
e3b5732b 6826 || crtl->calls_eh_return)
2a2ab3f9 6827 {
da2d1d3a
JH
6828 /* Restore registers. We can use ebp or esp to address the memory
6829 locations. If both are available, default to ebp, since offsets
6830 are known to be small. Only exception is esp pointing directly to the
6831 end of block of saved registers, where we may simplify addressing
6832 mode. */
6833
4dd2ac2c 6834 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
1020a5ab
RH
6835 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6836 frame.to_allocate, style == 2);
da2d1d3a 6837 else
1020a5ab
RH
6838 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6839 offset, style == 2);
6840
6841 /* eh_return epilogues need %ecx added to the stack pointer. */
6842 if (style == 2)
6843 {
6844 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
2a2ab3f9 6845
1020a5ab
RH
6846 if (frame_pointer_needed)
6847 {
6848 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6849 tmp = plus_constant (tmp, UNITS_PER_WORD);
6850 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6851
6852 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6853 emit_move_insn (hard_frame_pointer_rtx, tmp);
6854
b19ee4bd
JJ
6855 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6856 const0_rtx, style);
1020a5ab
RH
6857 }
6858 else
6859 {
6860 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6861 tmp = plus_constant (tmp, (frame.to_allocate
6862 + frame.nregs * UNITS_PER_WORD));
6863 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6864 }
6865 }
6866 else if (!frame_pointer_needed)
b19ee4bd
JJ
6867 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6868 GEN_INT (frame.to_allocate
6869 + frame.nregs * UNITS_PER_WORD),
6870 style);
0f290768 6871 /* If not an i386, mov & pop is faster than "leave". */
d9b40e8d
JH
6872 else if (TARGET_USE_LEAVE || optimize_size
6873 || !cfun->machine->use_fast_prologue_epilogue)
999d3194 6874 emit_insn ((*ix86_gen_leave) ());
c8c5cb99 6875 else
2a2ab3f9 6876 {
b19ee4bd
JJ
6877 pro_epilogue_adjust_stack (stack_pointer_rtx,
6878 hard_frame_pointer_rtx,
6879 const0_rtx, style);
999d3194
L
6880
6881 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
e9a25f70
JL
6882 }
6883 }
1c71e60e 6884 else
68f654ec 6885 {
1c71e60e
JH
6886 /* First step is to deallocate the stack frame so that we can
6887 pop the registers. */
6888 if (!sp_valid)
6889 {
d0396b79 6890 gcc_assert (frame_pointer_needed);
b19ee4bd
JJ
6891 pro_epilogue_adjust_stack (stack_pointer_rtx,
6892 hard_frame_pointer_rtx,
6893 GEN_INT (offset), style);
1c71e60e 6894 }
4dd2ac2c 6895 else if (frame.to_allocate)
b19ee4bd
JJ
6896 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6897 GEN_INT (frame.to_allocate), style);
1c71e60e 6898
4dd2ac2c 6899 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 6900 if (ix86_save_reg (regno, false))
999d3194 6901 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
4dd2ac2c 6902 if (frame_pointer_needed)
8362f420 6903 {
f5143c46 6904 /* Leave results in shorter dependency chains on CPUs that are
2ab0437e
JH
6905 able to grok it fast. */
6906 if (TARGET_USE_LEAVE)
999d3194 6907 emit_insn ((*ix86_gen_leave) ());
8362f420 6908 else
999d3194 6909 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8362f420 6910 }
68f654ec 6911 }
68f654ec 6912
150cdc9e
RH
6913 if (cfun->machine->force_align_arg_pointer)
6914 {
6915 emit_insn (gen_addsi3 (stack_pointer_rtx,
6916 cfun->machine->force_align_arg_pointer,
6917 GEN_INT (-4)));
6918 }
6919
cbbf65e0 6920 /* Sibcall epilogues don't want a return instruction. */
1020a5ab 6921 if (style == 0)
cbbf65e0
RH
6922 return;
6923
38173d38 6924 if (crtl->args.pops_args && crtl->args.size)
2a2ab3f9 6925 {
38173d38 6926 rtx popc = GEN_INT (crtl->args.pops_args);
2a2ab3f9 6927
b8c752c8
UD
6928 /* i386 can only pop 64K bytes. If asked to pop more, pop
6929 return address, do explicit add, and jump indirectly to the
0f290768 6930 caller. */
2a2ab3f9 6931
38173d38 6932 if (crtl->args.pops_args >= 65536)
2a2ab3f9 6933 {
29b74761 6934 rtx ecx = gen_rtx_REG (SImode, CX_REG);
e9a25f70 6935
ccf8e764 6936 /* There is no "pascal" calling convention in any 64bit ABI. */
d0396b79 6937 gcc_assert (!TARGET_64BIT);
8362f420 6938
e075ae69
RH
6939 emit_insn (gen_popsi1 (ecx));
6940 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
11837777 6941 emit_jump_insn (gen_return_indirect_internal (ecx));
e9a25f70 6942 }
79325812 6943 else
e075ae69
RH
6944 emit_jump_insn (gen_return_pop_internal (popc));
6945 }
6946 else
6947 emit_jump_insn (gen_return_internal ());
6948}
bd09bdeb
RH
6949
6950/* Reset from the function's potential modifications. */
6951
6952static void
b96a374d
AJ
6953ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6954 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
bd09bdeb
RH
6955{
6956 if (pic_offset_table_rtx)
6fb5fa3c 6957 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
f8c2645c
AL
6958#if TARGET_MACHO
6959 /* Mach-O doesn't support labels at the end of objects, so if
6960 it looks like we might want one, insert a NOP. */
6961 {
6962 rtx insn = get_last_insn ();
6963 while (insn
6964 && NOTE_P (insn)
a38e7aa5 6965 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
f8c2645c
AL
6966 insn = PREV_INSN (insn);
6967 if (insn
6968 && (LABEL_P (insn)
6969 || (NOTE_P (insn)
a38e7aa5 6970 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
f8c2645c
AL
6971 fputs ("\tnop\n", file);
6972 }
6973#endif
6974
bd09bdeb 6975}
e075ae69
RH
6976\f
6977/* Extract the parts of an RTL expression that is a valid memory address
b446e5a2
JH
6978 for an instruction. Return 0 if the structure of the address is
6979 grossly off. Return -1 if the address contains ASHIFT, so it is not
74dc3e94 6980 strictly valid, but still used for computing length of lea instruction. */
e075ae69 6981
8fe75e43 6982int
8d531ab9 6983ix86_decompose_address (rtx addr, struct ix86_address *out)
e075ae69 6984{
7c93c2cc
PB
6985 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6986 rtx base_reg, index_reg;
e075ae69
RH
6987 HOST_WIDE_INT scale = 1;
6988 rtx scale_rtx = NULL_RTX;
b446e5a2 6989 int retval = 1;
74dc3e94 6990 enum ix86_address_seg seg = SEG_DEFAULT;
e075ae69 6991
7656aee4 6992 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
e075ae69
RH
6993 base = addr;
6994 else if (GET_CODE (addr) == PLUS)
6995 {
74dc3e94
RH
6996 rtx addends[4], op;
6997 int n = 0, i;
e075ae69 6998
74dc3e94
RH
6999 op = addr;
7000 do
e075ae69 7001 {
74dc3e94
RH
7002 if (n >= 4)
7003 return 0;
7004 addends[n++] = XEXP (op, 1);
7005 op = XEXP (op, 0);
2a2ab3f9 7006 }
74dc3e94
RH
7007 while (GET_CODE (op) == PLUS);
7008 if (n >= 4)
7009 return 0;
7010 addends[n] = op;
7011
7012 for (i = n; i >= 0; --i)
e075ae69 7013 {
74dc3e94
RH
7014 op = addends[i];
7015 switch (GET_CODE (op))
7016 {
7017 case MULT:
7018 if (index)
7019 return 0;
7020 index = XEXP (op, 0);
7021 scale_rtx = XEXP (op, 1);
7022 break;
7023
7024 case UNSPEC:
7025 if (XINT (op, 1) == UNSPEC_TP
7026 && TARGET_TLS_DIRECT_SEG_REFS
7027 && seg == SEG_DEFAULT)
7028 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
7029 else
7030 return 0;
7031 break;
7032
7033 case REG:
7034 case SUBREG:
7035 if (!base)
7036 base = op;
7037 else if (!index)
7038 index = op;
7039 else
7040 return 0;
7041 break;
7042
7043 case CONST:
7044 case CONST_INT:
7045 case SYMBOL_REF:
7046 case LABEL_REF:
7047 if (disp)
7048 return 0;
7049 disp = op;
7050 break;
7051
7052 default:
7053 return 0;
7054 }
e075ae69 7055 }
e075ae69
RH
7056 }
7057 else if (GET_CODE (addr) == MULT)
7058 {
7059 index = XEXP (addr, 0); /* index*scale */
7060 scale_rtx = XEXP (addr, 1);
7061 }
7062 else if (GET_CODE (addr) == ASHIFT)
7063 {
7064 rtx tmp;
7065
7066 /* We're called for lea too, which implements ashift on occasion. */
7067 index = XEXP (addr, 0);
7068 tmp = XEXP (addr, 1);
7656aee4 7069 if (!CONST_INT_P (tmp))
b446e5a2 7070 return 0;
e075ae69
RH
7071 scale = INTVAL (tmp);
7072 if ((unsigned HOST_WIDE_INT) scale > 3)
b446e5a2 7073 return 0;
e075ae69 7074 scale = 1 << scale;
b446e5a2 7075 retval = -1;
2a2ab3f9 7076 }
2a2ab3f9 7077 else
e075ae69
RH
7078 disp = addr; /* displacement */
7079
7080 /* Extract the integral value of scale. */
7081 if (scale_rtx)
e9a25f70 7082 {
7656aee4 7083 if (!CONST_INT_P (scale_rtx))
b446e5a2 7084 return 0;
e075ae69 7085 scale = INTVAL (scale_rtx);
e9a25f70 7086 }
3b3c6a3f 7087
7c93c2cc
PB
7088 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
7089 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
7090
74dc3e94 7091 /* Allow arg pointer and stack pointer as index if there is not scaling. */
7c93c2cc
PB
7092 if (base_reg && index_reg && scale == 1
7093 && (index_reg == arg_pointer_rtx
7094 || index_reg == frame_pointer_rtx
7095 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
e075ae69 7096 {
7c93c2cc
PB
7097 rtx tmp;
7098 tmp = base, base = index, index = tmp;
7099 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
e075ae69
RH
7100 }
7101
7102 /* Special case: %ebp cannot be encoded as a base without a displacement. */
7c93c2cc
PB
7103 if ((base_reg == hard_frame_pointer_rtx
7104 || base_reg == frame_pointer_rtx
7105 || base_reg == arg_pointer_rtx) && !disp)
e075ae69
RH
7106 disp = const0_rtx;
7107
7108 /* Special case: on K6, [%esi] makes the instruction vector decoded.
7109 Avoid this by transforming to [%esi+0]. */
8383d43c 7110 if (TARGET_K6 && !optimize_size
7c93c2cc
PB
7111 && base_reg && !index_reg && !disp
7112 && REG_P (base_reg)
7113 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
e075ae69
RH
7114 disp = const0_rtx;
7115
7116 /* Special case: encode reg+reg instead of reg*2. */
7117 if (!base && index && scale && scale == 2)
7c93c2cc 7118 base = index, base_reg = index_reg, scale = 1;
0f290768 7119
e075ae69
RH
7120 /* Special case: scaling cannot be encoded without base or displacement. */
7121 if (!base && !disp && index && scale != 1)
7122 disp = const0_rtx;
7123
7124 out->base = base;
7125 out->index = index;
7126 out->disp = disp;
7127 out->scale = scale;
74dc3e94 7128 out->seg = seg;
3b3c6a3f 7129
b446e5a2 7130 return retval;
e075ae69 7131}
01329426
JH
7132\f
7133/* Return cost of the memory address x.
7134 For i386, it is better to use a complex address than let gcc copy
7135 the address into a reg and make a new pseudo. But not if the address
7136 requires to two regs - that would mean more pseudos with longer
7137 lifetimes. */
dcefdf67 7138static int
b96a374d 7139ix86_address_cost (rtx x)
01329426
JH
7140{
7141 struct ix86_address parts;
7142 int cost = 1;
d0396b79 7143 int ok = ix86_decompose_address (x, &parts);
3b3c6a3f 7144
d0396b79 7145 gcc_assert (ok);
01329426 7146
7c93c2cc
PB
7147 if (parts.base && GET_CODE (parts.base) == SUBREG)
7148 parts.base = SUBREG_REG (parts.base);
7149 if (parts.index && GET_CODE (parts.index) == SUBREG)
7150 parts.index = SUBREG_REG (parts.index);
7151
01329426
JH
7152 /* Attempt to minimize number of registers in the address. */
7153 if ((parts.base
7154 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
7155 || (parts.index
7156 && (!REG_P (parts.index)
7157 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
7158 cost++;
7159
7160 if (parts.base
7161 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
7162 && parts.index
7163 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
7164 && parts.base != parts.index)
7165 cost++;
7166
7167 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
7168 since it's predecode logic can't detect the length of instructions
7169 and it degenerates to vector decoded. Increase cost of such
7170 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
0f290768 7171 to split such addresses or even refuse such addresses at all.
01329426
JH
7172
7173 Following addressing modes are affected:
7174 [base+scale*index]
7175 [scale*index+disp]
7176 [base+index]
0f290768 7177
01329426
JH
7178 The first and last case may be avoidable by explicitly coding the zero in
7179 memory address, but I don't have AMD-K6 machine handy to check this
7180 theory. */
7181
7182 if (TARGET_K6
7183 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
7184 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
7185 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
7186 cost += 10;
0f290768 7187
01329426
JH
7188 return cost;
7189}
7190\f
2ed941ec
RH
7191/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
7192 this is used for to form addresses to local data when -fPIC is in
7193 use. */
828a4fe4
MS
7194
7195static bool
7196darwin_local_data_pic (rtx disp)
7197{
7198 if (GET_CODE (disp) == MINUS)
7199 {
7200 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
7201 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
7202 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
7203 {
7204 const char *sym_name = XSTR (XEXP (disp, 1), 0);
7205 if (! strcmp (sym_name, "<pic base>"))
7206 return true;
7207 }
7208 }
7209
7210 return false;
7211}
2ed941ec 7212
f996902d
RH
7213/* Determine if a given RTX is a valid constant. We already know this
7214 satisfies CONSTANT_P. */
7215
7216bool
b96a374d 7217legitimate_constant_p (rtx x)
f996902d 7218{
f996902d
RH
7219 switch (GET_CODE (x))
7220 {
f996902d 7221 case CONST:
1e19ac74 7222 x = XEXP (x, 0);
f996902d 7223
1e19ac74 7224 if (GET_CODE (x) == PLUS)
828a4fe4 7225 {
7656aee4 7226 if (!CONST_INT_P (XEXP (x, 1)))
828a4fe4 7227 return false;
1e19ac74 7228 x = XEXP (x, 0);
828a4fe4
MS
7229 }
7230
1e19ac74 7231 if (TARGET_MACHO && darwin_local_data_pic (x))
828a4fe4
MS
7232 return true;
7233
f996902d 7234 /* Only some unspecs are valid as "constants". */
1e19ac74
RH
7235 if (GET_CODE (x) == UNSPEC)
7236 switch (XINT (x, 1))
f996902d 7237 {
dc4d7240 7238 case UNSPEC_GOT:
7dcbf659 7239 case UNSPEC_GOTOFF:
dc4d7240 7240 case UNSPEC_PLTOFF:
7dcbf659 7241 return TARGET_64BIT;
f996902d 7242 case UNSPEC_TPOFF:
cb0e3e3f 7243 case UNSPEC_NTPOFF:
fd4aca96
RH
7244 x = XVECEXP (x, 0, 0);
7245 return (GET_CODE (x) == SYMBOL_REF
7246 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
cb0e3e3f 7247 case UNSPEC_DTPOFF:
fd4aca96
RH
7248 x = XVECEXP (x, 0, 0);
7249 return (GET_CODE (x) == SYMBOL_REF
7250 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
f996902d
RH
7251 default:
7252 return false;
7253 }
1e19ac74
RH
7254
7255 /* We must have drilled down to a symbol. */
fd4aca96
RH
7256 if (GET_CODE (x) == LABEL_REF)
7257 return true;
7258 if (GET_CODE (x) != SYMBOL_REF)
1e19ac74
RH
7259 return false;
7260 /* FALLTHRU */
7261
7262 case SYMBOL_REF:
7263 /* TLS symbols are never valid. */
fd4aca96 7264 if (SYMBOL_REF_TLS_MODEL (x))
1e19ac74 7265 return false;
da489f73
RH
7266
7267 /* DLLIMPORT symbols are never valid. */
7268 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
7269 && SYMBOL_REF_DLLIMPORT_P (x))
7270 return false;
f996902d
RH
7271 break;
7272
d0b89852
RS
7273 case CONST_DOUBLE:
7274 if (GET_MODE (x) == TImode
7275 && x != CONST0_RTX (TImode)
7276 && !TARGET_64BIT)
7277 return false;
7278 break;
7279
7280 case CONST_VECTOR:
7281 if (x == CONST0_RTX (GET_MODE (x)))
7282 return true;
7283 return false;
7284
f996902d
RH
7285 default:
7286 break;
7287 }
7288
7289 /* Otherwise we handle everything else in the move patterns. */
7290 return true;
7291}
7292
3a04ff64
RH
7293/* Determine if it's legal to put X into the constant pool. This
7294 is not possible for the address of thread-local symbols, which
7295 is checked above. */
7296
7297static bool
b96a374d 7298ix86_cannot_force_const_mem (rtx x)
3a04ff64 7299{
d0b89852
RS
7300 /* We can always put integral constants and vectors in memory. */
7301 switch (GET_CODE (x))
7302 {
7303 case CONST_INT:
7304 case CONST_DOUBLE:
7305 case CONST_VECTOR:
7306 return false;
7307
7308 default:
7309 break;
7310 }
3a04ff64
RH
7311 return !legitimate_constant_p (x);
7312}
7313
f996902d
RH
7314/* Determine if a given RTX is a valid constant address. */
7315
7316bool
b96a374d 7317constant_address_p (rtx x)
f996902d 7318{
a94f136b 7319 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
f996902d
RH
7320}
7321
7322/* Nonzero if the constant value X is a legitimate general operand
fce5a9f2 7323 when generating PIC code. It is given that flag_pic is on and
f996902d
RH
7324 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
7325
7326bool
b96a374d 7327legitimate_pic_operand_p (rtx x)
f996902d
RH
7328{
7329 rtx inner;
7330
7331 switch (GET_CODE (x))
7332 {
7333 case CONST:
7334 inner = XEXP (x, 0);
7dcbf659 7335 if (GET_CODE (inner) == PLUS
7656aee4 7336 && CONST_INT_P (XEXP (inner, 1)))
7dcbf659 7337 inner = XEXP (inner, 0);
f996902d
RH
7338
7339 /* Only some unspecs are valid as "constants". */
7340 if (GET_CODE (inner) == UNSPEC)
7341 switch (XINT (inner, 1))
7342 {
dc4d7240 7343 case UNSPEC_GOT:
7dcbf659 7344 case UNSPEC_GOTOFF:
dc4d7240 7345 case UNSPEC_PLTOFF:
7dcbf659 7346 return TARGET_64BIT;
f996902d 7347 case UNSPEC_TPOFF:
fd4aca96
RH
7348 x = XVECEXP (inner, 0, 0);
7349 return (GET_CODE (x) == SYMBOL_REF
7350 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
f996902d
RH
7351 default:
7352 return false;
7353 }
5efb1046 7354 /* FALLTHRU */
f996902d
RH
7355
7356 case SYMBOL_REF:
7357 case LABEL_REF:
7358 return legitimate_pic_address_disp_p (x);
7359
7360 default:
7361 return true;
7362 }
7363}
7364
e075ae69
RH
7365/* Determine if a given CONST RTX is a valid memory displacement
7366 in PIC mode. */
0f290768 7367
59be65f6 7368int
8d531ab9 7369legitimate_pic_address_disp_p (rtx disp)
91bb873f 7370{
f996902d
RH
7371 bool saw_plus;
7372
6eb791fc
JH
7373 /* In 64bit mode we can allow direct addresses of symbols and labels
7374 when they are not dynamic symbols. */
c05dbe81
JH
7375 if (TARGET_64BIT)
7376 {
fd4aca96
RH
7377 rtx op0 = disp, op1;
7378
7379 switch (GET_CODE (disp))
a132b6a8 7380 {
fd4aca96
RH
7381 case LABEL_REF:
7382 return true;
7383
7384 case CONST:
7385 if (GET_CODE (XEXP (disp, 0)) != PLUS)
7386 break;
7387 op0 = XEXP (XEXP (disp, 0), 0);
7388 op1 = XEXP (XEXP (disp, 0), 1);
7656aee4 7389 if (!CONST_INT_P (op1)
fd4aca96
RH
7390 || INTVAL (op1) >= 16*1024*1024
7391 || INTVAL (op1) < -16*1024*1024)
f7288899 7392 break;
fd4aca96
RH
7393 if (GET_CODE (op0) == LABEL_REF)
7394 return true;
7395 if (GET_CODE (op0) != SYMBOL_REF)
7396 break;
7397 /* FALLTHRU */
a132b6a8 7398
fd4aca96 7399 case SYMBOL_REF:
a132b6a8 7400 /* TLS references should always be enclosed in UNSPEC. */
fd4aca96
RH
7401 if (SYMBOL_REF_TLS_MODEL (op0))
7402 return false;
dc4d7240
JH
7403 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
7404 && ix86_cmodel != CM_LARGE_PIC)
fd4aca96
RH
7405 return true;
7406 break;
7407
7408 default:
7409 break;
a132b6a8 7410 }
c05dbe81 7411 }
91bb873f
RH
7412 if (GET_CODE (disp) != CONST)
7413 return 0;
7414 disp = XEXP (disp, 0);
7415
6eb791fc
JH
7416 if (TARGET_64BIT)
7417 {
7418 /* We are unsafe to allow PLUS expressions. This limit allowed distance
7419 of GOT tables. We should not need these anyway. */
7420 if (GET_CODE (disp) != UNSPEC
7dcbf659 7421 || (XINT (disp, 1) != UNSPEC_GOTPCREL
dc4d7240
JH
7422 && XINT (disp, 1) != UNSPEC_GOTOFF
7423 && XINT (disp, 1) != UNSPEC_PLTOFF))
6eb791fc
JH
7424 return 0;
7425
7426 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
7427 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
7428 return 0;
7429 return 1;
7430 }
7431
f996902d 7432 saw_plus = false;
91bb873f
RH
7433 if (GET_CODE (disp) == PLUS)
7434 {
7656aee4 7435 if (!CONST_INT_P (XEXP (disp, 1)))
91bb873f
RH
7436 return 0;
7437 disp = XEXP (disp, 0);
f996902d 7438 saw_plus = true;
91bb873f
RH
7439 }
7440
828a4fe4
MS
7441 if (TARGET_MACHO && darwin_local_data_pic (disp))
7442 return 1;
b069de3b 7443
8ee41eaf 7444 if (GET_CODE (disp) != UNSPEC)
91bb873f
RH
7445 return 0;
7446
623fe810
RH
7447 switch (XINT (disp, 1))
7448 {
8ee41eaf 7449 case UNSPEC_GOT:
f996902d
RH
7450 if (saw_plus)
7451 return false;
170bdaba
RS
7452 /* We need to check for both symbols and labels because VxWorks loads
7453 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7454 details. */
7455 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7456 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
8ee41eaf 7457 case UNSPEC_GOTOFF:
47efdea4
JH
7458 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7459 While ABI specify also 32bit relocation but we don't produce it in
7460 small PIC model at all. */
7461 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7462 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
7463 && !TARGET_64BIT)
170bdaba 7464 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
799b33a0 7465 return false;
f996902d 7466 case UNSPEC_GOTTPOFF:
dea73790
JJ
7467 case UNSPEC_GOTNTPOFF:
7468 case UNSPEC_INDNTPOFF:
f996902d
RH
7469 if (saw_plus)
7470 return false;
fd4aca96
RH
7471 disp = XVECEXP (disp, 0, 0);
7472 return (GET_CODE (disp) == SYMBOL_REF
7473 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
f996902d 7474 case UNSPEC_NTPOFF:
fd4aca96
RH
7475 disp = XVECEXP (disp, 0, 0);
7476 return (GET_CODE (disp) == SYMBOL_REF
7477 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
f996902d 7478 case UNSPEC_DTPOFF:
fd4aca96
RH
7479 disp = XVECEXP (disp, 0, 0);
7480 return (GET_CODE (disp) == SYMBOL_REF
7481 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
623fe810 7482 }
fce5a9f2 7483
623fe810 7484 return 0;
91bb873f
RH
7485}
7486
e075ae69
RH
7487/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7488 memory address for an instruction. The MODE argument is the machine mode
7489 for the MEM expression that wants to use this address.
7490
7491 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
7492 convert common non-canonical forms to canonical form so that they will
7493 be recognized. */
7494
3b3c6a3f 7495int
ee2f65b4
RH
7496legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
7497 rtx addr, int strict)
3b3c6a3f 7498{
e075ae69
RH
7499 struct ix86_address parts;
7500 rtx base, index, disp;
7501 HOST_WIDE_INT scale;
7502 const char *reason = NULL;
7503 rtx reason_rtx = NULL_RTX;
3b3c6a3f 7504
b446e5a2 7505 if (ix86_decompose_address (addr, &parts) <= 0)
3b3c6a3f 7506 {
e075ae69 7507 reason = "decomposition failed";
50e60bc3 7508 goto report_error;
3b3c6a3f
MM
7509 }
7510
e075ae69
RH
7511 base = parts.base;
7512 index = parts.index;
7513 disp = parts.disp;
7514 scale = parts.scale;
91f0226f 7515
e075ae69 7516 /* Validate base register.
e9a25f70 7517
7c93c2cc
PB
7518 Don't allow SUBREG's that span more than a word here. It can lead to spill
7519 failures when the base is one word out of a two word structure, which is
7520 represented internally as a DImode int. */
e9a25f70 7521
3b3c6a3f
MM
7522 if (base)
7523 {
7c93c2cc 7524 rtx reg;
e075ae69 7525 reason_rtx = base;
5656a184 7526
7c93c2cc
PB
7527 if (REG_P (base))
7528 reg = base;
7529 else if (GET_CODE (base) == SUBREG
7530 && REG_P (SUBREG_REG (base))
7531 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7532 <= UNITS_PER_WORD)
7533 reg = SUBREG_REG (base);
7534 else
3b3c6a3f 7535 {
e075ae69 7536 reason = "base is not a register";
50e60bc3 7537 goto report_error;
3b3c6a3f
MM
7538 }
7539
c954bd01
RH
7540 if (GET_MODE (base) != Pmode)
7541 {
e075ae69 7542 reason = "base is not in Pmode";
50e60bc3 7543 goto report_error;
c954bd01
RH
7544 }
7545
7c93c2cc
PB
7546 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7547 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
3b3c6a3f 7548 {
e075ae69 7549 reason = "base is not valid";
50e60bc3 7550 goto report_error;
3b3c6a3f
MM
7551 }
7552 }
7553
e075ae69 7554 /* Validate index register.
e9a25f70 7555
7c93c2cc 7556 Don't allow SUBREG's that span more than a word here -- same as above. */
e075ae69
RH
7557
7558 if (index)
3b3c6a3f 7559 {
7c93c2cc 7560 rtx reg;
e075ae69
RH
7561 reason_rtx = index;
7562
7c93c2cc
PB
7563 if (REG_P (index))
7564 reg = index;
7565 else if (GET_CODE (index) == SUBREG
7566 && REG_P (SUBREG_REG (index))
7567 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7568 <= UNITS_PER_WORD)
7569 reg = SUBREG_REG (index);
7570 else
3b3c6a3f 7571 {
e075ae69 7572 reason = "index is not a register";
50e60bc3 7573 goto report_error;
3b3c6a3f
MM
7574 }
7575
e075ae69 7576 if (GET_MODE (index) != Pmode)
c954bd01 7577 {
e075ae69 7578 reason = "index is not in Pmode";
50e60bc3 7579 goto report_error;
c954bd01
RH
7580 }
7581
7c93c2cc
PB
7582 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7583 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
3b3c6a3f 7584 {
e075ae69 7585 reason = "index is not valid";
50e60bc3 7586 goto report_error;
3b3c6a3f
MM
7587 }
7588 }
3b3c6a3f 7589
e075ae69
RH
7590 /* Validate scale factor. */
7591 if (scale != 1)
3b3c6a3f 7592 {
e075ae69
RH
7593 reason_rtx = GEN_INT (scale);
7594 if (!index)
3b3c6a3f 7595 {
e075ae69 7596 reason = "scale without index";
50e60bc3 7597 goto report_error;
3b3c6a3f
MM
7598 }
7599
e075ae69 7600 if (scale != 2 && scale != 4 && scale != 8)
3b3c6a3f 7601 {
e075ae69 7602 reason = "scale is not a valid multiplier";
50e60bc3 7603 goto report_error;
3b3c6a3f
MM
7604 }
7605 }
7606
91bb873f 7607 /* Validate displacement. */
3b3c6a3f
MM
7608 if (disp)
7609 {
e075ae69
RH
7610 reason_rtx = disp;
7611
f996902d
RH
7612 if (GET_CODE (disp) == CONST
7613 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7614 switch (XINT (XEXP (disp, 0), 1))
7615 {
47efdea4
JH
7616 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7617 used. While ABI specify also 32bit relocations, we don't produce
7618 them at all and use IP relative instead. */
f996902d
RH
7619 case UNSPEC_GOT:
7620 case UNSPEC_GOTOFF:
47efdea4
JH
7621 gcc_assert (flag_pic);
7622 if (!TARGET_64BIT)
7623 goto is_legitimate_pic;
7624 reason = "64bit address unspec";
7625 goto report_error;
5656a184 7626
f996902d 7627 case UNSPEC_GOTPCREL:
d0396b79 7628 gcc_assert (flag_pic);
f996902d
RH
7629 goto is_legitimate_pic;
7630
7631 case UNSPEC_GOTTPOFF:
dea73790
JJ
7632 case UNSPEC_GOTNTPOFF:
7633 case UNSPEC_INDNTPOFF:
f996902d
RH
7634 case UNSPEC_NTPOFF:
7635 case UNSPEC_DTPOFF:
7636 break;
7637
7638 default:
7639 reason = "invalid address unspec";
7640 goto report_error;
7641 }
7642
f7288899
EC
7643 else if (SYMBOLIC_CONST (disp)
7644 && (flag_pic
7645 || (TARGET_MACHO
b069de3b 7646#if TARGET_MACHO
f7288899
EC
7647 && MACHOPIC_INDIRECT
7648 && !machopic_operand_p (disp)
b069de3b 7649#endif
f7288899 7650 )))
3b3c6a3f 7651 {
f7288899 7652
f996902d 7653 is_legitimate_pic:
0d7d98ee
JH
7654 if (TARGET_64BIT && (index || base))
7655 {
75d38379
JJ
7656 /* foo@dtpoff(%rX) is ok. */
7657 if (GET_CODE (disp) != CONST
7658 || GET_CODE (XEXP (disp, 0)) != PLUS
7659 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7656aee4 7660 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
75d38379
JJ
7661 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7662 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7663 {
7664 reason = "non-constant pic memory reference";
7665 goto report_error;
7666 }
0d7d98ee 7667 }
75d38379 7668 else if (! legitimate_pic_address_disp_p (disp))
91bb873f 7669 {
e075ae69 7670 reason = "displacement is an invalid pic construct";
50e60bc3 7671 goto report_error;
91bb873f
RH
7672 }
7673
4e9efe54 7674 /* This code used to verify that a symbolic pic displacement
0f290768
KH
7675 includes the pic_offset_table_rtx register.
7676
4e9efe54
JH
7677 While this is good idea, unfortunately these constructs may
7678 be created by "adds using lea" optimization for incorrect
7679 code like:
7680
7681 int a;
7682 int foo(int i)
7683 {
7684 return *(&a+i);
7685 }
7686
50e60bc3 7687 This code is nonsensical, but results in addressing
4e9efe54 7688 GOT table with pic_offset_table_rtx base. We can't
f710504c 7689 just refuse it easily, since it gets matched by
4e9efe54
JH
7690 "addsi3" pattern, that later gets split to lea in the
7691 case output register differs from input. While this
7692 can be handled by separate addsi pattern for this case
7693 that never results in lea, this seems to be easier and
7694 correct fix for crash to disable this test. */
3b3c6a3f 7695 }
a94f136b 7696 else if (GET_CODE (disp) != LABEL_REF
7656aee4 7697 && !CONST_INT_P (disp)
a94f136b
JH
7698 && (GET_CODE (disp) != CONST
7699 || !legitimate_constant_p (disp))
7700 && (GET_CODE (disp) != SYMBOL_REF
7701 || !legitimate_constant_p (disp)))
f996902d
RH
7702 {
7703 reason = "displacement is not constant";
7704 goto report_error;
7705 }
8fe75e43
RH
7706 else if (TARGET_64BIT
7707 && !x86_64_immediate_operand (disp, VOIDmode))
c05dbe81
JH
7708 {
7709 reason = "displacement is out of range";
7710 goto report_error;
7711 }
3b3c6a3f
MM
7712 }
7713
e075ae69 7714 /* Everything looks valid. */
3b3c6a3f 7715 return TRUE;
e075ae69 7716
5bf0ebab 7717 report_error:
e075ae69 7718 return FALSE;
3b3c6a3f 7719}
3b3c6a3f 7720\f
569b7f6a 7721/* Return a unique alias set for the GOT. */
55efb413 7722
4862826d 7723static alias_set_type
b96a374d 7724ix86_GOT_alias_set (void)
55efb413 7725{
4862826d 7726 static alias_set_type set = -1;
5bf0ebab
RH
7727 if (set == -1)
7728 set = new_alias_set ();
7729 return set;
0f290768 7730}
55efb413 7731
3b3c6a3f
MM
7732/* Return a legitimate reference for ORIG (an address) using the
7733 register REG. If REG is 0, a new pseudo is generated.
7734
91bb873f 7735 There are two types of references that must be handled:
3b3c6a3f
MM
7736
7737 1. Global data references must load the address from the GOT, via
7738 the PIC reg. An insn is emitted to do this load, and the reg is
7739 returned.
7740
91bb873f
RH
7741 2. Static data references, constant pool addresses, and code labels
7742 compute the address as an offset from the GOT, whose base is in
2ae5ae57 7743 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
91bb873f
RH
7744 differentiate them from global data objects. The returned
7745 address is the PIC reg + an unspec constant.
3b3c6a3f
MM
7746
7747 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 7748 reg also appears in the address. */
3b3c6a3f 7749
b39edae3 7750static rtx
b96a374d 7751legitimize_pic_address (rtx orig, rtx reg)
3b3c6a3f
MM
7752{
7753 rtx addr = orig;
9415ab7d 7754 rtx new_rtx = orig;
91bb873f 7755 rtx base;
3b3c6a3f 7756
b069de3b 7757#if TARGET_MACHO
f7288899
EC
7758 if (TARGET_MACHO && !TARGET_64BIT)
7759 {
7760 if (reg == 0)
7761 reg = gen_reg_rtx (Pmode);
7762 /* Use the generic Mach-O PIC machinery. */
7763 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7764 }
b069de3b
SS
7765#endif
7766
c05dbe81 7767 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9415ab7d 7768 new_rtx = addr;
7dcbf659
JH
7769 else if (TARGET_64BIT
7770 && ix86_cmodel != CM_SMALL_PIC
170bdaba 7771 && gotoff_operand (addr, Pmode))
7dcbf659
JH
7772 {
7773 rtx tmpreg;
7774 /* This symbol may be referenced via a displacement from the PIC
7775 base address (@GOTOFF). */
7776
7777 if (reload_in_progress)
6fb5fa3c 7778 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7dcbf659
JH
7779 if (GET_CODE (addr) == CONST)
7780 addr = XEXP (addr, 0);
7781 if (GET_CODE (addr) == PLUS)
7782 {
9415ab7d
TN
7783 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7784 UNSPEC_GOTOFF);
7785 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7dcbf659
JH
7786 }
7787 else
9415ab7d
TN
7788 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7789 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7dcbf659
JH
7790 if (!reg)
7791 tmpreg = gen_reg_rtx (Pmode);
7792 else
7793 tmpreg = reg;
9415ab7d 7794 emit_move_insn (tmpreg, new_rtx);
7dcbf659
JH
7795
7796 if (reg != 0)
7797 {
9415ab7d
TN
7798 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7799 tmpreg, 1, OPTAB_DIRECT);
7800 new_rtx = reg;
7dcbf659 7801 }
9415ab7d 7802 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7dcbf659 7803 }
170bdaba 7804 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
3b3c6a3f 7805 {
c05dbe81
JH
7806 /* This symbol may be referenced via a displacement from the PIC
7807 base address (@GOTOFF). */
3b3c6a3f 7808
c05dbe81 7809 if (reload_in_progress)
6fb5fa3c 7810 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
799b33a0
JH
7811 if (GET_CODE (addr) == CONST)
7812 addr = XEXP (addr, 0);
7813 if (GET_CODE (addr) == PLUS)
7814 {
9415ab7d
TN
7815 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7816 UNSPEC_GOTOFF);
7817 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
799b33a0
JH
7818 }
7819 else
9415ab7d
TN
7820 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7821 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7822 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
3b3c6a3f 7823
c05dbe81
JH
7824 if (reg != 0)
7825 {
9415ab7d
TN
7826 emit_move_insn (reg, new_rtx);
7827 new_rtx = reg;
c05dbe81 7828 }
3b3c6a3f 7829 }
170bdaba
RS
7830 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7831 /* We can't use @GOTOFF for text labels on VxWorks;
7832 see gotoff_operand. */
7833 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
3b3c6a3f 7834 {
8502420b
KT
7835 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7836 {
7837 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
7838 return legitimize_dllimport_symbol (addr, true);
7839 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
7840 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7841 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
7842 {
7843 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
7844 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
7845 }
7846 }
ccf8e764 7847
dc4d7240 7848 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
14f73b5a 7849 {
9415ab7d
TN
7850 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7851 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7852 new_rtx = gen_const_mem (Pmode, new_rtx);
7853 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
14f73b5a
JH
7854
7855 if (reg == 0)
7856 reg = gen_reg_rtx (Pmode);
7857 /* Use directly gen_movsi, otherwise the address is loaded
7858 into register for CSE. We don't want to CSE this addresses,
7859 instead we CSE addresses from the GOT table, so skip this. */
9415ab7d
TN
7860 emit_insn (gen_movsi (reg, new_rtx));
7861 new_rtx = reg;
14f73b5a
JH
7862 }
7863 else
7864 {
7865 /* This symbol must be referenced via a load from the
7866 Global Offset Table (@GOT). */
3b3c6a3f 7867
66edd3b4 7868 if (reload_in_progress)
6fb5fa3c 7869 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9415ab7d
TN
7870 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7871 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
dc4d7240 7872 if (TARGET_64BIT)
9415ab7d
TN
7873 new_rtx = force_reg (Pmode, new_rtx);
7874 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7875 new_rtx = gen_const_mem (Pmode, new_rtx);
7876 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
3b3c6a3f 7877
14f73b5a
JH
7878 if (reg == 0)
7879 reg = gen_reg_rtx (Pmode);
9415ab7d
TN
7880 emit_move_insn (reg, new_rtx);
7881 new_rtx = reg;
14f73b5a 7882 }
0f290768 7883 }
91bb873f
RH
7884 else
7885 {
7656aee4 7886 if (CONST_INT_P (addr)
d8ff1871
JH
7887 && !x86_64_immediate_operand (addr, VOIDmode))
7888 {
7889 if (reg)
7890 {
7891 emit_move_insn (reg, addr);
9415ab7d 7892 new_rtx = reg;
d8ff1871
JH
7893 }
7894 else
9415ab7d 7895 new_rtx = force_reg (Pmode, addr);
d8ff1871
JH
7896 }
7897 else if (GET_CODE (addr) == CONST)
3b3c6a3f 7898 {
91bb873f 7899 addr = XEXP (addr, 0);
e3c8ea67
RH
7900
7901 /* We must match stuff we generate before. Assume the only
7902 unspecs that can get here are ours. Not that we could do
43f3a59d 7903 anything with them anyway.... */
e3c8ea67
RH
7904 if (GET_CODE (addr) == UNSPEC
7905 || (GET_CODE (addr) == PLUS
7906 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7907 return orig;
d0396b79 7908 gcc_assert (GET_CODE (addr) == PLUS);
3b3c6a3f 7909 }
91bb873f
RH
7910 if (GET_CODE (addr) == PLUS)
7911 {
7912 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
e9a25f70 7913
91bb873f
RH
7914 /* Check first to see if this is a constant offset from a @GOTOFF
7915 symbol reference. */
170bdaba 7916 if (gotoff_operand (op0, Pmode)
7656aee4 7917 && CONST_INT_P (op1))
91bb873f 7918 {
6eb791fc
JH
7919 if (!TARGET_64BIT)
7920 {
66edd3b4 7921 if (reload_in_progress)
6fb5fa3c 7922 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9415ab7d
TN
7923 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7924 UNSPEC_GOTOFF);
7925 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7926 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7927 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
91bb873f 7928
6eb791fc
JH
7929 if (reg != 0)
7930 {
9415ab7d
TN
7931 emit_move_insn (reg, new_rtx);
7932 new_rtx = reg;
6eb791fc
JH
7933 }
7934 }
7935 else
91bb873f 7936 {
75d38379
JJ
7937 if (INTVAL (op1) < -16*1024*1024
7938 || INTVAL (op1) >= 16*1024*1024)
a7297856
ILT
7939 {
7940 if (!x86_64_immediate_operand (op1, Pmode))
7941 op1 = force_reg (Pmode, op1);
9415ab7d 7942 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
a7297856 7943 }
91bb873f
RH
7944 }
7945 }
7946 else
7947 {
7948 base = legitimize_pic_address (XEXP (addr, 0), reg);
9415ab7d
TN
7949 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7950 base == reg ? NULL_RTX : reg);
91bb873f 7951
9415ab7d
TN
7952 if (CONST_INT_P (new_rtx))
7953 new_rtx = plus_constant (base, INTVAL (new_rtx));
91bb873f
RH
7954 else
7955 {
9415ab7d 7956 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
91bb873f 7957 {
9415ab7d
TN
7958 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7959 new_rtx = XEXP (new_rtx, 1);
91bb873f 7960 }
9415ab7d 7961 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
91bb873f
RH
7962 }
7963 }
7964 }
3b3c6a3f 7965 }
9415ab7d 7966 return new_rtx;
3b3c6a3f
MM
7967}
7968\f
74dc3e94 7969/* Load the thread pointer. If TO_REG is true, force it into a register. */
f996902d
RH
7970
7971static rtx
b96a374d 7972get_thread_pointer (int to_reg)
f996902d 7973{
74dc3e94 7974 rtx tp, reg, insn;
f996902d
RH
7975
7976 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
74dc3e94
RH
7977 if (!to_reg)
7978 return tp;
f996902d 7979
74dc3e94
RH
7980 reg = gen_reg_rtx (Pmode);
7981 insn = gen_rtx_SET (VOIDmode, reg, tp);
7982 insn = emit_insn (insn);
7983
7984 return reg;
7985}
7986
7987/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7988 false if we expect this to be used for a memory address and true if
7989 we expect to load the address into a register. */
7990
7991static rtx
b96a374d 7992legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
74dc3e94 7993{
5bf5a10b 7994 rtx dest, base, off, pic, tp;
74dc3e94
RH
7995 int type;
7996
7997 switch (model)
7998 {
7999 case TLS_MODEL_GLOBAL_DYNAMIC:
8000 dest = gen_reg_rtx (Pmode);
5bf5a10b
AO
8001 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
8002
8003 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
74dc3e94 8004 {
29b74761 8005 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
74dc3e94
RH
8006
8007 start_sequence ();
8008 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
8009 insns = get_insns ();
8010 end_sequence ();
8011
becfd6e5 8012 RTL_CONST_CALL_P (insns) = 1;
74dc3e94
RH
8013 emit_libcall_block (insns, dest, rax, x);
8014 }
5bf5a10b
AO
8015 else if (TARGET_64BIT && TARGET_GNU2_TLS)
8016 emit_insn (gen_tls_global_dynamic_64 (dest, x));
74dc3e94
RH
8017 else
8018 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5bf5a10b
AO
8019
8020 if (TARGET_GNU2_TLS)
8021 {
8022 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
8023
8024 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
8025 }
74dc3e94
RH
8026 break;
8027
8028 case TLS_MODEL_LOCAL_DYNAMIC:
8029 base = gen_reg_rtx (Pmode);
5bf5a10b
AO
8030 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
8031
8032 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
74dc3e94 8033 {
29b74761 8034 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
74dc3e94
RH
8035
8036 start_sequence ();
8037 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
8038 insns = get_insns ();
8039 end_sequence ();
8040
8041 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
8042 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
becfd6e5 8043 RTL_CONST_CALL_P (insns) = 1;
74dc3e94
RH
8044 emit_libcall_block (insns, base, rax, note);
8045 }
5bf5a10b
AO
8046 else if (TARGET_64BIT && TARGET_GNU2_TLS)
8047 emit_insn (gen_tls_local_dynamic_base_64 (base));
74dc3e94
RH
8048 else
8049 emit_insn (gen_tls_local_dynamic_base_32 (base));
8050
5bf5a10b
AO
8051 if (TARGET_GNU2_TLS)
8052 {
8053 rtx x = ix86_tls_module_base ();
8054
31ebc801
AO
8055 set_unique_reg_note (get_last_insn (), REG_EQUIV,
8056 gen_rtx_MINUS (Pmode, x, tp));
5bf5a10b
AO
8057 }
8058
74dc3e94
RH
8059 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
8060 off = gen_rtx_CONST (Pmode, off);
8061
5bf5a10b 8062 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
31ebc801
AO
8063
8064 if (TARGET_GNU2_TLS)
8065 {
8066 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
8067
8068 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
8069 }
8070
5bf5a10b 8071 break;
74dc3e94
RH
8072
8073 case TLS_MODEL_INITIAL_EXEC:
8074 if (TARGET_64BIT)
8075 {
8076 pic = NULL;
8077 type = UNSPEC_GOTNTPOFF;
8078 }
8079 else if (flag_pic)
8080 {
8081 if (reload_in_progress)
6fb5fa3c 8082 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
74dc3e94 8083 pic = pic_offset_table_rtx;
5bf5a10b 8084 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
74dc3e94 8085 }
5bf5a10b 8086 else if (!TARGET_ANY_GNU_TLS)
74dc3e94
RH
8087 {
8088 pic = gen_reg_rtx (Pmode);
8089 emit_insn (gen_set_got (pic));
8090 type = UNSPEC_GOTTPOFF;
8091 }
8092 else
8093 {
8094 pic = NULL;
8095 type = UNSPEC_INDNTPOFF;
8096 }
8097
8098 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
8099 off = gen_rtx_CONST (Pmode, off);
8100 if (pic)
8101 off = gen_rtx_PLUS (Pmode, pic, off);
542a8afa 8102 off = gen_const_mem (Pmode, off);
74dc3e94
RH
8103 set_mem_alias_set (off, ix86_GOT_alias_set ());
8104
5bf5a10b 8105 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
74dc3e94
RH
8106 {
8107 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
8108 off = force_reg (Pmode, off);
8109 return gen_rtx_PLUS (Pmode, base, off);
8110 }
8111 else
8112 {
8113 base = get_thread_pointer (true);
8114 dest = gen_reg_rtx (Pmode);
8115 emit_insn (gen_subsi3 (dest, base, off));
8116 }
8117 break;
8118
8119 case TLS_MODEL_LOCAL_EXEC:
8120 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5bf5a10b 8121 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
74dc3e94
RH
8122 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
8123 off = gen_rtx_CONST (Pmode, off);
8124
5bf5a10b 8125 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
74dc3e94
RH
8126 {
8127 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
8128 return gen_rtx_PLUS (Pmode, base, off);
8129 }
8130 else
8131 {
8132 base = get_thread_pointer (true);
8133 dest = gen_reg_rtx (Pmode);
8134 emit_insn (gen_subsi3 (dest, base, off));
8135 }
8136 break;
8137
8138 default:
d0396b79 8139 gcc_unreachable ();
74dc3e94
RH
8140 }
8141
8142 return dest;
f996902d 8143}
fce5a9f2 8144
da489f73
RH
8145/* Create or return the unique __imp_DECL dllimport symbol corresponding
8146 to symbol DECL. */
8147
8148static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
8149 htab_t dllimport_map;
8150
8151static tree
8152get_dllimport_decl (tree decl)
8153{
8154 struct tree_map *h, in;
8155 void **loc;
8156 const char *name;
8157 const char *prefix;
8158 size_t namelen, prefixlen;
8159 char *imp_name;
8160 tree to;
8161 rtx rtl;
8162
8163 if (!dllimport_map)
8164 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
8165
8166 in.hash = htab_hash_pointer (decl);
8167 in.base.from = decl;
8168 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9415ab7d 8169 h = (struct tree_map *) *loc;
da489f73
RH
8170 if (h)
8171 return h->to;
8172
9415ab7d 8173 *loc = h = GGC_NEW (struct tree_map);
da489f73
RH
8174 h->hash = in.hash;
8175 h->base.from = decl;
8176 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
8177 DECL_ARTIFICIAL (to) = 1;
8178 DECL_IGNORED_P (to) = 1;
8179 DECL_EXTERNAL (to) = 1;
8180 TREE_READONLY (to) = 1;
8181
8182 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
8183 name = targetm.strip_name_encoding (name);
e599ac2b 8184 prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
da489f73
RH
8185 namelen = strlen (name);
8186 prefixlen = strlen (prefix);
9415ab7d 8187 imp_name = (char *) alloca (namelen + prefixlen + 1);
da489f73
RH
8188 memcpy (imp_name, prefix, prefixlen);
8189 memcpy (imp_name + prefixlen, name, namelen + 1);
8190
8191 name = ggc_alloc_string (imp_name, namelen + prefixlen);
8192 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
8193 SET_SYMBOL_REF_DECL (rtl, to);
8194 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
8195
8196 rtl = gen_const_mem (Pmode, rtl);
8197 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
8198
8199 SET_DECL_RTL (to, rtl);
18ff3013 8200 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
da489f73
RH
8201
8202 return to;
8203}
8204
8205/* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
8206 true if we require the result be a register. */
8207
8208static rtx
8209legitimize_dllimport_symbol (rtx symbol, bool want_reg)
8210{
8211 tree imp_decl;
8212 rtx x;
8213
8214 gcc_assert (SYMBOL_REF_DECL (symbol));
8215 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
8216
8217 x = DECL_RTL (imp_decl);
8218 if (want_reg)
8219 x = force_reg (Pmode, x);
8220 return x;
8221}
8222
3b3c6a3f
MM
8223/* Try machine-dependent ways of modifying an illegitimate address
8224 to be legitimate. If we find one, return the new, valid address.
8225 This macro is used in only one place: `memory_address' in explow.c.
8226
8227 OLDX is the address as it was before break_out_memory_refs was called.
8228 In some cases it is useful to look at this to decide what needs to be done.
8229
8230 MODE and WIN are passed so that this macro can use
8231 GO_IF_LEGITIMATE_ADDRESS.
8232
8233 It is always safe for this macro to do nothing. It exists to recognize
8234 opportunities to optimize the output.
8235
8236 For the 80386, we handle X+REG by loading X into a register R and
8237 using R+REG. R will go in a general reg and indexing will be used.
8238 However, if REG is a broken-out memory address or multiplication,
8239 nothing needs to be done because REG can certainly go in a general reg.
8240
8241 When -fpic is used, special handling is needed for symbolic references.
8242 See comments by legitimize_pic_address in i386.c for details. */
8243
8244rtx
8d531ab9 8245legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
3b3c6a3f
MM
8246{
8247 int changed = 0;
8248 unsigned log;
8249
8fe75e43 8250 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
f996902d 8251 if (log)
9415ab7d 8252 return legitimize_tls_address (x, (enum tls_model) log, false);
b39edae3
RH
8253 if (GET_CODE (x) == CONST
8254 && GET_CODE (XEXP (x, 0)) == PLUS
8fe75e43
RH
8255 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8256 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
b39edae3 8257 {
9415ab7d
TN
8258 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
8259 (enum tls_model) log, false);
b39edae3
RH
8260 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
8261 }
f996902d 8262
da489f73
RH
8263 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
8264 {
8265 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
8266 return legitimize_dllimport_symbol (x, true);
8267 if (GET_CODE (x) == CONST
8268 && GET_CODE (XEXP (x, 0)) == PLUS
8269 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8270 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
8271 {
8272 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
8273 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
8274 }
8275 }
8276
ea2666ba
KT
8277 if (flag_pic && SYMBOLIC_CONST (x))
8278 return legitimize_pic_address (x, 0);
8279
3b3c6a3f
MM
8280 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
8281 if (GET_CODE (x) == ASHIFT
7656aee4 8282 && CONST_INT_P (XEXP (x, 1))
85b583d3 8283 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
3b3c6a3f
MM
8284 {
8285 changed = 1;
85b583d3 8286 log = INTVAL (XEXP (x, 1));
a269a03c
JC
8287 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
8288 GEN_INT (1 << log));
3b3c6a3f
MM
8289 }
8290
8291 if (GET_CODE (x) == PLUS)
8292 {
0f290768 8293 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
e9a25f70 8294
3b3c6a3f 8295 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7656aee4 8296 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
85b583d3 8297 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
3b3c6a3f
MM
8298 {
8299 changed = 1;
85b583d3 8300 log = INTVAL (XEXP (XEXP (x, 0), 1));
c5c76735
JL
8301 XEXP (x, 0) = gen_rtx_MULT (Pmode,
8302 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
8303 GEN_INT (1 << log));
3b3c6a3f
MM
8304 }
8305
8306 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7656aee4 8307 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
85b583d3 8308 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
3b3c6a3f
MM
8309 {
8310 changed = 1;
85b583d3 8311 log = INTVAL (XEXP (XEXP (x, 1), 1));
c5c76735
JL
8312 XEXP (x, 1) = gen_rtx_MULT (Pmode,
8313 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
8314 GEN_INT (1 << log));
3b3c6a3f
MM
8315 }
8316
0f290768 8317 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
8318 if (GET_CODE (XEXP (x, 1)) == MULT)
8319 {
8320 rtx tmp = XEXP (x, 0);
8321 XEXP (x, 0) = XEXP (x, 1);
8322 XEXP (x, 1) = tmp;
8323 changed = 1;
8324 }
8325
8326 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
8327 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
8328 created by virtual register instantiation, register elimination, and
8329 similar optimizations. */
8330 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
8331 {
8332 changed = 1;
c5c76735
JL
8333 x = gen_rtx_PLUS (Pmode,
8334 gen_rtx_PLUS (Pmode, XEXP (x, 0),
8335 XEXP (XEXP (x, 1), 0)),
8336 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
8337 }
8338
e9a25f70
JL
8339 /* Canonicalize
8340 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
8341 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
8342 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8343 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8344 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
8345 && CONSTANT_P (XEXP (x, 1)))
8346 {
00c79232
ML
8347 rtx constant;
8348 rtx other = NULL_RTX;
3b3c6a3f 8349
7656aee4 8350 if (CONST_INT_P (XEXP (x, 1)))
3b3c6a3f
MM
8351 {
8352 constant = XEXP (x, 1);
8353 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
8354 }
7656aee4 8355 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
3b3c6a3f
MM
8356 {
8357 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
8358 other = XEXP (x, 1);
8359 }
8360 else
8361 constant = 0;
8362
8363 if (constant)
8364 {
8365 changed = 1;
c5c76735
JL
8366 x = gen_rtx_PLUS (Pmode,
8367 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
8368 XEXP (XEXP (XEXP (x, 0), 1), 0)),
8369 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
8370 }
8371 }
8372
8373 if (changed && legitimate_address_p (mode, x, FALSE))
8374 return x;
8375
8376 if (GET_CODE (XEXP (x, 0)) == MULT)
8377 {
8378 changed = 1;
8379 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
8380 }
8381
8382 if (GET_CODE (XEXP (x, 1)) == MULT)
8383 {
8384 changed = 1;
8385 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
8386 }
8387
8388 if (changed
7656aee4
UB
8389 && REG_P (XEXP (x, 1))
8390 && REG_P (XEXP (x, 0)))
3b3c6a3f
MM
8391 return x;
8392
8393 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
8394 {
8395 changed = 1;
8396 x = legitimize_pic_address (x, 0);
8397 }
8398
8399 if (changed && legitimate_address_p (mode, x, FALSE))
8400 return x;
8401
7656aee4 8402 if (REG_P (XEXP (x, 0)))
3b3c6a3f 8403 {
8d531ab9
KH
8404 rtx temp = gen_reg_rtx (Pmode);
8405 rtx val = force_operand (XEXP (x, 1), temp);
3b3c6a3f
MM
8406 if (val != temp)
8407 emit_move_insn (temp, val);
8408
8409 XEXP (x, 1) = temp;
8410 return x;
8411 }
8412
7656aee4 8413 else if (REG_P (XEXP (x, 1)))
3b3c6a3f 8414 {
8d531ab9
KH
8415 rtx temp = gen_reg_rtx (Pmode);
8416 rtx val = force_operand (XEXP (x, 0), temp);
3b3c6a3f
MM
8417 if (val != temp)
8418 emit_move_insn (temp, val);
8419
8420 XEXP (x, 0) = temp;
8421 return x;
8422 }
8423 }
8424
8425 return x;
8426}
2a2ab3f9
JVA
8427\f
8428/* Print an integer constant expression in assembler syntax. Addition
8429 and subtraction are the only arithmetic that may appear in these
8430 expressions. FILE is the stdio stream to write to, X is the rtx, and
8431 CODE is the operand print code from the output string. */
8432
8433static void
b96a374d 8434output_pic_addr_const (FILE *file, rtx x, int code)
2a2ab3f9
JVA
8435{
8436 char buf[256];
8437
8438 switch (GET_CODE (x))
8439 {
8440 case PC:
d0396b79
NS
8441 gcc_assert (flag_pic);
8442 putc ('.', file);
2a2ab3f9
JVA
8443 break;
8444
8445 case SYMBOL_REF:
320ce1d3
MS
8446 if (! TARGET_MACHO || TARGET_64BIT)
8447 output_addr_const (file, x);
8448 else
8449 {
8450 const char *name = XSTR (x, 0);
8451
ccf8e764
RH
8452 /* Mark the decl as referenced so that cgraph will
8453 output the function. */
320ce1d3
MS
8454 if (SYMBOL_REF_DECL (x))
8455 mark_decl_referenced (SYMBOL_REF_DECL (x));
8456
320ce1d3 8457#if TARGET_MACHO
c88fc50c
MS
8458 if (MACHOPIC_INDIRECT
8459 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
320ce1d3 8460 name = machopic_indirection_name (x, /*stub_p=*/true);
c88fc50c 8461#endif
320ce1d3
MS
8462 assemble_name (file, name);
8463 }
7c800926 8464 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
ccf8e764 8465 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
91bb873f 8466 fputs ("@PLT", file);
2a2ab3f9
JVA
8467 break;
8468
91bb873f
RH
8469 case LABEL_REF:
8470 x = XEXP (x, 0);
5efb1046 8471 /* FALLTHRU */
2a2ab3f9
JVA
8472 case CODE_LABEL:
8473 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
8474 assemble_name (asm_out_file, buf);
8475 break;
8476
8477 case CONST_INT:
f64cecad 8478 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2a2ab3f9
JVA
8479 break;
8480
8481 case CONST:
8482 /* This used to output parentheses around the expression,
8483 but that does not work on the 386 (either ATT or BSD assembler). */
8484 output_pic_addr_const (file, XEXP (x, 0), code);
8485 break;
8486
8487 case CONST_DOUBLE:
8488 if (GET_MODE (x) == VOIDmode)
8489 {
8490 /* We can use %d if the number is <32 bits and positive. */
8491 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
f64cecad
JC
8492 fprintf (file, "0x%lx%08lx",
8493 (unsigned long) CONST_DOUBLE_HIGH (x),
8494 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 8495 else
f64cecad 8496 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2a2ab3f9
JVA
8497 }
8498 else
8499 /* We can't handle floating point constants;
8500 PRINT_OPERAND must handle them. */
8501 output_operand_lossage ("floating constant misused");
8502 break;
8503
8504 case PLUS:
e9a25f70 8505 /* Some assemblers need integer constants to appear first. */
7656aee4 8506 if (CONST_INT_P (XEXP (x, 0)))
2a2ab3f9 8507 {
2a2ab3f9 8508 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 8509 putc ('+', file);
e9a25f70 8510 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 8511 }
5656a184 8512 else
2a2ab3f9 8513 {
7656aee4 8514 gcc_assert (CONST_INT_P (XEXP (x, 1)));
2a2ab3f9 8515 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 8516 putc ('+', file);
e9a25f70 8517 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9
JVA
8518 }
8519 break;
8520
8521 case MINUS:
b069de3b
SS
8522 if (!TARGET_MACHO)
8523 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
2a2ab3f9 8524 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 8525 putc ('-', file);
2a2ab3f9 8526 output_pic_addr_const (file, XEXP (x, 1), code);
b069de3b
SS
8527 if (!TARGET_MACHO)
8528 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
2a2ab3f9
JVA
8529 break;
8530
91bb873f 8531 case UNSPEC:
d0396b79 8532 gcc_assert (XVECLEN (x, 0) == 1);
91bb873f
RH
8533 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8534 switch (XINT (x, 1))
77ebd435 8535 {
8ee41eaf 8536 case UNSPEC_GOT:
77ebd435
AJ
8537 fputs ("@GOT", file);
8538 break;
8ee41eaf 8539 case UNSPEC_GOTOFF:
77ebd435
AJ
8540 fputs ("@GOTOFF", file);
8541 break;
dc4d7240
JH
8542 case UNSPEC_PLTOFF:
8543 fputs ("@PLTOFF", file);
8544 break;
8ee41eaf 8545 case UNSPEC_GOTPCREL:
9ad5e54f
RIL
8546 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8547 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
6eb791fc 8548 break;
f996902d 8549 case UNSPEC_GOTTPOFF:
dea73790 8550 /* FIXME: This might be @TPOFF in Sun ld too. */
f996902d
RH
8551 fputs ("@GOTTPOFF", file);
8552 break;
8553 case UNSPEC_TPOFF:
8554 fputs ("@TPOFF", file);
8555 break;
8556 case UNSPEC_NTPOFF:
75d38379
JJ
8557 if (TARGET_64BIT)
8558 fputs ("@TPOFF", file);
8559 else
8560 fputs ("@NTPOFF", file);
f996902d
RH
8561 break;
8562 case UNSPEC_DTPOFF:
8563 fputs ("@DTPOFF", file);
8564 break;
dea73790 8565 case UNSPEC_GOTNTPOFF:
75d38379 8566 if (TARGET_64BIT)
9ad5e54f
RIL
8567 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8568 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
75d38379
JJ
8569 else
8570 fputs ("@GOTNTPOFF", file);
dea73790
JJ
8571 break;
8572 case UNSPEC_INDNTPOFF:
8573 fputs ("@INDNTPOFF", file);
8574 break;
77ebd435
AJ
8575 default:
8576 output_operand_lossage ("invalid UNSPEC as operand");
8577 break;
8578 }
91bb873f
RH
8579 break;
8580
2a2ab3f9
JVA
8581 default:
8582 output_operand_lossage ("invalid expression as operand");
8583 }
8584}
1865dbb5 8585
fdbe66f2 8586/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
b9203463
RH
8587 We need to emit DTP-relative relocations. */
8588
2ed941ec 8589static void ATTRIBUTE_UNUSED
b96a374d 8590i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
b9203463 8591{
75d38379
JJ
8592 fputs (ASM_LONG, file);
8593 output_addr_const (file, x);
8594 fputs ("@DTPOFF", file);
b9203463
RH
8595 switch (size)
8596 {
8597 case 4:
b9203463
RH
8598 break;
8599 case 8:
75d38379 8600 fputs (", 0", file);
b9203463 8601 break;
b9203463 8602 default:
d0396b79 8603 gcc_unreachable ();
b9203463 8604 }
b9203463
RH
8605}
8606
1865dbb5 8607/* In the name of slightly smaller debug output, and to cater to
aabcd309 8608 general assembler lossage, recognize PIC+GOTOFF and turn it back
5656a184 8609 into a direct symbol reference.
dbde310d
GK
8610
8611 On Darwin, this is necessary to avoid a crash, because Darwin
8612 has a different PIC label for each routine but the DWARF debugging
8613 information is not associated with any particular routine, so it's
8614 necessary to remove references to the PIC label from RTL stored by
8615 the DWARF output code. */
1865dbb5 8616
69bd9368 8617static rtx
b96a374d 8618ix86_delegitimize_address (rtx orig_x)
1865dbb5 8619{
dbde310d
GK
8620 rtx x = orig_x;
8621 /* reg_addend is NULL or a multiple of some register. */
8622 rtx reg_addend = NULL_RTX;
8623 /* const_addend is NULL or a const_int. */
8624 rtx const_addend = NULL_RTX;
8625 /* This is the result, or NULL. */
8626 rtx result = NULL_RTX;
1865dbb5 8627
7656aee4 8628 if (MEM_P (x))
4c8c0dec
JJ
8629 x = XEXP (x, 0);
8630
6eb791fc
JH
8631 if (TARGET_64BIT)
8632 {
8633 if (GET_CODE (x) != CONST
8634 || GET_CODE (XEXP (x, 0)) != UNSPEC
8ee41eaf 8635 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7656aee4 8636 || !MEM_P (orig_x))
6eb791fc
JH
8637 return orig_x;
8638 return XVECEXP (XEXP (x, 0), 0, 0);
8639 }
8640
1865dbb5 8641 if (GET_CODE (x) != PLUS
1865dbb5
JM
8642 || GET_CODE (XEXP (x, 1)) != CONST)
8643 return orig_x;
8644
7656aee4 8645 if (REG_P (XEXP (x, 0))
ec65b2e3
JJ
8646 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8647 /* %ebx + GOT/GOTOFF */
dbde310d 8648 ;
ec65b2e3
JJ
8649 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8650 {
8651 /* %ebx + %reg * scale + GOT/GOTOFF */
dbde310d 8652 reg_addend = XEXP (x, 0);
7656aee4 8653 if (REG_P (XEXP (reg_addend, 0))
dbde310d
GK
8654 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8655 reg_addend = XEXP (reg_addend, 1);
7656aee4 8656 else if (REG_P (XEXP (reg_addend, 1))
dbde310d
GK
8657 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8658 reg_addend = XEXP (reg_addend, 0);
ec65b2e3
JJ
8659 else
8660 return orig_x;
7656aee4 8661 if (!REG_P (reg_addend)
dbde310d
GK
8662 && GET_CODE (reg_addend) != MULT
8663 && GET_CODE (reg_addend) != ASHIFT)
ec65b2e3
JJ
8664 return orig_x;
8665 }
8666 else
8667 return orig_x;
8668
1865dbb5 8669 x = XEXP (XEXP (x, 1), 0);
1865dbb5 8670 if (GET_CODE (x) == PLUS
7656aee4 8671 && CONST_INT_P (XEXP (x, 1)))
ec65b2e3 8672 {
dbde310d
GK
8673 const_addend = XEXP (x, 1);
8674 x = XEXP (x, 0);
ec65b2e3 8675 }
1865dbb5 8676
dbde310d 8677 if (GET_CODE (x) == UNSPEC
7656aee4
UB
8678 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8679 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
dbde310d
GK
8680 result = XVECEXP (x, 0, 0);
8681
7931b1be 8682 if (TARGET_MACHO && darwin_local_data_pic (x)
7656aee4 8683 && !MEM_P (orig_x))
dbde310d
GK
8684 result = XEXP (x, 0);
8685
8686 if (! result)
8687 return orig_x;
5656a184 8688
dbde310d
GK
8689 if (const_addend)
8690 result = gen_rtx_PLUS (Pmode, result, const_addend);
8691 if (reg_addend)
8692 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8693 return result;
1865dbb5 8694}
2ed941ec
RH
8695
8696/* If X is a machine specific address (i.e. a symbol or label being
8697 referenced as a displacement from the GOT implemented using an
8698 UNSPEC), then return the base term. Otherwise return X. */
8699
8700rtx
8701ix86_find_base_term (rtx x)
8702{
8703 rtx term;
8704
8705 if (TARGET_64BIT)
8706 {
8707 if (GET_CODE (x) != CONST)
8708 return x;
8709 term = XEXP (x, 0);
8710 if (GET_CODE (term) == PLUS
8711 && (CONST_INT_P (XEXP (term, 1))
8712 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8713 term = XEXP (term, 0);
8714 if (GET_CODE (term) != UNSPEC
8715 || XINT (term, 1) != UNSPEC_GOTPCREL)
8716 return x;
8717
8718 term = XVECEXP (term, 0, 0);
8719
8720 if (GET_CODE (term) != SYMBOL_REF
8721 && GET_CODE (term) != LABEL_REF)
8722 return x;
8723
8724 return term;
8725 }
8726
8727 term = ix86_delegitimize_address (x);
8728
8729 if (GET_CODE (term) != SYMBOL_REF
8730 && GET_CODE (term) != LABEL_REF)
8731 return x;
8732
8733 return term;
8734}
2a2ab3f9 8735\f
a269a03c 8736static void
b96a374d
AJ
8737put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8738 int fp, FILE *file)
a269a03c 8739{
a269a03c
JC
8740 const char *suffix;
8741
9a915772
JH
8742 if (mode == CCFPmode || mode == CCFPUmode)
8743 {
8744 enum rtx_code second_code, bypass_code;
8745 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
d0396b79 8746 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
9a915772
JH
8747 code = ix86_fp_compare_code_to_integer (code);
8748 mode = CCmode;
8749 }
a269a03c
JC
8750 if (reverse)
8751 code = reverse_condition (code);
e075ae69 8752
a269a03c
JC
8753 switch (code)
8754 {
8755 case EQ:
06f4e35d
L
8756 switch (mode)
8757 {
8758 case CCAmode:
8759 suffix = "a";
8760 break;
8761
8762 case CCCmode:
8763 suffix = "c";
8764 break;
8765
8766 case CCOmode:
8767 suffix = "o";
8768 break;
8769
8770 case CCSmode:
8771 suffix = "s";
8772 break;
8773
8774 default:
8775 suffix = "e";
8776 }
a269a03c 8777 break;
a269a03c 8778 case NE:
06f4e35d
L
8779 switch (mode)
8780 {
8781 case CCAmode:
8782 suffix = "na";
8783 break;
8784
8785 case CCCmode:
8786 suffix = "nc";
8787 break;
8788
8789 case CCOmode:
8790 suffix = "no";
8791 break;
8792
8793 case CCSmode:
8794 suffix = "ns";
8795 break;
8796
8797 default:
8798 suffix = "ne";
8799 }
a269a03c 8800 break;
a269a03c 8801 case GT:
d0396b79 8802 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
e075ae69 8803 suffix = "g";
a269a03c 8804 break;
a269a03c 8805 case GTU:
aabcd309
KH
8806 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8807 Those same assemblers have the same but opposite lossage on cmov. */
d39d658d
RIL
8808 if (mode == CCmode)
8809 suffix = fp ? "nbe" : "a";
8810 else if (mode == CCCmode)
8811 suffix = "b";
8812 else
8813 gcc_unreachable ();
a269a03c 8814 break;
a269a03c 8815 case LT:
d0396b79
NS
8816 switch (mode)
8817 {
8818 case CCNOmode:
8819 case CCGOCmode:
8820 suffix = "s";
8821 break;
8822
8823 case CCmode:
8824 case CCGCmode:
8825 suffix = "l";
8826 break;
8827
8828 default:
8829 gcc_unreachable ();
8830 }
a269a03c 8831 break;
a269a03c 8832 case LTU:
d39d658d 8833 gcc_assert (mode == CCmode || mode == CCCmode);
a269a03c
JC
8834 suffix = "b";
8835 break;
a269a03c 8836 case GE:
d0396b79
NS
8837 switch (mode)
8838 {
8839 case CCNOmode:
8840 case CCGOCmode:
8841 suffix = "ns";
8842 break;
8843
8844 case CCmode:
8845 case CCGCmode:
8846 suffix = "ge";
8847 break;
8848
8849 default:
8850 gcc_unreachable ();
8851 }
a269a03c 8852 break;
a269a03c 8853 case GEU:
e075ae69 8854 /* ??? As above. */
d39d658d 8855 gcc_assert (mode == CCmode || mode == CCCmode);
7e08e190 8856 suffix = fp ? "nb" : "ae";
a269a03c 8857 break;
a269a03c 8858 case LE:
d0396b79 8859 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
e075ae69 8860 suffix = "le";
a269a03c 8861 break;
a269a03c 8862 case LEU:
d39d658d
RIL
8863 /* ??? As above. */
8864 if (mode == CCmode)
8865 suffix = "be";
8866 else if (mode == CCCmode)
8867 suffix = fp ? "nb" : "ae";
8868 else
8869 gcc_unreachable ();
a269a03c 8870 break;
3a3677ff 8871 case UNORDERED:
9e7adcb3 8872 suffix = fp ? "u" : "p";
3a3677ff
RH
8873 break;
8874 case ORDERED:
9e7adcb3 8875 suffix = fp ? "nu" : "np";
3a3677ff 8876 break;
a269a03c 8877 default:
d0396b79 8878 gcc_unreachable ();
a269a03c
JC
8879 }
8880 fputs (suffix, file);
8881}
8882
a55f4481
RK
8883/* Print the name of register X to FILE based on its machine mode and number.
8884 If CODE is 'w', pretend the mode is HImode.
8885 If CODE is 'b', pretend the mode is QImode.
8886 If CODE is 'k', pretend the mode is SImode.
8887 If CODE is 'q', pretend the mode is DImode.
d0396b79 8888 If CODE is 'h', pretend the reg is the 'high' byte register.
a55f4481
RK
8889 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8890
e075ae69 8891void
b96a374d 8892print_reg (rtx x, int code, FILE *file)
e5cb57e8 8893{
9ad5e54f
RIL
8894 gcc_assert (x == pc_rtx
8895 || (REGNO (x) != ARG_POINTER_REGNUM
8896 && REGNO (x) != FRAME_POINTER_REGNUM
8897 && REGNO (x) != FLAGS_REG
8898 && REGNO (x) != FPSR_REG
8899 && REGNO (x) != FPCR_REG));
480feac0 8900
9ad5e54f 8901 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
8902 putc ('%', file);
8903
9ad5e54f
RIL
8904 if (x == pc_rtx)
8905 {
8906 gcc_assert (TARGET_64BIT);
8907 fputs ("rip", file);
8908 return;
8909 }
8910
ef6257cd 8911 if (code == 'w' || MMX_REG_P (x))
e075ae69
RH
8912 code = 2;
8913 else if (code == 'b')
8914 code = 1;
8915 else if (code == 'k')
8916 code = 4;
3f3f2124
JH
8917 else if (code == 'q')
8918 code = 8;
e075ae69
RH
8919 else if (code == 'y')
8920 code = 3;
8921 else if (code == 'h')
8922 code = 0;
8923 else
8924 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 8925
3f3f2124
JH
8926 /* Irritatingly, AMD extended registers use different naming convention
8927 from the normal registers. */
8928 if (REX_INT_REG_P (x))
8929 {
d0396b79 8930 gcc_assert (TARGET_64BIT);
3f3f2124
JH
8931 switch (code)
8932 {
ef6257cd 8933 case 0:
c725bd79 8934 error ("extended registers have no high halves");
3f3f2124
JH
8935 break;
8936 case 1:
8937 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8938 break;
8939 case 2:
8940 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8941 break;
8942 case 4:
8943 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8944 break;
8945 case 8:
8946 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8947 break;
8948 default:
c725bd79 8949 error ("unsupported operand size for extended register");
3f3f2124
JH
8950 break;
8951 }
8952 return;
8953 }
e075ae69
RH
8954 switch (code)
8955 {
8956 case 3:
8957 if (STACK_TOP_P (x))
8958 {
8959 fputs ("st(0)", file);
8960 break;
8961 }
5efb1046 8962 /* FALLTHRU */
e075ae69 8963 case 8:
3f3f2124 8964 case 4:
e075ae69 8965 case 12:
446988df 8966 if (! ANY_FP_REG_P (x))
885a70fd 8967 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5efb1046 8968 /* FALLTHRU */
a7180f70 8969 case 16:
e075ae69 8970 case 2:
d4c32b6f 8971 normal:
e075ae69
RH
8972 fputs (hi_reg_name[REGNO (x)], file);
8973 break;
8974 case 1:
d4c32b6f
RH
8975 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8976 goto normal;
e075ae69
RH
8977 fputs (qi_reg_name[REGNO (x)], file);
8978 break;
8979 case 0:
d4c32b6f
RH
8980 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8981 goto normal;
e075ae69
RH
8982 fputs (qi_high_reg_name[REGNO (x)], file);
8983 break;
8984 default:
d0396b79 8985 gcc_unreachable ();
fe25fea3 8986 }
e5cb57e8
SC
8987}
8988
f996902d
RH
8989/* Locate some local-dynamic symbol still in use by this function
8990 so that we can print its name in some tls_local_dynamic_base
8991 pattern. */
8992
2ed941ec
RH
8993static int
8994get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8995{
8996 rtx x = *px;
8997
8998 if (GET_CODE (x) == SYMBOL_REF
8999 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
9000 {
9001 cfun->machine->some_ld_name = XSTR (x, 0);
9002 return 1;
9003 }
9004
9005 return 0;
9006}
9007
f996902d 9008static const char *
b96a374d 9009get_some_local_dynamic_name (void)
f996902d
RH
9010{
9011 rtx insn;
9012
9013 if (cfun->machine->some_ld_name)
9014 return cfun->machine->some_ld_name;
9015
9016 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
9017 if (INSN_P (insn)
9018 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
9019 return cfun->machine->some_ld_name;
9020
d0396b79 9021 gcc_unreachable ();
f996902d
RH
9022}
9023
2a2ab3f9 9024/* Meaning of CODE:
fe25fea3 9025 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 9026 C -- print opcode suffix for set/cmov insn.
fe25fea3 9027 c -- like C, but print reversed condition
354f84af 9028 E,e -- likewise, but for compare-and-branch fused insn.
ef6257cd 9029 F,f -- likewise, but for floating-point.
f6f5dff2
RO
9030 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
9031 otherwise nothing
2a2ab3f9
JVA
9032 R -- print the prefix for register names.
9033 z -- print the opcode suffix for the size of the current operand.
9034 * -- print a star (in certain assembler syntax)
fb204271 9035 A -- print an absolute memory reference.
2a2ab3f9 9036 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
9037 s -- print a shift double count, followed by the assemblers argument
9038 delimiter.
fe25fea3
SC
9039 b -- print the QImode name of the register for the indicated operand.
9040 %b0 would print %al if operands[0] is reg 0.
9041 w -- likewise, print the HImode name of the register.
9042 k -- likewise, print the SImode name of the register.
3f3f2124 9043 q -- likewise, print the DImode name of the register.
ef6257cd
JH
9044 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
9045 y -- print "st(0)" instead of "st" as a register.
a46d1d38 9046 D -- print condition for SSE cmp instruction.
ef6257cd
JH
9047 P -- if PIC, print an @PLT suffix.
9048 X -- don't print any sort of PIC '@' suffix for a symbol.
f996902d 9049 & -- print some in-use local-dynamic symbol name.
ef719a44 9050 H -- print a memory address offset by 8; used for sse high-parts
04e1d06b 9051 Y -- print condition for SSE5 com* instruction.
c9d259cb
UB
9052 + -- print a branch hint as 'cs' or 'ds' prefix
9053 ; -- print a semicolon (after prefixes due to bug in older gas).
a46d1d38 9054 */
2a2ab3f9
JVA
9055
9056void
b96a374d 9057print_operand (FILE *file, rtx x, int code)
2a2ab3f9
JVA
9058{
9059 if (code)
9060 {
9061 switch (code)
9062 {
9063 case '*':
80f33d06 9064 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9
JVA
9065 putc ('*', file);
9066 return;
9067
f996902d
RH
9068 case '&':
9069 assemble_name (file, get_some_local_dynamic_name ());
9070 return;
9071
fb204271 9072 case 'A':
d0396b79 9073 switch (ASSEMBLER_DIALECT)
fb204271 9074 {
d0396b79
NS
9075 case ASM_ATT:
9076 putc ('*', file);
9077 break;
9078
9079 case ASM_INTEL:
fb204271
DN
9080 /* Intel syntax. For absolute addresses, registers should not
9081 be surrounded by braces. */
7656aee4 9082 if (!REG_P (x))
fb204271
DN
9083 {
9084 putc ('[', file);
9085 PRINT_OPERAND (file, x, 0);
9086 putc (']', file);
9087 return;
9088 }
d0396b79
NS
9089 break;
9090
9091 default:
9092 gcc_unreachable ();
fb204271
DN
9093 }
9094
9095 PRINT_OPERAND (file, x, 0);
9096 return;
9097
9098
2a2ab3f9 9099 case 'L':
80f33d06 9100 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 9101 putc ('l', file);
2a2ab3f9
JVA
9102 return;
9103
9104 case 'W':
80f33d06 9105 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 9106 putc ('w', file);
2a2ab3f9
JVA
9107 return;
9108
9109 case 'B':
80f33d06 9110 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 9111 putc ('b', file);
2a2ab3f9
JVA
9112 return;
9113
9114 case 'Q':
80f33d06 9115 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 9116 putc ('l', file);
2a2ab3f9
JVA
9117 return;
9118
9119 case 'S':
80f33d06 9120 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 9121 putc ('s', file);
2a2ab3f9
JVA
9122 return;
9123
5f1ec3e6 9124 case 'T':
80f33d06 9125 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 9126 putc ('t', file);
5f1ec3e6
JVA
9127 return;
9128
2a2ab3f9
JVA
9129 case 'z':
9130 /* 387 opcodes don't get size suffixes if the operands are
0f290768 9131 registers. */
2a2ab3f9
JVA
9132 if (STACK_REG_P (x))
9133 return;
9134
831c4e87
KC
9135 /* Likewise if using Intel opcodes. */
9136 if (ASSEMBLER_DIALECT == ASM_INTEL)
9137 return;
9138
9139 /* This is the size of op from size of operand. */
2a2ab3f9
JVA
9140 switch (GET_MODE_SIZE (GET_MODE (x)))
9141 {
37fc8424
UB
9142 case 1:
9143 putc ('b', file);
9144 return;
9145
2a2ab3f9 9146 case 2:
f3ba4235
UB
9147 if (MEM_P (x))
9148 {
155d8a47 9149#ifdef HAVE_GAS_FILDS_FISTS
f3ba4235 9150 putc ('s', file);
155d8a47 9151#endif
f3ba4235
UB
9152 return;
9153 }
9154 else
9155 putc ('w', file);
2a2ab3f9
JVA
9156 return;
9157
9158 case 4:
9159 if (GET_MODE (x) == SFmode)
9160 {
e075ae69 9161 putc ('s', file);
2a2ab3f9
JVA
9162 return;
9163 }
9164 else
e075ae69 9165 putc ('l', file);
2a2ab3f9
JVA
9166 return;
9167
5f1ec3e6 9168 case 12:
2b589241 9169 case 16:
e075ae69
RH
9170 putc ('t', file);
9171 return;
5f1ec3e6 9172
2a2ab3f9
JVA
9173 case 8:
9174 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa 9175 {
999d3194
L
9176 if (MEM_P (x))
9177 {
56c0e8fa 9178#ifdef GAS_MNEMONICS
999d3194 9179 putc ('q', file);
56c0e8fa 9180#else
999d3194
L
9181 putc ('l', file);
9182 putc ('l', file);
56c0e8fa 9183#endif
999d3194
L
9184 }
9185 else
9186 putc ('q', file);
56c0e8fa 9187 }
e075ae69
RH
9188 else
9189 putc ('l', file);
2a2ab3f9 9190 return;
155d8a47
JW
9191
9192 default:
d0396b79 9193 gcc_unreachable ();
2a2ab3f9 9194 }
4af3895e
JVA
9195
9196 case 'b':
9197 case 'w':
9198 case 'k':
3f3f2124 9199 case 'q':
4af3895e
JVA
9200 case 'h':
9201 case 'y':
5cb6195d 9202 case 'X':
e075ae69 9203 case 'P':
4af3895e
JVA
9204 break;
9205
2d49677f 9206 case 's':
7656aee4 9207 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
2d49677f
SC
9208 {
9209 PRINT_OPERAND (file, x, 0);
e075ae69 9210 putc (',', file);
2d49677f 9211 }
a269a03c
JC
9212 return;
9213
a46d1d38
JH
9214 case 'D':
9215 /* Little bit of braindamage here. The SSE compare instructions
9216 does use completely different names for the comparisons that the
9217 fp conditional moves. */
9218 switch (GET_CODE (x))
9219 {
9220 case EQ:
9221 case UNEQ:
9222 fputs ("eq", file);
9223 break;
9224 case LT:
9225 case UNLT:
9226 fputs ("lt", file);
9227 break;
9228 case LE:
9229 case UNLE:
9230 fputs ("le", file);
9231 break;
9232 case UNORDERED:
9233 fputs ("unord", file);
9234 break;
9235 case NE:
9236 case LTGT:
9237 fputs ("neq", file);
9238 break;
9239 case UNGE:
9240 case GE:
9241 fputs ("nlt", file);
9242 break;
9243 case UNGT:
9244 case GT:
9245 fputs ("nle", file);
9246 break;
9247 case ORDERED:
9248 fputs ("ord", file);
9249 break;
9250 default:
d0396b79 9251 gcc_unreachable ();
a46d1d38
JH
9252 }
9253 return;
048b1c95 9254 case 'O':
f6f5dff2 9255#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
9256 if (ASSEMBLER_DIALECT == ASM_ATT)
9257 {
9258 switch (GET_MODE (x))
9259 {
9260 case HImode: putc ('w', file); break;
9261 case SImode:
9262 case SFmode: putc ('l', file); break;
9263 case DImode:
9264 case DFmode: putc ('q', file); break;
d0396b79 9265 default: gcc_unreachable ();
048b1c95
JJ
9266 }
9267 putc ('.', file);
9268 }
9269#endif
9270 return;
1853aadd 9271 case 'C':
e075ae69 9272 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 9273 return;
fe25fea3 9274 case 'F':
f6f5dff2 9275#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
9276 if (ASSEMBLER_DIALECT == ASM_ATT)
9277 putc ('.', file);
9278#endif
e075ae69 9279 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
9280 return;
9281
e9a25f70 9282 /* Like above, but reverse condition */
e075ae69 9283 case 'c':
fce5a9f2 9284 /* Check to see if argument to %c is really a constant
c1d5afc4 9285 and not a condition code which needs to be reversed. */
ec8e098d 9286 if (!COMPARISON_P (x))
c1d5afc4
CR
9287 {
9288 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
9289 return;
9290 }
e075ae69
RH
9291 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
9292 return;
fe25fea3 9293 case 'f':
f6f5dff2 9294#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
9295 if (ASSEMBLER_DIALECT == ASM_ATT)
9296 putc ('.', file);
9297#endif
e075ae69 9298 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 9299 return;
ef719a44 9300
354f84af
UB
9301 case 'E':
9302 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
9303 return;
9304
9305 case 'e':
9306 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
9307 return;
9308
ef719a44
RH
9309 case 'H':
9310 /* It doesn't actually matter what mode we use here, as we're
9311 only going to use this for printing. */
9312 x = adjust_address_nv (x, DImode, 8);
9313 break;
9314
ef6257cd
JH
9315 case '+':
9316 {
9317 rtx x;
e5cb57e8 9318
ef6257cd
JH
9319 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
9320 return;
a4f31c00 9321
ef6257cd
JH
9322 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
9323 if (x)
9324 {
9325 int pred_val = INTVAL (XEXP (x, 0));
9326
9327 if (pred_val < REG_BR_PROB_BASE * 45 / 100
9328 || pred_val > REG_BR_PROB_BASE * 55 / 100)
9329 {
9330 int taken = pred_val > REG_BR_PROB_BASE / 2;
9331 int cputaken = final_forward_branch_p (current_output_insn) == 0;
9332
9333 /* Emit hints only in the case default branch prediction
d1f87653 9334 heuristics would fail. */
ef6257cd
JH
9335 if (taken != cputaken)
9336 {
9337 /* We use 3e (DS) prefix for taken branches and
9338 2e (CS) prefix for not taken branches. */
9339 if (taken)
9340 fputs ("ds ; ", file);
9341 else
9342 fputs ("cs ; ", file);
9343 }
9344 }
9345 }
9346 return;
9347 }
c9d259cb 9348
04e1d06b
MM
9349 case 'Y':
9350 switch (GET_CODE (x))
9351 {
9352 case NE:
9353 fputs ("neq", file);
9354 break;
9355 case EQ:
9356 fputs ("eq", file);
9357 break;
9358 case GE:
9359 case GEU:
9360 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
9361 break;
9362 case GT:
9363 case GTU:
9364 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
9365 break;
9366 case LE:
9367 case LEU:
9368 fputs ("le", file);
9369 break;
9370 case LT:
9371 case LTU:
9372 fputs ("lt", file);
9373 break;
9374 case UNORDERED:
9375 fputs ("unord", file);
9376 break;
9377 case ORDERED:
9378 fputs ("ord", file);
9379 break;
9380 case UNEQ:
9381 fputs ("ueq", file);
9382 break;
9383 case UNGE:
9384 fputs ("nlt", file);
9385 break;
9386 case UNGT:
9387 fputs ("nle", file);
9388 break;
9389 case UNLE:
9390 fputs ("ule", file);
9391 break;
9392 case UNLT:
9393 fputs ("ult", file);
9394 break;
9395 case LTGT:
9396 fputs ("une", file);
9397 break;
9398 default:
9399 gcc_unreachable ();
9400 }
9401 return;
9402
c9d259cb
UB
9403 case ';':
9404#if TARGET_MACHO
9405 fputs (" ; ", file);
9406#else
9407 fputc (' ', file);
9408#endif
9409 return;
9410
4af3895e 9411 default:
9e637a26 9412 output_operand_lossage ("invalid operand code '%c'", code);
2a2ab3f9
JVA
9413 }
9414 }
e9a25f70 9415
7656aee4 9416 if (REG_P (x))
a55f4481 9417 print_reg (x, code, file);
e9a25f70 9418
7656aee4 9419 else if (MEM_P (x))
2a2ab3f9 9420 {
9ad5e54f
RIL
9421 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
9422 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
9423 && GET_MODE (x) != BLKmode)
2a2ab3f9 9424 {
69ddee61 9425 const char * size;
e075ae69
RH
9426 switch (GET_MODE_SIZE (GET_MODE (x)))
9427 {
9428 case 1: size = "BYTE"; break;
9429 case 2: size = "WORD"; break;
9430 case 4: size = "DWORD"; break;
9431 case 8: size = "QWORD"; break;
9432 case 12: size = "XWORD"; break;
9ad5e54f
RIL
9433 case 16:
9434 if (GET_MODE (x) == XFmode)
9435 size = "XWORD";
9436 else
9437 size = "XMMWORD";
9438 break;
e075ae69 9439 default:
d0396b79 9440 gcc_unreachable ();
e075ae69 9441 }
fb204271
DN
9442
9443 /* Check for explicit size override (codes 'b', 'w' and 'k') */
9444 if (code == 'b')
9445 size = "BYTE";
9446 else if (code == 'w')
9447 size = "WORD";
9448 else if (code == 'k')
9449 size = "DWORD";
9450
e075ae69
RH
9451 fputs (size, file);
9452 fputs (" PTR ", file);
2a2ab3f9 9453 }
e075ae69
RH
9454
9455 x = XEXP (x, 0);
0d7d98ee 9456 /* Avoid (%rip) for call operands. */
d10f5ecf 9457 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7656aee4 9458 && !CONST_INT_P (x))
0d7d98ee 9459 output_addr_const (file, x);
c8b94768
RH
9460 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
9461 output_operand_lossage ("invalid constraints for operand");
2a2ab3f9 9462 else
e075ae69 9463 output_address (x);
2a2ab3f9 9464 }
e9a25f70 9465
2a2ab3f9
JVA
9466 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
9467 {
e9a25f70
JL
9468 REAL_VALUE_TYPE r;
9469 long l;
9470
5f1ec3e6
JVA
9471 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9472 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69 9473
80f33d06 9474 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 9475 putc ('$', file);
3d57d7ce 9476 fprintf (file, "0x%08lx", (long unsigned int) l);
5f1ec3e6 9477 }
e9a25f70 9478
74dc3e94
RH
9479 /* These float cases don't actually occur as immediate operands. */
9480 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5f1ec3e6 9481 {
e9a25f70
JL
9482 char dstr[30];
9483
da6eec72 9484 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 9485 fprintf (file, "%s", dstr);
2a2ab3f9 9486 }
e9a25f70 9487
2b589241 9488 else if (GET_CODE (x) == CONST_DOUBLE
f8a1ebc6 9489 && GET_MODE (x) == XFmode)
2a2ab3f9 9490 {
e9a25f70
JL
9491 char dstr[30];
9492
da6eec72 9493 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 9494 fprintf (file, "%s", dstr);
2a2ab3f9 9495 }
f996902d 9496
79325812 9497 else
2a2ab3f9 9498 {
b4e82619
RH
9499 /* We have patterns that allow zero sets of memory, for instance.
9500 In 64-bit mode, we should probably support all 8-byte vectors,
9501 since we can in fact encode that into an immediate. */
9502 if (GET_CODE (x) == CONST_VECTOR)
9503 {
d0396b79
NS
9504 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
9505 x = const0_rtx;
b4e82619
RH
9506 }
9507
4af3895e 9508 if (code != 'P')
2a2ab3f9 9509 {
7656aee4 9510 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
e075ae69 9511 {
80f33d06 9512 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
9513 putc ('$', file);
9514 }
2a2ab3f9
JVA
9515 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
9516 || GET_CODE (x) == LABEL_REF)
e075ae69 9517 {
80f33d06 9518 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
9519 putc ('$', file);
9520 else
9521 fputs ("OFFSET FLAT:", file);
9522 }
2a2ab3f9 9523 }
7656aee4 9524 if (CONST_INT_P (x))
e075ae69
RH
9525 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9526 else if (flag_pic)
2a2ab3f9
JVA
9527 output_pic_addr_const (file, x, code);
9528 else
9529 output_addr_const (file, x);
9530 }
9531}
9532\f
9533/* Print a memory operand whose address is ADDR. */
9534
9535void
8d531ab9 9536print_operand_address (FILE *file, rtx addr)
2a2ab3f9 9537{
e075ae69
RH
9538 struct ix86_address parts;
9539 rtx base, index, disp;
9540 int scale;
d0396b79 9541 int ok = ix86_decompose_address (addr, &parts);
e9a25f70 9542
d0396b79 9543 gcc_assert (ok);
e9a25f70 9544
e075ae69
RH
9545 base = parts.base;
9546 index = parts.index;
9547 disp = parts.disp;
9548 scale = parts.scale;
e9a25f70 9549
74dc3e94
RH
9550 switch (parts.seg)
9551 {
9552 case SEG_DEFAULT:
9553 break;
9554 case SEG_FS:
9555 case SEG_GS:
9ad5e54f 9556 if (ASSEMBLER_DIALECT == ASM_ATT)
74dc3e94
RH
9557 putc ('%', file);
9558 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
9559 break;
9560 default:
d0396b79 9561 gcc_unreachable ();
74dc3e94
RH
9562 }
9563
9ad5e54f
RIL
9564 /* Use one byte shorter RIP relative addressing for 64bit mode. */
9565 if (TARGET_64BIT && !base && !index)
9566 {
9567 rtx symbol = disp;
9568
9569 if (GET_CODE (disp) == CONST
9570 && GET_CODE (XEXP (disp, 0)) == PLUS
9571 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9572 symbol = XEXP (XEXP (disp, 0), 0);
9573
9574 if (GET_CODE (symbol) == LABEL_REF
9575 || (GET_CODE (symbol) == SYMBOL_REF
9576 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
9577 base = pc_rtx;
9578 }
e075ae69
RH
9579 if (!base && !index)
9580 {
9581 /* Displacement only requires special attention. */
e9a25f70 9582
7656aee4 9583 if (CONST_INT_P (disp))
2a2ab3f9 9584 {
74dc3e94 9585 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
9ad5e54f 9586 fputs ("ds:", file);
74dc3e94 9587 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
2a2ab3f9 9588 }
e075ae69 9589 else if (flag_pic)
74dc3e94 9590 output_pic_addr_const (file, disp, 0);
e075ae69 9591 else
74dc3e94 9592 output_addr_const (file, disp);
e075ae69
RH
9593 }
9594 else
9595 {
80f33d06 9596 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9 9597 {
e075ae69 9598 if (disp)
2a2ab3f9 9599 {
c399861d 9600 if (flag_pic)
e075ae69
RH
9601 output_pic_addr_const (file, disp, 0);
9602 else if (GET_CODE (disp) == LABEL_REF)
9603 output_asm_label (disp);
2a2ab3f9 9604 else
e075ae69 9605 output_addr_const (file, disp);
2a2ab3f9
JVA
9606 }
9607
e075ae69
RH
9608 putc ('(', file);
9609 if (base)
a55f4481 9610 print_reg (base, 0, file);
e075ae69 9611 if (index)
2a2ab3f9 9612 {
e075ae69 9613 putc (',', file);
a55f4481 9614 print_reg (index, 0, file);
e075ae69
RH
9615 if (scale != 1)
9616 fprintf (file, ",%d", scale);
2a2ab3f9 9617 }
e075ae69 9618 putc (')', file);
2a2ab3f9 9619 }
2a2ab3f9
JVA
9620 else
9621 {
e075ae69 9622 rtx offset = NULL_RTX;
e9a25f70 9623
e075ae69
RH
9624 if (disp)
9625 {
9626 /* Pull out the offset of a symbol; print any symbol itself. */
9627 if (GET_CODE (disp) == CONST
9628 && GET_CODE (XEXP (disp, 0)) == PLUS
7656aee4 9629 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
e075ae69
RH
9630 {
9631 offset = XEXP (XEXP (disp, 0), 1);
9632 disp = gen_rtx_CONST (VOIDmode,
9633 XEXP (XEXP (disp, 0), 0));
9634 }
ce193852 9635
e075ae69
RH
9636 if (flag_pic)
9637 output_pic_addr_const (file, disp, 0);
9638 else if (GET_CODE (disp) == LABEL_REF)
9639 output_asm_label (disp);
7656aee4 9640 else if (CONST_INT_P (disp))
e075ae69
RH
9641 offset = disp;
9642 else
9643 output_addr_const (file, disp);
9644 }
e9a25f70 9645
e075ae69
RH
9646 putc ('[', file);
9647 if (base)
a8620236 9648 {
a55f4481 9649 print_reg (base, 0, file);
e075ae69
RH
9650 if (offset)
9651 {
9652 if (INTVAL (offset) >= 0)
9653 putc ('+', file);
9654 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9655 }
a8620236 9656 }
e075ae69
RH
9657 else if (offset)
9658 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 9659 else
e075ae69 9660 putc ('0', file);
e9a25f70 9661
e075ae69
RH
9662 if (index)
9663 {
9664 putc ('+', file);
a55f4481 9665 print_reg (index, 0, file);
e075ae69
RH
9666 if (scale != 1)
9667 fprintf (file, "*%d", scale);
9668 }
9669 putc (']', file);
9670 }
2a2ab3f9
JVA
9671 }
9672}
f996902d
RH
9673
9674bool
b96a374d 9675output_addr_const_extra (FILE *file, rtx x)
f996902d
RH
9676{
9677 rtx op;
9678
9679 if (GET_CODE (x) != UNSPEC)
9680 return false;
9681
9682 op = XVECEXP (x, 0, 0);
9683 switch (XINT (x, 1))
9684 {
9685 case UNSPEC_GOTTPOFF:
9686 output_addr_const (file, op);
dea73790 9687 /* FIXME: This might be @TPOFF in Sun ld. */
f996902d
RH
9688 fputs ("@GOTTPOFF", file);
9689 break;
9690 case UNSPEC_TPOFF:
9691 output_addr_const (file, op);
9692 fputs ("@TPOFF", file);
9693 break;
9694 case UNSPEC_NTPOFF:
9695 output_addr_const (file, op);
75d38379
JJ
9696 if (TARGET_64BIT)
9697 fputs ("@TPOFF", file);
9698 else
9699 fputs ("@NTPOFF", file);
f996902d
RH
9700 break;
9701 case UNSPEC_DTPOFF:
9702 output_addr_const (file, op);
9703 fputs ("@DTPOFF", file);
9704 break;
dea73790
JJ
9705 case UNSPEC_GOTNTPOFF:
9706 output_addr_const (file, op);
75d38379 9707 if (TARGET_64BIT)
9ad5e54f
RIL
9708 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9709 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
75d38379
JJ
9710 else
9711 fputs ("@GOTNTPOFF", file);
dea73790
JJ
9712 break;
9713 case UNSPEC_INDNTPOFF:
9714 output_addr_const (file, op);
9715 fputs ("@INDNTPOFF", file);
9716 break;
f996902d
RH
9717
9718 default:
9719 return false;
9720 }
9721
9722 return true;
9723}
2a2ab3f9
JVA
9724\f
9725/* Split one or more DImode RTL references into pairs of SImode
9726 references. The RTL can be REG, offsettable MEM, integer constant, or
9727 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9728 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 9729 that parallel "operands". */
2a2ab3f9
JVA
9730
9731void
b96a374d 9732split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
2a2ab3f9
JVA
9733{
9734 while (num--)
9735 {
57dbca5e 9736 rtx op = operands[num];
b932f770
JH
9737
9738 /* simplify_subreg refuse to split volatile memory addresses,
9739 but we still have to handle it. */
7656aee4 9740 if (MEM_P (op))
2a2ab3f9 9741 {
f4ef873c 9742 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 9743 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
9744 }
9745 else
b932f770 9746 {
38ca929b
JH
9747 lo_half[num] = simplify_gen_subreg (SImode, op,
9748 GET_MODE (op) == VOIDmode
9749 ? DImode : GET_MODE (op), 0);
9750 hi_half[num] = simplify_gen_subreg (SImode, op,
9751 GET_MODE (op) == VOIDmode
9752 ? DImode : GET_MODE (op), 4);
b932f770 9753 }
2a2ab3f9
JVA
9754 }
9755}
28356f52 9756/* Split one or more TImode RTL references into pairs of DImode
44cf5b6a
JH
9757 references. The RTL can be REG, offsettable MEM, integer constant, or
9758 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9759 split and "num" is its length. lo_half and hi_half are output arrays
9760 that parallel "operands". */
9761
9762void
b96a374d 9763split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
44cf5b6a
JH
9764{
9765 while (num--)
9766 {
9767 rtx op = operands[num];
b932f770
JH
9768
9769 /* simplify_subreg refuse to split volatile memory addresses, but we
9770 still have to handle it. */
7656aee4 9771 if (MEM_P (op))
44cf5b6a
JH
9772 {
9773 lo_half[num] = adjust_address (op, DImode, 0);
9774 hi_half[num] = adjust_address (op, DImode, 8);
9775 }
9776 else
b932f770
JH
9777 {
9778 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9779 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9780 }
44cf5b6a
JH
9781 }
9782}
2a2ab3f9 9783\f
2a2ab3f9
JVA
9784/* Output code to perform a 387 binary operation in INSN, one of PLUS,
9785 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9786 is the expression of the binary operation. The output may either be
9787 emitted here, or returned to the caller, like all output_* functions.
9788
9789 There is no guarantee that the operands are the same mode, as they
0f290768 9790 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 9791
e3c2afab
AM
9792#ifndef SYSV386_COMPAT
9793/* Set to 1 for compatibility with brain-damaged assemblers. No-one
9794 wants to fix the assemblers because that causes incompatibility
9795 with gcc. No-one wants to fix gcc because that causes
9796 incompatibility with assemblers... You can use the option of
9797 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9798#define SYSV386_COMPAT 1
9799#endif
9800
69ddee61 9801const char *
b96a374d 9802output_387_binary_op (rtx insn, rtx *operands)
2a2ab3f9 9803{
e3c2afab 9804 static char buf[30];
69ddee61 9805 const char *p;
1deaa899 9806 const char *ssep;
89b17498 9807 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
2a2ab3f9 9808
e3c2afab
AM
9809#ifdef ENABLE_CHECKING
9810 /* Even if we do not want to check the inputs, this documents input
9811 constraints. Which helps in understanding the following code. */
9812 if (STACK_REG_P (operands[0])
9813 && ((REG_P (operands[1])
9814 && REGNO (operands[0]) == REGNO (operands[1])
7656aee4 9815 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
e3c2afab
AM
9816 || (REG_P (operands[2])
9817 && REGNO (operands[0]) == REGNO (operands[2])
7656aee4 9818 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
e3c2afab
AM
9819 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9820 ; /* ok */
d0396b79
NS
9821 else
9822 gcc_assert (is_sse);
e3c2afab
AM
9823#endif
9824
2a2ab3f9
JVA
9825 switch (GET_CODE (operands[3]))
9826 {
9827 case PLUS:
e075ae69
RH
9828 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9829 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9830 p = "fiadd";
9831 else
9832 p = "fadd";
1deaa899 9833 ssep = "add";
2a2ab3f9
JVA
9834 break;
9835
9836 case MINUS:
e075ae69
RH
9837 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9838 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9839 p = "fisub";
9840 else
9841 p = "fsub";
1deaa899 9842 ssep = "sub";
2a2ab3f9
JVA
9843 break;
9844
9845 case MULT:
e075ae69
RH
9846 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9847 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9848 p = "fimul";
9849 else
9850 p = "fmul";
1deaa899 9851 ssep = "mul";
2a2ab3f9
JVA
9852 break;
9853
9854 case DIV:
e075ae69
RH
9855 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9856 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9857 p = "fidiv";
9858 else
9859 p = "fdiv";
1deaa899 9860 ssep = "div";
2a2ab3f9
JVA
9861 break;
9862
9863 default:
d0396b79 9864 gcc_unreachable ();
2a2ab3f9
JVA
9865 }
9866
1deaa899
JH
9867 if (is_sse)
9868 {
9869 strcpy (buf, ssep);
9870 if (GET_MODE (operands[0]) == SFmode)
9871 strcat (buf, "ss\t{%2, %0|%0, %2}");
9872 else
9873 strcat (buf, "sd\t{%2, %0|%0, %2}");
9874 return buf;
9875 }
e075ae69 9876 strcpy (buf, p);
2a2ab3f9
JVA
9877
9878 switch (GET_CODE (operands[3]))
9879 {
9880 case MULT:
9881 case PLUS:
9882 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9883 {
e3c2afab 9884 rtx temp = operands[2];
2a2ab3f9
JVA
9885 operands[2] = operands[1];
9886 operands[1] = temp;
9887 }
9888
e3c2afab
AM
9889 /* know operands[0] == operands[1]. */
9890
7656aee4 9891 if (MEM_P (operands[2]))
e075ae69
RH
9892 {
9893 p = "%z2\t%2";
9894 break;
9895 }
2a2ab3f9
JVA
9896
9897 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
9898 {
9899 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
9900 /* How is it that we are storing to a dead operand[2]?
9901 Well, presumably operands[1] is dead too. We can't
9902 store the result to st(0) as st(0) gets popped on this
9903 instruction. Instead store to operands[2] (which I
9904 think has to be st(1)). st(1) will be popped later.
9905 gcc <= 2.8.1 didn't have this check and generated
9906 assembly code that the Unixware assembler rejected. */
9907 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 9908 else
e3c2afab 9909 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 9910 break;
6b28fd63 9911 }
2a2ab3f9
JVA
9912
9913 if (STACK_TOP_P (operands[0]))
e3c2afab 9914 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 9915 else
e3c2afab 9916 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 9917 break;
2a2ab3f9
JVA
9918
9919 case MINUS:
9920 case DIV:
7656aee4 9921 if (MEM_P (operands[1]))
e075ae69
RH
9922 {
9923 p = "r%z1\t%1";
9924 break;
9925 }
2a2ab3f9 9926
7656aee4 9927 if (MEM_P (operands[2]))
e075ae69
RH
9928 {
9929 p = "%z2\t%2";
9930 break;
9931 }
2a2ab3f9 9932
2a2ab3f9 9933 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 9934 {
e3c2afab
AM
9935#if SYSV386_COMPAT
9936 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9937 derived assemblers, confusingly reverse the direction of
9938 the operation for fsub{r} and fdiv{r} when the
9939 destination register is not st(0). The Intel assembler
9940 doesn't have this brain damage. Read !SYSV386_COMPAT to
9941 figure out what the hardware really does. */
9942 if (STACK_TOP_P (operands[0]))
9943 p = "{p\t%0, %2|rp\t%2, %0}";
9944 else
9945 p = "{rp\t%2, %0|p\t%0, %2}";
9946#else
6b28fd63 9947 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
9948 /* As above for fmul/fadd, we can't store to st(0). */
9949 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 9950 else
e3c2afab
AM
9951 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9952#endif
e075ae69 9953 break;
6b28fd63 9954 }
2a2ab3f9
JVA
9955
9956 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 9957 {
e3c2afab 9958#if SYSV386_COMPAT
6b28fd63 9959 if (STACK_TOP_P (operands[0]))
e3c2afab 9960 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 9961 else
e3c2afab
AM
9962 p = "{p\t%1, %0|rp\t%0, %1}";
9963#else
9964 if (STACK_TOP_P (operands[0]))
9965 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9966 else
9967 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9968#endif
e075ae69 9969 break;
6b28fd63 9970 }
2a2ab3f9
JVA
9971
9972 if (STACK_TOP_P (operands[0]))
9973 {
9974 if (STACK_TOP_P (operands[1]))
e3c2afab 9975 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 9976 else
e3c2afab 9977 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 9978 break;
2a2ab3f9
JVA
9979 }
9980 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
9981 {
9982#if SYSV386_COMPAT
9983 p = "{\t%1, %0|r\t%0, %1}";
9984#else
9985 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9986#endif
9987 }
2a2ab3f9 9988 else
e3c2afab
AM
9989 {
9990#if SYSV386_COMPAT
9991 p = "{r\t%2, %0|\t%0, %2}";
9992#else
9993 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9994#endif
9995 }
e075ae69 9996 break;
2a2ab3f9
JVA
9997
9998 default:
d0396b79 9999 gcc_unreachable ();
2a2ab3f9 10000 }
e075ae69
RH
10001
10002 strcat (buf, p);
10003 return buf;
2a2ab3f9 10004}
e075ae69 10005
ff680eb1
UB
/* Return needed mode for entity in optimize_mode_switching pass.

   ENTITY is one of the I387_* mode-switching entities (rounding /
   precision-mask variants of the x87 control word); INSN is the insn
   being examined.  Returns the I387_CW_* mode INSN requires.  */

int
ix86_mode_needed (int entity, rtx insn)
{
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store control word after a
     function call or ASM pattern.  The mode ANY specify that function
     has no requirements on the control word and make no changes in the
     bits we are interested in.  */

  /* Calls and asms may clobber the control word arbitrarily.  */
  if (CALL_P (insn)
      || (NONJUMP_INSN_P (insn)
	  && (asm_noperands (PATTERN (insn)) >= 0
	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  /* Unrecognizable insns place no requirement on the control word.  */
  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  /* The insn's i387_cw attribute says which control word it wants.  */
  mode = get_attr_i387_cw (insn);

  /* Only report the mode if it belongs to the entity being switched;
     an insn needing e.g. I387_CW_FLOOR is I387_CW_ANY for I387_TRUNC.  */
  switch (entity)
    {
    case I387_TRUNC:
      if (mode == I387_CW_TRUNC)
	return mode;
      break;

    case I387_FLOOR:
      if (mode == I387_CW_FLOOR)
	return mode;
      break;

    case I387_CEIL:
      if (mode == I387_CW_CEIL)
	return mode;
      break;

    case I387_MASK_PM:
      if (mode == I387_CW_MASK_PM)
	return mode;
      break;

    default:
      gcc_unreachable ();
    }

  return I387_CW_ANY;
}
10057
edeacc14
UB
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.

   MODE is the desired I387_CW_* setting.  Emits insns that store the
   current x87 control word to a stack slot (SLOT_CW_STORED), compute a
   modified copy in a register, and store that copy to the stack slot
   for MODE so that fldcw can later switch between them.  */

void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  /* Save the live control word, then load it into a pseudo to edit.  */
  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  /* Two strategies for editing the rounding-control bits (0x0c00):
     plain HImode and/or, or a narrow insv that avoids a partial
     register stall on targets where that matters.  */
  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      /* Use movsi_insv_1 to write the two rounding-control bits
	 directly; the 0xc/0x4/0x8 values are the same RC field
	 values as above, positioned by the insv pattern.  */
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  /* Store the edited control word where the fldcw patterns expect it.  */
  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
10147
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.

   FISTTP is nonzero when the SSE3 fisttp instruction (truncate without
   a control-word change) should be used.  When the rounding mode must
   be changed, operands[3] holds the new control word and operands[2]
   the saved one.  Returns "" since all output is emitted directly.  */

const char *
output_fix_trunc (rtx insn, rtx *operands, int fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

  if (fisttp)
      output_asm_insn ("fisttp%z0\t%0", operands);
  else
    {
      /* Switch to the requested rounding mode, convert, and restore.  */
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
	output_asm_insn ("fistp%z0\t%0", operands);
      else
	output_asm_insn ("fist%z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%2", operands);
    }

  return "";
}
cda749b1 10185
b6c03bcd
RS
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.

   When the assembler does not know the ffreep mnemonic, the two-byte
   opcode 0xdf 0xc0+i is emitted as a .word directive instead; falls
   back to a plain fstp when TARGET_USE_FFREEP is off.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#if HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    {
      /* retval[9] is the '_' placeholder; patch in the stack register
	 number to form e.g. ".word\t0xc3df" for st(3).  */
      static char retval[] = ".word\t0xc_df";
      int regno = REGNO (operands[opno]);

      gcc_assert (FP_REGNO_P (regno));

      retval[9] = '0' + (regno - FIRST_STACK_REG);
      return retval;
    }
#endif

  return opno ? "fstp\t%y1" : "fstp\t%y0";
}
10210
10211
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.

   Handles SSE scalar compares (ucomiss/comiss, ucomisd/comisd) as well
   as the full x87 matrix of fcom/fucom/ficom variants, choosing popping
   forms when the stack operands die.  Returns the asm template string
   (or "" when the output was already emitted via output_asm_insn).  */

const char *
output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  /* With fcomi the compare operands are operands[0]/[1]; the fnstsw
     forms put the status-word destination in operands[0] instead.  */
  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "ucomiss\t{%1, %0|%0, %1}";
	else
	  return "comiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "ucomisd\t{%1, %0|%0, %1}";
	else
	  return "comisd\t{%1, %0|%0, %1}";
    }

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  /* Compare against zero: use ftst, popping st(0) afterwards if it
     dies.  */
  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
	{
	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
	  return output_387_ffreep (operands, 1);
	}
      else
	return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare */

      if (eflags_p)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return output_387_ffreep (operands, 0);
	}
      else
	{
	  if (unordered_p)
	    return "fucompp\n\tfnstsw\t%0";
	  else
	    return "fcompp\n\tfnstsw\t%0";
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
      {
	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      /* NULL table slots are combinations that should never be
	 requested (e.g. unordered integer compares).  */
      gcc_assert (mask < 16);
      ret = alt[mask];
      gcc_assert (ret);

      return ret;
    }
}
2a2ab3f9 10331
f88c65f7 10332void
b96a374d 10333ix86_output_addr_vec_elt (FILE *file, int value)
f88c65f7
RH
10334{
10335 const char *directive = ASM_LONG;
10336
f88c65f7 10337#ifdef ASM_QUAD
d0396b79
NS
10338 if (TARGET_64BIT)
10339 directive = ASM_QUAD;
f88c65f7 10340#else
d0396b79 10341 gcc_assert (!TARGET_64BIT);
f88c65f7 10342#endif
f88c65f7
RH
10343
10344 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
10345}
10346
/* Emit one element of a relative (difference) address vector to FILE.
   VALUE is the label suffix of the target; REL the label the entry is
   relative to.  The form used depends on the ABI: plain label
   difference, @GOTOFF, Mach-O PIC base, or a GOT-relative expression.  */

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif
  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    fprintf (file, "%s%s%d-%s%d\n",
	     directive, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      /* Mach-O PIC: offset from the function's picbase label.  */
      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
      machopic_output_function_base_name (file);
      fprintf(file, "\n");
    }
#endif
  else
    /* Fallback: express the entry relative to the GOT symbol.  */
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}
32b5b1aa 10376\f
a8bac9ab
RH
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.

   DEST is the register to clear.  Only valid after reload, since it
   plays register-width games and may attach a flags clobber.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));
  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
    {
      /* The xor form clobbers the flags register; say so.  */
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
10402
f996902d
RH
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

rtx
maybe_get_pool_constant (rtx x)
{
  /* Strip PIC/GOT decoration from the address before testing it.  */
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}
10416
/* Expand a scalar move of mode MODE.  OPERANDS[0] is the destination,
   OPERANDS[1] the source.  Legitimizes TLS and dllimport symbol
   references, handles PIC addresses (including Mach-O), and massages
   operands (forcing to registers / memory) so that a single move insn
   is valid; finally emits the SET.  */

void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  if (GET_CODE (op1) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
	{
	  /* TLS symbol: compute the legitimate address; if that landed
	     directly in op0 the move is already done.  */
	  op1 = legitimize_tls_address (op1, model, true);
	  op1 = force_operand (op1, op0);
	  if (op1 == op0)
	    return;
	}
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	       && SYMBOL_REF_DLLIMPORT_P (op1))
	op1 = legitimize_dllimport_symbol (op1, false);
    }
  else if (GET_CODE (op1) == CONST
	   && GET_CODE (XEXP (op1, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      /* symbol + offset: legitimize the symbol part, then re-add the
	 addend.  */
      rtx addend = XEXP (XEXP (op1, 0), 1);
      rtx symbol = XEXP (XEXP (op1, 0), 0);
      rtx tmp = NULL;

      model = SYMBOL_REF_TLS_MODEL (symbol);
      if (model)
	tmp = legitimize_tls_address (symbol, model, true);
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	       && SYMBOL_REF_DLLIMPORT_P (symbol))
	tmp = legitimize_dllimport_symbol (symbol, true);

      if (tmp)
	{
	  tmp = force_operand (tmp, NULL);
	  tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
				     op0, 1, OPTAB_DIRECT);
	  if (tmp == op0)
	    return;
	}
    }

  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
      if (TARGET_MACHO && !TARGET_64BIT)
	{
#if TARGET_MACHO
	  if (MACHOPIC_PURE)
	    {
	      /* Reuse op0 as the scratch when we may not create new
		 pseudos (reload) and it is a suitable register.  */
	      rtx temp = ((reload_in_progress
			   || ((op0 && REG_P (op0))
			       && mode == Pmode))
			  ? op0 : gen_reg_rtx (Pmode));
	      op1 = machopic_indirect_data_reference (op1, temp);
	      op1 = machopic_legitimize_pic_address (op1, mode,
						     temp == op1 ? 0 : temp);
	    }
	  else if (MACHOPIC_INDIRECT)
	    op1 = machopic_indirect_data_reference (op1, 0);
	  if (op0 == op1)
	    return;
#endif
	}
      else
	{
	  if (MEM_P (op0))
	    op1 = force_reg (Pmode, op1);
	  else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
	    {
	      rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
	      op1 = legitimize_pic_address (op1, reg);
	      if (op0 == op1)
		return;
	    }
	}
    }
  else
    {
      /* mem->mem moves are invalid except for pushes; go through a
	 register.  */
      if (MEM_P (op0)
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && MEM_P (op1))
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (can_create_pseudo_p ()
	  && (mode == DImode) && TARGET_64BIT
	  && immediate_operand (op1, mode)
	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
	  && !register_operand (op0, mode)
	  && optimize)
	op1 = copy_to_mode_reg (mode, op1);

      if (can_create_pseudo_p ()
	  && FLOAT_MODE_P (mode)
	  && GET_CODE (op1) == CONST_DOUBLE)
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  op1 = validize_mem (force_const_mem (mode, op1));
	  if (!register_operand (op0, mode))
	    {
	      rtx temp = gen_reg_rtx (mode);
	      emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
	      emit_move_insn (op0, temp);
	      return;
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
e9a25f70 10542
/* Expand a vector move of mode MODE.  OPERANDS[0] is the destination,
   OPERANDS[1] the source.  Forces non-trivial constants to the
   constant pool, routes under-aligned SSE memory operands through
   ix86_expand_vector_move_misalign, and otherwise emits a plain SET.  */

void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if (can_create_pseudo_p ()
      && register_operand (op0, mode)
      && (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
      && standard_sse_constant_p (op1) <= 0)
    op1 = validize_mem (force_const_mem (mode, op1));

  /* We need to check memory alignment for SSE mode since attribute
     can make operands unaligned.  */
  if (can_create_pseudo_p ()
      && SSE_REG_MODE_P (mode)
      && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
	  || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
    {
      rtx tmp[2];

      /* ix86_expand_vector_move_misalign() does not like constants ... */
      if (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
	op1 = validize_mem (force_const_mem (mode, op1));

      /* ... nor both arguments in memory.  */
      if (!register_operand (op0, mode)
	  && !register_operand (op1, mode))
	op1 = force_reg (mode, op1);

      tmp[0] = op0; tmp[1] = op1;
      ix86_expand_vector_move_misalign (mode, tmp);
      return;
    }

  /* Make operand1 a register if it isn't already.  */
  if (can_create_pseudo_p ()
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
e37af218 10597
/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg

     if (x86_sse_partial_reg_dependency == true)
       {
         xorps  reg, reg
         movlps mem, reg
         movhps mem+8, reg
       }
     else
       {
         movlps mem, reg
         movhps mem+8, reg
       }

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg

     if (x86_sse_split_regs == true)
       {
         movlpd mem, reg
         movhpd mem+8, reg
       }
     else
       {
         movsd  mem, reg
         movhpd mem+8, reg
       }
 */

void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  if (MEM_P (op1))
    {
      /* Unaligned load from memory.  */

      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? If we have typed data, then it would appear that using
	 movdqu is the only way to get unaligned data loaded with
	 integer type.  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  rtx zero;

	  if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
	    {
	      op0 = gen_lowpart (V2DFmode, op0);
	      op1 = gen_lowpart (V2DFmode, op1);
	      emit_insn (gen_sse2_movupd (op0, op1));
	      return;
	    }

	  /* When SSE registers are split into halves, we can avoid
	     writing to the top half twice.  */
	  if (TARGET_SSE_SPLIT_REGS)
	    {
	      emit_clobber (op0);
	      zero = op0;
	    }
	  else
	    {
	      /* ??? Not sure about the best option for the Intel chips.
		 The following would seem to satisfy; the register is
		 entirely cleared, breaking the dependency chain.  We
		 then store to the upper half, with a dependency depth
		 of one.  A rumor has it that Intel recommends two movsd
		 followed by an unpacklpd, but this is unconfirmed.  And
		 given that the dependency depth of the unpacklpd would
		 still be one, I'm not sure why this would be better.  */
	      zero = CONST0_RTX (V2DFmode);
	    }

	  m = adjust_address (op1, DFmode, 0);
	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
	  m = adjust_address (op1, DFmode, 8);
	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
	}
      else
	{
	  if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
	    {
	      op0 = gen_lowpart (V4SFmode, op0);
	      op1 = gen_lowpart (V4SFmode, op1);
	      emit_insn (gen_sse_movups (op0, op1));
	      return;
	    }

	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
	    emit_move_insn (op0, CONST0_RTX (mode));
	  else
	    emit_clobber (op0);

	  if (mode != V4SFmode)
	    op0 = gen_lowpart (V4SFmode, op0);
	  m = adjust_address (op1, V2SFmode, 0);
	  emit_insn (gen_sse_loadlps (op0, op0, m));
	  m = adjust_address (op1, V2SFmode, 8);
	  emit_insn (gen_sse_loadhps (op0, op0, m));
	}
    }
  else if (MEM_P (op0))
    {
      /* Unaligned store to memory.  */

      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? Similar to above, only less clear because of quote
	 typeless stores unquote.  */
      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  /* Store the two halves separately.  */
	  m = adjust_address (op0, DFmode, 0);
	  emit_insn (gen_sse2_storelpd (m, op1));
	  m = adjust_address (op0, DFmode, 8);
	  emit_insn (gen_sse2_storehpd (m, op1));
	}
      else
	{
	  if (mode != V4SFmode)
	    op1 = gen_lowpart (V4SFmode, op1);
	  m = adjust_address (op0, V2SFmode, 0);
	  emit_insn (gen_sse_storelps (m, op1));
	  m = adjust_address (op0, V2SFmode, 8);
	  emit_insn (gen_sse_storehps (m, op1));
	}
    }
  else
    /* Register-to-register misaligned moves do not occur.  */
    gcc_unreachable ();
}
10782
6b79c03c
RH
/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.

   X is the value to push.  Emits an explicit stack-pointer decrement
   followed by a store through the new stack pointer.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  /* sp -= size of MODE.  */
  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
			     GEN_INT (-GET_MODE_SIZE (mode)),
			     stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  /* *sp = x.  */
  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
  emit_move_insn (tmp, x);
}
c38573a8 10801
ffa1b3c6
RS
/* Helper function of ix86_fixup_binary_operands to canonicalize
   operand order.  Returns true if the operands should be swapped.

   CODE is the operation, MODE its mode, OPERANDS the dst/src1/src2
   triple.  Only commutative operations may be swapped; priority is
   dst==src1 match, then immediates second, then memory second.  */

static bool
ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* If the operation is not commutative, we can't do anything.  */
  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return false;

  /* Highest priority is that src1 should match dst.  */
  if (rtx_equal_p (dst, src1))
    return false;
  if (rtx_equal_p (dst, src2))
    return true;

  /* Next highest priority is that immediate constants come second.  */
  if (immediate_operand (src2, mode))
    return false;
  if (immediate_operand (src1, mode))
    return true;

  /* Lowest priority is that memory references should come second.  */
  if (MEM_P (src2))
    return false;
  if (MEM_P (src1))
    return true;

  return false;
}
10837
10838
ef719a44
RH
/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.

   CODE/MODE describe the binary operation; OPERANDS is the
   dst/src1/src2 triple, updated in place (operands[1]/[2]).  */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Canonicalize operand order.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp;

      /* It is invalid to swap operands of different modes.  */
      gcc_assert (GET_MODE (src1) == GET_MODE (src2));

      temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    {
      /* Optimization: Only read from memory once.  */
      if (rtx_equal_p (src1, src2))
	{
	  src2 = force_reg (mode, src2);
	  src1 = src2;
	}
      else
	src2 = force_reg (mode, src2);
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    dst = gen_reg_rtx (mode);

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    src1 = force_reg (mode, src1);

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    src1 = force_reg (mode, src1);

  operands[1] = src1;
  operands[2] = src2;
  return dst;
}
10894
/* Similarly, but assume that the destination has already been
   set up properly.

   Asserts that no copy to a scratch destination was needed, i.e. the
   fixed-up destination is still operands[0].  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
				    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}
10905
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.

   CODE/MODE describe the operation; OPERANDS is dst/src1/src2.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else
    {
      /* Arithmetic insns clobber the flags register.  */
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
10940
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.

   Mirrors the operand canonicalization performed by
   ix86_fixup_binary_operands; keep the two in sync.  */

int
ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
			 rtx operands[3])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    return 0;

  /* Canonicalize operand order for commutative operators.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, we must have a matching source operand.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    return 0;

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    return 0;

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    return 0;

  return 1;
}
10978
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.

   CODE/MODE describe the operation; OPERANDS is dst/src.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      /* Other unary operations clobber the flags register.  */
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
11028
11029/* Return TRUE or FALSE depending on whether the unary operator meets the
11030 appropriate constraints. */
11031
11032int
b96a374d
AJ
11033ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
11034 enum machine_mode mode ATTRIBUTE_UNUSED,
11035 rtx operands[2] ATTRIBUTE_UNUSED)
e075ae69 11036{
06a964de 11037 /* If one of operands is memory, source and destination must match. */
7656aee4
UB
11038 if ((MEM_P (operands[0])
11039 || MEM_P (operands[1]))
06a964de
JH
11040 && ! rtx_equal_p (operands[0], operands[1]))
11041 return FALSE;
e075ae69
RH
11042 return TRUE;
11043}
7cacf53e 11044
174c12c7
RH
/* Post-reload splitter for converting an SF or DFmode value in an
   SSE register into an unsigned SImode.

   operands[0]	output register (reinterpreted below in vector mode)
   operands[1]	scratch, loaded with 2**31 ("large")
   operands[2]	scratch for the conditional bias ("zero_or_two31")
   operands[3]	the input value
   operands[4]	the constant 2**31, possibly still in memory

   Inputs >= 2**31 are biased down by 2**31 before the (signed)
   truncating conversion, and the 2**31 bit is XORed back into the
   integer result afterwards.  */

void
ix86_split_convert_uns_si_sse (rtx operands[])
{
  enum machine_mode vecmode;
  rtx value, large, zero_or_two31, input, two31, x;

  large = operands[1];
  zero_or_two31 = operands[2];
  input = operands[3];
  two31 = operands[4];
  vecmode = GET_MODE (large);
  value = gen_rtx_REG (vecmode, REGNO (operands[0]));

  /* Load up the value into the low element.  We must ensure that the other
     elements are valid floats -- zero is the easiest such value.  */
  if (MEM_P (input))
    {
      if (vecmode == V4SFmode)
	emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
      else
	emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
    }
  else
    {
      input = gen_rtx_REG (vecmode, REGNO (input));
      emit_move_insn (value, CONST0_RTX (vecmode));
      if (vecmode == V4SFmode)
	emit_insn (gen_sse_movss (value, value, input));
      else
	emit_insn (gen_sse2_movsd (value, value, input));
    }

  /* LARGE = 2**31; ZERO_OR_TWO31 starts as a copy of it (copied via
     LARGE when 2**31 is still a memory operand).  */
  emit_move_insn (large, two31);
  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);

  /* LARGE = all-ones mask in the elements where 2**31 <= VALUE.  */
  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
  emit_insn (gen_rtx_SET (VOIDmode, large, x));

  /* ZERO_OR_TWO31 = 2**31 where VALUE >= 2**31, else 0.  */
  x = gen_rtx_AND (vecmode, zero_or_two31, large);
  emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));

  /* Bias VALUE into the range a signed conversion can handle.  */
  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
  emit_insn (gen_rtx_SET (VOIDmode, value, x));

  /* Shift the mask left 31 bits: 0x80000000 in the biased elements,
     0 elsewhere.  */
  large = gen_rtx_REG (V4SImode, REGNO (large));
  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));

  /* Truncating signed conversion of the (possibly biased) value.  */
  x = gen_rtx_REG (V4SImode, REGNO (value));
  if (vecmode == V4SFmode)
    emit_insn (gen_sse2_cvttps2dq (x, value));
  else
    emit_insn (gen_sse2_cvttpd2dq (x, value));
  value = x;

  /* XOR the 2**31 bit back into the biased results.  */
  emit_insn (gen_xorv4si3 (value, value, large));
}
11104
/* Convert an unsigned DImode value into a DFmode, using only SSE.
   Expects the 64-bit DImode to be supplied in a pair of integral
   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
   -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
  rtx int_xmm, fp_xmm;
  rtx biases, exponents;
  rtx x;

  /* Get the 64-bit input into the low half of an SSE register, by
     whichever path the target tuning prefers.  */
  int_xmm = gen_reg_rtx (V4SImode);
  if (TARGET_INTER_UNIT_MOVES)
    emit_insn (gen_movdi_to_sse (int_xmm, input));
  else if (TARGET_SSE_SPLIT_REGS)
    {
      /* Clobber first so the partial-width write below does not create
	 a false dependency on the old register contents.  */
      emit_clobber (int_xmm);
      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
    }
  else
    {
      /* Build a V2DImode vector from the integer input instead of
	 moving it directly between units.  */
      x = gen_reg_rtx (V2DImode);
      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
    }

  x = gen_rtx_CONST_VECTOR (V4SImode,
			    gen_rtvec (4, GEN_INT (0x43300000UL),
				       GEN_INT (0x45300000UL),
				       const0_rtx, const0_rtx));
  exponents = validize_mem (force_const_mem (V4SImode, x));

  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
  emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));

  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
     (0x1.0p84 + double(fp_value_hi_xmm)).
     Note these exponents differ by 32.  */

  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));

  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
  real_ldexp (&bias_lo_rvt, &dconst1, 52);
  real_ldexp (&bias_hi_rvt, &dconst1, 84);
  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
  x = const_double_from_real_value (bias_hi_rvt, DFmode);
  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
  biases = validize_mem (force_const_mem (V2DFmode, biases));
  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));

  /* Add the upper and lower DFmode values together.  */
  if (TARGET_SSE3)
    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
  else
    {
      /* No horizontal add: copy, swap the halves, add.  */
      x = copy_to_mode_reg (V2DFmode, fp_xmm);
      emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
    }

  /* The result is in element 0 of the vector; extract it.  */
  ix86_expand_vector_extract (false, target, fp_xmm, 0);
}
11172
7fb1431b
UB
/* Not used, but eases macroization of patterns.  Reaching this at
   runtime indicates a bug in the insn patterns that reference it.  */
void
ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
				  rtx input ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
11180
ebff937c
SH
/* Convert an unsigned SImode value into a DFmode.  Only currently used
   for SSE, but applicable anywhere.  */

void
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO31r;
  rtx x, fp;

  /* x = input - 2**31, done as an unsigned PLUS of INT_MIN so it wraps;
     the result reinterpreted as signed is in [-2**31, 2**31).  */
  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
			   NULL, 1, OPTAB_DIRECT);

  /* Signed int-to-double conversion of the biased value.  */
  fp = gen_reg_rtx (DFmode);
  emit_insn (gen_floatsidf2 (fp, x));

  /* Add the 2**31 bias back; DFmode represents it exactly.  */
  real_ldexp (&TWO31r, &dconst1, 31);
  x = const_double_from_real_value (TWO31r, DFmode);

  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
11203
/* Convert a signed DImode value into a DFmode.  Only used for SSE in
   32-bit mode; otherwise we have a direct convert instruction.
   Computes (double) hi * 2**32 + (double) (unsigned) lo.  */

void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO32r;
  rtx fp_lo, fp_hi, x;

  fp_lo = gen_reg_rtx (DFmode);
  fp_hi = gen_reg_rtx (DFmode);

  /* Signed conversion of the high 32 bits carries the sign.  */
  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));

  /* Scale the high part by 2**32.  */
  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);
  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);

  /* Unsigned conversion of the low 32 bits.  */
  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));

  /* Combine the two halves.  */
  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
			   0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
11229
/* Convert an unsigned SImode value into a SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.
   Splits the input into 16-bit halves (each exactly representable in
   SFmode) and computes hi * 2**16 + lo.  */
void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE ONE16r;
  rtx fp_hi, fp_lo, int_hi, int_lo, x;

  /* x = 2**16 as an SFmode constant.  */
  real_ldexp (&ONE16r, &dconst1, 16);
  x = const_double_from_real_value (ONE16r, SFmode);
  /* int_lo = input & 0xffff; int_hi = input >> 16.  */
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
				NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
				NULL, 0, OPTAB_DIRECT);
  fp_hi = gen_reg_rtx (SFmode);
  fp_lo = gen_reg_rtx (SFmode);
  /* Signed conversions are safe: both halves are in [0, 0xffff].  */
  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
			       0, OPTAB_DIRECT);
  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
			       0, OPTAB_DIRECT);
  if (!rtx_equal_p (target, fp_hi))
    emit_move_insn (target, fp_hi);
}
11255
11256/* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
11257 then replicate the value for all elements of the vector
11258 register. */
11259
174c12c7 11260rtx
ebff937c
SH
11261ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
11262{
11263 rtvec v;
11264 switch (mode)
11265 {
3b8dd071
L
11266 case SImode:
11267 gcc_assert (vect);
11268 v = gen_rtvec (4, value, value, value, value);
11269 return gen_rtx_CONST_VECTOR (V4SImode, v);
11270
11271 case DImode:
11272 gcc_assert (vect);
11273 v = gen_rtvec (2, value, value);
11274 return gen_rtx_CONST_VECTOR (V2DImode, v);
11275
ebff937c
SH
11276 case SFmode:
11277 if (vect)
11278 v = gen_rtvec (4, value, value, value, value);
11279 else
11280 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
11281 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11282 return gen_rtx_CONST_VECTOR (V4SFmode, v);
11283
11284 case DFmode:
11285 if (vect)
11286 v = gen_rtvec (2, value, value);
11287 else
11288 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
11289 return gen_rtx_CONST_VECTOR (V2DFmode, v);
11290
11291 default:
11292 gcc_unreachable ();
11293 }
11294}
11295
3b8dd071
L
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode, imode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtx v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  switch (mode)
    {
    case SImode:
    case SFmode:
      imode = SImode;
      vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
      /* Bit 31.  "hi = lo < 0" sign-extends into the high half when
	 HOST_WIDE_INT is only 32 bits wide.  */
      lo = 0x80000000, hi = lo < 0;
      break;

    case DImode:
    case DFmode:
      imode = DImode;
      vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
      /* Bit 63, split across LO/HI when HOST_WIDE_INT is 32 bits.  */
      if (HOST_BITS_PER_WIDE_INT >= 64)
	lo = (HOST_WIDE_INT)1 << shift, hi = -1;
      else
	lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
      break;

    case TImode:
    case TFmode:
      /* Bit 127; no vector mode exists, so the mask is returned as a
	 scalar register below.  Requires a 64-bit HOST_WIDE_INT.  */
      imode = TImode;
      vec_mode = VOIDmode;
      gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
      lo = 0, hi = (HOST_WIDE_INT)1 << shift;
      break;

    default:
      gcc_unreachable ();
    }

  /* INVERT asks for the complement: everything but the sign bit.  */
  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, imode);
  mask = gen_lowpart (mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (mode, mask);

  v = ix86_build_const_vector (mode, vect, mask);
  return force_reg (vec_mode, v);
}
11356
7cacf53e
RH
/* Generate code for floating point ABS or NEG.  CODE is ABS or NEG,
   MODE the (possibly vector) mode, operands[0] the output and
   operands[1] the input.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
				rtx operands[])
{
  rtx mask, set, use, clob, dst, src;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode elt_mode = mode;

  /* Decide whether this operation is done with SSE bit masks: always
     for vector modes and TFmode; for scalar SF/DF only when doing
     SSE math.  */
  if (vector_mode)
    {
      elt_mode = GET_MODE_INNER (mode);
      use_sse = true;
    }
  else if (mode == TFmode)
    use_sse = true;
  else if (TARGET_SSE_MATH)
    use_sse = SSE_FLOAT_MODE_P (mode);

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now: the sign bit for NEG (to be
     XORed), its complement for ABS (to be ANDed).  */
  if (use_sse)
    mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  if (vector_mode)
    {
      /* Vector case: emit the XOR/AND with the mask directly.  */
      set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
      set = gen_rtx_SET (VOIDmode, dst, set);
      emit_insn (set);
    }
  else
    {
      set = gen_rtx_fmt_e (code, mode, src);
      set = gen_rtx_SET (VOIDmode, dst, set);
      if (mask)
	{
	  /* Scalar SSE: keep the abs/neg RTL form, but attach a USE of
	     the mask and a flags clobber for the matching insn pattern
	     (pattern not visible here -- it presumably splits this into
	     the mask operation).  */
	  use = gen_rtx_USE (VOIDmode, mask);
	  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
	  emit_insn (gen_rtx_PARALLEL (VOIDmode,
				       gen_rtvec (3, set, use, clob)));
	}
      else
	emit_insn (set);
    }
}
e075ae69 11409
/* Expand a copysign operation.  Special case operand 0 being a constant.
   operands[0] = result, operands[1] = magnitude source,
   operands[2] = sign source.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx);

      /* Only the magnitude of OP0 matters; strip a negative sign so the
	 zero test below and the splitter's IOR see |OP0|.  */
      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
	op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (mode == SFmode || mode == DFmode)
	{
	  enum machine_mode vmode;

	  vmode = mode == SFmode ? V4SFmode : V2DFmode;

	  if (op0 == CONST0_RTX (mode))
	    op0 = CONST0_RTX (vmode);
	  else
	    {
	      rtvec v;

	      /* Expand the scalar constant into a vector constant with
		 the value in the low element and zeros elsewhere.  */
	      if (mode == SFmode)
		v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
			       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
	      else
		v = gen_rtvec (2, op0, CONST0_RTX (DFmode));

	      op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
	    }
	}
      else if (op0 != CONST0_RTX (mode))
	op0 = force_reg (mode, op0);

      /* Mask selecting just the sign bit.  */
      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_const;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_const;
      else
	copysign_insn = gen_copysigntf3_const;

      emit_insn (copysign_insn (dest, op0, op1, mask));
    }
  else
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);

      /* Variable magnitude: the splitter needs both the sign-bit mask
	 and its complement.  */
      nmask = ix86_build_signbit_mask (mode, 0, 1);
      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_var;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_var;
      else
	copysign_insn = gen_copysigntf3_var;

      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
    }
}
11483
/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.
   operands: 0 = dest, 1 = |magnitude| constant, 2 = sign source,
   3 = sign-bit mask.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, x;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  /* NOTE(review): DEST is assumed to already hold OP1's value
     (presumably tied by the insn pattern's constraints); the AND keeps
     only its sign bit.  */
  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  /* OR in the magnitude constant; skip when it is zero, since the AND
     above already produced the final value.  */
  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
11511
/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  The result computed in each alternative is
   (op0 & nmask) | (op1 & mask), i.e. magnitude of OP0 with sign of OP1.
   operands: 0 = dest, 1 = scratch, 2 = magnitude, 3 = sign source,
   4 = inverted sign mask, 5 = sign mask.  Which registers overlap is
   determined by the insn pattern's constraint alternatives (not visible
   here); the REGNO comparisons below distinguish them.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
	 we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      /* scratch = op1 & mask (sign bit of op1).  */
      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      /* dest = op0 & ~mask, computed as ANDN using the mask in DEST.  */
      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
	{
	  /* scratch already holds op1; extract its sign bit.  */
	  x = gen_rtx_AND (vmode, scratch, mask);
	}
      else						/* alternative 2,4 */
	{
	  /* scratch holds the mask; AND op1 into it.  */
	  gcc_assert (REGNO (mask) == REGNO (scratch));
	  op1 = simplify_gen_subreg (vmode, op1, mode, 0);
	  x = gen_rtx_AND (vmode, scratch, op1);
	}
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
	{
	  /* dest already holds op0; clear its sign bit.  */
	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, nmask);
	}
      else						/* alternative 3,4 */
	{
	  /* dest holds ~mask; AND op0 into it.  */
	  gcc_assert (REGNO (nmask) == REGNO (dest));
	  dest = nmask;
	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, op0);
	}
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  /* Combine magnitude and sign.  */
  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
11584
16189740
RH
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  /* Find the flags-setting COMPARE: the first SET of the pattern.  */
  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  /* Each case below rejects REQ_MODEs that demand flags this SET_MODE
     does not provide; the fallthroughs encode the ordering of the
     integer CC modes from most to least constrained.  */
  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      /* CCNO satisfies a full CCmode request only when comparing
	 against zero.  */
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Finally, the COMPARE itself must carry the same mode as the flags
     destination.  */
  return (GET_MODE (SET_SRC (set)) == set_mode);
}
11631
e075ae69
RH
11632/* Generate insn patterns to do an integer compare of OPERANDS. */
11633
11634static rtx
b96a374d 11635ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
e075ae69
RH
11636{
11637 enum machine_mode cmpmode;
11638 rtx tmp, flags;
11639
11640 cmpmode = SELECT_CC_MODE (code, op0, op1);
11641 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11642
11643 /* This is very simple, but making the interface the same as in the
11644 FP case makes the rest of the code easier. */
11645 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11646 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11647
11648 /* Return the test that should be put into the flags user, i.e.
11649 the bcc, scc, or cmov instruction. */
11650 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11651}
11652
3a3677ff
RH
11653/* Figure out whether to use ordered or unordered fp comparisons.
11654 Return the appropriate mode to use. */
e075ae69 11655
b1cdafbb 11656enum machine_mode
b96a374d 11657ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
e075ae69 11658{
9e7adcb3
JH
11659 /* ??? In order to make all comparisons reversible, we do all comparisons
11660 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11661 all forms trapping and nontrapping comparisons, we can make inequality
11662 comparisons trapping again, since it results in better code when using
11663 FCOM based compares. */
11664 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
11665}
11666
/* Return the condition-code mode to use when comparing OP0 against OP1
   with comparison CODE.  Floating-point comparisons get one of the
   CCFP modes; for integers, pick the least constrained CC mode that
   still provides the flags CODE needs.  */

enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      /* Decimal float is not handled here.  */
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return ix86_fp_compare_mode (code);
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case LTU:			/* CF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
	  && rtx_equal_p (op1, XEXP (op0, 0)))
	return CCCmode;
      else
	return CCmode;
    case GTU:			/* CF=0 & ZF=0 */
    case LEU:			/* CF=1 | ZF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == MINUS
	  && rtx_equal_p (op1, XEXP (op0, 0)))
	return CCCmode;
      else
	return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:			/* SF=OF or SF=0 */
    case LT:			/* SF<>OF or SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
	 mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}
11728
e129d93a
ILT
11729/* Return the fixed registers used for condition codes. */
11730
11731static bool
11732ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11733{
11734 *p1 = FLAGS_REG;
11735 *p2 = FPSR_REG;
11736 return true;
11737}
11738
11739/* If two condition code modes are compatible, return a condition code
11740 mode which is compatible with both. Otherwise, return
11741 VOIDmode. */
11742
11743static enum machine_mode
11744ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11745{
11746 if (m1 == m2)
11747 return m1;
11748
11749 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11750 return VOIDmode;
11751
11752 if ((m1 == CCGCmode && m2 == CCGOCmode)
11753 || (m1 == CCGOCmode && m2 == CCGCmode))
11754 return CCGCmode;
11755
11756 switch (m1)
11757 {
11758 default:
d0396b79 11759 gcc_unreachable ();
e129d93a
ILT
11760
11761 case CCmode:
11762 case CCGCmode:
11763 case CCGOCmode:
11764 case CCNOmode:
06f4e35d
L
11765 case CCAmode:
11766 case CCCmode:
11767 case CCOmode:
11768 case CCSmode:
e129d93a
ILT
11769 case CCZmode:
11770 switch (m2)
11771 {
11772 default:
11773 return VOIDmode;
11774
11775 case CCmode:
11776 case CCGCmode:
11777 case CCGOCmode:
11778 case CCNOmode:
06f4e35d
L
11779 case CCAmode:
11780 case CCCmode:
11781 case CCOmode:
11782 case CCSmode:
e129d93a
ILT
11783 case CCZmode:
11784 return CCmode;
11785 }
11786
11787 case CCFPmode:
11788 case CCFPUmode:
11789 /* These are only compatible with themselves, which we already
11790 checked above. */
11791 return VOIDmode;
11792 }
11793}
11794
c0c102a9
JH
/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is comparison code for branch that will
   branch around FIRST_CODE and SECOND_CODE.  If some of branches
   is not required, set value to UNKNOWN.
   We never require more than two branches.  */

void
ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
			  enum rtx_code *first_code,
			  enum rtx_code *second_code)
{
  *first_code = code;
  *bypass_code = UNKNOWN;
  *second_code = UNKNOWN;

  /* The fcomi comparison sets flags as follows:

     cmp    ZF PF CF
     >      0  0  0
     <      0  0  1
     =      1  0  0
     un     1  1  1 */

  switch (code)
    {
      /* These map directly onto one flags test each.  */
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      break;
      /* Ordered comparisons whose flags test would wrongly succeed on
	 unordered inputs: branch around on UNORDERED first.  */
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
      /* Comparisons that must also succeed on unordered inputs: take a
	 second branch on UNORDERED.  */
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      gcc_unreachable ();
    }
  /* Without IEEE semantics the unordered fixups are unnecessary.  */
  if (!TARGET_IEEE_FP)
    {
      *second_code = UNKNOWN;
      *bypass_code = UNKNOWN;
    }
}
11862
9e7adcb3 11863/* Return cost of comparison done fcom + arithmetics operations on AX.
5bdc5878 11864 All following functions do use number of instructions as a cost metrics.
9e7adcb3
JH
11865 In future this should be tweaked to compute bytes for optimize_size and
11866 take into account performance of various instructions on various CPUs. */
11867static int
b96a374d 11868ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9e7adcb3
JH
11869{
11870 if (!TARGET_IEEE_FP)
11871 return 4;
11872 /* The cost of code output by ix86_expand_fp_compare. */
11873 switch (code)
11874 {
11875 case UNLE:
11876 case UNLT:
11877 case LTGT:
11878 case GT:
11879 case GE:
11880 case UNORDERED:
11881 case ORDERED:
11882 case UNEQ:
11883 return 4;
11884 break;
11885 case LT:
11886 case NE:
11887 case EQ:
11888 case UNGE:
11889 return 5;
11890 break;
11891 case LE:
11892 case UNGT:
11893 return 6;
11894 break;
11895 default:
d0396b79 11896 gcc_unreachable ();
9e7adcb3
JH
11897 }
11898}
11899
11900/* Return cost of comparison done using fcomi operation.
11901 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11902static int
b96a374d 11903ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9e7adcb3
JH
11904{
11905 enum rtx_code bypass_code, first_code, second_code;
d1f87653 11906 /* Return arbitrarily high cost when instruction is not supported - this
9e7adcb3
JH
11907 prevents gcc from using it. */
11908 if (!TARGET_CMOVE)
11909 return 1024;
11910 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
f822d252 11911 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
9e7adcb3
JH
11912}
11913
11914/* Return cost of comparison done using sahf operation.
11915 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11916static int
b96a374d 11917ix86_fp_comparison_sahf_cost (enum rtx_code code)
9e7adcb3
JH
11918{
11919 enum rtx_code bypass_code, first_code, second_code;
d1f87653 11920 /* Return arbitrarily high cost when instruction is not preferred - this
9e7adcb3 11921 avoids gcc from using it. */
3c2d980c 11922 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
9e7adcb3
JH
11923 return 1024;
11924 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
f822d252 11925 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
9e7adcb3
JH
11926}
11927
11928/* Compute cost of the comparison done using any method.
11929 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11930static int
b96a374d 11931ix86_fp_comparison_cost (enum rtx_code code)
9e7adcb3
JH
11932{
11933 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11934 int min;
11935
11936 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11937 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11938
11939 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11940 if (min > sahf_cost)
11941 min = sahf_cost;
11942 if (min > fcomi_cost)
11943 min = fcomi_cost;
11944 return min;
11945}
c0c102a9 11946
2ed941ec
RH
11947/* Return true if we should use an FCOMI instruction for this
11948 fp comparison. */
11949
11950int
11951ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11952{
11953 enum rtx_code swapped_code = swap_condition (code);
11954
11955 return ((ix86_fp_comparison_cost (code)
11956 == ix86_fp_comparison_fcomi_cost (code))
11957 || (ix86_fp_comparison_cost (swapped_code)
11958 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11959}
11960
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  /* Nonzero when the comparison will be carried out by SSE math
     rather than the x87 stack.  */
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || (op_mode == XFmode
	      && ! (standard_80387_constant_p (op0) == 1
		    || standard_80387_constant_p (op1) == 1)
	      && GET_CODE (op1) != FLOAT)
	  || ix86_use_fcomi_compare (code)))
    {
      /* Register-only forms: load both operands.  */
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      /* NOTE(review): standard_80387_constant_p returning 0 means "not
	 an fp constant loadable by fld1/fldz etc."; the swap puts the
	 operand the x87 can consume directly into the op1 slot.  */
      if (standard_80387_constant_p (op0) == 0
	  || (MEM_P (op0)
	      && ! (standard_80387_constant_p (op1) == 0
		    || MEM_P (op1))))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (!REG_P (op0))
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  /* tmp: 0 = not a special x87 constant (spill to memory),
	     1 = fldz/fld1-style constant (keep, unless cmov forces a
	     register), otherwise force into a register.  */
	  int tmp = standard_80387_constant_p (op1);
	  if (tmp == 0)
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	  else if (tmp == 1)
	    {
	      if (TARGET_CMOVE)
		op1 = force_reg (op_mode, op1);
	    }
	  else
	    op1 = force_reg (op_mode, op1);
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (REG_P (op1) || can_create_pseudo_p ()))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (!REG_P (op0))
	op0 = force_reg (op_mode, op0);
    }

  /* Write back the (possibly swapped/reloaded) operands.  */
  *pop0 = op0;
  *pop1 = op1;
  return code;
}
12040
12041/* Convert comparison codes we use to represent FP comparison to integer
12042 code that will result in proper branch. Return UNKNOWN if no such code
12043 is available. */
12044
12045enum rtx_code
12046ix86_fp_compare_code_to_integer (enum rtx_code code)
12047{
12048 switch (code)
12049 {
12050 case GT:
12051 return GTU;
12052 case GE:
12053 return GEU;
12054 case ORDERED:
12055 case UNORDERED:
12056 return code;
12057 break;
12058 case UNEQ:
12059 return EQ;
12060 break;
12061 case UNLT:
12062 return LTU;
12063 break;
12064 case UNLE:
12065 return LEU;
12066 break;
12067 case LTGT:
12068 return NE;
12069 break;
12070 default:
12071 return UNKNOWN;
12072 }
12073}
12074
3a3677ff
RH
12075/* Generate insn patterns to do a floating point compare of OPERANDS. */
12076
9e7adcb3 12077static rtx
b96a374d
AJ
12078ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
12079 rtx *second_test, rtx *bypass_test)
3a3677ff
RH
12080{
12081 enum machine_mode fpcmp_mode, intcmp_mode;
c0c102a9 12082 rtx tmp, tmp2;
9e7adcb3 12083 int cost = ix86_fp_comparison_cost (code);
c0c102a9 12084 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
12085
12086 fpcmp_mode = ix86_fp_compare_mode (code);
12087 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
12088
9e7adcb3
JH
12089 if (second_test)
12090 *second_test = NULL_RTX;
12091 if (bypass_test)
12092 *bypass_test = NULL_RTX;
12093
c0c102a9
JH
12094 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12095
9e7adcb3 12096 /* Do fcomi/sahf based test when profitable. */
1406ee90 12097 if (ix86_fp_comparison_arithmetics_cost (code) > cost
3c2d980c 12098 && (bypass_code == UNKNOWN || bypass_test)
1406ee90 12099 && (second_code == UNKNOWN || second_test))
32b5b1aa 12100 {
1406ee90
UB
12101 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
12102 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
12103 tmp);
c0c102a9 12104 if (TARGET_CMOVE)
1406ee90 12105 emit_insn (tmp);
c0c102a9
JH
12106 else
12107 {
1406ee90
UB
12108 gcc_assert (TARGET_SAHF);
12109
bf71a4f8
JH
12110 if (!scratch)
12111 scratch = gen_reg_rtx (HImode);
1406ee90
UB
12112 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
12113
12114 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
c0c102a9 12115 }
e075ae69
RH
12116
12117 /* The FP codes work out to act like unsigned. */
9a915772 12118 intcmp_mode = fpcmp_mode;
9e7adcb3 12119 code = first_code;
f822d252 12120 if (bypass_code != UNKNOWN)
9e7adcb3
JH
12121 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
12122 gen_rtx_REG (intcmp_mode, FLAGS_REG),
12123 const0_rtx);
f822d252 12124 if (second_code != UNKNOWN)
9e7adcb3
JH
12125 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
12126 gen_rtx_REG (intcmp_mode, FLAGS_REG),
12127 const0_rtx);
e075ae69
RH
12128 }
12129 else
12130 {
12131 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e075ae69 12132 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 12133 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
12134 if (!scratch)
12135 scratch = gen_reg_rtx (HImode);
3a3677ff 12136 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 12137
9a915772
JH
12138 /* In the unordered case, we have to check C2 for NaN's, which
12139 doesn't happen to work out to anything nice combination-wise.
12140 So do some bit twiddling on the value we've got in AH to come
12141 up with an appropriate set of condition codes. */
e075ae69 12142
9a915772
JH
12143 intcmp_mode = CCNOmode;
12144 switch (code)
32b5b1aa 12145 {
9a915772
JH
12146 case GT:
12147 case UNGT:
12148 if (code == GT || !TARGET_IEEE_FP)
32b5b1aa 12149 {
3a3677ff 12150 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69 12151 code = EQ;
9a915772
JH
12152 }
12153 else
12154 {
12155 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12156 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
12157 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
12158 intcmp_mode = CCmode;
12159 code = GEU;
12160 }
12161 break;
12162 case LT:
12163 case UNLT:
12164 if (code == LT && TARGET_IEEE_FP)
12165 {
3a3677ff
RH
12166 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12167 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
e075ae69
RH
12168 intcmp_mode = CCmode;
12169 code = EQ;
9a915772
JH
12170 }
12171 else
12172 {
12173 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
12174 code = NE;
12175 }
12176 break;
12177 case GE:
12178 case UNGE:
12179 if (code == GE || !TARGET_IEEE_FP)
12180 {
3a3677ff 12181 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69 12182 code = EQ;
9a915772
JH
12183 }
12184 else
12185 {
12186 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12187 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12188 GEN_INT (0x01)));
12189 code = NE;
12190 }
12191 break;
12192 case LE:
12193 case UNLE:
12194 if (code == LE && TARGET_IEEE_FP)
12195 {
3a3677ff
RH
12196 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12197 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
12198 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
12199 intcmp_mode = CCmode;
12200 code = LTU;
9a915772
JH
12201 }
12202 else
12203 {
12204 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
12205 code = NE;
12206 }
12207 break;
12208 case EQ:
12209 case UNEQ:
12210 if (code == EQ && TARGET_IEEE_FP)
12211 {
3a3677ff
RH
12212 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12213 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
12214 intcmp_mode = CCmode;
12215 code = EQ;
9a915772
JH
12216 }
12217 else
12218 {
3a3677ff
RH
12219 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
12220 code = NE;
12221 break;
9a915772
JH
12222 }
12223 break;
12224 case NE:
12225 case LTGT:
12226 if (code == NE && TARGET_IEEE_FP)
12227 {
3a3677ff 12228 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9a915772
JH
12229 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12230 GEN_INT (0x40)));
3a3677ff 12231 code = NE;
9a915772
JH
12232 }
12233 else
12234 {
3a3677ff
RH
12235 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
12236 code = EQ;
32b5b1aa 12237 }
9a915772
JH
12238 break;
12239
12240 case UNORDERED:
12241 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12242 code = NE;
12243 break;
12244 case ORDERED:
12245 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12246 code = EQ;
12247 break;
12248
12249 default:
d0396b79 12250 gcc_unreachable ();
32b5b1aa 12251 }
32b5b1aa 12252 }
e075ae69
RH
12253
12254 /* Return the test that should be put into the flags user, i.e.
12255 the bcc, scc, or cmov instruction. */
12256 return gen_rtx_fmt_ee (code, VOIDmode,
12257 gen_rtx_REG (intcmp_mode, FLAGS_REG),
12258 const0_rtx);
12259}
12260
9e3e266c 12261rtx
b96a374d 12262ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
e075ae69
RH
12263{
12264 rtx op0, op1, ret;
12265 op0 = ix86_compare_op0;
12266 op1 = ix86_compare_op1;
12267
a1b8572c
JH
12268 if (second_test)
12269 *second_test = NULL_RTX;
12270 if (bypass_test)
12271 *bypass_test = NULL_RTX;
12272
1ef45b77
RH
12273 if (ix86_compare_emitted)
12274 {
12275 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
12276 ix86_compare_emitted = NULL_RTX;
12277 }
ebb109ad 12278 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
27ac40e2
UB
12279 {
12280 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
12281 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12282 second_test, bypass_test);
12283 }
32b5b1aa 12284 else
e075ae69
RH
12285 ret = ix86_expand_int_compare (code, op0, op1);
12286
12287 return ret;
12288}
12289
03598dea
JH
12290/* Return true if the CODE will result in nontrivial jump sequence. */
12291bool
b96a374d 12292ix86_fp_jump_nontrivial_p (enum rtx_code code)
03598dea
JH
12293{
12294 enum rtx_code bypass_code, first_code, second_code;
12295 if (!TARGET_CMOVE)
12296 return true;
12297 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
f822d252 12298 return bypass_code != UNKNOWN || second_code != UNKNOWN;
03598dea
JH
12299}
12300
/* Expand a conditional branch on CODE applied to the operands stashed
   in the ix86_compare_op0/op1 globals, jumping to LABEL when the
   condition holds.  Integer modes emit a simple compare+jcc; fp modes
   go through the fp compare machinery; double-word (DImode on 32-bit,
   TImode on 64-bit) compares are decomposed into word-sized pieces,
   recursing through this function.  */
void
ix86_expand_branch (enum rtx_code code, rtx label)
{
  rtx tmp;

  /* If we have emitted a compare insn, go straight to simple.
     ix86_expand_compare won't emit anything if ix86_compare_emitted
     is non NULL.  */
  if (ix86_compare_emitted)
    goto simple;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      /* One compare insn, one conditional jump.  */
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
      {
	rtvec vec;
	int use_fcomi;
	enum rtx_code bypass_code, first_code, second_code;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

	/* Check whether we will use the natural sequence with one jump.  If
	   so, we can expand jump early.  Otherwise delay expansion by
	   creating compound insn to not confuse optimizers.  */
	if (bypass_code == UNKNOWN && second_code == UNKNOWN)
	  {
	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx, NULL_RTX, NULL_RTX);
	  }
	else
	  {
	    /* Build a compound jump insn: the conditional jump plus
	       clobbers of FPSR and FLAGS (and, without fcomi, an
	       HImode scratch for fnstsw), to be split later.  */
	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
				  ix86_compare_op0, ix86_compare_op1);
	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
					gen_rtx_LABEL_REF (VOIDmode, label),
					pc_rtx);
	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	    use_fcomi = ix86_use_fcomi_compare (code);
	    vec = rtvec_alloc (3 + !use_fcomi);
	    RTVEC_ELT (vec, 0) = tmp;
	    RTVEC_ELT (vec, 1)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
	    RTVEC_ELT (vec, 2)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
	    if (! use_fcomi)
	      RTVEC_ELT (vec, 3)
		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	  }
	return;
      }

    case DImode:
      if (TARGET_64BIT)
	goto simple;
      /* FALLTHRU: 32-bit DImode is split like 64-bit TImode.  */
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;
	enum machine_mode submode;

	/* Canonicalize so any constant ends up as operand 1.  */
	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	if (GET_MODE (ix86_compare_op0) == DImode)
	  {
	    split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	    split_di (&ix86_compare_op1, 1, lo+1, hi+1);
	    submode = SImode;
	  }
	else
	  {
	    split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
	    split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
	    submode = DImode;
	  }

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    /* Recurse as a word-sized compare against zero.  */
	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  Similarly for low word -1 and
	   less-or-equal-than or greater-than.  */

	if (CONST_INT_P (hi[1]))
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      if (lo[1] == const0_rtx)
		{
		  ix86_compare_op0 = hi[0];
		  ix86_compare_op1 = hi[1];
		  ix86_expand_branch (code, label);
		  return;
		}
	      break;
	    case LE: case LEU: case GT: case GTU:
	      if (lo[1] == constm1_rtx)
		{
		  ix86_compare_op0 = hi[0];
		  ix86_compare_op1 = hi[1];
		  ix86_expand_branch (code, label);
		  return;
		}
	      break;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	/* code1: high-word test branching to LABEL (UNKNOWN = skip);
	   code2: high-word test branching to the fallthrough label
	   (UNKNOWN = skip); code3: unsigned low-word test.  */
	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE: code1 = LT; code2 = GT; break;
	  case GE: code1 = GT; code2 = LT; break;
	  case LEU: code1 = LTU; code2 = GTU; break;
	  case GEU: code1 = GTU; code2 = LTU; break;

	  case EQ: code1 = UNKNOWN; code2 = NE; break;
	  case NE: code2 = UNKNOWN; break;

	  default:
	    gcc_unreachable ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != UNKNOWN)
	  ix86_expand_branch (code1, label);
	if (code2 != UNKNOWN)
	  ix86_expand_branch (code2, label2);

	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != UNKNOWN)
	  emit_label (label2);
	return;
      }

    default:
      gcc_unreachable ();
    }
}
e075ae69 12515
/* Split branch based on floating point condition.  CODE compares OP1
   against OP2; TARGET1/TARGET2 are the taken/not-taken destinations
   (one of them pc_rtx); TMP is an optional HImode scratch for fnstsw;
   PUSHED, when non-NULL, is an operand previously spilled to the stack
   that must be freed after the compare is emitted.  Emits up to three
   jumps: an optional NaN-bypass jump, the main jump, and an optional
   second test jump, attaching REG_BR_PROB notes when
   split_branch_probability is known.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
		      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  /* Canonicalize so that TARGET2 is the fallthrough (pc_rtx),
     reversing the condition when needed.  */
  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
	 Assume the BYPASS and SECOND to be always test
	 for UNORDERED.  */
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
	 to be updated.  Later we may run some experiments and see
	 if unordered values are more frequent in practice.  */
      if (bypass)
	bypass_probability = 1;
      if (second)
	second_probability = 1;
    }
  /* Optional bypass jump: skips the main test (e.g. for NaNs).  */
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode,
						 bypass,
						 gen_rtx_LABEL_REF (VOIDmode,
								    label),
						 pc_rtx)));
      if (bypass_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (bypass_probability),
			       REG_NOTES (i));
    }
  /* The main conditional jump.  */
  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
			   GEN_INT (probability),
			   REG_NOTES (i));
  /* Optional second jump to the same target.  */
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
						 target2)));
      if (second_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (second_probability),
			       REG_NOTES (i));
    }
  /* Land the bypass jump here, after the main test.  */
  if (label != NULL_RTX)
    emit_label (label);
}
12597
/* Expand a setcc of CODE (on the ix86_compare_op0/op1 globals) into
   DEST, which must be a QImode register.  Returns 1 on success, 0 when
   the mode requires the double-word path this function does not
   handle.  When the fp compare produced a second or bypass test, the
   partial results are combined with and/or into DEST.  */
int
ix86_expand_setcc (enum rtx_code code, rtx dest)
{
  rtx ret, tmp, tmpreg, equiv;
  rtx second_test, bypass_test;

  /* Double-word compares go through a different expansion.  */
  if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
    return 0; /* FAIL */

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
	{
	  /* At most one auxiliary test is ever produced.  */
	  gcc_assert (!second_test);
	  test = bypass_test;
	  bypass = 1;
	  /* The bypass test is combined with AND, so use its
	     reversed condition.  */
	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
	}
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  /* Attach a REG_EQUAL note describing the comparison result.  */
  if (ix86_compare_op0 && ix86_compare_op1)
    {
      equiv = simplify_gen_relational (code, QImode,
				       GET_MODE (ix86_compare_op0),
				       ix86_compare_op0, ix86_compare_op1);
      set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
    }

  return 1; /* DONE */
}
e075ae69 12648
/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.

   On success *POP is an LTU or GEU test of the flags register, i.e.
   the condition is carried entirely in CF, suitable for sbb/adc-style
   conditional move sequences.  Integer comparisons are canonicalized
   (possibly adjusting OP1 and swapping operands) into an unsigned
   carry-producing form; fp comparisons are accepted only when the fp
   expander happens to yield a carry-based test.  */
static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle DImode compares that go through special path.  */
  if (mode == (TARGET_64BIT ? TImode : DImode))
    return false;

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      rtx second_test = NULL, bypass_test = NULL;
      rtx compare_op, compare_seq;

      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));

      /* Shortcut:  following common codes never translate
	 into carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
	  || code == ORDERED || code == UNORDERED)
	return false;

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
	  && !TARGET_IEEE_FP)
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = swap_condition (code);
	}

      /* Try to expand the comparison and verify that we end up with
	 carry flag based comparison.  This fails to be true only when
	 we decide to expand comparison using arithmetic that is not
	 too common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
					   &second_test, &bypass_test);
      compare_seq = get_insns ();
      end_sequence ();

      /* Auxiliary tests mean the result is not a single carry test;
	 the recorded sequence is simply discarded.  */
      if (second_test || bypass_test)
	return false;

      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
	code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
	code = GET_CODE (compare_op);

      if (code != LTU && code != GEU)
	return false;

      /* Only now commit the recorded compare sequence.  */
      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }

  if (!INTEGRAL_MODE_P (mode))
    return false;

  switch (code)
    {
    case LTU:
    case GEU:
      /* Already carry-based.  */
      break;

      /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
	return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

      /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (CONST_INT_P (op1))
	{
	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
	  /* Bail out on overflow.  We still can swap operands but that
	     would force loading of the constant into register.  */
	  if (op1 == const0_rtx
	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
	    return false;
	  code = (code == GTU ? GEU : LTU);
	}
      else
	{
	  rtx tmp = op1;
	  op1 = op0;
	  op0 = tmp;
	  code = (code == GTU ? LTU : GEU);
	}
      break;

      /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (!can_create_pseudo_p ())
	return false;
      op0 = force_reg (mode, op0);
    }
  ix86_compare_op0 = op0;
  ix86_compare_op1 = op1;
  *pop = ix86_expand_compare (code, NULL, NULL);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
  return true;
}
12783
32b5b1aa 12784int
b96a374d 12785ix86_expand_int_movcc (rtx operands[])
32b5b1aa 12786{
e075ae69
RH
12787 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12788 rtx compare_seq, compare_op;
a1b8572c 12789 rtx second_test, bypass_test;
635559ab 12790 enum machine_mode mode = GET_MODE (operands[0]);
4977bab6 12791 bool sign_bit_compare_p = false;;
3a3677ff 12792
e075ae69 12793 start_sequence ();
a1b8572c 12794 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
2f937369 12795 compare_seq = get_insns ();
e075ae69
RH
12796 end_sequence ();
12797
12798 compare_code = GET_CODE (compare_op);
12799
4977bab6
ZW
12800 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12801 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12802 sign_bit_compare_p = true;
12803
e075ae69
RH
12804 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12805 HImode insns, we'd be swallowed in word prefix ops. */
12806
4977bab6 12807 if ((mode != HImode || TARGET_FAST_PREFIX)
28356f52 12808 && (mode != (TARGET_64BIT ? TImode : DImode))
7656aee4
UB
12809 && CONST_INT_P (operands[2])
12810 && CONST_INT_P (operands[3]))
e075ae69
RH
12811 {
12812 rtx out = operands[0];
12813 HOST_WIDE_INT ct = INTVAL (operands[2]);
12814 HOST_WIDE_INT cf = INTVAL (operands[3]);
12815 HOST_WIDE_INT diff;
12816
4977bab6
ZW
12817 diff = ct - cf;
12818 /* Sign bit compares are better done using shifts than we do by using
b96a374d 12819 sbb. */
4977bab6
ZW
12820 if (sign_bit_compare_p
12821 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12822 ix86_compare_op1, &compare_op))
e075ae69 12823 {
e075ae69
RH
12824 /* Detect overlap between destination and compare sources. */
12825 rtx tmp = out;
12826
4977bab6 12827 if (!sign_bit_compare_p)
36583fea 12828 {
e6e81735
JH
12829 bool fpcmp = false;
12830
4977bab6
ZW
12831 compare_code = GET_CODE (compare_op);
12832
e6e81735
JH
12833 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12834 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12835 {
12836 fpcmp = true;
12837 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12838 }
12839
4977bab6
ZW
12840 /* To simplify rest of code, restrict to the GEU case. */
12841 if (compare_code == LTU)
12842 {
12843 HOST_WIDE_INT tmp = ct;
12844 ct = cf;
12845 cf = tmp;
12846 compare_code = reverse_condition (compare_code);
12847 code = reverse_condition (code);
12848 }
e6e81735
JH
12849 else
12850 {
12851 if (fpcmp)
12852 PUT_CODE (compare_op,
12853 reverse_condition_maybe_unordered
12854 (GET_CODE (compare_op)));
12855 else
12856 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12857 }
4977bab6 12858 diff = ct - cf;
36583fea 12859
4977bab6
ZW
12860 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12861 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12862 tmp = gen_reg_rtx (mode);
e075ae69 12863
4977bab6 12864 if (mode == DImode)
e6e81735 12865 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
4977bab6 12866 else
e6e81735 12867 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
4977bab6 12868 }
14f73b5a 12869 else
4977bab6
ZW
12870 {
12871 if (code == GT || code == GE)
12872 code = reverse_condition (code);
12873 else
12874 {
12875 HOST_WIDE_INT tmp = ct;
12876 ct = cf;
12877 cf = tmp;
5fb48685 12878 diff = ct - cf;
4977bab6
ZW
12879 }
12880 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12881 ix86_compare_op1, VOIDmode, 0, -1);
12882 }
e075ae69 12883
36583fea
JH
12884 if (diff == 1)
12885 {
12886 /*
12887 * cmpl op0,op1
12888 * sbbl dest,dest
12889 * [addl dest, ct]
12890 *
12891 * Size 5 - 8.
12892 */
12893 if (ct)
b96a374d 12894 tmp = expand_simple_binop (mode, PLUS,
635559ab 12895 tmp, GEN_INT (ct),
4977bab6 12896 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
12897 }
12898 else if (cf == -1)
12899 {
12900 /*
12901 * cmpl op0,op1
12902 * sbbl dest,dest
12903 * orl $ct, dest
12904 *
12905 * Size 8.
12906 */
635559ab
JH
12907 tmp = expand_simple_binop (mode, IOR,
12908 tmp, GEN_INT (ct),
4977bab6 12909 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
12910 }
12911 else if (diff == -1 && ct)
12912 {
12913 /*
12914 * cmpl op0,op1
12915 * sbbl dest,dest
06ec023f 12916 * notl dest
36583fea
JH
12917 * [addl dest, cf]
12918 *
12919 * Size 8 - 11.
12920 */
4977bab6 12921 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
635559ab 12922 if (cf)
b96a374d 12923 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
12924 copy_rtx (tmp), GEN_INT (cf),
12925 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
12926 }
12927 else
12928 {
12929 /*
12930 * cmpl op0,op1
12931 * sbbl dest,dest
06ec023f 12932 * [notl dest]
36583fea
JH
12933 * andl cf - ct, dest
12934 * [addl dest, ct]
12935 *
12936 * Size 8 - 11.
12937 */
06ec023f
RB
12938
12939 if (cf == 0)
12940 {
12941 cf = ct;
12942 ct = 0;
4977bab6 12943 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
06ec023f
RB
12944 }
12945
635559ab 12946 tmp = expand_simple_binop (mode, AND,
4977bab6 12947 copy_rtx (tmp),
d8bf17f9 12948 gen_int_mode (cf - ct, mode),
4977bab6 12949 copy_rtx (tmp), 1, OPTAB_DIRECT);
635559ab 12950 if (ct)
b96a374d 12951 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
12952 copy_rtx (tmp), GEN_INT (ct),
12953 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea 12954 }
e075ae69 12955
4977bab6
ZW
12956 if (!rtx_equal_p (tmp, out))
12957 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
e075ae69
RH
12958
12959 return 1; /* DONE */
12960 }
12961
e075ae69
RH
12962 if (diff < 0)
12963 {
27ac40e2
UB
12964 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12965
e075ae69
RH
12966 HOST_WIDE_INT tmp;
12967 tmp = ct, ct = cf, cf = tmp;
12968 diff = -diff;
27ac40e2
UB
12969
12970 if (SCALAR_FLOAT_MODE_P (cmp_mode))
734dba19 12971 {
27ac40e2
UB
12972 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12973
734dba19
JH
12974 /* We may be reversing unordered compare to normal compare, that
12975 is not valid in general (we may convert non-trapping condition
12976 to trapping one), however on i386 we currently emit all
12977 comparisons unordered. */
12978 compare_code = reverse_condition_maybe_unordered (compare_code);
12979 code = reverse_condition_maybe_unordered (code);
12980 }
12981 else
12982 {
12983 compare_code = reverse_condition (compare_code);
12984 code = reverse_condition (code);
12985 }
e075ae69 12986 }
0f2a3457 12987
f822d252 12988 compare_code = UNKNOWN;
0f2a3457 12989 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
7656aee4 12990 && CONST_INT_P (ix86_compare_op1))
0f2a3457
JJ
12991 {
12992 if (ix86_compare_op1 == const0_rtx
12993 && (code == LT || code == GE))
12994 compare_code = code;
12995 else if (ix86_compare_op1 == constm1_rtx)
12996 {
12997 if (code == LE)
12998 compare_code = LT;
12999 else if (code == GT)
13000 compare_code = GE;
13001 }
13002 }
13003
13004 /* Optimize dest = (op0 < 0) ? -1 : cf. */
f822d252 13005 if (compare_code != UNKNOWN
0f2a3457
JJ
13006 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
13007 && (cf == -1 || ct == -1))
13008 {
13009 /* If lea code below could be used, only optimize
13010 if it results in a 2 insn sequence. */
13011
13012 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
13013 || diff == 3 || diff == 5 || diff == 9)
13014 || (compare_code == LT && ct == -1)
13015 || (compare_code == GE && cf == -1))
13016 {
13017 /*
13018 * notl op1 (if necessary)
13019 * sarl $31, op1
13020 * orl cf, op1
13021 */
13022 if (ct != -1)
13023 {
13024 cf = ct;
b96a374d 13025 ct = -1;
0f2a3457
JJ
13026 code = reverse_condition (code);
13027 }
13028
13029 out = emit_store_flag (out, code, ix86_compare_op0,
13030 ix86_compare_op1, VOIDmode, 0, -1);
13031
13032 out = expand_simple_binop (mode, IOR,
13033 out, GEN_INT (cf),
13034 out, 1, OPTAB_DIRECT);
13035 if (out != operands[0])
13036 emit_move_insn (operands[0], out);
13037
13038 return 1; /* DONE */
13039 }
13040 }
13041
4977bab6 13042
635559ab
JH
13043 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
13044 || diff == 3 || diff == 5 || diff == 9)
4977bab6 13045 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
8fe75e43
RH
13046 && (mode != DImode
13047 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
e075ae69
RH
13048 {
13049 /*
13050 * xorl dest,dest
13051 * cmpl op1,op2
13052 * setcc dest
13053 * lea cf(dest*(ct-cf)),dest
13054 *
13055 * Size 14.
13056 *
13057 * This also catches the degenerate setcc-only case.
13058 */
13059
13060 rtx tmp;
13061 int nops;
13062
13063 out = emit_store_flag (out, code, ix86_compare_op0,
13064 ix86_compare_op1, VOIDmode, 0, 1);
13065
13066 nops = 0;
97f51ac4
RB
13067 /* On x86_64 the lea instruction operates on Pmode, so we need
13068 to get arithmetics done in proper mode to match. */
e075ae69 13069 if (diff == 1)
068f5dea 13070 tmp = copy_rtx (out);
e075ae69
RH
13071 else
13072 {
885a70fd 13073 rtx out1;
068f5dea 13074 out1 = copy_rtx (out);
635559ab 13075 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
e075ae69
RH
13076 nops++;
13077 if (diff & 1)
13078 {
635559ab 13079 tmp = gen_rtx_PLUS (mode, tmp, out1);
e075ae69
RH
13080 nops++;
13081 }
13082 }
13083 if (cf != 0)
13084 {
635559ab 13085 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
e075ae69
RH
13086 nops++;
13087 }
4977bab6 13088 if (!rtx_equal_p (tmp, out))
e075ae69 13089 {
14f73b5a 13090 if (nops == 1)
a5cf80f0 13091 out = force_operand (tmp, copy_rtx (out));
e075ae69 13092 else
4977bab6 13093 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
e075ae69 13094 }
4977bab6 13095 if (!rtx_equal_p (out, operands[0]))
1985ef90 13096 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
13097
13098 return 1; /* DONE */
13099 }
13100
13101 /*
13102 * General case: Jumpful:
13103 * xorl dest,dest cmpl op1, op2
13104 * cmpl op1, op2 movl ct, dest
13105 * setcc dest jcc 1f
13106 * decl dest movl cf, dest
13107 * andl (cf-ct),dest 1:
13108 * addl ct,dest
0f290768 13109 *
e075ae69
RH
13110 * Size 20. Size 14.
13111 *
13112 * This is reasonably steep, but branch mispredict costs are
13113 * high on modern cpus, so consider failing only if optimizing
13114 * for space.
e075ae69
RH
13115 */
13116
4977bab6
ZW
13117 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
13118 && BRANCH_COST >= 2)
e075ae69 13119 {
97f51ac4 13120 if (cf == 0)
e075ae69 13121 {
27ac40e2
UB
13122 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
13123
97f51ac4
RB
13124 cf = ct;
13125 ct = 0;
27ac40e2
UB
13126
13127 if (SCALAR_FLOAT_MODE_P (cmp_mode))
13128 {
13129 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
13130
13131 /* We may be reversing unordered compare to normal compare,
13132 that is not valid in general (we may convert non-trapping
13133 condition to trapping one), however on i386 we currently
13134 emit all comparisons unordered. */
13135 code = reverse_condition_maybe_unordered (code);
13136 }
0f2a3457
JJ
13137 else
13138 {
13139 code = reverse_condition (code);
f822d252 13140 if (compare_code != UNKNOWN)
0f2a3457
JJ
13141 compare_code = reverse_condition (compare_code);
13142 }
13143 }
13144
f822d252 13145 if (compare_code != UNKNOWN)
0f2a3457
JJ
13146 {
13147 /* notl op1 (if needed)
13148 sarl $31, op1
13149 andl (cf-ct), op1
b96a374d 13150 addl ct, op1
0f2a3457
JJ
13151
13152 For x < 0 (resp. x <= -1) there will be no notl,
13153 so if possible swap the constants to get rid of the
13154 complement.
13155 True/false will be -1/0 while code below (store flag
13156 followed by decrement) is 0/-1, so the constants need
13157 to be exchanged once more. */
13158
13159 if (compare_code == GE || !cf)
734dba19 13160 {
b96a374d 13161 code = reverse_condition (code);
0f2a3457 13162 compare_code = LT;
734dba19
JH
13163 }
13164 else
13165 {
0f2a3457 13166 HOST_WIDE_INT tmp = cf;
b96a374d 13167 cf = ct;
0f2a3457 13168 ct = tmp;
734dba19 13169 }
0f2a3457
JJ
13170
13171 out = emit_store_flag (out, code, ix86_compare_op0,
13172 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 13173 }
0f2a3457
JJ
13174 else
13175 {
13176 out = emit_store_flag (out, code, ix86_compare_op0,
13177 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 13178
4977bab6
ZW
13179 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
13180 copy_rtx (out), 1, OPTAB_DIRECT);
0f2a3457 13181 }
e075ae69 13182
4977bab6 13183 out = expand_simple_binop (mode, AND, copy_rtx (out),
d8bf17f9 13184 gen_int_mode (cf - ct, mode),
4977bab6 13185 copy_rtx (out), 1, OPTAB_DIRECT);
97f51ac4 13186 if (ct)
4977bab6
ZW
13187 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
13188 copy_rtx (out), 1, OPTAB_DIRECT);
13189 if (!rtx_equal_p (out, operands[0]))
13190 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
13191
13192 return 1; /* DONE */
13193 }
13194 }
13195
4977bab6 13196 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
e075ae69
RH
13197 {
13198 /* Try a few things more with specific constants and a variable. */
13199
78a0d70c 13200 optab op;
e075ae69
RH
13201 rtx var, orig_out, out, tmp;
13202
4977bab6 13203 if (BRANCH_COST <= 2)
e075ae69
RH
13204 return 0; /* FAIL */
13205
0f290768 13206 /* If one of the two operands is an interesting constant, load a
e075ae69 13207 constant with the above and mask it in with a logical operation. */
0f290768 13208
7656aee4 13209 if (CONST_INT_P (operands[2]))
e075ae69
RH
13210 {
13211 var = operands[3];
4977bab6 13212 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
e075ae69 13213 operands[3] = constm1_rtx, op = and_optab;
4977bab6 13214 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
e075ae69 13215 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
13216 else
13217 return 0; /* FAIL */
e075ae69 13218 }
7656aee4 13219 else if (CONST_INT_P (operands[3]))
e075ae69
RH
13220 {
13221 var = operands[2];
4977bab6 13222 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
e075ae69 13223 operands[2] = constm1_rtx, op = and_optab;
4977bab6 13224 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
e075ae69 13225 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
13226 else
13227 return 0; /* FAIL */
e075ae69 13228 }
78a0d70c 13229 else
e075ae69
RH
13230 return 0; /* FAIL */
13231
13232 orig_out = operands[0];
635559ab 13233 tmp = gen_reg_rtx (mode);
e075ae69
RH
13234 operands[0] = tmp;
13235
13236 /* Recurse to get the constant loaded. */
13237 if (ix86_expand_int_movcc (operands) == 0)
13238 return 0; /* FAIL */
13239
13240 /* Mask in the interesting variable. */
635559ab 13241 out = expand_binop (mode, op, var, tmp, orig_out, 0,
e075ae69 13242 OPTAB_WIDEN);
4977bab6
ZW
13243 if (!rtx_equal_p (out, orig_out))
13244 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
e075ae69
RH
13245
13246 return 1; /* DONE */
13247 }
13248
13249 /*
13250 * For comparison with above,
13251 *
13252 * movl cf,dest
13253 * movl ct,tmp
13254 * cmpl op1,op2
13255 * cmovcc tmp,dest
13256 *
13257 * Size 15.
13258 */
13259
635559ab
JH
13260 if (! nonimmediate_operand (operands[2], mode))
13261 operands[2] = force_reg (mode, operands[2]);
13262 if (! nonimmediate_operand (operands[3], mode))
13263 operands[3] = force_reg (mode, operands[3]);
e075ae69 13264
a1b8572c
JH
13265 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13266 {
635559ab 13267 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
13268 emit_move_insn (tmp, operands[3]);
13269 operands[3] = tmp;
13270 }
13271 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13272 {
635559ab 13273 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
13274 emit_move_insn (tmp, operands[2]);
13275 operands[2] = tmp;
13276 }
4977bab6 13277
c9682caf 13278 if (! register_operand (operands[2], VOIDmode)
b96a374d 13279 && (mode == QImode
4977bab6 13280 || ! register_operand (operands[3], VOIDmode)))
635559ab 13281 operands[2] = force_reg (mode, operands[2]);
a1b8572c 13282
4977bab6
ZW
13283 if (mode == QImode
13284 && ! register_operand (operands[3], VOIDmode))
13285 operands[3] = force_reg (mode, operands[3]);
13286
e075ae69
RH
13287 emit_insn (compare_seq);
13288 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 13289 gen_rtx_IF_THEN_ELSE (mode,
e075ae69
RH
13290 compare_op, operands[2],
13291 operands[3])));
a1b8572c 13292 if (bypass_test)
4977bab6 13293 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 13294 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 13295 bypass_test,
4977bab6
ZW
13296 copy_rtx (operands[3]),
13297 copy_rtx (operands[0]))));
a1b8572c 13298 if (second_test)
4977bab6 13299 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 13300 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 13301 second_test,
4977bab6
ZW
13302 copy_rtx (operands[2]),
13303 copy_rtx (operands[0]))));
e075ae69
RH
13304
13305 return 1; /* DONE */
e9a25f70 13306}
e075ae69 13307
ab8efbd8
RH
13308/* Swap, force into registers, or otherwise massage the two operands
13309 to an sse comparison with a mask result. Thus we differ a bit from
13310 ix86_prepare_fp_compare_args which expects to produce a flags result.
13311
13312 The DEST operand exists to help determine whether to commute commutative
13313 operators. The POP0/POP1 operands are updated in place. The new
13314 comparison code is returned, or UNKNOWN if not implementable. */
13315
13316static enum rtx_code
13317ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
13318 rtx *pop0, rtx *pop1)
13319{
13320 rtx tmp;
13321
13322 switch (code)
13323 {
13324 case LTGT:
13325 case UNEQ:
13326 /* We have no LTGT as an operator. We could implement it with
13327 NE & ORDERED, but this requires an extra temporary. It's
13328 not clear that it's worth it. */
13329 return UNKNOWN;
13330
13331 case LT:
13332 case LE:
13333 case UNGT:
13334 case UNGE:
13335 /* These are supported directly. */
13336 break;
13337
13338 case EQ:
13339 case NE:
13340 case UNORDERED:
13341 case ORDERED:
13342 /* For commutative operators, try to canonicalize the destination
13343 operand to be first in the comparison - this helps reload to
13344 avoid extra moves. */
13345 if (!dest || !rtx_equal_p (dest, *pop1))
13346 break;
13347 /* FALLTHRU */
13348
13349 case GE:
13350 case GT:
13351 case UNLE:
13352 case UNLT:
13353 /* These are not supported directly. Swap the comparison operands
13354 to transform into something that is supported. */
13355 tmp = *pop0;
13356 *pop0 = *pop1;
13357 *pop1 = tmp;
13358 code = swap_condition (code);
13359 break;
13360
13361 default:
13362 gcc_unreachable ();
13363 }
13364
13365 return code;
13366}
13367
13368/* Detect conditional moves that exactly match min/max operational
13369 semantics. Note that this is IEEE safe, as long as we don't
13370 interchange the operands.
13371
13372 Returns FALSE if this conditional move doesn't match a MIN/MAX,
13373 and TRUE if the operation is successful and instructions are emitted. */
13374
13375static bool
13376ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
13377 rtx cmp_op1, rtx if_true, rtx if_false)
13378{
13379 enum machine_mode mode;
13380 bool is_min;
13381 rtx tmp;
13382
13383 if (code == LT)
13384 ;
13385 else if (code == UNGE)
13386 {
13387 tmp = if_true;
13388 if_true = if_false;
13389 if_false = tmp;
13390 }
13391 else
13392 return false;
13393
13394 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
13395 is_min = true;
13396 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
13397 is_min = false;
13398 else
13399 return false;
13400
13401 mode = GET_MODE (dest);
13402
13403 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13404 but MODE may be a vector mode and thus not appropriate. */
13405 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
13406 {
13407 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
13408 rtvec v;
13409
13410 if_true = force_reg (mode, if_true);
13411 v = gen_rtvec (2, if_true, if_false);
13412 tmp = gen_rtx_UNSPEC (mode, v, u);
13413 }
13414 else
13415 {
13416 code = is_min ? SMIN : SMAX;
13417 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
13418 }
13419
13420 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
13421 return true;
13422}
13423
ae46a07a
RH
13424/* Expand an sse vector comparison. Return the register with the result. */
13425
13426static rtx
13427ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
13428 rtx op_true, rtx op_false)
ab8efbd8
RH
13429{
13430 enum machine_mode mode = GET_MODE (dest);
ae46a07a 13431 rtx x;
ab8efbd8
RH
13432
13433 cmp_op0 = force_reg (mode, cmp_op0);
13434 if (!nonimmediate_operand (cmp_op1, mode))
13435 cmp_op1 = force_reg (mode, cmp_op1);
13436
13437 if (optimize
13438 || reg_overlap_mentioned_p (dest, op_true)
13439 || reg_overlap_mentioned_p (dest, op_false))
ae46a07a 13440 dest = gen_reg_rtx (mode);
ab8efbd8
RH
13441
13442 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
ae46a07a
RH
13443 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13444
13445 return dest;
13446}
13447
13448/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13449 operations. This is used for both scalar and vector conditional moves. */
13450
13451static void
13452ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
13453{
13454 enum machine_mode mode = GET_MODE (dest);
13455 rtx t2, t3, x;
ab8efbd8 13456
71d46ca5 13457 if (op_false == CONST0_RTX (mode))
ab8efbd8
RH
13458 {
13459 op_true = force_reg (mode, op_true);
ae46a07a 13460 x = gen_rtx_AND (mode, cmp, op_true);
ab8efbd8
RH
13461 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13462 }
13463 else if (op_true == CONST0_RTX (mode))
13464 {
13465 op_false = force_reg (mode, op_false);
ae46a07a 13466 x = gen_rtx_NOT (mode, cmp);
ab8efbd8
RH
13467 x = gen_rtx_AND (mode, x, op_false);
13468 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13469 }
71d46ca5
MM
13470 else if (TARGET_SSE5)
13471 {
13472 rtx pcmov = gen_rtx_SET (mode, dest,
13473 gen_rtx_IF_THEN_ELSE (mode, cmp,
13474 op_true,
13475 op_false));
13476 emit_insn (pcmov);
13477 }
ab8efbd8
RH
13478 else
13479 {
13480 op_true = force_reg (mode, op_true);
13481 op_false = force_reg (mode, op_false);
13482
13483 t2 = gen_reg_rtx (mode);
13484 if (optimize)
13485 t3 = gen_reg_rtx (mode);
13486 else
13487 t3 = dest;
13488
ae46a07a 13489 x = gen_rtx_AND (mode, op_true, cmp);
ab8efbd8
RH
13490 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
13491
ae46a07a 13492 x = gen_rtx_NOT (mode, cmp);
ab8efbd8
RH
13493 x = gen_rtx_AND (mode, x, op_false);
13494 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
13495
13496 x = gen_rtx_IOR (mode, t3, t2);
13497 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13498 }
13499}
13500
ae46a07a
RH
13501/* Expand a floating-point conditional move. Return true if successful. */
13502
32b5b1aa 13503int
b96a374d 13504ix86_expand_fp_movcc (rtx operands[])
32b5b1aa 13505{
eaa49b49
RH
13506 enum machine_mode mode = GET_MODE (operands[0]);
13507 enum rtx_code code = GET_CODE (operands[1]);
13508 rtx tmp, compare_op, second_test, bypass_test;
13509
13510 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
13511 {
ab8efbd8 13512 enum machine_mode cmode;
eaa49b49
RH
13513
13514 /* Since we've no cmove for sse registers, don't force bad register
13515 allocation just to gain access to it. Deny movcc when the
13516 comparison mode doesn't match the move mode. */
ab8efbd8 13517 cmode = GET_MODE (ix86_compare_op0);
eaa49b49 13518 if (cmode == VOIDmode)
ab8efbd8 13519 cmode = GET_MODE (ix86_compare_op1);
eaa49b49
RH
13520 if (cmode != mode)
13521 return 0;
13522
ab8efbd8
RH
13523 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13524 &ix86_compare_op0,
13525 &ix86_compare_op1);
13526 if (code == UNKNOWN)
51d7bae6
RH
13527 return 0;
13528
ab8efbd8
RH
13529 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
13530 ix86_compare_op1, operands[2],
13531 operands[3]))
13532 return 1;
eaa49b49 13533
ae46a07a
RH
13534 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
13535 ix86_compare_op1, operands[2], operands[3]);
13536 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
0073023d
JH
13537 return 1;
13538 }
13539
e075ae69 13540 /* The floating point conditional move instructions don't directly
0f290768 13541 support conditions resulting from a signed integer comparison. */
32b5b1aa 13542
a1b8572c 13543 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9e7adcb3
JH
13544
13545 /* The floating point conditional move instructions don't directly
13546 support signed integer comparisons. */
13547
a1b8572c 13548 if (!fcmov_comparison_operator (compare_op, VOIDmode))
e075ae69 13549 {
d0396b79 13550 gcc_assert (!second_test && !bypass_test);
e075ae69 13551 tmp = gen_reg_rtx (QImode);
3a3677ff 13552 ix86_expand_setcc (code, tmp);
e075ae69
RH
13553 code = NE;
13554 ix86_compare_op0 = tmp;
13555 ix86_compare_op1 = const0_rtx;
a1b8572c
JH
13556 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13557 }
13558 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13559 {
eaa49b49 13560 tmp = gen_reg_rtx (mode);
a1b8572c
JH
13561 emit_move_insn (tmp, operands[3]);
13562 operands[3] = tmp;
13563 }
13564 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13565 {
eaa49b49 13566 tmp = gen_reg_rtx (mode);
a1b8572c
JH
13567 emit_move_insn (tmp, operands[2]);
13568 operands[2] = tmp;
e075ae69 13569 }
e9a25f70 13570
e075ae69 13571 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
eaa49b49
RH
13572 gen_rtx_IF_THEN_ELSE (mode, compare_op,
13573 operands[2], operands[3])));
a1b8572c
JH
13574 if (bypass_test)
13575 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
eaa49b49
RH
13576 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
13577 operands[3], operands[0])));
a1b8572c
JH
13578 if (second_test)
13579 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
eaa49b49
RH
13580 gen_rtx_IF_THEN_ELSE (mode, second_test,
13581 operands[2], operands[0])));
32b5b1aa 13582
e075ae69 13583 return 1;
32b5b1aa
SC
13584}
13585
ae46a07a
RH
13586/* Expand a floating-point vector conditional move; a vcond operation
13587 rather than a movcc operation. */
13588
13589bool
13590ix86_expand_fp_vcond (rtx operands[])
13591{
13592 enum rtx_code code = GET_CODE (operands[3]);
13593 rtx cmp;
13594
13595 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13596 &operands[4], &operands[5]);
13597 if (code == UNKNOWN)
13598 return false;
13599
13600 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
13601 operands[5], operands[1], operands[2]))
13602 return true;
13603
13604 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
13605 operands[1], operands[2]);
13606 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
13607 return true;
13608}
13609
3b8dd071 13610/* Expand a signed/unsigned integral vector conditional move. */
ae46a07a
RH
13611
13612bool
9fb93f89 13613ix86_expand_int_vcond (rtx operands[])
ae46a07a
RH
13614{
13615 enum machine_mode mode = GET_MODE (operands[0]);
13616 enum rtx_code code = GET_CODE (operands[3]);
9fb93f89
RH
13617 bool negate = false;
13618 rtx x, cop0, cop1;
ae46a07a 13619
9fb93f89
RH
13620 cop0 = operands[4];
13621 cop1 = operands[5];
13622
71d46ca5
MM
13623 /* SSE5 supports all of the comparisons on all vector int types. */
13624 if (!TARGET_SSE5)
3b8dd071 13625 {
71d46ca5 13626 /* Canonicalize the comparison to EQ, GT, GTU. */
3b8dd071
L
13627 switch (code)
13628 {
13629 case EQ:
3b8dd071
L
13630 case GT:
13631 case GTU:
71d46ca5
MM
13632 break;
13633
13634 case NE:
13635 case LE:
13636 case LEU:
13637 code = reverse_condition (code);
13638 negate = true;
13639 break;
13640
13641 case GE:
13642 case GEU:
13643 code = reverse_condition (code);
13644 negate = true;
13645 /* FALLTHRU */
13646
13647 case LT:
13648 case LTU:
13649 code = swap_condition (code);
13650 x = cop0, cop0 = cop1, cop1 = x;
3b8dd071
L
13651 break;
13652
13653 default:
13654 gcc_unreachable ();
13655 }
3b8dd071 13656
71d46ca5
MM
13657 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13658 if (mode == V2DImode)
13659 {
13660 switch (code)
13661 {
13662 case EQ:
13663 /* SSE4.1 supports EQ. */
13664 if (!TARGET_SSE4_1)
13665 return false;
13666 break;
55b2de75 13667
71d46ca5
MM
13668 case GT:
13669 case GTU:
13670 /* SSE4.2 supports GT/GTU. */
13671 if (!TARGET_SSE4_2)
13672 return false;
13673 break;
13674
13675 default:
13676 gcc_unreachable ();
13677 }
13678 }
13679
13680 /* Unsigned parallel compare is not supported by the hardware. Play some
13681 tricks to turn this into a signed comparison against 0. */
13682 if (code == GTU)
ae46a07a 13683 {
71d46ca5 13684 cop0 = force_reg (mode, cop0);
9fb93f89 13685
71d46ca5
MM
13686 switch (mode)
13687 {
13688 case V4SImode:
13689 case V2DImode:
13690 {
13691 rtx t1, t2, mask;
13692
13693 /* Perform a parallel modulo subtraction. */
13694 t1 = gen_reg_rtx (mode);
13695 emit_insn ((mode == V4SImode
13696 ? gen_subv4si3
13697 : gen_subv2di3) (t1, cop0, cop1));
13698
13699 /* Extract the original sign bit of op0. */
13700 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13701 true, false);
13702 t2 = gen_reg_rtx (mode);
13703 emit_insn ((mode == V4SImode
13704 ? gen_andv4si3
13705 : gen_andv2di3) (t2, cop0, mask));
13706
13707 /* XOR it back into the result of the subtraction. This results
13708 in the sign bit set iff we saw unsigned underflow. */
13709 x = gen_reg_rtx (mode);
13710 emit_insn ((mode == V4SImode
13711 ? gen_xorv4si3
13712 : gen_xorv2di3) (x, t1, t2));
13713
13714 code = GT;
13715 }
13716 break;
9fb93f89 13717
71d46ca5
MM
13718 case V16QImode:
13719 case V8HImode:
13720 /* Perform a parallel unsigned saturating subtraction. */
13721 x = gen_reg_rtx (mode);
13722 emit_insn (gen_rtx_SET (VOIDmode, x,
13723 gen_rtx_US_MINUS (mode, cop0, cop1)));
9fb93f89 13724
71d46ca5
MM
13725 code = EQ;
13726 negate = !negate;
13727 break;
13728
13729 default:
13730 gcc_unreachable ();
13731 }
ae46a07a 13732
71d46ca5
MM
13733 cop0 = x;
13734 cop1 = CONST0_RTX (mode);
13735 }
ae46a07a 13736 }
ae46a07a 13737
9fb93f89
RH
13738 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13739 operands[1+negate], operands[2-negate]);
13740
13741 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13742 operands[2-negate]);
ae46a07a
RH
13743 return true;
13744}
13745
89d67cca
DN
13746/* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13747 true if we should do zero extension, else sign extension. HIGH_P is
13748 true if we want the N/2 high elements, else the low elements. */
13749
13750void
13751ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13752{
13753 enum machine_mode imode = GET_MODE (operands[1]);
13754 rtx (*unpack)(rtx, rtx, rtx);
13755 rtx se, dest;
13756
13757 switch (imode)
13758 {
13759 case V16QImode:
13760 if (high_p)
13761 unpack = gen_vec_interleave_highv16qi;
13762 else
13763 unpack = gen_vec_interleave_lowv16qi;
13764 break;
13765 case V8HImode:
13766 if (high_p)
13767 unpack = gen_vec_interleave_highv8hi;
13768 else
13769 unpack = gen_vec_interleave_lowv8hi;
13770 break;
13771 case V4SImode:
13772 if (high_p)
13773 unpack = gen_vec_interleave_highv4si;
54a88090 13774 else
89d67cca
DN
13775 unpack = gen_vec_interleave_lowv4si;
13776 break;
13777 default:
54a88090 13778 gcc_unreachable ();
89d67cca
DN
13779 }
13780
13781 dest = gen_lowpart (imode, operands[0]);
13782
13783 if (unsigned_p)
13784 se = force_reg (imode, CONST0_RTX (imode));
13785 else
13786 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13787 operands[1], pc_rtx, pc_rtx);
13788
13789 emit_insn (unpack (dest, operands[1], se));
13790}
13791
e5ac0b9b
L
13792/* This function performs the same task as ix86_expand_sse_unpack,
13793 but with SSE4.1 instructions. */
13794
13795void
13796ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13797{
13798 enum machine_mode imode = GET_MODE (operands[1]);
13799 rtx (*unpack)(rtx, rtx);
13800 rtx src, dest;
13801
13802 switch (imode)
13803 {
13804 case V16QImode:
13805 if (unsigned_p)
13806 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13807 else
13808 unpack = gen_sse4_1_extendv8qiv8hi2;
13809 break;
13810 case V8HImode:
13811 if (unsigned_p)
13812 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13813 else
13814 unpack = gen_sse4_1_extendv4hiv4si2;
13815 break;
13816 case V4SImode:
13817 if (unsigned_p)
13818 unpack = gen_sse4_1_zero_extendv2siv2di2;
13819 else
13820 unpack = gen_sse4_1_extendv2siv2di2;
13821 break;
13822 default:
13823 gcc_unreachable ();
13824 }
13825
13826 dest = operands[0];
13827 if (high_p)
13828 {
13829 /* Shift higher 8 bytes to lower 8 bytes. */
13830 src = gen_reg_rtx (imode);
13831 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13832 gen_lowpart (TImode, operands[1]),
13833 GEN_INT (64)));
13834 }
13835 else
13836 src = operands[1];
13837
13838 emit_insn (unpack (dest, src));
13839}
13840
04e1d06b 13841/* This function performs the same task as ix86_expand_sse_unpack,
71d46ca5 13842 but with sse5 instructions. */
04e1d06b
MM
13843
13844void
13845ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13846{
13847 enum machine_mode imode = GET_MODE (operands[1]);
13848 int pperm_bytes[16];
13849 int i;
13850 int h = (high_p) ? 8 : 0;
13851 int h2;
13852 int sign_extend;
13853 rtvec v = rtvec_alloc (16);
13854 rtvec vs;
13855 rtx x, p;
13856 rtx op0 = operands[0], op1 = operands[1];
13857
13858 switch (imode)
13859 {
13860 case V16QImode:
13861 vs = rtvec_alloc (8);
13862 h2 = (high_p) ? 8 : 0;
13863 for (i = 0; i < 8; i++)
13864 {
13865 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
13866 pperm_bytes[2*i+1] = ((unsigned_p)
13867 ? PPERM_ZERO
13868 : PPERM_SIGN | PPERM_SRC2 | i | h);
13869 }
13870
13871 for (i = 0; i < 16; i++)
13872 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13873
13874 for (i = 0; i < 8; i++)
13875 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13876
13877 p = gen_rtx_PARALLEL (VOIDmode, vs);
13878 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13879 if (unsigned_p)
13880 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
13881 else
13882 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
13883 break;
13884
13885 case V8HImode:
13886 vs = rtvec_alloc (4);
13887 h2 = (high_p) ? 4 : 0;
13888 for (i = 0; i < 4; i++)
13889 {
13890 sign_extend = ((unsigned_p)
13891 ? PPERM_ZERO
13892 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
13893 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
13894 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
13895 pperm_bytes[4*i+2] = sign_extend;
13896 pperm_bytes[4*i+3] = sign_extend;
13897 }
13898
13899 for (i = 0; i < 16; i++)
13900 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13901
13902 for (i = 0; i < 4; i++)
13903 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13904
13905 p = gen_rtx_PARALLEL (VOIDmode, vs);
13906 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13907 if (unsigned_p)
13908 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
13909 else
13910 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
13911 break;
13912
13913 case V4SImode:
13914 vs = rtvec_alloc (2);
13915 h2 = (high_p) ? 2 : 0;
13916 for (i = 0; i < 2; i++)
13917 {
13918 sign_extend = ((unsigned_p)
13919 ? PPERM_ZERO
13920 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
13921 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
13922 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
13923 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
13924 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
13925 pperm_bytes[8*i+4] = sign_extend;
13926 pperm_bytes[8*i+5] = sign_extend;
13927 pperm_bytes[8*i+6] = sign_extend;
13928 pperm_bytes[8*i+7] = sign_extend;
13929 }
13930
13931 for (i = 0; i < 16; i++)
13932 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13933
53a83348 13934 for (i = 0; i < 2; i++)
04e1d06b
MM
13935 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13936
13937 p = gen_rtx_PARALLEL (VOIDmode, vs);
13938 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13939 if (unsigned_p)
53a83348 13940 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
04e1d06b 13941 else
53a83348 13942 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
04e1d06b
MM
13943 break;
13944
13945 default:
13946 gcc_unreachable ();
13947 }
13948
13949 return;
13950}
13951
/* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
   next narrower integer vector type.  The pack is implemented with a single
   SSE5 PPERM instruction whose 16-entry byte-selector table is built here.  */
void
ix86_expand_sse5_pack (rtx operands[3])
{
  /* Mode of the destination, i.e. the narrower packed vector mode.  */
  enum machine_mode imode = GET_MODE (operands[0]);
  /* One selector per result byte; PPERM_SRC1/PPERM_SRC2 pick which of the
     two source operands the byte comes from.  */
  int pperm_bytes[16];
  int i;
  rtvec v = rtvec_alloc (16);
  rtx x;
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];

  switch (imode)
    {
    case V16QImode:
      /* Take the low byte (offset i*2) of each HImode element: bytes from
	 OP1 fill the low half of the result, bytes from OP2 the high half.  */
      for (i = 0; i < 8; i++)
	{
	  pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
	  pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
	}

      for (i = 0; i < 16; i++)
	RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);

      x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
      emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
      break;

    case V8HImode:
      /* Take the low two bytes of each SImode element.  */
      for (i = 0; i < 4; i++)
	{
	  pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
	  pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
	  pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
	  pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
	}

      for (i = 0; i < 16; i++)
	RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);

      x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
      emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
      break;

    case V4SImode:
      /* Take the low four bytes of each DImode element.  */
      for (i = 0; i < 2; i++)
	{
	  pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
	  pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
	  pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
	  pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
	  pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
	  pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
	  pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
	  pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
	}

      for (i = 0; i < 16; i++)
	RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);

      x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
      emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
      break;

    default:
      gcc_unreachable ();
    }

  return;
}
14024
7b52eede
JH
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  Returns 1 when the insn sequence was emitted,
   0 when the generic expansion should be used instead.  */
int
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx compare_op;
  /* Second addend: 0 when the carry flag itself supplies the +/-1,
     -1 when the condition had to be reversed (see below).  */
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* Only an increment/decrement by one can be folded into the carry
     input of an adc/sbb.  */
  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return 0;
  /* The comparison must be expressible as a carry-flag (LTU-style) test.  */
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
				       ix86_compare_op1, &compare_op))
    return 0;
  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  /* adc/sbb consume the carry flag, which corresponds to LTU.  For the
     opposite condition, add -1 and reverse the comparison in place.  */
  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		    (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }
  PUT_MODE (compare_op, mode);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (GET_MODE (operands[0]))
	{
	case QImode:
	  emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case HImode:
	  emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case SImode:
	  emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case DImode:
	  emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (GET_MODE (operands[0]))
	{
	case QImode:
	  emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case HImode:
	  emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case SImode:
	  emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case DImode:
	  emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  return 1; /* DONE */
}
14107
14108
2450a057
JH
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.

   OPERAND is the value to split, PARTS receives the pieces (word-sized:
   SImode on 32-bit targets, DImode on 64-bit), MODE is OPERAND's mode.
   Returns the number of parts (2 or 3).  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  /* Number of word-sized parts: XFmode needs 3 SImode words on 32-bit;
     on 64-bit, round the byte size up to DImode words.  */
  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 3);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (MEM_P (operand) && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (MEM_P (operand) && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      /* All parts alias the same pre-decrement push destination; the
	 caller emits them in the right order.  */
      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
	 the operand may actually have a different mode now.  That's
	 ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Hard-register adjacency only holds after reload.  */
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
	      if (size == 3)
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      /* Decompose an FP constant into its target word images.  */
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  gcc_unreachable ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }
  else
    {
      if (mode == TImode)
	split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  /* 64-bit XFmode is 12 bytes: low DImode word plus an SImode
	     upper word; TFmode uses two full DImode words.  */
	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      real_to_target (l, &r, mode);

	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
	        parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
	        parts[0] = immed_double_const (l[0], l[1], DImode);

	      if (upper_mode == SImode)
	        parts[1] = gen_int_mode (l[2], SImode);
	      else if (HOST_BITS_PER_WIDE_INT >= 64)
	        parts[1]
		  = gen_int_mode
		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
		       DImode);
	      else
	        parts[1] = immed_double_const (l[2], l[3], DImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }

  return size;
}
14262
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Operands 2-4 receive the input parts in the correct order;
   operands 5-7 receive the output parts.
   NOTE(review): the old comment claimed a bool return ("Return false when
   normal moves are needed"), but the function is void and always emits the
   full sequence itself — presumably a stale comment from an earlier
   interface; verify against the .md splitters that call this.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][3];		/* part[0][] = dest pieces, part[1][] = src pieces.  */
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (MEM_P (operands[1])
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (!MEM_P (operands[0])
		|| offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.
     Each push moves the stack pointer, so rebase each source piece on
     the address of the piece pushed after it.  */
  if (push && MEM_P (operands[1])
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && MEM_P (part[1][0]))
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  part[1][1] = replace_equiv_address (part[1][1],
					      plus_constant (base, UNITS_PER_WORD));
	  if (nparts == 3)
	    part[1][2] = replace_equiv_address (part[1][2],
						plus_constant (base, 8));
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      /* Keep the 16-byte long-double slot aligned: pushes cover
		 only 12 bytes, so pad the stack by 4 first.  */
	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these comes from attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      switch (GET_CODE (part[1][1]))
		{
		case MEM:
		  part[1][1] = adjust_address (part[1][1], DImode, 0);
		  break;

		case REG:
		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
		  break;

		default:
		  gcc_unreachable ();
		}

	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      /* Pushes go from high part to low part.  */
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      /* High-to-low order.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];
	}
      else
	{
	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
	}
    }
  else
    {
      /* Low-to-high order.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];
	}
      else
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	}
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_size)
    {
      if (CONST_INT_P (operands[5])
	  && operands[5] != const0_rtx
	  && REG_P (operands[2]))
	{
	  if (CONST_INT_P (operands[6])
	      && INTVAL (operands[6]) == INTVAL (operands[5]))
	    operands[6] = operands[2];

	  if (nparts == 3
	      && CONST_INT_P (operands[7])
	      && INTVAL (operands[7]) == INTVAL (operands[5]))
	    operands[7] = operands[2];
	}

      if (nparts == 3
	  && CONST_INT_P (operands[6])
	  && operands[6] != const0_rtx
	  && REG_P (operands[3])
	  && CONST_INT_P (operands[7])
	  && INTVAL (operands[7]) == INTVAL (operands[6]))
	operands[7] = operands[3];
    }

  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}
32b5b1aa 14489
28356f52 14490/* Helper function of ix86_split_ashl used to generate an SImode/DImode
1b83d209
RS
14491 left shift by a constant, either using a single shift or
14492 a sequence of add instructions. */
14493
14494static void
28356f52 14495ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
1b83d209
RS
14496{
14497 if (count == 1)
28356f52
JB
14498 {
14499 emit_insn ((mode == DImode
14500 ? gen_addsi3
14501 : gen_adddi3) (operand, operand, operand));
14502 }
1b83d209
RS
14503 else if (!optimize_size
14504 && count * ix86_cost->add <= ix86_cost->shift_const)
14505 {
14506 int i;
14507 for (i=0; i<count; i++)
28356f52
JB
14508 {
14509 emit_insn ((mode == DImode
14510 ? gen_addsi3
14511 : gen_adddi3) (operand, operand, operand));
14512 }
1b83d209
RS
14513 }
14514 else
28356f52
JB
14515 emit_insn ((mode == DImode
14516 ? gen_ashlsi3
14517 : gen_ashldi3) (operand, operand, GEN_INT (count)));
1b83d209
RS
14518}
14519
/* Split a double-word left shift: MODE is the mode of the full value
   (DImode split into SImode halves, or TImode into DImode halves).
   OPERANDS are the shift operands; SCRATCH, when non-null and cmove is
   available, lets the variable-count case avoid a branch.  */
void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ? 32 : 64;

  if (CONST_INT_P (operands[2]))
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      /* Hardware shift counts wrap at twice the half width.  */
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count >= single_width)
	{
	  /* Shifting by a full half: low word of the source becomes the
	     high word of the result, low result word is zero.  */
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > single_width)
	    ix86_expand_ashl_const (high[0], count - single_width, mode);
	}
      else
	{
	  /* shld moves bits from the low word into the high word, then
	     the low word is shifted on its own.  */
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		     ? gen_x86_shld_1
		     : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
	  ix86_expand_ashl_const (low[0], count, mode);
	}
      return;
    }

  (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen a QImode capable registers, then 1 << N
	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
	{
	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

	  /* Test the half-width bit of the count and materialize 1 into
	     whichever half the final single shift will land the bit in.  */
	  ix86_expand_clear (low[0]);
	  ix86_expand_clear (high[0]);
	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));

	  d = gen_lowpart (QImode, low[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));

	  d = gen_lowpart (QImode, high[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_NE (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
	}

      /* Otherwise, we can get the same results by manually performing
	 a bit extract operation on bit 5/6, and then performing the two
	 shifts.  The two methods of getting 0/1 into low/high are exactly
	 the same size.  Avoiding the shift in the bit extract case helps
	 pentium4 a bit; no one else seems to care much either way.  */
      else
	{
	  rtx x;

	  if (TARGET_PARTIAL_REG_STALL && !optimize_size)
	    x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
	  else
	    x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

	  /* high = (count >> 5|6) & 1; low = high ^ 1.  */
	  emit_insn ((mode == DImode
		      ? gen_lshrsi3
		      : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
	  emit_insn ((mode == DImode
		      ? gen_andsi3
		      : gen_anddi3) (high[0], high[0], GEN_INT (1)));
	  emit_move_insn (low[0], high[0]);
	  emit_insn ((mode == DImode
		      ? gen_xorsi3
		      : gen_xordi3) (low[0], low[0], GEN_INT (1)));
	}

      /* The hardware shift masks the count to the half width, so both
	 halves can be shifted by the full count.  */
      emit_insn ((mode == DImode
		  ? gen_ashlsi3
		  : gen_ashldi3) (low[0], low[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_ashlsi3
		  : gen_ashldi3) (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
	 know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_size)
	emit_move_insn (high[0], low[0]);
      else
	emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
      emit_insn ((mode == DImode
		  ? gen_x86_shld_1
		  : gen_x86_64_shld) (high[0], low[0], operands[2]));
    }

  emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));

  /* Fix up the case where the real count was >= the half width: the
     adjustment moves low into high and clears low, branch-free with
     cmove/scratch, otherwise with a conditional jump.  */
  if (TARGET_CMOVE && scratch)
    {
      ix86_expand_clear (scratch);
      emit_insn ((mode == DImode
		  ? gen_x86_shift_adj_1
		  : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
    }
  else
    emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
}
32b5b1aa 14646
/* Split a double-word arithmetic right shift; see ix86_split_ashl for
   the meaning of OPERANDS, SCRATCH and MODE.  */
void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ? 32 : 64;

  if (CONST_INT_P (operands[2]))
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count == single_width * 2 - 1)
	{
	  /* Shift by all-but-one bit: both halves become the sign mask.  */
	  emit_move_insn (high[0], high[1]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0],
				      GEN_INT (single_width - 1)));
	  emit_move_insn (low[0], high[0]);

	}
      else if (count >= single_width)
	{
	  /* High source half moves into the low result half; the high
	     result half is the sign extension.  */
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], low[0]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0],
				      GEN_INT (single_width - 1)));
	  if (count > single_width)
	    emit_insn ((mode == DImode
			? gen_ashrsi3
			: gen_ashrdi3) (low[0], low[0],
					GEN_INT (count - single_width)));
	}
      else
	{
	  /* shrd pulls bits from the high half into the low half, then
	     the high half is shifted arithmetically on its own.  */
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shrd_1
		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

      emit_insn ((mode == DImode
		  ? gen_x86_shrd_1
		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_ashrsi3
		  : gen_ashrdi3) (high[0], high[0], operands[2]));

      /* Fix up counts >= the half width: SCRATCH holds the sign mask so
	 the adjustment can be done with cmove instead of a branch.  */
      if (TARGET_CMOVE && scratch)
	{
	  emit_move_insn (scratch, high[0]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (scratch, scratch,
				      GEN_INT (single_width - 1)));
	  emit_insn ((mode == DImode
		      ? gen_x86_shift_adj_1
		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
					       scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}
32b5b1aa 14725
/* Split a double-word logical right shift; see ix86_split_ashl for
   the meaning of OPERANDS, SCRATCH and MODE.  */
void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ? 32 : 64;

  if (CONST_INT_P (operands[2]))
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count >= single_width)
	{
	  /* High source half moves into the low result half; the high
	     result half becomes zero.  */
	  emit_move_insn (low[0], high[1]);
	  ix86_expand_clear (high[0]);

	  if (count > single_width)
	    emit_insn ((mode == DImode
			? gen_lshrsi3
			: gen_lshrdi3) (low[0], low[0],
					GEN_INT (count - single_width)));
	}
      else
	{
	  /* shrd pulls bits from the high half into the low half, then
	     the high half is shifted logically on its own.  */
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shrd_1
		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
	  emit_insn ((mode == DImode
		      ? gen_lshrsi3
		      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

      emit_insn ((mode == DImode
		  ? gen_x86_shrd_1
		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_lshrsi3
		  : gen_lshrdi3) (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && scratch)
	{
	  ix86_expand_clear (scratch);
	  emit_insn ((mode == DImode
		      ? gen_x86_shift_adj_1
		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
					       scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}
3f803cd9 14788
8c996513
JH
14789/* Predict just emitted jump instruction to be taken with probability PROB. */
14790static void
14791predict_jump (int prob)
14792{
14793 rtx insn = get_last_insn ();
7656aee4 14794 gcc_assert (JUMP_P (insn));
8c996513
JH
14795 REG_NOTES (insn)
14796 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14797 GEN_INT (prob),
14798 REG_NOTES (insn));
14799}
14800
0407c02b 14801/* Helper function for the string operations below. Dest VARIABLE whether
0945b39d
JH
14802 it is aligned to VALUE bytes. If true, jump to the label. */
14803static rtx
8c996513 14804ix86_expand_aligntest (rtx variable, int value, bool epilogue)
0945b39d
JH
14805{
14806 rtx label = gen_label_rtx ();
14807 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
14808 if (GET_MODE (variable) == DImode)
14809 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
14810 else
14811 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
14812 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
d43e0b7d 14813 1, label);
8c996513
JH
14814 if (epilogue)
14815 predict_jump (REG_BR_PROB_BASE * 50 / 100);
14816 else
14817 predict_jump (REG_BR_PROB_BASE * 90 / 100);
0945b39d
JH
14818 return label;
14819}
14820
14821/* Adjust COUNTER by the VALUE. */
14822static void
b96a374d 14823ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
0945b39d
JH
14824{
14825 if (GET_MODE (countreg) == DImode)
14826 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
14827 else
14828 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
14829}
14830
14831/* Zero extend possibly SImode EXP to Pmode register. */
d24b3457 14832rtx
b96a374d 14833ix86_zero_extend_to_Pmode (rtx exp)
0945b39d
JH
14834{
14835 rtx r;
14836 if (GET_MODE (exp) == VOIDmode)
14837 return force_reg (Pmode, exp);
14838 if (GET_MODE (exp) == Pmode)
14839 return copy_to_mode_reg (Pmode, exp);
14840 r = gen_reg_rtx (Pmode);
14841 emit_insn (gen_zero_extendsidi2 (r, exp));
14842 return r;
14843}
14844
8c996513
JH
14845/* Divide COUNTREG by SCALE. */
14846static rtx
14847scale_counter (rtx countreg, int scale)
0945b39d 14848{
8c996513
JH
14849 rtx sc;
14850 rtx piece_size_mask;
0945b39d 14851
8c996513
JH
14852 if (scale == 1)
14853 return countreg;
7656aee4 14854 if (CONST_INT_P (countreg))
8c996513
JH
14855 return GEN_INT (INTVAL (countreg) / scale);
14856 gcc_assert (REG_P (countreg));
0945b39d 14857
8c996513
JH
14858 piece_size_mask = GEN_INT (scale - 1);
14859 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14860 GEN_INT (exact_log2 (scale)),
14861 NULL, 1, OPTAB_DIRECT);
14862 return sc;
14863}
d0a5295a 14864
7fa7289d
KH
14865/* Return mode for the memcpy/memset loop counter. Prefer SImode over
14866 DImode for constant loop counts. */
bd8d4d19
JH
14867
14868static enum machine_mode
14869counter_mode (rtx count_exp)
14870{
14871 if (GET_MODE (count_exp) != VOIDmode)
14872 return GET_MODE (count_exp);
14873 if (GET_CODE (count_exp) != CONST_INT)
14874 return Pmode;
14875 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14876 return DImode;
14877 return SImode;
14878}
14879
/* When SRCPTR is non-NULL, output simple loop to move memory
   pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
   overall size is COUNT specified in bytes.  When SRCPTR is NULL, output the
   equivalent loop to set memory by VALUE (supposed to be in MODE).

   The size is rounded down to whole number of chunk size moved at once.
   SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info.  */

static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
			       rtx destptr, rtx srcptr, rtx value,
			       rtx count, enum machine_mode mode, int unroll,
			       int expected_size)
{
  rtx out_label, top_label, iter, tmp;
  enum machine_mode iter_mode = counter_mode (count);
  /* Bytes moved per loop iteration.  */
  rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
  /* Mask rounding COUNT down to a whole number of pieces.  */
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
  rtx size;
  rtx x_addr;
  rtx y_addr;
  int i;

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  /* SIZE = COUNT rounded down to a multiple of the piece size.  */
  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
			      NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
    {
      /* Byte loop may iterate zero times; guard the whole loop.  */
      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
			       true, out_label);
      predict_jump (REG_BR_PROB_BASE * 10 / 100);
    }
  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  /* Address the current chunk as DESTPTR + ITER (zero-extended).  */
  tmp = convert_modes (Pmode, iter_mode, iter, true);
  x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
  destmem = change_address (destmem, mode, x_addr);

  if (srcmem)
    {
      y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
      srcmem = change_address (srcmem, mode, y_addr);

      /* When unrolling for chips that reorder memory reads and writes,
	 we can save registers by using single temporary.
	 Also using 4 temporaries is overkill in 32bit mode.  */
      if (!TARGET_64BIT && 0)
	{
	  /* NOTE(review): this arm is deliberately disabled ("&& 0");
	     kept for reference only.  */
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		{
		  destmem =
		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
		  srcmem =
		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
		}
	      emit_move_insn (destmem, srcmem);
	    }
	}
      else
	{
	  /* Load all UNROLL chunks into temporaries first, then store
	     them, so loads and stores are not interleaved.  */
	  rtx tmpreg[4];
	  gcc_assert (unroll <= 4);
	  for (i = 0; i < unroll; i++)
	    {
	      tmpreg[i] = gen_reg_rtx (mode);
	      if (i)
		{
		  srcmem =
		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
		}
	      emit_move_insn (tmpreg[i], srcmem);
	    }
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		{
		  destmem =
		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
		}
	      emit_move_insn (destmem, tmpreg[i]);
	    }
	}
    }
  else
    /* memset variant: store VALUE into each of the UNROLL chunks.  */
    for (i = 0; i < unroll; i++)
      {
	if (i)
	  destmem =
	    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
	emit_move_insn (destmem, value);
      }

  /* ITER += piece size; loop back while ITER < SIZE.  */
  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != iter)
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
			   true, top_label);
  if (expected_size != -1)
    {
      /* Derive a branch probability from the expected iteration count.  */
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
	predict_jump (0);
      else if (expected_size > REG_BR_PROB_BASE)
	predict_jump (REG_BR_PROB_BASE - 1);
      else
	predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
    }
  else
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  /* Advance the pointers past the copied region for the caller.  */
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
  if (srcptr)
    {
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
				 true, OPTAB_LIB_WIDEN);
      if (tmp != srcptr)
	emit_move_insn (srcptr, tmp);
    }
  emit_label (out_label);
}
15014
/* Output "rep; mov" instruction.
   Arguments have same meaning as for previous function */
static void
expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
			   rtx destptr, rtx srcptr,
			   rtx count,
			   enum machine_mode mode)
{
  rtx destexp;
  rtx srcexp;
  rtx countreg;

  /* If the size is known, it is shorter to use rep movs.  */
  if (mode == QImode && CONST_INT_P (count)
      && !(INTVAL (count) & 3))
    mode = SImode;

  /* Canonicalize both MEMs to BLKmode based at their pointer regs so the
     rep_mov pattern gets correct aliasing information.  */
  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
    srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
  /* Chunk count = byte count / chunk size, widened to Pmode.  */
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      /* DESTEXP/SRCEXP model the final pointer values
	 (ptr + countreg << log2 (chunk size)) for the rep_mov pattern.  */
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
      srcexp = gen_rtx_ASHIFT (Pmode, countreg,
			       GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
    }
  else
    {
      destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
    }
  emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
			  destexp, srcexp));
}
15054
/* Output "rep; stos" instruction.
   Arguments have same meaning as for previous function */
static void
expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
			    rtx count,
			    enum machine_mode mode)
{
  rtx destexp;
  rtx countreg;

  /* Canonicalize the destination MEM to BLKmode at DESTPTR for correct
     aliasing information in the rep_stos pattern.  */
  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  /* The stored value must be in MODE (e.g. the low part of a wider reg).  */
  value = force_reg (mode, gen_lowpart (mode, value));
  /* Chunk count = byte count / chunk size, widened to Pmode.  */
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      /* DESTEXP models the final destination pointer value.  */
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
    }
  else
    destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
  emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
}
15079
15080static void
15081emit_strmov (rtx destmem, rtx srcmem,
15082 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
15083{
15084 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
15085 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
15086 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15087}
15088
/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
static void
expand_movmem_epilogue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count, int max_size)
{
  rtx src, dest;
  if (CONST_INT_P (count))
    {
      /* Constant residue: emit straight-line moves for each set bit of
	 the residual count, widest chunks first.  */
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
	{
	  if (TARGET_64BIT)
	    {
	      emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
	      emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
	    }
	  else
	    /* 16-byte residue is only requested on 64-bit targets.  */
	    gcc_unreachable ();
	  offset += 16;
	}
      if ((countval & 0x08) && max_size > 8)
	{
	  if (TARGET_64BIT)
	    emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
	  else
	    {
	      emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
	      emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
	    }
	  offset += 8;
	}
      if ((countval & 0x04) && max_size > 4)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
	  offset += 4;
	}
      if ((countval & 0x02) && max_size > 2)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
	  offset += 2;
	}
      if ((countval & 0x01) && max_size > 1)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
	  offset += 1;
	}
      return;
    }
  if (max_size > 8)
    {
      /* Large variable residue: mask COUNT down and reuse the byte loop.  */
      count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
				   count, 1, OPTAB_DIRECT);
      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
				     count, QImode, 1, 4);
      return;
    }

  /* When there are stringops, we can cheaply increase dest and src pointers.
     Otherwise we save code size by maintaining offset (zero is readily
     available from preceding rep operation) and using x86 addressing modes.
   */
  if (TARGET_SINGLE_STRINGOP)
    {
      /* Jump tree: test each residual bit and conditionally emit one
	 string move; strmov advances both pointers for us.  */
      if (max_size > 4)
	{
	  rtx label = ix86_expand_aligntest (count, 4, true);
	  src = change_address (srcmem, SImode, srcptr);
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 2)
	{
	  rtx label = ix86_expand_aligntest (count, 2, true);
	  src = change_address (srcmem, HImode, srcptr);
	  dest = change_address (destmem, HImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 1)
	{
	  rtx label = ix86_expand_aligntest (count, 1, true);
	  src = change_address (srcmem, QImode, srcptr);
	  dest = change_address (destmem, QImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  else
    {
      /* No single-insn stringops: keep a running OFFSET register and use
	 ptr+offset addressing instead of advancing the pointers.  */
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
	{
	  rtx label = ix86_expand_aligntest (count, 4, true);
	  src = change_address (srcmem, SImode, srcptr);
	  dest = change_address (destmem, SImode, destptr);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 2)
	{
	  rtx label = ix86_expand_aligntest (count, 2, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, HImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, HImode, tmp);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 1)
	{
	  /* Last possible byte: no need to update OFFSET afterwards.  */
	  rtx label = ix86_expand_aligntest (count, 1, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, QImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, QImode, tmp);
	  emit_move_insn (dest, src);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
}
15228
15229/* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
15230static void
15231expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
15232 rtx count, int max_size)
15233{
15234 count =
bd8d4d19
JH
15235 expand_simple_binop (counter_mode (count), AND, count,
15236 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
8c996513
JH
15237 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
15238 gen_lowpart (QImode, value), count, QImode,
15239 1, max_size / 2);
15240}
15241
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      /* Constant residue: emit straight-line stores for each set bit of
	 the residual count, widest chunks first.  VALUE is assumed to be
	 pre-broadcast to a full word by the caller.  */
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
	{
	  if (TARGET_64BIT)
	    {
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
	      emit_insn (gen_strset (destptr, dest, value));
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  else
	    /* 16-byte residue is only requested on 64-bit targets.  */
	    gcc_unreachable ();
	  offset += 16;
	}
      if ((countval & 0x08) && max_size > 8)
	{
	  if (TARGET_64BIT)
	    {
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  else
	    {
	      dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
	      emit_insn (gen_strset (destptr, dest, value));
	      dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  offset += 8;
	}
      if ((countval & 0x04) && max_size > 4)
	{
	  dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
	  offset += 4;
	}
      if ((countval & 0x02) && max_size > 2)
	{
	  dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
	  offset += 2;
	}
      if ((countval & 0x01) && max_size > 1)
	{
	  dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
	  offset += 1;
	}
      return;
    }
  /* Large variable residue: fall back to the byte store loop.  */
  if (max_size > 32)
    {
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
      return;
    }
  /* Otherwise emit a jump tree: test each residual bit and conditionally
     store one chunk; gen_strset advances DESTPTR for us.  */
  if (max_size > 16)
    {
      rtx label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}
0945b39d 15369
/* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
   DESIRED_ALIGNMENT.  */
static void
expand_movmem_prologue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count,
			int align, int desired_alignment)
{
  /* For each alignment level below DESIRED_ALIGNMENT, conditionally copy
     one chunk (1, 2, then 4 bytes) so that DESTPTR becomes aligned; the
     aligntest label skips the copy when the pointer is already aligned at
     that level.  COUNT is decremented to account for the bytes copied.  */
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      srcmem = change_address (srcmem, QImode, srcptr);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      srcmem = change_address (srcmem, HImode, srcptr);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      srcmem = change_address (srcmem, SImode, srcptr);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  /* Larger desired alignments would need additional steps above.  */
  gcc_assert (desired_alignment <= 8);
}
15409
/* Set enough from DEST to align DEST known to by aligned by ALIGN to
   DESIRED_ALIGNMENT.  */
static void
expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
			int align, int desired_alignment)
{
  /* Mirror of expand_movmem_prologue for memset: conditionally store one
     chunk (1, 2, then 4 bytes of VALUE) per alignment level until DESTPTR
     reaches DESIRED_ALIGNMENT, decrementing COUNT accordingly.  */
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  /* Larger desired alignments would need additional steps above.  */
  gcc_assert (desired_alignment <= 8);
}
15445
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.
   COUNT is the constant byte count (0 if unknown), EXPECTED_SIZE the
   profile-based estimate (-1 if unknown), MEMSET selects the memset
   cost table over memcpy.  *DYNAMIC_CHECK is set to a size threshold
   when a runtime size check plus libcall should be emitted, else -1.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
	    int *dynamic_check)
{
  const struct stringop_algs * algs;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes.	*/
  bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
			     || (memset
				 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));

#define ALG_USABLE_P(alg) (rep_prefix_usable			\
			   || (alg != rep_prefix_1_byte		\
			       && alg != rep_prefix_4_byte      \
			       && alg != rep_prefix_8_byte))

  *dynamic_check = -1;
  if (memset)
    algs = &ix86_cost->memset[TARGET_64BIT != 0];
  else
    algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
  /* An explicit -mstringop-strategy choice wins if it is usable.  */
  if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
    return stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (optimize_size)
    {
      if (!count || (count & 3))
	return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
      else
	return rep_prefix_usable ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
   */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      /* Walk the cost table and pick the algorithm for the first size
	 bucket covering EXPECTED_SIZE.  */
      unsigned int i;
      enum stringop_alg alg = libcall;
      for (i = 0; i < NAX_STRINGOP_ALGS; i++)
	{
	  /* We get here if the algorithms that were not libcall-based
	     were rep-prefix based and we are unable to use rep prefixes
	     based on global register usage.  Break out of the loop and
	     use the heuristic below.  */
	  if (algs->size[i].max == 0)
	    break;
	  if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
	    {
	      enum stringop_alg candidate = algs->size[i].alg;

	      if (candidate != libcall && ALG_USABLE_P (candidate))
		alg = candidate;
	      /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
		 last non-libcall inline algorithm.  */
	      if (TARGET_INLINE_ALL_STRINGOPS)
		{
		  /* When the current size is best to be copied by a libcall,
		     but we are still forced to inline, run the heuristic below
		     that will pick code for medium sized blocks.  */
		  if (alg != libcall)
		    return alg;
		  break;
		}
	      else if (ALG_USABLE_P (candidate))
		return candidate;
	    }
	}
      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
    }
  /* When asked to inline the call anyway, try to pick meaningful choice.
     We look for maximal size of block that is faster to copy by hand and
     take blocks of at most of that size guessing that average size will
     be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
    {
      int max = -1;
      enum stringop_alg alg;
      int i;
      bool any_alg_usable_p = true;

      /* MAX becomes the largest bucket bound handled by a usable
	 non-libcall algorithm.  */
      for (i = 0; i < NAX_STRINGOP_ALGS; i++)
	{
	  enum stringop_alg candidate = algs->size[i].alg;
	  any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);

	  if (candidate != libcall && candidate
	      && ALG_USABLE_P (candidate))
	    max = algs->size[i].max;
	}
      /* If there aren't any usable algorithms, then recursing on
	 smaller sizes isn't going to find anything.  Just return the
	 simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
	{
	  /* Pick something reasonable.  */
	  if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	    *dynamic_check = 128;
	  return loop_1_byte;
	}
      if (max == -1)
	max = 4096;
      /* Recurse once with a concrete expected size of MAX / 2; the
	 recursion cannot set *DYNAMIC_CHECK or choose libcall (asserted).  */
      alg = decide_alg (count, max / 2, memset, dynamic_check);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	*dynamic_check = max;
      return alg;
    }
  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P
}
15565
/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
static int
decide_alignment (int align,
		  enum stringop_alg alg,
		  int expected_size)
{
  int desired_align = 0;
  switch (alg)
    {
    case no_stringop:
      gcc_unreachable ();
    case loop:
    case unrolled_loop:
      /* Loops move pointer-sized chunks; align to the chunk size.  */
      desired_align = GET_MODE_SIZE (Pmode);
      break;
    case rep_prefix_8_byte:
      desired_align = 8;
      break;
    case rep_prefix_4_byte:
      /* PentiumPro has special logic triggering for 8 byte aligned blocks.
	 copying whole cacheline at once.  */
      if (TARGET_PENTIUMPRO)
	desired_align = 8;
      else
	desired_align = 4;
      break;
    case rep_prefix_1_byte:
      /* PentiumPro has special logic triggering for 8 byte aligned blocks.
	 copying whole cacheline at once.  */
      if (TARGET_PENTIUMPRO)
	desired_align = 8;
      else
	desired_align = 1;
      break;
    case loop_1_byte:
      desired_align = 1;
      break;
    case libcall:
      /* The library call handles alignment itself.  */
      return 0;
    }

  /* Alignment prologue costs code size; skip it when optimizing for size,
     when the operand is already aligned enough, or when the block is too
     small for the prologue to pay off.  */
  if (optimize_size)
    desired_align = 1;
  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;
  return desired_align;
}
15616
/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int pow2;

  /* Double POW2 until it strictly exceeds VAL.  */
  for (pow2 = 1; pow2 <= val; pow2 <<= 1)
    ;
  return pow2;
}
15626
8c996513 15627/* Expand string move (memcpy) operation. Use i386 string operations when
1b22c72e 15628 profitable. expand_setmem contains similar code. The code depends upon
2a4f771a
JH
15629 architecture, block size and alignment, but always has the same
15630 overall structure:
15631
15632 1) Prologue guard: Conditional that jumps up to epilogues for small
15633 blocks that can be handled by epilogue alone. This is faster but
15634 also needed for correctness, since prologue assume the block is larger
2e226e66 15635 than the desired alignment.
2a4f771a
JH
15636
15637 Optional dynamic check for size and libcall for large
15638 blocks is emitted here too, with -minline-stringops-dynamically.
15639
15640 2) Prologue: copy first few bytes in order to get destination aligned
15641 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
15642 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
15643 We emit either a jump tree on power of two sized blocks, or a byte loop.
15644
15645 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15646 with specified algorithm.
15647
15648 4) Epilogue: code copying tail of the block that is too small to be
15649 handled by main body (or up to size guarded by prologue guard). */
54a88090 15650
0945b39d 15651int
8c996513
JH
15652ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
15653 rtx expected_align_exp, rtx expected_size_exp)
0945b39d 15654{
8c996513
JH
15655 rtx destreg;
15656 rtx srcreg;
15657 rtx label = NULL;
15658 rtx tmp;
15659 rtx jump_around_label = NULL;
15660 HOST_WIDE_INT align = 1;
0945b39d 15661 unsigned HOST_WIDE_INT count = 0;
8c996513 15662 HOST_WIDE_INT expected_size = -1;
2a4f771a 15663 int size_needed = 0, epilogue_size_needed;
8c996513
JH
15664 int desired_align = 0;
15665 enum stringop_alg alg;
15666 int dynamic_check;
0945b39d 15667
7656aee4 15668 if (CONST_INT_P (align_exp))
0945b39d 15669 align = INTVAL (align_exp);
2f8e468b 15670 /* i386 can do misaligned access on reasonably increased cost. */
7656aee4 15671 if (CONST_INT_P (expected_align_exp)
8c996513
JH
15672 && INTVAL (expected_align_exp) > align)
15673 align = INTVAL (expected_align_exp);
7656aee4 15674 if (CONST_INT_P (count_exp))
8c996513 15675 count = expected_size = INTVAL (count_exp);
7656aee4 15676 if (CONST_INT_P (expected_size_exp) && count == 0)
2a4f771a
JH
15677 expected_size = INTVAL (expected_size_exp);
15678
1b22c72e
EB
15679 /* Make sure we don't need to care about overflow later on. */
15680 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15681 return 0;
15682
2a4f771a
JH
15683 /* Step 0: Decide on preferred algorithm, desired alignment and
15684 size of chunks to be copied by main loop. */
0945b39d 15685
8c996513
JH
15686 alg = decide_alg (count, expected_size, false, &dynamic_check);
15687 desired_align = decide_alignment (align, alg, expected_size);
d0a5295a 15688
0945b39d 15689 if (!TARGET_ALIGN_STRINGOPS)
8c996513 15690 align = desired_align;
0945b39d 15691
8c996513
JH
15692 if (alg == libcall)
15693 return 0;
15694 gcc_assert (alg != no_stringop);
15695 if (!count)
15696 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
15697 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15698 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
15699 switch (alg)
26771da7 15700 {
8c996513
JH
15701 case libcall:
15702 case no_stringop:
15703 gcc_unreachable ();
15704 case loop:
15705 size_needed = GET_MODE_SIZE (Pmode);
15706 break;
15707 case unrolled_loop:
15708 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
15709 break;
15710 case rep_prefix_8_byte:
15711 size_needed = 8;
15712 break;
15713 case rep_prefix_4_byte:
15714 size_needed = 4;
15715 break;
15716 case rep_prefix_1_byte:
15717 case loop_1_byte:
15718 size_needed = 1;
15719 break;
26771da7 15720 }
0945b39d 15721
2a4f771a
JH
15722 epilogue_size_needed = size_needed;
15723
15724 /* Step 1: Prologue guard. */
15725
8c996513 15726 /* Alignment code needs count to be in register. */
7656aee4 15727 if (CONST_INT_P (count_exp) && desired_align > align)
097f5e21 15728 count_exp = force_reg (counter_mode (count_exp), count_exp);
8c996513 15729 gcc_assert (desired_align >= 1 && align >= 1);
2a4f771a 15730
8c996513 15731 /* Ensure that alignment prologue won't copy past end of block. */
bd8d4d19 15732 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
8c996513 15733 {
2a4f771a 15734 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
2a4f771a
JH
15735 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15736 Make sure it is power of 2. */
15737 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
90c56b45 15738
097f5e21
MM
15739 if (CONST_INT_P (count_exp))
15740 {
15741 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
15742 goto epilogue;
15743 }
8c996513 15744 else
097f5e21
MM
15745 {
15746 label = gen_label_rtx ();
15747 emit_cmp_and_jump_insns (count_exp,
15748 GEN_INT (epilogue_size_needed),
15749 LTU, 0, counter_mode (count_exp), 1, label);
15750 if (expected_size == -1 || expected_size < epilogue_size_needed)
15751 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15752 else
15753 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15754 }
8c996513 15755 }
097f5e21 15756
8c996513
JH
15757 /* Emit code to decide on runtime whether library call or inline should be
15758 used. */
15759 if (dynamic_check != -1)
15760 {
097f5e21
MM
15761 if (CONST_INT_P (count_exp))
15762 {
15763 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
15764 {
15765 emit_block_move_via_libcall (dst, src, count_exp, false);
15766 count_exp = const0_rtx;
15767 goto epilogue;
15768 }
15769 }
15770 else
15771 {
15772 rtx hot_label = gen_label_rtx ();
15773 jump_around_label = gen_label_rtx ();
15774 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15775 LEU, 0, GET_MODE (count_exp), 1, hot_label);
15776 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15777 emit_block_move_via_libcall (dst, src, count_exp, false);
15778 emit_jump (jump_around_label);
15779 emit_label (hot_label);
15780 }
8c996513 15781 }
0945b39d 15782
2a4f771a 15783 /* Step 2: Alignment prologue. */
0945b39d 15784
8c996513 15785 if (desired_align > align)
0945b39d 15786 {
8c996513
JH
15787 /* Except for the first move in epilogue, we no longer know
15788 constant offset in aliasing info. It don't seems to worth
15789 the pain to maintain it for the first move, so throw away
15790 the info early. */
15791 src = change_address (src, BLKmode, srcreg);
15792 dst = change_address (dst, BLKmode, destreg);
8c996513
JH
15793 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
15794 desired_align);
0945b39d 15795 }
8c996513 15796 if (label && size_needed == 1)
0945b39d 15797 {
8c996513
JH
15798 emit_label (label);
15799 LABEL_NUSES (label) = 1;
15800 label = NULL;
15801 }
4e44c1ef 15802
2a4f771a
JH
15803 /* Step 3: Main loop. */
15804
8c996513
JH
15805 switch (alg)
15806 {
15807 case libcall:
15808 case no_stringop:
15809 gcc_unreachable ();
15810 case loop_1_byte:
15811 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15812 count_exp, QImode, 1, expected_size);
15813 break;
15814 case loop:
15815 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15816 count_exp, Pmode, 1, expected_size);
15817 break;
15818 case unrolled_loop:
15819 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
15820 registers for 4 temporaries anyway. */
15821 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15822 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
15823 expected_size);
15824 break;
15825 case rep_prefix_8_byte:
8c996513
JH
15826 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15827 DImode);
15828 break;
15829 case rep_prefix_4_byte:
8c996513
JH
15830 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15831 SImode);
15832 break;
15833 case rep_prefix_1_byte:
8c996513
JH
15834 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15835 QImode);
15836 break;
15837 }
15838 /* Adjust properly the offset of src and dest memory for aliasing. */
7656aee4 15839 if (CONST_INT_P (count_exp))
8c996513
JH
15840 {
15841 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
15842 (count / size_needed) * size_needed);
15843 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15844 (count / size_needed) * size_needed);
15845 }
15846 else
15847 {
15848 src = change_address (src, BLKmode, srcreg);
15849 dst = change_address (dst, BLKmode, destreg);
15850 }
6b32b628 15851
2a4f771a 15852 /* Step 4: Epilogue to copy the remaining bytes. */
097f5e21 15853 epilogue:
8c996513
JH
15854 if (label)
15855 {
2a4f771a
JH
15856 /* When the main loop is done, COUNT_EXP might hold original count,
15857 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15858 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15859 bytes. Compensate if needed. */
54a88090 15860
2a4f771a 15861 if (size_needed < epilogue_size_needed)
0945b39d 15862 {
8c996513 15863 tmp =
bd8d4d19 15864 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
8c996513
JH
15865 GEN_INT (size_needed - 1), count_exp, 1,
15866 OPTAB_DIRECT);
8c996513
JH
15867 if (tmp != count_exp)
15868 emit_move_insn (count_exp, tmp);
15869 }
15870 emit_label (label);
15871 LABEL_NUSES (label) = 1;
15872 }
2a4f771a
JH
15873
15874 if (count_exp != const0_rtx && epilogue_size_needed > 1)
90c56b45 15875 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
2a4f771a 15876 epilogue_size_needed);
8c996513
JH
15877 if (jump_around_label)
15878 emit_label (jump_around_label);
15879 return 1;
15880}
6b32b628 15881
8c996513
JH
15882/* Helper function for memcpy. For QImode value 0xXY produce
15883 0xXYXYXYXY of wide specified by MODE. This is essentially
15884 a * 0x10101010, but we can do slightly better than
15885 synth_mult by unwinding the sequence by hand on CPUs with
15886 slow multiply. */
15887static rtx
15888promote_duplicated_reg (enum machine_mode mode, rtx val)
15889{
15890 enum machine_mode valmode = GET_MODE (val);
15891 rtx tmp;
15892 int nops = mode == DImode ? 3 : 2;
6b32b628 15893
8c996513
JH
15894 gcc_assert (mode == SImode || mode == DImode);
15895 if (val == const0_rtx)
15896 return copy_to_mode_reg (mode, const0_rtx);
7656aee4 15897 if (CONST_INT_P (val))
8c996513
JH
15898 {
15899 HOST_WIDE_INT v = INTVAL (val) & 255;
6b32b628 15900
8c996513
JH
15901 v |= v << 8;
15902 v |= v << 16;
15903 if (mode == DImode)
15904 v |= (v << 16) << 16;
15905 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15906 }
15907
15908 if (valmode == VOIDmode)
15909 valmode = QImode;
15910 if (valmode != QImode)
15911 val = gen_lowpart (QImode, val);
15912 if (mode == QImode)
15913 return val;
15914 if (!TARGET_PARTIAL_REG_STALL)
15915 nops--;
15916 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15917 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15918 <= (ix86_cost->shift_const + ix86_cost->add) * nops
15919 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15920 {
15921 rtx reg = convert_modes (mode, QImode, val, true);
15922 tmp = promote_duplicated_reg (mode, const1_rtx);
15923 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
15924 OPTAB_DIRECT);
0945b39d
JH
15925 }
15926 else
15927 {
8c996513 15928 rtx reg = convert_modes (mode, QImode, val, true);
0945b39d 15929
8c996513
JH
15930 if (!TARGET_PARTIAL_REG_STALL)
15931 if (mode == SImode)
15932 emit_insn (gen_movsi_insv_1 (reg, reg));
15933 else
15934 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15935 else
0945b39d 15936 {
8c996513
JH
15937 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15938 NULL, 1, OPTAB_DIRECT);
15939 reg =
15940 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
0945b39d 15941 }
8c996513
JH
15942 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15943 NULL, 1, OPTAB_DIRECT);
15944 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15945 if (mode == SImode)
15946 return reg;
15947 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15948 NULL, 1, OPTAB_DIRECT);
15949 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15950 return reg;
15951 }
15952}
0945b39d 15953
2a4f771a
JH
15954/* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15955 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15956 alignment from ALIGN to DESIRED_ALIGN. */
15957static rtx
15958promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15959{
15960 rtx promoted_val;
15961
15962 if (TARGET_64BIT
15963 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15964 promoted_val = promote_duplicated_reg (DImode, val);
15965 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15966 promoted_val = promote_duplicated_reg (SImode, val);
15967 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15968 promoted_val = promote_duplicated_reg (HImode, val);
15969 else
15970 promoted_val = val;
15971
15972 return promoted_val;
15973}
15974
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  See expand_movmem comment for explanation of individual
   steps performed.  DST is the destination MEM, COUNT_EXP the byte count,
   VAL_EXP the fill value, ALIGN_EXP the known alignment, and
   EXPECTED_ALIGN_EXP/EXPECTED_SIZE_EXP are profile-derived hints.
   Returns 1 when code was emitted, 0 when the caller should fall back
   to a library call.  */
int
ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
		    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;	/* Nonzero only for constant counts.  */
  HOST_WIDE_INT expected_size = -1;	/* -1 means "unknown".  */
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0;
  enum stringop_alg alg;
  rtx promoted_val = NULL;		/* VAL broadcast to chunk width.  */
  bool force_loopy_epilogue = false;
  int dynamic_check;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access on reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return 0;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, true, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return 0;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  /* SIZE_NEEDED is the number of bytes stored per main-loop iteration.  */
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      size_needed = GET_MODE_SIZE (Pmode);
      break;
    case unrolled_loop:
      size_needed = GET_MODE_SIZE (Pmode) * 4;
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
    case loop_1_byte:
      size_needed = 1;
      break;
    }
  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      enum machine_mode mode = SImode;
      if (TARGET_64BIT && (count & ~0xffffffff))
	mode = DImode;
      count_exp = force_reg (mode, count_exp);
    }
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (ie one load of the big constant in the
     front of all code.  */
  if (CONST_INT_P (val_exp))
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
						   desired_align, align);
  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
	 Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      /* To improve performance of small blocks, we jump around the VAL
	 promoting mode.  This mean that if the promoted VAL is not constant,
	 we might not use it in the epilogue and have to use byte
	 loop variant.  */
      if (epilogue_size_needed > 2 && !promoted_val)
	force_loopy_epilogue = true;
      label = gen_label_rtx ();
      emit_cmp_and_jump_insns (count_exp,
			       GEN_INT (epilogue_size_needed),
			       LTU, 0, counter_mode (count_exp), 1, label);
      /* NOTE(review): the movmem expansion uses CONST_INT_P here; this
	 GET_CODE test is equivalent, just stylistically inconsistent.  */
      if (GET_CODE (count_exp) == CONST_INT)
	;
      else if (expected_size == -1 || expected_size <= epilogue_size_needed)
	predict_jump (REG_BR_PROB_BASE * 60 / 100);
      else
	predict_jump (REG_BR_PROB_BASE * 20 / 100);
    }
  /* Emit the runtime test that dispatches small blocks to a library call
     when decide_alg requested a dynamic check.  */
  if (dynamic_check != -1)
    {
      rtx hot_label = gen_label_rtx ();
      jump_around_label = gen_label_rtx ();
      emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
			       LEU, 0, counter_mode (count_exp), 1, hot_label);
      predict_jump (REG_BR_PROB_BASE * 90 / 100);
      set_storage_via_libcall (dst, count_exp, val_exp, false);
      emit_jump (jump_around_label);
      emit_label (hot_label);
    }

  /* Step 2: Alignment prologue.  */

  /* Do the expensive promotion once we branched off the small blocks.  */
  if (!promoted_val)
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
						   desired_align, align);
  gcc_assert (desired_align >= 1 && align >= 1);

  if (desired_align > align)
    {
      /* Except for the first move in epilogue, we no longer know
	 constant offset in aliasing info.  It don't seems to worth
	 the pain to maintain it for the first move, so throw away
	 the info early.  */
      dst = change_address (dst, BLKmode, destreg);
      expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
			      desired_align);
    }
  /* When the main loop stores single bytes, the prologue guard label can
     be resolved here already — no epilogue compensation is needed.  */
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
    }

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, Pmode, 1, expected_size);
      break;
    case unrolled_loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, Pmode, 4, expected_size);
      break;
    case rep_prefix_8_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  DImode);
      break;
    case rep_prefix_4_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  SImode);
      break;
    case rep_prefix_1_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  QImode);
      break;
    }
  /* Adjust properly the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
					(count / size_needed) * size_needed);
  else
    dst = change_address (dst, BLKmode, destreg);

  /* Step 4: Epilogue to copy the remaining bytes.  */

  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
	 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
	 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
	 bytes.  Compensate if needed.  */

      /* NOTE(review): the movmem counterpart guards this compensation with
	 size_needed < epilogue_size_needed and does not reassign
	 size_needed; verify this asymmetry is intentional.  */
      if (size_needed < desired_align - align)
	{
	  tmp =
	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
				 GEN_INT (size_needed - 1), count_exp, 1,
				 OPTAB_DIRECT);
	  size_needed = desired_align - align + 1;
	  if (tmp != count_exp)
	    emit_move_insn (count_exp, tmp);
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    {
      /* Without a promoted value we must re-read VAL byte by byte.  */
      if (force_loopy_epilogue)
	expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
					 size_needed);
      else
	expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
				size_needed);
    }
  if (jump_around_label)
    emit_label (jump_around_label);
  return 1;
}
4e44c1ef 16202
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (CONST_INT_P (align_rtx))
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  /* Dispatch on the address modulo 4: 0 goes straight to the
	     aligned loop, 2 and 3 skip the checks they don't need.  */
	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      /* MEM is addressed by OUT, so re-testing MEM after incrementing OUT
	 reads the next byte of the string.  */
      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.
     Computes (scratch - 0x01010101) & ~scratch & 0x80808080.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);

  /* A zero byte was found; locate it within the loaded word.  With cmov we
     can conditionally step OUT past the first two bytes branch-free.  */
  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
			      gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						    reg2,
						    out)));

    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));

      emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  Doubling the low byte of TMPREG sets
     the carry flag iff its 0x80 marker bit was set; the carry then feeds a
     subtract-with-borrow that adjusts OUT by 3 or 4.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  /* NOTE(review): Pmode as the mode of this LTU looks unusual — confirm
     against the ix86_gen_sub3_carry pattern's expectations.  */
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
  emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));

  emit_label (end_0_label);
}
0e07aff3 16375
2ed941ec
RH
16376/* Expand strlen. */
16377
16378int
16379ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
16380{
16381 rtx addr, scratch1, scratch2, scratch3, scratch4;
16382
16383 /* The generic case of strlen expander is long. Avoid it's
16384 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
16385
16386 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16387 && !TARGET_INLINE_ALL_STRINGOPS
16388 && !optimize_size
16389 && (!CONST_INT_P (align) || INTVAL (align) < 4))
16390 return 0;
16391
16392 addr = force_reg (Pmode, XEXP (src, 0));
16393 scratch1 = gen_reg_rtx (Pmode);
16394
16395 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16396 && !optimize_size)
16397 {
16398 /* Well it seems that some optimizer does not combine a call like
16399 foo(strlen(bar), strlen(bar));
16400 when the move and the subtraction is done here. It does calculate
16401 the length just once when these instructions are done inside of
16402 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
16403 often used and I use one fewer register for the lifetime of
16404 output_strlen_unroll() this is better. */
16405
16406 emit_move_insn (out, addr);
16407
16408 ix86_expand_strlensi_unroll_1 (out, src, align);
16409
16410 /* strlensi_unroll_1 returns the address of the zero at the end of
16411 the string, like memchr(), so compute the length by subtracting
16412 the start address. */
999d3194 16413 emit_insn ((*ix86_gen_sub3) (out, out, addr));
2ed941ec
RH
16414 }
16415 else
16416 {
16417 rtx unspec;
3c285765
NF
16418
16419 /* Can't use this if the user has appropriated eax, ecx, or edi. */
ec382b8c 16420 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
3c285765
NF
16421 return false;
16422
2ed941ec
RH
16423 scratch2 = gen_reg_rtx (Pmode);
16424 scratch3 = gen_reg_rtx (Pmode);
16425 scratch4 = force_reg (Pmode, constm1_rtx);
16426
16427 emit_move_insn (scratch3, addr);
16428 eoschar = force_reg (QImode, eoschar);
16429
16430 src = replace_equiv_address_nv (src, scratch3);
16431
16432 /* If .md starts supporting :P, this can be done in .md. */
16433 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
16434 scratch4), UNSPEC_SCAS);
16435 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
999d3194
L
16436 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
16437 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
2ed941ec
RH
16438 }
16439 return 1;
16440}
16441
dc4d7240
JH
16442/* For given symbol (function) construct code to compute address of it's PLT
16443 entry in large x86-64 PIC model. */
16444rtx
16445construct_plt_address (rtx symbol)
16446{
16447 rtx tmp = gen_reg_rtx (Pmode);
16448 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
16449
16450 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
16451 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
16452
16453 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
16454 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
16455 return tmp;
16456}
16457
/* Emit a call to the function at FNADDR (a MEM).  RETVAL, when non-NULL,
   receives the call's result.  CALLARG1 is the argument-bytes operand of
   the CALL rtx; CALLARG2, on 64-bit targets, when non-negative is moved
   into AL before the call (presumably the varargs SSE-register count —
   TODO confirm against the ABI handling).  POP, when nonzero, is the
   number of bytes the callee pops.  SIBCALL is nonzero for tail calls.  */
void
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
		  rtx callarg2 ATTRIBUTE_UNUSED,
		  rtx pop, int sibcall)
{
  rtx use = NULL, call;

  if (pop == const0_rtx)
    pop = NULL;
  /* Callee-pop is only used on 32-bit targets.  */
  gcc_assert (!TARGET_64BIT || !pop);

  if (TARGET_MACHO && !TARGET_64BIT)
    {
#if TARGET_MACHO
      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
	fnaddr = machopic_indirect_call_target (fnaddr);
#endif
    }
  else
    {
      /* Static functions and indirect calls don't need the pic register.  */
      if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
	use_reg (&use, pic_offset_table_rtx);
    }

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, AX_REG);
      emit_move_insn (al, callarg2);
      /* Record AL in the call's USE list so it stays live to the call.  */
      use_reg (&use, al);
    }

  /* Legitimize the call address: large-PIC globals go through the PLT
     address computation; anything else unsuitable as a direct call
     operand is forced into a register.  */
  if (ix86_cmodel == CM_LARGE_PIC
      && GET_CODE (fnaddr) == MEM
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
    fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
  else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }
  /* 64-bit indirect sibcalls go through R11 (a call-clobbered register
     not used for argument passing).  */
  if (sibcall && TARGET_64BIT
      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
    {
      rtx addr;
      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_REG (Pmode, R11_REG);
      emit_move_insn (fnaddr, addr);
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }

  /* Build the call pattern: optionally a SET for the return value, and
     optionally a PARALLEL with the callee-pop stack adjustment.  */
  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}
fce5a9f2 16526
e075ae69 16527\f
e075ae69
RH
16528/* Clear stack slot assignments remembered from previous functions.
16529 This is called from INIT_EXPANDERS once before RTL is emitted for each
16530 function. */
16531
e2500fed 16532static struct machine_function *
b96a374d 16533ix86_init_machine_status (void)
37b15744 16534{
d7394366
JH
16535 struct machine_function *f;
16536
9415ab7d 16537 f = GGC_CNEW (struct machine_function);
d7394366 16538 f->use_fast_prologue_epilogue_nregs = -1;
5bf5a10b 16539 f->tls_descriptor_call_expanded_p = 0;
7c800926 16540 f->call_abi = DEFAULT_ABI;
8330e2c6
AJ
16541
16542 return f;
1526a060
BS
16543}
16544
e075ae69
RH
16545/* Return a MEM corresponding to a stack slot with mode MODE.
16546 Allocate a new slot if necessary.
16547
16548 The RTL for a function can have several slots available: N is
16549 which slot to use. */
16550
16551rtx
ff680eb1 16552assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
e075ae69 16553{
ddb0ae00
ZW
16554 struct stack_local_entry *s;
16555
ff680eb1 16556 gcc_assert (n < MAX_386_STACK_LOCALS);
e075ae69 16557
80dcd3aa
UB
16558 /* Virtual slot is valid only before vregs are instantiated. */
16559 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
16560
ddb0ae00
ZW
16561 for (s = ix86_stack_locals; s; s = s->next)
16562 if (s->mode == mode && s->n == n)
3e916873 16563 return copy_rtx (s->rtl);
ddb0ae00
ZW
16564
16565 s = (struct stack_local_entry *)
16566 ggc_alloc (sizeof (struct stack_local_entry));
16567 s->n = n;
16568 s->mode = mode;
808e68bb 16569 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
e075ae69 16570
ddb0ae00
ZW
16571 s->next = ix86_stack_locals;
16572 ix86_stack_locals = s;
16573 return s->rtl;
e075ae69 16574}
f996902d
RH
16575
16576/* Construct the SYMBOL_REF for the tls_get_addr function. */
16577
e2500fed 16578static GTY(()) rtx ix86_tls_symbol;
f996902d 16579rtx
b96a374d 16580ix86_tls_get_addr (void)
f996902d 16581{
f996902d 16582
e2500fed 16583 if (!ix86_tls_symbol)
f996902d 16584 {
75d38379 16585 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
5bf5a10b
AO
16586 (TARGET_ANY_GNU_TLS
16587 && !TARGET_64BIT)
75d38379
JJ
16588 ? "___tls_get_addr"
16589 : "__tls_get_addr");
f996902d
RH
16590 }
16591
e2500fed 16592 return ix86_tls_symbol;
f996902d 16593}
5bf5a10b
AO
16594
16595/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16596
16597static GTY(()) rtx ix86_tls_module_base_symbol;
16598rtx
16599ix86_tls_module_base (void)
16600{
16601
16602 if (!ix86_tls_module_base_symbol)
16603 {
16604 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
16605 "_TLS_MODULE_BASE_");
16606 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16607 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16608 }
16609
16610 return ix86_tls_module_base_symbol;
16611}
e075ae69
RH
16612\f
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.
   Returns the number of extra bytes (SIB and/or displacement) needed
   beyond the basic modrm byte.  */

int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  /* Auto-modify addresses (push/pop style) encode no explicit address
     bytes at all.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  /* Look through SUBREGs so the register-identity checks below work.  */
  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing: a bare displacement is a 4-byte absolute.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* constraint K: signed 8-bit immediate, so a 1-byte disp8
	     suffices when there is a base register.  */
	  if (base && satisfies_constraint_K (disp))
	    len = 1;
	  else
	    len = 4;
	}
      /* ebp always wants a displacement.  */
      else if (base == hard_frame_pointer_rtx)
	len = 1;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp, which always wants an index.  */
	  || base == stack_pointer_rtx
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx)
	len += 1;
    }

  return len;
}
79325812 16688
5bf0ebab
RH
16689/* Compute default value for "length_immediate" attribute. When SHORTFORM
16690 is set, expect that insn have 8bit immediate alternative. */
e075ae69 16691int
b96a374d 16692ix86_attr_length_immediate_default (rtx insn, int shortform)
e075ae69 16693{
6ef67412
JH
16694 int len = 0;
16695 int i;
6c698a6d 16696 extract_insn_cached (insn);
6ef67412
JH
16697 for (i = recog_data.n_operands - 1; i >= 0; --i)
16698 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 16699 {
d0396b79 16700 gcc_assert (!len);
f38840db 16701 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
6ef67412
JH
16702 len = 1;
16703 else
16704 {
16705 switch (get_attr_mode (insn))
16706 {
16707 case MODE_QI:
16708 len+=1;
16709 break;
16710 case MODE_HI:
16711 len+=2;
16712 break;
16713 case MODE_SI:
16714 len+=4;
16715 break;
14f73b5a
JH
16716 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
16717 case MODE_DI:
16718 len+=4;
16719 break;
6ef67412 16720 default:
c725bd79 16721 fatal_insn ("unknown insn mode", insn);
6ef67412
JH
16722 }
16723 }
3071fab5 16724 }
6ef67412
JH
16725 return len;
16726}
16727/* Compute default value for "length_address" attribute. */
16728int
b96a374d 16729ix86_attr_length_address_default (rtx insn)
6ef67412
JH
16730{
16731 int i;
9b73c90a
EB
16732
16733 if (get_attr_type (insn) == TYPE_LEA)
16734 {
16735 rtx set = PATTERN (insn);
d0396b79
NS
16736
16737 if (GET_CODE (set) == PARALLEL)
9b73c90a 16738 set = XVECEXP (set, 0, 0);
d0396b79
NS
16739
16740 gcc_assert (GET_CODE (set) == SET);
9b73c90a
EB
16741
16742 return memory_address_length (SET_SRC (set));
16743 }
16744
6c698a6d 16745 extract_insn_cached (insn);
1ccbefce 16746 for (i = recog_data.n_operands - 1; i >= 0; --i)
7656aee4 16747 if (MEM_P (recog_data.operand[i]))
e075ae69 16748 {
6ef67412 16749 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
16750 break;
16751 }
6ef67412 16752 return 0;
3f803cd9 16753}
e075ae69
RH
16754\f
16755/* Return the maximum number of instructions a cpu can issue. */
b657fc39 16756
c237e94a 16757static int
b96a374d 16758ix86_issue_rate (void)
b657fc39 16759{
9e555526 16760 switch (ix86_tune)
b657fc39 16761 {
e075ae69
RH
16762 case PROCESSOR_PENTIUM:
16763 case PROCESSOR_K6:
16764 return 2;
79325812 16765
e075ae69 16766 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
16767 case PROCESSOR_PENTIUM4:
16768 case PROCESSOR_ATHLON:
4977bab6 16769 case PROCESSOR_K8:
21efb4d4 16770 case PROCESSOR_AMDFAM10:
89c43c0a 16771 case PROCESSOR_NOCONA:
d326eaf0
JH
16772 case PROCESSOR_GENERIC32:
16773 case PROCESSOR_GENERIC64:
e075ae69 16774 return 3;
b657fc39 16775
05f85dbb
VM
16776 case PROCESSOR_CORE2:
16777 return 4;
16778
b657fc39 16779 default:
e075ae69 16780 return 1;
b657fc39 16781 }
b657fc39
L
16782}
16783
e075ae69
RH
16784/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16785 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 16786
e075ae69 16787static int
d1c78882 16788ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
e075ae69
RH
16789{
16790 rtx set, set2;
b657fc39 16791
e075ae69
RH
16792 /* Simplify the test for uninteresting insns. */
16793 if (insn_type != TYPE_SETCC
16794 && insn_type != TYPE_ICMOV
16795 && insn_type != TYPE_FCMOV
16796 && insn_type != TYPE_IBR)
16797 return 0;
b657fc39 16798
e075ae69
RH
16799 if ((set = single_set (dep_insn)) != 0)
16800 {
16801 set = SET_DEST (set);
16802 set2 = NULL_RTX;
16803 }
16804 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
16805 && XVECLEN (PATTERN (dep_insn), 0) == 2
16806 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
16807 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
16808 {
16809 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16810 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16811 }
78a0d70c
ZW
16812 else
16813 return 0;
b657fc39 16814
7656aee4 16815 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
78a0d70c 16816 return 0;
b657fc39 16817
f5143c46 16818 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
16819 not any other potentially set register. */
16820 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
16821 return 0;
16822
16823 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
16824 return 0;
16825
16826 return 1;
e075ae69 16827}
b657fc39 16828
e075ae69
RH
/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

static int
ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx addr;

  /* On Pentium, an LEA executes in the address-generation stage, so its
     source address participates in the AGI check.  */
  if (insn_type == TYPE_LEA
      && TARGET_PENTIUM)
    {
      addr = PATTERN (insn);

      if (GET_CODE (addr) == PARALLEL)
	addr = XVECEXP (addr, 0, 0);

      gcc_assert (GET_CODE (addr) == SET);

      addr = SET_SRC (addr);
    }
  else
    {
      /* Otherwise, use the address of the first memory operand, if any;
	 no memory operand means no address-generation dependency.  */
      int i;
      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (MEM_P (recog_data.operand[i]))
	  {
	    addr = XEXP (recog_data.operand[i], 0);
	    goto found;
	  }
      return 0;
    found:;
    }

  /* Dependency exists iff DEP_INSN writes something the address reads.  */
  return modified_in_p (addr, dep_insn);
}
a269a03c 16865
c237e94a 16866static int
b96a374d 16867ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
a269a03c 16868{
e075ae69 16869 enum attr_type insn_type, dep_insn_type;
8695f61e 16870 enum attr_memory memory;
e075ae69 16871 rtx set, set2;
9b00189f 16872 int dep_insn_code_number;
a269a03c 16873
d1f87653 16874 /* Anti and output dependencies have zero cost on all CPUs. */
e075ae69 16875 if (REG_NOTE_KIND (link) != 0)
309ada50 16876 return 0;
a269a03c 16877
9b00189f
JH
16878 dep_insn_code_number = recog_memoized (dep_insn);
16879
e075ae69 16880 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 16881 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 16882 return cost;
a269a03c 16883
1c71e60e
JH
16884 insn_type = get_attr_type (insn);
16885 dep_insn_type = get_attr_type (dep_insn);
9b00189f 16886
9e555526 16887 switch (ix86_tune)
a269a03c
JC
16888 {
16889 case PROCESSOR_PENTIUM:
e075ae69 16890 /* Address Generation Interlock adds a cycle of latency. */
d1c78882 16891 if (ix86_agi_dependent (insn, dep_insn, insn_type))
e075ae69
RH
16892 cost += 1;
16893
16894 /* ??? Compares pair with jump/setcc. */
d1c78882 16895 if (ix86_flags_dependent (insn, dep_insn, insn_type))
e075ae69
RH
16896 cost = 0;
16897
d1f87653 16898 /* Floating point stores require value to be ready one cycle earlier. */
0f290768 16899 if (insn_type == TYPE_FMOV
e075ae69 16900 && get_attr_memory (insn) == MEMORY_STORE
d1c78882 16901 && !ix86_agi_dependent (insn, dep_insn, insn_type))
e075ae69
RH
16902 cost += 1;
16903 break;
a269a03c 16904
e075ae69 16905 case PROCESSOR_PENTIUMPRO:
6ad48e84 16906 memory = get_attr_memory (insn);
e075ae69
RH
16907
16908 /* INT->FP conversion is expensive. */
16909 if (get_attr_fp_int_src (dep_insn))
16910 cost += 5;
16911
16912 /* There is one cycle extra latency between an FP op and a store. */
16913 if (insn_type == TYPE_FMOV
16914 && (set = single_set (dep_insn)) != NULL_RTX
16915 && (set2 = single_set (insn)) != NULL_RTX
16916 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
7656aee4 16917 && MEM_P (SET_DEST (set2)))
e075ae69 16918 cost += 1;
6ad48e84
JH
16919
16920 /* Show ability of reorder buffer to hide latency of load by executing
16921 in parallel with previous instruction in case
16922 previous instruction is not needed to compute the address. */
16923 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
d1c78882 16924 && !ix86_agi_dependent (insn, dep_insn, insn_type))
b96a374d 16925 {
6ad48e84
JH
16926 /* Claim moves to take one cycle, as core can issue one load
16927 at time and the next load can start cycle later. */
16928 if (dep_insn_type == TYPE_IMOV
16929 || dep_insn_type == TYPE_FMOV)
16930 cost = 1;
16931 else if (cost > 1)
16932 cost--;
16933 }
e075ae69 16934 break;
a269a03c 16935
e075ae69 16936 case PROCESSOR_K6:
6ad48e84 16937 memory = get_attr_memory (insn);
8695f61e 16938
e075ae69
RH
16939 /* The esp dependency is resolved before the instruction is really
16940 finished. */
16941 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16942 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16943 return 1;
a269a03c 16944
e075ae69
RH
16945 /* INT->FP conversion is expensive. */
16946 if (get_attr_fp_int_src (dep_insn))
16947 cost += 5;
6ad48e84
JH
16948
16949 /* Show ability of reorder buffer to hide latency of load by executing
16950 in parallel with previous instruction in case
16951 previous instruction is not needed to compute the address. */
16952 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
d1c78882 16953 && !ix86_agi_dependent (insn, dep_insn, insn_type))
b96a374d 16954 {
6ad48e84
JH
16955 /* Claim moves to take one cycle, as core can issue one load
16956 at time and the next load can start cycle later. */
16957 if (dep_insn_type == TYPE_IMOV
16958 || dep_insn_type == TYPE_FMOV)
16959 cost = 1;
16960 else if (cost > 2)
16961 cost -= 2;
16962 else
16963 cost = 1;
16964 }
a14003ee 16965 break;
e075ae69 16966
309ada50 16967 case PROCESSOR_ATHLON:
4977bab6 16968 case PROCESSOR_K8:
21efb4d4 16969 case PROCESSOR_AMDFAM10:
d326eaf0
JH
16970 case PROCESSOR_GENERIC32:
16971 case PROCESSOR_GENERIC64:
6ad48e84 16972 memory = get_attr_memory (insn);
6ad48e84 16973
6ad48e84
JH
16974 /* Show ability of reorder buffer to hide latency of load by executing
16975 in parallel with previous instruction in case
16976 previous instruction is not needed to compute the address. */
16977 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
d1c78882 16978 && !ix86_agi_dependent (insn, dep_insn, insn_type))
b96a374d 16979 {
26f74aa3
JH
16980 enum attr_unit unit = get_attr_unit (insn);
16981 int loadcost = 3;
16982
16983 /* Because of the difference between the length of integer and
16984 floating unit pipeline preparation stages, the memory operands
b96a374d 16985 for floating point are cheaper.
26f74aa3 16986
c51e6d85 16987 ??? For Athlon it the difference is most probably 2. */
26f74aa3
JH
16988 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16989 loadcost = 3;
16990 else
16991 loadcost = TARGET_ATHLON ? 2 : 0;
16992
16993 if (cost >= loadcost)
16994 cost -= loadcost;
6ad48e84
JH
16995 else
16996 cost = 0;
16997 }
309ada50 16998
a269a03c 16999 default:
a269a03c
JC
17000 break;
17001 }
17002
17003 return cost;
17004}
0a726ef1 17005
9b690711
RH
17006/* How many alternative schedules to try. This should be as wide as the
17007 scheduling freedom in the DFA, but no wider. Making this value too
17008 large results extra work for the scheduler. */
17009
17010static int
b96a374d 17011ia32_multipass_dfa_lookahead (void)
9b690711 17012{
8383d43c
UB
17013 switch (ix86_tune)
17014 {
17015 case PROCESSOR_PENTIUM:
17016 return 2;
56bab446 17017
8383d43c
UB
17018 case PROCESSOR_PENTIUMPRO:
17019 case PROCESSOR_K6:
17020 return 1;
56bab446 17021
8383d43c
UB
17022 default:
17023 return 0;
17024 }
9b690711
RH
17025}
17026
0e4970d7 17027\f
a7180f70
BS
17028/* Compute the alignment given to a constant that is being placed in memory.
17029 EXP is the constant and ALIGN is the alignment that the object would
17030 ordinarily have.
17031 The value of this function is used instead of that alignment to align
17032 the object. */
17033
17034int
b96a374d 17035ix86_constant_alignment (tree exp, int align)
a7180f70 17036{
be3d4789
UB
17037 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
17038 || TREE_CODE (exp) == INTEGER_CST)
a7180f70
BS
17039 {
17040 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
17041 return 64;
17042 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
17043 return 128;
17044 }
4137ba7a
JJ
17045 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
17046 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
17047 return BITS_PER_WORD;
a7180f70
BS
17048
17049 return align;
17050}
17051
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  /* Cap the boost at a word when optimizing for size, otherwise at 256
     bits (or whatever the object format can express).  */
  int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);

  /* Large aggregates of known constant size get boosted to MAX_ALIGN.  */
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    align = max_align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  /* The per-type rules below align DFmode data to 64 bits and
     128-bit-mode data to 128 bits, looking through arrays, complex
     types, and the first field of records/unions.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
17118
76fe54f0
L
/* Compute the alignment for a local variable or a stack slot.  TYPE is
   the data type, MODE is the widest mode available and ALIGN is the
   alignment that the object would ordinarily have.  The value of this
   macro is used instead of that alignment to align the object.  */

unsigned int
ix86_local_alignment (tree type, enum machine_mode mode,
		      unsigned int align)
{
  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  /* Per-type boosts mirroring ix86_data_alignment: DFmode data to
     64 bits, 128-bit-mode data to 128 bits.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {

      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
0ed08620
JH
17184\f
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      /* 0xb9: presumably movl $imm32 into the static-chain register,
	 followed by the chain value -- TODO confirm against i386.h's
	 static chain register choice.  */
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      /* 0xe9: jmp rel32, displacement computed above.  */
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load address using shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but kernel does not use
	 trampolines at the moment.  */
      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  /* Opcode bytes are stored little-endian via HImode moves;
	     0xbb41 is presumably 41 bb = movl $imm32, %r11d -- TODO
	     confirm byte order.  */
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  /* Full 64-bit address: movabs into r11 (49 bb imm64).  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load static chain using movabs to r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump to the r11 */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      /* Guard against emitting more bytes than the slot provides.  */
      gcc_assert (offset <= TRAMPOLINE_SIZE);
    }

#ifdef ENABLE_EXECUTE_STACK
  /* Some targets need an explicit runtime call to make the trampoline's
     stack page executable.  */
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}
eeb06b1b 17247\f
eb701deb
RH
17248/* Codes for all the SSE/MMX builtins. */
17249enum ix86_builtins
17250{
17251 IX86_BUILTIN_ADDPS,
17252 IX86_BUILTIN_ADDSS,
17253 IX86_BUILTIN_DIVPS,
17254 IX86_BUILTIN_DIVSS,
17255 IX86_BUILTIN_MULPS,
17256 IX86_BUILTIN_MULSS,
17257 IX86_BUILTIN_SUBPS,
17258 IX86_BUILTIN_SUBSS,
17259
17260 IX86_BUILTIN_CMPEQPS,
17261 IX86_BUILTIN_CMPLTPS,
17262 IX86_BUILTIN_CMPLEPS,
17263 IX86_BUILTIN_CMPGTPS,
17264 IX86_BUILTIN_CMPGEPS,
17265 IX86_BUILTIN_CMPNEQPS,
17266 IX86_BUILTIN_CMPNLTPS,
17267 IX86_BUILTIN_CMPNLEPS,
17268 IX86_BUILTIN_CMPNGTPS,
17269 IX86_BUILTIN_CMPNGEPS,
17270 IX86_BUILTIN_CMPORDPS,
17271 IX86_BUILTIN_CMPUNORDPS,
eb701deb
RH
17272 IX86_BUILTIN_CMPEQSS,
17273 IX86_BUILTIN_CMPLTSS,
17274 IX86_BUILTIN_CMPLESS,
17275 IX86_BUILTIN_CMPNEQSS,
17276 IX86_BUILTIN_CMPNLTSS,
17277 IX86_BUILTIN_CMPNLESS,
17278 IX86_BUILTIN_CMPNGTSS,
17279 IX86_BUILTIN_CMPNGESS,
17280 IX86_BUILTIN_CMPORDSS,
17281 IX86_BUILTIN_CMPUNORDSS,
eb701deb
RH
17282
17283 IX86_BUILTIN_COMIEQSS,
17284 IX86_BUILTIN_COMILTSS,
17285 IX86_BUILTIN_COMILESS,
17286 IX86_BUILTIN_COMIGTSS,
17287 IX86_BUILTIN_COMIGESS,
17288 IX86_BUILTIN_COMINEQSS,
17289 IX86_BUILTIN_UCOMIEQSS,
17290 IX86_BUILTIN_UCOMILTSS,
17291 IX86_BUILTIN_UCOMILESS,
17292 IX86_BUILTIN_UCOMIGTSS,
17293 IX86_BUILTIN_UCOMIGESS,
17294 IX86_BUILTIN_UCOMINEQSS,
17295
17296 IX86_BUILTIN_CVTPI2PS,
17297 IX86_BUILTIN_CVTPS2PI,
17298 IX86_BUILTIN_CVTSI2SS,
17299 IX86_BUILTIN_CVTSI642SS,
17300 IX86_BUILTIN_CVTSS2SI,
17301 IX86_BUILTIN_CVTSS2SI64,
17302 IX86_BUILTIN_CVTTPS2PI,
17303 IX86_BUILTIN_CVTTSS2SI,
17304 IX86_BUILTIN_CVTTSS2SI64,
17305
17306 IX86_BUILTIN_MAXPS,
17307 IX86_BUILTIN_MAXSS,
17308 IX86_BUILTIN_MINPS,
17309 IX86_BUILTIN_MINSS,
17310
17311 IX86_BUILTIN_LOADUPS,
17312 IX86_BUILTIN_STOREUPS,
17313 IX86_BUILTIN_MOVSS,
17314
17315 IX86_BUILTIN_MOVHLPS,
17316 IX86_BUILTIN_MOVLHPS,
17317 IX86_BUILTIN_LOADHPS,
17318 IX86_BUILTIN_LOADLPS,
17319 IX86_BUILTIN_STOREHPS,
17320 IX86_BUILTIN_STORELPS,
17321
17322 IX86_BUILTIN_MASKMOVQ,
17323 IX86_BUILTIN_MOVMSKPS,
17324 IX86_BUILTIN_PMOVMSKB,
17325
17326 IX86_BUILTIN_MOVNTPS,
17327 IX86_BUILTIN_MOVNTQ,
17328
17329 IX86_BUILTIN_LOADDQU,
17330 IX86_BUILTIN_STOREDQU,
eb701deb
RH
17331
17332 IX86_BUILTIN_PACKSSWB,
17333 IX86_BUILTIN_PACKSSDW,
17334 IX86_BUILTIN_PACKUSWB,
17335
17336 IX86_BUILTIN_PADDB,
17337 IX86_BUILTIN_PADDW,
17338 IX86_BUILTIN_PADDD,
17339 IX86_BUILTIN_PADDQ,
17340 IX86_BUILTIN_PADDSB,
17341 IX86_BUILTIN_PADDSW,
17342 IX86_BUILTIN_PADDUSB,
17343 IX86_BUILTIN_PADDUSW,
17344 IX86_BUILTIN_PSUBB,
17345 IX86_BUILTIN_PSUBW,
17346 IX86_BUILTIN_PSUBD,
17347 IX86_BUILTIN_PSUBQ,
17348 IX86_BUILTIN_PSUBSB,
17349 IX86_BUILTIN_PSUBSW,
17350 IX86_BUILTIN_PSUBUSB,
17351 IX86_BUILTIN_PSUBUSW,
17352
17353 IX86_BUILTIN_PAND,
17354 IX86_BUILTIN_PANDN,
17355 IX86_BUILTIN_POR,
17356 IX86_BUILTIN_PXOR,
17357
17358 IX86_BUILTIN_PAVGB,
17359 IX86_BUILTIN_PAVGW,
17360
17361 IX86_BUILTIN_PCMPEQB,
17362 IX86_BUILTIN_PCMPEQW,
17363 IX86_BUILTIN_PCMPEQD,
17364 IX86_BUILTIN_PCMPGTB,
17365 IX86_BUILTIN_PCMPGTW,
17366 IX86_BUILTIN_PCMPGTD,
17367
17368 IX86_BUILTIN_PMADDWD,
17369
17370 IX86_BUILTIN_PMAXSW,
17371 IX86_BUILTIN_PMAXUB,
17372 IX86_BUILTIN_PMINSW,
17373 IX86_BUILTIN_PMINUB,
17374
17375 IX86_BUILTIN_PMULHUW,
17376 IX86_BUILTIN_PMULHW,
17377 IX86_BUILTIN_PMULLW,
17378
17379 IX86_BUILTIN_PSADBW,
17380 IX86_BUILTIN_PSHUFW,
17381
17382 IX86_BUILTIN_PSLLW,
17383 IX86_BUILTIN_PSLLD,
17384 IX86_BUILTIN_PSLLQ,
17385 IX86_BUILTIN_PSRAW,
17386 IX86_BUILTIN_PSRAD,
17387 IX86_BUILTIN_PSRLW,
17388 IX86_BUILTIN_PSRLD,
17389 IX86_BUILTIN_PSRLQ,
17390 IX86_BUILTIN_PSLLWI,
17391 IX86_BUILTIN_PSLLDI,
17392 IX86_BUILTIN_PSLLQI,
17393 IX86_BUILTIN_PSRAWI,
17394 IX86_BUILTIN_PSRADI,
17395 IX86_BUILTIN_PSRLWI,
17396 IX86_BUILTIN_PSRLDI,
17397 IX86_BUILTIN_PSRLQI,
17398
17399 IX86_BUILTIN_PUNPCKHBW,
17400 IX86_BUILTIN_PUNPCKHWD,
17401 IX86_BUILTIN_PUNPCKHDQ,
17402 IX86_BUILTIN_PUNPCKLBW,
17403 IX86_BUILTIN_PUNPCKLWD,
17404 IX86_BUILTIN_PUNPCKLDQ,
17405
17406 IX86_BUILTIN_SHUFPS,
17407
17408 IX86_BUILTIN_RCPPS,
17409 IX86_BUILTIN_RCPSS,
17410 IX86_BUILTIN_RSQRTPS,
3dc0f23a 17411 IX86_BUILTIN_RSQRTPS_NR,
eb701deb 17412 IX86_BUILTIN_RSQRTSS,
6b889d89 17413 IX86_BUILTIN_RSQRTF,
eb701deb 17414 IX86_BUILTIN_SQRTPS,
3dc0f23a 17415 IX86_BUILTIN_SQRTPS_NR,
eb701deb
RH
17416 IX86_BUILTIN_SQRTSS,
17417
17418 IX86_BUILTIN_UNPCKHPS,
17419 IX86_BUILTIN_UNPCKLPS,
17420
17421 IX86_BUILTIN_ANDPS,
17422 IX86_BUILTIN_ANDNPS,
17423 IX86_BUILTIN_ORPS,
17424 IX86_BUILTIN_XORPS,
17425
17426 IX86_BUILTIN_EMMS,
17427 IX86_BUILTIN_LDMXCSR,
17428 IX86_BUILTIN_STMXCSR,
17429 IX86_BUILTIN_SFENCE,
17430
17431 /* 3DNow! Original */
17432 IX86_BUILTIN_FEMMS,
17433 IX86_BUILTIN_PAVGUSB,
17434 IX86_BUILTIN_PF2ID,
17435 IX86_BUILTIN_PFACC,
17436 IX86_BUILTIN_PFADD,
17437 IX86_BUILTIN_PFCMPEQ,
17438 IX86_BUILTIN_PFCMPGE,
17439 IX86_BUILTIN_PFCMPGT,
17440 IX86_BUILTIN_PFMAX,
17441 IX86_BUILTIN_PFMIN,
17442 IX86_BUILTIN_PFMUL,
17443 IX86_BUILTIN_PFRCP,
17444 IX86_BUILTIN_PFRCPIT1,
17445 IX86_BUILTIN_PFRCPIT2,
17446 IX86_BUILTIN_PFRSQIT1,
17447 IX86_BUILTIN_PFRSQRT,
17448 IX86_BUILTIN_PFSUB,
17449 IX86_BUILTIN_PFSUBR,
17450 IX86_BUILTIN_PI2FD,
17451 IX86_BUILTIN_PMULHRW,
17452
17453 /* 3DNow! Athlon Extensions */
17454 IX86_BUILTIN_PF2IW,
17455 IX86_BUILTIN_PFNACC,
17456 IX86_BUILTIN_PFPNACC,
17457 IX86_BUILTIN_PI2FW,
17458 IX86_BUILTIN_PSWAPDSI,
17459 IX86_BUILTIN_PSWAPDSF,
17460
17461 /* SSE2 */
17462 IX86_BUILTIN_ADDPD,
17463 IX86_BUILTIN_ADDSD,
17464 IX86_BUILTIN_DIVPD,
17465 IX86_BUILTIN_DIVSD,
17466 IX86_BUILTIN_MULPD,
17467 IX86_BUILTIN_MULSD,
17468 IX86_BUILTIN_SUBPD,
17469 IX86_BUILTIN_SUBSD,
17470
17471 IX86_BUILTIN_CMPEQPD,
17472 IX86_BUILTIN_CMPLTPD,
17473 IX86_BUILTIN_CMPLEPD,
17474 IX86_BUILTIN_CMPGTPD,
17475 IX86_BUILTIN_CMPGEPD,
17476 IX86_BUILTIN_CMPNEQPD,
17477 IX86_BUILTIN_CMPNLTPD,
17478 IX86_BUILTIN_CMPNLEPD,
17479 IX86_BUILTIN_CMPNGTPD,
17480 IX86_BUILTIN_CMPNGEPD,
17481 IX86_BUILTIN_CMPORDPD,
17482 IX86_BUILTIN_CMPUNORDPD,
eb701deb
RH
17483 IX86_BUILTIN_CMPEQSD,
17484 IX86_BUILTIN_CMPLTSD,
17485 IX86_BUILTIN_CMPLESD,
17486 IX86_BUILTIN_CMPNEQSD,
17487 IX86_BUILTIN_CMPNLTSD,
17488 IX86_BUILTIN_CMPNLESD,
17489 IX86_BUILTIN_CMPORDSD,
17490 IX86_BUILTIN_CMPUNORDSD,
eb701deb
RH
17491
17492 IX86_BUILTIN_COMIEQSD,
17493 IX86_BUILTIN_COMILTSD,
17494 IX86_BUILTIN_COMILESD,
17495 IX86_BUILTIN_COMIGTSD,
17496 IX86_BUILTIN_COMIGESD,
17497 IX86_BUILTIN_COMINEQSD,
17498 IX86_BUILTIN_UCOMIEQSD,
17499 IX86_BUILTIN_UCOMILTSD,
17500 IX86_BUILTIN_UCOMILESD,
17501 IX86_BUILTIN_UCOMIGTSD,
17502 IX86_BUILTIN_UCOMIGESD,
17503 IX86_BUILTIN_UCOMINEQSD,
17504
17505 IX86_BUILTIN_MAXPD,
17506 IX86_BUILTIN_MAXSD,
17507 IX86_BUILTIN_MINPD,
17508 IX86_BUILTIN_MINSD,
17509
17510 IX86_BUILTIN_ANDPD,
17511 IX86_BUILTIN_ANDNPD,
17512 IX86_BUILTIN_ORPD,
17513 IX86_BUILTIN_XORPD,
17514
17515 IX86_BUILTIN_SQRTPD,
17516 IX86_BUILTIN_SQRTSD,
17517
17518 IX86_BUILTIN_UNPCKHPD,
17519 IX86_BUILTIN_UNPCKLPD,
17520
17521 IX86_BUILTIN_SHUFPD,
17522
17523 IX86_BUILTIN_LOADUPD,
17524 IX86_BUILTIN_STOREUPD,
17525 IX86_BUILTIN_MOVSD,
17526
17527 IX86_BUILTIN_LOADHPD,
17528 IX86_BUILTIN_LOADLPD,
17529
17530 IX86_BUILTIN_CVTDQ2PD,
17531 IX86_BUILTIN_CVTDQ2PS,
17532
17533 IX86_BUILTIN_CVTPD2DQ,
17534 IX86_BUILTIN_CVTPD2PI,
17535 IX86_BUILTIN_CVTPD2PS,
17536 IX86_BUILTIN_CVTTPD2DQ,
17537 IX86_BUILTIN_CVTTPD2PI,
17538
17539 IX86_BUILTIN_CVTPI2PD,
17540 IX86_BUILTIN_CVTSI2SD,
17541 IX86_BUILTIN_CVTSI642SD,
17542
17543 IX86_BUILTIN_CVTSD2SI,
17544 IX86_BUILTIN_CVTSD2SI64,
17545 IX86_BUILTIN_CVTSD2SS,
17546 IX86_BUILTIN_CVTSS2SD,
17547 IX86_BUILTIN_CVTTSD2SI,
17548 IX86_BUILTIN_CVTTSD2SI64,
17549
17550 IX86_BUILTIN_CVTPS2DQ,
17551 IX86_BUILTIN_CVTPS2PD,
17552 IX86_BUILTIN_CVTTPS2DQ,
17553
17554 IX86_BUILTIN_MOVNTI,
17555 IX86_BUILTIN_MOVNTPD,
17556 IX86_BUILTIN_MOVNTDQ,
17557
17558 /* SSE2 MMX */
17559 IX86_BUILTIN_MASKMOVDQU,
17560 IX86_BUILTIN_MOVMSKPD,
17561 IX86_BUILTIN_PMOVMSKB128,
eb701deb
RH
17562
17563 IX86_BUILTIN_PACKSSWB128,
17564 IX86_BUILTIN_PACKSSDW128,
17565 IX86_BUILTIN_PACKUSWB128,
17566
17567 IX86_BUILTIN_PADDB128,
17568 IX86_BUILTIN_PADDW128,
17569 IX86_BUILTIN_PADDD128,
17570 IX86_BUILTIN_PADDQ128,
17571 IX86_BUILTIN_PADDSB128,
17572 IX86_BUILTIN_PADDSW128,
17573 IX86_BUILTIN_PADDUSB128,
17574 IX86_BUILTIN_PADDUSW128,
17575 IX86_BUILTIN_PSUBB128,
17576 IX86_BUILTIN_PSUBW128,
17577 IX86_BUILTIN_PSUBD128,
17578 IX86_BUILTIN_PSUBQ128,
17579 IX86_BUILTIN_PSUBSB128,
17580 IX86_BUILTIN_PSUBSW128,
17581 IX86_BUILTIN_PSUBUSB128,
17582 IX86_BUILTIN_PSUBUSW128,
17583
17584 IX86_BUILTIN_PAND128,
17585 IX86_BUILTIN_PANDN128,
17586 IX86_BUILTIN_POR128,
17587 IX86_BUILTIN_PXOR128,
17588
17589 IX86_BUILTIN_PAVGB128,
17590 IX86_BUILTIN_PAVGW128,
17591
17592 IX86_BUILTIN_PCMPEQB128,
17593 IX86_BUILTIN_PCMPEQW128,
17594 IX86_BUILTIN_PCMPEQD128,
17595 IX86_BUILTIN_PCMPGTB128,
17596 IX86_BUILTIN_PCMPGTW128,
17597 IX86_BUILTIN_PCMPGTD128,
17598
17599 IX86_BUILTIN_PMADDWD128,
17600
17601 IX86_BUILTIN_PMAXSW128,
17602 IX86_BUILTIN_PMAXUB128,
17603 IX86_BUILTIN_PMINSW128,
17604 IX86_BUILTIN_PMINUB128,
17605
17606 IX86_BUILTIN_PMULUDQ,
17607 IX86_BUILTIN_PMULUDQ128,
17608 IX86_BUILTIN_PMULHUW128,
17609 IX86_BUILTIN_PMULHW128,
17610 IX86_BUILTIN_PMULLW128,
17611
17612 IX86_BUILTIN_PSADBW128,
17613 IX86_BUILTIN_PSHUFHW,
17614 IX86_BUILTIN_PSHUFLW,
17615 IX86_BUILTIN_PSHUFD,
17616
eb701deb
RH
17617 IX86_BUILTIN_PSLLDQI128,
17618 IX86_BUILTIN_PSLLWI128,
17619 IX86_BUILTIN_PSLLDI128,
17620 IX86_BUILTIN_PSLLQI128,
17621 IX86_BUILTIN_PSRAWI128,
17622 IX86_BUILTIN_PSRADI128,
17623 IX86_BUILTIN_PSRLDQI128,
17624 IX86_BUILTIN_PSRLWI128,
17625 IX86_BUILTIN_PSRLDI128,
17626 IX86_BUILTIN_PSRLQI128,
17627
24bfafbc
RH
17628 IX86_BUILTIN_PSLLDQ128,
17629 IX86_BUILTIN_PSLLW128,
17630 IX86_BUILTIN_PSLLD128,
17631 IX86_BUILTIN_PSLLQ128,
17632 IX86_BUILTIN_PSRAW128,
17633 IX86_BUILTIN_PSRAD128,
17634 IX86_BUILTIN_PSRLW128,
17635 IX86_BUILTIN_PSRLD128,
17636 IX86_BUILTIN_PSRLQ128,
17637
eb701deb
RH
17638 IX86_BUILTIN_PUNPCKHBW128,
17639 IX86_BUILTIN_PUNPCKHWD128,
17640 IX86_BUILTIN_PUNPCKHDQ128,
17641 IX86_BUILTIN_PUNPCKHQDQ128,
17642 IX86_BUILTIN_PUNPCKLBW128,
17643 IX86_BUILTIN_PUNPCKLWD128,
17644 IX86_BUILTIN_PUNPCKLDQ128,
17645 IX86_BUILTIN_PUNPCKLQDQ128,
17646
17647 IX86_BUILTIN_CLFLUSH,
17648 IX86_BUILTIN_MFENCE,
17649 IX86_BUILTIN_LFENCE,
17650
f318ff0a 17651 /* SSE3. */
eb701deb
RH
17652 IX86_BUILTIN_ADDSUBPS,
17653 IX86_BUILTIN_HADDPS,
17654 IX86_BUILTIN_HSUBPS,
17655 IX86_BUILTIN_MOVSHDUP,
17656 IX86_BUILTIN_MOVSLDUP,
17657 IX86_BUILTIN_ADDSUBPD,
17658 IX86_BUILTIN_HADDPD,
17659 IX86_BUILTIN_HSUBPD,
17660 IX86_BUILTIN_LDDQU,
17661
17662 IX86_BUILTIN_MONITOR,
17663 IX86_BUILTIN_MWAIT,
17664
b1875f52
L
17665 /* SSSE3. */
17666 IX86_BUILTIN_PHADDW,
17667 IX86_BUILTIN_PHADDD,
17668 IX86_BUILTIN_PHADDSW,
17669 IX86_BUILTIN_PHSUBW,
17670 IX86_BUILTIN_PHSUBD,
17671 IX86_BUILTIN_PHSUBSW,
17672 IX86_BUILTIN_PMADDUBSW,
17673 IX86_BUILTIN_PMULHRSW,
17674 IX86_BUILTIN_PSHUFB,
17675 IX86_BUILTIN_PSIGNB,
17676 IX86_BUILTIN_PSIGNW,
17677 IX86_BUILTIN_PSIGND,
17678 IX86_BUILTIN_PALIGNR,
17679 IX86_BUILTIN_PABSB,
17680 IX86_BUILTIN_PABSW,
17681 IX86_BUILTIN_PABSD,
17682
17683 IX86_BUILTIN_PHADDW128,
17684 IX86_BUILTIN_PHADDD128,
17685 IX86_BUILTIN_PHADDSW128,
17686 IX86_BUILTIN_PHSUBW128,
17687 IX86_BUILTIN_PHSUBD128,
17688 IX86_BUILTIN_PHSUBSW128,
17689 IX86_BUILTIN_PMADDUBSW128,
17690 IX86_BUILTIN_PMULHRSW128,
17691 IX86_BUILTIN_PSHUFB128,
17692 IX86_BUILTIN_PSIGNB128,
17693 IX86_BUILTIN_PSIGNW128,
17694 IX86_BUILTIN_PSIGND128,
17695 IX86_BUILTIN_PALIGNR128,
17696 IX86_BUILTIN_PABSB128,
17697 IX86_BUILTIN_PABSW128,
17698 IX86_BUILTIN_PABSD128,
17699
21efb4d4
HJ
17700 /* AMDFAM10 - SSE4A New Instructions. */
17701 IX86_BUILTIN_MOVNTSD,
17702 IX86_BUILTIN_MOVNTSS,
17703 IX86_BUILTIN_EXTRQI,
17704 IX86_BUILTIN_EXTRQ,
17705 IX86_BUILTIN_INSERTQI,
17706 IX86_BUILTIN_INSERTQ,
17707
9a5cee02
L
17708 /* SSE4.1. */
17709 IX86_BUILTIN_BLENDPD,
17710 IX86_BUILTIN_BLENDPS,
17711 IX86_BUILTIN_BLENDVPD,
17712 IX86_BUILTIN_BLENDVPS,
17713 IX86_BUILTIN_PBLENDVB128,
17714 IX86_BUILTIN_PBLENDW128,
17715
17716 IX86_BUILTIN_DPPD,
17717 IX86_BUILTIN_DPPS,
17718
17719 IX86_BUILTIN_INSERTPS128,
17720
17721 IX86_BUILTIN_MOVNTDQA,
17722 IX86_BUILTIN_MPSADBW128,
17723 IX86_BUILTIN_PACKUSDW128,
17724 IX86_BUILTIN_PCMPEQQ,
17725 IX86_BUILTIN_PHMINPOSUW128,
17726
17727 IX86_BUILTIN_PMAXSB128,
17728 IX86_BUILTIN_PMAXSD128,
17729 IX86_BUILTIN_PMAXUD128,
17730 IX86_BUILTIN_PMAXUW128,
17731
17732 IX86_BUILTIN_PMINSB128,
17733 IX86_BUILTIN_PMINSD128,
17734 IX86_BUILTIN_PMINUD128,
17735 IX86_BUILTIN_PMINUW128,
17736
17737 IX86_BUILTIN_PMOVSXBW128,
17738 IX86_BUILTIN_PMOVSXBD128,
17739 IX86_BUILTIN_PMOVSXBQ128,
17740 IX86_BUILTIN_PMOVSXWD128,
17741 IX86_BUILTIN_PMOVSXWQ128,
17742 IX86_BUILTIN_PMOVSXDQ128,
17743
17744 IX86_BUILTIN_PMOVZXBW128,
17745 IX86_BUILTIN_PMOVZXBD128,
17746 IX86_BUILTIN_PMOVZXBQ128,
17747 IX86_BUILTIN_PMOVZXWD128,
17748 IX86_BUILTIN_PMOVZXWQ128,
17749 IX86_BUILTIN_PMOVZXDQ128,
17750
17751 IX86_BUILTIN_PMULDQ128,
17752 IX86_BUILTIN_PMULLD128,
17753
17754 IX86_BUILTIN_ROUNDPD,
17755 IX86_BUILTIN_ROUNDPS,
17756 IX86_BUILTIN_ROUNDSD,
17757 IX86_BUILTIN_ROUNDSS,
17758
17759 IX86_BUILTIN_PTESTZ,
17760 IX86_BUILTIN_PTESTC,
17761 IX86_BUILTIN_PTESTNZC,
17762
eb701deb
RH
17763 IX86_BUILTIN_VEC_INIT_V2SI,
17764 IX86_BUILTIN_VEC_INIT_V4HI,
17765 IX86_BUILTIN_VEC_INIT_V8QI,
17766 IX86_BUILTIN_VEC_EXT_V2DF,
17767 IX86_BUILTIN_VEC_EXT_V2DI,
17768 IX86_BUILTIN_VEC_EXT_V4SF,
ed9b5396 17769 IX86_BUILTIN_VEC_EXT_V4SI,
eb701deb 17770 IX86_BUILTIN_VEC_EXT_V8HI,
0f2698d0 17771 IX86_BUILTIN_VEC_EXT_V2SI,
eb701deb 17772 IX86_BUILTIN_VEC_EXT_V4HI,
9a5cee02
L
17773 IX86_BUILTIN_VEC_EXT_V16QI,
17774 IX86_BUILTIN_VEC_SET_V2DI,
17775 IX86_BUILTIN_VEC_SET_V4SF,
17776 IX86_BUILTIN_VEC_SET_V4SI,
eb701deb
RH
17777 IX86_BUILTIN_VEC_SET_V8HI,
17778 IX86_BUILTIN_VEC_SET_V4HI,
9a5cee02 17779 IX86_BUILTIN_VEC_SET_V16QI,
eb701deb 17780
b40c4f68
UB
17781 IX86_BUILTIN_VEC_PACK_SFIX,
17782
3b8dd071
L
17783 /* SSE4.2. */
17784 IX86_BUILTIN_CRC32QI,
17785 IX86_BUILTIN_CRC32HI,
17786 IX86_BUILTIN_CRC32SI,
17787 IX86_BUILTIN_CRC32DI,
17788
06f4e35d
L
17789 IX86_BUILTIN_PCMPESTRI128,
17790 IX86_BUILTIN_PCMPESTRM128,
17791 IX86_BUILTIN_PCMPESTRA128,
17792 IX86_BUILTIN_PCMPESTRC128,
17793 IX86_BUILTIN_PCMPESTRO128,
17794 IX86_BUILTIN_PCMPESTRS128,
17795 IX86_BUILTIN_PCMPESTRZ128,
17796 IX86_BUILTIN_PCMPISTRI128,
17797 IX86_BUILTIN_PCMPISTRM128,
17798 IX86_BUILTIN_PCMPISTRA128,
17799 IX86_BUILTIN_PCMPISTRC128,
17800 IX86_BUILTIN_PCMPISTRO128,
17801 IX86_BUILTIN_PCMPISTRS128,
17802 IX86_BUILTIN_PCMPISTRZ128,
17803
3b8dd071
L
17804 IX86_BUILTIN_PCMPGTQ,
17805
8b96a312
L
17806 /* AES instructions */
17807 IX86_BUILTIN_AESENC128,
17808 IX86_BUILTIN_AESENCLAST128,
17809 IX86_BUILTIN_AESDEC128,
17810 IX86_BUILTIN_AESDECLAST128,
17811 IX86_BUILTIN_AESIMC128,
17812 IX86_BUILTIN_AESKEYGENASSIST128,
17813
17814 /* PCLMUL instruction */
17815 IX86_BUILTIN_PCLMULQDQ128,
17816
edc5bbcd
UB
17817 /* TFmode support builtins. */
17818 IX86_BUILTIN_INFQ,
17819 IX86_BUILTIN_FABSQ,
17820 IX86_BUILTIN_COPYSIGNQ,
17821
04e1d06b
MM
17822 /* SSE5 instructions */
17823 IX86_BUILTIN_FMADDSS,
17824 IX86_BUILTIN_FMADDSD,
17825 IX86_BUILTIN_FMADDPS,
17826 IX86_BUILTIN_FMADDPD,
17827 IX86_BUILTIN_FMSUBSS,
17828 IX86_BUILTIN_FMSUBSD,
17829 IX86_BUILTIN_FMSUBPS,
17830 IX86_BUILTIN_FMSUBPD,
17831 IX86_BUILTIN_FNMADDSS,
17832 IX86_BUILTIN_FNMADDSD,
17833 IX86_BUILTIN_FNMADDPS,
17834 IX86_BUILTIN_FNMADDPD,
17835 IX86_BUILTIN_FNMSUBSS,
17836 IX86_BUILTIN_FNMSUBSD,
17837 IX86_BUILTIN_FNMSUBPS,
17838 IX86_BUILTIN_FNMSUBPD,
17839 IX86_BUILTIN_PCMOV_V2DI,
17840 IX86_BUILTIN_PCMOV_V4SI,
17841 IX86_BUILTIN_PCMOV_V8HI,
17842 IX86_BUILTIN_PCMOV_V16QI,
17843 IX86_BUILTIN_PCMOV_V4SF,
17844 IX86_BUILTIN_PCMOV_V2DF,
17845 IX86_BUILTIN_PPERM,
17846 IX86_BUILTIN_PERMPS,
17847 IX86_BUILTIN_PERMPD,
17848 IX86_BUILTIN_PMACSSWW,
17849 IX86_BUILTIN_PMACSWW,
17850 IX86_BUILTIN_PMACSSWD,
17851 IX86_BUILTIN_PMACSWD,
17852 IX86_BUILTIN_PMACSSDD,
17853 IX86_BUILTIN_PMACSDD,
17854 IX86_BUILTIN_PMACSSDQL,
17855 IX86_BUILTIN_PMACSSDQH,
17856 IX86_BUILTIN_PMACSDQL,
17857 IX86_BUILTIN_PMACSDQH,
17858 IX86_BUILTIN_PMADCSSWD,
17859 IX86_BUILTIN_PMADCSWD,
17860 IX86_BUILTIN_PHADDBW,
17861 IX86_BUILTIN_PHADDBD,
17862 IX86_BUILTIN_PHADDBQ,
17863 IX86_BUILTIN_PHADDWD,
17864 IX86_BUILTIN_PHADDWQ,
17865 IX86_BUILTIN_PHADDDQ,
17866 IX86_BUILTIN_PHADDUBW,
17867 IX86_BUILTIN_PHADDUBD,
17868 IX86_BUILTIN_PHADDUBQ,
17869 IX86_BUILTIN_PHADDUWD,
17870 IX86_BUILTIN_PHADDUWQ,
17871 IX86_BUILTIN_PHADDUDQ,
17872 IX86_BUILTIN_PHSUBBW,
17873 IX86_BUILTIN_PHSUBWD,
17874 IX86_BUILTIN_PHSUBDQ,
17875 IX86_BUILTIN_PROTB,
17876 IX86_BUILTIN_PROTW,
17877 IX86_BUILTIN_PROTD,
17878 IX86_BUILTIN_PROTQ,
17879 IX86_BUILTIN_PROTB_IMM,
17880 IX86_BUILTIN_PROTW_IMM,
17881 IX86_BUILTIN_PROTD_IMM,
17882 IX86_BUILTIN_PROTQ_IMM,
17883 IX86_BUILTIN_PSHLB,
17884 IX86_BUILTIN_PSHLW,
17885 IX86_BUILTIN_PSHLD,
17886 IX86_BUILTIN_PSHLQ,
17887 IX86_BUILTIN_PSHAB,
17888 IX86_BUILTIN_PSHAW,
17889 IX86_BUILTIN_PSHAD,
17890 IX86_BUILTIN_PSHAQ,
17891 IX86_BUILTIN_FRCZSS,
17892 IX86_BUILTIN_FRCZSD,
17893 IX86_BUILTIN_FRCZPS,
17894 IX86_BUILTIN_FRCZPD,
17895 IX86_BUILTIN_CVTPH2PS,
17896 IX86_BUILTIN_CVTPS2PH,
17897
17898 IX86_BUILTIN_COMEQSS,
17899 IX86_BUILTIN_COMNESS,
17900 IX86_BUILTIN_COMLTSS,
17901 IX86_BUILTIN_COMLESS,
17902 IX86_BUILTIN_COMGTSS,
17903 IX86_BUILTIN_COMGESS,
17904 IX86_BUILTIN_COMUEQSS,
17905 IX86_BUILTIN_COMUNESS,
17906 IX86_BUILTIN_COMULTSS,
17907 IX86_BUILTIN_COMULESS,
17908 IX86_BUILTIN_COMUGTSS,
17909 IX86_BUILTIN_COMUGESS,
17910 IX86_BUILTIN_COMORDSS,
17911 IX86_BUILTIN_COMUNORDSS,
17912 IX86_BUILTIN_COMFALSESS,
17913 IX86_BUILTIN_COMTRUESS,
17914
17915 IX86_BUILTIN_COMEQSD,
17916 IX86_BUILTIN_COMNESD,
17917 IX86_BUILTIN_COMLTSD,
17918 IX86_BUILTIN_COMLESD,
17919 IX86_BUILTIN_COMGTSD,
17920 IX86_BUILTIN_COMGESD,
17921 IX86_BUILTIN_COMUEQSD,
17922 IX86_BUILTIN_COMUNESD,
17923 IX86_BUILTIN_COMULTSD,
17924 IX86_BUILTIN_COMULESD,
17925 IX86_BUILTIN_COMUGTSD,
17926 IX86_BUILTIN_COMUGESD,
17927 IX86_BUILTIN_COMORDSD,
17928 IX86_BUILTIN_COMUNORDSD,
17929 IX86_BUILTIN_COMFALSESD,
17930 IX86_BUILTIN_COMTRUESD,
17931
17932 IX86_BUILTIN_COMEQPS,
17933 IX86_BUILTIN_COMNEPS,
17934 IX86_BUILTIN_COMLTPS,
17935 IX86_BUILTIN_COMLEPS,
17936 IX86_BUILTIN_COMGTPS,
17937 IX86_BUILTIN_COMGEPS,
17938 IX86_BUILTIN_COMUEQPS,
17939 IX86_BUILTIN_COMUNEPS,
17940 IX86_BUILTIN_COMULTPS,
17941 IX86_BUILTIN_COMULEPS,
17942 IX86_BUILTIN_COMUGTPS,
17943 IX86_BUILTIN_COMUGEPS,
17944 IX86_BUILTIN_COMORDPS,
17945 IX86_BUILTIN_COMUNORDPS,
17946 IX86_BUILTIN_COMFALSEPS,
17947 IX86_BUILTIN_COMTRUEPS,
17948
17949 IX86_BUILTIN_COMEQPD,
17950 IX86_BUILTIN_COMNEPD,
17951 IX86_BUILTIN_COMLTPD,
17952 IX86_BUILTIN_COMLEPD,
17953 IX86_BUILTIN_COMGTPD,
17954 IX86_BUILTIN_COMGEPD,
17955 IX86_BUILTIN_COMUEQPD,
17956 IX86_BUILTIN_COMUNEPD,
17957 IX86_BUILTIN_COMULTPD,
17958 IX86_BUILTIN_COMULEPD,
17959 IX86_BUILTIN_COMUGTPD,
17960 IX86_BUILTIN_COMUGEPD,
17961 IX86_BUILTIN_COMORDPD,
17962 IX86_BUILTIN_COMUNORDPD,
17963 IX86_BUILTIN_COMFALSEPD,
17964 IX86_BUILTIN_COMTRUEPD,
17965
17966 IX86_BUILTIN_PCOMEQUB,
17967 IX86_BUILTIN_PCOMNEUB,
17968 IX86_BUILTIN_PCOMLTUB,
17969 IX86_BUILTIN_PCOMLEUB,
17970 IX86_BUILTIN_PCOMGTUB,
17971 IX86_BUILTIN_PCOMGEUB,
17972 IX86_BUILTIN_PCOMFALSEUB,
17973 IX86_BUILTIN_PCOMTRUEUB,
17974 IX86_BUILTIN_PCOMEQUW,
17975 IX86_BUILTIN_PCOMNEUW,
17976 IX86_BUILTIN_PCOMLTUW,
17977 IX86_BUILTIN_PCOMLEUW,
17978 IX86_BUILTIN_PCOMGTUW,
17979 IX86_BUILTIN_PCOMGEUW,
17980 IX86_BUILTIN_PCOMFALSEUW,
17981 IX86_BUILTIN_PCOMTRUEUW,
17982 IX86_BUILTIN_PCOMEQUD,
17983 IX86_BUILTIN_PCOMNEUD,
17984 IX86_BUILTIN_PCOMLTUD,
17985 IX86_BUILTIN_PCOMLEUD,
17986 IX86_BUILTIN_PCOMGTUD,
17987 IX86_BUILTIN_PCOMGEUD,
17988 IX86_BUILTIN_PCOMFALSEUD,
17989 IX86_BUILTIN_PCOMTRUEUD,
17990 IX86_BUILTIN_PCOMEQUQ,
17991 IX86_BUILTIN_PCOMNEUQ,
17992 IX86_BUILTIN_PCOMLTUQ,
17993 IX86_BUILTIN_PCOMLEUQ,
17994 IX86_BUILTIN_PCOMGTUQ,
17995 IX86_BUILTIN_PCOMGEUQ,
17996 IX86_BUILTIN_PCOMFALSEUQ,
17997 IX86_BUILTIN_PCOMTRUEUQ,
17998
17999 IX86_BUILTIN_PCOMEQB,
18000 IX86_BUILTIN_PCOMNEB,
18001 IX86_BUILTIN_PCOMLTB,
18002 IX86_BUILTIN_PCOMLEB,
18003 IX86_BUILTIN_PCOMGTB,
18004 IX86_BUILTIN_PCOMGEB,
18005 IX86_BUILTIN_PCOMFALSEB,
18006 IX86_BUILTIN_PCOMTRUEB,
18007 IX86_BUILTIN_PCOMEQW,
18008 IX86_BUILTIN_PCOMNEW,
18009 IX86_BUILTIN_PCOMLTW,
18010 IX86_BUILTIN_PCOMLEW,
18011 IX86_BUILTIN_PCOMGTW,
18012 IX86_BUILTIN_PCOMGEW,
18013 IX86_BUILTIN_PCOMFALSEW,
18014 IX86_BUILTIN_PCOMTRUEW,
18015 IX86_BUILTIN_PCOMEQD,
18016 IX86_BUILTIN_PCOMNED,
18017 IX86_BUILTIN_PCOMLTD,
18018 IX86_BUILTIN_PCOMLED,
18019 IX86_BUILTIN_PCOMGTD,
18020 IX86_BUILTIN_PCOMGED,
18021 IX86_BUILTIN_PCOMFALSED,
18022 IX86_BUILTIN_PCOMTRUED,
18023 IX86_BUILTIN_PCOMEQQ,
18024 IX86_BUILTIN_PCOMNEQ,
18025 IX86_BUILTIN_PCOMLTQ,
18026 IX86_BUILTIN_PCOMLEQ,
18027 IX86_BUILTIN_PCOMGTQ,
18028 IX86_BUILTIN_PCOMGEQ,
18029 IX86_BUILTIN_PCOMFALSEQ,
18030 IX86_BUILTIN_PCOMTRUEQ,
18031
eb701deb
RH
18032 IX86_BUILTIN_MAX
18033};
18034
4f7d8547
RG
/* Table of decls for the ix86 builtin functions, indexed by
   IX86_BUILTIN_* code; filled in by def_builtin.  Entries whose ISA
   is not enabled remain NULL_TREE.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
18037
110abdbc 18038/* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so,
4f7d8547
RG
18039 * if the target_flags include one of MASK. Stores the function decl
18040 * in the ix86_builtins array.
18041 * Returns the function decl or NULL_TREE, if the builtin was not added. */
18042
18043static inline tree
18044def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
18045{
18046 tree decl = NULL_TREE;
18047
0a1c5e55 18048 if (mask & ix86_isa_flags
853a33f3 18049 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
4f7d8547
RG
18050 {
18051 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
18052 NULL, NULL_TREE);
18053 ix86_builtins[(int) code] = decl;
18054 }
18055
18056 return decl;
18057}
18058
18059/* Like def_builtin, but also marks the function decl "const". */
18060
18061static inline tree
18062def_builtin_const (int mask, const char *name, tree type,
18063 enum ix86_builtins code)
18064{
18065 tree decl = def_builtin (mask, name, type, code);
18066 if (decl)
18067 TREE_READONLY (decl) = 1;
18068 return decl;
18069}
bd793c65 18070
e358acde
RH
/* Bits for builtin_description.flag.  */

/* Set when the comparison is not supported natively and the operands
   should be swapped (via swap_comparison) to obtain a supported
   form.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1
18076
bd793c65
BS
/* Describes one ix86 builtin function for the tables below.  */
struct builtin_description
{
  const unsigned int mask;		/* OPTION_MASK_ISA_* bits that enable it.  */
  const enum insn_code icode;		/* Insn pattern used to expand it.  */
  const char *const name;		/* User-visible "__builtin_ia32_*" name.  */
  const enum ix86_builtins code;	/* IX86_BUILTIN_* function code.  */
  const enum rtx_code comparison;	/* rtx comparison code, if applicable.  */
  const int flag;			/* Extra per-table data: BUILTIN_DESC_*
					   bits, an ix86_builtin_type, or a
					   machine mode, depending on table.  */
};
18086
/* SSE/SSE2 comi and ucomi builtins: scalar float/double compares that
   set EFLAGS.  The rtx comparison code selects the condition tested
   when the builtin is expanded.  */
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
18114
06f4e35d
L
/* SSE4.2 pcmpestr builtins (explicit-length string compare).  The
   flag field holds the flags-register mode tested (CCAmode etc.) for
   the EFLAGS-testing forms, or 0 for the index/mask-producing
   forms.  */
static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};
18126
/* SSE4.2 pcmpistr builtins (implicit-length string compare).  The
   flag field is encoded exactly as in bdesc_pcmpestr above.  */
static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
18138
bb1418c1
L
/* Special builtin types: signatures for the "special" builtins
   (loads, stores and other side-effecting operations) described in
   bdesc_special_args below.  Names are RET_FTYPE_ARGS; a P<type>
   argument is a pointer and PC<type> a pointer-to-const (compare the
   load vs. store entries in bdesc_special_args).  */
enum ix86_special_builtin_type
{
  SPECIAL_FTYPE_UNKNOWN,
  VOID_FTYPE_VOID,
  V16QI_FTYPE_PCCHAR,
  V4SF_FTYPE_PCFLOAT,
  V2DF_FTYPE_PCDOUBLE,
  V4SF_FTYPE_V4SF_PCV2SF,
  V2DF_FTYPE_V2DF_PCDOUBLE,
  V2DI_FTYPE_PV2DI,
  VOID_FTYPE_PV2SF_V4SF,
  VOID_FTYPE_PV2DI_V2DI,
  VOID_FTYPE_PCHAR_V16QI,
  VOID_FTYPE_PFLOAT_V4SF,
  VOID_FTYPE_PDOUBLE_V2DF,
  VOID_FTYPE_PDI_DI,
  VOID_FTYPE_PINT_INT
};
18158
f318ff0a
L
/* Builtin types: signatures for the ordinary builtins in bdesc_args.
   Names are RET_FTYPE_ARGS.  Suffixes refine how the expander treats
   the arguments: _COUNT marks a shift/rotate count operand, _SWAP
   marks operands to be swapped (see BUILTIN_DESC_SWAP_OPERANDS),
   _VEC_MERGE marks a vector merge form; V2DI2TI/V1DI2DI presumably
   denote a mode-punned variant of the vector type — confirm against
   the expander.  */
enum ix86_builtin_type
{
  FTYPE_UNKNOWN,

  /* One-argument forms.  */
  FLOAT128_FTYPE_FLOAT128,
  FLOAT_FTYPE_FLOAT,
  FLOAT128_FTYPE_FLOAT128_FLOAT128,
  INT_FTYPE_V2DI_V2DI_PTEST,
  INT64_FTYPE_V4SF,
  INT64_FTYPE_V2DF,
  INT_FTYPE_V16QI,
  INT_FTYPE_V8QI,
  INT_FTYPE_V4SF,
  INT_FTYPE_V2DF,
  V16QI_FTYPE_V16QI,
  V8HI_FTYPE_V8HI,
  V8HI_FTYPE_V16QI,
  V8QI_FTYPE_V8QI,
  V4SI_FTYPE_V4SI,
  V4SI_FTYPE_V16QI,
  V4SI_FTYPE_V8HI,
  V4SI_FTYPE_V4SF,
  V4SI_FTYPE_V2DF,
  V4HI_FTYPE_V4HI,
  V4SF_FTYPE_V4SF,
  V4SF_FTYPE_V4SF_VEC_MERGE,
  V4SF_FTYPE_V4SI,
  V4SF_FTYPE_V2DF,
  V2DI_FTYPE_V2DI,
  V2DI_FTYPE_V16QI,
  V2DI_FTYPE_V8HI,
  V2DI_FTYPE_V4SI,
  V2DF_FTYPE_V2DF,
  V2DF_FTYPE_V2DF_VEC_MERGE,
  V2DF_FTYPE_V4SI,
  V2DF_FTYPE_V4SF,
  V2DF_FTYPE_V2SI,
  V2SI_FTYPE_V2SI,
  V2SI_FTYPE_V4SF,
  V2SI_FTYPE_V2SF,
  V2SI_FTYPE_V2DF,
  V2SF_FTYPE_V2SF,
  V2SF_FTYPE_V2SI,

  /* Two-argument forms.  */
  V16QI_FTYPE_V16QI_V16QI,
  V16QI_FTYPE_V8HI_V8HI,
  V8QI_FTYPE_V8QI_V8QI,
  V8QI_FTYPE_V4HI_V4HI,
  V8HI_FTYPE_V8HI_V8HI,
  V8HI_FTYPE_V8HI_V8HI_COUNT,
  V8HI_FTYPE_V16QI_V16QI,
  V8HI_FTYPE_V4SI_V4SI,
  V8HI_FTYPE_V8HI_SI_COUNT,
  V4SI_FTYPE_V4SI_V4SI,
  V4SI_FTYPE_V4SI_V4SI_COUNT,
  V4SI_FTYPE_V8HI_V8HI,
  V4SI_FTYPE_V4SF_V4SF,
  V4SI_FTYPE_V2DF_V2DF,
  V4SI_FTYPE_V4SI_SI_COUNT,
  V4HI_FTYPE_V4HI_V4HI,
  V4HI_FTYPE_V4HI_V4HI_COUNT,
  V4HI_FTYPE_V8QI_V8QI,
  V4HI_FTYPE_V2SI_V2SI,
  V4HI_FTYPE_V4HI_SI_COUNT,
  V4SF_FTYPE_V4SF_V4SF,
  V4SF_FTYPE_V4SF_V4SF_SWAP,
  V4SF_FTYPE_V4SF_V2SI,
  V4SF_FTYPE_V4SF_V2DF,
  V4SF_FTYPE_V4SF_DI,
  V4SF_FTYPE_V4SF_SI,
  V2DI_FTYPE_V2DI_V2DI,
  V2DI_FTYPE_V2DI_V2DI_COUNT,
  V2DI_FTYPE_V16QI_V16QI,
  V2DI_FTYPE_V4SI_V4SI,
  V2DI_FTYPE_V2DI_V16QI,
  V2DI_FTYPE_V2DF_V2DF,
  V2DI_FTYPE_V2DI_SI_COUNT,
  V2SI_FTYPE_V2SI_V2SI,
  V2SI_FTYPE_V2SI_V2SI_COUNT,
  V2SI_FTYPE_V4HI_V4HI,
  V2SI_FTYPE_V2SF_V2SF,
  V2SI_FTYPE_V2SI_SI_COUNT,
  V2DF_FTYPE_V2DF_V2DF,
  V2DF_FTYPE_V2DF_V2DF_SWAP,
  V2DF_FTYPE_V2DF_V4SF,
  V2DF_FTYPE_V2DF_DI,
  V2DF_FTYPE_V2DF_SI,
  V2SF_FTYPE_V2SF_V2SF,
  V1DI_FTYPE_V1DI_V1DI,
  V1DI_FTYPE_V1DI_V1DI_COUNT,
  V1DI_FTYPE_V8QI_V8QI,
  V1DI_FTYPE_V2SI_V2SI,
  V1DI_FTYPE_V1DI_SI_COUNT,
  UINT64_FTYPE_UINT64_UINT64,
  UINT_FTYPE_UINT_UINT,
  UINT_FTYPE_UINT_USHORT,
  UINT_FTYPE_UINT_UCHAR,

  /* Forms taking an immediate INT operand.  */
  V8HI_FTYPE_V8HI_INT,
  V4SI_FTYPE_V4SI_INT,
  V4HI_FTYPE_V4HI_INT,
  V4SF_FTYPE_V4SF_INT,
  V2DI_FTYPE_V2DI_INT,
  V2DI2TI_FTYPE_V2DI_INT,
  V2DF_FTYPE_V2DF_INT,

  /* Three-argument forms.  */
  V16QI_FTYPE_V16QI_V16QI_V16QI,
  V4SF_FTYPE_V4SF_V4SF_V4SF,
  V2DF_FTYPE_V2DF_V2DF_V2DF,
  V16QI_FTYPE_V16QI_V16QI_INT,
  V8HI_FTYPE_V8HI_V8HI_INT,
  V4SI_FTYPE_V4SI_V4SI_INT,
  V4SF_FTYPE_V4SF_V4SF_INT,
  V2DI_FTYPE_V2DI_V2DI_INT,
  V2DI2TI_FTYPE_V2DI_V2DI_INT,
  V1DI2DI_FTYPE_V1DI_V1DI_INT,
  V2DF_FTYPE_V2DF_V2DF_INT,
  V2DI_FTYPE_V2DI_UINT_UINT,
  V2DI_FTYPE_V2DI_V2DI_UINT_UINT
};
18276
bb1418c1
L
/* Special builtins with variable number of arguments: loads, stores,
   fences and other side-effecting operations.  The flag field holds
   the ix86_special_builtin_type signature.  */
static const struct builtin_description bdesc_special_args[] =
{
  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  /* NOTE: mfence has no name here; presumably registered specially
     elsewhere — confirm against ix86_init_mmx_sse_builtins.  */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
};
18324
f318ff0a
L
18325/* Builtins with variable number of arguments. */
18326static const struct builtin_description bdesc_args[] =
9a5cee02 18327{
1b667c82
L
18328 /* MMX */
18329 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18330 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18331 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18332 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18333 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18334 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18335
18336 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18337 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18338 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18339 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18340 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18341 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18342 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18343 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18344
18345 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18346 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18347
18348 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18349 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18350 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18351 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18352
18353 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18354 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18355 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18356 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18357 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18358 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18359
18360 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18361 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18362 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18363 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18364 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
18365 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
18366
18367 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
18368 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
18369 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
18370
18371 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
18372
858e5e79
L
18373 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18374 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18375 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
18376 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18377 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18378 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
18379
18380 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18381 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18382 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
18383 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18384 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18385 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
18386
18387 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18388 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18389 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18390 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18391
18392 /* 3DNow! */
18393 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
18394 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
18395 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18396 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18397
18398 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18399 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18400 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18401 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18402 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18403 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18404 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18405 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18406 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18407 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18408 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18409 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18410 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18411 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18412 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18413
18414 /* 3DNow!A */
18415 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
18416 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
18417 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
18418 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18419 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18420 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18421
18422 /* SSE */
18423 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
18424 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18425 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18426 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18427 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18428 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18429 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
18430 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
18431 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
18432 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
18433 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
18434 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
18435
18436 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18437
18438 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18439 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18440 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18441 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18442 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18443 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18444 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18445 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18446
18447 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
18448 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
18449 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
18450 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18451 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18452 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18453 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
18454 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
18455 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
18456 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18457 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
18458 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18459 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
18460 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
18461 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
18462 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18463 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
18464 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
18465 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
18466 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18467 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18468 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18469
18470 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18471 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18472 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18473 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18474
18475 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18476 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18477 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18478 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18479
18480 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18481 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18482 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18483 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18484 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18485
18486 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
18487 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
18488 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
18489
18490 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
18491
18492 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18493 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18494 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18495
18496 /* SSE MMX or 3Dnow!A */
18497 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18498 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18499 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18500
18501 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18502 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18503 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18504 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18505
18506 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
18507 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
18508
18509 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
18510
18511 /* SSE2 */
18512 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18513
18514 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
18515 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
18516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
18517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
18518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
18519
18520 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
18521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
18522 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
18523 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
18524 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
18525
18526 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
18527
18528 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
18529 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
18530 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
18531 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
18532
18533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
18534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
18535 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
18536
18537 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18538 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18539 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18540 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18542 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18543 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18544 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18545
18546 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
18547 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
18548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
18549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
18551 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18552 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
18553 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
18554 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
18555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18556 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18557 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
18559 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
18560 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
18561 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18562 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
18563 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
18564 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
18565 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18566
18567 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18568 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18570 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18571
18572 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18573 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18574 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18575 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18576
18577 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18578 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18579 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18580
18581 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
18582
18583 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18584 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18585 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18586 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18587 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18588 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18589 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18590 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18591
18592 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18593 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18594 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18595 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18596 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18597 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18598 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18599 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18600
18601 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18602 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
18603
18604 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18605 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18606 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18607 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18608
18609 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18610 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18611
18612 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18613 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18614 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18615 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18616 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18617 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18618
18619 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18620 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18621 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18622 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18623
18624 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18625 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18626 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18627 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18628 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18629 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18630 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18631 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18632
18633 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
18634 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
18635 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
18636
18637 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18638 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
18639
18640 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
18641 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
18642
18643 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
18644
18645 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
18646 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
18647 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
18648 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
18649
a44acfb9 18650 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
858e5e79
L
18651 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18652 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18653 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
18654 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18655 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18656 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
18657
a44acfb9 18658 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
858e5e79
L
18659 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18660 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18661 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
18662 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18663 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18664 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
18665
18666 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18667 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18668 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18669 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18670
bb1418c1
L
18671 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
18672 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
18673 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
18674
18675 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
18676
1b667c82
L
18677 /* SSE2 MMX */
18678 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
18679 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
18680
f318ff0a
L
18681 /* SSE3 */
18682 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
18683 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18684
1b667c82
L
18685 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18686 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18687 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18688 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18689 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18690 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18691
f318ff0a
L
18692 /* SSSE3 */
18693 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
18694 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
18695 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
18696 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
18697 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
18698 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
18699
1b667c82
L
18700 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18701 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18702 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18703 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18704 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18705 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18706 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18707 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18708 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18709 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18710 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18711 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18712 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
18713 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
18714 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18715 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18716 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18717 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18718 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18719 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18720 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18721 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18722 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18723 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18724
858e5e79 18725 /* SSSE3. */
a44acfb9 18726 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
99c25ac1 18727 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
858e5e79 18728
9a5cee02 18729 /* SSE4.1 */
ec2e9a15
L
18730 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18731 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18732 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
18733 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
18734 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18735 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18736 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18737 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
18738 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
18739 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
18740
f318ff0a
L
18741 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
18742 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
18743 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
18744 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
18745 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
18746 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
18747 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
18748 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
18749 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
18750 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
18751 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
18752 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
18753 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
18754
1b667c82
L
18755 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
18756 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18757 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18758 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18759 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18760 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18761 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18762 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18763 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18764 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18765 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
18766 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18767
ec2e9a15
L
18768 /* SSE4.1 and SSE5 */
18769 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
18770 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
18771 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18772 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18773
b282a8d3
L
18774 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18775 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18776 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18777
1b667c82
L
18778 /* SSE4.2 */
18779 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
a44acfb9
L
18780 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
18781 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
18782 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
18783 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
1b667c82 18784
bd9513ea
L
18785 /* SSE4A */
18786 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
18787 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
18788 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
18789 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18790
ec2e9a15
L
18791 /* AES */
18792 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
f318ff0a 18793 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
8b96a312 18794
1b667c82
L
18795 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18796 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18797 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18798 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18799
8b96a312 18800 /* PCLMUL */
ec2e9a15 18801 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
f318ff0a
L
18802
18803 /* 64bit */
18804 { OPTION_MASK_ISA_64BIT, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
1b667c82 18805 { OPTION_MASK_ISA_64BIT, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
bd793c65
BS
18806};
18807
04e1d06b
MM
18808/* SSE5 */
/* Classification of SSE5 multi-argument builtins: encodes the number of
   operands and their vector element modes so ix86_expand_multi_arg_builtin
   can pick the right expansion.  Naming scheme: MULTI_ARG_<nargs>_<mode>
   with optional suffixes — _IMM (last operand is an immediate), _CMP
   (comparison, takes an rtx comparison code), _TF (test-and-merge forms),
   and mixed-mode forms such as _SI_DI (SImode sources, DImode result).
   The enumerator values are stored as the (int) flag field of
   bdesc_multi_arg entries, so their order must not change.  */
enum multi_arg_type {
  MULTI_ARG_UNKNOWN,
  MULTI_ARG_3_SF,
  MULTI_ARG_3_DF,
  MULTI_ARG_3_DI,
  MULTI_ARG_3_SI,
  MULTI_ARG_3_SI_DI,
  MULTI_ARG_3_HI,
  MULTI_ARG_3_HI_SI,
  MULTI_ARG_3_QI,
  MULTI_ARG_3_PERMPS,
  MULTI_ARG_3_PERMPD,
  MULTI_ARG_2_SF,
  MULTI_ARG_2_DF,
  MULTI_ARG_2_DI,
  MULTI_ARG_2_SI,
  MULTI_ARG_2_HI,
  MULTI_ARG_2_QI,
  MULTI_ARG_2_DI_IMM,
  MULTI_ARG_2_SI_IMM,
  MULTI_ARG_2_HI_IMM,
  MULTI_ARG_2_QI_IMM,
  MULTI_ARG_2_SF_CMP,
  MULTI_ARG_2_DF_CMP,
  MULTI_ARG_2_DI_CMP,
  MULTI_ARG_2_SI_CMP,
  MULTI_ARG_2_HI_CMP,
  MULTI_ARG_2_QI_CMP,
  MULTI_ARG_2_DI_TF,
  MULTI_ARG_2_SI_TF,
  MULTI_ARG_2_HI_TF,
  MULTI_ARG_2_QI_TF,
  MULTI_ARG_2_SF_TF,
  MULTI_ARG_2_DF_TF,
  MULTI_ARG_1_SF,
  MULTI_ARG_1_DF,
  MULTI_ARG_1_DI,
  MULTI_ARG_1_SI,
  MULTI_ARG_1_HI,
  MULTI_ARG_1_QI,
  MULTI_ARG_1_SI_DI,
  MULTI_ARG_1_HI_DI,
  MULTI_ARG_1_HI_SI,
  MULTI_ARG_1_QI_DI,
  MULTI_ARG_1_QI_SI,
  MULTI_ARG_1_QI_HI,
  MULTI_ARG_1_PH2PS,
  MULTI_ARG_1_PS2PH
};
18858
18859static const struct builtin_description bdesc_multi_arg[] =
18860{
18861 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
18862 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
18863 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
18864 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
18865 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
18866 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
18867 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
18868 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
18869 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
18870 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
18871 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
18872 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
18873 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
18874 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
18875 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
18876 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
18877 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18878 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18879 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
18880 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
18881 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
18882 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
18883 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
18884 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
18885 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
18886 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
18887 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
18888 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
18889 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18890 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
18891 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
18892 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
18893 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18894 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18895 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18896 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18897 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18898 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
71d46ca5
MM
18899 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
18900 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
18901 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
18902 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
18903 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
18904 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
18905 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
18906 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
04e1d06b
MM
18907 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
18908 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
18909 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
18910 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
18911 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
18912 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
18913 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
18914 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
18915 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
18916 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
18917 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
18918 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
18919 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
18920 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
18921 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
18922 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
18923 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
18924 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
18925 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
18926 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
18927 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
18928 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
18929 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
18930 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
18931 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
18932 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
18933 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
18934 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
18935 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
18936
18937 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
18938 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18939 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18940 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
18941 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
18942 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
18943 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
18944 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18945 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18946 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18947 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18948 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18949 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18950 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18951 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18952 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18953
18954 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
18955 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18956 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18957 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
18958 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
18959 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
18960 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
18961 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18962 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18963 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18964 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18965 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18966 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18967 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18968 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18969 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18970
18971 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
18972 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18973 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18974 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
18975 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
18976 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
18977 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
18978 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18979 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18980 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18981 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18982 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18983 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18984 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18985 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18986 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18987
18988 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
18989 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18990 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18991 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
18992 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
18993 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
18994 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
18995 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18996 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18997 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18998 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18999 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
19000 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
19001 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
19002 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
19003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
19004
19005 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
19006 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
19007 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
19008 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
19009 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
19010 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
19011 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
19012
19013 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
19014 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
19015 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
19016 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
19017 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
19018 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
19019 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
19020
19021 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
19022 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
19023 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
19024 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
19025 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
19026 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
19027 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
19028
19029 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
19030 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
19031 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
19032 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
19033 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
19034 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
19035 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
19036
19037 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
19038 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
19039 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
19040 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
19041 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
19042 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
19043 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
19044
19045 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
19046 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
19047 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
19048 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
19049 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
19050 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
19051 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
19052
19053 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
19054 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
19055 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
19056 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
19057 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
19058 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
19059 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
19060
19061 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
19062 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
19063 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
19064 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
19065 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
19066 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
19067 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
19068
19069 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
19070 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
19071 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
19072 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
19073 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
19074 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
19075 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
19076 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
19077
19078 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
19079 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
19080 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
19081 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
19082 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
19083 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
19084 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
19085 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
19086
19087 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
19088 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
19089 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
19090 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
19091 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
19092 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
19093 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
19094 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
19095};
19096
f6155fda 19097/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
bd793c65
BS
19098 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
19099 builtins. */
e37af218 19100static void
b96a374d 19101ix86_init_mmx_sse_builtins (void)
bd793c65 19102{
8b60264b 19103 const struct builtin_description * d;
77ebd435 19104 size_t i;
bd793c65 19105
00c8e9f6 19106 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
4a5eab38 19107 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
10a97ae6
UB
19108 tree V1DI_type_node
19109 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
4a5eab38 19110 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
a16da3ae
RH
19111 tree V2DI_type_node
19112 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
4a5eab38
PB
19113 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
19114 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
19115 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
19116 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
00c8e9f6 19117 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
4a5eab38
PB
19118 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
19119
bd793c65 19120 tree pchar_type_node = build_pointer_type (char_type_node);
bb1418c1
L
19121 tree pcchar_type_node
19122 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
bd793c65 19123 tree pfloat_type_node = build_pointer_type (float_type_node);
bb1418c1
L
19124 tree pcfloat_type_node
19125 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
19126 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
19127 tree pcv2sf_type_node
19128 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
916b60b7 19129 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
bd793c65
BS
19130 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
19131
19132 /* Comparisons. */
19133 tree int_ftype_v4sf_v4sf
b4de2f7d
AH
19134 = build_function_type_list (integer_type_node,
19135 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 19136 tree v4si_ftype_v4sf_v4sf
b4de2f7d
AH
19137 = build_function_type_list (V4SI_type_node,
19138 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 19139 /* MMX/SSE/integer conversions. */
bd793c65 19140 tree int_ftype_v4sf
b4de2f7d
AH
19141 = build_function_type_list (integer_type_node,
19142 V4SF_type_node, NULL_TREE);
453ee231
JH
19143 tree int64_ftype_v4sf
19144 = build_function_type_list (long_long_integer_type_node,
19145 V4SF_type_node, NULL_TREE);
bd793c65 19146 tree int_ftype_v8qi
b4de2f7d 19147 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
bd793c65 19148 tree v4sf_ftype_v4sf_int
b4de2f7d
AH
19149 = build_function_type_list (V4SF_type_node,
19150 V4SF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
19151 tree v4sf_ftype_v4sf_int64
19152 = build_function_type_list (V4SF_type_node,
19153 V4SF_type_node, long_long_integer_type_node,
19154 NULL_TREE);
bd793c65 19155 tree v4sf_ftype_v4sf_v2si
b4de2f7d
AH
19156 = build_function_type_list (V4SF_type_node,
19157 V4SF_type_node, V2SI_type_node, NULL_TREE);
eb701deb 19158
bd793c65
BS
19159 /* Miscellaneous. */
19160 tree v8qi_ftype_v4hi_v4hi
b4de2f7d
AH
19161 = build_function_type_list (V8QI_type_node,
19162 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 19163 tree v4hi_ftype_v2si_v2si
b4de2f7d
AH
19164 = build_function_type_list (V4HI_type_node,
19165 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 19166 tree v4sf_ftype_v4sf_v4sf_int
b4de2f7d
AH
19167 = build_function_type_list (V4SF_type_node,
19168 V4SF_type_node, V4SF_type_node,
19169 integer_type_node, NULL_TREE);
bd793c65 19170 tree v2si_ftype_v4hi_v4hi
b4de2f7d
AH
19171 = build_function_type_list (V2SI_type_node,
19172 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 19173 tree v4hi_ftype_v4hi_int
b4de2f7d 19174 = build_function_type_list (V4HI_type_node,
e7a60f56 19175 V4HI_type_node, integer_type_node, NULL_TREE);
10a97ae6 19176 tree v2si_ftype_v2si_int
b4de2f7d 19177 = build_function_type_list (V2SI_type_node,
10a97ae6
UB
19178 V2SI_type_node, integer_type_node, NULL_TREE);
19179 tree v1di_ftype_v1di_int
19180 = build_function_type_list (V1DI_type_node,
19181 V1DI_type_node, integer_type_node, NULL_TREE);
19182
bd793c65 19183 tree void_ftype_void
b4de2f7d 19184 = build_function_type (void_type_node, void_list_node);
bd793c65 19185 tree void_ftype_unsigned
b4de2f7d 19186 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22c7c85e
L
19187 tree void_ftype_unsigned_unsigned
19188 = build_function_type_list (void_type_node, unsigned_type_node,
19189 unsigned_type_node, NULL_TREE);
19190 tree void_ftype_pcvoid_unsigned_unsigned
19191 = build_function_type_list (void_type_node, const_ptr_type_node,
19192 unsigned_type_node, unsigned_type_node,
19193 NULL_TREE);
bd793c65 19194 tree unsigned_ftype_void
b4de2f7d 19195 = build_function_type (unsigned_type_node, void_list_node);
bd793c65 19196 tree v2si_ftype_v4sf
b4de2f7d 19197 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
bd793c65 19198 /* Loads/stores. */
bd793c65 19199 tree void_ftype_v8qi_v8qi_pchar
b4de2f7d
AH
19200 = build_function_type_list (void_type_node,
19201 V8QI_type_node, V8QI_type_node,
19202 pchar_type_node, NULL_TREE);
068f5dea
JH
19203 tree v4sf_ftype_pcfloat
19204 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
bb1418c1 19205 tree v4sf_ftype_v4sf_pcv2sf
b4de2f7d 19206 = build_function_type_list (V4SF_type_node,
bb1418c1
L
19207 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
19208 tree void_ftype_pv2sf_v4sf
b4de2f7d 19209 = build_function_type_list (void_type_node,
bb1418c1 19210 pv2sf_type_node, V4SF_type_node, NULL_TREE);
bd793c65 19211 tree void_ftype_pfloat_v4sf
b4de2f7d
AH
19212 = build_function_type_list (void_type_node,
19213 pfloat_type_node, V4SF_type_node, NULL_TREE);
bd793c65 19214 tree void_ftype_pdi_di
b4de2f7d
AH
19215 = build_function_type_list (void_type_node,
19216 pdi_type_node, long_long_unsigned_type_node,
19217 NULL_TREE);
916b60b7 19218 tree void_ftype_pv2di_v2di
b4de2f7d
AH
19219 = build_function_type_list (void_type_node,
19220 pv2di_type_node, V2DI_type_node, NULL_TREE);
bd793c65
BS
19221 /* Normal vector unops. */
19222 tree v4sf_ftype_v4sf
b4de2f7d 19223 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
b1875f52
L
19224 tree v16qi_ftype_v16qi
19225 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
19226 tree v8hi_ftype_v8hi
19227 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
19228 tree v4si_ftype_v4si
19229 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
19230 tree v8qi_ftype_v8qi
19231 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
19232 tree v4hi_ftype_v4hi
19233 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
0f290768 19234
bd793c65
BS
19235 /* Normal vector binops. */
19236 tree v4sf_ftype_v4sf_v4sf
b4de2f7d
AH
19237 = build_function_type_list (V4SF_type_node,
19238 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 19239 tree v8qi_ftype_v8qi_v8qi
b4de2f7d
AH
19240 = build_function_type_list (V8QI_type_node,
19241 V8QI_type_node, V8QI_type_node, NULL_TREE);
bd793c65 19242 tree v4hi_ftype_v4hi_v4hi
b4de2f7d
AH
19243 = build_function_type_list (V4HI_type_node,
19244 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 19245 tree v2si_ftype_v2si_v2si
b4de2f7d
AH
19246 = build_function_type_list (V2SI_type_node,
19247 V2SI_type_node, V2SI_type_node, NULL_TREE);
10a97ae6
UB
19248 tree v1di_ftype_v1di_v1di
19249 = build_function_type_list (V1DI_type_node,
19250 V1DI_type_node, V1DI_type_node, NULL_TREE);
99c25ac1
L
19251 tree v1di_ftype_v1di_v1di_int
19252 = build_function_type_list (V1DI_type_node,
19253 V1DI_type_node, V1DI_type_node,
b1875f52 19254 integer_type_node, NULL_TREE);
47f339cf 19255 tree v2si_ftype_v2sf
ae3aa00d 19256 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
47f339cf 19257 tree v2sf_ftype_v2si
b4de2f7d 19258 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
47f339cf 19259 tree v2si_ftype_v2si
b4de2f7d 19260 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
47f339cf 19261 tree v2sf_ftype_v2sf
b4de2f7d 19262 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 19263 tree v2sf_ftype_v2sf_v2sf
b4de2f7d
AH
19264 = build_function_type_list (V2SF_type_node,
19265 V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 19266 tree v2si_ftype_v2sf_v2sf
b4de2f7d
AH
19267 = build_function_type_list (V2SI_type_node,
19268 V2SF_type_node, V2SF_type_node, NULL_TREE);
fbe5eb6d
BS
19269 tree pint_type_node = build_pointer_type (integer_type_node);
19270 tree pdouble_type_node = build_pointer_type (double_type_node);
068f5dea
JH
19271 tree pcdouble_type_node = build_pointer_type (
19272 build_type_variant (double_type_node, 1, 0));
fbe5eb6d 19273 tree int_ftype_v2df_v2df
b4de2f7d
AH
19274 = build_function_type_list (integer_type_node,
19275 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 19276
068f5dea
JH
19277 tree void_ftype_pcvoid
19278 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
fbe5eb6d 19279 tree v4sf_ftype_v4si
b4de2f7d 19280 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 19281 tree v4si_ftype_v4sf
b4de2f7d 19282 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 19283 tree v2df_ftype_v4si
b4de2f7d 19284 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 19285 tree v4si_ftype_v2df
b4de2f7d 19286 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
b40c4f68
UB
19287 tree v4si_ftype_v2df_v2df
19288 = build_function_type_list (V4SI_type_node,
19289 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 19290 tree v2si_ftype_v2df
b4de2f7d 19291 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 19292 tree v4sf_ftype_v2df
b4de2f7d 19293 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 19294 tree v2df_ftype_v2si
b4de2f7d 19295 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
fbe5eb6d 19296 tree v2df_ftype_v4sf
b4de2f7d 19297 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 19298 tree int_ftype_v2df
b4de2f7d 19299 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
453ee231
JH
19300 tree int64_ftype_v2df
19301 = build_function_type_list (long_long_integer_type_node,
b96a374d 19302 V2DF_type_node, NULL_TREE);
fbe5eb6d 19303 tree v2df_ftype_v2df_int
b4de2f7d
AH
19304 = build_function_type_list (V2DF_type_node,
19305 V2DF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
19306 tree v2df_ftype_v2df_int64
19307 = build_function_type_list (V2DF_type_node,
19308 V2DF_type_node, long_long_integer_type_node,
19309 NULL_TREE);
fbe5eb6d 19310 tree v4sf_ftype_v4sf_v2df
b4de2f7d
AH
19311 = build_function_type_list (V4SF_type_node,
19312 V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 19313 tree v2df_ftype_v2df_v4sf
b4de2f7d
AH
19314 = build_function_type_list (V2DF_type_node,
19315 V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 19316 tree v2df_ftype_v2df_v2df_int
b4de2f7d
AH
19317 = build_function_type_list (V2DF_type_node,
19318 V2DF_type_node, V2DF_type_node,
19319 integer_type_node,
19320 NULL_TREE);
1c47af84 19321 tree v2df_ftype_v2df_pcdouble
b4de2f7d 19322 = build_function_type_list (V2DF_type_node,
1c47af84 19323 V2DF_type_node, pcdouble_type_node, NULL_TREE);
fbe5eb6d 19324 tree void_ftype_pdouble_v2df
b4de2f7d
AH
19325 = build_function_type_list (void_type_node,
19326 pdouble_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 19327 tree void_ftype_pint_int
b4de2f7d
AH
19328 = build_function_type_list (void_type_node,
19329 pint_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 19330 tree void_ftype_v16qi_v16qi_pchar
b4de2f7d
AH
19331 = build_function_type_list (void_type_node,
19332 V16QI_type_node, V16QI_type_node,
19333 pchar_type_node, NULL_TREE);
068f5dea
JH
19334 tree v2df_ftype_pcdouble
19335 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
fbe5eb6d 19336 tree v2df_ftype_v2df_v2df
b4de2f7d
AH
19337 = build_function_type_list (V2DF_type_node,
19338 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 19339 tree v16qi_ftype_v16qi_v16qi
b4de2f7d
AH
19340 = build_function_type_list (V16QI_type_node,
19341 V16QI_type_node, V16QI_type_node, NULL_TREE);
fbe5eb6d 19342 tree v8hi_ftype_v8hi_v8hi
b4de2f7d
AH
19343 = build_function_type_list (V8HI_type_node,
19344 V8HI_type_node, V8HI_type_node, NULL_TREE);
fbe5eb6d 19345 tree v4si_ftype_v4si_v4si
b4de2f7d
AH
19346 = build_function_type_list (V4SI_type_node,
19347 V4SI_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 19348 tree v2di_ftype_v2di_v2di
b4de2f7d
AH
19349 = build_function_type_list (V2DI_type_node,
19350 V2DI_type_node, V2DI_type_node, NULL_TREE);
fbe5eb6d 19351 tree v2di_ftype_v2df_v2df
b4de2f7d
AH
19352 = build_function_type_list (V2DI_type_node,
19353 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 19354 tree v2df_ftype_v2df
b4de2f7d 19355 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
916b60b7 19356 tree v2di_ftype_v2di_int
b4de2f7d
AH
19357 = build_function_type_list (V2DI_type_node,
19358 V2DI_type_node, integer_type_node, NULL_TREE);
b1875f52
L
19359 tree v2di_ftype_v2di_v2di_int
19360 = build_function_type_list (V2DI_type_node, V2DI_type_node,
19361 V2DI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 19362 tree v4si_ftype_v4si_int
b4de2f7d
AH
19363 = build_function_type_list (V4SI_type_node,
19364 V4SI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 19365 tree v8hi_ftype_v8hi_int
b4de2f7d
AH
19366 = build_function_type_list (V8HI_type_node,
19367 V8HI_type_node, integer_type_node, NULL_TREE);
916b60b7 19368 tree v4si_ftype_v8hi_v8hi
b4de2f7d
AH
19369 = build_function_type_list (V4SI_type_node,
19370 V8HI_type_node, V8HI_type_node, NULL_TREE);
ab555a5b
UB
19371 tree v1di_ftype_v8qi_v8qi
19372 = build_function_type_list (V1DI_type_node,
b4de2f7d 19373 V8QI_type_node, V8QI_type_node, NULL_TREE);
ab555a5b
UB
19374 tree v1di_ftype_v2si_v2si
19375 = build_function_type_list (V1DI_type_node,
9e9fb0ce 19376 V2SI_type_node, V2SI_type_node, NULL_TREE);
916b60b7 19377 tree v2di_ftype_v16qi_v16qi
b4de2f7d
AH
19378 = build_function_type_list (V2DI_type_node,
19379 V16QI_type_node, V16QI_type_node, NULL_TREE);
9e9fb0ce
JB
19380 tree v2di_ftype_v4si_v4si
19381 = build_function_type_list (V2DI_type_node,
19382 V4SI_type_node, V4SI_type_node, NULL_TREE);
916b60b7 19383 tree int_ftype_v16qi
b4de2f7d 19384 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
19385 tree v16qi_ftype_pcchar
19386 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
f02e1358
JH
19387 tree void_ftype_pchar_v16qi
19388 = build_function_type_list (void_type_node,
19389 pchar_type_node, V16QI_type_node, NULL_TREE);
47f339cf 19390
21efb4d4
HJ
19391 tree v2di_ftype_v2di_unsigned_unsigned
19392 = build_function_type_list (V2DI_type_node, V2DI_type_node,
19393 unsigned_type_node, unsigned_type_node,
19394 NULL_TREE);
19395 tree v2di_ftype_v2di_v2di_unsigned_unsigned
19396 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
19397 unsigned_type_node, unsigned_type_node,
19398 NULL_TREE);
19399 tree v2di_ftype_v2di_v16qi
19400 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
19401 NULL_TREE);
9a5cee02
L
19402 tree v2df_ftype_v2df_v2df_v2df
19403 = build_function_type_list (V2DF_type_node,
19404 V2DF_type_node, V2DF_type_node,
19405 V2DF_type_node, NULL_TREE);
19406 tree v4sf_ftype_v4sf_v4sf_v4sf
19407 = build_function_type_list (V4SF_type_node,
19408 V4SF_type_node, V4SF_type_node,
19409 V4SF_type_node, NULL_TREE);
19410 tree v8hi_ftype_v16qi
19411 = build_function_type_list (V8HI_type_node, V16QI_type_node,
19412 NULL_TREE);
19413 tree v4si_ftype_v16qi
19414 = build_function_type_list (V4SI_type_node, V16QI_type_node,
19415 NULL_TREE);
19416 tree v2di_ftype_v16qi
19417 = build_function_type_list (V2DI_type_node, V16QI_type_node,
19418 NULL_TREE);
19419 tree v4si_ftype_v8hi
19420 = build_function_type_list (V4SI_type_node, V8HI_type_node,
19421 NULL_TREE);
19422 tree v2di_ftype_v8hi
19423 = build_function_type_list (V2DI_type_node, V8HI_type_node,
19424 NULL_TREE);
19425 tree v2di_ftype_v4si
19426 = build_function_type_list (V2DI_type_node, V4SI_type_node,
19427 NULL_TREE);
19428 tree v2di_ftype_pv2di
19429 = build_function_type_list (V2DI_type_node, pv2di_type_node,
19430 NULL_TREE);
19431 tree v16qi_ftype_v16qi_v16qi_int
19432 = build_function_type_list (V16QI_type_node, V16QI_type_node,
19433 V16QI_type_node, integer_type_node,
19434 NULL_TREE);
19435 tree v16qi_ftype_v16qi_v16qi_v16qi
19436 = build_function_type_list (V16QI_type_node, V16QI_type_node,
19437 V16QI_type_node, V16QI_type_node,
19438 NULL_TREE);
19439 tree v8hi_ftype_v8hi_v8hi_int
19440 = build_function_type_list (V8HI_type_node, V8HI_type_node,
19441 V8HI_type_node, integer_type_node,
19442 NULL_TREE);
19443 tree v4si_ftype_v4si_v4si_int
19444 = build_function_type_list (V4SI_type_node, V4SI_type_node,
19445 V4SI_type_node, integer_type_node,
19446 NULL_TREE);
19447 tree int_ftype_v2di_v2di
19448 = build_function_type_list (integer_type_node,
19449 V2DI_type_node, V2DI_type_node,
19450 NULL_TREE);
06f4e35d
L
19451 tree int_ftype_v16qi_int_v16qi_int_int
19452 = build_function_type_list (integer_type_node,
19453 V16QI_type_node,
19454 integer_type_node,
19455 V16QI_type_node,
19456 integer_type_node,
19457 integer_type_node,
19458 NULL_TREE);
19459 tree v16qi_ftype_v16qi_int_v16qi_int_int
19460 = build_function_type_list (V16QI_type_node,
19461 V16QI_type_node,
19462 integer_type_node,
19463 V16QI_type_node,
19464 integer_type_node,
19465 integer_type_node,
19466 NULL_TREE);
19467 tree int_ftype_v16qi_v16qi_int
19468 = build_function_type_list (integer_type_node,
19469 V16QI_type_node,
19470 V16QI_type_node,
19471 integer_type_node,
19472 NULL_TREE);
04e1d06b
MM
19473
19474 /* SSE5 instructions */
19475 tree v2di_ftype_v2di_v2di_v2di
19476 = build_function_type_list (V2DI_type_node,
19477 V2DI_type_node,
19478 V2DI_type_node,
19479 V2DI_type_node,
19480 NULL_TREE);
19481
19482 tree v4si_ftype_v4si_v4si_v4si
19483 = build_function_type_list (V4SI_type_node,
19484 V4SI_type_node,
19485 V4SI_type_node,
19486 V4SI_type_node,
19487 NULL_TREE);
19488
19489 tree v4si_ftype_v4si_v4si_v2di
19490 = build_function_type_list (V4SI_type_node,
19491 V4SI_type_node,
19492 V4SI_type_node,
19493 V2DI_type_node,
19494 NULL_TREE);
19495
19496 tree v8hi_ftype_v8hi_v8hi_v8hi
19497 = build_function_type_list (V8HI_type_node,
19498 V8HI_type_node,
19499 V8HI_type_node,
19500 V8HI_type_node,
19501 NULL_TREE);
19502
19503 tree v8hi_ftype_v8hi_v8hi_v4si
19504 = build_function_type_list (V8HI_type_node,
19505 V8HI_type_node,
19506 V8HI_type_node,
19507 V4SI_type_node,
19508 NULL_TREE);
19509
19510 tree v2df_ftype_v2df_v2df_v16qi
19511 = build_function_type_list (V2DF_type_node,
19512 V2DF_type_node,
19513 V2DF_type_node,
19514 V16QI_type_node,
19515 NULL_TREE);
19516
19517 tree v4sf_ftype_v4sf_v4sf_v16qi
19518 = build_function_type_list (V4SF_type_node,
19519 V4SF_type_node,
19520 V4SF_type_node,
19521 V16QI_type_node,
19522 NULL_TREE);
19523
19524 tree v2di_ftype_v2di_si
19525 = build_function_type_list (V2DI_type_node,
19526 V2DI_type_node,
19527 integer_type_node,
19528 NULL_TREE);
19529
19530 tree v4si_ftype_v4si_si
19531 = build_function_type_list (V4SI_type_node,
19532 V4SI_type_node,
19533 integer_type_node,
19534 NULL_TREE);
19535
19536 tree v8hi_ftype_v8hi_si
19537 = build_function_type_list (V8HI_type_node,
19538 V8HI_type_node,
19539 integer_type_node,
19540 NULL_TREE);
19541
19542 tree v16qi_ftype_v16qi_si
19543 = build_function_type_list (V16QI_type_node,
19544 V16QI_type_node,
19545 integer_type_node,
19546 NULL_TREE);
19547 tree v4sf_ftype_v4hi
19548 = build_function_type_list (V4SF_type_node,
19549 V4HI_type_node,
19550 NULL_TREE);
19551
19552 tree v4hi_ftype_v4sf
19553 = build_function_type_list (V4HI_type_node,
19554 V4SF_type_node,
19555 NULL_TREE);
19556
19557 tree v2di_ftype_v2di
19558 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
19559
1b667c82
L
19560 tree v16qi_ftype_v8hi_v8hi
19561 = build_function_type_list (V16QI_type_node,
19562 V8HI_type_node, V8HI_type_node,
19563 NULL_TREE);
19564 tree v8hi_ftype_v4si_v4si
19565 = build_function_type_list (V8HI_type_node,
19566 V4SI_type_node, V4SI_type_node,
19567 NULL_TREE);
19568 tree v8hi_ftype_v16qi_v16qi
19569 = build_function_type_list (V8HI_type_node,
19570 V16QI_type_node, V16QI_type_node,
19571 NULL_TREE);
19572 tree v4hi_ftype_v8qi_v8qi
19573 = build_function_type_list (V4HI_type_node,
19574 V8QI_type_node, V8QI_type_node,
19575 NULL_TREE);
a44acfb9
L
19576 tree unsigned_ftype_unsigned_uchar
19577 = build_function_type_list (unsigned_type_node,
19578 unsigned_type_node,
19579 unsigned_char_type_node,
19580 NULL_TREE);
19581 tree unsigned_ftype_unsigned_ushort
19582 = build_function_type_list (unsigned_type_node,
19583 unsigned_type_node,
19584 short_unsigned_type_node,
19585 NULL_TREE);
19586 tree unsigned_ftype_unsigned_unsigned
19587 = build_function_type_list (unsigned_type_node,
19588 unsigned_type_node,
19589 unsigned_type_node,
19590 NULL_TREE);
19591 tree uint64_ftype_uint64_uint64
19592 = build_function_type_list (long_long_unsigned_type_node,
19593 long_long_unsigned_type_node,
19594 long_long_unsigned_type_node,
19595 NULL_TREE);
bb1418c1
L
19596 tree float_ftype_float
19597 = build_function_type_list (float_type_node,
19598 float_type_node,
19599 NULL_TREE);
1b667c82 19600
eb701deb 19601 tree ftype;
f8a1ebc6
JH
19602
19603 /* The __float80 type. */
19604 if (TYPE_MODE (long_double_type_node) == XFmode)
19605 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
19606 "__float80");
19607 else
19608 {
19609 /* The __float80 type. */
edc5bbcd
UB
19610 tree float80_type_node = make_node (REAL_TYPE);
19611
19612 TYPE_PRECISION (float80_type_node) = 80;
19613 layout_type (float80_type_node);
19614 (*lang_hooks.types.register_builtin_type) (float80_type_node,
19615 "__float80");
f8a1ebc6
JH
19616 }
19617
f749178d
JH
19618 if (TARGET_64BIT)
19619 {
edc5bbcd
UB
19620 tree float128_type_node = make_node (REAL_TYPE);
19621
19622 TYPE_PRECISION (float128_type_node) = 128;
19623 layout_type (float128_type_node);
19624 (*lang_hooks.types.register_builtin_type) (float128_type_node,
19625 "__float128");
19626
19627 /* TFmode support builtins. */
19628 ftype = build_function_type (float128_type_node,
19629 void_list_node);
9c32f507 19630 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
edc5bbcd
UB
19631
19632 ftype = build_function_type_list (float128_type_node,
19633 float128_type_node,
19634 NULL_TREE);
e41ef486 19635 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
edc5bbcd
UB
19636
19637 ftype = build_function_type_list (float128_type_node,
19638 float128_type_node,
19639 float128_type_node,
19640 NULL_TREE);
e41ef486 19641 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
f749178d 19642 }
f8a1ebc6 19643
bb1418c1
L
19644 /* Add all special builtins with variable number of operands. */
19645 for (i = 0, d = bdesc_special_args;
19646 i < ARRAY_SIZE (bdesc_special_args);
19647 i++, d++)
19648 {
19649 tree type;
19650
19651 if (d->name == 0)
19652 continue;
19653
19654 switch ((enum ix86_special_builtin_type) d->flag)
19655 {
19656 case VOID_FTYPE_VOID:
19657 type = void_ftype_void;
19658 break;
19659 case V16QI_FTYPE_PCCHAR:
19660 type = v16qi_ftype_pcchar;
19661 break;
19662 case V4SF_FTYPE_PCFLOAT:
19663 type = v4sf_ftype_pcfloat;
19664 break;
19665 case V2DI_FTYPE_PV2DI:
19666 type = v2di_ftype_pv2di;
19667 break;
19668 case V2DF_FTYPE_PCDOUBLE:
19669 type = v2df_ftype_pcdouble;
19670 break;
19671 case V4SF_FTYPE_V4SF_PCV2SF:
19672 type = v4sf_ftype_v4sf_pcv2sf;
19673 break;
19674 case V2DF_FTYPE_V2DF_PCDOUBLE:
19675 type = v2df_ftype_v2df_pcdouble;
19676 break;
19677 case VOID_FTYPE_PV2SF_V4SF:
19678 type = void_ftype_pv2sf_v4sf;
19679 break;
19680 case VOID_FTYPE_PV2DI_V2DI:
19681 type = void_ftype_pv2di_v2di;
19682 break;
19683 case VOID_FTYPE_PCHAR_V16QI:
19684 type = void_ftype_pchar_v16qi;
19685 break;
19686 case VOID_FTYPE_PFLOAT_V4SF:
19687 type = void_ftype_pfloat_v4sf;
19688 break;
19689 case VOID_FTYPE_PDOUBLE_V2DF:
19690 type = void_ftype_pdouble_v2df;
19691 break;
19692 case VOID_FTYPE_PDI_DI:
19693 type = void_ftype_pdi_di;
19694 break;
19695 case VOID_FTYPE_PINT_INT:
19696 type = void_ftype_pint_int;
19697 break;
19698 default:
19699 gcc_unreachable ();
19700 }
19701
19702 def_builtin (d->mask, d->name, type, d->code);
19703 }
19704
f318ff0a
L
19705 /* Add all builtins with variable number of operands. */
19706 for (i = 0, d = bdesc_args;
19707 i < ARRAY_SIZE (bdesc_args);
9a5cee02
L
19708 i++, d++)
19709 {
9a5cee02
L
19710 tree type;
19711
19712 if (d->name == 0)
19713 continue;
9a5cee02 19714
f318ff0a 19715 switch ((enum ix86_builtin_type) d->flag)
9a5cee02 19716 {
bb1418c1
L
19717 case FLOAT_FTYPE_FLOAT:
19718 type = float_ftype_float;
19719 break;
b282a8d3
L
19720 case INT_FTYPE_V2DI_V2DI_PTEST:
19721 type = int_ftype_v2di_v2di;
19722 break;
f318ff0a
L
19723 case INT64_FTYPE_V4SF:
19724 type = int64_ftype_v4sf;
19725 break;
19726 case INT64_FTYPE_V2DF:
19727 type = int64_ftype_v2df;
19728 break;
19729 case INT_FTYPE_V16QI:
19730 type = int_ftype_v16qi;
19731 break;
19732 case INT_FTYPE_V8QI:
19733 type = int_ftype_v8qi;
19734 break;
19735 case INT_FTYPE_V4SF:
19736 type = int_ftype_v4sf;
19737 break;
19738 case INT_FTYPE_V2DF:
19739 type = int_ftype_v2df;
19740 break;
19741 case V16QI_FTYPE_V16QI:
19742 type = v16qi_ftype_v16qi;
19743 break;
19744 case V8HI_FTYPE_V8HI:
19745 type = v8hi_ftype_v8hi;
19746 break;
19747 case V8HI_FTYPE_V16QI:
19748 type = v8hi_ftype_v16qi;
19749 break;
19750 case V8QI_FTYPE_V8QI:
19751 type = v8qi_ftype_v8qi;
19752 break;
19753 case V4SI_FTYPE_V4SI:
19754 type = v4si_ftype_v4si;
19755 break;
19756 case V4SI_FTYPE_V16QI:
19757 type = v4si_ftype_v16qi;
19758 break;
19759 case V4SI_FTYPE_V8HI:
19760 type = v4si_ftype_v8hi;
19761 break;
f318ff0a
L
19762 case V4SI_FTYPE_V4SF:
19763 type = v4si_ftype_v4sf;
19764 break;
19765 case V4SI_FTYPE_V2DF:
19766 type = v4si_ftype_v2df;
19767 break;
3c7ae64a
L
19768 case V4HI_FTYPE_V4HI:
19769 type = v4hi_ftype_v4hi;
19770 break;
f318ff0a 19771 case V4SF_FTYPE_V4SF:
bb1418c1 19772 case V4SF_FTYPE_V4SF_VEC_MERGE:
f318ff0a
L
19773 type = v4sf_ftype_v4sf;
19774 break;
19775 case V4SF_FTYPE_V4SI:
19776 type = v4sf_ftype_v4si;
19777 break;
19778 case V4SF_FTYPE_V2DF:
19779 type = v4sf_ftype_v2df;
19780 break;
19781 case V2DI_FTYPE_V2DI:
19782 type = v2di_ftype_v2di;
19783 break;
19784 case V2DI_FTYPE_V16QI:
19785 type = v2di_ftype_v16qi;
19786 break;
19787 case V2DI_FTYPE_V8HI:
19788 type = v2di_ftype_v8hi;
19789 break;
19790 case V2DI_FTYPE_V4SI:
19791 type = v2di_ftype_v4si;
19792 break;
19793 case V2SI_FTYPE_V2SI:
19794 type = v2si_ftype_v2si;
19795 break;
19796 case V2SI_FTYPE_V4SF:
19797 type = v2si_ftype_v4sf;
19798 break;
19799 case V2SI_FTYPE_V2DF:
19800 type = v2si_ftype_v2df;
19801 break;
19802 case V2SI_FTYPE_V2SF:
19803 type = v2si_ftype_v2sf;
19804 break;
19805 case V2DF_FTYPE_V4SF:
19806 type = v2df_ftype_v4sf;
19807 break;
19808 case V2DF_FTYPE_V2DF:
bb1418c1 19809 case V2DF_FTYPE_V2DF_VEC_MERGE:
f318ff0a
L
19810 type = v2df_ftype_v2df;
19811 break;
19812 case V2DF_FTYPE_V2SI:
19813 type = v2df_ftype_v2si;
19814 break;
19815 case V2DF_FTYPE_V4SI:
19816 type = v2df_ftype_v4si;
19817 break;
19818 case V2SF_FTYPE_V2SF:
19819 type = v2sf_ftype_v2sf;
19820 break;
19821 case V2SF_FTYPE_V2SI:
19822 type = v2sf_ftype_v2si;
19823 break;
1b667c82
L
19824 case V16QI_FTYPE_V16QI_V16QI:
19825 type = v16qi_ftype_v16qi_v16qi;
19826 break;
19827 case V16QI_FTYPE_V8HI_V8HI:
19828 type = v16qi_ftype_v8hi_v8hi;
19829 break;
19830 case V8QI_FTYPE_V8QI_V8QI:
19831 type = v8qi_ftype_v8qi_v8qi;
19832 break;
19833 case V8QI_FTYPE_V4HI_V4HI:
19834 type = v8qi_ftype_v4hi_v4hi;
19835 break;
19836 case V8HI_FTYPE_V8HI_V8HI:
858e5e79 19837 case V8HI_FTYPE_V8HI_V8HI_COUNT:
1b667c82
L
19838 type = v8hi_ftype_v8hi_v8hi;
19839 break;
19840 case V8HI_FTYPE_V16QI_V16QI:
19841 type = v8hi_ftype_v16qi_v16qi;
19842 break;
19843 case V8HI_FTYPE_V4SI_V4SI:
19844 type = v8hi_ftype_v4si_v4si;
19845 break;
858e5e79
L
19846 case V8HI_FTYPE_V8HI_SI_COUNT:
19847 type = v8hi_ftype_v8hi_int;
19848 break;
1b667c82 19849 case V4SI_FTYPE_V4SI_V4SI:
858e5e79 19850 case V4SI_FTYPE_V4SI_V4SI_COUNT:
1b667c82
L
19851 type = v4si_ftype_v4si_v4si;
19852 break;
19853 case V4SI_FTYPE_V8HI_V8HI:
19854 type = v4si_ftype_v8hi_v8hi;
19855 break;
19856 case V4SI_FTYPE_V4SF_V4SF:
19857 type = v4si_ftype_v4sf_v4sf;
19858 break;
19859 case V4SI_FTYPE_V2DF_V2DF:
19860 type = v4si_ftype_v2df_v2df;
19861 break;
858e5e79
L
19862 case V4SI_FTYPE_V4SI_SI_COUNT:
19863 type = v4si_ftype_v4si_int;
19864 break;
1b667c82 19865 case V4HI_FTYPE_V4HI_V4HI:
858e5e79 19866 case V4HI_FTYPE_V4HI_V4HI_COUNT:
1b667c82
L
19867 type = v4hi_ftype_v4hi_v4hi;
19868 break;
19869 case V4HI_FTYPE_V8QI_V8QI:
19870 type = v4hi_ftype_v8qi_v8qi;
19871 break;
19872 case V4HI_FTYPE_V2SI_V2SI:
19873 type = v4hi_ftype_v2si_v2si;
19874 break;
858e5e79
L
19875 case V4HI_FTYPE_V4HI_SI_COUNT:
19876 type = v4hi_ftype_v4hi_int;
19877 break;
1b667c82
L
19878 case V4SF_FTYPE_V4SF_V4SF:
19879 case V4SF_FTYPE_V4SF_V4SF_SWAP:
19880 type = v4sf_ftype_v4sf_v4sf;
19881 break;
19882 case V4SF_FTYPE_V4SF_V2SI:
19883 type = v4sf_ftype_v4sf_v2si;
19884 break;
19885 case V4SF_FTYPE_V4SF_V2DF:
19886 type = v4sf_ftype_v4sf_v2df;
19887 break;
19888 case V4SF_FTYPE_V4SF_DI:
19889 type = v4sf_ftype_v4sf_int64;
19890 break;
19891 case V4SF_FTYPE_V4SF_SI:
19892 type = v4sf_ftype_v4sf_int;
19893 break;
19894 case V2DI_FTYPE_V2DI_V2DI:
858e5e79 19895 case V2DI_FTYPE_V2DI_V2DI_COUNT:
1b667c82
L
19896 type = v2di_ftype_v2di_v2di;
19897 break;
19898 case V2DI_FTYPE_V16QI_V16QI:
19899 type = v2di_ftype_v16qi_v16qi;
19900 break;
19901 case V2DI_FTYPE_V4SI_V4SI:
19902 type = v2di_ftype_v4si_v4si;
19903 break;
bd9513ea
L
19904 case V2DI_FTYPE_V2DI_V16QI:
19905 type = v2di_ftype_v2di_v16qi;
19906 break;
1b667c82
L
19907 case V2DI_FTYPE_V2DF_V2DF:
19908 type = v2di_ftype_v2df_v2df;
19909 break;
858e5e79
L
19910 case V2DI_FTYPE_V2DI_SI_COUNT:
19911 type = v2di_ftype_v2di_int;
19912 break;
1b667c82 19913 case V2SI_FTYPE_V2SI_V2SI:
858e5e79 19914 case V2SI_FTYPE_V2SI_V2SI_COUNT:
1b667c82
L
19915 type = v2si_ftype_v2si_v2si;
19916 break;
19917 case V2SI_FTYPE_V4HI_V4HI:
19918 type = v2si_ftype_v4hi_v4hi;
19919 break;
19920 case V2SI_FTYPE_V2SF_V2SF:
19921 type = v2si_ftype_v2sf_v2sf;
19922 break;
858e5e79
L
19923 case V2SI_FTYPE_V2SI_SI_COUNT:
19924 type = v2si_ftype_v2si_int;
19925 break;
1b667c82
L
19926 case V2DF_FTYPE_V2DF_V2DF:
19927 case V2DF_FTYPE_V2DF_V2DF_SWAP:
19928 type = v2df_ftype_v2df_v2df;
19929 break;
19930 case V2DF_FTYPE_V2DF_V4SF:
19931 type = v2df_ftype_v2df_v4sf;
19932 break;
19933 case V2DF_FTYPE_V2DF_DI:
19934 type = v2df_ftype_v2df_int64;
19935 break;
19936 case V2DF_FTYPE_V2DF_SI:
19937 type = v2df_ftype_v2df_int;
19938 break;
19939 case V2SF_FTYPE_V2SF_V2SF:
19940 type = v2sf_ftype_v2sf_v2sf;
19941 break;
19942 case V1DI_FTYPE_V1DI_V1DI:
858e5e79 19943 case V1DI_FTYPE_V1DI_V1DI_COUNT:
1b667c82
L
19944 type = v1di_ftype_v1di_v1di;
19945 break;
19946 case V1DI_FTYPE_V8QI_V8QI:
19947 type = v1di_ftype_v8qi_v8qi;
19948 break;
19949 case V1DI_FTYPE_V2SI_V2SI:
19950 type = v1di_ftype_v2si_v2si;
19951 break;
858e5e79
L
19952 case V1DI_FTYPE_V1DI_SI_COUNT:
19953 type = v1di_ftype_v1di_int;
19954 break;
a44acfb9
L
19955 case UINT64_FTYPE_UINT64_UINT64:
19956 type = uint64_ftype_uint64_uint64;
19957 break;
19958 case UINT_FTYPE_UINT_UINT:
19959 type = unsigned_ftype_unsigned_unsigned;
19960 break;
19961 case UINT_FTYPE_UINT_USHORT:
19962 type = unsigned_ftype_unsigned_ushort;
19963 break;
19964 case UINT_FTYPE_UINT_UCHAR:
19965 type = unsigned_ftype_unsigned_uchar;
19966 break;
858e5e79
L
19967 case V8HI_FTYPE_V8HI_INT:
19968 type = v8hi_ftype_v8hi_int;
19969 break;
19970 case V4SI_FTYPE_V4SI_INT:
19971 type = v4si_ftype_v4si_int;
19972 break;
19973 case V4HI_FTYPE_V4HI_INT:
19974 type = v4hi_ftype_v4hi_int;
19975 break;
ec2e9a15
L
19976 case V4SF_FTYPE_V4SF_INT:
19977 type = v4sf_ftype_v4sf_int;
19978 break;
19979 case V2DI_FTYPE_V2DI_INT:
a44acfb9 19980 case V2DI2TI_FTYPE_V2DI_INT:
ec2e9a15
L
19981 type = v2di_ftype_v2di_int;
19982 break;
19983 case V2DF_FTYPE_V2DF_INT:
19984 type = v2df_ftype_v2df_int;
19985 break;
19986 case V16QI_FTYPE_V16QI_V16QI_V16QI:
19987 type = v16qi_ftype_v16qi_v16qi_v16qi;
19988 break;
19989 case V4SF_FTYPE_V4SF_V4SF_V4SF:
19990 type = v4sf_ftype_v4sf_v4sf_v4sf;
19991 break;
19992 case V2DF_FTYPE_V2DF_V2DF_V2DF:
19993 type = v2df_ftype_v2df_v2df_v2df;
19994 break;
19995 case V16QI_FTYPE_V16QI_V16QI_INT:
9a5cee02
L
19996 type = v16qi_ftype_v16qi_v16qi_int;
19997 break;
ec2e9a15 19998 case V8HI_FTYPE_V8HI_V8HI_INT:
9a5cee02
L
19999 type = v8hi_ftype_v8hi_v8hi_int;
20000 break;
ec2e9a15 20001 case V4SI_FTYPE_V4SI_V4SI_INT:
9a5cee02
L
20002 type = v4si_ftype_v4si_v4si_int;
20003 break;
ec2e9a15
L
20004 case V4SF_FTYPE_V4SF_V4SF_INT:
20005 type = v4sf_ftype_v4sf_v4sf_int;
20006 break;
20007 case V2DI_FTYPE_V2DI_V2DI_INT:
a44acfb9 20008 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
9a5cee02
L
20009 type = v2di_ftype_v2di_v2di_int;
20010 break;
ec2e9a15 20011 case V2DF_FTYPE_V2DF_V2DF_INT:
9a5cee02
L
20012 type = v2df_ftype_v2df_v2df_int;
20013 break;
bd9513ea
L
20014 case V2DI_FTYPE_V2DI_UINT_UINT:
20015 type = v2di_ftype_v2di_unsigned_unsigned;
20016 break;
20017 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
20018 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
20019 break;
99c25ac1
L
20020 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
20021 type = v1di_ftype_v1di_v1di_int;
858e5e79 20022 break;
9a5cee02
L
20023 default:
20024 gcc_unreachable ();
20025 }
20026
e41ef486 20027 def_builtin_const (d->mask, d->name, type, d->code);
9a5cee02
L
20028 }
20029
06f4e35d
L
20030 /* pcmpestr[im] insns. */
20031 for (i = 0, d = bdesc_pcmpestr;
20032 i < ARRAY_SIZE (bdesc_pcmpestr);
20033 i++, d++)
20034 {
20035 if (d->code == IX86_BUILTIN_PCMPESTRM128)
20036 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
20037 else
20038 ftype = int_ftype_v16qi_int_v16qi_int_int;
e41ef486 20039 def_builtin_const (d->mask, d->name, ftype, d->code);
06f4e35d
L
20040 }
20041
20042 /* pcmpistr[im] insns. */
20043 for (i = 0, d = bdesc_pcmpistr;
20044 i < ARRAY_SIZE (bdesc_pcmpistr);
20045 i++, d++)
20046 {
20047 if (d->code == IX86_BUILTIN_PCMPISTRM128)
20048 ftype = v16qi_ftype_v16qi_v16qi_int;
20049 else
20050 ftype = int_ftype_v16qi_v16qi_int;
e41ef486 20051 def_builtin_const (d->mask, d->name, ftype, d->code);
06f4e35d
L
20052 }
20053
bd793c65 20054 /* comi/ucomi insns. */
ca7558fc 20055 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
853a33f3 20056 if (d->mask == OPTION_MASK_ISA_SSE2)
e41ef486 20057 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
fbe5eb6d 20058 else
e41ef486 20059 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 20060
bb1418c1 20061 /* SSE */
853a33f3
UB
20062 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
20063 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
37f22004 20064
bb1418c1 20065 /* SSE or 3DNow!A */
853a33f3 20066 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
37f22004 20067
fbe5eb6d 20068 /* SSE2 */
853a33f3 20069 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
fbe5eb6d 20070
853a33f3 20071 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
79f5e442 20072 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
916b60b7 20073
f318ff0a 20074 /* SSE3. */
853a33f3
UB
20075 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
20076 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
04e1d06b 20077
8b96a312
L
20078 /* AES */
20079 if (TARGET_AES)
20080 {
20081 /* Define AES built-in functions only if AES is enabled. */
20082 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
20083 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
20084 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
20085 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
20086 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
20087 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
20088 }
20089
20090 /* PCLMUL */
20091 if (TARGET_PCLMUL)
20092 {
20093 /* Define PCLMUL built-in function only if PCLMUL is enabled. */
20094 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
20095 }
20096
eb701deb
RH
20097 /* Access to the vec_init patterns. */
20098 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
20099 integer_type_node, NULL_TREE);
e41ef486 20100 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
eb701deb
RH
20101
20102 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
20103 short_integer_type_node,
20104 short_integer_type_node,
20105 short_integer_type_node, NULL_TREE);
e41ef486 20106 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
eb701deb
RH
20107
20108 ftype = build_function_type_list (V8QI_type_node, char_type_node,
20109 char_type_node, char_type_node,
20110 char_type_node, char_type_node,
20111 char_type_node, char_type_node,
20112 char_type_node, NULL_TREE);
e41ef486 20113 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
eb701deb
RH
20114
20115 /* Access to the vec_extract patterns. */
20116 ftype = build_function_type_list (double_type_node, V2DF_type_node,
20117 integer_type_node, NULL_TREE);
e41ef486 20118 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
eb701deb
RH
20119
20120 ftype = build_function_type_list (long_long_integer_type_node,
20121 V2DI_type_node, integer_type_node,
20122 NULL_TREE);
e41ef486 20123 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
eb701deb
RH
20124
20125 ftype = build_function_type_list (float_type_node, V4SF_type_node,
20126 integer_type_node, NULL_TREE);
e41ef486 20127 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
eb701deb 20128
ed9b5396
RH
20129 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
20130 integer_type_node, NULL_TREE);
e41ef486 20131 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
ed9b5396 20132
eb701deb
RH
20133 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
20134 integer_type_node, NULL_TREE);
e41ef486 20135 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
eb701deb
RH
20136
20137 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
20138 integer_type_node, NULL_TREE);
e41ef486 20139 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
eb701deb 20140
0f2698d0
RH
20141 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
20142 integer_type_node, NULL_TREE);
e41ef486 20143 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
0f2698d0 20144
9a5cee02
L
20145 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
20146 integer_type_node, NULL_TREE);
e41ef486 20147 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
9a5cee02 20148
eb701deb 20149 /* Access to the vec_set patterns. */
9a5cee02
L
20150 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
20151 intDI_type_node,
20152 integer_type_node, NULL_TREE);
e41ef486 20153 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
9a5cee02
L
20154
20155 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
20156 float_type_node,
20157 integer_type_node, NULL_TREE);
e41ef486 20158 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
9a5cee02
L
20159
20160 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
20161 intSI_type_node,
20162 integer_type_node, NULL_TREE);
e41ef486 20163 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
9a5cee02 20164
eb701deb
RH
20165 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
20166 intHI_type_node,
20167 integer_type_node, NULL_TREE);
e41ef486 20168 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
5656a184 20169
eb701deb
RH
20170 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
20171 intHI_type_node,
20172 integer_type_node, NULL_TREE);
e41ef486 20173 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
9a5cee02
L
20174
20175 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
20176 intQI_type_node,
20177 integer_type_node, NULL_TREE);
e41ef486 20178 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
04e1d06b
MM
20179
20180 /* Add SSE5 multi-arg argument instructions */
20181 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
20182 {
20183 tree mtype = NULL_TREE;
20184
20185 if (d->name == 0)
20186 continue;
20187
20188 switch ((enum multi_arg_type)d->flag)
20189 {
20190 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
20191 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
20192 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
20193 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
20194 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
20195 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
20196 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
20197 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
20198 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
20199 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
20200 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
20201 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
20202 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
20203 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
20204 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
20205 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
20206 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
20207 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
20208 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
20209 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
20210 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
20211 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
20212 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
20213 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
20214 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
20215 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
20216 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
20217 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
20218 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
20219 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
20220 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
20221 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
20222 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
20223 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
20224 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
20225 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
20226 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
20227 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
20228 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
20229 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
20230 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
20231 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
20232 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
20233 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
20234 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
20235 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
20236 case MULTI_ARG_UNKNOWN:
20237 default:
20238 gcc_unreachable ();
20239 }
20240
20241 if (mtype)
20242 def_builtin_const (d->mask, d->name, mtype, d->code);
20243 }
bd793c65
BS
20244}
20245
2ed941ec
RH
20246static void
20247ix86_init_builtins (void)
20248{
20249 if (TARGET_MMX)
20250 ix86_init_mmx_sse_builtins ();
20251}
20252
bd793c65
BS
20253/* Errors in the source file can cause expand_expr to return const0_rtx
20254 where we expect a vector. To avoid crashing, use one of the vector
20255 clear instructions. */
20256static rtx
b96a374d 20257safe_vector_operand (rtx x, enum machine_mode mode)
bd793c65 20258{
ef719a44
RH
20259 if (x == const0_rtx)
20260 x = CONST0_RTX (mode);
bd793c65
BS
20261 return x;
20262}
20263
20264/* Subroutine of ix86_expand_builtin to take care of binop insns. */
20265
20266static rtx
5039610b 20267ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
bd793c65 20268{
ffbaf337 20269 rtx pat;
5039610b
SL
20270 tree arg0 = CALL_EXPR_ARG (exp, 0);
20271 tree arg1 = CALL_EXPR_ARG (exp, 1);
84217346
MD
20272 rtx op0 = expand_normal (arg0);
20273 rtx op1 = expand_normal (arg1);
bd793c65
BS
20274 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20275 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20276 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20277
20278 if (VECTOR_MODE_P (mode0))
20279 op0 = safe_vector_operand (op0, mode0);
20280 if (VECTOR_MODE_P (mode1))
20281 op1 = safe_vector_operand (op1, mode1);
20282
e358acde 20283 if (optimize || !target
bd793c65
BS
20284 || GET_MODE (target) != tmode
20285 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20286 target = gen_reg_rtx (tmode);
20287
d9deed68
JH
20288 if (GET_MODE (op1) == SImode && mode1 == TImode)
20289 {
20290 rtx x = gen_reg_rtx (V4SImode);
20291 emit_insn (gen_sse2_loadd (x, op1));
20292 op1 = gen_lowpart (TImode, x);
20293 }
20294
ef719a44 20295 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
bd793c65 20296 op0 = copy_to_mode_reg (mode0, op0);
ef719a44 20297 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
bd793c65
BS
20298 op1 = copy_to_mode_reg (mode1, op1);
20299
eb701deb 20300 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
20301 if (! pat)
20302 return 0;
ffbaf337 20303
bd793c65 20304 emit_insn (pat);
ffbaf337 20305
bd793c65
BS
20306 return target;
20307}
20308
04e1d06b
MM
20309/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
20310
20311static rtx
20312ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
20313 enum multi_arg_type m_type,
20314 enum insn_code sub_code)
20315{
20316 rtx pat;
20317 int i;
20318 int nargs;
20319 bool comparison_p = false;
20320 bool tf_p = false;
20321 bool last_arg_constant = false;
20322 int num_memory = 0;
20323 struct {
20324 rtx op;
20325 enum machine_mode mode;
20326 } args[4];
20327
20328 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20329
20330 switch (m_type)
20331 {
20332 case MULTI_ARG_3_SF:
20333 case MULTI_ARG_3_DF:
20334 case MULTI_ARG_3_DI:
20335 case MULTI_ARG_3_SI:
20336 case MULTI_ARG_3_SI_DI:
20337 case MULTI_ARG_3_HI:
20338 case MULTI_ARG_3_HI_SI:
20339 case MULTI_ARG_3_QI:
20340 case MULTI_ARG_3_PERMPS:
20341 case MULTI_ARG_3_PERMPD:
20342 nargs = 3;
20343 break;
20344
20345 case MULTI_ARG_2_SF:
20346 case MULTI_ARG_2_DF:
20347 case MULTI_ARG_2_DI:
20348 case MULTI_ARG_2_SI:
20349 case MULTI_ARG_2_HI:
20350 case MULTI_ARG_2_QI:
20351 nargs = 2;
20352 break;
20353
20354 case MULTI_ARG_2_DI_IMM:
20355 case MULTI_ARG_2_SI_IMM:
20356 case MULTI_ARG_2_HI_IMM:
20357 case MULTI_ARG_2_QI_IMM:
20358 nargs = 2;
20359 last_arg_constant = true;
20360 break;
20361
20362 case MULTI_ARG_1_SF:
20363 case MULTI_ARG_1_DF:
20364 case MULTI_ARG_1_DI:
20365 case MULTI_ARG_1_SI:
20366 case MULTI_ARG_1_HI:
20367 case MULTI_ARG_1_QI:
20368 case MULTI_ARG_1_SI_DI:
20369 case MULTI_ARG_1_HI_DI:
20370 case MULTI_ARG_1_HI_SI:
20371 case MULTI_ARG_1_QI_DI:
20372 case MULTI_ARG_1_QI_SI:
20373 case MULTI_ARG_1_QI_HI:
20374 case MULTI_ARG_1_PH2PS:
20375 case MULTI_ARG_1_PS2PH:
20376 nargs = 1;
20377 break;
20378
20379 case MULTI_ARG_2_SF_CMP:
20380 case MULTI_ARG_2_DF_CMP:
20381 case MULTI_ARG_2_DI_CMP:
20382 case MULTI_ARG_2_SI_CMP:
20383 case MULTI_ARG_2_HI_CMP:
20384 case MULTI_ARG_2_QI_CMP:
20385 nargs = 2;
20386 comparison_p = true;
20387 break;
20388
20389 case MULTI_ARG_2_SF_TF:
20390 case MULTI_ARG_2_DF_TF:
20391 case MULTI_ARG_2_DI_TF:
20392 case MULTI_ARG_2_SI_TF:
20393 case MULTI_ARG_2_HI_TF:
20394 case MULTI_ARG_2_QI_TF:
20395 nargs = 2;
20396 tf_p = true;
20397 break;
20398
20399 case MULTI_ARG_UNKNOWN:
20400 default:
20401 gcc_unreachable ();
20402 }
20403
20404 if (optimize || !target
20405 || GET_MODE (target) != tmode
20406 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20407 target = gen_reg_rtx (tmode);
20408
20409 gcc_assert (nargs <= 4);
20410
20411 for (i = 0; i < nargs; i++)
20412 {
20413 tree arg = CALL_EXPR_ARG (exp, i);
20414 rtx op = expand_normal (arg);
20415 int adjust = (comparison_p) ? 1 : 0;
20416 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
20417
20418 if (last_arg_constant && i == nargs-1)
20419 {
20420 if (GET_CODE (op) != CONST_INT)
20421 {
20422 error ("last argument must be an immediate");
20423 return gen_reg_rtx (tmode);
20424 }
20425 }
20426 else
20427 {
20428 if (VECTOR_MODE_P (mode))
20429 op = safe_vector_operand (op, mode);
20430
20431 /* If we aren't optimizing, only allow one memory operand to be
20432 generated. */
20433 if (memory_operand (op, mode))
20434 num_memory++;
20435
20436 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
20437
4f3f76e6 20438 if (optimize
04e1d06b
MM
20439 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
20440 || num_memory > 1)
20441 op = force_reg (mode, op);
20442 }
20443
20444 args[i].op = op;
20445 args[i].mode = mode;
20446 }
20447
20448 switch (nargs)
20449 {
20450 case 1:
20451 pat = GEN_FCN (icode) (target, args[0].op);
20452 break;
20453
20454 case 2:
20455 if (tf_p)
20456 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
20457 GEN_INT ((int)sub_code));
20458 else if (! comparison_p)
20459 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
20460 else
20461 {
20462 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
20463 args[0].op,
20464 args[1].op);
20465
20466 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
20467 }
20468 break;
20469
20470 case 3:
20471 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
20472 break;
20473
20474 default:
20475 gcc_unreachable ();
20476 }
20477
20478 if (! pat)
20479 return 0;
20480
20481 emit_insn (pat);
20482 return target;
20483}
20484
bb1418c1
L
20485/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
20486 insns with vec_merge. */
bd793c65
BS
20487
20488static rtx
bb1418c1
L
20489ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
20490 rtx target)
bd793c65
BS
20491{
20492 rtx pat;
5039610b 20493 tree arg0 = CALL_EXPR_ARG (exp, 0);
84217346 20494 rtx op1, op0 = expand_normal (arg0);
bd793c65
BS
20495 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20496 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20497
e358acde 20498 if (optimize || !target
bd793c65
BS
20499 || GET_MODE (target) != tmode
20500 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20501 target = gen_reg_rtx (tmode);
20502
20503 if (VECTOR_MODE_P (mode0))
20504 op0 = safe_vector_operand (op0, mode0);
20505
e358acde
RH
20506 if ((optimize && !register_operand (op0, mode0))
20507 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
bd793c65 20508 op0 = copy_to_mode_reg (mode0, op0);
fce5a9f2 20509
59bef189
RH
20510 op1 = op0;
20511 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
20512 op1 = copy_to_mode_reg (mode0, op1);
fce5a9f2 20513
59bef189 20514 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
20515 if (! pat)
20516 return 0;
20517 emit_insn (pat);
20518 return target;
20519}
20520
20521/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
20522
20523static rtx
1b667c82
L
20524ix86_expand_sse_compare (const struct builtin_description *d,
20525 tree exp, rtx target, bool swap)
bd793c65
BS
20526{
20527 rtx pat;
5039610b
SL
20528 tree arg0 = CALL_EXPR_ARG (exp, 0);
20529 tree arg1 = CALL_EXPR_ARG (exp, 1);
84217346
MD
20530 rtx op0 = expand_normal (arg0);
20531 rtx op1 = expand_normal (arg1);
bd793c65
BS
20532 rtx op2;
20533 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
20534 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
20535 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
20536 enum rtx_code comparison = d->comparison;
20537
20538 if (VECTOR_MODE_P (mode0))
20539 op0 = safe_vector_operand (op0, mode0);
20540 if (VECTOR_MODE_P (mode1))
20541 op1 = safe_vector_operand (op1, mode1);
20542
20543 /* Swap operands if we have a comparison that isn't available in
20544 hardware. */
1b667c82 20545 if (swap)
bd793c65 20546 {
21e1b5f1
BS
20547 rtx tmp = gen_reg_rtx (mode1);
20548 emit_move_insn (tmp, op1);
bd793c65 20549 op1 = op0;
21e1b5f1 20550 op0 = tmp;
bd793c65 20551 }
21e1b5f1 20552
e358acde 20553 if (optimize || !target
21e1b5f1
BS
20554 || GET_MODE (target) != tmode
20555 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
bd793c65
BS
20556 target = gen_reg_rtx (tmode);
20557
e358acde
RH
20558 if ((optimize && !register_operand (op0, mode0))
20559 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
bd793c65 20560 op0 = copy_to_mode_reg (mode0, op0);
e358acde
RH
20561 if ((optimize && !register_operand (op1, mode1))
20562 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
bd793c65
BS
20563 op1 = copy_to_mode_reg (mode1, op1);
20564
20565 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
20566 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
20567 if (! pat)
20568 return 0;
20569 emit_insn (pat);
20570 return target;
20571}
20572
b282a8d3 20573/* Subroutine of ix86_expand_builtin to take care of comi insns. */
1b667c82
L
20574
20575static rtx
b282a8d3
L
20576ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
20577 rtx target)
1b667c82 20578{
b282a8d3
L
20579 rtx pat;
20580 tree arg0 = CALL_EXPR_ARG (exp, 0);
20581 tree arg1 = CALL_EXPR_ARG (exp, 1);
20582 rtx op0 = expand_normal (arg0);
20583 rtx op1 = expand_normal (arg1);
20584 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20585 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
1b667c82
L
20586 enum rtx_code comparison = d->comparison;
20587
b282a8d3
L
20588 if (VECTOR_MODE_P (mode0))
20589 op0 = safe_vector_operand (op0, mode0);
20590 if (VECTOR_MODE_P (mode1))
20591 op1 = safe_vector_operand (op1, mode1);
20592
20593 /* Swap operands if we have a comparison that isn't available in
20594 hardware. */
20595 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
1b667c82 20596 {
b282a8d3
L
20597 rtx tmp = op1;
20598 op1 = op0;
20599 op0 = tmp;
1b667c82
L
20600 }
20601
b282a8d3
L
20602 target = gen_reg_rtx (SImode);
20603 emit_move_insn (target, const0_rtx);
20604 target = gen_rtx_SUBREG (QImode, target, 0);
1b667c82 20605
b282a8d3
L
20606 if ((optimize && !register_operand (op0, mode0))
20607 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20608 op0 = copy_to_mode_reg (mode0, op0);
20609 if ((optimize && !register_operand (op1, mode1))
20610 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20611 op1 = copy_to_mode_reg (mode1, op1);
1b667c82 20612
b282a8d3
L
20613 pat = GEN_FCN (d->icode) (op0, op1);
20614 if (! pat)
20615 return 0;
20616 emit_insn (pat);
20617 emit_insn (gen_rtx_SET (VOIDmode,
20618 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20619 gen_rtx_fmt_ee (comparison, QImode,
20620 SET_DEST (pat),
20621 const0_rtx)));
1b667c82 20622
b282a8d3
L
20623 return SUBREG_REG (target);
20624}
1b667c82 20625
b282a8d3 20626/* Subroutine of ix86_expand_builtin to take care of ptest insns. */
bd793c65
BS
20627
20628static rtx
b282a8d3
L
20629ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
20630 rtx target)
bd793c65
BS
20631{
20632 rtx pat;
5039610b
SL
20633 tree arg0 = CALL_EXPR_ARG (exp, 0);
20634 tree arg1 = CALL_EXPR_ARG (exp, 1);
84217346
MD
20635 rtx op0 = expand_normal (arg0);
20636 rtx op1 = expand_normal (arg1);
bd793c65
BS
20637 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20638 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20639 enum rtx_code comparison = d->comparison;
20640
20641 if (VECTOR_MODE_P (mode0))
20642 op0 = safe_vector_operand (op0, mode0);
20643 if (VECTOR_MODE_P (mode1))
20644 op1 = safe_vector_operand (op1, mode1);
20645
bd793c65
BS
20646 target = gen_reg_rtx (SImode);
20647 emit_move_insn (target, const0_rtx);
20648 target = gen_rtx_SUBREG (QImode, target, 0);
20649
e358acde
RH
20650 if ((optimize && !register_operand (op0, mode0))
20651 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
bd793c65 20652 op0 = copy_to_mode_reg (mode0, op0);
e358acde
RH
20653 if ((optimize && !register_operand (op1, mode1))
20654 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
bd793c65
BS
20655 op1 = copy_to_mode_reg (mode1, op1);
20656
1194ca05 20657 pat = GEN_FCN (d->icode) (op0, op1);
bd793c65
BS
20658 if (! pat)
20659 return 0;
20660 emit_insn (pat);
29628f27
BS
20661 emit_insn (gen_rtx_SET (VOIDmode,
20662 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20663 gen_rtx_fmt_ee (comparison, QImode,
1194ca05 20664 SET_DEST (pat),
29628f27 20665 const0_rtx)));
bd793c65 20666
6f1a6c5b 20667 return SUBREG_REG (target);
bd793c65
BS
20668}
20669
b282a8d3 20670/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
9a5cee02
L
20671
20672static rtx
b282a8d3
L
20673ix86_expand_sse_pcmpestr (const struct builtin_description *d,
20674 tree exp, rtx target)
9a5cee02
L
20675{
20676 rtx pat;
20677 tree arg0 = CALL_EXPR_ARG (exp, 0);
20678 tree arg1 = CALL_EXPR_ARG (exp, 1);
b282a8d3
L
20679 tree arg2 = CALL_EXPR_ARG (exp, 2);
20680 tree arg3 = CALL_EXPR_ARG (exp, 3);
20681 tree arg4 = CALL_EXPR_ARG (exp, 4);
20682 rtx scratch0, scratch1;
06f4e35d
L
20683 rtx op0 = expand_normal (arg0);
20684 rtx op1 = expand_normal (arg1);
20685 rtx op2 = expand_normal (arg2);
20686 rtx op3 = expand_normal (arg3);
20687 rtx op4 = expand_normal (arg4);
20688 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
20689
20690 tmode0 = insn_data[d->icode].operand[0].mode;
20691 tmode1 = insn_data[d->icode].operand[1].mode;
20692 modev2 = insn_data[d->icode].operand[2].mode;
20693 modei3 = insn_data[d->icode].operand[3].mode;
20694 modev4 = insn_data[d->icode].operand[4].mode;
20695 modei5 = insn_data[d->icode].operand[5].mode;
20696 modeimm = insn_data[d->icode].operand[6].mode;
20697
20698 if (VECTOR_MODE_P (modev2))
20699 op0 = safe_vector_operand (op0, modev2);
20700 if (VECTOR_MODE_P (modev4))
20701 op2 = safe_vector_operand (op2, modev4);
20702
c7a69424 20703 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
06f4e35d 20704 op0 = copy_to_mode_reg (modev2, op0);
c7a69424 20705 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
06f4e35d
L
20706 op1 = copy_to_mode_reg (modei3, op1);
20707 if ((optimize && !register_operand (op2, modev4))
20708 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
20709 op2 = copy_to_mode_reg (modev4, op2);
c7a69424 20710 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
06f4e35d
L
20711 op3 = copy_to_mode_reg (modei5, op3);
20712
20713 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
20714 {
20715 error ("the fifth argument must be a 8-bit immediate");
20716 return const0_rtx;
20717 }
20718
20719 if (d->code == IX86_BUILTIN_PCMPESTRI128)
20720 {
20721 if (optimize || !target
20722 || GET_MODE (target) != tmode0
20723 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20724 target = gen_reg_rtx (tmode0);
20725
20726 scratch1 = gen_reg_rtx (tmode1);
20727
20728 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
20729 }
20730 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
20731 {
20732 if (optimize || !target
20733 || GET_MODE (target) != tmode1
20734 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20735 target = gen_reg_rtx (tmode1);
20736
b282a8d3
L
20737 scratch0 = gen_reg_rtx (tmode0);
20738
20739 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
20740 }
20741 else
20742 {
20743 gcc_assert (d->flag);
20744
20745 scratch0 = gen_reg_rtx (tmode0);
20746 scratch1 = gen_reg_rtx (tmode1);
20747
20748 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
20749 }
20750
20751 if (! pat)
20752 return 0;
20753
20754 emit_insn (pat);
20755
20756 if (d->flag)
20757 {
20758 target = gen_reg_rtx (SImode);
20759 emit_move_insn (target, const0_rtx);
20760 target = gen_rtx_SUBREG (QImode, target, 0);
20761
20762 emit_insn
20763 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20764 gen_rtx_fmt_ee (EQ, QImode,
20765 gen_rtx_REG ((enum machine_mode) d->flag,
20766 FLAGS_REG),
20767 const0_rtx)));
20768 return SUBREG_REG (target);
20769 }
20770 else
20771 return target;
20772}
20773
20774
20775/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
20776
20777static rtx
20778ix86_expand_sse_pcmpistr (const struct builtin_description *d,
20779 tree exp, rtx target)
20780{
20781 rtx pat;
20782 tree arg0 = CALL_EXPR_ARG (exp, 0);
20783 tree arg1 = CALL_EXPR_ARG (exp, 1);
20784 tree arg2 = CALL_EXPR_ARG (exp, 2);
20785 rtx scratch0, scratch1;
20786 rtx op0 = expand_normal (arg0);
20787 rtx op1 = expand_normal (arg1);
20788 rtx op2 = expand_normal (arg2);
20789 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
20790
20791 tmode0 = insn_data[d->icode].operand[0].mode;
20792 tmode1 = insn_data[d->icode].operand[1].mode;
20793 modev2 = insn_data[d->icode].operand[2].mode;
20794 modev3 = insn_data[d->icode].operand[3].mode;
20795 modeimm = insn_data[d->icode].operand[4].mode;
20796
20797 if (VECTOR_MODE_P (modev2))
20798 op0 = safe_vector_operand (op0, modev2);
20799 if (VECTOR_MODE_P (modev3))
20800 op1 = safe_vector_operand (op1, modev3);
20801
20802 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20803 op0 = copy_to_mode_reg (modev2, op0);
20804 if ((optimize && !register_operand (op1, modev3))
20805 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
20806 op1 = copy_to_mode_reg (modev3, op1);
20807
20808 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
20809 {
20810 error ("the third argument must be a 8-bit immediate");
20811 return const0_rtx;
20812 }
20813
20814 if (d->code == IX86_BUILTIN_PCMPISTRI128)
20815 {
20816 if (optimize || !target
20817 || GET_MODE (target) != tmode0
20818 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20819 target = gen_reg_rtx (tmode0);
20820
20821 scratch1 = gen_reg_rtx (tmode1);
20822
20823 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
20824 }
20825 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
20826 {
20827 if (optimize || !target
20828 || GET_MODE (target) != tmode1
20829 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20830 target = gen_reg_rtx (tmode1);
20831
20832 scratch0 = gen_reg_rtx (tmode0);
20833
20834 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
20835 }
20836 else
20837 {
20838 gcc_assert (d->flag);
20839
20840 scratch0 = gen_reg_rtx (tmode0);
20841 scratch1 = gen_reg_rtx (tmode1);
20842
20843 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
20844 }
20845
20846 if (! pat)
20847 return 0;
20848
20849 emit_insn (pat);
20850
20851 if (d->flag)
20852 {
20853 target = gen_reg_rtx (SImode);
20854 emit_move_insn (target, const0_rtx);
20855 target = gen_rtx_SUBREG (QImode, target, 0);
20856
20857 emit_insn
20858 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20859 gen_rtx_fmt_ee (EQ, QImode,
20860 gen_rtx_REG ((enum machine_mode) d->flag,
20861 FLAGS_REG),
20862 const0_rtx)));
20863 return SUBREG_REG (target);
20864 }
20865 else
20866 return target;
20867}
20868
20869/* Subroutine of ix86_expand_builtin to take care of insns with
20870 variable number of operands. */
20871
20872static rtx
20873ix86_expand_args_builtin (const struct builtin_description *d,
20874 tree exp, rtx target)
20875{
20876 rtx pat, real_target;
20877 unsigned int i, nargs;
20878 unsigned int nargs_constant = 0;
20879 int num_memory = 0;
20880 struct
20881 {
20882 rtx op;
20883 enum machine_mode mode;
20884 } args[4];
20885 bool last_arg_count = false;
20886 enum insn_code icode = d->icode;
20887 const struct insn_data *insn_p = &insn_data[icode];
20888 enum machine_mode tmode = insn_p->operand[0].mode;
20889 enum machine_mode rmode = VOIDmode;
20890 bool swap = false;
20891 enum rtx_code comparison = d->comparison;
20892
20893 switch ((enum ix86_builtin_type) d->flag)
20894 {
20895 case INT_FTYPE_V2DI_V2DI_PTEST:
20896 return ix86_expand_sse_ptest (d, exp, target);
20897 case FLOAT128_FTYPE_FLOAT128:
20898 case FLOAT_FTYPE_FLOAT:
20899 case INT64_FTYPE_V4SF:
20900 case INT64_FTYPE_V2DF:
20901 case INT_FTYPE_V16QI:
20902 case INT_FTYPE_V8QI:
20903 case INT_FTYPE_V4SF:
20904 case INT_FTYPE_V2DF:
20905 case V16QI_FTYPE_V16QI:
20906 case V8HI_FTYPE_V8HI:
20907 case V8HI_FTYPE_V16QI:
20908 case V8QI_FTYPE_V8QI:
20909 case V4SI_FTYPE_V4SI:
20910 case V4SI_FTYPE_V16QI:
20911 case V4SI_FTYPE_V4SF:
20912 case V4SI_FTYPE_V8HI:
20913 case V4SI_FTYPE_V2DF:
20914 case V4HI_FTYPE_V4HI:
20915 case V4SF_FTYPE_V4SF:
20916 case V4SF_FTYPE_V4SI:
20917 case V4SF_FTYPE_V2DF:
20918 case V2DI_FTYPE_V2DI:
20919 case V2DI_FTYPE_V16QI:
20920 case V2DI_FTYPE_V8HI:
20921 case V2DI_FTYPE_V4SI:
20922 case V2DF_FTYPE_V2DF:
20923 case V2DF_FTYPE_V4SI:
20924 case V2DF_FTYPE_V4SF:
20925 case V2DF_FTYPE_V2SI:
20926 case V2SI_FTYPE_V2SI:
20927 case V2SI_FTYPE_V4SF:
20928 case V2SI_FTYPE_V2SF:
20929 case V2SI_FTYPE_V2DF:
20930 case V2SF_FTYPE_V2SF:
20931 case V2SF_FTYPE_V2SI:
20932 nargs = 1;
20933 break;
20934 case V4SF_FTYPE_V4SF_VEC_MERGE:
20935 case V2DF_FTYPE_V2DF_VEC_MERGE:
20936 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
20937 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
20938 case V16QI_FTYPE_V16QI_V16QI:
20939 case V16QI_FTYPE_V8HI_V8HI:
20940 case V8QI_FTYPE_V8QI_V8QI:
20941 case V8QI_FTYPE_V4HI_V4HI:
20942 case V8HI_FTYPE_V8HI_V8HI:
20943 case V8HI_FTYPE_V16QI_V16QI:
20944 case V8HI_FTYPE_V4SI_V4SI:
20945 case V4SI_FTYPE_V4SI_V4SI:
20946 case V4SI_FTYPE_V8HI_V8HI:
20947 case V4SI_FTYPE_V4SF_V4SF:
20948 case V4SI_FTYPE_V2DF_V2DF:
20949 case V4HI_FTYPE_V4HI_V4HI:
20950 case V4HI_FTYPE_V8QI_V8QI:
20951 case V4HI_FTYPE_V2SI_V2SI:
20952 case V4SF_FTYPE_V4SF_V4SF:
20953 case V4SF_FTYPE_V4SF_V2SI:
20954 case V4SF_FTYPE_V4SF_V2DF:
20955 case V4SF_FTYPE_V4SF_DI:
20956 case V4SF_FTYPE_V4SF_SI:
20957 case V2DI_FTYPE_V2DI_V2DI:
20958 case V2DI_FTYPE_V16QI_V16QI:
20959 case V2DI_FTYPE_V4SI_V4SI:
20960 case V2DI_FTYPE_V2DI_V16QI:
20961 case V2DI_FTYPE_V2DF_V2DF:
20962 case V2SI_FTYPE_V2SI_V2SI:
20963 case V2SI_FTYPE_V4HI_V4HI:
20964 case V2SI_FTYPE_V2SF_V2SF:
20965 case V2DF_FTYPE_V2DF_V2DF:
20966 case V2DF_FTYPE_V2DF_V4SF:
20967 case V2DF_FTYPE_V2DF_DI:
20968 case V2DF_FTYPE_V2DF_SI:
20969 case V2SF_FTYPE_V2SF_V2SF:
20970 case V1DI_FTYPE_V1DI_V1DI:
20971 case V1DI_FTYPE_V8QI_V8QI:
20972 case V1DI_FTYPE_V2SI_V2SI:
20973 if (comparison == UNKNOWN)
20974 return ix86_expand_binop_builtin (icode, exp, target);
20975 nargs = 2;
20976 break;
20977 case V4SF_FTYPE_V4SF_V4SF_SWAP:
20978 case V2DF_FTYPE_V2DF_V2DF_SWAP:
20979 gcc_assert (comparison != UNKNOWN);
20980 nargs = 2;
20981 swap = true;
20982 break;
20983 case V8HI_FTYPE_V8HI_V8HI_COUNT:
20984 case V8HI_FTYPE_V8HI_SI_COUNT:
20985 case V4SI_FTYPE_V4SI_V4SI_COUNT:
20986 case V4SI_FTYPE_V4SI_SI_COUNT:
20987 case V4HI_FTYPE_V4HI_V4HI_COUNT:
20988 case V4HI_FTYPE_V4HI_SI_COUNT:
20989 case V2DI_FTYPE_V2DI_V2DI_COUNT:
20990 case V2DI_FTYPE_V2DI_SI_COUNT:
20991 case V2SI_FTYPE_V2SI_V2SI_COUNT:
20992 case V2SI_FTYPE_V2SI_SI_COUNT:
20993 case V1DI_FTYPE_V1DI_V1DI_COUNT:
20994 case V1DI_FTYPE_V1DI_SI_COUNT:
20995 nargs = 2;
20996 last_arg_count = true;
20997 break;
20998 case UINT64_FTYPE_UINT64_UINT64:
20999 case UINT_FTYPE_UINT_UINT:
21000 case UINT_FTYPE_UINT_USHORT:
21001 case UINT_FTYPE_UINT_UCHAR:
21002 nargs = 2;
21003 break;
21004 case V2DI2TI_FTYPE_V2DI_INT:
21005 nargs = 2;
21006 rmode = V2DImode;
21007 nargs_constant = 1;
21008 break;
21009 case V8HI_FTYPE_V8HI_INT:
21010 case V4SI_FTYPE_V4SI_INT:
21011 case V4HI_FTYPE_V4HI_INT:
21012 case V4SF_FTYPE_V4SF_INT:
21013 case V2DI_FTYPE_V2DI_INT:
21014 case V2DF_FTYPE_V2DF_INT:
21015 nargs = 2;
21016 nargs_constant = 1;
21017 break;
21018 case V16QI_FTYPE_V16QI_V16QI_V16QI:
21019 case V4SF_FTYPE_V4SF_V4SF_V4SF:
21020 case V2DF_FTYPE_V2DF_V2DF_V2DF:
21021 nargs = 3;
21022 break;
21023 case V16QI_FTYPE_V16QI_V16QI_INT:
21024 case V8HI_FTYPE_V8HI_V8HI_INT:
21025 case V4SI_FTYPE_V4SI_V4SI_INT:
21026 case V4SF_FTYPE_V4SF_V4SF_INT:
21027 case V2DI_FTYPE_V2DI_V2DI_INT:
21028 case V2DF_FTYPE_V2DF_V2DF_INT:
21029 nargs = 3;
21030 nargs_constant = 1;
21031 break;
21032 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
21033 nargs = 3;
21034 rmode = V2DImode;
21035 nargs_constant = 1;
21036 break;
21037 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
21038 nargs = 3;
21039 rmode = DImode;
21040 nargs_constant = 1;
21041 break;
21042 case V2DI_FTYPE_V2DI_UINT_UINT:
21043 nargs = 3;
21044 nargs_constant = 2;
21045 break;
21046 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
21047 nargs = 4;
21048 nargs_constant = 2;
21049 break;
21050 default:
21051 gcc_unreachable ();
21052 }
21053
21054 gcc_assert (nargs <= ARRAY_SIZE (args));
21055
21056 if (comparison != UNKNOWN)
21057 {
21058 gcc_assert (nargs == 2);
21059 return ix86_expand_sse_compare (d, exp, target, swap);
21060 }
21061
21062 if (rmode == VOIDmode || rmode == tmode)
21063 {
21064 if (optimize
21065 || target == 0
21066 || GET_MODE (target) != tmode
21067 || ! (*insn_p->operand[0].predicate) (target, tmode))
21068 target = gen_reg_rtx (tmode);
21069 real_target = target;
21070 }
21071 else
21072 {
21073 target = gen_reg_rtx (rmode);
21074 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
21075 }
21076
21077 for (i = 0; i < nargs; i++)
21078 {
21079 tree arg = CALL_EXPR_ARG (exp, i);
21080 rtx op = expand_normal (arg);
21081 enum machine_mode mode = insn_p->operand[i + 1].mode;
21082 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
21083
21084 if (last_arg_count && (i + 1) == nargs)
21085 {
21086 /* SIMD shift insns take either an 8-bit immediate or
21087 register as count. But builtin functions take int as
21088 count. If count doesn't match, we put it in register. */
21089 if (!match)
21090 {
21091 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
21092 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
21093 op = copy_to_reg (op);
21094 }
21095 }
21096 else if ((nargs - i) <= nargs_constant)
21097 {
21098 if (!match)
21099 switch (icode)
21100 {
21101 case CODE_FOR_sse4_1_roundpd:
21102 case CODE_FOR_sse4_1_roundps:
21103 case CODE_FOR_sse4_1_roundsd:
21104 case CODE_FOR_sse4_1_roundss:
21105 case CODE_FOR_sse4_1_blendps:
21106 error ("the last argument must be a 4-bit immediate");
21107 return const0_rtx;
21108
21109 case CODE_FOR_sse4_1_blendpd:
21110 error ("the last argument must be a 2-bit immediate");
21111 return const0_rtx;
21112
21113 default:
21114 switch (nargs_constant)
21115 {
21116 case 2:
21117 if ((nargs - i) == nargs_constant)
21118 {
21119 error ("the next to last argument must be an 8-bit immediate");
21120 break;
21121 }
21122 case 1:
21123 error ("the last argument must be an 8-bit immediate");
21124 break;
21125 default:
21126 gcc_unreachable ();
21127 }
21128 return const0_rtx;
21129 }
21130 }
21131 else
21132 {
21133 if (VECTOR_MODE_P (mode))
21134 op = safe_vector_operand (op, mode);
21135
21136 /* If we aren't optimizing, only allow one memory operand to
21137 be generated. */
21138 if (memory_operand (op, mode))
21139 num_memory++;
06f4e35d 21140
b282a8d3
L
21141 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
21142 {
21143 if (optimize || !match || num_memory > 1)
21144 op = copy_to_mode_reg (mode, op);
21145 }
21146 else
21147 {
21148 op = copy_to_reg (op);
21149 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
21150 }
21151 }
06f4e35d 21152
b282a8d3
L
21153 args[i].op = op;
21154 args[i].mode = mode;
21155 }
06f4e35d 21156
b282a8d3
L
21157 switch (nargs)
21158 {
21159 case 1:
21160 pat = GEN_FCN (icode) (real_target, args[0].op);
21161 break;
21162 case 2:
21163 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
21164 break;
21165 case 3:
21166 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
21167 args[2].op);
21168 break;
21169 case 4:
21170 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
21171 args[2].op, args[3].op);
21172 break;
21173 default:
21174 gcc_unreachable ();
06f4e35d
L
21175 }
21176
21177 if (! pat)
21178 return 0;
21179
21180 emit_insn (pat);
b282a8d3 21181 return target;
06f4e35d
L
21182}
21183
b282a8d3
L
21184/* Subroutine of ix86_expand_builtin to take care of special insns
21185 with variable number of operands. */
06f4e35d
L
21186
21187static rtx
b282a8d3
L
21188ix86_expand_special_args_builtin (const struct builtin_description *d,
21189 tree exp, rtx target)
06f4e35d 21190{
b282a8d3
L
21191 tree arg;
21192 rtx pat, op;
21193 unsigned int i, nargs, arg_adjust, memory;
21194 struct
21195 {
21196 rtx op;
21197 enum machine_mode mode;
21198 } args[2];
21199 enum insn_code icode = d->icode;
21200 bool last_arg_constant = false;
21201 const struct insn_data *insn_p = &insn_data[icode];
21202 enum machine_mode tmode = insn_p->operand[0].mode;
21203 enum { load, store } class;
06f4e35d 21204
b282a8d3
L
21205 switch ((enum ix86_special_builtin_type) d->flag)
21206 {
21207 case VOID_FTYPE_VOID:
21208 emit_insn (GEN_FCN (icode) (target));
21209 return 0;
21210 case V2DI_FTYPE_PV2DI:
21211 case V16QI_FTYPE_PCCHAR:
21212 case V4SF_FTYPE_PCFLOAT:
21213 case V2DF_FTYPE_PCDOUBLE:
21214 nargs = 1;
21215 class = load;
21216 memory = 0;
21217 break;
21218 case VOID_FTYPE_PV2SF_V4SF:
21219 case VOID_FTYPE_PV2DI_V2DI:
21220 case VOID_FTYPE_PCHAR_V16QI:
21221 case VOID_FTYPE_PFLOAT_V4SF:
21222 case VOID_FTYPE_PDOUBLE_V2DF:
21223 case VOID_FTYPE_PDI_DI:
21224 case VOID_FTYPE_PINT_INT:
21225 nargs = 1;
21226 class = store;
21227 /* Reserve memory operand for target. */
21228 memory = ARRAY_SIZE (args);
21229 break;
21230 case V4SF_FTYPE_V4SF_PCV2SF:
21231 case V2DF_FTYPE_V2DF_PCDOUBLE:
21232 nargs = 2;
21233 class = load;
21234 memory = 1;
21235 break;
21236 default:
21237 gcc_unreachable ();
21238 }
06f4e35d 21239
b282a8d3 21240 gcc_assert (nargs <= ARRAY_SIZE (args));
06f4e35d 21241
b282a8d3 21242 if (class == store)
06f4e35d 21243 {
b282a8d3
L
21244 arg = CALL_EXPR_ARG (exp, 0);
21245 op = expand_normal (arg);
21246 gcc_assert (target == 0);
21247 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
21248 arg_adjust = 1;
21249 }
21250 else
21251 {
21252 arg_adjust = 0;
21253 if (optimize
21254 || target == 0
21255 || GET_MODE (target) != tmode
21256 || ! (*insn_p->operand[0].predicate) (target, tmode))
21257 target = gen_reg_rtx (tmode);
06f4e35d
L
21258 }
21259
b282a8d3 21260 for (i = 0; i < nargs; i++)
06f4e35d 21261 {
b282a8d3
L
21262 enum machine_mode mode = insn_p->operand[i + 1].mode;
21263 bool match;
06f4e35d 21264
b282a8d3
L
21265 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
21266 op = expand_normal (arg);
21267 match = (*insn_p->operand[i + 1].predicate) (op, mode);
06f4e35d 21268
b282a8d3
L
21269 if (last_arg_constant && (i + 1) == nargs)
21270 {
21271 if (!match)
21272 switch (icode)
21273 {
21274 default:
21275 error ("the last argument must be an 8-bit immediate");
21276 return const0_rtx;
21277 }
21278 }
21279 else
21280 {
21281 if (i == memory)
21282 {
21283 /* This must be the memory operand. */
21284 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
21285 gcc_assert (GET_MODE (op) == mode
21286 || GET_MODE (op) == VOIDmode);
21287 }
21288 else
21289 {
21290 /* This must be register. */
21291 if (VECTOR_MODE_P (mode))
21292 op = safe_vector_operand (op, mode);
06f4e35d 21293
b282a8d3
L
21294 gcc_assert (GET_MODE (op) == mode
21295 || GET_MODE (op) == VOIDmode);
21296 op = copy_to_mode_reg (mode, op);
21297 }
21298 }
06f4e35d 21299
b282a8d3
L
21300 args[i].op = op;
21301 args[i].mode = mode;
06f4e35d 21302 }
06f4e35d 21303
b282a8d3
L
21304 switch (nargs)
21305 {
21306 case 1:
21307 pat = GEN_FCN (icode) (target, args[0].op);
21308 break;
21309 case 2:
21310 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
21311 break;
21312 default:
21313 gcc_unreachable ();
06f4e35d
L
21314 }
21315
21316 if (! pat)
21317 return 0;
06f4e35d 21318 emit_insn (pat);
b282a8d3 21319 return class == store ? 0 : target;
06f4e35d
L
21320}
21321
eb701deb
RH
21322/* Return the integer constant in ARG. Constrain it to be in the range
21323 of the subparts of VEC_TYPE; issue an error if not. */
21324
21325static int
21326get_element_number (tree vec_type, tree arg)
21327{
21328 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
21329
21330 if (!host_integerp (arg, 1)
21331 || (elt = tree_low_cst (arg, 1), elt > max))
21332 {
ea40ba9c 21333 error ("selector must be an integer constant in the range 0..%wi", max);
eb701deb
RH
21334 return 0;
21335 }
21336
21337 return elt;
21338}
21339
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that gives us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  /* One call argument per vector element.  */
  gcc_assert (call_expr_nargs (exp) == n_elt);

  /* Expand each initializer argument and narrow it to the element mode.  */
  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
21372
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  /* arg0 is the vector, arg1 the constant element selector.  */
  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  /* tmode is the element mode, mode0 the full vector mode.  */
  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
21404
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  /* arg0 is the vector, arg1 the new element value, arg2 the selector.  */
  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  /* tmode is the vector mode, mode1 the element mode.  */
  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}
21443
bd793c65
BS
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  /* Builtins that need hand-written expansion are handled in this
     switch; everything else falls through to the table scans below.  */
  switch (fcode)
    {
    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? CODE_FOR_mmx_maskmovq
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = CALL_EXPR_ARG (exp, 0);
      arg2 = CALL_EXPR_ARG (exp, 1);
      arg0 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      /* The destination address becomes the insn's memory operand.  */
      op0 = force_reg (Pmode, op0);
      op0 = gen_rtx_MEM (mode1, op0);

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_LDMXCSR:
      /* Spill the new control word to a stack slot and load it.  */
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      /* Store the control word to a stack slot, then read it back.  */
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
	op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MONITOR:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (Pmode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      /* Indirect generator: the SI/DI variant is chosen per target.  */
      emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;

    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
    case IX86_BUILTIN_VEC_EXT_V16QI:
      return ix86_expand_vec_ext_builtin (exp, target);

    case IX86_BUILTIN_VEC_SET_V2DI:
    case IX86_BUILTIN_VEC_SET_V4SF:
    case IX86_BUILTIN_VEC_SET_V4SI:
    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
    case IX86_BUILTIN_VEC_SET_V16QI:
      return ix86_expand_vec_set_builtin (exp);

    case IX86_BUILTIN_INFQ:
      {
	/* Materialize +Inf through the constant pool.  */
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

	tmp = validize_mem (force_const_mem (mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (mode);

	emit_move_insn (target, tmp);
	return target;
      }

    default:
      break;
    }

  /* Table-driven expansion: scan each descriptor table for FCODE and
     hand off to the matching generic expander.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_special_args_builtin (d, exp, target);

  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_args_builtin (d, exp, target);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, exp, target);

  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpestr (d, exp, target);

  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpistr (d, exp, target);

  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_multi_arg_builtin (d->icode, exp, target,
					    (enum multi_arg_type)d->flag,
					    d->comparison);

  /* Every registered builtin must be handled above.  */
  gcc_unreachable ();
}
4211a8fb 21627
db3cf6bd
RG
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */

static tree
ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
				  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;

  /* Both sides must be vector types.  */
  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  /* Match element mode and element count exactly for each mapping.  */
  switch (fn)
    {
    case BUILT_IN_SQRT:
      if (out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return ix86_builtins[IX86_BUILTIN_SQRTPD];
      break;

    case BUILT_IN_SQRTF:
      if (out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
      break;

    case BUILT_IN_LRINT:
      if (out_mode == SImode && out_n == 4
	  && in_mode == DFmode && in_n == 2)
	return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
      break;

    case BUILT_IN_LRINTF:
      if (out_mode == SImode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
      break;

    default:
      ;
    }

  /* Dispatch to a handler for a vectorization library.  */
  if (ix86_veclib_handler)
    return (*ix86_veclib_handler)(fn, type_out, type_in);

  return NULL_TREE;
}
21684
9aba5d22
UB
/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.

   FN is the scalar builtin; TYPE_OUT/TYPE_IN are the vector types the
   vectorizer wants.  Returns a decl for the matching SVML entry point,
   or NULL_TREE if there is none.  */

static tree
ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  /* Input and output element mode/count must agree.  */
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  /* Only V2DF (double) and V4SF (float) variants exist.  */
  switch (fn)
    {
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG10:
    case BUILT_IN_POW:
    case BUILT_IN_TANH:
    case BUILT_IN_TAN:
    case BUILT_IN_ATAN:
    case BUILT_IN_ATAN2:
    case BUILT_IN_ATANH:
    case BUILT_IN_CBRT:
    case BUILT_IN_SINH:
    case BUILT_IN_SIN:
    case BUILT_IN_ASINH:
    case BUILT_IN_ASIN:
    case BUILT_IN_COSH:
    case BUILT_IN_COS:
    case BUILT_IN_ACOSH:
    case BUILT_IN_ACOS:
      if (el_mode != DFmode || n != 2)
	return NULL_TREE;
      break;

    case BUILT_IN_EXPF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG10F:
    case BUILT_IN_POWF:
    case BUILT_IN_TANHF:
    case BUILT_IN_TANF:
    case BUILT_IN_ATANF:
    case BUILT_IN_ATAN2F:
    case BUILT_IN_ATANHF:
    case BUILT_IN_CBRTF:
    case BUILT_IN_SINHF:
    case BUILT_IN_SINF:
    case BUILT_IN_ASINHF:
    case BUILT_IN_ASINF:
    case BUILT_IN_COSHF:
    case BUILT_IN_COSF:
    case BUILT_IN_ACOSHF:
    case BUILT_IN_ACOSF:
      if (el_mode != SFmode || n != 4)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  /* Mangle the SVML name from the builtin's name: skip the
     "__builtin_" prefix (10 chars) and wrap it as vmls...4 (float)
     or vmld...2 (double).  The log functions use "Ln" spellings.  */
  bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));

  if (fn == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (fn == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);

  /* Convert to uppercase.  */
  name[4] &= ~0x20;

  /* Count the scalar builtin's arguments to pick a unary or binary
     vector prototype.  */
  arity = 0;
  for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
21796
/* Handler for an ACML-style interface to
   a library with vectorized intrinsics.

   FN is the scalar builtin; TYPE_OUT/TYPE_IN are the vector types the
   vectorizer wants.  Returns a decl for the matching ACML entry point
   ("__vrd2_*" / "__vrs4_*"), or NULL_TREE if there is none.  */

static tree
ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
{
  /* Template "__vr.._": positions 4 and 5 are filled in with the
     element kind ('d'/'s') and vector width ('2'/'4') below.  */
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  /* Input and output element mode/count must agree.  */
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_SIN:
    case BUILT_IN_COS:
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG2:
    case BUILT_IN_LOG10:
      name[4] = 'd';
      name[5] = '2';
      if (el_mode != DFmode
	  || n != 2)
	return NULL_TREE;
      break;

    case BUILT_IN_SINF:
    case BUILT_IN_COSF:
    case BUILT_IN_EXPF:
    case BUILT_IN_POWF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG2F:
    case BUILT_IN_LOG10F:
      name[4] = 's';
      name[5] = '4';
      if (el_mode != SFmode
	  || n != 4)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  /* Append the builtin's base name, skipping the "__builtin_" prefix
     (10 chars).  */
  bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
  sprintf (name + 7, "%s", bname+10);

  /* Count the scalar builtin's arguments to pick a unary or binary
     vector prototype.  */
  arity = 0;
  for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
21880
21881
4c38b6d9
UB
21882/* Returns a decl of a function that implements conversion of the
21883 input vector of type TYPE, or NULL_TREE if it is not available. */
21884
21885static tree
6b889d89 21886ix86_vectorize_builtin_conversion (unsigned int code, tree type)
4c38b6d9
UB
21887{
21888 if (TREE_CODE (type) != VECTOR_TYPE)
21889 return NULL_TREE;
54a88090 21890
4c38b6d9
UB
21891 switch (code)
21892 {
21893 case FLOAT_EXPR:
21894 switch (TYPE_MODE (type))
21895 {
21896 case V4SImode:
21897 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
21898 default:
21899 return NULL_TREE;
21900 }
21901
21902 case FIX_TRUNC_EXPR:
21903 switch (TYPE_MODE (type))
21904 {
21905 case V4SFmode:
21906 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
21907 default:
21908 return NULL_TREE;
21909 }
21910 default:
21911 return NULL_TREE;
21912
21913 }
21914}
21915
6b889d89
UB
/* Returns a code for a target-specific builtin that implements
   reciprocal of the function, or NULL_TREE if not available.

   FN is either a machine-dependent builtin code (MD_FN true) or a
   normal builtin function code.  SQRT is unused here.  */

static tree
ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
			 bool sqrt ATTRIBUTE_UNUSED)
{
  /* rsqrt-based replacements are only valid under -mrecip with SSE
     math and fully unsafe/finite math assumptions.  */
  if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
	 && flag_finite_math_only && !flag_trapping_math
	 && flag_unsafe_math_optimizations))
    return NULL_TREE;

  if (md_fn)
    /* Machine dependent builtins.  */
    switch (fn)
      {
	/* Vectorized version of sqrt to rsqrt conversion.  */
      case IX86_BUILTIN_SQRTPS_NR:
	return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];

      default:
	return NULL_TREE;
      }
  else
    /* Normal builtins.  */
    switch (fn)
      {
	/* Sqrt to rsqrt conversion.  */
      case BUILT_IN_SQRTF:
	return ix86_builtins[IX86_BUILTIN_RSQRTF];

      default:
	return NULL_TREE;
      }
}
21951
4211a8fb 21952/* Store OPERAND to the memory after reload is completed. This means
f710504c 21953 that we can't easily use assign_stack_local. */
4211a8fb 21954rtx
b96a374d 21955ix86_force_to_memory (enum machine_mode mode, rtx operand)
4211a8fb 21956{
898d374d 21957 rtx result;
5656a184 21958
d0396b79 21959 gcc_assert (reload_completed);
394a378c 21960 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
898d374d
JH
21961 {
21962 result = gen_rtx_MEM (mode,
21963 gen_rtx_PLUS (Pmode,
21964 stack_pointer_rtx,
21965 GEN_INT (-RED_ZONE_SIZE)));
21966 emit_move_insn (result, operand);
21967 }
394a378c 21968 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
4211a8fb 21969 {
898d374d 21970 switch (mode)
4211a8fb 21971 {
898d374d
JH
21972 case HImode:
21973 case SImode:
21974 operand = gen_lowpart (DImode, operand);
5efb1046 21975 /* FALLTHRU */
898d374d 21976 case DImode:
4211a8fb 21977 emit_insn (
898d374d
JH
21978 gen_rtx_SET (VOIDmode,
21979 gen_rtx_MEM (DImode,
21980 gen_rtx_PRE_DEC (DImode,
21981 stack_pointer_rtx)),
21982 operand));
21983 break;
21984 default:
d0396b79 21985 gcc_unreachable ();
898d374d
JH
21986 }
21987 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21988 }
21989 else
21990 {
21991 switch (mode)
21992 {
21993 case DImode:
21994 {
21995 rtx operands[2];
21996 split_di (&operand, 1, operands, operands + 1);
21997 emit_insn (
21998 gen_rtx_SET (VOIDmode,
21999 gen_rtx_MEM (SImode,
22000 gen_rtx_PRE_DEC (Pmode,
22001 stack_pointer_rtx)),
22002 operands[1]));
22003 emit_insn (
22004 gen_rtx_SET (VOIDmode,
22005 gen_rtx_MEM (SImode,
22006 gen_rtx_PRE_DEC (Pmode,
22007 stack_pointer_rtx)),
22008 operands[0]));
22009 }
22010 break;
22011 case HImode:
69642eae
JJ
22012 /* Store HImodes as SImodes. */
22013 operand = gen_lowpart (SImode, operand);
5efb1046 22014 /* FALLTHRU */
898d374d 22015 case SImode:
4211a8fb 22016 emit_insn (
898d374d
JH
22017 gen_rtx_SET (VOIDmode,
22018 gen_rtx_MEM (GET_MODE (operand),
22019 gen_rtx_PRE_DEC (SImode,
22020 stack_pointer_rtx)),
22021 operand));
22022 break;
22023 default:
d0396b79 22024 gcc_unreachable ();
4211a8fb 22025 }
898d374d 22026 result = gen_rtx_MEM (mode, stack_pointer_rtx);
4211a8fb 22027 }
898d374d 22028 return result;
4211a8fb
JH
22029}
22030
/* Free operand from the memory.

   Undoes the stack adjustment made by ix86_force_to_memory for MODE;
   a no-op when the red zone was used (no %rsp adjustment happened).  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
    {
      int size;

      /* Match the push size used by ix86_force_to_memory.  */
      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}
a946dd00 22050
f84aa48a
JH
/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.

   X is the value being reloaded and REGCLASS the class reload chose;
   returns the (possibly narrower) class we prefer, or NO_REGS to force
   the value into memory.  */
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (regclass, GENERAL_REGS))
	return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (TARGET_80387
	  && standard_80387_constant_p (x))
	{
	  /* Limit class to non-sse.  */
	  if (regclass == FLOAT_SSE_REGS)
	    return FLOAT_REGS;
	  if (regclass == FP_TOP_SSE_REGS)
	    return FP_TOP_REG;
	  if (regclass == FP_SECOND_SSE_REGS)
	    return FP_SECOND_REG;
	  if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
	    return regclass;
	}

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (regclass, Q_REGS))
	return regclass;
      if (reg_class_subset_p (Q_REGS, regclass))
	return Q_REGS;
      return NO_REGS;
    }

  return regclass;
}
22126
b5c82fa1
PB
22127/* Discourage putting floating-point values in SSE registers unless
22128 SSE math is being used, and likewise for the 387 registers. */
22129enum reg_class
9415ab7d 22130ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
b5c82fa1
PB
22131{
22132 enum machine_mode mode = GET_MODE (x);
22133
22134 /* Restrict the output reload class to the register bank that we are doing
22135 math on. If we would like not to return a subset of CLASS, reject this
22136 alternative: if reload cannot do this, it will still use its choice. */
22137 mode = GET_MODE (x);
22138 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
9415ab7d 22139 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
b5c82fa1 22140
27ac40e2 22141 if (X87_FLOAT_MODE_P (mode))
b5c82fa1 22142 {
9415ab7d 22143 if (regclass == FP_TOP_SSE_REGS)
b5c82fa1 22144 return FP_TOP_REG;
9415ab7d 22145 else if (regclass == FP_SECOND_SSE_REGS)
b5c82fa1
PB
22146 return FP_SECOND_REG;
22147 else
9415ab7d 22148 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
b5c82fa1
PB
22149 }
22150
9415ab7d 22151 return regclass;
b5c82fa1
PB
22152}
22153
83689be0
UB
22154static enum reg_class
22155ix86_secondary_reload (bool in_p, rtx x, enum reg_class class,
22156 enum machine_mode mode,
22157 secondary_reload_info *sri ATTRIBUTE_UNUSED)
22158{
22159 /* QImode spills from non-QI registers require
22160 intermediate register on 32bit targets. */
22161 if (!in_p && mode == QImode && !TARGET_64BIT
22162 && (class == GENERAL_REGS
22163 || class == LEGACY_REGS
22164 || class == INDEX_REGS))
22165 {
22166 int regno;
22167
22168 if (REG_P (x))
22169 regno = REGNO (x);
22170 else
22171 regno = -1;
22172
22173 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
22174 regno = true_regnum (x);
22175
22176 /* Return Q_REGS if the operand is in memory. */
22177 if (regno == -1)
22178 return Q_REGS;
22179 }
22180
22181 return NO_REGS;
22182}
22183
f84aa48a
JH
22184/* If we are copying between general and FP registers, we need a memory
22185 location. The same is true for SSE and MMX registers.
22186
6232eadc
JH
22187 To optimize register_move_cost performance, allow inline variant.
22188
f84aa48a
JH
22189 The macro can't work reliably when one of the CLASSES is class containing
22190 registers from multiple units (SSE, MMX, integer). We avoid this by never
22191 combining those units in single alternative in the machine description.
22192 Ensure that this constraint holds to avoid unexpected surprises.
22193
22194 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
22195 enforce these sanity checks. */
f75959a6 22196
6232eadc
JH
22197static inline int
22198inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
b96a374d 22199 enum machine_mode mode, int strict)
f84aa48a
JH
22200{
22201 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
22202 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
22203 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
22204 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
22205 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
22206 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
22207 {
d0396b79 22208 gcc_assert (!strict);
f75959a6 22209 return true;
f84aa48a 22210 }
f75959a6
RH
22211
22212 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
22213 return true;
22214
22215 /* ??? This is a lie. We do have moves between mmx/general, and for
22216 mmx/sse2. But by saying we need secondary memory we discourage the
22217 register allocator from using the mmx registers unless needed. */
22218 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
22219 return true;
22220
22221 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22222 {
22223 /* SSE1 doesn't have any direct moves from other classes. */
22224 if (!TARGET_SSE2)
22225 return true;
22226
5656a184 22227 /* If the target says that inter-unit moves are more expensive
f75959a6 22228 than moving through memory, then don't generate them. */
ed69105c 22229 if (!TARGET_INTER_UNIT_MOVES)
f75959a6
RH
22230 return true;
22231
22232 /* Between SSE and general, we have moves no larger than word size. */
22233 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22234 return true;
f75959a6
RH
22235 }
22236
22237 return false;
f84aa48a 22238}
f75959a6 22239
/* Out-of-line entry point (SECONDARY_MEMORY_NEEDED) for the inline
   helper above; see its head comment for the STRICT semantics.  */
int
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
			      enum machine_mode mode, int strict)
{
  return inline_secondary_memory_needed (class1, class2, mode, strict);
}
22246
1272914c
RH
22247/* Return true if the registers in CLASS cannot represent the change from
22248 modes FROM to TO. */
22249
22250bool
22251ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9415ab7d 22252 enum reg_class regclass)
1272914c
RH
22253{
22254 if (from == to)
22255 return false;
22256
0fa2e4df 22257 /* x87 registers can't do subreg at all, as all values are reformatted
1272914c 22258 to extended precision. */
9415ab7d 22259 if (MAYBE_FLOAT_CLASS_P (regclass))
1272914c
RH
22260 return true;
22261
9415ab7d 22262 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
1272914c
RH
22263 {
22264 /* Vector registers do not support QI or HImode loads. If we don't
22265 disallow a change to these modes, reload will assume it's ok to
22266 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
22267 the vec_dupv4hi pattern. */
22268 if (GET_MODE_SIZE (from) < 4)
22269 return true;
22270
22271 /* Vector registers do not support subreg with nonzero offsets, which
5656a184 22272 are otherwise valid for integer registers. Since we can't see
1272914c
RH
22273 whether we have a nonzero offset from here, prohibit all
22274 nonparadoxical subregs changing size. */
22275 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
22276 return true;
22277 }
22278
22279 return false;
22280}
22281
6232eadc
JH
22282/* Return the cost of moving data of mode M between a
22283 register and memory. A value of 2 is the default; this cost is
22284 relative to those in `REGISTER_MOVE_COST'.
22285
22286 This function is used extensively by register_move_cost that is used to
22287 build tables at startup. Make it inline in this case.
22288 When IN is 2, return maximum of in and out move cost.
22289
22290 If moving between registers and memory is more expensive than
22291 between two registers, you should define this macro to express the
22292 relative cost.
22293
22294 Model also increased moving costs of QImode registers in non
22295 Q_REGS classes.
22296 */
22297static inline int
22298inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
22299 int in)
22300{
22301 int cost;
22302 if (FLOAT_CLASS_P (regclass))
22303 {
22304 int index;
22305 switch (mode)
22306 {
22307 case SFmode:
22308 index = 0;
22309 break;
22310 case DFmode:
22311 index = 1;
22312 break;
22313 case XFmode:
22314 index = 2;
22315 break;
22316 default:
22317 return 100;
22318 }
22319 if (in == 2)
22320 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
22321 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
22322 }
22323 if (SSE_CLASS_P (regclass))
22324 {
22325 int index;
22326 switch (GET_MODE_SIZE (mode))
22327 {
22328 case 4:
22329 index = 0;
22330 break;
22331 case 8:
22332 index = 1;
22333 break;
22334 case 16:
22335 index = 2;
22336 break;
22337 default:
22338 return 100;
22339 }
22340 if (in == 2)
22341 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
22342 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
22343 }
22344 if (MMX_CLASS_P (regclass))
22345 {
22346 int index;
22347 switch (GET_MODE_SIZE (mode))
22348 {
22349 case 4:
22350 index = 0;
22351 break;
22352 case 8:
22353 index = 1;
22354 break;
22355 default:
22356 return 100;
22357 }
22358 if (in)
22359 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
22360 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
22361 }
22362 switch (GET_MODE_SIZE (mode))
22363 {
22364 case 1:
22365 if (Q_CLASS_P (regclass) || TARGET_64BIT)
22366 {
22367 if (!in)
22368 return ix86_cost->int_store[0];
22369 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
22370 cost = ix86_cost->movzbl_load;
22371 else
22372 cost = ix86_cost->int_load[0];
22373 if (in == 2)
22374 return MAX (cost, ix86_cost->int_store[0]);
22375 return cost;
22376 }
22377 else
22378 {
22379 if (in == 2)
22380 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
22381 if (in)
22382 return ix86_cost->movzbl_load;
22383 else
22384 return ix86_cost->int_store[0] + 4;
22385 }
22386 break;
22387 case 2:
22388 if (in == 2)
22389 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
22390 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
22391 default:
22392 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
22393 if (mode == TFmode)
22394 mode = XFmode;
22395 if (in == 2)
22396 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
22397 else if (in)
22398 cost = ix86_cost->int_load[2];
22399 else
22400 cost = ix86_cost->int_store[2];
22401 return (cost * (((int) GET_MODE_SIZE (mode)
22402 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
22403 }
22404}
22405
/* Out-of-line entry point (MEMORY_MOVE_COST) for the inline helper
   above; see its head comment for the IN semantics (2 = max of in/out).  */
int
ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
{
  return inline_memory_move_cost (mode, regclass, in);
}
22411
22412
f84aa48a 22413/* Return the cost of moving data from a register in class CLASS1 to
a4f31c00 22414 one in class CLASS2.
f84aa48a
JH
22415
22416 It is not required that the cost always equal 2 when FROM is the same as TO;
22417 on some machines it is expensive to move between registers if they are not
22418 general registers. */
f75959a6 22419
f84aa48a 22420int
b96a374d
AJ
22421ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
22422 enum reg_class class2)
f84aa48a
JH
22423{
22424 /* In case we require secondary memory, compute cost of the store followed
b96a374d 22425 by load. In order to avoid bad register allocation choices, we need
d631b80a
RH
22426 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
22427
6232eadc 22428 if (inline_secondary_memory_needed (class1, class2, mode, 0))
f84aa48a 22429 {
d631b80a
RH
22430 int cost = 1;
22431
6232eadc
JH
22432 cost += inline_memory_move_cost (mode, class1, 2);
22433 cost += inline_memory_move_cost (mode, class2, 2);
b96a374d 22434
d631b80a
RH
22435 /* In case of copying from general_purpose_register we may emit multiple
22436 stores followed by single load causing memory size mismatch stall.
d1f87653 22437 Count this as arbitrarily high cost of 20. */
62415523 22438 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
d631b80a
RH
22439 cost += 20;
22440
22441 /* In the case of FP/MMX moves, the registers actually overlap, and we
22442 have to switch modes in order to treat them differently. */
22443 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
22444 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
22445 cost += 20;
22446
22447 return cost;
f84aa48a 22448 }
d631b80a 22449
92d0fb09 22450 /* Moves between SSE/MMX and integer unit are expensive. */
62415523
JH
22451 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
22452 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
147bbdd0
UB
22453
22454 /* ??? By keeping returned value relatively high, we limit the number
22455 of moves between integer and MMX/SSE registers for all targets.
22456 Additionally, high value prevents problem with x86_modes_tieable_p(),
22457 where integer modes in MMX/SSE registers are not tieable
22458 because of missing QImode and HImode moves to, from or between
22459 MMX/SSE registers. */
979c67a5 22460 return MAX (8, ix86_cost->mmxsse_to_integer);
147bbdd0 22461
fa79946e
JH
22462 if (MAYBE_FLOAT_CLASS_P (class1))
22463 return ix86_cost->fp_move;
22464 if (MAYBE_SSE_CLASS_P (class1))
22465 return ix86_cost->sse_move;
22466 if (MAYBE_MMX_CLASS_P (class1))
22467 return ix86_cost->mmx_move;
f84aa48a
JH
22468 return 2;
22469}
22470
a946dd00 22471/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
c1c5b5e3
RH
22472
22473bool
b96a374d 22474ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
a946dd00
JH
22475{
22476 /* Flags and only flags can only hold CCmode values. */
22477 if (CC_REGNO_P (regno))
22478 return GET_MODE_CLASS (mode) == MODE_CC;
22479 if (GET_MODE_CLASS (mode) == MODE_CC
22480 || GET_MODE_CLASS (mode) == MODE_RANDOM
22481 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
22482 return 0;
22483 if (FP_REGNO_P (regno))
22484 return VALID_FP_MODE_P (mode);
22485 if (SSE_REGNO_P (regno))
dcbca208 22486 {
6c4ccfd8
RH
22487 /* We implement the move patterns for all vector modes into and
22488 out of SSE registers, even when no operation instructions
22489 are available. */
22490 return (VALID_SSE_REG_MODE (mode)
22491 || VALID_SSE2_REG_MODE (mode)
22492 || VALID_MMX_REG_MODE (mode)
22493 || VALID_MMX_REG_MODE_3DNOW (mode));
dcbca208 22494 }
a946dd00 22495 if (MMX_REGNO_P (regno))
dcbca208 22496 {
6c4ccfd8
RH
22497 /* We implement the move patterns for 3DNOW modes even in MMX mode,
22498 so if the register is available at all, then we can move data of
22499 the given mode into or out of it. */
22500 return (VALID_MMX_REG_MODE (mode)
22501 || VALID_MMX_REG_MODE_3DNOW (mode));
dcbca208 22502 }
b4e82619
RH
22503
22504 if (mode == QImode)
22505 {
22506 /* Take care for QImode values - they can be in non-QI regs,
22507 but then they do cause partial register stalls. */
22508 if (regno < 4 || TARGET_64BIT)
22509 return 1;
22510 if (!TARGET_PARTIAL_REG_STALL)
22511 return 1;
22512 return reload_in_progress || reload_completed;
22513 }
22514 /* We handle both integer and floats in the general purpose registers. */
22515 else if (VALID_INT_MODE_P (mode))
22516 return 1;
22517 else if (VALID_FP_MODE_P (mode))
22518 return 1;
62d75179
L
22519 else if (VALID_DFP_MODE_P (mode))
22520 return 1;
b4e82619 22521 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
5656a184 22522 on to use that value in smaller contexts, this can easily force a
b4e82619
RH
22523 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
22524 supporting DImode, allow it. */
22525 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
a946dd00 22526 return 1;
b4e82619
RH
22527
22528 return 0;
a946dd00 22529}
fa79946e 22530
5656a184 22531/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
c1c5b5e3
RH
22532 tieable integer mode. */
22533
22534static bool
22535ix86_tieable_integer_mode_p (enum machine_mode mode)
22536{
22537 switch (mode)
22538 {
22539 case HImode:
22540 case SImode:
22541 return true;
22542
22543 case QImode:
22544 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
22545
22546 case DImode:
22547 return TARGET_64BIT;
22548
22549 default:
22550 return false;
22551 }
22552}
22553
22554/* Return true if MODE1 is accessible in a register that can hold MODE2
22555 without copying. That is, all register classes that can hold MODE2
22556 can also hold MODE1. */
22557
22558bool
22559ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22560{
22561 if (mode1 == mode2)
22562 return true;
22563
22564 if (ix86_tieable_integer_mode_p (mode1)
22565 && ix86_tieable_integer_mode_p (mode2))
22566 return true;
22567
22568 /* MODE2 being XFmode implies fp stack or general regs, which means we
22569 can tie any smaller floating point modes to it. Note that we do not
22570 tie this with TFmode. */
22571 if (mode2 == XFmode)
22572 return mode1 == SFmode || mode1 == DFmode;
22573
22574 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
22575 that we can tie it with SFmode. */
22576 if (mode2 == DFmode)
22577 return mode1 == SFmode;
22578
5656a184 22579 /* If MODE2 is only appropriate for an SSE register, then tie with
c1c5b5e3 22580 any other mode acceptable to SSE registers. */
8ab93332 22581 if (GET_MODE_SIZE (mode2) == 16
b4e82619 22582 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
8ab93332
UB
22583 return (GET_MODE_SIZE (mode1) == 16
22584 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
c1c5b5e3 22585
8ab93332 22586 /* If MODE2 is appropriate for an MMX register, then tie
c1c5b5e3 22587 with any other mode acceptable to MMX registers. */
b4e82619
RH
22588 if (GET_MODE_SIZE (mode2) == 8
22589 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
93de7743 22590 return (GET_MODE_SIZE (mode1) == 8
8ab93332 22591 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
c1c5b5e3
RH
22592
22593 return false;
22594}
22595
3c50106f
RH
22596/* Compute a (partial) cost for rtx X. Return true if the complete
22597 cost has been computed, and false if subexpressions should be
22598 scanned. In either case, *TOTAL contains the cost result. */
22599
22600static bool
9415ab7d 22601ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
3c50106f 22602{
9415ab7d 22603 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
3c50106f
RH
22604 enum machine_mode mode = GET_MODE (x);
22605
22606 switch (code)
22607 {
22608 case CONST_INT:
22609 case CONST:
22610 case LABEL_REF:
22611 case SYMBOL_REF:
8fe75e43 22612 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
3c50106f 22613 *total = 3;
8fe75e43 22614 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
3c50106f 22615 *total = 2;
3504dad3
JH
22616 else if (flag_pic && SYMBOLIC_CONST (x)
22617 && (!TARGET_64BIT
22618 || (!GET_CODE (x) != LABEL_REF
22619 && (GET_CODE (x) != SYMBOL_REF
12969f45 22620 || !SYMBOL_REF_LOCAL_P (x)))))
3c50106f
RH
22621 *total = 1;
22622 else
22623 *total = 0;
22624 return true;
22625
22626 case CONST_DOUBLE:
22627 if (mode == VOIDmode)
22628 *total = 0;
22629 else
22630 switch (standard_80387_constant_p (x))
22631 {
22632 case 1: /* 0.0 */
22633 *total = 1;
22634 break;
881b2a96 22635 default: /* Other constants */
3c50106f
RH
22636 *total = 2;
22637 break;
881b2a96
RS
22638 case 0:
22639 case -1:
3c50106f
RH
22640 /* Start with (MEM (SYMBOL_REF)), since that's where
22641 it'll probably end up. Add a penalty for size. */
22642 *total = (COSTS_N_INSNS (1)
3504dad3 22643 + (flag_pic != 0 && !TARGET_64BIT)
3c50106f
RH
22644 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
22645 break;
22646 }
22647 return true;
22648
22649 case ZERO_EXTEND:
22650 /* The zero extensions is often completely free on x86_64, so make
22651 it as cheap as possible. */
22652 if (TARGET_64BIT && mode == DImode
22653 && GET_MODE (XEXP (x, 0)) == SImode)
22654 *total = 1;
22655 else if (TARGET_ZERO_EXTEND_WITH_AND)
a9cc9cc6 22656 *total = ix86_cost->add;
3c50106f 22657 else
a9cc9cc6 22658 *total = ix86_cost->movzx;
3c50106f
RH
22659 return false;
22660
22661 case SIGN_EXTEND:
a9cc9cc6 22662 *total = ix86_cost->movsx;
3c50106f
RH
22663 return false;
22664
22665 case ASHIFT:
7656aee4 22666 if (CONST_INT_P (XEXP (x, 1))
3c50106f
RH
22667 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
22668 {
22669 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22670 if (value == 1)
22671 {
a9cc9cc6 22672 *total = ix86_cost->add;
3c50106f
RH
22673 return false;
22674 }
22675 if ((value == 2 || value == 3)
3c50106f
RH
22676 && ix86_cost->lea <= ix86_cost->shift_const)
22677 {
a9cc9cc6 22678 *total = ix86_cost->lea;
3c50106f
RH
22679 return false;
22680 }
22681 }
5efb1046 22682 /* FALLTHRU */
3c50106f
RH
22683
22684 case ROTATE:
22685 case ASHIFTRT:
22686 case LSHIFTRT:
22687 case ROTATERT:
22688 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
22689 {
7656aee4 22690 if (CONST_INT_P (XEXP (x, 1)))
3c50106f
RH
22691 {
22692 if (INTVAL (XEXP (x, 1)) > 32)
a9cc9cc6 22693 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
3c50106f 22694 else
a9cc9cc6 22695 *total = ix86_cost->shift_const * 2;
3c50106f
RH
22696 }
22697 else
22698 {
22699 if (GET_CODE (XEXP (x, 1)) == AND)
a9cc9cc6 22700 *total = ix86_cost->shift_var * 2;
3c50106f 22701 else
a9cc9cc6 22702 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
3c50106f
RH
22703 }
22704 }
22705 else
22706 {
7656aee4 22707 if (CONST_INT_P (XEXP (x, 1)))
a9cc9cc6 22708 *total = ix86_cost->shift_const;
3c50106f 22709 else
a9cc9cc6 22710 *total = ix86_cost->shift_var;
3c50106f
RH
22711 }
22712 return false;
22713
22714 case MULT:
27ac40e2
UB
22715 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22716 {
22717 /* ??? SSE scalar cost should be used here. */
22718 *total = ix86_cost->fmul;
22719 return false;
22720 }
22721 else if (X87_FLOAT_MODE_P (mode))
3c50106f 22722 {
a9cc9cc6 22723 *total = ix86_cost->fmul;
4a5eab38 22724 return false;
3c50106f 22725 }
27ac40e2
UB
22726 else if (FLOAT_MODE_P (mode))
22727 {
22728 /* ??? SSE vector cost should be used here. */
22729 *total = ix86_cost->fmul;
22730 return false;
22731 }
3c50106f
RH
22732 else
22733 {
4a5eab38
PB
22734 rtx op0 = XEXP (x, 0);
22735 rtx op1 = XEXP (x, 1);
22736 int nbits;
7656aee4 22737 if (CONST_INT_P (XEXP (x, 1)))
4a5eab38
PB
22738 {
22739 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22740 for (nbits = 0; value != 0; value &= value - 1)
22741 nbits++;
22742 }
22743 else
22744 /* This is arbitrary. */
22745 nbits = 7;
22746
22747 /* Compute costs correctly for widening multiplication. */
e3dafdf4 22748 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
4a5eab38
PB
22749 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22750 == GET_MODE_SIZE (mode))
22751 {
22752 int is_mulwiden = 0;
22753 enum machine_mode inner_mode = GET_MODE (op0);
22754
22755 if (GET_CODE (op0) == GET_CODE (op1))
22756 is_mulwiden = 1, op1 = XEXP (op1, 0);
7656aee4 22757 else if (CONST_INT_P (op1))
4a5eab38
PB
22758 {
22759 if (GET_CODE (op0) == SIGN_EXTEND)
22760 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22761 == INTVAL (op1);
22762 else
22763 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22764 }
22765
22766 if (is_mulwiden)
22767 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22768 }
f676971a 22769
a9cc9cc6
JH
22770 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
22771 + nbits * ix86_cost->mult_bit
22772 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
4a5eab38
PB
22773
22774 return true;
3c50106f 22775 }
3c50106f
RH
22776
22777 case DIV:
22778 case UDIV:
22779 case MOD:
22780 case UMOD:
27ac40e2
UB
22781 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22782 /* ??? SSE cost should be used here. */
22783 *total = ix86_cost->fdiv;
22784 else if (X87_FLOAT_MODE_P (mode))
22785 *total = ix86_cost->fdiv;
22786 else if (FLOAT_MODE_P (mode))
22787 /* ??? SSE vector cost should be used here. */
a9cc9cc6 22788 *total = ix86_cost->fdiv;
3c50106f 22789 else
a9cc9cc6 22790 *total = ix86_cost->divide[MODE_INDEX (mode)];
3c50106f
RH
22791 return false;
22792
22793 case PLUS:
27ac40e2 22794 if (GET_MODE_CLASS (mode) == MODE_INT
3c50106f
RH
22795 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
22796 {
22797 if (GET_CODE (XEXP (x, 0)) == PLUS
22798 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7656aee4 22799 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
3c50106f
RH
22800 && CONSTANT_P (XEXP (x, 1)))
22801 {
22802 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22803 if (val == 2 || val == 4 || val == 8)
22804 {
a9cc9cc6 22805 *total = ix86_cost->lea;
3c50106f
RH
22806 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22807 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
22808 outer_code);
22809 *total += rtx_cost (XEXP (x, 1), outer_code);
22810 return true;
22811 }
22812 }
22813 else if (GET_CODE (XEXP (x, 0)) == MULT
7656aee4 22814 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3c50106f
RH
22815 {
22816 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22817 if (val == 2 || val == 4 || val == 8)
22818 {
a9cc9cc6 22819 *total = ix86_cost->lea;
3c50106f
RH
22820 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22821 *total += rtx_cost (XEXP (x, 1), outer_code);
22822 return true;
22823 }
22824 }
22825 else if (GET_CODE (XEXP (x, 0)) == PLUS)
22826 {
a9cc9cc6 22827 *total = ix86_cost->lea;
3c50106f
RH
22828 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22829 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22830 *total += rtx_cost (XEXP (x, 1), outer_code);
22831 return true;
22832 }
22833 }
5efb1046 22834 /* FALLTHRU */
3c50106f
RH
22835
22836 case MINUS:
27ac40e2
UB
22837 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22838 {
22839 /* ??? SSE cost should be used here. */
22840 *total = ix86_cost->fadd;
22841 return false;
22842 }
22843 else if (X87_FLOAT_MODE_P (mode))
22844 {
22845 *total = ix86_cost->fadd;
22846 return false;
22847 }
22848 else if (FLOAT_MODE_P (mode))
3c50106f 22849 {
27ac40e2 22850 /* ??? SSE vector cost should be used here. */
a9cc9cc6 22851 *total = ix86_cost->fadd;
3c50106f
RH
22852 return false;
22853 }
5efb1046 22854 /* FALLTHRU */
3c50106f
RH
22855
22856 case AND:
22857 case IOR:
22858 case XOR:
22859 if (!TARGET_64BIT && mode == DImode)
22860 {
a9cc9cc6 22861 *total = (ix86_cost->add * 2
3c50106f
RH
22862 + (rtx_cost (XEXP (x, 0), outer_code)
22863 << (GET_MODE (XEXP (x, 0)) != DImode))
22864 + (rtx_cost (XEXP (x, 1), outer_code)
b96a374d 22865 << (GET_MODE (XEXP (x, 1)) != DImode)));
3c50106f
RH
22866 return true;
22867 }
5efb1046 22868 /* FALLTHRU */
3c50106f
RH
22869
22870 case NEG:
27ac40e2
UB
22871 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22872 {
22873 /* ??? SSE cost should be used here. */
22874 *total = ix86_cost->fchs;
22875 return false;
22876 }
22877 else if (X87_FLOAT_MODE_P (mode))
22878 {
22879 *total = ix86_cost->fchs;
22880 return false;
22881 }
22882 else if (FLOAT_MODE_P (mode))
3c50106f 22883 {
27ac40e2 22884 /* ??? SSE vector cost should be used here. */
a9cc9cc6 22885 *total = ix86_cost->fchs;
3c50106f
RH
22886 return false;
22887 }
5efb1046 22888 /* FALLTHRU */
3c50106f
RH
22889
22890 case NOT:
22891 if (!TARGET_64BIT && mode == DImode)
a9cc9cc6 22892 *total = ix86_cost->add * 2;
3c50106f 22893 else
a9cc9cc6 22894 *total = ix86_cost->add;
3c50106f
RH
22895 return false;
22896
c271ba77
KH
22897 case COMPARE:
22898 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
22899 && XEXP (XEXP (x, 0), 1) == const1_rtx
7656aee4 22900 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
c271ba77
KH
22901 && XEXP (x, 1) == const0_rtx)
22902 {
22903 /* This kind of construct is implemented using test[bwl].
22904 Treat it as if we had an AND. */
a9cc9cc6 22905 *total = (ix86_cost->add
c271ba77
KH
22906 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
22907 + rtx_cost (const1_rtx, outer_code));
22908 return true;
22909 }
22910 return false;
22911
3c50106f 22912 case FLOAT_EXTEND:
27ac40e2 22913 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
ce7d4645 22914 *total = 0;
3c50106f
RH
22915 return false;
22916
22917 case ABS:
27ac40e2
UB
22918 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22919 /* ??? SSE cost should be used here. */
22920 *total = ix86_cost->fabs;
22921 else if (X87_FLOAT_MODE_P (mode))
22922 *total = ix86_cost->fabs;
22923 else if (FLOAT_MODE_P (mode))
22924 /* ??? SSE vector cost should be used here. */
a9cc9cc6 22925 *total = ix86_cost->fabs;
3c50106f
RH
22926 return false;
22927
22928 case SQRT:
27ac40e2
UB
22929 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22930 /* ??? SSE cost should be used here. */
22931 *total = ix86_cost->fsqrt;
22932 else if (X87_FLOAT_MODE_P (mode))
22933 *total = ix86_cost->fsqrt;
22934 else if (FLOAT_MODE_P (mode))
22935 /* ??? SSE vector cost should be used here. */
a9cc9cc6 22936 *total = ix86_cost->fsqrt;
3c50106f
RH
22937 return false;
22938
74dc3e94
RH
22939 case UNSPEC:
22940 if (XINT (x, 1) == UNSPEC_TP)
22941 *total = 0;
22942 return false;
22943
3c50106f
RH
22944 default:
22945 return false;
22946 }
22947}
22948
b069de3b
SS
22949#if TARGET_MACHO
22950
22951static int current_machopic_label_num;
22952
/* Given a symbol name and its associated stub, write out the
   definition of the stub.  Emits the stub, the binder trampoline and
   the lazy pointer into their respective Mach-O sections.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  /* PIC and non-PIC stubs live in different sections.  */
  if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      /* PIC: materialize the PC in %eax via call/pop, load the lazy
	 pointer PC-relatively, then jump through it.  */
      fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
      fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%edx\n");
    }
  else
    /* Non-PIC: jump indirectly through the lazy pointer.  */
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* Binder trampoline: pushes the lazy pointer (address) and enters
     dyld_stub_binding_helper.  */
  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl\t%%eax\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");

  /* The lazy pointer initially points at the binder, so the first call
     through the stub resolves the symbol.  */
  switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
/* File-end hook for Darwin/x86: run the Darwin file-end processing,
   then the generic ix86 one.  NOTE(review): the ordering presumably
   matters (Darwin section finalization before the x86 epilogue output);
   confirm before reordering.  */
void
darwin_x86_file_end (void)
{
  darwin_file_end ();
  ix86_file_end ();
}
b069de3b
SS
23020#endif /* TARGET_MACHO */
23021
162f023b
JH
23022/* Order the registers for register allocator. */
23023
23024void
b96a374d 23025x86_order_regs_for_local_alloc (void)
162f023b
JH
23026{
23027 int pos = 0;
23028 int i;
23029
23030 /* First allocate the local general purpose registers. */
23031 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23032 if (GENERAL_REGNO_P (i) && call_used_regs[i])
23033 reg_alloc_order [pos++] = i;
23034
23035 /* Global general purpose registers. */
23036 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23037 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
23038 reg_alloc_order [pos++] = i;
23039
23040 /* x87 registers come first in case we are doing FP math
23041 using them. */
23042 if (!TARGET_SSE_MATH)
23043 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23044 reg_alloc_order [pos++] = i;
fce5a9f2 23045
162f023b
JH
23046 /* SSE registers. */
23047 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
23048 reg_alloc_order [pos++] = i;
23049 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
23050 reg_alloc_order [pos++] = i;
23051
d1f87653 23052 /* x87 registers. */
162f023b
JH
23053 if (TARGET_SSE_MATH)
23054 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23055 reg_alloc_order [pos++] = i;
23056
23057 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
23058 reg_alloc_order [pos++] = i;
23059
23060 /* Initialize the rest of array as we do not allocate some registers
23061 at all. */
23062 while (pos < FIRST_PSEUDO_REGISTER)
23063 reg_alloc_order [pos++] = 0;
23064}
194734e9 23065
fe77449a
DR
23066/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
23067 struct attribute_spec.handler. */
23068static tree
b96a374d
AJ
23069ix86_handle_struct_attribute (tree *node, tree name,
23070 tree args ATTRIBUTE_UNUSED,
23071 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
fe77449a
DR
23072{
23073 tree *type = NULL;
23074 if (DECL_P (*node))
23075 {
23076 if (TREE_CODE (*node) == TYPE_DECL)
23077 type = &TREE_TYPE (*node);
23078 }
23079 else
23080 type = node;
23081
23082 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
23083 || TREE_CODE (*type) == UNION_TYPE)))
23084 {
5c498b10
DD
23085 warning (OPT_Wattributes, "%qs attribute ignored",
23086 IDENTIFIER_POINTER (name));
fe77449a
DR
23087 *no_add_attrs = true;
23088 }
23089
23090 else if ((is_attribute_p ("ms_struct", name)
23091 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
23092 || ((is_attribute_p ("gcc_struct", name)
23093 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
23094 {
5c498b10 23095 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
fe77449a
DR
23096 IDENTIFIER_POINTER (name));
23097 *no_add_attrs = true;
23098 }
23099
23100 return NULL_TREE;
23101}
23102
4977bab6 23103static bool
3101faab 23104ix86_ms_bitfield_layout_p (const_tree record_type)
4977bab6 23105{
6ac49599 23106 return (TARGET_MS_BITFIELD_LAYOUT &&
021bad8e 23107 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
fe77449a 23108 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
4977bab6
ZW
23109}
23110
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);
  /* Nonzero when the return value is passed by invisible reference, which
     shifts `this' to the second parameter slot.  */
  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
  int nregs;

  if (TARGET_64BIT)
    {
      const int *parm_regs;

      /* 64-bit: `this' is always in a register; pick the register file
	 for the function's own calling ABI (MS vs. SysV).  */
      if (ix86_function_type_abi (type) == MS_ABI)
        parm_regs = x86_64_ms_abi_int_parameter_registers;
      else
        parm_regs = x86_64_int_parameter_registers;
      /* Slot 0 normally; slot 1 when the aggregate-return pointer
	 occupies the first register.  */
      return gen_rtx_REG (DImode, parm_regs[aggr]);
    }

  /* 32-bit: `this' is in a register only for regparm/fastcall functions
     (and never for stdarg functions, which pass everything on stack).  */
  nregs = ix86_function_regparm (type, function);

  if (nregs > 0 && !stdarg_p (type))
    {
      int regno;

      /* fastcall passes the first two arguments in ECX/EDX.  */
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
	regno = aggr ? DX_REG : CX_REG;
      else
        {
	  regno = AX_REG;
	  if (aggr)
	    {
	      regno = DX_REG;
	      /* With only one parameter register available, the
		 aggregate-return pointer consumes it and `this' spills
		 to the first stack slot.  */
	      if (nregs == 1)
		return gen_rtx_MEM (SImode,
				    plus_constant (stack_pointer_rtx, 4));
	    }
	}
      return gen_rtx_REG (SImode, regno);
    }

  /* Default: `this' is on the stack, after the return address and,
     if present, the aggregate-return pointer.  */
  return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
}
23156
3961e8fe
RH
23157/* Determine whether x86_output_mi_thunk can succeed. */
23158
23159static bool
3101faab 23160x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
b96a374d 23161 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
3101faab 23162 HOST_WIDE_INT vcall_offset, const_tree function)
3961e8fe
RH
23163{
23164 /* 64-bit can handle anything. */
23165 if (TARGET_64BIT)
23166 return true;
23167
23168 /* For 32-bit, everything's fine if we have one free register. */
e767b5be 23169 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
3961e8fe
RH
23170 return true;
23171
23172 /* Need a free register for vcall_offset. */
23173 if (vcall_offset)
23174 return false;
23175
23176 /* Need a free register for GOT references. */
23177 if (flag_pic && !(*targetm.binds_local_p) (function))
23178 return false;
23179
23180 /* Otherwise ok. */
23181 return true;
23182}
23183
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx xops[3];
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this_param;
      xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
      output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this_param;
      if (TARGET_64BIT)
	{
	  /* DELTA may not fit in an add-immediate; stage it through
	     %r10 (a call-clobbered register never used for args).  */
	  if (!x86_64_general_operand (xops[0], DImode))
	    {
	      tmp = gen_rtx_REG (DImode, R10_REG);
	      xops[1] = tmp;
	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
	      xops[0] = tmp;
	      xops[1] = this_param;
	    }
	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
	}
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
	tmp = gen_rtx_REG (DImode, R10_REG);
      else
	{
	  /* Pick a scratch register that is not used for argument
	     passing under the function's calling convention.  */
	  int tmp_regno = CX_REG;
	  if (lookup_attribute ("fastcall",
				TYPE_ATTRIBUTES (TREE_TYPE (function))))
	    tmp_regno = AX_REG;
	  tmp = gen_rtx_REG (SImode, tmp_regno);
	}

      /* Load the vtable pointer (*this) into the scratch register.  */
      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
	{
	  /* VCALL_OFFSET does not fit a displacement; compute the
	     address in %r11 instead.  */
	  rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
	  xops[0] = GEN_INT (vcall_offset);
	  xops[1] = tmp2;
	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
	}
      xops[1] = this_reg;
      output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    {
      xops[0] = this_reg;
      xops[1] = this_param;
      output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
    }

  /* Tail-call the real function.  */
  xops[0] = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      /* All thunks should be in the same object as their target,
	 and thus binds_local_p should be true.  */
      else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
	gcc_unreachable ();
      else
	{
	  /* Indirect jump through the GOT entry of the target.  */
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  tmp = gen_rtx_MEM (QImode, tmp);
	  xops[0] = tmp;
	  output_asm_insn ("jmp\t%A0", xops);
	}
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
	if (TARGET_MACHO)
	  {
	    /* Darwin PIC: jump through the Mach-O indirection stub.  */
	    rtx sym_ref = XEXP (DECL_RTL (function), 0);
	    tmp = (gen_rtx_SYMBOL_REF
		   (Pmode,
		    machopic_indirection_name (sym_ref, /*stub_p=*/true)));
	    tmp = gen_rtx_MEM (QImode, tmp);
	    xops[0] = tmp;
	    output_asm_insn ("jmp\t%0", xops);
	  }
	else
#endif /* TARGET_MACHO */
	{
	  /* ELF PIC: set up the GOT pointer in %ecx, then jump through
	     the target's GOT slot.  */
	  tmp = gen_rtx_REG (SImode, CX_REG);
	  output_set_got (tmp, NULL_RTX);

	  xops[1] = tmp;
	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
	  output_asm_insn ("jmp\t{*}%1", xops);
	}
    }
}
e2500fed 23320
/* Target hook for TARGET_ASM_FILE_START: emit the directives that must
   appear at the top of every assembly file.  */

static void
x86_file_start (void)
{
  default_file_start ();
#if TARGET_MACHO
  darwin_file_start ();
#endif
  /* Some assemblers want an explicit version directive.  */
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  /* Windows-targeted configurations flag FP usage for the CRT.  */
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  /* Switch the whole file to Intel syntax when -masm=intel.  */
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
}
23335
e932b21b 23336int
b96a374d 23337x86_field_alignment (tree field, int computed)
e932b21b
JH
23338{
23339 enum machine_mode mode;
ad9335eb
JJ
23340 tree type = TREE_TYPE (field);
23341
23342 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
e932b21b 23343 return computed;
dd25a747 23344 mode = TYPE_MODE (strip_array_types (type));
39e3a681
JJ
23345 if (mode == DFmode || mode == DCmode
23346 || GET_MODE_CLASS (mode) == MODE_INT
23347 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
e932b21b
JH
23348 return MIN (32, computed);
23349 return computed;
23350}
23351
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      /* Pass the per-call-site counter address in %r11.  */
      fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif

      /* SysV PIC must reach mcount through the GOT.  */
      if (DEFAULT_ABI == SYSV_ABI && flag_pic)
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
      else
	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      /* 32-bit PIC: address the counter relative to the GOT base in
	 %ebx, which the prologue has already set up.  */
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      /* Non-PIC: absolute counter address and direct call.  */
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
	       PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}
23385
d2c49530
JH
23386/* We don't have exact information about the insn sizes, but we may assume
23387 quite safely that we are informed about all 1 byte insns and memory
c51e6d85 23388 address sizes. This is enough to eliminate unnecessary padding in
d2c49530
JH
23389 99% of cases. */
23390
23391static int
b96a374d 23392min_insn_size (rtx insn)
d2c49530
JH
23393{
23394 int l = 0;
23395
23396 if (!INSN_P (insn) || !active_insn_p (insn))
23397 return 0;
23398
23399 /* Discard alignments we've emit and jump instructions. */
23400 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
23401 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
23402 return 0;
7656aee4 23403 if (JUMP_P (insn)
d2c49530
JH
23404 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
23405 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
23406 return 0;
23407
23408 /* Important case - calls are always 5 bytes.
23409 It is common to have many calls in the row. */
7656aee4 23410 if (CALL_P (insn)
d2c49530
JH
23411 && symbolic_reference_mentioned_p (PATTERN (insn))
23412 && !SIBLING_CALL_P (insn))
23413 return 5;
23414 if (get_attr_length (insn) <= 1)
23415 return 1;
23416
23417 /* For normal instructions we may rely on the sizes of addresses
23418 and the presence of symbol to require 4 bytes of encoding.
23419 This is not the case for jumps where references are PC relative. */
7656aee4 23420 if (!JUMP_P (insn))
d2c49530
JH
23421 {
23422 l = get_attr_length_address (insn);
23423 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
23424 l = 4;
23425 }
23426 if (l)
23427 return 1+l;
23428 else
23429 return 2;
23430}
23431
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_misspredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  /* Nonzero when the last instruction shifted out of the window was
     itself a jump/call.  */
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
   */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {

      nbytes += min_insn_size (insn);
      if (dump_file)
        fprintf(dump_file, "Insn %i estimated to %i bytes\n",
		INSN_UID (insn), min_insn_size (insn));
      /* Only branch-type instructions count toward the window limit;
	 jump tables are data and are excluded.  */
      if ((JUMP_P (insn)
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      /* Shrink the window from the front until it holds at most the
	 last 3 jumps before INSN.  */
      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start)
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || CALL_P (start))
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
        fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      /* Four jumps could share a 16-byte page: pad before INSN so its
	 end is pushed past the page boundary.  */
      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
	}
    }
}
23495
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  /* Examine every block that can reach the function exit.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      /* Only plain RETURN jumps in blocks worth optimizing.  */
      if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
	  || !maybe_hot_bb_p (bb))
	continue;
      /* Find the nearest preceding active insn or label.  */
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  /* A label directly before RET means some non-fallthrough
	     edge jumps straight to the return — the penalized case.  */
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      replace = true;
	}
      if (!replace)
	{
	  /* RET immediately preceded by a conditional jump or call is
	     also penalized.  */
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when the jump destination
	     is not visible to us.  */
	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
	    replace = true;
	}
      if (replace)
	{
	  /* Swap in the long (padded) form of the return.  */
	  emit_insn_before (gen_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
23548
23549/* Implement machine specific optimizations. We implement padding of returns
23550 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
23551static void
23552ix86_reorg (void)
23553{
d326eaf0 23554 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
be04394b
JH
23555 ix86_pad_returns ();
23556 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
23557 ix86_avoid_jump_misspredicts ();
2a500b9e
JH
23558}
23559
4977bab6
ZW
23560/* Return nonzero when QImode register that must be represented via REX prefix
23561 is used. */
23562bool
b96a374d 23563x86_extended_QIreg_mentioned_p (rtx insn)
4977bab6
ZW
23564{
23565 int i;
23566 extract_insn_cached (insn);
23567 for (i = 0; i < recog_data.n_operands; i++)
23568 if (REG_P (recog_data.operand[i])
23569 && REGNO (recog_data.operand[i]) >= 4)
23570 return true;
23571 return false;
23572}
23573
23574/* Return nonzero when P points to register encoded via REX prefix.
23575 Called via for_each_rtx. */
23576static int
b96a374d 23577extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
4977bab6
ZW
23578{
23579 unsigned int regno;
23580 if (!REG_P (*p))
23581 return 0;
23582 regno = REGNO (*p);
23583 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
23584}
23585
23586/* Return true when INSN mentions register that must be encoded using REX
23587 prefix. */
23588bool
b96a374d 23589x86_extended_reg_mentioned_p (rtx insn)
4977bab6
ZW
23590{
23591 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
23592}
23593
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  /* Values with the sign bit clear convert correctly as signed.  */
  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  /* Sign bit set: halve the value, rounding to odd so the low bit is
     not lost ((in >> 1) | (in & 1)), convert as signed, then double
     the result to recover the original magnitude.  */
  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
eb701deb
RH
23634\f
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
				   rtx target, rtx val)
{
  /* smode/wsmode/wvmode: narrow scalar, wide scalar, and wide vector
     modes for the "widen" fallback below.  */
  enum machine_mode smode, wsmode, wvmode;
  rtx x;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These modes have a native VEC_DUPLICATE pattern.  */
      val = force_reg (GET_MODE_INNER (mode), val);
      x = gen_rtx_VEC_DUPLICATE (mode, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4HImode:
      if (!mmx_ok)
	return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
	{
	  /* pshufw can broadcast the low halfword directly.  */
	  val = gen_lowpart (SImode, val);
	  x = gen_rtx_TRUNCATE (HImode, val);
	  x = gen_rtx_VEC_DUPLICATE (mode, x);
	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
	  return true;
	}
      else
	{
	  smode = HImode;
	  wsmode = SImode;
	  wvmode = V2SImode;
	  goto widen;
	}

    case V8QImode:
      if (!mmx_ok)
	return false;
      smode = QImode;
      wsmode = HImode;
      wvmode = V4HImode;
      goto widen;
    case V8HImode:
      if (TARGET_SSE2)
	{
	  rtx tmp1, tmp2;
	  /* Extend HImode to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
	  /* Insert the SImode value as low element of V4SImode vector. */
	  tmp2 = gen_reg_rtx (V4SImode);
	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
				    CONST0_RTX (V4SImode),
				    const1_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
	  /* Cast the V4SImode vector back to a V8HImode vector.  */
	  tmp1 = gen_reg_rtx (V8HImode);
	  emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
	  /* Duplicate the low short through the whole low SImode word.  */
	  emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
	  /* Cast the V8HImode vector back to a V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
	  /* Replicate the low element of the V4SImode vector.  */
	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
	  /* Cast the V2SImode back to V8HImode, and store in target.  */
	  emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
	  return true;
	}
      smode = HImode;
      wsmode = SImode;
      wvmode = V4SImode;
      goto widen;
    case V16QImode:
      if (TARGET_SSE2)
	{
	  rtx tmp1, tmp2;
	  /* Extend QImode to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
	  /* Insert the SImode value as low element of V4SImode vector. */
	  tmp2 = gen_reg_rtx (V4SImode);
	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
				    CONST0_RTX (V4SImode),
				    const1_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
	  /* Cast the V4SImode vector back to a V16QImode vector.  */
	  tmp1 = gen_reg_rtx (V16QImode);
	  emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
	  /* Duplicate the low byte through the whole low SImode word.  */
	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
	  /* Cast the V16QImode vector back to a V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
	  /* Replicate the low element of the V4SImode vector.  */
	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
	  /* Cast the V2SImode back to V16QImode, and store in target.  */
	  emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
	  return true;
	}
      smode = QImode;
      wsmode = HImode;
      wvmode = V8HImode;
      goto widen;
    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      val = convert_modes (wsmode, smode, val, true);
      x = expand_simple_binop (wsmode, ASHIFT, val,
			       GEN_INT (GET_MODE_BITSIZE (smode)),
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wvmode);
      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
	gcc_unreachable ();
      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
23771
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
				     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;
  bool use_vector_set = false;

  /* Decide whether the ISA provides a direct vec_set for this mode,
     which lets us zero the vector and insert VAR in one step.  */
  switch (mode)
    {
    case V2DImode:
      use_vector_set = TARGET_64BIT && TARGET_SSE4_1;
      break;
    case V16QImode:
    case V4SImode:
    case V4SFmode:
      use_vector_set = TARGET_SSE4_1;
      break;
    case V8HImode:
      use_vector_set = TARGET_SSE2;
      break;
    case V4HImode:
      use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
      break;
    default:
      break;
    }

  if (use_vector_set)
    {
      /* Zero the whole vector, then insert VAR at position ONE_VAR.  */
      emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
      var = force_reg (GET_MODE_INNER (mode), var);
      ix86_expand_vector_set (mmx_ok, target, var, one_var);
      return true;
    }

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      /* VEC_CONCAT only puts VAR in element 0.  */
      if (one_var != 0)
	return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      /* The shuffles below need a hard-register-free pseudo target.  */
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
	new_target = gen_reg_rtx (mode);
      else
	new_target = target;
      /* Broadcast VAR and merge with zero, leaving VAR in element 0.  */
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
	{
	  /* We need to shuffle the value to the correct position, so
	     create a new pseudo to store the intermediate result.  */

	  /* With SSE2, we can use the integer shuffle insns.  */
	  if (mode != V4SFmode && TARGET_SSE2)
	    {
	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
					    GEN_INT (1),
					    GEN_INT (one_var == 1 ? 0 : 1),
					    GEN_INT (one_var == 2 ? 0 : 1),
					    GEN_INT (one_var == 3 ? 0 : 1)));
	      if (target != new_target)
		emit_move_insn (target, new_target);
	      return true;
	    }

	  /* Otherwise convert the intermediate result to V4SFmode and
	     use the SSE1 shuffle instructions.  */
	  if (mode != V4SFmode)
	    {
	      tmp = gen_reg_rtx (V4SFmode);
	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
	    }
	  else
	    tmp = new_target;

	  emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
				       GEN_INT (1),
				       GEN_INT (one_var == 1 ? 0 : 1),
				       GEN_INT (one_var == 2 ? 0+4 : 1+4),
				       GEN_INT (one_var == 3 ? 0+4 : 1+4)));

	  if (mode != V4SFmode)
	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
	  else if (tmp != target)
	    emit_move_insn (target, tmp);
	}
      else if (target != new_target)
	emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
	return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      if (one_var != 0)
	return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
						var, one_var))
	gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
23912
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  /* Build the constant vector with a zero placeholder at ONE_VAR.  */
  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
	 the general case.  */
      return false;

    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      /* SSE4.1 has pinsrb, so the generic path below works.  */
      if (TARGET_SSE4_1)
	break;
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
	 the variable value with its adjacent constant value, and
	 promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
	{
	  /* Odd position: variable byte goes in the high half.  */
	  var = convert_modes (HImode, QImode, var, true);
	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
	  x = GEN_INT (INTVAL (x) & 0xff);
	}
      else
	{
	  /* Even position: neighboring constant byte in the high half.  */
	  var = convert_modes (HImode, QImode, var, true);
	  x = gen_int_mode (INTVAL (x) << 8, HImode);
	}
      if (x != const0_rtx)
	var = expand_simple_binop (HImode, IOR, var, x, var,
				   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  /* Load the constants, then overwrite the one variable slot.  */
  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
23989
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   concatenate to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_concat (enum machine_mode mode,
				rtx target, rtx *ops, int n)
{
  /* cmode: mode of each concatenated half; hmode: intermediate mode
     when more than one halving step is required.  */
  enum machine_mode cmode, hmode = VOIDmode;
  rtx first[4], second[2];
  rtvec v;
  int i, j;

  switch (n)
    {
    case 2:
      switch (mode)
	{
	case V4SImode:
	  cmode = V2SImode;
	  break;
	case V4SFmode:
	  cmode = V2SFmode;
	  break;
	case V2DImode:
	  cmode = DImode;
	  break;
	case V2SImode:
	  cmode = SImode;
	  break;
	case V2DFmode:
	  cmode = DFmode;
	  break;
	case V2SFmode:
	  cmode = SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}

      /* Base case: concatenate the two halves directly.  */
      if (!register_operand (ops[1], cmode))
	ops[1] = force_reg (cmode, ops[1]);
      if (!register_operand (ops[0], cmode))
	ops[0] = force_reg (cmode, ops[0]);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, ops[0],
						  ops[1])));
      break;

    case 4:
      switch (mode)
	{
	case V4SImode:
	  cmode = V2SImode;
	  break;
	case V4SFmode:
	  cmode = V2SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      goto half;

half:
      /* FIXME: We process inputs backward to help RA.  PR 36222.  */
      /* Pair up adjacent scalars into half-width vectors.  */
      i = n - 1;
      j = (n >> 1) - 1;
      for (; i > 0; i -= 2, j--)
	{
	  first[j] = gen_reg_rtx (cmode);
	  v = gen_rtvec (2, ops[i - 1], ops[i]);
	  ix86_expand_vector_init (false, first[j],
				   gen_rtx_PARALLEL (cmode, v));
	}

      n >>= 1;
      if (n > 2)
	{
	  /* Still more than two pieces: combine pairwise into HMODE
	     vectors, then recurse on those.  */
	  gcc_assert (hmode != VOIDmode);
	  for (i = j = 0; i < n; i += 2, j++)
	    {
	      second[j] = gen_reg_rtx (hmode);
	      ix86_expand_vector_init_concat (hmode, second [j],
					      &first [i], 2);
	    }
	  n >>= 1;
	  ix86_expand_vector_init_concat (mode, target, second, n);
	}
      else
	ix86_expand_vector_init_concat (mode, target, first, n);
      break;

    default:
      gcc_unreachable ();
    }
}
08e64088 24086
63e46cce
L
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   interleave to handle the most general case: all values variable,
   and none identical.  MODE is the vector mode of TARGET; OPS holds
   N partially-built vectors (each carrying one even/odd element pair)
   that are merged by successive low-half interleaves.  */

static void
ix86_expand_vector_init_interleave (enum machine_mode mode,
				    rtx target, rtx *ops, int n)
{
  enum machine_mode first_imode, second_imode, third_imode;
  int i, j;
  rtx op0, op1;
  rtx (*gen_load_even) (rtx, rtx, rtx);
  rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
  rtx (*gen_interleave_second_low) (rtx, rtx, rtx);

  /* Select the element-insert and interleave generators, plus the
     widening integer modes used at each interleave stage.  */
  switch (mode)
    {
    case V8HImode:
      gen_load_even = gen_vec_setv8hi;
      gen_interleave_first_low = gen_vec_interleave_lowv4si;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      first_imode = V4SImode;
      second_imode = V2DImode;
      third_imode = VOIDmode;
      break;
    case V16QImode:
      gen_load_even = gen_vec_setv16qi;
      gen_interleave_first_low = gen_vec_interleave_lowv8hi;
      gen_interleave_second_low = gen_vec_interleave_lowv4si;
      first_imode = V8HImode;
      second_imode = V4SImode;
      third_imode = V2DImode;
      break;
    default:
      gcc_unreachable ();
    }

  /* Build one FIRST_IMODE vector per pair ops[2i]/ops[2i+1].  */
  for (i = 0; i < n; i++)
    {
      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
      op0 = gen_reg_rtx (SImode);
      emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));

      /* Insert the SImode value as low element of V4SImode vector. */
      op1 = gen_reg_rtx (V4SImode);
      op0 = gen_rtx_VEC_MERGE (V4SImode,
			       gen_rtx_VEC_DUPLICATE (V4SImode,
						      op0),
			       CONST0_RTX (V4SImode),
			       const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, op1, op0));

      /* Cast the V4SImode vector back to a vector in original mode.  */
      op0 = gen_reg_rtx (mode);
      emit_move_insn (op0, gen_lowpart (mode, op1));

      /* Load even elements into the second position.  */
      emit_insn ((*gen_load_even) (op0, ops [i + i + 1],
				   const1_rtx));

      /* Cast vector to FIRST_IMODE vector.  */
      ops[i] = gen_reg_rtx (first_imode);
      emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
    }

  /* Interleave low FIRST_IMODE vectors.  */
  for (i = j = 0; i < n; i += 2, j++)
    {
      op0 = gen_reg_rtx (first_imode);
      emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));

      /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
      ops[j] = gen_reg_rtx (second_imode);
      emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
    }

  /* Interleave low SECOND_IMODE vectors.  V16QImode needs the extra
     V4SImode stage before falling through to the final V2DImode merge.  */
  switch (second_imode)
    {
    case V4SImode:
      for (i = j = 0; i < n / 2; i += 2, j++)
	{
	  op0 = gen_reg_rtx (second_imode);
	  emit_insn ((*gen_interleave_second_low) (op0, ops[i],
						   ops[i + 1]));

	  /* Cast the SECOND_IMODE vector to the THIRD_IMODE
	     vector.  */
	  ops[j] = gen_reg_rtx (third_imode);
	  emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
	}
      second_imode = V2DImode;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      /* FALLTHRU */

    case V2DImode:
      op0 = gen_reg_rtx (second_imode);
      emit_insn ((*gen_interleave_second_low) (op0, ops[0],
					       ops[1]));

      /* Cast the SECOND_IMODE vector back to a vector on original
	 mode.  */
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_lowpart (mode, op0)));
      break;

    default:
      gcc_unreachable ();
    }
}
d0208f9b 24197
63e46cce
L
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  Dispatches to the concat or
   interleave helpers when the ISA allows; otherwise falls back to
   assembling word-sized chunks with integer shifts/ors.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals)
{
  rtx ops[16];
  int n, i;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
	break;
      /* FALLTHRU */

    case V4SFmode:
    case V4SImode:
    case V2DFmode:
    case V2DImode:
      /* Wide-element vectors: build by recursive concatenation.  */
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_concat (mode, target, ops, n);
      return;

    case V16QImode:
      if (!TARGET_SSE4_1)
	break;
      /* FALLTHRU */

    case V8HImode:
      if (!TARGET_SSE2)
	break;

      /* Narrow-element vectors: build by interleaving element pairs.  */
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
      return;

    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Generic fallback: pack the elements into word-mode registers with
     shift/ior, then move the words into the vector register.  */
  {
    int i, j, n_elts, n_words, n_elt_per_word;
    enum machine_mode inner_mode;
    rtx words[4], shift;

    inner_mode = GET_MODE_INNER (mode);
    n_elts = GET_MODE_NUNITS (mode);
    n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
    n_elt_per_word = n_elts / n_words;
    shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

    for (i = 0; i < n_words; ++i)
      {
	rtx word = NULL_RTX;

	/* Accumulate the word from its highest element down, so each
	   shift makes room for the next lower element.  */
	for (j = 0; j < n_elt_per_word; ++j)
	  {
	    rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
	    elt = convert_modes (word_mode, inner_mode, elt, true);

	    if (j == 0)
	      word = elt;
	    else
	      {
		word = expand_simple_binop (word_mode, ASHIFT, word, shift,
					    word, 1, OPTAB_LIB_WIDEN);
		word = expand_simple_binop (word_mode, IOR, word, elt,
					    word, 1, OPTAB_LIB_WIDEN);
	      }
	  }

	words[i] = word;
      }

    if (n_words == 1)
      emit_move_insn (target, gen_lowpart (mode, words[0]));
    else if (n_words == 2)
      {
	/* Clobber first so the low/high partial writes don't look like
	   uses of an uninitialized register.  */
	rtx tmp = gen_reg_rtx (mode);
	emit_clobber (tmp);
	emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
	emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
	emit_move_insn (target, tmp);
      }
    else if (n_words == 4)
      {
	/* Recurse: treat the four words as a V4SImode init.  */
	rtx tmp = gen_reg_rtx (V4SImode);
	gcc_assert (word_mode == SImode);
	vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
	ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
	emit_move_insn (target, gen_lowpart (mode, tmp));
      }
    else
      gcc_unreachable ();
  }
}
24305
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  Chooses the cheapest strategy:
   constant pool, broadcast, one-variable patch-up, or the fully
   general expander.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  /* Classify the elements: count non-constants (remembering the last
     such index) and detect the all-equal and all-zero-constant cases.  */
  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
	    || GET_CODE (x) == CONST_DOUBLE
	    || GET_CODE (x) == CONST_FIXED))
	n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
					    XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero
	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
						  XVECEXP (vals, 0, one_var),
						  one_var))
	return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
	return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
24362
/* Store scalar VAL into element ELT of vector TARGET.  Suppress the
   use of MMX instructions unless MMX_OK is true.  Uses a VEC_MERGE
   insert where the ISA provides one, shuffle sequences otherwise, and
   a stack round-trip as the last resort.  */

void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_merge = false;
  rtx tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
	{
	  /* Extract the untouched element and rebuild via VEC_CONCAT.  */
	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
	  if (elt == 0)
	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
	  else
	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
	  return;
	}
      break;

    case V2DImode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

    case V2DFmode:
      {
	rtx op0, op1;

	/* For the two element vectors, we implement a VEC_CONCAT with
	   the extraction of the other element.  */

	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

	if (elt == 0)
	  op0 = val, op1 = tmp;
	else
	  op0 = tmp, op1 = val;

	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      switch (elt)
	{
	case 0:
	  use_vec_merge = true;
	  break;

	case 1:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* target = A A B B */
	  emit_insn (gen_sse_unpcklps (target, target, target));
	  /* target = X A B B */
	  ix86_expand_vector_set (false, target, val, 0);
	  /* target = A X C D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (1), GEN_INT (0),
				       GEN_INT (2+4), GEN_INT (3+4)));
	  return;

	case 2:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (0+4), GEN_INT (3+4)));
	  return;

	case 3:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B C X */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (2+4), GEN_INT (0+4)));
	  return;

	default:
	  gcc_unreachable ();
	}
      break;

    case V4SImode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
	{
	  use_vec_merge = true;
	  break;
	}

      if (TARGET_SSE2)
	{
	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
	     store into element 0, then shuffle them back.  */

	  rtx order[4];

	  order[0] = GEN_INT (elt);
	  order[1] = const1_rtx;
	  order[2] = const2_rtx;
	  order[3] = GEN_INT (3);
	  order[elt] = const0_rtx;

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  ix86_expand_vector_set (false, target, val, 0);

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
				  gen_lowpart (SFmode, val), elt);
	}
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_merge = TARGET_SSE4_1;
      break;

    case V8QImode:
    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      /* Last resort: spill the vector to the stack, overwrite the one
	 element in memory, and reload.  */
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
24539
/* Extract element ELT of vector VEC into scalar TARGET.  Suppress the
   use of MMX instructions unless MMX_OK is true.  Shuffles the wanted
   element into lane 0 when no direct extract exists, falling back to a
   stack round-trip otherwise.  */

void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
	break;

      /* Without SSE4.1, move the wanted element into lane 0 first.  */
      switch (elt)
	{
	case 0:
	  tmp = vec;
	  break;

	case 1:
	case 3:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
				       GEN_INT (elt), GEN_INT (elt),
				       GEN_INT (elt+4), GEN_INT (elt+4)));
	  break;

	case 2:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_unpckhps (tmp, vec, vec));
	  break;

	default:
	  gcc_unreachable ();
	}
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
	break;

      if (TARGET_SSE2)
	{
	  /* Same lane-0 trick as V4SF, with integer shuffles.  */
	  switch (elt)
	    {
	    case 0:
	      tmp = vec;
	      break;

	    case 1:
	    case 3:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					    GEN_INT (elt), GEN_INT (elt),
					    GEN_INT (elt), GEN_INT (elt)));
	      break;

	    case 2:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  vec = tmp;
	  use_vec_extr = true;
	  elt = 0;
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				      gen_lowpart (V4SFmode, vec), elt);
	  return;
	}
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_extr = TARGET_SSE4_1;
      break;

    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == QImode || inner_mode == HImode)
	{
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);
	}

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      /* Last resort: spill the vector and load the element back as a
	 scalar from the stack slot.  */
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
2ab1754e 24676
/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
   pattern to reduce; DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx tmp1, tmp2, tmp3;

  tmp1 = gen_reg_rtx (V4SFmode);
  tmp2 = gen_reg_rtx (V4SFmode);
  tmp3 = gen_reg_rtx (V4SFmode);

  /* movhlps brings the high pair down; combining with IN reduces four
     elements to two partial results in lanes 0 and 1.  */
  emit_insn (gen_sse_movhlps (tmp1, in, in));
  emit_insn (fn (tmp2, tmp1, in));

  /* Broadcast lane 1 of the partial result and combine once more so
     the full reduction lands in lane 0 of DEST.  */
  emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
			       GEN_INT (1), GEN_INT (1),
			       GEN_INT (1+4), GEN_INT (1+4)));
  emit_insn (fn (dest, tmp2, tmp3));
}
eb701deb 24697\f
a81083b2
BE
24698/* Target hook for scalar_mode_supported_p. */
24699static bool
24700ix86_scalar_mode_supported_p (enum machine_mode mode)
24701{
24702 if (DECIMAL_FLOAT_MODE_P (mode))
24703 return true;
27735edb
UB
24704 else if (mode == TFmode)
24705 return TARGET_64BIT;
a81083b2
BE
24706 else
24707 return default_scalar_mode_supported_p (mode);
24708}
24709
f676971a
EC
24710/* Implements target hook vector_mode_supported_p. */
24711static bool
24712ix86_vector_mode_supported_p (enum machine_mode mode)
24713{
dcbca208 24714 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
f676971a 24715 return true;
dcbca208 24716 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
f676971a 24717 return true;
dcbca208 24718 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
f676971a 24719 return true;
dcbca208
RH
24720 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
24721 return true;
24722 return false;
f676971a
EC
24723}
24724
c77cd3d1
UB
24725/* Target hook for c_mode_for_suffix. */
24726static enum machine_mode
24727ix86_c_mode_for_suffix (char suffix)
24728{
24729 if (TARGET_64BIT && suffix == 'q')
24730 return TFmode;
24731 if (TARGET_MMX && suffix == 'w')
24732 return XFmode;
24733
24734 return VOIDmode;
24735}
24736
67dfe110
KH
24737/* Worker function for TARGET_MD_ASM_CLOBBERS.
24738
24739 We do this in the new i386 backend to maintain source compatibility
24740 with the old cc0-based compiler. */
24741
24742static tree
61158923
HPN
24743ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
24744 tree inputs ATTRIBUTE_UNUSED,
24745 tree clobbers)
67dfe110 24746{
f676971a
EC
24747 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
24748 clobbers);
24749 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
24750 clobbers);
67dfe110
KH
24751 return clobbers;
24752}
24753
/* Implements target vector targetm.asm.encode_section_info.  This
   is not used by netware.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Variables placed in the large data section need far (full 64-bit)
     addressing; record that on the symbol so address generation knows.  */
  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}
24767
3c5cb3e4
KH
24768/* Worker function for REVERSE_CONDITION. */
24769
24770enum rtx_code
24771ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
24772{
24773 return (mode != CCFPmode && mode != CCFPUmode
24774 ? reverse_condition (code)
24775 : reverse_condition_maybe_unordered (code));
24776}
24777
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  Returns the assembler template; a dying source
   register is popped (fstp/ffreep) to keep the x87 stack balanced.  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* reg <- reg: pop the source if this is its last use.  */
      if (REG_P (operands[1])
	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
	  if (REGNO (operands[0]) == FIRST_STACK_REG)
	    return output_387_ffreep (operands, 0);
	  return "fstp\t%y0";
	}
      if (STACK_TOP_P (operands[0]))
	return "fld%z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%z0\t%y0";
      else
	{
	  /* There is no non-popping store to memory for XFmode.
	     So if we need one, follow the store with a load.  */
	  if (GET_MODE (operands[0]) == XFmode)
	    return "fstp%z0\t%y0\n\tfld%z0\t%y0";
	  else
	    return "fst%z0\t%y0";
	}
    }
  else
    gcc_unreachable();
}
24815
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  /* Fetch the x87 status word into REG.  */
  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
    {
      /* Copy the status word into EFLAGS and branch on the
	 unordered condition.  */
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      /* No sahf: test the C2 bit (0x04) of the status word directly.  */
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);

  emit_jump_insn (temp);
  /* The unordered path is taken rarely; mark it unlikely.  */
  predict_jump (REG_BR_PROB_BASE * 10 / 100);
}
24850
/* Output code to perform a log1p XFmode calculation: OP0 = log(1 + OP1).  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  /* fyl2xp1 is only valid for |op1| below 1 - sqrt(2)/2 (~0.2928...);
     compare |op1| against that threshold to pick the expansion.  */
  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  /* Small argument: op0 = fyl2xp1 (op1) * ln(2).  */
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  /* General argument: op0 = fyl2x (1.0 + op1) * ln(2).  */
  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
}
f676971a 24880
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].
   RES = A / B in MODE (SFmode or a single-float vector mode), using the
   hardware reciprocal estimate refined by one iteration.  */

void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
{
  rtx x0, x1, e0, e1, two;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  x1 = gen_reg_rtx (mode);

  two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);

  if (VECTOR_MODE_P (mode))
    two = ix86_build_const_vector (SFmode, true, two);

  two = force_reg (mode, two);

  /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */

  /* x0 = rcp(b) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
					  UNSPEC_RCP)));
  /* e0 = x0 * b */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, b)));
  /* e1 = 2. - e0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_MINUS (mode, two, e0)));
  /* x1 = x0 * e1 */
  emit_insn (gen_rtx_SET (VOIDmode, x1,
			  gen_rtx_MULT (mode, x0, e1)));
  /* res = a * x1 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, a, x1)));
}
24919
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.
   RES = sqrt(A) (or rsqrt(A) when RECIP) in MODE, using the hardware
   rsqrt estimate refined by one iteration.  */

void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
			 bool recip)
{
  rtx x0, e0, e1, e2, e3, mthree, mhalf;
  REAL_VALUE_TYPE r;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  e2 = gen_reg_rtx (mode);
  e3 = gen_reg_rtx (mode);

  /* Build the constants -3.0 and -0.5 (vectors of them if needed).  */
  real_from_integer (&r, VOIDmode, -3, -1, 0);
  mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
  mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  if (VECTOR_MODE_P (mode))
    {
      mthree = ix86_build_const_vector (SFmode, true, mthree);
      mhalf = ix86_build_const_vector (SFmode, true, mhalf);
    }

  /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
     rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */

  /* x0 = rsqrt(a) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
					  UNSPEC_RSQRT)));

  /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero, mask;

      zero = gen_reg_rtx (mode);
      mask = gen_reg_rtx (mode);

      zero = force_reg (mode, CONST0_RTX(mode));
      emit_insn (gen_rtx_SET (VOIDmode, mask,
			      gen_rtx_NE (mode, zero, a)));

      emit_insn (gen_rtx_SET (VOIDmode, x0,
			      gen_rtx_AND (mode, x0, mask)));
    }

  /* e0 = x0 * a */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, a)));
  /* e1 = e0 * x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_MULT (mode, e0, x0)));

  /* e2 = e1 - 3. */
  mthree = force_reg (mode, mthree);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
			  gen_rtx_PLUS (mode, e1, mthree)));

  mhalf = force_reg (mode, mhalf);
  if (recip)
    /* e3 = -.5 * x0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, x0, mhalf)));
  else
    /* e3 = -.5 * e0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, e0, mhalf)));
  /* ret = e2 * e3 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, e2, e3)));
}
24996
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void ATTRIBUTE_UNUSED
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  /* Everything else uses the generic ELF section directive.  */
  default_elf_asm_named_section (name, flags, decl);
}
25015
cac24f06
JM
25016/* Return the mangling of TYPE if it is an extended fundamental type. */
25017
25018static const char *
3101faab 25019ix86_mangle_type (const_tree type)
cac24f06 25020{
608063c3
JB
25021 type = TYPE_MAIN_VARIANT (type);
25022
25023 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
25024 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
25025 return NULL;
25026
cac24f06
JM
25027 switch (TYPE_MODE (type))
25028 {
25029 case TFmode:
25030 /* __float128 is "g". */
25031 return "g";
25032 case XFmode:
25033 /* "long double" or __float80 is "e". */
25034 return "e";
25035 default:
25036 return NULL;
25037 }
25038}
25039
7ce918c5
JJ
25040/* For 32-bit code we can save PIC register setup by using
25041 __stack_chk_fail_local hidden function instead of calling
25042 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
25043 register, so it is better to call __stack_chk_fail directly. */
25044
25045static tree
25046ix86_stack_protect_fail (void)
25047{
25048 return TARGET_64BIT
25049 ? default_external_stack_protect_fail ()
25050 : default_hidden_stack_protect_fail ();
25051}
25052
72ce3d4a
JH
25053/* Select a format to encode pointers in exception handling data. CODE
25054 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
25055 true if the symbol may be affected by dynamic relocations.
25056
25057 ??? All x86 object file formats are capable of representing this.
25058 After all, the relocation needed is the same as for the call insn.
25059 Whether or not a particular assembler allows us to enter such, I
25060 guess we'll have to see. */
25061int
25062asm_preferred_eh_data_format (int code, int global)
25063{
25064 if (flag_pic)
25065 {
a46cec70 25066 int type = DW_EH_PE_sdata8;
72ce3d4a
JH
25067 if (!TARGET_64BIT
25068 || ix86_cmodel == CM_SMALL_PIC
25069 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
25070 type = DW_EH_PE_sdata4;
25071 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
25072 }
25073 if (ix86_cmodel == CM_SMALL
25074 || (ix86_cmodel == CM_MEDIUM && code))
25075 return DW_EH_PE_udata4;
25076 return DW_EH_PE_absptr;
25077}

/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign-bit.  */
static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
	{
	  /* We need to generate a scalar mode mask in this case.  */
	  rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
	  tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
	  mask = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
	}
    }
  else
    /* The caller's mask selects everything BUT the sign bit; invert it.  */
    mask = gen_rtx_NOT (mode, mask);
  /* sgn = sign & sign-bit-mask; result = abs_value | sgn.  */
  emit_insn (gen_rtx_SET (VOIDmode, sgn,
			  gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
			  gen_rtx_IOR (mode, abs_value, sgn)));
}
25106
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */
static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  /* Mask with all bits set except the sign bit.  */
  mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    }
  /* xa = op0 & ~sign-bit — i.e. fabs (op0).  */
  emit_insn (gen_rtx_SET (VOIDmode, xa,
			  gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}
25134
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */
static rtx
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
                                  bool swap_operands)
{
  rtx label, tmp;

  if (swap_operands)
    {
      tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  label = gen_label_rtx ();
  /* Compare in CCFPUmode (unordered-aware FP flags) and branch on CODE.  */
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}
25164
d096ecdd
RG
25165/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
25166 using comparison code CODE. Operands are swapped for the comparison if
25167 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
25168static rtx
25169ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
25170 bool swap_operands)
25171{
25172 enum machine_mode mode = GET_MODE (op0);
25173 rtx mask = gen_reg_rtx (mode);
25174
25175 if (swap_operands)
25176 {
25177 rtx tmp = op0;
25178 op0 = op1;
25179 op1 = tmp;
25180 }
25181
25182 if (mode == DFmode)
25183 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
25184 gen_rtx_fmt_ee (code, mode, op0, op1)));
25185 else
25186 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
25187 gen_rtx_fmt_ee (code, mode, op0, op1)));
25188
25189 return mask;
25190}
25191
c7d32ff6
RG
25192/* Generate and return a rtx of mode MODE for 2**n where n is the number
25193 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
25194static rtx
25195ix86_gen_TWO52 (enum machine_mode mode)
25196{
25197 REAL_VALUE_TYPE TWO52r;
25198 rtx TWO52;
25199
25200 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
25201 TWO52 = const_double_from_real_value (TWO52r, mode);
25202 TWO52 = force_reg (mode, TWO52);
25203
25204 return TWO52;
25205}
25206
4d81bf84
RG
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
       return (long)tmp;
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0), i.e. the largest representable value
     strictly below 0.5: pred_half = 0.5 - 2**(-p-1) where p is the
     precision of MODE.  NOTE(review): presumably this avoids values
     just below .5 being rounded up by the later addition.  */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj, truncating conversion.  */
  expand_fix (op0, adj, 0);
}
72ce3d4a 25236
c3a4177f
RG
/* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
   into OP0.  DO_FLOOR selects between the two.  */
void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
	xi = (long)op1;
	xi -= (double)xi > op1 ? 1 : 0;
	return xi;
   */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label, tmp;

  /* reg = (long)op1, truncating conversion.  */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg.  The jump skips the
     compensation; operands are swapped for the ceil variant so the
     same UNLE test handles both directions.  */
  label = ix86_expand_sse_compare_and_jump (UNLE,
					    freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
			     ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}
25271
c7d32ff6
RG
/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */
void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	xa = fabs (operand1);
	if (!isless (xa, 2**52))
	  return operand1;
	xa = xa + 2**52 - 2**52;
	return copysign (xa, operand1);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  /* Temporary for the result, preset to the input so the out-of-range
     path (including NaN) falls through with the input unchanged.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label;  (taken also when unordered,
     i.e. for NaN, since UNLE is an unordered comparison).  */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52: since 0 <= xa < 2**52 here, this rounds
     xa to an integral value in the current rounding mode.  */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* Copy the sign of the input back; working on the absolute value and
     restoring the sign keeps -0.0 -> -0.0 correct.  */
  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
25307
d096ecdd
RG
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  Variant that does not rely on DImode truncation, so it
   also works when cvttsd2siq is unavailable (32-bit targets).  */
void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa = xa + TWO52 - TWO52;
	x2 = copysign (xa, x);
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 -= -1;
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label;  (also taken for NaN).  */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52;  rounds xa to an integral value.  */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 (subtracting -1.0 adds 1 for the ceil case).  */
  one = force_reg (mode,
		   const_double_from_real_value (do_floor
						 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0).  The mask is
     all-ones where compensation is needed; ANDing it with +/-1.0
     produces the adjustment value.  */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
25369
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  Variant using integer truncation and conversion back,
   so DFmode requires DImode truncation (64-bit targets).  */
void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label;  (also taken for NaN).  */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x:  round toward zero via the integer type.  */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0).  The all-ones mask
     ANDed with 1.0 yields the adjustment, subtracted for floor and
     added for ceil.  */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* Restore the sign of the input to keep -0.0 correct when it is
     honored.  */
  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
25430
097f2964
RG
25431/* Expand SSE sequence for computing round from OPERAND1 storing
25432 into OPERAND0. Sequence that works without relying on DImode truncation
25433 via cvttsd2siq that is only available on 64bit targets. */
25434void
25435ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
25436{
25437 /* C code for the stuff we expand below.
25438 double xa = fabs (x), xa2, x2;
25439 if (!isless (xa, TWO52))
25440 return x;
25441 Using the absolute value and copying back sign makes
25442 -0.0 -> -0.0 correct.
25443 xa2 = xa + TWO52 - TWO52;
25444 Compensate.
25445 dxa = xa2 - xa;
25446 if (dxa <= -0.5)
25447 xa2 += 1;
25448 else if (dxa > 0.5)
25449 xa2 -= 1;
25450 x2 = copysign (xa2, x);
25451 return x2;
25452 */
25453 enum machine_mode mode = GET_MODE (operand0);
25454 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
25455
25456 TWO52 = ix86_gen_TWO52 (mode);
25457
25458 /* Temporary for holding the result, initialized to the input
25459 operand to ease control flow. */
25460 res = gen_reg_rtx (mode);
25461 emit_move_insn (res, operand1);
25462
25463 /* xa = abs (operand1) */
25464 xa = ix86_expand_sse_fabs (res, &mask);
25465
25466 /* if (!isless (xa, TWO52)) goto label; */
25467 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25468
25469 /* xa2 = xa + TWO52 - TWO52; */
63be4b32
RG
25470 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25471 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
097f2964
RG
25472
25473 /* dxa = xa2 - xa; */
63be4b32 25474 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
097f2964
RG
25475
25476 /* generate 0.5, 1.0 and -0.5 */
25477 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
63be4b32
RG
25478 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
25479 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
25480 0, OPTAB_DIRECT);
097f2964
RG
25481
25482 /* Compensate. */
25483 tmp = gen_reg_rtx (mode);
25484 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
25485 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
25486 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25487 gen_rtx_AND (mode, one, tmp)));
63be4b32 25488 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
097f2964
RG
25489 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
25490 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
25491 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25492 gen_rtx_AND (mode, one, tmp)));
63be4b32 25493 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
097f2964
RG
25494
25495 /* res = copysign (xa2, operand1) */
25496 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
25497
25498 emit_label (label);
25499 LABEL_NUSES (label) = 1;
25500
25501 emit_move_insn (operand0, res);
25502}
25503
044928d6
RG
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label;  (also taken for NaN).  */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x:  the integer conversion truncates toward
     zero, which is exactly trunc.  */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  /* Restore the sign of the input to keep -0.0 correct when it is
     honored.  */
  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
25547
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  Variant that works without DImode truncation
   (32-bit targets).  */
void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa2 = xa + TWO52 - TWO52;
     Compensate:
	if (xa2 > xa)
	  xa2 -= 1.0;
	x2 = copysign (xa2, x);
	return x2;
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label;  (also taken for NaN).  */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52;  rounds xa to an integral value.  */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0).  Rounding may have
     gone up; subtract 1.0 (mask ANDed with 1.0) to truncate instead.  */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
			  gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
			     res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
25605
097f2964
RG
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	double xa = fabs (x);
	if (!isless (xa, TWO52))
	  return x;
	xa = (double)(long)(xa + nextafter (0.5, 0.0));
	return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  /* Out-of-range values (and NaN) skip the rounding.  */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0), i.e. the largest value strictly below
     0.5: pred_half = 0.5 - 2**(-p-1) with p the precision of MODE.  */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa, truncating toward zero.  */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
25654
04e1d06b
MM
25655\f
/* Validate whether a SSE5 instruction is valid or not.
   OPERANDS is the array of operands.
   NUM is the number of operands.
   USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
   NUM_MEMORY is the maximum number of memory operands to accept.
   A negative NUM_MEMORY means -NUM_MEMORY is the limit, and additionally
   the last operand is allowed to be a memory reference without counting
   against that limit (used for pmacsdq{l,h}, see below).  */

bool
ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
		      bool uses_oc0, int num_memory)
{
  int mem_mask;		/* Bitmask of operand indexes that are memory refs.  */
  int mem_count;	/* Number of memory operands found.  */
  int i;

  /* Count the number of memory arguments */
  mem_mask = 0;
  mem_count = 0;
  for (i = 0; i < num; i++)
    {
      enum machine_mode mode = GET_MODE (operands[i]);
      if (register_operand (operands[i], mode))
	;

      else if (memory_operand (operands[i], mode))
	{
	  mem_mask |= (1 << i);
	  mem_count++;
	}

      else
	{
	  rtx pattern = PATTERN (insn);

	  /* allow 0 for pcmov: a constant-zero operand is only legal in
	     position >= 2 of an IF_THEN_ELSE set.  */
	  if (GET_CODE (pattern) != SET
	      || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
	      || i < 2
	      || operands[i] != CONST0_RTX (mode))
	    return false;
	}
    }

  /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
     a memory operation.  A negative NUM_MEMORY encodes this; discount
     the last operand from the memory bookkeeping.  */
  if (num_memory < 0)
    {
      num_memory = -num_memory;
      if ((mem_mask & (1 << (num-1))) != 0)
	{
	  mem_mask &= ~(1 << (num-1));
	  mem_count--;
	}
    }

  /* If there were no memory operations, allow the insn */
  if (mem_mask == 0)
    return true;

  /* Do not allow the destination register to be a memory operand.  */
  else if (mem_mask & (1 << 0))
    return false;

  /* If there are too many memory operations, disallow the instruction.  While
     the hardware only allows 1 memory reference, before register allocation
     for some insns, we allow two memory operations sometimes in order to allow
     code like the following to be optimized:

	float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }

    or similar cases that are vectorized into using the fmaddss
    instruction.  */
  else if (mem_count > num_memory)
    return false;

  /* Don't allow more than one memory operation if not optimizing.  */
  else if (mem_count > 1 && !optimize)
    return false;

  else if (num == 4 && mem_count == 1)
    {
      /* formats (destination is the first argument), example fmaddss:
	 xmm1, xmm1, xmm2, xmm3/mem
	 xmm1, xmm1, xmm2/mem, xmm3
	 xmm1, xmm2, xmm3/mem, xmm1
	 xmm1, xmm2/mem, xmm3, xmm1 */
      if (uses_oc0)
	return ((mem_mask == (1 << 1))
		|| (mem_mask == (1 << 2))
		|| (mem_mask == (1 << 3)));

      /* format, example pmacsdd:
	 xmm1, xmm2, xmm3/mem, xmm1 */
      else
	return (mem_mask == (1 << 2));
    }

  else if (num == 4 && num_memory == 2)
    {
      /* If there are two memory operations, we can load one of the memory ops
	 into the destination register.  This is for optimizing the
	 multiply/add ops, which the combiner has optimized both the multiply
	 and the add insns to have a memory operation.  We have to be careful
	 that the destination doesn't overlap with the inputs.  */
      rtx op0 = operands[0];

      if (reg_mentioned_p (op0, operands[1])
	  || reg_mentioned_p (op0, operands[2])
	  || reg_mentioned_p (op0, operands[3]))
	return false;

      /* formats (destination is the first argument), example fmaddss:
	 xmm1, xmm1, xmm2, xmm3/mem
	 xmm1, xmm1, xmm2/mem, xmm3
	 xmm1, xmm2, xmm3/mem, xmm1
	 xmm1, xmm2/mem, xmm3, xmm1

	 For the oc0 case, we will load either operands[1] or operands[3] into
	 operands[0], so any combination of 2 memory operands is ok.  */
      if (uses_oc0)
	return true;

      /* format, example pmacsdd:
	 xmm1, xmm2, xmm3/mem, xmm1

	 For the integer multiply/add instructions be more restrictive and
	 require operands[2] and operands[3] to be the memory operands.  */
      else
	return (mem_mask == ((1 << 2) | (1 << 3)));
    }

  else if (num == 3 && num_memory == 1)
    {
      /* formats, example protb:
	 xmm1, xmm2, xmm3/mem
	 xmm1, xmm2/mem, xmm3 */
      if (uses_oc0)
	return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));

      /* format, example comeq:
	 xmm1, xmm2, xmm3/mem */
      else
	return (mem_mask == (1 << 2));
    }

  else
    gcc_unreachable ();

  return false;
}
25805
25806\f
25807/* Fixup an SSE5 instruction that has 2 memory input references into a form the
25808 hardware will allow by using the destination register to load one of the
25809 memory operations. Presently this is used by the multiply/add routines to
25810 allow 2 memory references. */
25811
25812void
25813ix86_expand_sse5_multiple_memory (rtx operands[],
25814 int num,
25815 enum machine_mode mode)
25816{
25817 rtx op0 = operands[0];
25818 if (num != 4
25819 || memory_operand (op0, mode)
25820 || reg_mentioned_p (op0, operands[1])
25821 || reg_mentioned_p (op0, operands[2])
25822 || reg_mentioned_p (op0, operands[3]))
25823 gcc_unreachable ();
25824
25825 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
25826 the destination register. */
25827 if (memory_operand (operands[1], mode))
25828 {
25829 emit_move_insn (op0, operands[1]);
25830 operands[1] = op0;
25831 }
25832 else if (memory_operand (operands[3], mode))
25833 {
25834 emit_move_insn (op0, operands[3]);
25835 operands[3] = op0;
25836 }
25837 else
25838 gcc_unreachable ();
25839
25840 return;
25841}
25842
2ed941ec
RH
25843\f
/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
  /* force_align_arg_pointer says this function realigns the stack at entry.
     The attribute name is stored in a variable, hence the cast.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  /* ms_struct/gcc_struct select the struct-layout convention.  */
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* Sentinel terminating the table.  */
  { NULL,        0, 0, false, false, false, NULL }
};
25877
e70444a8 25878/* Implement targetm.vectorize.builtin_vectorization_cost. */
4f3f76e6 25879static int
e70444a8
HJ
25880x86_builtin_vectorization_cost (bool runtime_test)
25881{
25882 /* If the branch of the runtime test is taken - i.e. - the vectorized
25883 version is skipped - this incurs a misprediction cost (because the
25884 vectorized version is expected to be the fall-through). So we subtract
25885 the latency of a mispredicted branch from the costs that are incured
25886 when the vectorized version is executed.
25887
25888 TODO: The values in individual target tables have to be tuned or new
25889 fields may be needed. For eg. on K8, the default branch path is the
25890 not-taken path. If the taken path is predicted correctly, the minimum
25891 penalty of going down the taken-path is 1 cycle. If the taken-path is
25892 not predicted correctly, then the minimum penalty is 10 cycles. */
25893
25894 if (runtime_test)
25895 {
25896 return (-(ix86_cost->cond_taken_branch_cost));
25897 }
25898 else
25899 return 0;
25900}
25901
2ed941ec 25902/* Initialize the GCC target structure. */
f680436b
KT
25903#undef TARGET_RETURN_IN_MEMORY
25904#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
25905
2ed941ec
RH
25906#undef TARGET_ATTRIBUTE_TABLE
25907#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25908#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25909# undef TARGET_MERGE_DECL_ATTRIBUTES
25910# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
25911#endif
25912
25913#undef TARGET_COMP_TYPE_ATTRIBUTES
25914#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
25915
25916#undef TARGET_INIT_BUILTINS
25917#define TARGET_INIT_BUILTINS ix86_init_builtins
25918#undef TARGET_EXPAND_BUILTIN
25919#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25920
25921#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
6b889d89
UB
25922#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25923 ix86_builtin_vectorized_function
25924
2ed941ec 25925#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
6b889d89
UB
25926#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
25927
25928#undef TARGET_BUILTIN_RECIPROCAL
25929#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
2ed941ec
RH
25930
25931#undef TARGET_ASM_FUNCTION_EPILOGUE
25932#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
25933
25934#undef TARGET_ENCODE_SECTION_INFO
25935#ifndef SUBTARGET_ENCODE_SECTION_INFO
25936#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
25937#else
25938#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25939#endif
25940
25941#undef TARGET_ASM_OPEN_PAREN
25942#define TARGET_ASM_OPEN_PAREN ""
25943#undef TARGET_ASM_CLOSE_PAREN
25944#define TARGET_ASM_CLOSE_PAREN ""
25945
25946#undef TARGET_ASM_ALIGNED_HI_OP
25947#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25948#undef TARGET_ASM_ALIGNED_SI_OP
25949#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25950#ifdef ASM_QUAD
25951#undef TARGET_ASM_ALIGNED_DI_OP
25952#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
25953#endif
25954
25955#undef TARGET_ASM_UNALIGNED_HI_OP
25956#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25957#undef TARGET_ASM_UNALIGNED_SI_OP
25958#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25959#undef TARGET_ASM_UNALIGNED_DI_OP
25960#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
25961
25962#undef TARGET_SCHED_ADJUST_COST
25963#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
25964#undef TARGET_SCHED_ISSUE_RATE
25965#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
25966#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
25967#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
25968 ia32_multipass_dfa_lookahead
25969
25970#undef TARGET_FUNCTION_OK_FOR_SIBCALL
25971#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
25972
25973#ifdef HAVE_AS_TLS
25974#undef TARGET_HAVE_TLS
25975#define TARGET_HAVE_TLS true
25976#endif
25977#undef TARGET_CANNOT_FORCE_CONST_MEM
25978#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
25979#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
3101faab 25980#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
2ed941ec
RH
25981
25982#undef TARGET_DELEGITIMIZE_ADDRESS
25983#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
25984
/* Initialize the GCC target structure: override the default target hooks
   with the i386-specific implementations.  Each hook is first #undef'd
   (the defaults come from target-def.h) and then redefined so that the
   TARGET_INITIALIZER expansion below picks up the ix86_* / x86_* versions.  */

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

/* On Darwin and on Windows (dllimport), symbol locality follows the
   platform's own rules rather than the generic default.  */
#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS \
  (TARGET_DEFAULT \
   | TARGET_SUBTARGET_DEFAULT \
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)

#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

/* DTPREL relocations for TLS debug info are only emitted when the
   assembler supports TLS at all.  */
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

/* Let OS-specific subtargets (e.g. cygwin/mingw headers) hook
   attribute insertion if they define the macro.  */
#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
26090struct gcc_target targetm = TARGET_INITIALIZER;
26091\f
e2500fed 26092#include "gt-i386.h"