/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "c-common.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "tree-gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"

static int x86_builtin_vectorization_cost (bool);
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)                        \
  ((mode) == QImode ? 0                         \
   : (mode) == HImode ? 1                       \
   : (mode) == SImode ? 2                       \
   : (mode) == DImode ? 3                       \
   : 4)

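/* Illustrative use (a sketch; mult_init[] and divide[] are the per-mode
   cost arrays of struct processor_costs filled in by the tables below):

     ix86_cost->mult_init[MODE_INDEX (SImode)]

   picks the SImode entry (index 2) of a five-element cost array; any mode
   wider than DImode falls through to the "other" slot at index 4.  */
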
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

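/* Worked example of the scale above: COSTS_N_INSNS (N) expands to (N) * 4,
   so a one-insn add costs 4 units, and COSTS_N_BYTES (2) == 4 prices a
   two-byte add identically.  Byte counts and insn counts therefore share
   one scale, letting size_cost below reuse the processor_costs layout
   while measuring bytes instead of cycles.  */
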
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}

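/* Sketch of how the stringop descriptors below are read (the struct is
   declared in i386.h; the field names here follow that declaration and
   should be checked against it):

     struct stringop_algs
     {
       const enum stringop_alg unknown_size;   -- algorithm for dynamic sizes
       const struct stringop_strategy {
         const int max;                        -- upper size bound, -1 = rest
         const enum stringop_alg alg;
       } size [MAX_STRINGOP_ALGS];
     };

   DUMMY_STRINGOP_ALGS therefore means "call the library at any size".
   Each cost table carries one pair of such descriptors for memcpy and one
   pair for memset; the first entry of a pair is for 32-bit code, the
   second for 64-bit code.  */
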
static const
struct processor_costs size_cost = {    /* costs for tuning for size */
  COSTS_N_BYTES (2),                    /* cost of an add instruction */
  COSTS_N_BYTES (3),                    /* cost of a lea instruction */
  COSTS_N_BYTES (2),                    /* variable shift costs */
  COSTS_N_BYTES (3),                    /* constant shift costs */
  {COSTS_N_BYTES (3),                   /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),                   /* HI */
   COSTS_N_BYTES (3),                   /* SI */
   COSTS_N_BYTES (3),                   /* DI */
   COSTS_N_BYTES (5)},                  /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),                   /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),                   /* HI */
   COSTS_N_BYTES (3),                   /* SI */
   COSTS_N_BYTES (3),                   /* DI */
   COSTS_N_BYTES (5)},                  /* other */
  COSTS_N_BYTES (3),                    /* cost of movsx */
  COSTS_N_BYTES (3),                    /* cost of movzx */
  0,                                    /* "large" insn */
  2,                                    /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {2, 2, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 2, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 2},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {2, 2, 2},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  3,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {3, 3},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  3,                                    /* cost of moving SSE register */
  {3, 3, 3},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {3, 3, 3},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of l1 cache */
  0,                                    /* size of l2 cache */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_BYTES (2),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),                    /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  1,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  1,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};
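
/* size_cost above is meant for compilation where code size matters more
   than speed (-Os); its entries approximate instruction bytes rather than
   cycles, which is what the COSTS_N_BYTES scale expresses.  */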

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (3),                    /* variable shift costs */
  COSTS_N_INSNS (2),                    /* constant shift costs */
  {COSTS_N_INSNS (6),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),                   /* HI */
   COSTS_N_INSNS (6),                   /* SI */
   COSTS_N_INSNS (6),                   /* DI */
   COSTS_N_INSNS (6)},                  /* other */
  COSTS_N_INSNS (1),                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),                  /* HI */
   COSTS_N_INSNS (23),                  /* SI */
   COSTS_N_INSNS (23),                  /* DI */
   COSTS_N_INSNS (23)},                 /* other */
  COSTS_N_INSNS (3),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  15,                                   /* "large" insn */
  3,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {8, 8, 8},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {8, 8, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of l1 cache */
  0,                                    /* size of l2 cache */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (23),                   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),                   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),                   /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),                   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),                  /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (3),                    /* variable shift costs */
  COSTS_N_INSNS (2),                    /* constant shift costs */
  {COSTS_N_INSNS (12),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),                  /* HI */
   COSTS_N_INSNS (12),                  /* SI */
   COSTS_N_INSNS (12),                  /* DI */
   COSTS_N_INSNS (12)},                 /* other */
  1,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),                  /* HI */
   COSTS_N_INSNS (40),                  /* SI */
   COSTS_N_INSNS (40),                  /* DI */
   COSTS_N_INSNS (40)},                 /* other */
  COSTS_N_INSNS (3),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  15,                                   /* "large" insn */
  3,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {8, 8, 8},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {8, 8, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  4,                                    /* size of l1 cache.  486 has 8kB cache
                                           shared for code and data, so 4kB is
                                           not really precise.  */
  4,                                    /* size of l2 cache */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),                   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),                   /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (4),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (11),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),                  /* HI */
   COSTS_N_INSNS (11),                  /* SI */
   COSTS_N_INSNS (11),                  /* DI */
   COSTS_N_INSNS (11)},                 /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),                  /* HI */
   COSTS_N_INSNS (25),                  /* SI */
   COSTS_N_INSNS (25),                  /* DI */
   COSTS_N_INSNS (25)},                 /* other */
  COSTS_N_INSNS (3),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  8,                                    /* "large" insn */
  6,                                    /* MOVE_RATIO */
  6,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  8,                                    /* cost of moving MMX register */
  {8, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {8, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  8,                                    /* size of l1 cache.  */
  8,                                    /* size of l2 cache */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),                   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (4),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /* HI */
   COSTS_N_INSNS (4),                   /* SI */
   COSTS_N_INSNS (4),                   /* DI */
   COSTS_N_INSNS (4)},                  /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),                  /* HI */
   COSTS_N_INSNS (17),                  /* SI */
   COSTS_N_INSNS (17),                  /* DI */
   COSTS_N_INSNS (17)},                 /* other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  6,                                    /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 2, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {2, 2, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  8,                                    /* size of l1 cache.  */
  256,                                  /* size of l2 cache */
  32,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in
     the CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (2),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /* HI */
   COSTS_N_INSNS (7),                   /* SI */
   COSTS_N_INSNS (7),                   /* DI */
   COSTS_N_INSNS (7)},                  /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),                  /* HI */
   COSTS_N_INSNS (39),                  /* SI */
   COSTS_N_INSNS (39),                  /* DI */
   COSTS_N_INSNS (39)},                 /* other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  4,                                    /* MOVE_RATIO */
  1,                                    /* cost for loading QImode using movzbl */
  {1, 1, 1},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {1, 1, 1},                            /* cost of storing integer registers */
  1,                                    /* cost of reg,reg fld/fst */
  {1, 1, 1},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 6, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  1,                                    /* cost of moving MMX register */
  {1, 1},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {1, 1},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  1,                                    /* cost of moving SSE register */
  {1, 1, 1},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {1, 1, 1},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  1,                                    /* MMX or SSE register to integer */
  64,                                   /* size of l1 cache.  */
  128,                                  /* size of l2 cache.  */
  32,                                   /* size of prefetch block */
  1,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),                   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),                   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),                   /* HI */
   COSTS_N_INSNS (3),                   /* SI */
   COSTS_N_INSNS (3),                   /* DI */
   COSTS_N_INSNS (3)},                  /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),                  /* HI */
   COSTS_N_INSNS (18),                  /* SI */
   COSTS_N_INSNS (18),                  /* DI */
   COSTS_N_INSNS (18)},                 /* other */
  COSTS_N_INSNS (2),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  8,                                    /* "large" insn */
  4,                                    /* MOVE_RATIO */
  3,                                    /* cost for loading QImode using movzbl */
  {4, 5, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 3, 2},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {6, 6, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 4},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {2, 2, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  6,                                    /* MMX or SSE register to integer */
  32,                                   /* size of l1 cache.  */
  32,                                   /* size of l2 cache.  Some models
                                           have integrated l2 cache, but
                                           optimizing for k6 is not important
                                           enough to worry about that.  */
  32,                                   /* size of prefetch block */
  1,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (2),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (5),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),                   /* HI */
   COSTS_N_INSNS (5),                   /* SI */
   COSTS_N_INSNS (5),                   /* DI */
   COSTS_N_INSNS (5)},                  /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /* HI */
   COSTS_N_INSNS (42),                  /* SI */
   COSTS_N_INSNS (74),                  /* DI */
   COSTS_N_INSNS (74)},                 /* other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 4},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 4, 6},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  64,                                   /* size of l1 cache.  */
  256,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  5,                                    /* Branch cost */
  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) than K8 does.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /* HI */
   COSTS_N_INSNS (3),                   /* SI */
   COSTS_N_INSNS (4),                   /* DI */
   COSTS_N_INSNS (5)},                  /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /* HI */
   COSTS_N_INSNS (42),                  /* SI */
   COSTS_N_INSNS (74),                  /* DI */
   COSTS_N_INSNS (74)},                 /* other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 3, 6},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  64,                                   /* size of l1 cache.  */
  512,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                                  /* number of parallel prefetches */
  3,                                    /* Branch cost */
  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
  /* K8 has an optimized REP instruction for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                                    /* scalar_stmt_cost.  */
  2,                                    /* scalar load_cost.  */
  2,                                    /* scalar_store_cost.  */
  5,                                    /* vec_stmt_cost.  */
  0,                                    /* vec_to_scalar_cost.  */
  2,                                    /* scalar_to_vec_cost.  */
  2,                                    /* vec_align_load_cost.  */
  3,                                    /* vec_unalign_load_cost.  */
  3,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  2,                                    /* cond_not_taken_branch_cost.  */
};
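
/* Reading the K8 memcpy descriptor above as a worked example: the leading
   "libcall" is the strategy for dynamically sized copies.  For known sizes
   in 32-bit mode, blocks of at most 6 bytes use an inline loop, up to 14
   bytes an unrolled loop, and anything larger rep movsl
   ({-1, rep_prefix_4_byte}); in 64-bit mode, up to 16 bytes use a loop, up
   to 8192 bytes rep movsq, and larger blocks go to the library, matching
   the comment above about nontemporal accesses.  */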

struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /* HI */
   COSTS_N_INSNS (3),                   /* SI */
   COSTS_N_INSNS (4),                   /* DI */
   COSTS_N_INSNS (5)},                  /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),                  /* HI */
   COSTS_N_INSNS (51),                  /* SI */
   COSTS_N_INSNS (83),                  /* DI */
   COSTS_N_INSNS (83)},                 /* other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 4, 3},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
                                        /* On K8:
                                             MOVD reg64, xmmreg  Double  FSTORE 4
                                             MOVD reg32, xmmreg  Double  FSTORE 4
                                           On AMDFAM10:
                                             MOVD reg64, xmmreg  Double  FADD 3
                                                                 1/1  1/1
                                             MOVD reg32, xmmreg  Double  FADD 3
                                                                 1/1  1/1 */
  64,                                   /* size of l1 cache.  */
  512,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                                  /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */

  /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                                    /* scalar_stmt_cost.  */
  2,                                    /* scalar load_cost.  */
  2,                                    /* scalar_store_cost.  */
  6,                                    /* vec_stmt_cost.  */
  0,                                    /* vec_to_scalar_cost.  */
  2,                                    /* scalar_to_vec_cost.  */
  2,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  2,                                    /* vec_store_cost.  */
  2,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (3),                    /* cost of a lea instruction */
  COSTS_N_INSNS (4),                    /* variable shift costs */
  COSTS_N_INSNS (4),                    /* constant shift costs */
  {COSTS_N_INSNS (15),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),                  /* HI */
   COSTS_N_INSNS (15),                  /* SI */
   COSTS_N_INSNS (15),                  /* DI */
   COSTS_N_INSNS (15)},                 /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),                  /* HI */
   COSTS_N_INSNS (56),                  /* SI */
   COSTS_N_INSNS (56),                  /* DI */
   COSTS_N_INSNS (56)},                 /* other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  16,                                   /* "large" insn */
  6,                                    /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {4, 5, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 3, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  12,                                   /* cost of moving SSE register */
  {12, 12, 12},                         /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  10,                                   /* MMX or SSE register to integer */
  8,                                    /* size of l1 cache.  */
  256,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (5),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),                   /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (10),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),                  /* HI */
   COSTS_N_INSNS (10),                  /* SI */
   COSTS_N_INSNS (10),                  /* DI */
   COSTS_N_INSNS (10)},                 /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),                  /* HI */
   COSTS_N_INSNS (66),                  /* SI */
   COSTS_N_INSNS (66),                  /* DI */
   COSTS_N_INSNS (66)},                 /* other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  16,                                   /* "large" insn */
  17,                                   /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  3,                                    /* cost of reg,reg fld/fst */
  {12, 12, 12},                         /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 4},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  6,                                    /* cost of moving MMX register */
  {12, 12},                             /* cost of loading MMX registers
                                           in SImode and DImode */
  {12, 12},                             /* cost of storing MMX registers
                                           in SImode and DImode */
  6,                                    /* cost of moving SSE register */
  {12, 12, 12},                         /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {12, 12, 12},                         /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  8,                                    /* MMX or SSE register to integer */
  8,                                    /* size of l1 cache.  */
  1024,                                 /* size of l2 cache.  */
  128,                                  /* size of prefetch block */
  8,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),                   /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),                   /* HI */
   COSTS_N_INSNS (3),                   /* SI */
   COSTS_N_INSNS (3),                   /* DI */
   COSTS_N_INSNS (3)},                  /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),                  /* HI */
   COSTS_N_INSNS (22),                  /* SI */
   COSTS_N_INSNS (22),                  /* DI */
   COSTS_N_INSNS (22)},                 /* other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  16,                                   /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {6, 6, 6},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {6, 6, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 4},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {6, 6},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {6, 6, 6},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 4},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  2,                                    /* MMX or SSE register to integer */
  32,                                   /* size of l1 cache.  */
  2048,                                 /* size of l2 cache.  */
  128,                                  /* size of prefetch block */
  8,                                    /* number of parallel prefetches */
  3,                                    /* Branch cost */
  COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),                   /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  /* On all chips taken into consideration, lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results
     in the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /* HI */
   COSTS_N_INSNS (3),                   /* SI */
   COSTS_N_INSNS (4),                   /* DI */
   COSTS_N_INSNS (2)},                  /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /* HI */
   COSTS_N_INSNS (42),                  /* SI */
   COSTS_N_INSNS (74),                  /* DI */
   COSTS_N_INSNS (74)},                 /* other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  17,                                   /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {12, 12, 12},                         /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {8, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {8, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {8, 8, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {8, 8, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  32,                                   /* size of l1 cache.  */
  512,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when
     this value is increased to the perhaps more appropriate value of 5.  */
  3,                                    /* Branch cost */
  COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),                   /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona
   and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /* HI */
   COSTS_N_INSNS (3),                   /* SI */
   COSTS_N_INSNS (4),                   /* DI */
   COSTS_N_INSNS (2)},                  /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /* HI */
   COSTS_N_INSNS (42),                  /* SI */
   COSTS_N_INSNS (74),                  /* DI */
   COSTS_N_INSNS (74)},                 /* other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  17,                                   /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {12, 12, 12},                         /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {8, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {8, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {8, 8, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {8, 8, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  32,                                   /* size of l1 cache.  */
  256,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  3,                                    /* Branch cost */
  COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),                   /* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;

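/* The default above is provisional: option processing (override_options)
   is expected to repoint ix86_cost at the table matching the CPU selected
   with -mtune, and at size_cost when optimizing for size.  */
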
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be a common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)

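/* Illustrative reading of the masks above (the exact macro plumbing lives
   in i386.h): each entry of ix86_tune_features below is a set of CPU bits,
   and a feature test is, roughly,

     if (ix86_tune_features[X86_TUNE_USE_LEAVE] & (1 << ix86_tune))
       ...use the "leave" instruction in epilogues...

   so m_AMD_MULTIPLE, m_GENERIC and the like are just unions of CPU bits.  */
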
80fd744f
RH
1212/* Feature tests against the various tunings. */
1213unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1214 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1215 negatively, so enabling for Generic64 seems like good code size
1216 tradeoff. We can't enable it for 32bit generic because it does not
1217 work well with PPro base chips. */
04e1d06b 1218 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
80fd744f
RH
1219
1220 /* X86_TUNE_PUSH_MEMORY */
04e1d06b 1221 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
80fd744f
RH
1222 | m_NOCONA | m_CORE2 | m_GENERIC,
1223
1224 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1225 m_486 | m_PENT,
1226
1227 /* X86_TUNE_USE_BIT_TEST */
1228 m_386,
1229
1230 /* X86_TUNE_UNROLL_STRLEN */
04e1d06b 1231 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
80fd744f
RH
1232
1233 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
04e1d06b 1234 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
80fd744f
RH
1235
1236 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1237 on simulation result. But after P4 was made, no performance benefit
1238 was observed with branch hints. It also increases the code size.
1239 As a result, icc never generates branch hints. */
1240 0,
1241
1242 /* X86_TUNE_DOUBLE_WITH_ADD */
1243 ~m_386,
54a88090 1244
80fd744f 1245 /* X86_TUNE_USE_SAHF */
3c2d980c 1246 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
5be6cb59 1247 | m_NOCONA | m_CORE2 | m_GENERIC,
80fd744f
RH
1248
1249 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
3c2d980c 1250 partial dependencies. */
04e1d06b 1251 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
80fd744f
RH
1252 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1253
1254 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1255 register stalls on Generic32 compilation setting as well. However
1256 in current implementation the partial register stalls are not eliminated
1257 very well - they can be introduced via subregs synthesized by combine
1258 and can happen in caller/callee saving sequences. Because this option
1259 pays back little on PPro based chips and is in conflict with partial reg
1260 dependencies used by Athlon/P4 based chips, it is better to leave it off
1261 for generic32 for now. */
1262 m_PPRO,
1263
1264 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1265 m_CORE2 | m_GENERIC,
54a88090 1266
80fd744f
RH
1267 /* X86_TUNE_USE_HIMODE_FIOP */
1268 m_386 | m_486 | m_K6_GEODE,
1269
1270 /* X86_TUNE_USE_SIMODE_FIOP */
04e1d06b 1271 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
80fd744f
RH
1272
1273 /* X86_TUNE_USE_MOV0 */
1274 m_K6,
54a88090 1275
80fd744f
RH
1276 /* X86_TUNE_USE_CLTD */
1277 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1278
1279 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1280 m_PENT4,
1281
1282 /* X86_TUNE_SPLIT_LONG_MOVES */
1283 m_PPRO,
1284
1285 /* X86_TUNE_READ_MODIFY_WRITE */
1286 ~m_PENT,
1287
1288 /* X86_TUNE_READ_MODIFY */
1289 ~(m_PENT | m_PPRO),
1290
1291 /* X86_TUNE_PROMOTE_QIMODE */
04e1d06b 1292 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
80fd744f
RH
1293 | m_GENERIC /* | m_PENT4 ? */,
1294
1295 /* X86_TUNE_FAST_PREFIX */
1296 ~(m_PENT | m_486 | m_386),
1297
1298 /* X86_TUNE_SINGLE_STRINGOP */
1299 m_386 | m_PENT4 | m_NOCONA,
54a88090 1300
80fd744f
RH
1301 /* X86_TUNE_QIMODE_MATH */
1302 ~0,
54a88090 1303
80fd744f
RH
1304 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1305 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1306 might be considered for Generic32 if our scheme for avoiding partial
1307 stalls was more effective. */
1308 ~m_PPRO,
1309
1310 /* X86_TUNE_PROMOTE_QI_REGS */
1311 0,
1312
1313 /* X86_TUNE_PROMOTE_HI_REGS */
1314 m_PPRO,
1315
1316 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
04e1d06b 1317 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
80fd744f
RH
1318
1319 /* X86_TUNE_ADD_ESP_8 */
04e1d06b 1320 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
80fd744f
RH
1321 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1322
1323 /* X86_TUNE_SUB_ESP_4 */
04e1d06b 1324 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
80fd744f
RH
1325
1326 /* X86_TUNE_SUB_ESP_8 */
04e1d06b 1327 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
80fd744f
RH
1328 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1329
1330 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1331 for DFmode copies */
04e1d06b 1332 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
80fd744f
RH
1333 | m_GENERIC | m_GEODE),
1334
1335 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
04e1d06b 1336 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
80fd744f
RH
1337
1338 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1339 conflict here between PPro/Pentium4 based chips that treat 128-bit
1340 SSE registers as single units and K8 based chips that divide SSE
1341 registers into two 64-bit halves. This knob promotes all store
1342 destinations to 128 bits to allow register renaming on 128-bit SSE
1343 units, but usually results in one extra micro-op on 64-bit SSE units.
1344 Experimental results show that disabling this option on P4 causes over
1345 a 20% SPECfp regression, while enabling it on K8 causes roughly a 2.4%
1346 regression that can be partly masked by careful scheduling of moves. */
1347 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1348
1349 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1350 m_AMDFAM10,
1351
1352 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1353 are resolved on SSE register parts instead of whole registers, so we may
1354 maintain just the lower part of scalar values in the proper format, leaving
1355 the upper part undefined. */
1356 m_ATHLON_K8,
21efb4d4 1357
80fd744f 1358 /* X86_TUNE_SSE_TYPELESS_STORES */
04e1d06b 1359 m_AMD_MULTIPLE,
21efb4d4 1360
80fd744f
RH
1361 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1362 m_PPRO | m_PENT4 | m_NOCONA,
21efb4d4 1363
80fd744f 1364 /* X86_TUNE_MEMORY_MISMATCH_STALL */
04e1d06b 1365 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
21efb4d4 1366
80fd744f
RH
1367 /* X86_TUNE_PROLOGUE_USING_MOVE */
1368 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1369
1370 /* X86_TUNE_EPILOGUE_USING_MOVE */
1371 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1372
1373 /* X86_TUNE_SHIFT1 */
1374 ~m_486,
1375
1376 /* X86_TUNE_USE_FFREEP */
04e1d06b 1377 m_AMD_MULTIPLE,
80fd744f
RH
1378
1379 /* X86_TUNE_INTER_UNIT_MOVES */
04e1d06b 1380 ~(m_AMD_MULTIPLE | m_GENERIC),
80fd744f 1381
630ecd8d
JH
1382 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1383 ~(m_AMDFAM10),
1384
80fd744f
RH
1385 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1386 than 4 branch instructions in a 16-byte window. */
04e1d06b 1387 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
80fd744f
RH
1388
1389 /* X86_TUNE_SCHEDULE */
04e1d06b 1390 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
80fd744f
RH
1391
1392 /* X86_TUNE_USE_BT */
04e1d06b 1393 m_AMD_MULTIPLE,
80fd744f
RH
1394
1395 /* X86_TUNE_USE_INCDEC */
77c1632f 1396 ~(m_PENT4 | m_NOCONA | m_GENERIC),
80fd744f
RH
1397
1398 /* X86_TUNE_PAD_RETURNS */
04e1d06b 1399 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
80fd744f
RH
1400
1401 /* X86_TUNE_EXT_80387_CONSTANTS */
ddff69b9
MM
1402 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1403
1404 /* X86_TUNE_SHORTEN_X87_SSE */
1405 ~m_K8,
1406
1407 /* X86_TUNE_AVOID_VECTOR_DECODE */
1408 m_K8 | m_GENERIC64,
1409
a646aded
UB
1410 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
1411 HImode and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
1412 ~(m_386 | m_486),
1413
1414 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory is a
1415 vector path on AMD machines. */
ddff69b9
MM
1416 m_K8 | m_GENERIC64 | m_AMDFAM10,
1417
a646aded
UB
1418 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant is a vector path on
1419 AMD machines. */
ddff69b9
MM
1420 m_K8 | m_GENERIC64 | m_AMDFAM10,
1421
a646aded
UB
1422 /* X86_TUNE_MOVE_M1_VIA_OR: On Pentiums, it is faster to load -1 via OR
1423 than via a MOV. */
ddff69b9
MM
1424 m_PENT,
1425
a646aded
UB
1426 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on the Pentium, while XOR
1427 is, though it is one byte longer. */
ddff69b9
MM
1428 m_PENT,
1429
a646aded 1430 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
ddff69b9 1431 operand that cannot be represented using a modRM byte. The XOR
a646aded 1432 replacement is long decoded, so this split helps here as well. */
ddff69b9 1433 m_K6,
4845dbb5 1434
84fbffb2 1435 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
4e9d897d 1436 from integer to FP. */
4845dbb5 1437 m_AMDFAM10,
80fd744f
RH
1438};
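/* Editor's sketch (not part of the original file): i386.h is assumed to
   expose each tuning knob above through a one-line accessor of roughly
   this shape, so the rest of the backend tests a single bit that
   override_options has already masked down to the CPU being tuned for.
   The _EXAMPLE name here is hypothetical.  */
#if 0
#define TARGET_USE_SAHF_EXAMPLE (ix86_tune_features[X86_TUNE_USE_SAHF])
#endif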
1439
1440/* Feature tests against the various architecture variations. */
1441unsigned int ix86_arch_features[X86_ARCH_LAST] = {
0a1c5e55
UB
1442 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1443 ~(m_386 | m_486 | m_PENT | m_K6),
80fd744f
RH
1444
1445 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1446 ~m_386,
1447
1448 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1449 ~(m_386 | m_486),
1450
1451 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1452 ~m_386,
1453
1454 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1455 ~m_386,
1456};
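/* Editor's worked example (illustrative, not in the original): each entry
   above is a mask over the m_* CPU bits, so ~m_386 reads "every CPU except
   the 386".  Once override_options ANDs the table with 1u << ix86_arch,
   entry i is nonzero exactly when the selected -march CPU has the feature;
   e.g. for -march=i486, (~m_386 & (1u << PROCESSOR_I486)) != 0, so the 486
   is credited with cmpxchg.  */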
1457
1458static const unsigned int x86_accumulate_outgoing_args
04e1d06b 1459 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
80fd744f
RH
1460
1461static const unsigned int x86_arch_always_fancy_math_387
04e1d06b 1462 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
80fd744f 1463 | m_NOCONA | m_CORE2 | m_GENERIC;
a269a03c 1464
8c996513
JH
1465static enum stringop_alg stringop_alg = no_stringop;
1466
d1f87653 1467/* In case the average insn count for a single function invocation is
6ab16dd9
JH
1468 lower than this constant, emit fast (but longer) prologue and
1469 epilogue code. */
4977bab6 1470#define FAST_PROLOGUE_INSN_COUNT 20
5bf0ebab 1471
5bf0ebab
RH
1472/* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1473static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1474static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1475static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
4c0d89b5
RS
1476
1477/* Array of the smallest class containing reg number REGNO, indexed by
0f290768 1478 REGNO. Used by REGNO_REG_CLASS in i386.h. */
4c0d89b5 1479
e075ae69 1480enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
1481{
1482 /* ax, dx, cx, bx */
ab408a86 1483 AREG, DREG, CREG, BREG,
4c0d89b5 1484 /* si, di, bp, sp */
e075ae69 1485 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
1486 /* FP registers */
1487 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 1488 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 1489 /* arg pointer */
83774849 1490 NON_Q_REGS,
b0d95de8
UB
1491 /* flags, fpsr, fpcr, frame */
1492 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
06f4e35d
L
1493 /* SSE registers */
1494 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
a7180f70 1495 SSE_REGS, SSE_REGS,
06f4e35d 1496 /* MMX registers */
a7180f70 1497 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
3d117b30 1498 MMX_REGS, MMX_REGS,
06f4e35d 1499 /* REX registers */
3d117b30
JH
1500 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1501 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
06f4e35d 1502 /* SSE REX registers */
3d117b30
JH
1503 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1504 SSE_REGS, SSE_REGS,
4c0d89b5 1505};
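/* Editor's example (read off the table above): REGNO_REG_CLASS (1) is DREG,
   since gcc regno 1 is %edx; the x87 stack below st(1) collapses into
   FLOAT_REGS; and registers with no QImode parts (si, di, bp, sp, the REX
   set) report NON_Q_REGS.  */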
c572e5ba 1506
3d117b30 1507/* The "default" register map used in 32bit mode. */
83774849 1508
0f290768 1509int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
1510{
1511 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1512 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
b0d95de8 1513 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
a7180f70
BS
1514 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1515 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
3d117b30
JH
1516 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1517 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
1518};
1519
5bf0ebab
RH
1520static int const x86_64_int_parameter_registers[6] =
1521{
1522 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1523 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1524};
1525
ccf8e764
RH
1526static int const x86_64_ms_abi_int_parameter_registers[4] =
1527{
1528 2 /*RCX*/, 1 /*RDX*/,
1529 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1530};
1531
5bf0ebab
RH
1532static int const x86_64_int_return_registers[4] =
1533{
ccf8e764 1534 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
5bf0ebab 1535};
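/* Editor's summary (read from the three arrays above): the SysV 64-bit ABI
   passes the first six integer arguments in rdi, rsi, rdx, rcx, r8, r9,
   while the Microsoft 64-bit ABI passes only four, in rcx, rdx, r8, r9;
   integer results come back in rax, then rdx.  */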
53c17031 1536
0f7fa3d0
JH
1537/* The "default" register map used in 64bit mode. */
1538int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1539{
1540 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
3de72741 1541 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
b0d95de8 1542 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
0f7fa3d0
JH
1543 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1544 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1545 8,9,10,11,12,13,14,15, /* extended integer registers */
1546 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1547};
1548
83774849
RH
1549/* Define the register numbers to be used in Dwarf debugging information.
1550 The SVR4 reference port C compiler uses the following register numbers
1551 in its Dwarf output code:
1552 0 for %eax (gcc regno = 0)
1553 1 for %ecx (gcc regno = 2)
1554 2 for %edx (gcc regno = 1)
1555 3 for %ebx (gcc regno = 3)
1556 4 for %esp (gcc regno = 7)
1557 5 for %ebp (gcc regno = 6)
1558 6 for %esi (gcc regno = 4)
1559 7 for %edi (gcc regno = 5)
1560 The following three DWARF register numbers are never generated by
1561 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1562 believes these numbers have these meanings.
1563 8 for %eip (no gcc equivalent)
1564 9 for %eflags (gcc regno = 17)
1565 10 for %trapno (no gcc equivalent)
1566 It is not at all clear how we should number the FP stack registers
1567 for the x86 architecture. If the version of SDB on x86/svr4 were
1568 a bit less brain dead with respect to floating-point then we would
1569 have a precedent to follow with respect to DWARF register numbers
1570 for x86 FP registers, but the SDB on x86/svr4 is so completely
1571 broken with respect to FP registers that it is hardly worth thinking
1572 of it as something to strive for compatibility with.
1573 The version of x86/svr4 SDB I have at the moment does (partially)
1574 seem to believe that DWARF register number 11 is associated with
1575 the x86 register %st(0), but that's about all. Higher DWARF
1576 register numbers don't seem to be associated with anything in
1577 particular, and even for DWARF regno 11, SDB only seems to under-
1578 stand that it should say that a variable lives in %st(0) (when
1579 asked via an `=' command) if we said it was in DWARF regno 11,
1580 but SDB still prints garbage when asked for the value of the
1581 variable in question (via a `/' command).
1582 (Also note that the labels SDB prints for various FP stack regs
1583 when doing an `x' command are all wrong.)
1584 Note that these problems generally don't affect the native SVR4
1585 C compiler because it doesn't allow the use of -O with -g and
1586 because when it is *not* optimizing, it allocates a memory
1587 location for each floating-point variable, and the memory
1588 location is what gets described in the DWARF AT_location
1589 attribute for the variable in question.
1590 Regardless of the severe mental illness of the x86/svr4 SDB, we
1591 do something sensible here and we use the following DWARF
1592 register numbers. Note that these are all stack-top-relative
1593 numbers.
1594 11 for %st(0) (gcc regno = 8)
1595 12 for %st(1) (gcc regno = 9)
1596 13 for %st(2) (gcc regno = 10)
1597 14 for %st(3) (gcc regno = 11)
1598 15 for %st(4) (gcc regno = 12)
1599 16 for %st(5) (gcc regno = 13)
1600 17 for %st(6) (gcc regno = 14)
1601 18 for %st(7) (gcc regno = 15)
1602*/
0f290768 1603int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
1604{
1605 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1606 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
b0d95de8 1607 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
a7180f70
BS
1608 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1609 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
d1f87653
KH
1610 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1611 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
1612};
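/* Editor's worked example (from the map above): gcc regno 2 (%ecx) becomes
   DWARF register 1, gcc regno 7 (%esp) becomes 4, and the FP stack slots
   st(0)..st(7) (gcc regnos 8..15) become DWARF registers 11..18, matching
   the numbering narrated in the comment.  */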
1613
c572e5ba
JVA
1614/* Test and compare insns in i386.md store the information needed to
1615 generate branch and scc insns here. */
1616
07933f72
GS
1617rtx ix86_compare_op0 = NULL_RTX;
1618rtx ix86_compare_op1 = NULL_RTX;
1ef45b77 1619rtx ix86_compare_emitted = NULL_RTX;
f5316dfe 1620
8362f420
JH
1621/* Size of the register save area. */
1622#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
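/* Editor's worked example (assuming the usual 64-bit values REGPARM_MAX = 6,
   SSE_REGPARM_MAX = 8 and UNITS_PER_WORD = 8): the register save area is
   6*8 + 8*16 = 48 + 128 = 176 bytes -- six integer registers followed by
   eight 16-byte SSE registers.  */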
36edd3cc
BS
1623
1624/* Define the structure for the machine field in struct function. */
ddb0ae00
ZW
1625
1626struct stack_local_entry GTY(())
1627{
1628 unsigned short mode;
1629 unsigned short n;
1630 rtx rtl;
1631 struct stack_local_entry *next;
1632};
1633
4dd2ac2c
JH
1634/* Structure describing stack frame layout.
1635 Stack grows downward:
1636
1637 [arguments]
1638 <- ARG_POINTER
1639 saved pc
1640
1641 saved frame pointer if frame_pointer_needed
1642 <- HARD_FRAME_POINTER
1643 [saved regs]
1644
1645 [padding1] \
1646 )
1647 [va_arg registers] (
1648 > to_allocate <- FRAME_POINTER
1649 [frame] (
1650 )
1651 [padding2] /
1652 */
1653struct ix86_frame
1654{
1655 int nregs;
1656 int padding1;
8362f420 1657 int va_arg_size;
4dd2ac2c
JH
1658 HOST_WIDE_INT frame;
1659 int padding2;
1660 int outgoing_arguments_size;
8362f420 1661 int red_zone_size;
4dd2ac2c
JH
1662
1663 HOST_WIDE_INT to_allocate;
1664 /* The offsets relative to ARG_POINTER. */
1665 HOST_WIDE_INT frame_pointer_offset;
1666 HOST_WIDE_INT hard_frame_pointer_offset;
1667 HOST_WIDE_INT stack_pointer_offset;
d9b40e8d
JH
1668
1669 /* When save_regs_using_mov is set, emit prologue using
1670 move instead of push instructions. */
1671 bool save_regs_using_mov;
4dd2ac2c
JH
1672};
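/* Editor's sketch (hypothetical use, not from the original file): prologue
   code fills an ix86_frame and then reads the computed offsets back.  */
#if 0
static HOST_WIDE_INT
example_frame_size (void)	/* hypothetical helper */
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);
  /* frame.to_allocate is the amount the prologue must subtract from the
     stack pointer; frame.hard_frame_pointer_offset locates the saved
     frame pointer relative to the incoming argument pointer.  */
  return frame.to_allocate;
}
#endif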
1673
55bea00a 1674/* Code model option. */
6189a572 1675enum cmodel ix86_cmodel;
80f33d06 1676/* Asm dialect. */
80f33d06 1677enum asm_dialect ix86_asm_dialect = ASM_ATT;
5bf5a10b 1678/* TLS dialects. */
f996902d 1679enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
6189a572 1680
5bf0ebab 1681/* Which unit we are generating floating point math for. */
965f5423
JH
1682enum fpmath_unit ix86_fpmath;
1683
5bf0ebab 1684/* Which cpu are we scheduling for. */
9e555526 1685enum processor_type ix86_tune;
8691cea3 1686
5bf0ebab
RH
1687/* Which instruction set architecture to use. */
1688enum processor_type ix86_arch;
c8c5cb99 1689
f4365627
JH
1690/* True if the SSE prefetch instruction is not a NOP. */
1691int x86_prefetch_sse;
1692
e075ae69 1693/* ix86_regparm_string as a number */
6ac49599 1694static int ix86_regparm;
e9a25f70 1695
33932946
SH
1696/* -mstackrealign option */
1697extern int ix86_force_align_arg_pointer;
1698static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1699
3af4bd89 1700/* Preferred alignment for stack boundary in bits. */
95899b34 1701unsigned int ix86_preferred_stack_boundary;
3af4bd89 1702
e9a25f70 1703/* Values 1-5: see jump.c */
e075ae69 1704int ix86_branch_cost;
623fe810 1705
7dcbf659
JH
1706/* Variables which are this size or smaller are put in the data/bss
1707 or ldata/lbss sections. */
1708
1709int ix86_section_threshold = 65536;
1710
623fe810 1711/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
8fe75e43
RH
1712char internal_label_prefix[16];
1713int internal_label_prefix_len;
e56feed6 1714
79f5e442
ZD
1715/* Fence to use after loop using movnt. */
1716tree x86_mfence;
1717
53c17031
JH
1718/* Register class used for passing a given 64bit part of an argument.
1719 These represent classes as documented by the PS ABI, with the exception
1720 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
d1f87653 1721 uses SF or DFmode moves instead of DImode to avoid reformatting penalties.
53c17031 1722
d1f87653 1723 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2ed941ec 1724 whenever possible (the upper half does contain padding). */
53c17031
JH
1725enum x86_64_reg_class
1726 {
1727 X86_64_NO_CLASS,
1728 X86_64_INTEGER_CLASS,
1729 X86_64_INTEGERSI_CLASS,
1730 X86_64_SSE_CLASS,
1731 X86_64_SSESF_CLASS,
1732 X86_64_SSEDF_CLASS,
1733 X86_64_SSEUP_CLASS,
1734 X86_64_X87_CLASS,
1735 X86_64_X87UP_CLASS,
499accd7 1736 X86_64_COMPLEX_X87_CLASS,
53c17031
JH
1737 X86_64_MEMORY_CLASS
1738 };
2ed941ec
RH
1739static const char * const x86_64_reg_class_name[] =
1740{
6c4ccfd8
RH
1741 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1742 "sseup", "x87", "x87up", "cplx87", "no"
1743};
53c17031
JH
1744
1745#define MAX_CLASSES 4
881b2a96 1746
43f3a59d 1747/* Table of constants used by fldpi, fldln2, etc. */
881b2a96
RS
1748static REAL_VALUE_TYPE ext_80387_constants_table [5];
1749static bool ext_80387_constants_init = 0;
cb1119b7 1750
2ed941ec
RH
1751\f
1752static struct machine_function * ix86_init_machine_status (void);
586de218 1753static rtx ix86_function_value (const_tree, const_tree, bool);
3101faab 1754static int ix86_function_regparm (const_tree, const_tree);
2ed941ec
RH
1755static void ix86_compute_frame_layout (struct ix86_frame *);
1756static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1757 rtx, rtx, int);
89c43c0a 1758
e075ae69 1759\f
67c2b45f
JS
1760/* The svr4 ABI for the i386 says that records and unions are returned
1761 in memory. */
1762#ifndef DEFAULT_PCC_STRUCT_RETURN
1763#define DEFAULT_PCC_STRUCT_RETURN 1
1764#endif
1765
0a1c5e55
UB
1766/* Bit flags that specify the ISA we are compiling for. */
1767int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1768
1769/* A mask of ix86_isa_flags that includes bit X if X
1770 was set or cleared on the command line. */
1771static int ix86_isa_flags_explicit;
1772
287a7d41
L
1773/* Define a set of ISAs which are available when a given ISA is
1774 enabled. MMX and SSE ISAs are handled separately. */
1775
1776#define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1777#define OPTION_MASK_ISA_3DNOW_SET \
1778 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1779
1780#define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1781#define OPTION_MASK_ISA_SSE2_SET \
1782 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1783#define OPTION_MASK_ISA_SSE3_SET \
1784 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1785#define OPTION_MASK_ISA_SSSE3_SET \
1786 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1787#define OPTION_MASK_ISA_SSE4_1_SET \
1788 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1789#define OPTION_MASK_ISA_SSE4_2_SET \
1790 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1791
1792/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1793 as -msse4.2. */
1794#define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1795
1796#define OPTION_MASK_ISA_SSE4A_SET \
1797 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1798#define OPTION_MASK_ISA_SSE5_SET \
1799 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1800
1801/* Define a set of ISAs which aren't available when a given ISA is
1802 disabled. MMX and SSE ISAs are handled separately. */
3b8dd071
L
1803
1804#define OPTION_MASK_ISA_MMX_UNSET \
287a7d41
L
1805 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1806#define OPTION_MASK_ISA_3DNOW_UNSET \
1807 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1808#define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
3b8dd071
L
1809
1810#define OPTION_MASK_ISA_SSE_UNSET \
287a7d41 1811 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
3b8dd071 1812#define OPTION_MASK_ISA_SSE2_UNSET \
287a7d41 1813 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
3b8dd071 1814#define OPTION_MASK_ISA_SSE3_UNSET \
287a7d41
L
1815 (OPTION_MASK_ISA_SSE3 \
1816 | OPTION_MASK_ISA_SSSE3_UNSET \
1817 | OPTION_MASK_ISA_SSE4A_UNSET )
3b8dd071 1818#define OPTION_MASK_ISA_SSSE3_UNSET \
287a7d41 1819 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
3b8dd071 1820#define OPTION_MASK_ISA_SSE4_1_UNSET \
287a7d41
L
1821 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1822#define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4_2
3b8dd071 1823
287a7d41
L
1824/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
1825 as -mno-sse4.1. */
3b8dd071
L
1826#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1827
287a7d41
L
1828#define OPTION_MASK_ISA_SSE4A_UNSET \
1829 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
3b8dd071 1830
287a7d41 1831#define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
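/* Editor's illustration (not in the original): the *_SET chain makes each
   -m<isa> imply its prerequisites and the *_UNSET chain makes each
   -mno-<isa> drop everything layered on top.  Expanding the macros above:

     -msse4.1  turns on  SSE4.1 | SSSE3 | SSE3 | SSE2 | SSE
     -mno-sse3 turns off SSE3, SSSE3, SSE4.1, SSE4.2, SSE4A and SSE5

   both computed purely by macro expansion of the definitions above.  */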
04e1d06b 1832
a5ea943c
RG
1833/* Vectorization library interface and handlers. */
1834tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
9aba5d22 1835static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
a5ea943c
RG
1836static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1837
6ac49599
RS
1838/* Implement TARGET_HANDLE_OPTION. */
1839
1840static bool
55bea00a 1841ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
6ac49599
RS
1842{
1843 switch (code)
1844 {
0a1c5e55 1845 case OPT_mmmx:
287a7d41
L
1846 if (value)
1847 {
1848 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
1849 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
1850 }
1851 else
6ac49599 1852 {
3b8dd071
L
1853 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1854 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
6ac49599
RS
1855 }
1856 return true;
1857
0a1c5e55 1858 case OPT_m3dnow:
287a7d41
L
1859 if (value)
1860 {
1861 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
1862 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
1863 }
1864 else
6ac49599 1865 {
3b8dd071
L
1866 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1867 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
6ac49599
RS
1868 }
1869 return true;
1870
0a1c5e55
UB
1871 case OPT_m3dnowa:
1872 return false;
1873
6ac49599 1874 case OPT_msse:
287a7d41
L
1875 if (value)
1876 {
1877 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
1878 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
1879 }
1880 else
6ac49599 1881 {
3b8dd071
L
1882 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1883 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
6ac49599
RS
1884 }
1885 return true;
1886
1887 case OPT_msse2:
287a7d41
L
1888 if (value)
1889 {
1890 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
1891 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
1892 }
1893 else
6ac49599 1894 {
3b8dd071
L
1895 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1896 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
21efb4d4
HJ
1897 }
1898 return true;
1899
1900 case OPT_msse3:
287a7d41
L
1901 if (value)
1902 {
1903 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
1904 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
1905 }
1906 else
594dc048 1907 {
3b8dd071
L
1908 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1909 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
594dc048
L
1910 }
1911 return true;
1912
1913 case OPT_mssse3:
287a7d41
L
1914 if (value)
1915 {
1916 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
1917 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
1918 }
1919 else
9a5cee02 1920 {
3b8dd071
L
1921 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1922 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
9a5cee02
L
1923 }
1924 return true;
1925
1926 case OPT_msse4_1:
287a7d41
L
1927 if (value)
1928 {
1929 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
1930 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
1931 }
1932 else
21efb4d4 1933 {
3b8dd071
L
1934 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1935 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1936 }
1937 return true;
1938
1939 case OPT_msse4_2:
287a7d41
L
1940 if (value)
1941 {
1942 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
1943 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
1944 }
1945 else
3b8dd071
L
1946 {
1947 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1948 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
6ac49599
RS
1949 }
1950 return true;
1951
3b8dd071 1952 case OPT_msse4:
287a7d41
L
1953 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
1954 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
3b8dd071
L
1955 return true;
1956
1957 case OPT_mno_sse4:
1958 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1959 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1960 return true;
1961
9a5cee02 1962 case OPT_msse4a:
287a7d41
L
1963 if (value)
1964 {
1965 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
1966 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
1967 }
1968 else
9a5cee02 1969 {
3b8dd071
L
1970 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1971 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
9a5cee02
L
1972 }
1973 return true;
1974
04e1d06b 1975 case OPT_msse5:
287a7d41
L
1976 if (value)
1977 {
1978 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
1979 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
1980 }
1981 else
04e1d06b
MM
1982 {
1983 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
1984 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
1985 }
1986 return true;
1987
6ac49599
RS
1988 default:
1989 return true;
1990 }
1991}
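/* Editor's note (illustrative): recording the touched bits in
   ix86_isa_flags_explicit lets later defaulting code respect the user's
   explicit choices.  For example, the option processing below does

     ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;

   i.e. MMX is added only when neither -mmmx nor -mno-mmx was given.  */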
1992
f5316dfe
MM
1993/* Sometimes certain combinations of command options do not make
1994 sense on a particular target machine. You can define a macro
1995 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1996 defined, is executed once just after all the command options have
1997 been parsed.
1998
1999 Don't use this macro to turn on various extra optimizations for
2000 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2001
2002void
b96a374d 2003override_options (void)
f5316dfe 2004{
400500c4 2005 int i;
3326f410 2006 int ix86_tune_defaulted = 0;
b26f6ed7 2007 int ix86_arch_specified = 0;
80fd744f 2008 unsigned int ix86_arch_mask, ix86_tune_mask;
3326f410 2009
e075ae69
RH
2010 /* Comes from final.c -- no real reason to change it. */
2011#define MAX_CODE_ALIGN 16
f5316dfe 2012
c8c5cb99
SC
2013 static struct ptt
2014 {
8b60264b 2015 const struct processor_costs *cost; /* Processor costs */
8b60264b 2016 const int align_loop; /* Default alignments. */
2cca7283 2017 const int align_loop_max_skip;
8b60264b 2018 const int align_jump;
2cca7283 2019 const int align_jump_max_skip;
8b60264b 2020 const int align_func;
e075ae69 2021 }
0f290768 2022 const processor_target_table[PROCESSOR_max] =
e075ae69 2023 {
0a1c5e55
UB
2024 {&i386_cost, 4, 3, 4, 3, 4},
2025 {&i486_cost, 16, 15, 16, 15, 16},
2026 {&pentium_cost, 16, 7, 16, 7, 16},
461a73b5 2027 {&pentiumpro_cost, 16, 15, 16, 10, 16},
0a1c5e55
UB
2028 {&geode_cost, 0, 0, 0, 0, 0},
2029 {&k6_cost, 32, 7, 32, 7, 32},
2030 {&athlon_cost, 16, 7, 16, 7, 16},
2031 {&pentium4_cost, 0, 0, 0, 0, 0},
2032 {&k8_cost, 16, 7, 16, 7, 16},
2033 {&nocona_cost, 0, 0, 0, 0, 0},
461a73b5 2034 {&core2_cost, 16, 10, 16, 10, 16},
0a1c5e55 2035 {&generic32_cost, 16, 7, 16, 7, 16},
461a73b5 2036 {&generic64_cost, 16, 10, 16, 10, 16},
0a1c5e55 2037 {&amdfam10_cost, 32, 24, 32, 7, 32}
e075ae69
RH
2038 };
2039
c2f17e19
UB
2040 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2041 {
2042 "generic",
2043 "i386",
2044 "i486",
2045 "pentium",
2046 "pentium-mmx",
2047 "pentiumpro",
2048 "pentium2",
2049 "pentium3",
2050 "pentium4",
2051 "pentium-m",
2052 "prescott",
2053 "nocona",
2054 "core2",
2055 "geode",
2056 "k6",
2057 "k6-2",
2058 "k6-3",
2059 "athlon",
2060 "athlon-4",
2061 "k8",
2062 "amdfam10"
2063 };
2064
9415ab7d
TN
2065 enum pta_flags
2066 {
2067 PTA_SSE = 1 << 0,
2068 PTA_SSE2 = 1 << 1,
2069 PTA_SSE3 = 1 << 2,
2070 PTA_MMX = 1 << 3,
2071 PTA_PREFETCH_SSE = 1 << 4,
2072 PTA_3DNOW = 1 << 5,
2073 PTA_3DNOW_A = 1 << 6,
2074 PTA_64BIT = 1 << 7,
2075 PTA_SSSE3 = 1 << 8,
2076 PTA_CX16 = 1 << 9,
2077 PTA_POPCNT = 1 << 10,
2078 PTA_ABM = 1 << 11,
2079 PTA_SSE4A = 1 << 12,
2080 PTA_NO_SAHF = 1 << 13,
2081 PTA_SSE4_1 = 1 << 14,
04e1d06b 2082 PTA_SSE4_2 = 1 << 15,
8b96a312
L
2083 PTA_SSE5 = 1 << 16,
2084 PTA_AES = 1 << 17,
2085 PTA_PCLMUL = 1 << 18
9415ab7d
TN
2086 };
2087
e075ae69
RH
2088 static struct pta
2089 {
8b60264b
KG
2090 const char *const name; /* processor name or nickname. */
2091 const enum processor_type processor;
9415ab7d 2092 const unsigned /*enum pta_flags*/ flags;
e075ae69 2093 }
0f290768 2094 const processor_alias_table[] =
e075ae69 2095 {
0dd0e980
JH
2096 {"i386", PROCESSOR_I386, 0},
2097 {"i486", PROCESSOR_I486, 0},
2098 {"i586", PROCESSOR_PENTIUM, 0},
2099 {"pentium", PROCESSOR_PENTIUM, 0},
2100 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
3780101d
JG
2101 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2102 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2103 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
0a1c5e55 2104 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
0dd0e980
JH
2105 {"i686", PROCESSOR_PENTIUMPRO, 0},
2106 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2107 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
0a1c5e55
UB
2108 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2109 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2110 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2111 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2112 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2113 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
4f3f76e6 2114 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
0a1c5e55
UB
2115 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2116 | PTA_CX16 | PTA_NO_SAHF)},
2117 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2118 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2119 | PTA_SSSE3
2120 | PTA_CX16)},
2121 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2122 | PTA_PREFETCH_SSE)},
0dd0e980
JH
2123 {"k6", PROCESSOR_K6, PTA_MMX},
2124 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2125 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
0a1c5e55
UB
2126 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2127 | PTA_PREFETCH_SSE)},
2128 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2129 | PTA_PREFETCH_SSE)},
2130 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2131 | PTA_SSE)},
2132 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2133 | PTA_SSE)},
2134 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2135 | PTA_SSE)},
2136 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2137 | PTA_MMX | PTA_SSE | PTA_SSE2
2138 | PTA_NO_SAHF)},
2139 {"k8", PROCESSOR_K8, (PTA_64BIT
2140 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2141 | PTA_SSE | PTA_SSE2
2142 | PTA_NO_SAHF)},
2143 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2144 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2145 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2146 | PTA_NO_SAHF)},
2147 {"opteron", PROCESSOR_K8, (PTA_64BIT
2148 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2149 | PTA_SSE | PTA_SSE2
2150 | PTA_NO_SAHF)},
2151 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2152 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2153 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2154 | PTA_NO_SAHF)},
2155 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2156 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2157 | PTA_SSE | PTA_SSE2
2158 | PTA_NO_SAHF)},
2159 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2160 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2161 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2162 | PTA_NO_SAHF)},
2163 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2164 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2165 | PTA_SSE | PTA_SSE2
2166 | PTA_NO_SAHF)},
2167 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2168 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2169 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2170 | PTA_SSE4A
2171 | PTA_CX16 | PTA_ABM)},
2172 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2173 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2174 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2175 | PTA_SSE4A
2176 | PTA_CX16 | PTA_ABM)},
d326eaf0
JH
2177 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2178 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
3af4bd89 2179 };
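/* Editor's worked example (derived from the table above): -march=core2
   matches the "core2" entry, so the lookup loop below selects
   PROCESSOR_CORE2 and implies MMX, SSE, SSE2, SSE3, SSSE3 and cmpxchg16b
   -- exactly the PTA_* bits listed for that row.  */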
c8c5cb99 2180
ca7558fc 2181 int const pta_size = ARRAY_SIZE (processor_alias_table);
c8c5cb99 2182
554707bd
DJ
2183#ifdef SUBTARGET_OVERRIDE_OPTIONS
2184 SUBTARGET_OVERRIDE_OPTIONS;
2185#endif
2186
f475fd3c
MS
2187#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2188 SUBSUBTARGET_OVERRIDE_OPTIONS;
2189#endif
2190
f7288899
EC
2191 /* -fPIC is the default for x86_64. */
2192 if (TARGET_MACHO && TARGET_64BIT)
2193 flag_pic = 2;
2194
41ed2237 2195 /* Set the default values for switches whose default depends on TARGET_64BIT
d1f87653 2196 in case they weren't overridden by command line options. */
55ba61f3
JH
2197 if (TARGET_64BIT)
2198 {
f7288899 2199 /* Mach-O doesn't support omitting the frame pointer for now. */
55ba61f3 2200 if (flag_omit_frame_pointer == 2)
f7288899 2201 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
55ba61f3
JH
2202 if (flag_asynchronous_unwind_tables == 2)
2203 flag_asynchronous_unwind_tables = 1;
2204 if (flag_pcc_struct_return == 2)
2205 flag_pcc_struct_return = 0;
2206 }
2207 else
2208 {
2209 if (flag_omit_frame_pointer == 2)
2210 flag_omit_frame_pointer = 0;
2211 if (flag_asynchronous_unwind_tables == 2)
2212 flag_asynchronous_unwind_tables = 0;
2213 if (flag_pcc_struct_return == 2)
7c712dcc 2214 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
55ba61f3
JH
2215 }
2216
d326eaf0
JH
2217 /* Need to check -mtune=generic first. */
2218 if (ix86_tune_string)
3326f410 2219 {
d326eaf0 2220 if (!strcmp (ix86_tune_string, "generic")
fa959ce4
MM
2221 || !strcmp (ix86_tune_string, "i686")
2222 /* As special support for cross compilers we read -mtune=native
2223 as -mtune=generic. With native compilers we won't see
2224 -mtune=native, as it was changed by the driver. */
2225 || !strcmp (ix86_tune_string, "native"))
d326eaf0
JH
2226 {
2227 if (TARGET_64BIT)
2228 ix86_tune_string = "generic64";
2229 else
2230 ix86_tune_string = "generic32";
2231 }
2232 else if (!strncmp (ix86_tune_string, "generic", 7))
2233 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
3326f410 2234 }
d326eaf0
JH
2235 else
2236 {
2237 if (ix86_arch_string)
2238 ix86_tune_string = ix86_arch_string;
2239 if (!ix86_tune_string)
2240 {
c2f17e19 2241 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
d326eaf0
JH
2242 ix86_tune_defaulted = 1;
2243 }
2244
2245 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2246 need to use a sensible tune option. */
2247 if (!strcmp (ix86_tune_string, "generic")
2248 || !strcmp (ix86_tune_string, "x86-64")
2249 || !strcmp (ix86_tune_string, "i686"))
2250 {
2251 if (TARGET_64BIT)
2252 ix86_tune_string = "generic64";
2253 else
2254 ix86_tune_string = "generic32";
2255 }
2256 }
8c996513
JH
2257 if (ix86_stringop_string)
2258 {
2259 if (!strcmp (ix86_stringop_string, "rep_byte"))
2260 stringop_alg = rep_prefix_1_byte;
2261 else if (!strcmp (ix86_stringop_string, "libcall"))
2262 stringop_alg = libcall;
2263 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2264 stringop_alg = rep_prefix_4_byte;
2265 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2266 stringop_alg = rep_prefix_8_byte;
2267 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2268 stringop_alg = loop_1_byte;
2269 else if (!strcmp (ix86_stringop_string, "loop"))
2270 stringop_alg = loop;
2271 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2272 stringop_alg = unrolled_loop;
2273 else
2274 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2275 }
d326eaf0
JH
2276 if (!strcmp (ix86_tune_string, "x86-64"))
2277 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2278 "-mtune=generic instead as appropriate.");
2279
f4365627 2280 if (!ix86_arch_string)
3fec9fa9 2281 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
b26f6ed7
EC
2282 else
2283 ix86_arch_specified = 1;
4f3f76e6 2284
d326eaf0
JH
2285 if (!strcmp (ix86_arch_string, "generic"))
2286 error ("generic CPU can be used only for -mtune= switch");
2287 if (!strncmp (ix86_arch_string, "generic", 7))
2288 error ("bad value (%s) for -march= switch", ix86_arch_string);
e075ae69 2289
6189a572
JH
2290 if (ix86_cmodel_string != 0)
2291 {
2292 if (!strcmp (ix86_cmodel_string, "small"))
2293 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
7dcbf659
JH
2294 else if (!strcmp (ix86_cmodel_string, "medium"))
2295 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
dc4d7240
JH
2296 else if (!strcmp (ix86_cmodel_string, "large"))
2297 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
6189a572 2298 else if (flag_pic)
dc4d7240 2299 error ("code model %s does not support PIC mode", ix86_cmodel_string);
6189a572
JH
2300 else if (!strcmp (ix86_cmodel_string, "32"))
2301 ix86_cmodel = CM_32;
2302 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2303 ix86_cmodel = CM_KERNEL;
6189a572
JH
2304 else
2305 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2306 }
2307 else
2308 {
ccf8e764
RH
2309 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2310 use of rip-relative addressing. This eliminates fixups that
2311 would otherwise be needed if this object is to be placed in a
2312 DLL, and is essentially just as efficient as direct addressing. */
2313 if (TARGET_64BIT_MS_ABI)
2314 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2315 else if (TARGET_64BIT)
6189a572 2316 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
ccf8e764
RH
2317 else
2318 ix86_cmodel = CM_32;
6189a572 2319 }
c93e80a5
JH
2320 if (ix86_asm_string != 0)
2321 {
1f4c2c57
MS
2322 if (! TARGET_MACHO
2323 && !strcmp (ix86_asm_string, "intel"))
c93e80a5
JH
2324 ix86_asm_dialect = ASM_INTEL;
2325 else if (!strcmp (ix86_asm_string, "att"))
2326 ix86_asm_dialect = ASM_ATT;
2327 else
2328 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2329 }
6189a572 2330 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
9e637a26 2331 error ("code model %qs not supported in the %s bit mode",
6189a572 2332 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
853a33f3 2333 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
c725bd79 2334 sorry ("%i-bit mode not compiled in",
853a33f3 2335 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
6189a572 2336
f4365627
JH
2337 for (i = 0; i < pta_size; i++)
2338 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2339 {
2340 ix86_arch = processor_alias_table[i].processor;
2341 /* Default cpu tuning to the architecture. */
9e555526 2342 ix86_tune = ix86_arch;
0a1c5e55
UB
2343
2344 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2345 error ("CPU you selected does not support x86-64 "
2346 "instruction set");
2347
f4365627 2348 if (processor_alias_table[i].flags & PTA_MMX
853a33f3
UB
2349 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2350 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
f4365627 2351 if (processor_alias_table[i].flags & PTA_3DNOW
853a33f3
UB
2352 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2353 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
f4365627 2354 if (processor_alias_table[i].flags & PTA_3DNOW_A
853a33f3
UB
2355 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2356 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
f4365627 2357 if (processor_alias_table[i].flags & PTA_SSE
853a33f3
UB
2358 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2359 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
f4365627 2360 if (processor_alias_table[i].flags & PTA_SSE2
853a33f3
UB
2361 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2362 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
5bbeea44 2363 if (processor_alias_table[i].flags & PTA_SSE3
853a33f3
UB
2364 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2365 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
b1875f52 2366 if (processor_alias_table[i].flags & PTA_SSSE3
853a33f3
UB
2367 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2368 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
9a5cee02 2369 if (processor_alias_table[i].flags & PTA_SSE4_1
853a33f3
UB
2370 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2371 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3b8dd071
L
2372 if (processor_alias_table[i].flags & PTA_SSE4_2
2373 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2374 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
0a1c5e55 2375 if (processor_alias_table[i].flags & PTA_SSE4A
853a33f3
UB
2376 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2377 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
04e1d06b
MM
2378 if (processor_alias_table[i].flags & PTA_SSE5
2379 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2380 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
0a1c5e55
UB
2381
2382 if (processor_alias_table[i].flags & PTA_ABM)
2383 x86_abm = true;
15a26abf
JJ
2384 if (processor_alias_table[i].flags & PTA_CX16)
2385 x86_cmpxchg16b = true;
0a1c5e55
UB
2386 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2387 x86_popcnt = true;
2388 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2389 x86_prefetch_sse = true;
9064c533 2390 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
3c2d980c 2391 x86_sahf = true;
8b96a312
L
2392 if (processor_alias_table[i].flags & PTA_AES)
2393 x86_aes = true;
2394 if (processor_alias_table[i].flags & PTA_PCLMUL)
2395 x86_pclmul = true;
0a1c5e55 2396
6716ecbc
JM
2397 break;
2398 }
2399
2400 if (i == pta_size)
2401 error ("bad value (%s) for -march= switch", ix86_arch_string);
2402
80fd744f
RH
2403 ix86_arch_mask = 1u << ix86_arch;
2404 for (i = 0; i < X86_ARCH_LAST; ++i)
2405 ix86_arch_features[i] &= ix86_arch_mask;
2406
6716ecbc
JM
2407 for (i = 0; i < pta_size; i++)
2408 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2409 {
2410 ix86_tune = processor_alias_table[i].processor;
4977bab6 2411 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3326f410
DJ
2412 {
2413 if (ix86_tune_defaulted)
2414 {
2415 ix86_tune_string = "x86-64";
2416 for (i = 0; i < pta_size; i++)
2417 if (! strcmp (ix86_tune_string,
2418 processor_alias_table[i].name))
2419 break;
2420 ix86_tune = processor_alias_table[i].processor;
2421 }
2422 else
2423 error ("CPU you selected does not support x86-64 "
2424 "instruction set");
2425 }
c618c6ec
JJ
2426 /* Intel CPUs have always interpreted SSE prefetch instructions as
2427 NOPs; so, we can enable SSE prefetch instructions even when
2428 -mtune (rather than -march) points us to a processor that has them.
2429 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2430 higher processors. */
0a1c5e55
UB
2431 if (TARGET_CMOVE
2432 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
c618c6ec 2433 x86_prefetch_sse = true;
f4365627
JH
2434 break;
2435 }
f4365627 2436 if (i == pta_size)
9e555526 2437 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
e075ae69 2438
8b96a312
L
2439 /* Enable SSE2 if AES or PCLMUL is enabled. */
2440 if ((x86_aes || x86_pclmul)
2441 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2442 {
2443 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2444 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2445 }
2446
80fd744f
RH
2447 ix86_tune_mask = 1u << ix86_tune;
2448 for (i = 0; i < X86_TUNE_LAST; ++i)
2449 ix86_tune_features[i] &= ix86_tune_mask;
0fa4c370 2450
2ab0437e
JH
2451 if (optimize_size)
2452 ix86_cost = &size_cost;
2453 else
9e555526 2454 ix86_cost = processor_target_table[ix86_tune].cost;
e075ae69 2455
36edd3cc
BS
2456 /* Arrange to set up i386_stack_locals for all functions. */
2457 init_machine_status = ix86_init_machine_status;
fce5a9f2 2458
0f290768 2459 /* Validate -mregparm= value. */
e075ae69 2460 if (ix86_regparm_string)
b08de47e 2461 {
ccf8e764
RH
2462 if (TARGET_64BIT)
2463 warning (0, "-mregparm is ignored in 64-bit mode");
400500c4
RK
2464 i = atoi (ix86_regparm_string);
2465 if (i < 0 || i > REGPARM_MAX)
2466 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2467 else
2468 ix86_regparm = i;
b08de47e 2469 }
ccf8e764
RH
2470 if (TARGET_64BIT)
2471 ix86_regparm = REGPARM_MAX;
b08de47e 2472
3e18fdf6 2473 /* If the user has provided any of the -malign-* options,
a4f31c00 2474 warn and use that value only if -falign-* is not set.
3e18fdf6 2475 Remove this code in GCC 3.2 or later. */
e075ae69 2476 if (ix86_align_loops_string)
b08de47e 2477 {
d4ee4d25 2478 warning (0, "-malign-loops is obsolete, use -falign-loops");
3e18fdf6
GK
2479 if (align_loops == 0)
2480 {
2481 i = atoi (ix86_align_loops_string);
2482 if (i < 0 || i > MAX_CODE_ALIGN)
2483 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2484 else
2485 align_loops = 1 << i;
2486 }
b08de47e 2487 }
3af4bd89 2488
e075ae69 2489 if (ix86_align_jumps_string)
b08de47e 2490 {
d4ee4d25 2491 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
3e18fdf6
GK
2492 if (align_jumps == 0)
2493 {
2494 i = atoi (ix86_align_jumps_string);
2495 if (i < 0 || i > MAX_CODE_ALIGN)
2496 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2497 else
2498 align_jumps = 1 << i;
2499 }
b08de47e 2500 }
b08de47e 2501
e075ae69 2502 if (ix86_align_funcs_string)
b08de47e 2503 {
d4ee4d25 2504 warning (0, "-malign-functions is obsolete, use -falign-functions");
3e18fdf6
GK
2505 if (align_functions == 0)
2506 {
2507 i = atoi (ix86_align_funcs_string);
2508 if (i < 0 || i > MAX_CODE_ALIGN)
2509 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2510 else
2511 align_functions = 1 << i;
2512 }
b08de47e 2513 }
3af4bd89 2514
3e18fdf6 2515 /* Default align_* from the processor table. */
3e18fdf6 2516 if (align_loops == 0)
2cca7283 2517 {
9e555526
RH
2518 align_loops = processor_target_table[ix86_tune].align_loop;
2519 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2cca7283 2520 }
3e18fdf6 2521 if (align_jumps == 0)
2cca7283 2522 {
9e555526
RH
2523 align_jumps = processor_target_table[ix86_tune].align_jump;
2524 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2cca7283 2525 }
3e18fdf6 2526 if (align_functions == 0)
2cca7283 2527 {
9e555526 2528 align_functions = processor_target_table[ix86_tune].align_func;
2cca7283 2529 }
3e18fdf6 2530
0f290768 2531 /* Validate -mbranch-cost= value, or provide default. */
3dd0df7f 2532 ix86_branch_cost = ix86_cost->branch_cost;
e075ae69 2533 if (ix86_branch_cost_string)
804a8ee0 2534 {
400500c4
RK
2535 i = atoi (ix86_branch_cost_string);
2536 if (i < 0 || i > 5)
2537 error ("-mbranch-cost=%d is not between 0 and 5", i);
2538 else
2539 ix86_branch_cost = i;
804a8ee0 2540 }
7dcbf659
JH
2541 if (ix86_section_threshold_string)
2542 {
2543 i = atoi (ix86_section_threshold_string);
2544 if (i < 0)
2545 error ("-mlarge-data-threshold=%d is negative", i);
2546 else
2547 ix86_section_threshold = i;
2548 }
804a8ee0 2549
f996902d
RH
2550 if (ix86_tls_dialect_string)
2551 {
2552 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2553 ix86_tls_dialect = TLS_DIALECT_GNU;
5bf5a10b
AO
2554 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2555 ix86_tls_dialect = TLS_DIALECT_GNU2;
f996902d
RH
2556 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2557 ix86_tls_dialect = TLS_DIALECT_SUN;
2558 else
2559 error ("bad value (%s) for -mtls-dialect= switch",
2560 ix86_tls_dialect_string);
2561 }
2562
577565f9
UB
2563 if (ix87_precision_string)
2564 {
2565 i = atoi (ix87_precision_string);
2566 if (i != 32 && i != 64 && i != 80)
2567 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2568 }
2569
d6b0b376
EC
2570 if (TARGET_64BIT)
2571 {
2572 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2573
2574 /* Enable by default the SSE and MMX builtins. Do allow the user to
2575 explicitly disable any of these. In particular, disabling SSE and
2576 MMX for kernel code is extremely useful. */
b26f6ed7 2577 if (!ix86_arch_specified)
d6b0b376
EC
2578 ix86_isa_flags
2579 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2580 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2581
2582 if (TARGET_RTD)
2583 warning (0, "-mrtd is ignored in 64bit mode");
2584 }
2585 else
2586 {
2587 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2588
b26f6ed7 2589 if (!ix86_arch_specified)
d6b0b376
EC
2590 ix86_isa_flags
2591 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2592
2593 /* The i386 ABI does not specify a red zone. It still makes sense to use
2594 one when the programmer takes care to keep the stack from being destroyed. */
2595 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2596 target_flags |= MASK_NO_RED_ZONE;
2597 }
2598
e9a25f70 2599 /* Keep nonleaf frame pointers. */
14c473b9
RS
2600 if (flag_omit_frame_pointer)
2601 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2602 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 2603 flag_omit_frame_pointer = 1;
e075ae69
RH
2604
2605 /* If we're doing fast math, we don't care about comparison order
2606 wrt NaNs. This lets us use a shorter comparison sequence. */
5a4171a0 2607 if (flag_finite_math_only)
e075ae69
RH
2608 target_flags &= ~MASK_IEEE_FP;
2609
30c99a84
RH
2610 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2611 since the insns won't need emulation. */
e39e8c36 2612 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
30c99a84
RH
2613 target_flags &= ~MASK_NO_FANCY_MATH_387;
2614
ba2baa55 2615 /* Likewise, if the target doesn't have a 387, or we've specified
0fa2e4df 2616 software floating point, don't use 387 inline intrinsics. */
ba2baa55
RS
2617 if (!TARGET_80387)
2618 target_flags |= MASK_NO_FANCY_MATH_387;
2619
a5370cf0
RH
2620 /* Turn on MMX builtins for -msse. */
2621 if (TARGET_SSE)
2622 {
853a33f3 2623 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
a5370cf0
RH
2624 x86_prefetch_sse = true;
2625 }
2626
837a8954
UB
2627 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2628 if (TARGET_SSE4_2 || TARGET_ABM)
0a1c5e55 2629 x86_popcnt = true;
21efb4d4 2630
d0655f33 2631 /* Validate -mpreferred-stack-boundary= value, or provide default.
1395ea39
L
2632 The default of 128 bits is for the Pentium III's SSE __m128. We can't
2633 make it depend on optimize_size, since otherwise we couldn't mix object
2634 files compiled with -Os and -On. */
2635 ix86_preferred_stack_boundary = 128;
d0655f33
JM
2636 if (ix86_preferred_stack_boundary_string)
2637 {
2638 i = atoi (ix86_preferred_stack_boundary_string);
2639 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2640 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2641 TARGET_64BIT ? 4 : 2);
2642 else
2643 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2644 }
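/* Editor's worked example (not in the original): -mpreferred-stack-boundary=4
   yields (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. 16-byte stack
   alignment; the accepted exponents therefore span 4-byte (2) to 4096-byte
   (12) alignment, with a 16-byte minimum in 64-bit mode.  */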
2645
1f97667f
RG
2646 /* Accept -msseregparm only if at least SSE support is enabled. */
2647 if (TARGET_SSEREGPARM
2648 && ! TARGET_SSE)
2649 error ("-msseregparm used without SSE enabled");
2650
004d3859 2651 ix86_fpmath = TARGET_FPMATH_DEFAULT;
965f5423
JH
2652 if (ix86_fpmath_string != 0)
2653 {
2654 if (! strcmp (ix86_fpmath_string, "387"))
2655 ix86_fpmath = FPMATH_387;
2656 else if (! strcmp (ix86_fpmath_string, "sse"))
2657 {
2658 if (!TARGET_SSE)
2659 {
d4ee4d25 2660 warning (0, "SSE instruction set disabled, using 387 arithmetics");
965f5423
JH
2661 ix86_fpmath = FPMATH_387;
2662 }
2663 else
2664 ix86_fpmath = FPMATH_SSE;
2665 }
2666 else if (! strcmp (ix86_fpmath_string, "387,sse")
2667 || ! strcmp (ix86_fpmath_string, "sse,387"))
2668 {
2669 if (!TARGET_SSE)
2670 {
d4ee4d25 2671 warning (0, "SSE instruction set disabled, using 387 arithmetics");
965f5423
JH
2672 ix86_fpmath = FPMATH_387;
2673 }
2674 else if (!TARGET_80387)
2675 {
d4ee4d25 2676 warning (0, "387 instruction set disabled, using SSE arithmetics");
965f5423
JH
2677 ix86_fpmath = FPMATH_SSE;
2678 }
2679 else
9415ab7d 2680 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
965f5423 2681 }
fce5a9f2 2682 else
965f5423
JH
2683 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2684 }
14f73b5a 2685
de004e6d
JS
2686 /* If the i387 is disabled, then do not return values in it. */
2687 if (!TARGET_80387)
2688 target_flags &= ~MASK_FLOAT_RETURNS;
2689
a5ea943c
RG
2690 /* Use external vectorized library in vectorizing intrinsics. */
2691 if (ix86_veclibabi_string)
2692 {
9aba5d22
UB
2693 if (strcmp (ix86_veclibabi_string, "svml") == 0)
2694 ix86_veclib_handler = ix86_veclibabi_svml;
2695 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
a5ea943c
RG
2696 ix86_veclib_handler = ix86_veclibabi_acml;
2697 else
2698 error ("unknown vectorization library ABI type (%s) for "
2699 "-mveclibabi= switch", ix86_veclibabi_string);
2700 }
2701
e39e8c36 2702 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
9ef1b13a 2703 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
c6036a37
JH
2704 && !optimize_size)
2705 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
623fe810 2706
d3073c70
RH
2707 /* ??? Unwind info is not correct around the CFG unless either a frame
2708 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2709 unwind info generation to be aware of the CFG and propagating states
2710 around edges. */
2711 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2712 || flag_exceptions || flag_non_call_exceptions)
2713 && flag_omit_frame_pointer
2714 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2715 {
2716 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2717 warning (0, "unwind tables currently require either a frame pointer "
2718 "or -maccumulate-outgoing-args for correctness");
2719 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2720 }
2721
2c7c6f54
JM
2722 /* If stack probes are required, the space used for large function
2723 arguments on the stack must also be probed, so enable
2724 -maccumulate-outgoing-args so this happens in the prologue. */
2725 if (TARGET_STACK_PROBE
2726 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2727 {
2728 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2729 warning (0, "stack probing requires -maccumulate-outgoing-args "
2730 "for correctness");
2731 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2732 }
2733
80fd744f
RH
2734 /* For sane SSE instruction set generation we need the fcomi instruction.
2735 It is safe to enable all CMOVE instructions. */
2736 if (TARGET_SSE)
2737 TARGET_CMOVE = 1;
2738
623fe810
RH
2739 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2740 {
2741 char *p;
2742 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2743 p = strchr (internal_label_prefix, 'X');
2744 internal_label_prefix_len = p - internal_label_prefix;
2745 *p = '\0';
2746 }
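/* Editor's note (assumed example): with the default ELF definition of
   ASM_GENERATE_INTERNAL_LABEL the buffer holds something like "*.LX0", so
   truncating at the 'X' leaves internal_label_prefix = "*.L" with length 3,
   which output routines can then match against label names.  The exact
   spelling is target-dependent.  */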
a5370cf0
RH
2747
2748 /* When a scheduling description is not available, disable the scheduler
2749 pass so it won't slow down compilation and make x87 code slower. */
ad7b96a9
JH
2750 if (!TARGET_SCHEDULE)
2751 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
47eb5b32
ZD
2752
2753 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2754 set_param_value ("simultaneous-prefetches",
2755 ix86_cost->simultaneous_prefetches);
2756 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2757 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
46cb0441
ZD
2758 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2759 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2760 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2761 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
d7bd8aeb
JJ
2762
2763 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
2764 can be optimized to ap = __builtin_next_arg (0). */
2765 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
2766 targetm.expand_builtin_va_start = NULL;
f5316dfe
MM
2767}
2768\f
2ed941ec
RH
2769/* Return true if this goes in large data/bss. */
2770
2771static bool
2772ix86_in_large_data_p (tree exp)
2773{
2774 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2775 return false;
2776
2777 /* Functions are never large data. */
2778 if (TREE_CODE (exp) == FUNCTION_DECL)
2779 return false;
2780
2781 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2782 {
2783 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2784 if (strcmp (section, ".ldata") == 0
2785 || strcmp (section, ".lbss") == 0)
2786 return true;
2787 return false;
2788 }
2789 else
2790 {
2791 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2792
2793 /* If this is an incomplete type with size 0, then we can't put it
2794 in data because it might be too big when completed. */
2795 if (!size || size > ix86_section_threshold)
2796 return true;
2797 }
2798
2799 return false;
2800}
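/* Editor's example (following the logic above): under -mcmodel=medium a
   100000-byte array exceeds the default 65536-byte ix86_section_threshold
   and is treated as large data, while a 512-byte variable is not -- unless
   its section attribute explicitly names ".ldata" or ".lbss".  */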
2801
2802/* Switch to the appropriate section for output of DECL.
7dcbf659
JH
2803 DECL is either a `VAR_DECL' node or a constant of some sort.
2804 RELOC indicates whether forming the initial value of DECL requires
2805 link-time relocations. */
2806
2ed941ec
RH
2807static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2808 ATTRIBUTE_UNUSED;
2809
d6b5193b 2810static section *
7dcbf659 2811x86_64_elf_select_section (tree decl, int reloc,
d6b5193b 2812 unsigned HOST_WIDE_INT align)
7dcbf659
JH
2813{
2814 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2815 && ix86_in_large_data_p (decl))
2816 {
2817 const char *sname = NULL;
3b10d286 2818 unsigned int flags = SECTION_WRITE;
9b580a0b 2819 switch (categorize_decl_for_section (decl, reloc))
7dcbf659
JH
2820 {
2821 case SECCAT_DATA:
2822 sname = ".ldata";
2823 break;
2824 case SECCAT_DATA_REL:
2825 sname = ".ldata.rel";
2826 break;
2827 case SECCAT_DATA_REL_LOCAL:
2828 sname = ".ldata.rel.local";
2829 break;
2830 case SECCAT_DATA_REL_RO:
2831 sname = ".ldata.rel.ro";
2832 break;
2833 case SECCAT_DATA_REL_RO_LOCAL:
2834 sname = ".ldata.rel.ro.local";
2835 break;
2836 case SECCAT_BSS:
2837 sname = ".lbss";
3b10d286 2838 flags |= SECTION_BSS;
7dcbf659
JH
2839 break;
2840 case SECCAT_RODATA:
2841 case SECCAT_RODATA_MERGE_STR:
2842 case SECCAT_RODATA_MERGE_STR_INIT:
2843 case SECCAT_RODATA_MERGE_CONST:
2844 sname = ".lrodata";
3b10d286 2845 flags = 0;
7dcbf659
JH
2846 break;
2847 case SECCAT_SRODATA:
2848 case SECCAT_SDATA:
2849 case SECCAT_SBSS:
2850 gcc_unreachable ();
2851 case SECCAT_TEXT:
2852 case SECCAT_TDATA:
2853 case SECCAT_TBSS:
 2854 /* We don't split these for the medium model. Place them into
 2855 default sections and hope for the best. */
2856 break;
feb60f03
NS
2857 case SECCAT_EMUTLS_VAR:
2858 case SECCAT_EMUTLS_TMPL:
2859 gcc_unreachable ();
7dcbf659
JH
2860 }
2861 if (sname)
3b10d286
JJ
2862 {
2863 /* We might get called with string constants, but get_named_section
2864 doesn't like them as they are not DECLs. Also, we need to set
2865 flags in that case. */
2866 if (!DECL_P (decl))
2867 return get_section (sname, flags, NULL);
2868 return get_named_section (decl, sname, reloc);
2869 }
7dcbf659 2870 }
d6b5193b 2871 return default_elf_select_section (decl, reloc, align);
7dcbf659
JH
2872}
2873
2874/* Build up a unique section name, expressed as a
2875 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
 2876 RELOC indicates whether the initial value of DECL requires
2877 link-time relocations. */
2878
2ed941ec 2879static void ATTRIBUTE_UNUSED
7dcbf659
JH
2880x86_64_elf_unique_section (tree decl, int reloc)
2881{
2882 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2883 && ix86_in_large_data_p (decl))
2884 {
2885 const char *prefix = NULL;
2886 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2887 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2888
9b580a0b 2889 switch (categorize_decl_for_section (decl, reloc))
7dcbf659
JH
2890 {
2891 case SECCAT_DATA:
2892 case SECCAT_DATA_REL:
2893 case SECCAT_DATA_REL_LOCAL:
2894 case SECCAT_DATA_REL_RO:
2895 case SECCAT_DATA_REL_RO_LOCAL:
feb60f03 2896 prefix = one_only ? ".ld" : ".ldata";
7dcbf659
JH
2897 break;
2898 case SECCAT_BSS:
feb60f03 2899 prefix = one_only ? ".lb" : ".lbss";
7dcbf659
JH
2900 break;
2901 case SECCAT_RODATA:
2902 case SECCAT_RODATA_MERGE_STR:
2903 case SECCAT_RODATA_MERGE_STR_INIT:
2904 case SECCAT_RODATA_MERGE_CONST:
feb60f03 2905 prefix = one_only ? ".lr" : ".lrodata";
7dcbf659
JH
2906 break;
2907 case SECCAT_SRODATA:
2908 case SECCAT_SDATA:
2909 case SECCAT_SBSS:
2910 gcc_unreachable ();
2911 case SECCAT_TEXT:
2912 case SECCAT_TDATA:
2913 case SECCAT_TBSS:
 2914 /* We don't split these for the medium model. Place them into
 2915 default sections and hope for the best. */
2916 break;
feb60f03
NS
2917 case SECCAT_EMUTLS_VAR:
2918 prefix = targetm.emutls.var_section;
2919 break;
2920 case SECCAT_EMUTLS_TMPL:
2921 prefix = targetm.emutls.tmpl_section;
2922 break;
7dcbf659
JH
2923 }
2924 if (prefix)
2925 {
feb60f03 2926 const char *name, *linkonce;
7dcbf659 2927 char *string;
7dcbf659
JH
2928
2929 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2930 name = targetm.strip_name_encoding (name);
feb60f03
NS
2931
2932 /* If we're using one_only, then there needs to be a .gnu.linkonce
2933 prefix to the section name. */
2934 linkonce = one_only ? ".gnu.linkonce" : "";
2935
2936 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
2937
2938 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
7dcbf659
JH
2939 return;
2940 }
2941 }
2942 default_unique_section (decl, reloc);
2943}
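/* An illustrative sketch (not part of the original file): the section
   names the code above is expected to build for a hypothetical large-data
   variable "foo" under -mcmodel=medium with -fdata-sections. */
#if 0
int foo = 42;   /* expected DECL_SECTION_NAME: ".ldata.foo"; for a one-only
                   copy without COMDAT groups: ".gnu.linkonce.ld.foo".  */
#endif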
2944
e81d37df 2945#ifdef COMMON_ASM_OP
7dcbf659
JH
2946/* This says how to output assembler code to declare an
2947 uninitialized external linkage data object.
2948
569b7f6a 2949 For medium model x86-64 we need to use the .largecomm directive for
7dcbf659
JH
2950 large objects. */
2951void
2952x86_elf_aligned_common (FILE *file,
2953 const char *name, unsigned HOST_WIDE_INT size,
2954 int align)
2955{
2956 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2957 && size > (unsigned int)ix86_section_threshold)
2958 fprintf (file, ".largecomm\t");
2959 else
2960 fprintf (file, "%s", COMMON_ASM_OP);
2961 assemble_name (file, name);
2962 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2963 size, align / BITS_PER_UNIT);
2964}
29c08d7c 2965#endif
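/* An illustrative sketch (not part of the original file): for a common
   symbol larger than the section threshold in the medium code model, the
   routine above is expected to emit something like

       .largecomm  buf,1048576,32

   whereas small objects keep the usual COMMON_ASM_OP (".comm") form. */
#if 0
char buf[1 << 20];   /* hypothetical 1 MB tentative definition */
#endif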
2ed941ec 2966
7dcbf659
JH
2967/* Utility function for targets to use in implementing
2968 ASM_OUTPUT_ALIGNED_BSS. */
2969
2970void
2971x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2972 const char *name, unsigned HOST_WIDE_INT size,
2973 int align)
2974{
2975 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2976 && size > (unsigned int)ix86_section_threshold)
d6b5193b 2977 switch_to_section (get_named_section (decl, ".lbss", 0));
7dcbf659 2978 else
d6b5193b 2979 switch_to_section (bss_section);
7dcbf659
JH
2980 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2981#ifdef ASM_DECLARE_OBJECT_NAME
2982 last_assemble_variable_decl = decl;
2983 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2984#else
 2985 /* The standard thing is just to output a label for the object. */
2986 ASM_OUTPUT_LABEL (file, name);
2987#endif /* ASM_DECLARE_OBJECT_NAME */
2988 ASM_OUTPUT_SKIP (file, size ? size : 1);
2989}
2990\f
32b5b1aa 2991void
b96a374d 2992optimization_options (int level, int size ATTRIBUTE_UNUSED)
32b5b1aa 2993{
e9a25f70
JL
2994 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
 2995 make the problem of not having enough registers even worse. */
32b5b1aa
SC
2996#ifdef INSN_SCHEDULING
2997 if (level > 1)
2998 flag_schedule_insns = 0;
2999#endif
55ba61f3 3000
2e3f0db6
DJ
3001 if (TARGET_MACHO)
3002 /* The Darwin libraries never set errno, so we might as well
3003 avoid calling them when that's the only reason we would. */
3004 flag_errno_math = 0;
3005
55ba61f3
JH
 3006 /* The default values of these switches depend on TARGET_64BIT,
 3007 which is not known at this moment. Mark these values with 2 and
 3008 let the user override them. If there is no command line option
3009 specifying them, we will set the defaults in override_options. */
3010 if (optimize >= 1)
3011 flag_omit_frame_pointer = 2;
3012 flag_pcc_struct_return = 2;
3013 flag_asynchronous_unwind_tables = 2;
32070c7b 3014 flag_vect_cost_model = 1;
4f514514
JM
3015#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
3016 SUBTARGET_OPTIMIZATION_OPTIONS;
3017#endif
32b5b1aa 3018}
b08de47e 3019\f
5fbf0217
EB
3020/* Decide whether we can make a sibling call to a function. DECL is the
3021 declaration of the function being targeted by the call and EXP is the
3022 CALL_EXPR representing the call. */
4977bab6
ZW
3023
3024static bool
b96a374d 3025ix86_function_ok_for_sibcall (tree decl, tree exp)
4977bab6 3026{
f19e3a64 3027 tree func;
cb1119b7 3028 rtx a, b;
f19e3a64 3029
4977bab6
ZW
3030 /* If we are generating position-independent code, we cannot sibcall
3031 optimize any indirect call, or a direct call to a global function,
3032 as the PLT requires %ebx be live. */
010ef110 3033 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4977bab6
ZW
3034 return false;
3035
f19e3a64
JJ
3036 if (decl)
3037 func = decl;
3038 else
cb1119b7 3039 {
5039610b 3040 func = TREE_TYPE (CALL_EXPR_FN (exp));
cb1119b7
RG
3041 if (POINTER_TYPE_P (func))
3042 func = TREE_TYPE (func);
3043 }
f19e3a64 3044
cb1119b7
RG
 3045 /* Check that the return value locations are the same. For example,
 3046 if we are returning floats on the 80387 register stack, we cannot
4977bab6 3047 make a sibcall from a function that doesn't return a float to a
5fbf0217
EB
3048 function that does or, conversely, from a function that does return
3049 a float to a function that doesn't; the necessary stack adjustment
cb1119b7 3050 would not be executed. This is also the place we notice
cac32996
RG
3051 differences in the return value ABI. Note that it is ok for one
3052 of the functions to have void return type as long as the return
3053 value of the other is passed in a register. */
cb1119b7
RG
3054 a = ix86_function_value (TREE_TYPE (exp), func, false);
3055 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
3056 cfun->decl, false);
5d3018ce
RH
3057 if (STACK_REG_P (a) || STACK_REG_P (b))
3058 {
3059 if (!rtx_equal_p (a, b))
3060 return false;
3061 }
3062 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
3063 ;
3064 else if (!rtx_equal_p (a, b))
4977bab6
ZW
3065 return false;
3066
3067 /* If this call is indirect, we'll need to be able to use a call-clobbered
b96a374d 3068 register for the address of the target function. Make sure that all
4977bab6
ZW
3069 such registers are not used for passing parameters. */
3070 if (!decl && !TARGET_64BIT)
3071 {
e767b5be 3072 tree type;
4977bab6
ZW
3073
3074 /* We're looking at the CALL_EXPR, we need the type of the function. */
5039610b 3075 type = CALL_EXPR_FN (exp); /* pointer expression */
4977bab6
ZW
3076 type = TREE_TYPE (type); /* pointer type */
3077 type = TREE_TYPE (type); /* function type */
3078
e767b5be 3079 if (ix86_function_regparm (type, NULL) >= 3)
4977bab6
ZW
3080 {
3081 /* ??? Need to count the actual number of registers to be used,
3082 not the possible number of registers. Fix later. */
3083 return false;
3084 }
3085 }
3086
6cc37e7e 3087 /* Dllimport'd functions are also called indirectly. */
da489f73
RH
3088 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
3089 && decl && DECL_DLLIMPORT_P (decl)
6cc37e7e
DS
3090 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
3091 return false;
6cc37e7e 3092
150cdc9e
RH
 3093 /* If we force-aligned the stack, then sibcalling would unalign the
3094 stack, which may break the called function. */
3095 if (cfun->machine->force_align_arg_pointer)
3096 return false;
3097
4977bab6
ZW
3098 /* Otherwise okay. That also includes certain types of indirect calls. */
3099 return true;
3100}
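/* An illustrative sketch (not part of the original file): on 32-bit x86
   the return-value check above refuses to sibcall a callee that returns
   on the x87 stack from a caller that does not, since the value left in
   %st(0) would never be popped. */
#if 0
double g (void);
void f (void)
{
  g ();   /* call in tail position, but not turned into a sibcall */
}
#endif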
3101
fa283935
UB
3102/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
3103 calling convention attributes;
91d231cb 3104 arguments as in struct attribute_spec.handler. */
b08de47e 3105
91d231cb 3106static tree
2f84b963
RG
3107ix86_handle_cconv_attribute (tree *node, tree name,
3108 tree args,
3109 int flags ATTRIBUTE_UNUSED,
3110 bool *no_add_attrs)
91d231cb
JM
3111{
3112 if (TREE_CODE (*node) != FUNCTION_TYPE
3113 && TREE_CODE (*node) != METHOD_TYPE
3114 && TREE_CODE (*node) != FIELD_DECL
3115 && TREE_CODE (*node) != TYPE_DECL)
3116 {
5c498b10 3117 warning (OPT_Wattributes, "%qs attribute only applies to functions",
91d231cb
JM
3118 IDENTIFIER_POINTER (name));
3119 *no_add_attrs = true;
2f84b963 3120 return NULL_TREE;
91d231cb 3121 }
2f84b963
RG
3122
3123 /* Can combine regparm with all attributes but fastcall. */
3124 if (is_attribute_p ("regparm", name))
91d231cb
JM
3125 {
3126 tree cst;
b08de47e 3127
2f84b963
RG
3128 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3129 {
3130 error ("fastcall and regparm attributes are not compatible");
3131 }
3132
91d231cb
JM
3133 cst = TREE_VALUE (args);
3134 if (TREE_CODE (cst) != INTEGER_CST)
3135 {
5c498b10
DD
3136 warning (OPT_Wattributes,
3137 "%qs attribute requires an integer constant argument",
91d231cb
JM
3138 IDENTIFIER_POINTER (name));
3139 *no_add_attrs = true;
3140 }
3141 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
3142 {
5c498b10 3143 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
91d231cb
JM
3144 IDENTIFIER_POINTER (name), REGPARM_MAX);
3145 *no_add_attrs = true;
3146 }
e91f04de 3147
33932946
SH
3148 if (!TARGET_64BIT
3149 && lookup_attribute (ix86_force_align_arg_pointer_string,
3150 TYPE_ATTRIBUTES (*node))
3151 && compare_tree_int (cst, REGPARM_MAX-1))
3152 {
3153 error ("%s functions limited to %d register parameters",
3154 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
3155 }
3156
2f84b963
RG
3157 return NULL_TREE;
3158 }
3159
3160 if (TARGET_64BIT)
3161 {
ccf8e764
RH
3162 /* Do not warn when emulating the MS ABI. */
3163 if (!TARGET_64BIT_MS_ABI)
3164 warning (OPT_Wattributes, "%qs attribute ignored",
3165 IDENTIFIER_POINTER (name));
2f84b963
RG
3166 *no_add_attrs = true;
3167 return NULL_TREE;
3168 }
3169
fa283935 3170 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2f84b963
RG
3171 if (is_attribute_p ("fastcall", name))
3172 {
3173 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3174 {
3175 error ("fastcall and cdecl attributes are not compatible");
3176 }
3177 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3178 {
3179 error ("fastcall and stdcall attributes are not compatible");
3180 }
3181 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
3182 {
e767b5be
JH
3183 error ("fastcall and regparm attributes are not compatible");
3184 }
b08de47e
MM
3185 }
3186
fa283935
UB
3187 /* Can combine stdcall with fastcall (redundant), regparm and
3188 sseregparm. */
2f84b963
RG
3189 else if (is_attribute_p ("stdcall", name))
3190 {
3191 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3192 {
3193 error ("stdcall and cdecl attributes are not compatible");
3194 }
3195 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3196 {
3197 error ("stdcall and fastcall attributes are not compatible");
3198 }
3199 }
3200
fa283935 3201 /* Can combine cdecl with regparm and sseregparm. */
2f84b963
RG
3202 else if (is_attribute_p ("cdecl", name))
3203 {
3204 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3205 {
3206 error ("stdcall and cdecl attributes are not compatible");
3207 }
3208 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3209 {
3210 error ("fastcall and cdecl attributes are not compatible");
3211 }
3212 }
3213
fa283935 3214 /* Can combine sseregparm with all attributes. */
2f84b963 3215
91d231cb 3216 return NULL_TREE;
b08de47e
MM
3217}
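/* An illustrative sketch (not part of the original file): attribute
   combinations the handler above accepts and rejects on 32-bit targets. */
#if 0
int __attribute__((stdcall, regparm (2))) ok (int, int);    /* accepted */
int __attribute__((fastcall, regparm (2))) bad (int, int);  /* rejected:
   "fastcall and regparm attributes are not compatible" */
#endif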
3218
3219/* Return 0 if the attributes for two types are incompatible, 1 if they
3220 are compatible, and 2 if they are nearly compatible (which causes a
3221 warning to be generated). */
3222
8d8e52be 3223static int
3101faab 3224ix86_comp_type_attributes (const_tree type1, const_tree type2)
b08de47e 3225{
0f290768 3226 /* Check for mismatch of non-default calling convention. */
27c38fbe 3227 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c 3228
18ff3013
DS
3229 if (TREE_CODE (type1) != FUNCTION_TYPE
3230 && TREE_CODE (type1) != METHOD_TYPE)
afcfe58c
MM
3231 return 1;
3232
2f84b963
RG
3233 /* Check for mismatched fastcall/regparm types. */
3234 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
3235 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
3236 || (ix86_function_regparm (type1, NULL)
3237 != ix86_function_regparm (type2, NULL)))
3238 return 0;
3239
3240 /* Check for mismatched sseregparm types. */
3241 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
3242 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
b96a374d 3243 return 0;
e91f04de 3244
afcfe58c 3245 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
3246 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
3247 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
265d94ac 3248 return 0;
2f84b963 3249
b08de47e
MM
3250 return 1;
3251}
b08de47e 3252\f
0fa2e4df 3253/* Return the regparm value for a function with the indicated TYPE and DECL.
e767b5be 3254 DECL may be NULL when calling the function indirectly
839a4992 3255 or considering a libcall. */
483ab821
MM
3256
3257static int
3101faab 3258ix86_function_regparm (const_tree type, const_tree decl)
483ab821
MM
3259{
3260 tree attr;
e767b5be 3261 int regparm = ix86_regparm;
483ab821 3262
27183bba
UB
3263 static bool error_issued;
3264
ee2f65b4
RH
3265 if (TARGET_64BIT)
3266 return regparm;
3267
3268 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3269 if (attr)
27183bba
UB
3270 {
3271 regparm
3272 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3273
3274 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
3275 {
3276 /* We can't use regparm(3) for nested functions because
 3277 these pass the static chain pointer in the %ecx register. */
3278 if (!error_issued && regparm == 3
3279 && decl_function_context (decl)
3280 && !DECL_NO_STATIC_CHAIN (decl))
3281 {
3282 error ("nested functions are limited to 2 register parameters");
3283 error_issued = true;
3284 return 0;
3285 }
3286 }
3287
3288 return regparm;
3289 }
ee2f65b4
RH
3290
3291 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3292 return 2;
3293
3294 /* Use register calling convention for local functions when possible. */
ac97d816
UB
3295 if (decl && TREE_CODE (decl) == FUNCTION_DECL
3296 && flag_unit_at_a_time && !profile_flag)
e767b5be 3297 {
3101faab 3298 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
b1d5455a 3299 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
ee2f65b4 3300 if (i && i->local)
e767b5be 3301 {
ee2f65b4
RH
3302 int local_regparm, globals = 0, regno;
3303 struct function *f;
e767b5be 3304
ee2f65b4 3305 /* Make sure no regparm register is taken by a
ec382b8c
UB
3306 fixed register variable. */
3307 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
3308 if (fixed_regs[local_regparm])
ee2f65b4 3309 break;
e767b5be 3310
ee2f65b4
RH
 3311 /* We can't use regparm(3) for nested functions, as these use the
 3312 static chain pointer in the third argument. */
3313 if (local_regparm == 3
f2f0a960
HMC
3314 && (decl_function_context (decl)
3315 || ix86_force_align_arg_pointer)
ee2f65b4
RH
3316 && !DECL_NO_STATIC_CHAIN (decl))
3317 local_regparm = 2;
3318
 3319 /* If the function realigns its stack pointer, the prologue will
3320 clobber %ecx. If we've already generated code for the callee,
3321 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3322 scanning the attributes for the self-realigning property. */
3323 f = DECL_STRUCT_FUNCTION (decl);
3324 if (local_regparm == 3
3325 && (f ? !!f->machine->force_align_arg_pointer
3326 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3327 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3328 local_regparm = 2;
3329
ec382b8c
UB
3330 /* Each fixed register usage increases register pressure,
 3331 so fewer registers should be used for argument passing.
 3332 This functionality can be overridden by an explicit
3333 regparm value. */
3334 for (regno = 0; regno <= DI_REG; regno++)
3335 if (fixed_regs[regno])
ee2f65b4 3336 globals++;
ec382b8c 3337
ee2f65b4
RH
3338 local_regparm
3339 = globals < local_regparm ? local_regparm - globals : 0;
3340
3341 if (local_regparm > regparm)
3342 regparm = local_regparm;
e767b5be
JH
3343 }
3344 }
ee2f65b4 3345
e767b5be 3346 return regparm;
483ab821
MM
3347}
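/* An illustrative sketch (not part of the original file): what the lookup
   above computes for explicitly annotated declarations. */
#if 0
int __attribute__((regparm (3))) f (int a, int b, int c); /* regparm value 3:
   a in %eax, b in %edx, c in %ecx */
int __attribute__((fastcall)) g (int a, int b);           /* regparm value 2:
   a in %ecx, b in %edx */
#endif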
3348
3e0a5abd
UB
 3349/* Return 1 or 2 if we can pass up to SSE_REGPARM_MAX SFmode (1) and
 3350 DFmode (2) arguments in SSE registers for a function with the
 3351 indicated TYPE and DECL. DECL may be NULL when calling the function
 3352 indirectly or considering a libcall. Otherwise return 0. */
2f84b963
RG
3353
3354static int
7074bc2e 3355ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
2f84b963 3356{
ee2f65b4
RH
3357 gcc_assert (!TARGET_64BIT);
3358
2f84b963
RG
3359 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3360 by the sseregparm attribute. */
1f97667f 3361 if (TARGET_SSEREGPARM
ee2f65b4 3362 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2f84b963
RG
3363 {
3364 if (!TARGET_SSE)
3365 {
7074bc2e
L
3366 if (warn)
3367 {
3368 if (decl)
3369 error ("Calling %qD with attribute sseregparm without "
3370 "SSE/SSE2 enabled", decl);
3371 else
3372 error ("Calling %qT with attribute sseregparm without "
3373 "SSE/SSE2 enabled", type);
3374 }
2f84b963
RG
3375 return 0;
3376 }
3377
3378 return 2;
3379 }
3380
56829cae 3381 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
ee2f65b4
RH
3382 (and DFmode for SSE2) arguments in SSE registers. */
3383 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2f84b963 3384 {
586de218 3385 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
b1d5455a 3386 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
2f84b963
RG
3387 if (i && i->local)
3388 return TARGET_SSE2 ? 2 : 1;
3389 }
3390
3391 return 0;
3392}
3393
f676971a 3394/* Return true if EAX is live at the start of the function. Used by
fe9f516f
RH
3395 ix86_expand_prologue to determine if we need special help before
3396 calling allocate_stack_worker. */
3397
3398static bool
3399ix86_eax_live_at_start_p (void)
3400{
3401 /* Cheat. Don't bother working forward from ix86_function_regparm
3402 to the function type to whether an actual argument is located in
3403 eax. Instead just look at cfg info, which is still close enough
3404 to correct at this point. This gives false positives for broken
3405 functions that might use uninitialized data that happens to be
3406 allocated in eax, but who cares? */
eaf7f7e7 3407 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
fe9f516f
RH
3408}
3409
b08de47e
MM
3410/* Value is the number of bytes of arguments automatically
3411 popped when returning from a subroutine call.
3412 FUNDECL is the declaration node of the function (as a tree),
3413 FUNTYPE is the data type of the function (as a tree),
3414 or for a library call it is an identifier node for the subroutine name.
3415 SIZE is the number of bytes of arguments passed on the stack.
3416
3417 On the 80386, the RTD insn may be used to pop them if the number
3418 of args is fixed, but if the number is variable then the caller
3419 must pop them all. RTD can't be used for library calls now
3420 because the library is compiled with the Unix compiler.
3421 Use of RTD is a selectable option, since it is incompatible with
3422 standard Unix calling sequences. If the option is not selected,
3423 the caller must always pop the args.
3424
3425 The attribute stdcall is equivalent to RTD on a per module basis. */
3426
3427int
b96a374d 3428ix86_return_pops_args (tree fundecl, tree funtype, int size)
79325812 3429{
ee2f65b4
RH
3430 int rtd;
3431
3432 /* None of the 64-bit ABIs pop arguments. */
3433 if (TARGET_64BIT)
3434 return 0;
3435
3436 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 3437
43f3a59d 3438 /* Cdecl functions override -mrtd, and never pop the stack. */
ee2f65b4
RH
3439 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3440 {
3441 /* Stdcall and fastcall functions will pop the stack if not
3442 variable args. */
3443 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3444 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3445 rtd = 1;
3446
04e1d06b 3447 if (rtd && ! stdarg_p (funtype))
ee2f65b4
RH
3448 return size;
3449 }
79325812 3450
232b8f52 3451 /* Lose any fake structure return argument if it is passed on the stack. */
61f71b34 3452 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
61fec9ff 3453 && !KEEP_AGGREGATE_RETURN_POINTER)
232b8f52 3454 {
e767b5be 3455 int nregs = ix86_function_regparm (funtype, fundecl);
ee2f65b4 3456 if (nregs == 0)
232b8f52
JJ
3457 return GET_MODE_SIZE (Pmode);
3458 }
3459
3460 return 0;
b08de47e 3461}
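/* An illustrative sketch (not part of the original file): for the
   declarations below on 32-bit x86, ix86_return_pops_args is expected to
   return 8 for s (the callee pops its arguments with "ret $8") and 0 for
   c (the caller pops). */
#if 0
void __attribute__((stdcall)) s (int a, int b);
void c (int a, int b);
#endif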
b08de47e
MM
3462\f
3463/* Argument support functions. */
3464
53c17031
JH
 3465/* Return true when REGNO may be used to pass function parameters. */
3466bool
b96a374d 3467ix86_function_arg_regno_p (int regno)
53c17031
JH
3468{
3469 int i;
ccf8e764 3470 const int *parm_regs;
ee2f65b4 3471
53c17031 3472 if (!TARGET_64BIT)
88c6f101
HMC
3473 {
3474 if (TARGET_MACHO)
3475 return (regno < REGPARM_MAX
3476 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3477 else
3478 return (regno < REGPARM_MAX
3479 || (TARGET_MMX && MMX_REGNO_P (regno)
3480 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3481 || (TARGET_SSE && SSE_REGNO_P (regno)
3482 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3483 }
3484
3485 if (TARGET_MACHO)
3486 {
3487 if (SSE_REGNO_P (regno) && TARGET_SSE)
3488 return true;
3489 }
3490 else
3491 {
3492 if (TARGET_SSE && SSE_REGNO_P (regno)
3493 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3494 return true;
3495 }
ee2f65b4 3496
53c17031 3497 /* RAX is used as a hidden argument to va_arg functions. */
29b74761 3498 if (!TARGET_64BIT_MS_ABI && regno == AX_REG)
53c17031 3499 return true;
ee2f65b4 3500
ccf8e764
RH
3501 if (TARGET_64BIT_MS_ABI)
3502 parm_regs = x86_64_ms_abi_int_parameter_registers;
3503 else
3504 parm_regs = x86_64_int_parameter_registers;
53c17031 3505 for (i = 0; i < REGPARM_MAX; i++)
ccf8e764 3506 if (regno == parm_regs[i])
53c17031
JH
3507 return true;
3508 return false;
3509}
3510
fe984136
RH
 3511/* Return true if we do not know how to pass TYPE solely in registers. */
3512
3513static bool
586de218 3514ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
fe984136
RH
3515{
3516 if (must_pass_in_stack_var_size_or_pad (mode, type))
3517 return true;
dcbca208
RH
3518
3519 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3520 The layout_type routine is crafty and tries to trick us into passing
3521 currently unsupported vector types on the stack by using TImode. */
3522 return (!TARGET_64BIT && mode == TImode
3523 && type && TREE_CODE (type) != VECTOR_TYPE);
fe984136
RH
3524}
3525
b08de47e
MM
3526/* Initialize a variable CUM of type CUMULATIVE_ARGS
3527 for a call to a function whose data type is FNTYPE.
3528 For a library call, FNTYPE is 0. */
3529
3530void
b96a374d
AJ
3531init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3532 tree fntype, /* tree ptr for function decl */
3533 rtx libname, /* SYMBOL_REF of library name or 0 */
3534 tree fndecl)
b08de47e 3535{
d6951cae 3536 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
ee2f65b4 3537 memset (cum, 0, sizeof (*cum));
b08de47e
MM
3538
3539 /* Set up the number of registers to use for passing arguments. */
2f84b963 3540 cum->nregs = ix86_regparm;
78fbfc4b
JB
3541 if (TARGET_SSE)
3542 cum->sse_nregs = SSE_REGPARM_MAX;
3543 if (TARGET_MMX)
3544 cum->mmx_nregs = MMX_REGPARM_MAX;
e1be55d0
JH
3545 cum->warn_sse = true;
3546 cum->warn_mmx = true;
d6951cae
JH
3547
 3548 /* Because the type might mismatch between caller and callee, we need to
 3549 use the actual type of the function for local calls.
 3550 FIXME: cgraph_analyze can be told to actually record if a function uses
 3551 va_start, so for local functions maybe_vaarg can be made aggressive,
 3552 helping K&R code.
 3553 FIXME: once the type system is fixed, we won't need this code anymore. */
3554 if (i && i->local)
3555 fntype = TREE_TYPE (fndecl);
f8024378 3556 cum->maybe_vaarg = (fntype
04e1d06b 3557 ? (!prototype_p (fntype) || stdarg_p (fntype))
f8024378 3558 : !libname);
b08de47e 3559
ee2f65b4 3560 if (!TARGET_64BIT)
e91f04de 3561 {
ee2f65b4
RH
3562 /* If there are variable arguments, then we won't pass anything
3563 in registers in 32-bit mode. */
64ceac43 3564 if (stdarg_p (fntype))
e91f04de 3565 {
ee2f65b4
RH
3566 cum->nregs = 0;
3567 cum->sse_nregs = 0;
3568 cum->mmx_nregs = 0;
3569 cum->warn_sse = 0;
3570 cum->warn_mmx = 0;
3571 return;
e91f04de 3572 }
2f84b963 3573
ee2f65b4
RH
 3574 /* Use the ecx and edx registers if the function has the fastcall
 3575 attribute; otherwise look for regparm information. */
3576 if (fntype)
b08de47e 3577 {
ee2f65b4 3578 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
53c17031 3579 {
ee2f65b4
RH
3580 cum->nregs = 2;
3581 cum->fastcall = 1;
53c17031 3582 }
ee2f65b4
RH
3583 else
3584 cum->nregs = ix86_function_regparm (fntype, fndecl);
b08de47e 3585 }
f19e3a64 3586
ee2f65b4
RH
3587 /* Set up the number of SSE registers used for passing SFmode
3588 and DFmode arguments. Warn for mismatching ABI. */
7074bc2e 3589 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
ee2f65b4 3590 }
b08de47e
MM
3591}
3592
6c4ccfd8
RH
3593/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3594 But in the case of vector types, it is some vector mode.
3595
 3596 When we have only some of our vector ISA extensions enabled, there
3597 are some modes for which vector_mode_supported_p is false. For these
3598 modes, the generic vector support in gcc will choose some non-vector mode
5656a184 3599 in order to implement the type. By computing the natural mode, we'll
6c4ccfd8
RH
3600 select the proper ABI location for the operand and not depend on whatever
3601 the middle-end decides to do with these vector types. */
3602
3603static enum machine_mode
586de218 3604type_natural_mode (const_tree type)
6c4ccfd8
RH
3605{
3606 enum machine_mode mode = TYPE_MODE (type);
3607
3608 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3609 {
3610 HOST_WIDE_INT size = int_size_in_bytes (type);
3611 if ((size == 8 || size == 16)
3612 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3613 && TYPE_VECTOR_SUBPARTS (type) > 1)
3614 {
3615 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3616
3617 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3618 mode = MIN_MODE_VECTOR_FLOAT;
3619 else
3620 mode = MIN_MODE_VECTOR_INT;
3621
3622 /* Get the mode which has this inner mode and number of units. */
3623 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3624 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3625 && GET_MODE_INNER (mode) == innermode)
3626 return mode;
3627
d0396b79 3628 gcc_unreachable ();
6c4ccfd8
RH
3629 }
3630 }
3631
3632 return mode;
3633}
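/* An illustrative sketch (not part of the original file): natural modes
   computed for generic vector types, independent of which vector ISA
   extensions happen to be enabled. */
#if 0
typedef int v4si __attribute__ ((vector_size (16)));   /* natural mode V4SImode */
typedef float v2sf __attribute__ ((vector_size (8)));  /* natural mode V2SFmode */
#endif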
3634
3635/* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3636 this may not agree with the mode that the type system has chosen for the
3637 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3638 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3639
3640static rtx
3641gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3642 unsigned int regno)
3643{
3644 rtx tmp;
3645
3646 if (orig_mode != BLKmode)
3647 tmp = gen_rtx_REG (orig_mode, regno);
3648 else
3649 {
3650 tmp = gen_rtx_REG (mode, regno);
3651 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3652 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3653 }
3654
3655 return tmp;
3656}
3657
d1f87653 3658/* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
f710504c 3659 of this code is to classify each 8 bytes of an incoming argument by the register
53c17031
JH
3660 class and assign registers accordingly. */
3661
3662/* Return the union class of CLASS1 and CLASS2.
3663 See the x86-64 PS ABI for details. */
3664
3665static enum x86_64_reg_class
b96a374d 3666merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
53c17031
JH
3667{
3668 /* Rule #1: If both classes are equal, this is the resulting class. */
3669 if (class1 == class2)
3670 return class1;
3671
3672 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3673 the other class. */
3674 if (class1 == X86_64_NO_CLASS)
3675 return class2;
3676 if (class2 == X86_64_NO_CLASS)
3677 return class1;
3678
3679 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3680 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3681 return X86_64_MEMORY_CLASS;
3682
3683 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3684 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3685 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3686 return X86_64_INTEGERSI_CLASS;
3687 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3688 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3689 return X86_64_INTEGER_CLASS;
3690
499accd7
JB
3691 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3692 MEMORY is used. */
3693 if (class1 == X86_64_X87_CLASS
3694 || class1 == X86_64_X87UP_CLASS
3695 || class1 == X86_64_COMPLEX_X87_CLASS
3696 || class2 == X86_64_X87_CLASS
3697 || class2 == X86_64_X87UP_CLASS
3698 || class2 == X86_64_COMPLEX_X87_CLASS)
53c17031
JH
3699 return X86_64_MEMORY_CLASS;
3700
3701 /* Rule #6: Otherwise class SSE is used. */
3702 return X86_64_SSE_CLASS;
3703}
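/* An illustrative sketch (not part of the original file): by rule #4,
   merging INTEGERSI with SSESF yields INTEGERSI; by rule #5, merging SSE
   with X87 yields MEMORY. */
#if 0
struct u { float f; int i; };   /* one eightbyte: SSESF merged with
   INTEGERSI -> INTEGERSI, so u is passed in a general-purpose register */
#endif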
3704
3705/* Classify the argument of type TYPE and mode MODE.
3706 CLASSES will be filled by the register class used to pass each word
3707 of the operand. The number of words is returned. In case the parameter
3708 should be passed in memory, 0 is returned. As a special case for zero
3709 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3710
 3711 BIT_OFFSET is used internally for handling records and specifies the
 3712 offset in bits modulo 256 to avoid overflow cases.
3713
3714 See the x86-64 PS ABI for details.
3715*/
3716
3717static int
586de218 3718classify_argument (enum machine_mode mode, const_tree type,
b96a374d 3719 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
53c17031 3720{
296e4ae8 3721 HOST_WIDE_INT bytes =
53c17031 3722 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
23327dae 3723 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
53c17031 3724
c60ee6f5
JH
3725 /* Variable sized entities are always passed/returned in memory. */
3726 if (bytes < 0)
3727 return 0;
3728
dafc5b82 3729 if (mode != VOIDmode
fe984136 3730 && targetm.calls.must_pass_in_stack (mode, type))
dafc5b82
JH
3731 return 0;
3732
53c17031
JH
3733 if (type && AGGREGATE_TYPE_P (type))
3734 {
3735 int i;
3736 tree field;
3737 enum x86_64_reg_class subclasses[MAX_CLASSES];
3738
3739 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3740 if (bytes > 16)
3741 return 0;
3742
3743 for (i = 0; i < words; i++)
3744 classes[i] = X86_64_NO_CLASS;
3745
 3746 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
 3747 signal the memory class, so handle it as a special case. */
3748 if (!words)
3749 {
3750 classes[0] = X86_64_NO_CLASS;
3751 return 1;
3752 }
3753
3754 /* Classify each field of record and merge classes. */
d0396b79 3755 switch (TREE_CODE (type))
53c17031 3756 {
d0396b79 3757 case RECORD_TYPE:
43f3a59d 3758 /* And now merge the fields of the structure. */
53c17031
JH
3759 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3760 {
3761 if (TREE_CODE (field) == FIELD_DECL)
3762 {
3763 int num;
3764
f7360901
VR
3765 if (TREE_TYPE (field) == error_mark_node)
3766 continue;
3767
53c17031
JH
3768 /* Bitfields are always classified as integer. Handle them
3769 early, since later code would consider them to be
3770 misaligned integers. */
3771 if (DECL_BIT_FIELD (field))
3772 {
9286af97
JH
3773 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3774 i < ((int_bit_position (field) + (bit_offset % 64))
53c17031 3775 + tree_low_cst (DECL_SIZE (field), 0)
b96a374d 3776 + 63) / 8 / 8; i++)
53c17031
JH
3777 classes[i] =
3778 merge_classes (X86_64_INTEGER_CLASS,
3779 classes[i]);
3780 }
3781 else
3782 {
3783 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3784 TREE_TYPE (field), subclasses,
3785 (int_bit_position (field)
3786 + bit_offset) % 256);
3787 if (!num)
3788 return 0;
3789 for (i = 0; i < num; i++)
3790 {
3791 int pos =
db01f480 3792 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
53c17031
JH
3793 classes[i + pos] =
3794 merge_classes (subclasses[i], classes[i + pos]);
3795 }
3796 }
3797 }
3798 }
d0396b79 3799 break;
91ea38f9 3800
d0396b79
NS
3801 case ARRAY_TYPE:
3802 /* Arrays are handled as small records. */
3803 {
3804 int num;
3805 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3806 TREE_TYPE (type), subclasses, bit_offset);
3807 if (!num)
3808 return 0;
91ea38f9 3809
d0396b79
NS
3810 /* The partial classes are now full classes. */
3811 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3812 subclasses[0] = X86_64_SSE_CLASS;
3813 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3814 subclasses[0] = X86_64_INTEGER_CLASS;
5656a184 3815
d0396b79
NS
3816 for (i = 0; i < words; i++)
3817 classes[i] = subclasses[i % num];
5656a184 3818
d0396b79
NS
3819 break;
3820 }
3821 case UNION_TYPE:
3822 case QUAL_UNION_TYPE:
3823 /* Unions are similar to RECORD_TYPE but offset is always 0.
3824 */
53c17031
JH
3825 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3826 {
3827 if (TREE_CODE (field) == FIELD_DECL)
3828 {
3829 int num;
118ed72a
VR
3830
3831 if (TREE_TYPE (field) == error_mark_node)
3832 continue;
3833
53c17031
JH
3834 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3835 TREE_TYPE (field), subclasses,
3836 bit_offset);
3837 if (!num)
3838 return 0;
3839 for (i = 0; i < num; i++)
3840 classes[i] = merge_classes (subclasses[i], classes[i]);
3841 }
3842 }
d0396b79
NS
3843 break;
3844
3845 default:
3846 gcc_unreachable ();
53c17031 3847 }
53c17031
JH
3848
3849 /* Final merger cleanup. */
3850 for (i = 0; i < words; i++)
3851 {
3852 /* If one class is MEMORY, everything should be passed in
3853 memory. */
3854 if (classes[i] == X86_64_MEMORY_CLASS)
3855 return 0;
3856
d6a7951f 3857 /* The X86_64_SSEUP_CLASS should always be preceded by
53c17031
JH
3858 X86_64_SSE_CLASS. */
3859 if (classes[i] == X86_64_SSEUP_CLASS
3860 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3861 classes[i] = X86_64_SSE_CLASS;
3862
d6a7951f 3863 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
3864 if (classes[i] == X86_64_X87UP_CLASS
3865 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3866 classes[i] = X86_64_SSE_CLASS;
3867 }
3868 return words;
3869 }
3870
 3871 /* Compute alignment needed. We align all types to natural boundaries,
 3872 with the exception of XFmode, which is aligned to 64 bits. */
3873 if (mode != VOIDmode && mode != BLKmode)
3874 {
3875 int mode_alignment = GET_MODE_BITSIZE (mode);
3876
3877 if (mode == XFmode)
3878 mode_alignment = 128;
3879 else if (mode == XCmode)
3880 mode_alignment = 256;
2c6b27c3
JH
3881 if (COMPLEX_MODE_P (mode))
3882 mode_alignment /= 2;
f5143c46 3883 /* Misaligned fields are always returned in memory. */
53c17031
JH
3884 if (bit_offset % mode_alignment)
3885 return 0;
3886 }
3887
9e9fb0ce 3888 /* For V1xx modes, just use the base mode. */
10a97ae6 3889 if (VECTOR_MODE_P (mode) && mode != V1DImode
9e9fb0ce
JB
3890 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3891 mode = GET_MODE_INNER (mode);
3892
53c17031
JH
3893 /* Classification of atomic types. */
3894 switch (mode)
3895 {
a81083b2
BE
3896 case SDmode:
3897 case DDmode:
3898 classes[0] = X86_64_SSE_CLASS;
3899 return 1;
3900 case TDmode:
3901 classes[0] = X86_64_SSE_CLASS;
3902 classes[1] = X86_64_SSEUP_CLASS;
3903 return 2;
53c17031
JH
3904 case DImode:
3905 case SImode:
3906 case HImode:
3907 case QImode:
3908 case CSImode:
3909 case CHImode:
3910 case CQImode:
3911 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3912 classes[0] = X86_64_INTEGERSI_CLASS;
3913 else
3914 classes[0] = X86_64_INTEGER_CLASS;
3915 return 1;
3916 case CDImode:
3917 case TImode:
3918 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3919 return 2;
3920 case CTImode:
9e9fb0ce 3921 return 0;
53c17031
JH
3922 case SFmode:
3923 if (!(bit_offset % 64))
3924 classes[0] = X86_64_SSESF_CLASS;
3925 else
3926 classes[0] = X86_64_SSE_CLASS;
3927 return 1;
3928 case DFmode:
3929 classes[0] = X86_64_SSEDF_CLASS;
3930 return 1;
f8a1ebc6 3931 case XFmode:
53c17031
JH
3932 classes[0] = X86_64_X87_CLASS;
3933 classes[1] = X86_64_X87UP_CLASS;
3934 return 2;
f8a1ebc6 3935 case TFmode:
9e9fb0ce
JB
3936 classes[0] = X86_64_SSE_CLASS;
3937 classes[1] = X86_64_SSEUP_CLASS;
53c17031
JH
3938 return 2;
3939 case SCmode:
3940 classes[0] = X86_64_SSE_CLASS;
3941 return 1;
9e9fb0ce
JB
3942 case DCmode:
3943 classes[0] = X86_64_SSEDF_CLASS;
3944 classes[1] = X86_64_SSEDF_CLASS;
3945 return 2;
3946 case XCmode:
499accd7
JB
3947 classes[0] = X86_64_COMPLEX_X87_CLASS;
3948 return 1;
9e9fb0ce 3949 case TCmode:
499accd7 3950 /* This mode is larger than 16 bytes. */
9e9fb0ce 3951 return 0;
e95d6b23
JH
3952 case V4SFmode:
3953 case V4SImode:
495333a6
JH
3954 case V16QImode:
3955 case V8HImode:
3956 case V2DFmode:
3957 case V2DImode:
e95d6b23
JH
3958 classes[0] = X86_64_SSE_CLASS;
3959 classes[1] = X86_64_SSEUP_CLASS;
3960 return 2;
10a97ae6 3961 case V1DImode:
e95d6b23
JH
3962 case V2SFmode:
3963 case V2SImode:
3964 case V4HImode:
3965 case V8QImode:
9e9fb0ce
JB
3966 classes[0] = X86_64_SSE_CLASS;
3967 return 1;
53c17031 3968 case BLKmode:
e95d6b23 3969 case VOIDmode:
53c17031
JH
3970 return 0;
3971 default:
d0396b79 3972 gcc_assert (VECTOR_MODE_P (mode));
5656a184 3973
d0396b79
NS
3974 if (bytes > 16)
3975 return 0;
5656a184 3976
d0396b79 3977 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5656a184 3978
d0396b79
NS
3979 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3980 classes[0] = X86_64_INTEGERSI_CLASS;
3981 else
3982 classes[0] = X86_64_INTEGER_CLASS;
3983 classes[1] = X86_64_INTEGER_CLASS;
3984 return 1 + (bytes > 8);
53c17031
JH
3985 }
3986}
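/* An illustrative sketch (not part of the original file): classifications
   the routine above is expected to produce under the x86-64 ABI. */
#if 0
struct s1 { double d; long l; };  /* SSEDF + INTEGER: one SSE reg, one GPR */
struct s2 { long double ld; };    /* X87 + X87UP: memory when passed as an argument */
struct s3 { char c[24]; };        /* larger than 16 bytes: memory (returns 0) */
#endif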
3987
 3988/* Examine the argument and set the number of registers required in each
f5143c46 3989 class. Return 0 iff the parameter should be passed in memory. */
53c17031 3990static int
586de218 3991examine_argument (enum machine_mode mode, const_tree type, int in_return,
b96a374d 3992 int *int_nregs, int *sse_nregs)
53c17031 3993{
9415ab7d
TN
3994 enum x86_64_reg_class regclass[MAX_CLASSES];
3995 int n = classify_argument (mode, type, regclass, 0);
53c17031
JH
3996
3997 *int_nregs = 0;
3998 *sse_nregs = 0;
3999 if (!n)
4000 return 0;
4001 for (n--; n >= 0; n--)
9415ab7d 4002 switch (regclass[n])
53c17031
JH
4003 {
4004 case X86_64_INTEGER_CLASS:
4005 case X86_64_INTEGERSI_CLASS:
4006 (*int_nregs)++;
4007 break;
4008 case X86_64_SSE_CLASS:
4009 case X86_64_SSESF_CLASS:
4010 case X86_64_SSEDF_CLASS:
4011 (*sse_nregs)++;
4012 break;
4013 case X86_64_NO_CLASS:
4014 case X86_64_SSEUP_CLASS:
4015 break;
4016 case X86_64_X87_CLASS:
4017 case X86_64_X87UP_CLASS:
4018 if (!in_return)
4019 return 0;
4020 break;
499accd7
JB
4021 case X86_64_COMPLEX_X87_CLASS:
4022 return in_return ? 2 : 0;
53c17031 4023 case X86_64_MEMORY_CLASS:
d0396b79 4024 gcc_unreachable ();
53c17031
JH
4025 }
4026 return 1;
4027}
6c4ccfd8 4028
53c17031
JH
 4029/* Construct a container for the argument used by the GCC interface. See
4030 FUNCTION_ARG for the detailed description. */
6c4ccfd8 4031
53c17031 4032static rtx
6c4ccfd8 4033construct_container (enum machine_mode mode, enum machine_mode orig_mode,
586de218 4034 const_tree type, int in_return, int nintregs, int nsseregs,
6c4ccfd8 4035 const int *intreg, int sse_regno)
53c17031 4036{
94e76332
RS
 4037 /* The following static variables record which errors have already been issued. */
4038 static bool issued_sse_arg_error;
4039 static bool issued_sse_ret_error;
4040 static bool issued_x87_ret_error;
4041
53c17031
JH
4042 enum machine_mode tmpmode;
4043 int bytes =
4044 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
9415ab7d 4045 enum x86_64_reg_class regclass[MAX_CLASSES];
53c17031
JH
4046 int n;
4047 int i;
4048 int nexps = 0;
4049 int needed_sseregs, needed_intregs;
4050 rtx exp[MAX_CLASSES];
4051 rtx ret;
4052
9415ab7d 4053 n = classify_argument (mode, type, regclass, 0);
53c17031
JH
4054 if (!n)
4055 return NULL;
6c4ccfd8
RH
4056 if (!examine_argument (mode, type, in_return, &needed_intregs,
4057 &needed_sseregs))
53c17031
JH
4058 return NULL;
4059 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
4060 return NULL;
4061
a5370cf0
RH
4062 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
4063 some less clueful developer tries to use floating-point anyway. */
4064 if (needed_sseregs && !TARGET_SSE)
4065 {
94e76332 4066 if (in_return)
a5370cf0 4067 {
94e76332
RS
4068 if (!issued_sse_ret_error)
4069 {
4070 error ("SSE register return with SSE disabled");
4071 issued_sse_ret_error = true;
4072 }
4073 }
4074 else if (!issued_sse_arg_error)
4075 {
4076 error ("SSE register argument with SSE disabled");
4077 issued_sse_arg_error = true;
a5370cf0
RH
4078 }
4079 return NULL;
4080 }
4081
94e76332
RS
4082 /* Likewise, error if the ABI requires us to return values in the
4083 x87 registers and the user specified -mno-80387. */
4084 if (!TARGET_80387 && in_return)
4085 for (i = 0; i < n; i++)
9415ab7d
TN
4086 if (regclass[i] == X86_64_X87_CLASS
4087 || regclass[i] == X86_64_X87UP_CLASS
4088 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
94e76332
RS
4089 {
4090 if (!issued_x87_ret_error)
4091 {
4092 error ("x87 register return with x87 disabled");
4093 issued_x87_ret_error = true;
4094 }
4095 return NULL;
4096 }
4097
53c17031
JH
4098 /* First construct simple cases. Avoid SCmode, since we want to use
4099 single register to pass this type. */
4100 if (n == 1 && mode != SCmode)
9415ab7d 4101 switch (regclass[0])
53c17031
JH
4102 {
4103 case X86_64_INTEGER_CLASS:
4104 case X86_64_INTEGERSI_CLASS:
4105 return gen_rtx_REG (mode, intreg[0]);
4106 case X86_64_SSE_CLASS:
4107 case X86_64_SSESF_CLASS:
4108 case X86_64_SSEDF_CLASS:
6c4ccfd8 4109 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
53c17031 4110 case X86_64_X87_CLASS:
499accd7 4111 case X86_64_COMPLEX_X87_CLASS:
53c17031
JH
4112 return gen_rtx_REG (mode, FIRST_STACK_REG);
4113 case X86_64_NO_CLASS:
 4114 /* Zero-sized array, struct or class. */
4115 return NULL;
4116 default:
d0396b79 4117 gcc_unreachable ();
53c17031 4118 }
9415ab7d
TN
4119 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
4120 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
e95d6b23 4121 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
ee2f65b4 4122
53c17031 4123 if (n == 2
9415ab7d 4124 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
f8a1ebc6 4125 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
9415ab7d
TN
4126 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
4127 && regclass[1] == X86_64_INTEGER_CLASS
f8a1ebc6 4128 && (mode == CDImode || mode == TImode || mode == TFmode)
53c17031
JH
4129 && intreg[0] + 1 == intreg[1])
4130 return gen_rtx_REG (mode, intreg[0]);
53c17031
JH
4131
4132 /* Otherwise figure out the entries of the PARALLEL. */
4133 for (i = 0; i < n; i++)
4134 {
9415ab7d 4135 switch (regclass[i])
53c17031
JH
4136 {
4137 case X86_64_NO_CLASS:
4138 break;
4139 case X86_64_INTEGER_CLASS:
4140 case X86_64_INTEGERSI_CLASS:
d1f87653 4141 /* Merge TImodes on aligned occasions here too. */
53c17031
JH
4142 if (i * 8 + 8 > bytes)
4143 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
9415ab7d 4144 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
53c17031
JH
4145 tmpmode = SImode;
4146 else
4147 tmpmode = DImode;
 4148 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
4149 if (tmpmode == BLKmode)
4150 tmpmode = DImode;
4151 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4152 gen_rtx_REG (tmpmode, *intreg),
4153 GEN_INT (i*8));
4154 intreg++;
4155 break;
4156 case X86_64_SSESF_CLASS:
4157 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4158 gen_rtx_REG (SFmode,
4159 SSE_REGNO (sse_regno)),
4160 GEN_INT (i*8));
4161 sse_regno++;
4162 break;
4163 case X86_64_SSEDF_CLASS:
4164 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4165 gen_rtx_REG (DFmode,
4166 SSE_REGNO (sse_regno)),
4167 GEN_INT (i*8));
4168 sse_regno++;
4169 break;
4170 case X86_64_SSE_CLASS:
9415ab7d 4171 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
12f5c45e 4172 tmpmode = TImode;
53c17031
JH
4173 else
4174 tmpmode = DImode;
4175 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4176 gen_rtx_REG (tmpmode,
4177 SSE_REGNO (sse_regno)),
4178 GEN_INT (i*8));
12f5c45e
JH
4179 if (tmpmode == TImode)
4180 i++;
53c17031
JH
4181 sse_regno++;
4182 break;
4183 default:
d0396b79 4184 gcc_unreachable ();
53c17031
JH
4185 }
4186 }
1b803355
JJ
4187
4188 /* Empty aligned struct, union or class. */
4189 if (nexps == 0)
4190 return NULL;
4191
53c17031
JH
4192 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
4193 for (i = 0; i < nexps; i++)
4194 XVECEXP (ret, 0, i) = exp [i];
4195 return ret;
4196}
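/* An illustrative sketch (not part of the original file): for a
   hypothetical struct { long a; double b; } argument, the code above is
   expected to build a PARALLEL pairing the INTEGER eightbyte in a GPR
   with the SSEDF eightbyte in an SSE register, roughly

       (parallel [(expr_list (reg:DI di) (const_int 0))
                  (expr_list (reg:DF xmm0) (const_int 8))])  */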
4197
ee2f65b4
RH
4198/* Update the data in CUM to advance over an argument of mode MODE
4199 and data type TYPE. (TYPE is null for libcalls where that information
4200 may not be available.) */
b08de47e 4201
ee2f65b4
RH
4202static void
4203function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4204 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
b08de47e 4205{
ee2f65b4
RH
4206 switch (mode)
4207 {
4208 default:
4209 break;
b08de47e 4210
ee2f65b4
RH
4211 case BLKmode:
4212 if (bytes < 0)
4213 break;
4214 /* FALLTHRU */
b3a1ca49 4215
ee2f65b4
RH
4216 case DImode:
4217 case SImode:
4218 case HImode:
4219 case QImode:
4220 cum->words += words;
4221 cum->nregs -= words;
4222 cum->regno += words;
b3a1ca49 4223
ee2f65b4 4224 if (cum->nregs <= 0)
82a127a9 4225 {
ee2f65b4
RH
4226 cum->nregs = 0;
4227 cum->regno = 0;
82a127a9 4228 }
ee2f65b4 4229 break;
b3a1ca49 4230
ee2f65b4
RH
4231 case DFmode:
4232 if (cum->float_in_sse < 2)
4233 break;
4234 case SFmode:
4235 if (cum->float_in_sse < 1)
4236 break;
4237 /* FALLTHRU */
f19e3a64 4238
ee2f65b4
RH
4239 case TImode:
4240 case V16QImode:
4241 case V8HImode:
4242 case V4SImode:
4243 case V2DImode:
4244 case V4SFmode:
4245 case V2DFmode:
4246 if (!type || !AGGREGATE_TYPE_P (type))
4247 {
4248 cum->sse_words += words;
4249 cum->sse_nregs -= 1;
4250 cum->sse_regno += 1;
4251 if (cum->sse_nregs <= 0)
b3a1ca49 4252 {
ee2f65b4
RH
4253 cum->sse_nregs = 0;
4254 cum->sse_regno = 0;
b3a1ca49 4255 }
ee2f65b4
RH
4256 }
4257 break;
b3a1ca49 4258
ee2f65b4
RH
4259 case V8QImode:
4260 case V4HImode:
4261 case V2SImode:
4262 case V2SFmode:
10a97ae6 4263 case V1DImode:
ee2f65b4
RH
4264 if (!type || !AGGREGATE_TYPE_P (type))
4265 {
4266 cum->mmx_words += words;
4267 cum->mmx_nregs -= 1;
4268 cum->mmx_regno += 1;
4269 if (cum->mmx_nregs <= 0)
b3a1ca49 4270 {
ee2f65b4
RH
4271 cum->mmx_nregs = 0;
4272 cum->mmx_regno = 0;
b3a1ca49 4273 }
82a127a9 4274 }
ee2f65b4 4275 break;
82a127a9 4276 }
b08de47e
MM
4277}
4278
ee2f65b4
RH
4279static void
4280function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4281 tree type, HOST_WIDE_INT words)
4282{
4283 int int_nregs, sse_nregs;
4284
4285 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
4286 cum->words += words;
4287 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
4288 {
4289 cum->nregs -= int_nregs;
4290 cum->sse_nregs -= sse_nregs;
4291 cum->regno += int_nregs;
4292 cum->sse_regno += sse_nregs;
4293 }
4294 else
4295 cum->words += words;
4296}
4297
ccf8e764
RH
4298static void
4299function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
4300 HOST_WIDE_INT words)
4301{
4302 /* Otherwise, this should be passed indirect. */
4303 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
4304
4305 cum->words += words;
4306 if (cum->nregs > 0)
4307 {
4308 cum->nregs -= 1;
4309 cum->regno += 1;
4310 }
4311}
4312
ee2f65b4
RH
4313void
4314function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4315 tree type, int named ATTRIBUTE_UNUSED)
4316{
4317 HOST_WIDE_INT bytes, words;
4318
4319 if (mode == BLKmode)
4320 bytes = int_size_in_bytes (type);
4321 else
4322 bytes = GET_MODE_SIZE (mode);
4323 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4324
4325 if (type)
4326 mode = type_natural_mode (type);
4327
ccf8e764
RH
4328 if (TARGET_64BIT_MS_ABI)
4329 function_arg_advance_ms_64 (cum, bytes, words);
4330 else if (TARGET_64BIT)
ee2f65b4
RH
4331 function_arg_advance_64 (cum, mode, type, words);
4332 else
4333 function_arg_advance_32 (cum, mode, type, bytes, words);
4334}
4335
b08de47e
MM
4336/* Define where to put the arguments to a function.
4337 Value is zero to push the argument on the stack,
4338 or a hard register in which to store the argument.
4339
4340 MODE is the argument's machine mode.
4341 TYPE is the data type of the argument (as a tree).
4342 This is null for libcalls where that information may
4343 not be available.
4344 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4345 the preceding args and about the function being called.
4346 NAMED is nonzero if this argument is a named parameter
4347 (otherwise it is an extra parameter matching an ellipsis). */
4348
ee2f65b4
RH
4349static rtx
4350function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4351 enum machine_mode orig_mode, tree type,
4352 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
b08de47e 4353{
bcf17554 4354 static bool warnedsse, warnedmmx;
b08de47e 4355
ee2f65b4 4356 /* Avoid the AL settings for the Unix64 ABI. */
32ee7d1d 4357 if (mode == VOIDmode)
ee2f65b4
RH
4358 return constm1_rtx;
4359
4360 switch (mode)
b08de47e 4361 {
ee2f65b4
RH
4362 default:
4363 break;
4364
4365 case BLKmode:
4366 if (bytes < 0)
53c17031 4367 break;
ee2f65b4
RH
4368 /* FALLTHRU */
4369 case DImode:
4370 case SImode:
4371 case HImode:
4372 case QImode:
4373 if (words <= cum->nregs)
4374 {
4375 int regno = cum->regno;
53c17031 4376
ee2f65b4 4377 /* Fastcall allocates the first two DWORD (SImode) or
87300e8c
L
4378 smaller arguments to ECX and EDX if it isn't an
4379 aggregate type . */
ee2f65b4
RH
4380 if (cum->fastcall)
4381 {
87300e8c
L
4382 if (mode == BLKmode
4383 || mode == DImode
4384 || (type && AGGREGATE_TYPE_P (type)))
ee2f65b4 4385 break;
b96a374d 4386
ee2f65b4 4387 /* ECX not EAX is the first allocated register. */
29b74761
UB
4388 if (regno == AX_REG)
4389 regno = CX_REG;
ee2f65b4
RH
4390 }
4391 return gen_rtx_REG (mode, regno);
4392 }
4393 break;
b96a374d 4394
ee2f65b4
RH
4395 case DFmode:
4396 if (cum->float_in_sse < 2)
bcf17554 4397 break;
ee2f65b4
RH
4398 case SFmode:
4399 if (cum->float_in_sse < 1)
53c17031 4400 break;
ee2f65b4
RH
4401 /* FALLTHRU */
4402 case TImode:
4403 case V16QImode:
4404 case V8HImode:
4405 case V4SImode:
4406 case V2DImode:
4407 case V4SFmode:
4408 case V2DFmode:
4409 if (!type || !AGGREGATE_TYPE_P (type))
4410 {
4411 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4412 {
4413 warnedsse = true;
4414 warning (0, "SSE vector argument without SSE enabled "
4415 "changes the ABI");
4416 }
4417 if (cum->sse_nregs)
4418 return gen_reg_or_parallel (mode, orig_mode,
4419 cum->sse_regno + FIRST_SSE_REG);
4420 }
4421 break;
b08de47e 4422
ee2f65b4
RH
4423 case V8QImode:
4424 case V4HImode:
4425 case V2SImode:
4426 case V2SFmode:
10a97ae6 4427 case V1DImode:
ee2f65b4
RH
4428 if (!type || !AGGREGATE_TYPE_P (type))
4429 {
4430 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4431 {
4432 warnedmmx = true;
4433 warning (0, "MMX vector argument without MMX enabled "
4434 "changes the ABI");
4435 }
4436 if (cum->mmx_nregs)
4437 return gen_reg_or_parallel (mode, orig_mode,
4438 cum->mmx_regno + FIRST_MMX_REG);
4439 }
4440 break;
4441 }
b08de47e 4442
ee2f65b4
RH
4443 return NULL_RTX;
4444}
b08de47e 4445
ee2f65b4
RH
4446static rtx
4447function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4448 enum machine_mode orig_mode, tree type)
4449{
4450 /* Handle a hidden AL argument containing number of registers
4451 for varargs x86-64 functions. */
4452 if (mode == VOIDmode)
4453 return GEN_INT (cum->maybe_vaarg
4454 ? (cum->sse_nregs < 0
4455 ? SSE_REGPARM_MAX
4456 : cum->sse_regno)
4457 : -1);
4458
4459 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4460 cum->sse_nregs,
4461 &x86_64_int_parameter_registers [cum->regno],
4462 cum->sse_regno);
4463}
b08de47e 4464
ccf8e764
RH
4465static rtx
4466function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
893125e0
KT
4467 enum machine_mode orig_mode, int named,
4468 HOST_WIDE_INT bytes)
ccf8e764
RH
4469{
4470 unsigned int regno;
4471
4472 /* Avoid the AL settings for the Unix64 ABI. */
4473 if (mode == VOIDmode)
4474 return constm1_rtx;
4475
4476 /* If we've run out of registers, it goes on the stack. */
4477 if (cum->nregs == 0)
4478 return NULL_RTX;
4479
4480 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4481
4482 /* Only floating point modes are passed in anything but integer regs. */
4483 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4484 {
4485 if (named)
4486 regno = cum->regno + FIRST_SSE_REG;
4487 else
4488 {
4489 rtx t1, t2;
4490
4491 /* Unnamed floating parameters are passed in both the
4492 SSE and integer registers. */
4493 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4494 t2 = gen_rtx_REG (mode, regno);
4495 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4496 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4497 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4498 }
4499 }
893125e0
KT
 4500 /* Handle aggregate types passed in a register. */
4501 if (orig_mode == BLKmode)
4502 {
4503 if (bytes > 0 && bytes <= 8)
4504 mode = (bytes > 4 ? DImode : SImode);
4505 if (mode == BLKmode)
4506 mode = DImode;
4507 }
4508
4509 return gen_reg_or_parallel (mode, orig_mode, regno);
4510}
4511
4512rtx
4513function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
ccf8e764 4514 tree type, int named)
4515{
4516 enum machine_mode mode = omode;
4517 HOST_WIDE_INT bytes, words;
4518
4519 if (mode == BLKmode)
4520 bytes = int_size_in_bytes (type);
4521 else
4522 bytes = GET_MODE_SIZE (mode);
4523 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4524
4525 /* To simplify the code below, represent vector types with a vector mode
4526 even if MMX/SSE are not active. */
4527 if (type && TREE_CODE (type) == VECTOR_TYPE)
4528 mode = type_natural_mode (type);
4529
ccf8e764 4530 if (TARGET_64BIT_MS_ABI)
893125e0 4531 return function_arg_ms_64 (cum, mode, omode, named, bytes);
ccf8e764 4532 else if (TARGET_64BIT)
4533 return function_arg_64 (cum, mode, omode, type);
4534 else
4535 return function_arg_32 (cum, mode, omode, type, bytes, words);
b08de47e 4536}
53c17031 4537
4538/* A C expression that indicates when an argument must be passed by
4539 reference. If nonzero for an argument, a copy of that argument is
4540 made in memory and a pointer to the argument is passed instead of
4541 the argument itself. The pointer is passed in whatever way is
4542 appropriate for passing a pointer to that type. */
4543
4544static bool
4545ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4546 enum machine_mode mode ATTRIBUTE_UNUSED,
586de218 4547 const_tree type, bool named ATTRIBUTE_UNUSED)
09b2e78d 4548{
893125e0 4549 /* See Windows x64 Software Convention. */
4550 if (TARGET_64BIT_MS_ABI)
4551 {
893125e0 4552 int msize = (int) GET_MODE_SIZE (mode);
4553 if (type)
4554 {
4555 /* Arrays are passed by reference. */
4556 if (TREE_CODE (type) == ARRAY_TYPE)
4557 return true;
4558
4559 if (AGGREGATE_TYPE_P (type))
4560 {
4561 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4562 are passed by reference. */
893125e0 4563 msize = int_size_in_bytes (type);
4564 }
4565 }
4566
4567 /* __m128 is passed by reference. */
4568 switch (msize) {
4569 case 1: case 2: case 4: case 8:
4570 break;
4571 default:
4572 return true;
4573 }
4574 }
4575 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
ee2f65b4 4576 return 1;
4577
4578 return 0;
4579}
4580
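/* Illustrative consequence of the Windows x64 rules checked above
   (hypothetical types, not from the sources): */
#if 0
struct s8 { long long a; };        /* 8 bytes: passed by value in a register */
struct s24 { long long a, b, c; }; /* 24 bytes: the caller passes a pointer  */
typedef float m128_like __attribute__ ((vector_size (16)));
                                   /* 16 bytes: also passed by reference     */
#endif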
8b978a57 4581/* Return true when TYPE should be 128bit aligned for 32bit argument passing
90d5887b 4582 ABI. Only called if TARGET_SSE. */
8b978a57 4583static bool
b96a374d 4584contains_128bit_aligned_vector_p (tree type)
4585{
4586 enum machine_mode mode = TYPE_MODE (type);
4587 if (SSE_REG_MODE_P (mode)
4588 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4589 return true;
4590 if (TYPE_ALIGN (type) < 128)
4591 return false;
4592
4593 if (AGGREGATE_TYPE_P (type))
4594 {
2a43945f 4595 /* Walk the aggregates recursively. */
d0396b79 4596 switch (TREE_CODE (type))
8b978a57 4597 {
4598 case RECORD_TYPE:
4599 case UNION_TYPE:
4600 case QUAL_UNION_TYPE:
4601 {
4602 tree field;
5656a184 4603
1faf92ae 4604 /* Walk all the structure fields. */
4605 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4606 {
4607 if (TREE_CODE (field) == FIELD_DECL
4608 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
fa743e8c 4609 return true;
4610 }
4611 break;
4612 }
4613
4614 case ARRAY_TYPE:
4615 /* Just in case some language passes arrays by value. */
4616 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4617 return true;
5139c66b 4618 break;
5656a184 4619
4620 default:
4621 gcc_unreachable ();
8b978a57 4622 }
4623 }
4624 return false;
4625}
4626
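/* Illustrative input (assumed, not from the sources): a struct wrapping an
   SSE vector is found by the recursive walk above and therefore gets
   128-bit argument alignment on ia32.  */
#if 0
typedef float v4sf __attribute__ ((vector_size (16)));
struct wrapped { v4sf v; };  /* contains_128bit_aligned_vector_p: true */
#endif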
4627/* Gives the alignment boundary, in bits, of an argument with the
4628 specified mode and type. */
4629
4630int
b96a374d 4631ix86_function_arg_boundary (enum machine_mode mode, tree type)
4632{
4633 int align;
4634 if (type)
4635 align = TYPE_ALIGN (type);
4636 else
4637 align = GET_MODE_ALIGNMENT (mode);
4638 if (align < PARM_BOUNDARY)
4639 align = PARM_BOUNDARY;
4640 /* Decimal floating point is aligned to its natural boundary. */
4641 if (!TARGET_64BIT && !VALID_DFP_MODE_P (mode))
4642 {
4643 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4644 make an exception for SSE modes since these require 128bit
b96a374d 4645 alignment.
4646
4647 The handling here differs from field_alignment. ICC aligns MMX
4648 arguments to 4 byte boundaries, while structure fields are aligned
4649 to 8 byte boundaries. */
4650 if (!TARGET_SSE)
4651 align = PARM_BOUNDARY;
4652 else if (!type)
4653 {
4654 if (!SSE_REG_MODE_P (mode))
4655 align = PARM_BOUNDARY;
4656 }
4657 else
4658 {
4659 if (!contains_128bit_aligned_vector_p (type))
4660 align = PARM_BOUNDARY;
4661 }
8b978a57 4662 }
4663 if (align > BIGGEST_ALIGNMENT)
4664 align = BIGGEST_ALIGNMENT;
4665 return align;
4666}
4667
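/* Resulting boundaries on ia32 with SSE enabled (a sketch, assumed values):
     SImode scalar    -> 32  (PARM_BOUNDARY)
     DImode scalar    -> 32  (not an SSE mode, dropped to PARM_BOUNDARY)
     V4SFmode vector  -> 128 (SSE modes keep their natural alignment)  */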
4668/* Return true if N is a possible register number of a function value. */
ee2f65b4 4669
53c17031 4670bool
b96a374d 4671ix86_function_value_regno_p (int regno)
53c17031 4672{
ee2f65b4 4673 switch (regno)
88c6f101 4674 {
4675 case 0:
4676 return true;
aa941a60 4677
ee2f65b4 4678 case FIRST_FLOAT_REG:
4679 if (TARGET_64BIT_MS_ABI)
4680 return false;
ee2f65b4 4681 return TARGET_FLOAT_RETURNS_IN_80387;
aa941a60 4682
4683 case FIRST_SSE_REG:
4684 return TARGET_SSE;
4685
4686 case FIRST_MMX_REG:
4687 if (TARGET_MACHO || TARGET_64BIT)
4688 return false;
4689 return TARGET_MMX;
88c6f101 4690 }
4691
4692 return false;
4693}
4694
4695/* Define how to find the value returned by a function.
4696 VALTYPE is the data type of the value (as a tree).
4697 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4698 otherwise, FUNC is 0. */
4699
4700static rtx
4701function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
586de218 4702 const_tree fntype, const_tree fn)
53c17031 4703{
ee2f65b4 4704 unsigned int regno;
b3a1ca49 4705
4706 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4707 we normally prevent this case when mmx is not available. However
4708 some ABIs may require the result to be returned like DImode. */
4709 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4710 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4711
4712 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4713 we prevent this case when sse is not available. However some ABIs
4714 may require the result to be returned like integer TImode. */
4715 else if (mode == TImode
4716 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4717 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4718
4719 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4720 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4721 regno = FIRST_FLOAT_REG;
4722 else
4723 /* Most things go in %eax. */
29b74761 4724 regno = AX_REG;
4f3f76e6 4725
27ac40e2 4726 /* Override FP return register with %xmm0 for local functions when
ee2f65b4 4727 SSE math is enabled or for functions with sseregparm attribute. */
27ac40e2 4728 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
53c17031 4729 {
7074bc2e 4730 int sse_level = ix86_function_sseregparm (fntype, fn, false);
4731 if ((sse_level >= 1 && mode == SFmode)
4732 || (sse_level == 2 && mode == DFmode))
4733 regno = FIRST_SSE_REG;
53c17031 4734 }
4735
4736 return gen_rtx_REG (orig_mode, regno);
4737}
4738
4739static rtx
4740function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
586de218 4741 const_tree valtype)
4742{
4743 rtx ret;
4744
4745 /* Handle libcalls, which don't provide a type node. */
4746 if (valtype == NULL)
cb1119b7 4747 {
4748 switch (mode)
4749 {
4750 case SFmode:
4751 case SCmode:
4752 case DFmode:
4753 case DCmode:
4754 case TFmode:
4755 case SDmode:
4756 case DDmode:
4757 case TDmode:
4758 return gen_rtx_REG (mode, FIRST_SSE_REG);
4759 case XFmode:
4760 case XCmode:
4761 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4762 case TCmode:
4763 return NULL;
4764 default:
29b74761 4765 return gen_rtx_REG (mode, AX_REG);
ee2f65b4 4766 }
cb1119b7 4767 }
4768
4769 ret = construct_container (mode, orig_mode, valtype, 1,
4770 REGPARM_MAX, SSE_REGPARM_MAX,
4771 x86_64_int_return_registers, 0);
4772
4773 /* For zero sized structures, construct_container returns NULL, but we
4774 need to keep the rest of the compiler happy by returning a meaningful value. */
4775 if (!ret)
29b74761 4776 ret = gen_rtx_REG (orig_mode, AX_REG);
4777
4778 return ret;
4779}
4780
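/* For example (illustrative): a libcall returning DFmode gets %xmm0 from
   the switch above, one returning XFmode gets %st(0), and a plain integer
   or pointer result ends up in %rax.  */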
4781static rtx
4782function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4783{
29b74761 4784 unsigned int regno = AX_REG;
4785
4786 if (TARGET_SSE)
4787 {
4788 switch (GET_MODE_SIZE (mode))
4789 {
4790 case 16:
4791 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4792 && !COMPLEX_MODE_P (mode))
4793 regno = FIRST_SSE_REG;
4794 break;
4795 case 8:
4796 case 4:
4797 if (mode == SFmode || mode == DFmode)
4798 regno = FIRST_SSE_REG;
4799 break;
4800 default:
4801 break;
4802 }
ccf8e764 4803 }
4804 return gen_rtx_REG (orig_mode, regno);
4805}
4806
ee2f65b4 4807static rtx
586de218 4808ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
ee2f65b4 4809 enum machine_mode orig_mode, enum machine_mode mode)
53c17031 4810{
586de218 4811 const_tree fn, fntype;
4812
4813 fn = NULL_TREE;
4814 if (fntype_or_decl && DECL_P (fntype_or_decl))
4815 fn = fntype_or_decl;
4816 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
a30b6839 4817
4818 if (TARGET_64BIT_MS_ABI)
4819 return function_value_ms_64 (orig_mode, mode);
4820 else if (TARGET_64BIT)
4821 return function_value_64 (orig_mode, mode, valtype);
4822 else
4823 return function_value_32 (orig_mode, mode, fntype, fn);
4824}
4825
4826static rtx
586de218 4827ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4828 bool outgoing ATTRIBUTE_UNUSED)
4829{
4830 enum machine_mode mode, orig_mode;
4831
4832 orig_mode = TYPE_MODE (valtype);
4833 mode = type_natural_mode (valtype);
4834 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4835}
4836
4837rtx
4838ix86_libcall_value (enum machine_mode mode)
4839{
4840 return ix86_function_value_1 (NULL, NULL, mode, mode);
4841}
4842
4843/* Return true iff type is returned in memory. */
4844
4845static int
586de218 4846return_in_memory_32 (const_tree type, enum machine_mode mode)
4847{
4848 HOST_WIDE_INT size;
4849
4850 if (mode == BLKmode)
4851 return 1;
4852
4853 size = int_size_in_bytes (type);
4854
4855 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4856 return 0;
4857
4858 if (VECTOR_MODE_P (mode) || mode == TImode)
53c17031 4859 {
4860 /* User-created vectors small enough to fit in EAX. */
4861 if (size < 8)
5e062767 4862 return 0;
a30b6839 4863
4864 /* MMX/3dNow values are returned in MM0,
4865 except when MMX doesn't exist. */
a30b6839 4866 if (size == 8)
74c4a88a 4867 return (TARGET_MMX ? 0 : 1);
a30b6839 4868
0397ac35 4869 /* SSE values are returned in XMM0, except when it doesn't exist. */
a30b6839 4870 if (size == 16)
0397ac35 4871 return (TARGET_SSE ? 0 : 1);
53c17031 4872 }
a30b6839 4873
cf2348cb 4874 if (mode == XFmode)
a30b6839 4875 return 0;
f8a1ebc6 4876
4877 if (mode == TDmode)
4878 return 1;
4879
4880 if (size > 12)
4881 return 1;
4882 return 0;
4883}
4884
ee2f65b4 4885static int
586de218 4886return_in_memory_64 (const_tree type, enum machine_mode mode)
4887{
4888 int needed_intregs, needed_sseregs;
4889 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4890}
4891
ccf8e764 4892static int
586de218 4893return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
4894{
4895 HOST_WIDE_INT size = int_size_in_bytes (type);
4896
4897 /* __m128 is returned in xmm0. */
4898 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4899 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
4900 return 0;
4901
4902 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
4903 return (size != 1 && size != 2 && size != 4 && size != 8);
4904}
4905
ee2f65b4 4906int
586de218 4907ix86_return_in_memory (const_tree type)
ee2f65b4 4908{
586de218 4909 const enum machine_mode mode = type_natural_mode (type);
ee2f65b4 4910
4911 if (TARGET_64BIT_MS_ABI)
4912 return return_in_memory_ms_64 (type, mode);
4913 else if (TARGET_64BIT)
4914 return return_in_memory_64 (type, mode);
4915 else
4916 return return_in_memory_32 (type, mode);
4917}
4918
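/* Illustrative results on ia32 (assumed types, SSE enabled): */
#if 0
struct s12 { int a, b, c; };  /* BLKmode, 12 bytes: returned in memory */
typedef int v4si __attribute__ ((vector_size (16)));
                              /* with SSE: %xmm0; without SSE: memory  */
#endif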
4919/* Return false iff TYPE is returned in memory. This version is used
4920 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4921 but differs notably in that when MMX is available, 8-byte vectors
4922 are returned in memory, rather than in MMX registers. */
4923
4f3f76e6 4924int
586de218 4925ix86_sol10_return_in_memory (const_tree type)
29173496 4926{
e797f7e1 4927 int size;
4928 enum machine_mode mode = type_natural_mode (type);
4929
4930 if (TARGET_64BIT)
4931 return return_in_memory_64 (type, mode);
4932
4933 if (mode == BLKmode)
4934 return 1;
4935
4936 size = int_size_in_bytes (type);
4937
4938 if (VECTOR_MODE_P (mode))
4939 {
4940 /* Return in memory only if MMX registers *are* available. This
4941 seems backwards, but it is consistent with the existing
4942 Solaris x86 ABI. */
4943 if (size == 8)
4944 return TARGET_MMX;
4945 if (size == 16)
4946 return !TARGET_SSE;
4947 }
4948 else if (mode == TImode)
4949 return !TARGET_SSE;
4950 else if (mode == XFmode)
4951 return 0;
4952
4953 return size > 12;
4954}
4955
4956/* When returning SSE vector types, we have a choice of either
4957 (1) being abi incompatible with a -march switch, or
4958 (2) generating an error.
4959 Given no good solution, I think the safest thing is one warning.
4960 The user won't be able to use -Werror, but....
4961
4962 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4963 called in response to actually generating a caller or callee that
4964 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4965 via aggregate_value_p for general type probing from tree-ssa. */
4966
4967static rtx
4968ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4969{
74c4a88a 4970 static bool warnedsse, warnedmmx;
0397ac35 4971
ee2f65b4 4972 if (!TARGET_64BIT && type)
4973 {
4974 /* Look at the return type of the function, not the function type. */
4975 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4976
4977 if (!TARGET_SSE && !warnedsse)
4978 {
4979 if (mode == TImode
4980 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4981 {
4982 warnedsse = true;
4983 warning (0, "SSE vector return without SSE enabled "
4984 "changes the ABI");
4985 }
4986 }
4987
4988 if (!TARGET_MMX && !warnedmmx)
0397ac35 4989 {
4990 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4991 {
4992 warnedmmx = true;
4993 warning (0, "MMX vector return without MMX enabled "
4994 "changes the ABI");
4995 }
4996 }
4997 }
4998
4999 return NULL;
5000}
5001
5002\f
5003/* Create the va_list data type. */
53c17031 5004
5005static tree
5006ix86_build_builtin_va_list (void)
5007{
5008 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 5009
ad919812 5010 /* For i386 we use plain pointer to argument area. */
ccf8e764 5011 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5012 return build_pointer_type (char_type_node);
5013
f1e639b1 5014 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5015 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
5016
fce5a9f2 5017 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
ad919812 5018 unsigned_type_node);
fce5a9f2 5019 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
5020 unsigned_type_node);
5021 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
5022 ptr_type_node);
5023 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
5024 ptr_type_node);
5025
5026 va_list_gpr_counter_field = f_gpr;
5027 va_list_fpr_counter_field = f_fpr;
5028
5029 DECL_FIELD_CONTEXT (f_gpr) = record;
5030 DECL_FIELD_CONTEXT (f_fpr) = record;
5031 DECL_FIELD_CONTEXT (f_ovf) = record;
5032 DECL_FIELD_CONTEXT (f_sav) = record;
5033
5034 TREE_CHAIN (record) = type_decl;
5035 TYPE_NAME (record) = type_decl;
5036 TYPE_FIELDS (record) = f_gpr;
5037 TREE_CHAIN (f_gpr) = f_fpr;
5038 TREE_CHAIN (f_fpr) = f_ovf;
5039 TREE_CHAIN (f_ovf) = f_sav;
5040
5041 layout_type (record);
5042
5043 /* The correct type is an array type of one element. */
5044 return build_array_type (record, build_index_type (size_zero_node));
5045}
5046
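/* For reference (a sketch of the user-visible type built above, per the
   x86-64 psABI):
     typedef struct __va_list_tag {
       unsigned int gp_offset;    // bytes into reg_save_area for GP regs
       unsigned int fp_offset;    // bytes into reg_save_area for SSE regs
       void *overflow_arg_area;   // stack arguments past the save area
       void *reg_save_area;
     } va_list[1];  */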
a0524eb3 5047/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
ad919812 5048
a0524eb3 5049static void
ee2f65b4 5050setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
ad919812 5051{
ee2f65b4 5052 rtx save_area, mem;
5053 rtx label;
5054 rtx label_ref;
5055 rtx tmp_reg;
5056 rtx nsse_reg;
4862826d 5057 alias_set_type set;
5058 int i;
5059
5060 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
5061 return;
5062
5063 /* Indicate that space should be allocated on the stack for the varargs save area. */
5064 ix86_save_varrargs_registers = 1;
5065 /* We need 16-byte stack alignment to save SSE registers. If the user
5066 asked for a lower preferred_stack_boundary, let's just hope that they
4f3f76e6 5067 know what they are doing and won't pass SSE values to varargs functions.
5068
5069 We may also end up assuming that only 64bit values are stored in SSE
5070 registers, which lets some floating point programs work. */
35dd7cc3 5071 if (ix86_preferred_stack_boundary >= BIGGEST_ALIGNMENT)
cb91fab0 5072 crtl->stack_alignment_needed = BIGGEST_ALIGNMENT;
5474eed5 5073
ee2f65b4 5074 save_area = frame_pointer_rtx;
5075 set = get_varargs_alias_set ();
5076
ee2f65b4 5077 for (i = cum->regno;
9d30f3c1 5078 i < ix86_regparm
ee2f65b4 5079 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9d30f3c1 5080 i++)
5081 {
5082 mem = gen_rtx_MEM (Pmode,
5083 plus_constant (save_area, i * UNITS_PER_WORD));
8476af98 5084 MEM_NOTRAP_P (mem) = 1;
0692acba 5085 set_mem_alias_set (mem, set);
5086 emit_move_insn (mem, gen_rtx_REG (Pmode,
5087 x86_64_int_parameter_registers[i]));
5088 }
5089
ee2f65b4 5090 if (cum->sse_nregs && cfun->va_list_fpr_size)
5091 {
5092 /* Now emit code to save SSE registers. The AX parameter contains the
d1f87653 5093 number of SSE parameter registers used to call this function. We use
5094 the sse_prologue_save insn template, which produces a computed jump
5095 across the SSE saves. We need some preparation work to get this working. */
5096
5097 label = gen_label_rtx ();
5098 label_ref = gen_rtx_LABEL_REF (Pmode, label);
5099
5100 /* Compute address to jump to :
5101 label - eax*4 + nnamed_sse_arguments*4 (each save insn is 4 bytes) */
5102 tmp_reg = gen_reg_rtx (Pmode);
5103 nsse_reg = gen_reg_rtx (Pmode);
29b74761 5104 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
ad919812 5105 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 5106 gen_rtx_MULT (Pmode, nsse_reg,
ad919812 5107 GEN_INT (4))));
ee2f65b4 5108 if (cum->sse_regno)
5109 emit_move_insn
5110 (nsse_reg,
5111 gen_rtx_CONST (DImode,
5112 gen_rtx_PLUS (DImode,
5113 label_ref,
ee2f65b4 5114 GEN_INT (cum->sse_regno * 4))));
5115 else
5116 emit_move_insn (nsse_reg, label_ref);
5117 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
5118
5119 /* Compute the address of the memory block we save into. We always use a
5120 pointer pointing 127 bytes after the first byte to store - this is
5121 needed to keep the instruction size limited to 4 bytes. */
5122 tmp_reg = gen_reg_rtx (Pmode);
5123 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5124 plus_constant (save_area,
5125 8 * REGPARM_MAX + 127)));
ad919812 5126 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
8476af98 5127 MEM_NOTRAP_P (mem) = 1;
14f73b5a 5128 set_mem_alias_set (mem, set);
8ac61af7 5129 set_mem_align (mem, BITS_PER_WORD);
5130
5131 /* And finally do the dirty job! */
8ac61af7 5132 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
ee2f65b4 5133 GEN_INT (cum->sse_regno), label));
ad919812 5134 }
5135}
5136
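/* Layout of the register save area filled in above (x86-64, assumed):
     bytes   0 ..  47 : rdi, rsi, rdx, rcx, r8, r9  (8 bytes each)
     bytes  48 .. 175 : xmm0 .. xmm7                (16 bytes each)
   gp_offset and fp_offset in the va_list index into this block.  */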
5137static void
5138setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
5139{
4862826d 5140 alias_set_type set = get_varargs_alias_set ();
5141 int i;
5142
5143 for (i = cum->regno; i < REGPARM_MAX; i++)
5144 {
5145 rtx reg, mem;
5146
5147 mem = gen_rtx_MEM (Pmode,
5148 plus_constant (virtual_incoming_args_rtx,
5149 i * UNITS_PER_WORD));
5150 MEM_NOTRAP_P (mem) = 1;
5151 set_mem_alias_set (mem, set);
5152
5153 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
5154 emit_move_insn (mem, reg);
5155 }
5156}
5157
5158static void
5159ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5160 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5161 int no_rtl)
5162{
5163 CUMULATIVE_ARGS next_cum;
5164 tree fntype;
5165
5166 /* This argument doesn't appear to be used anymore. Which is good,
5167 because the old code here didn't suppress rtl generation. */
5168 gcc_assert (!no_rtl);
5169
5170 if (!TARGET_64BIT)
5171 return;
5172
5173 fntype = TREE_TYPE (current_function_decl);
ad919812 5174
5175 /* For varargs, we do not want to skip the dummy va_dcl argument.
5176 For stdargs, we do want to skip the last named argument. */
5177 next_cum = *cum;
04e1d06b 5178 if (stdarg_p (fntype))
5179 function_arg_advance (&next_cum, mode, type, 1);
5180
5181 if (TARGET_64BIT_MS_ABI)
5182 setup_incoming_varargs_ms_64 (&next_cum);
5183 else
5184 setup_incoming_varargs_64 (&next_cum);
5185}
5186
5187/* Implement va_start. */
5188
d7bd8aeb 5189static void
b96a374d 5190ix86_va_start (tree valist, rtx nextarg)
5191{
5192 HOST_WIDE_INT words, n_gpr, n_fpr;
5193 tree f_gpr, f_fpr, f_ovf, f_sav;
5194 tree gpr, fpr, ovf, sav, t;
3db8a113 5195 tree type;
5196
5197 /* Only 64bit target needs something special. */
ccf8e764 5198 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
ad919812 5199 {
e5faf155 5200 std_expand_builtin_va_start (valist, nextarg);
5201 return;
5202 }
5203
5204 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5205 f_fpr = TREE_CHAIN (f_gpr);
5206 f_ovf = TREE_CHAIN (f_fpr);
5207 f_sav = TREE_CHAIN (f_ovf);
5208
5209 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
5210 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5211 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5212 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5213 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5214
5215 /* Count number of gp and fp argument registers used. */
5216 words = crtl->args.info.words;
5217 n_gpr = crtl->args.info.regno;
5218 n_fpr = crtl->args.info.sse_regno;
ad919812 5219
5220 if (cfun->va_list_gpr_size)
5221 {
3db8a113 5222 type = TREE_TYPE (gpr);
07beea0d 5223 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
3db8a113 5224 build_int_cst (type, n_gpr * 8));
5225 TREE_SIDE_EFFECTS (t) = 1;
5226 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5227 }
ad919812 5228
5229 if (cfun->va_list_fpr_size)
5230 {
3db8a113 5231 type = TREE_TYPE (fpr);
07beea0d 5232 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
3db8a113 5233 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
5234 TREE_SIDE_EFFECTS (t) = 1;
5235 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5236 }
5237
5238 /* Find the overflow area. */
5239 type = TREE_TYPE (ovf);
5240 t = make_tree (type, virtual_incoming_args_rtx);
ad919812 5241 if (words != 0)
5242 t = build2 (POINTER_PLUS_EXPR, type, t,
5243 size_int (words * UNITS_PER_WORD));
07beea0d 5244 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
5245 TREE_SIDE_EFFECTS (t) = 1;
5246 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5247
5248 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
5249 {
5250 /* Find the register save area.
5251 The function prologue saves it right above the stack frame. */
5252 type = TREE_TYPE (sav);
5253 t = make_tree (type, frame_pointer_rtx);
07beea0d 5254 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
5255 TREE_SIDE_EFFECTS (t) = 1;
5256 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5257 }
5258}
5259
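/* Worked example (assumed): for
       int f (int a, double b, ...)
   the code above initializes
       gp_offset = 8    (one GP register consumed by A)
       fp_offset = 64   (48 + one SSE register consumed by B)
       overflow_arg_area = address of the first stack argument
       reg_save_area     = base of the register save area  */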
5260/* Implement va_arg. */
cd3ce9b4 5261
2ed941ec 5262static tree
23a60a04 5263ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
cd3ce9b4 5264{
5265 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
5266 tree f_gpr, f_fpr, f_ovf, f_sav;
5267 tree gpr, fpr, ovf, sav, t;
5268 int size, rsize;
5269 tree lab_false, lab_over = NULL_TREE;
5270 tree addr, t2;
5271 rtx container;
5272 int indirect_p = 0;
5273 tree ptrtype;
52cf10a3 5274 enum machine_mode nat_mode;
5275
5276 /* Only 64bit target needs something special. */
ccf8e764 5277 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
23a60a04 5278 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5279
5280 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5281 f_fpr = TREE_CHAIN (f_gpr);
5282 f_ovf = TREE_CHAIN (f_fpr);
5283 f_sav = TREE_CHAIN (f_ovf);
5284
c2433d7d 5285 valist = build_va_arg_indirect_ref (valist);
5286 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5287 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5288 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5289 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
cd3ce9b4 5290
5291 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5292 if (indirect_p)
5293 type = build_pointer_type (type);
cd3ce9b4 5294 size = int_size_in_bytes (type);
5295 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5296
5297 nat_mode = type_natural_mode (type);
5298 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
5299 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
5300
5301 /* Pull the value out of the saved registers. */
5302
5303 addr = create_tmp_var (ptr_type_node, "addr");
5304 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
5305
5306 if (container)
5307 {
5308 int needed_intregs, needed_sseregs;
e52a6df5 5309 bool need_temp;
5310 tree int_addr, sse_addr;
5311
5312 lab_false = create_artificial_label ();
5313 lab_over = create_artificial_label ();
5314
52cf10a3 5315 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
cd3ce9b4 5316
5317 need_temp = (!REG_P (container)
5318 && ((needed_intregs && TYPE_ALIGN (type) > 64)
5319 || TYPE_ALIGN (type) > 128));
5320
5321 /* In case we are passing a structure, verify that it is a consecutive
5322 block on the register save area. If not, we need to do moves. */
5323 if (!need_temp && !REG_P (container))
5324 {
5325 /* Verify that all registers are strictly consecutive */
5326 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
5327 {
5328 int i;
5329
5330 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5331 {
5332 rtx slot = XVECEXP (container, 0, i);
5333 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
5334 || INTVAL (XEXP (slot, 1)) != i * 16)
5335 need_temp = 1;
5336 }
5337 }
5338 else
5339 {
5340 int i;
5341
5342 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5343 {
5344 rtx slot = XVECEXP (container, 0, i);
5345 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
5346 || INTVAL (XEXP (slot, 1)) != i * 8)
5347 need_temp = 1;
5348 }
5349 }
5350 }
5351 if (!need_temp)
5352 {
5353 int_addr = addr;
5354 sse_addr = addr;
5355 }
5356 else
5357 {
5358 int_addr = create_tmp_var (ptr_type_node, "int_addr");
5359 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
5360 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5361 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5362 }
56d76b69 5363
5364 /* First ensure that we fit completely in registers. */
5365 if (needed_intregs)
5366 {
4a90aeeb 5367 t = build_int_cst (TREE_TYPE (gpr),
7d60be94 5368 (REGPARM_MAX - needed_intregs + 1) * 8);
5369 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5370 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
47a25a46 5371 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5372 gimplify_and_add (t, pre_p);
5373 }
5374 if (needed_sseregs)
5375 {
5376 t = build_int_cst (TREE_TYPE (fpr),
5377 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7d60be94 5378 + REGPARM_MAX * 8);
5379 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5380 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
47a25a46 5381 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5382 gimplify_and_add (t, pre_p);
5383 }
5384
5385 /* Compute index to start of area used for integer regs. */
5386 if (needed_intregs)
5387 {
5388 /* int_addr = gpr + sav; */
5389 t = fold_convert (sizetype, gpr);
5390 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
07beea0d 5391 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5392 gimplify_and_add (t, pre_p);
5393 }
5394 if (needed_sseregs)
5395 {
5396 /* sse_addr = fpr + sav; */
5397 t = fold_convert (sizetype, fpr);
5398 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
07beea0d 5399 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5400 gimplify_and_add (t, pre_p);
5401 }
5402 if (need_temp)
5403 {
5404 int i;
5405 tree temp = create_tmp_var (type, "va_arg_tmp");
5406
5407 /* addr = &temp; */
5408 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
07beea0d 5409 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
cd3ce9b4 5410 gimplify_and_add (t, pre_p);
f676971a 5411
5412 for (i = 0; i < XVECLEN (container, 0); i++)
5413 {
5414 rtx slot = XVECEXP (container, 0, i);
5415 rtx reg = XEXP (slot, 0);
5416 enum machine_mode mode = GET_MODE (reg);
5417 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5418 tree addr_type = build_pointer_type (piece_type);
5419 tree src_addr, src;
5420 int src_offset;
5421 tree dest_addr, dest;
5422
5423 if (SSE_REGNO_P (REGNO (reg)))
5424 {
5425 src_addr = sse_addr;
5426 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5427 }
5428 else
5429 {
5430 src_addr = int_addr;
5431 src_offset = REGNO (reg) * 8;
5432 }
8fe75e43 5433 src_addr = fold_convert (addr_type, src_addr);
5be014d5 5434 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
381d35b2 5435 size_int (src_offset));
c2433d7d 5436 src = build_va_arg_indirect_ref (src_addr);
e6e81735 5437
8fe75e43 5438 dest_addr = fold_convert (addr_type, addr);
5be014d5 5439 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
381d35b2 5440 size_int (INTVAL (XEXP (slot, 1))));
c2433d7d 5441 dest = build_va_arg_indirect_ref (dest_addr);
3a3677ff 5442
07beea0d 5443 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5444 gimplify_and_add (t, pre_p);
5445 }
5446 }
e6e81735 5447
5448 if (needed_intregs)
5449 {
5450 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
56d76b69 5451 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
07beea0d 5452 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5453 gimplify_and_add (t, pre_p);
5454 }
5455 if (needed_sseregs)
5456 {
4a90aeeb 5457 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
56d76b69 5458 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
07beea0d 5459 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5460 gimplify_and_add (t, pre_p);
5461 }
e6e81735 5462
5463 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5464 gimplify_and_add (t, pre_p);
5465
5466 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5467 append_to_statement_list (t, pre_p);
3a3677ff 5468 }
b840bfb0 5469
8fe75e43 5470 /* ... otherwise out of the overflow area. */
e9e80858 5471
8fe75e43 5472 /* Care for on-stack alignment if needed. */
5473 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5474 || integer_zerop (TYPE_SIZE (type)))
8fe75e43 5475 t = ovf;
5be014d5 5476 else
e9e80858 5477 {
8fe75e43 5478 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5479 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5480 size_int (align - 1));
5481 t = fold_convert (sizetype, t);
47a25a46 5482 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5483 size_int (-align));
5484 t = fold_convert (TREE_TYPE (ovf), t);
e9e80858 5485 }
8fe75e43 5486 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
e075ae69 5487
07beea0d 5488 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
8fe75e43 5489 gimplify_and_add (t2, pre_p);
e075ae69 5490
5491 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5492 size_int (rsize * UNITS_PER_WORD));
07beea0d 5493 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
8fe75e43 5494 gimplify_and_add (t, pre_p);
e075ae69 5495
8fe75e43 5496 if (container)
2a2ab3f9 5497 {
5498 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5499 append_to_statement_list (t, pre_p);
2a2ab3f9 5500 }
e075ae69 5501
5502 ptrtype = build_pointer_type (type);
5503 addr = fold_convert (ptrtype, addr);
0a726ef1 5504
8fe75e43 5505 if (indirect_p)
5506 addr = build_va_arg_indirect_ref (addr);
5507 return build_va_arg_indirect_ref (addr);
0a726ef1 5508}
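/* A sketch of the gimplified sequence produced above for va_arg of an int
   (simplified, assumed):
     if (gpr >= 48) goto lab_false;        // out of GP registers?
     addr = sav + gpr;  gpr += 8;  goto lab_over;
   lab_false:
     addr = ovf (aligned if needed);  ovf = addr + 8;
   lab_over:
     result = *(int *) addr;  */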
5509\f
5510/* Return nonzero if OPNUM's MEM should be matched
5511 in movabs* patterns. */
5512
5513int
8fe75e43 5514ix86_check_movabs (rtx insn, int opnum)
4f2c8ebb 5515{
8fe75e43 5516 rtx set, mem;
e075ae69 5517
5518 set = PATTERN (insn);
5519 if (GET_CODE (set) == PARALLEL)
5520 set = XVECEXP (set, 0, 0);
d0396b79 5521 gcc_assert (GET_CODE (set) == SET);
5522 mem = XEXP (set, opnum);
5523 while (GET_CODE (mem) == SUBREG)
5524 mem = SUBREG_REG (mem);
7656aee4 5525 gcc_assert (MEM_P (mem));
8fe75e43 5526 return (volatile_ok || !MEM_VOLATILE_P (mem));
2247f6ed 5527}
e075ae69 5528\f
5529/* Initialize the table of extra 80387 mathematical constants. */
5530
5531static void
b96a374d 5532init_ext_80387_constants (void)
5533{
5534 static const char * cst[5] =
5535 {
5536 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5537 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5538 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5539 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5540 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5541 };
5542 int i;
5543
5544 for (i = 0; i < 5; i++)
5545 {
5546 real_from_string (&ext_80387_constants_table[i], cst[i]);
5547 /* Ensure each constant is rounded to XFmode precision. */
1f48e56d 5548 real_convert (&ext_80387_constants_table[i],
f8a1ebc6 5549 XFmode, &ext_80387_constants_table[i]);
5550 }
5551
5552 ext_80387_constants_init = 1;
5553}
5554
e075ae69 5555/* Return true if the constant is something that can be loaded with
881b2a96 5556 a special instruction. */
5557
5558int
b96a374d 5559standard_80387_constant_p (rtx x)
57dbca5e 5560{
5561 enum machine_mode mode = GET_MODE (x);
5562
5563 REAL_VALUE_TYPE r;
5564
27ac40e2 5565 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
e075ae69 5566 return -1;
881b2a96 5567
27ac40e2 5568 if (x == CONST0_RTX (mode))
2b04e52b 5569 return 1;
27ac40e2 5570 if (x == CONST1_RTX (mode))
2b04e52b 5571 return 2;
881b2a96 5572
5573 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5574
5575 /* For XFmode constants, try to find a special 80387 instruction when
5576 optimizing for size or on those CPUs that benefit from them. */
27ac40e2 5577 if (mode == XFmode
80fd744f 5578 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
881b2a96 5579 {
5580 int i;
5581
5582 if (! ext_80387_constants_init)
5583 init_ext_80387_constants ();
5584
5585 for (i = 0; i < 5; i++)
5586 if (real_identical (&r, &ext_80387_constants_table[i]))
5587 return i + 3;
5588 }
5589
5590 /* Load of the constant -0.0 or -1.0 will be split as
5591 fldz;fchs or fld1;fchs sequence. */
5592 if (real_isnegzero (&r))
5593 return 8;
5594 if (real_identical (&r, &dconstm1))
5595 return 9;
5596
e075ae69 5597 return 0;
5598}
5599
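/* Examples of the encoding returned above (illustrative):
     0.0 -> 1 (fldz)    1.0 -> 2 (fld1)    pi -> 7 (fldpi)
     -0.0 -> 8 and -1.0 -> 9 (split into fldz/fld1 followed by fchs)
   other constants -> 0 (load from memory), non-x87 modes -> -1.  */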
5600/* Return the opcode of the special instruction to be used to load
5601 the constant X. */
5602
5603const char *
b96a374d 5604standard_80387_constant_opcode (rtx x)
5605{
5606 switch (standard_80387_constant_p (x))
5607 {
b96a374d 5608 case 1:
5609 return "fldz";
5610 case 2:
5611 return "fld1";
b96a374d 5612 case 3:
5613 return "fldlg2";
5614 case 4:
5615 return "fldln2";
b96a374d 5616 case 5:
5617 return "fldl2e";
5618 case 6:
5619 return "fldl2t";
b96a374d 5620 case 7:
881b2a96 5621 return "fldpi";
5622 case 8:
5623 case 9:
5624 return "#";
5625 default:
5626 gcc_unreachable ();
881b2a96 5627 }
5628}
5629
5630/* Return the CONST_DOUBLE representing the 80387 constant that is
5631 loaded by the specified special instruction. The argument IDX
5632 matches the return value from standard_80387_constant_p. */
5633
5634rtx
b96a374d 5635standard_80387_constant_rtx (int idx)
5636{
5637 int i;
5638
5639 if (! ext_80387_constants_init)
5640 init_ext_80387_constants ();
5641
5642 switch (idx)
5643 {
5644 case 3:
5645 case 4:
5646 case 5:
5647 case 6:
5648 case 7:
5649 i = idx - 3;
5650 break;
5651
5652 default:
d0396b79 5653 gcc_unreachable ();
5654 }
5655
1f48e56d 5656 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
f8a1ebc6 5657 XFmode);
5658}
5659
5660/* Return 1 if MODE is a valid mode for SSE. */
5661static int
5662standard_sse_mode_p (enum machine_mode mode)
5663{
5664 switch (mode)
5665 {
5666 case V16QImode:
5667 case V8HImode:
5668 case V4SImode:
5669 case V2DImode:
5670 case V4SFmode:
5671 case V2DFmode:
5672 return 1;
5673
5674 default:
5675 return 0;
5676 }
5677}
5678
5679/* Return 1 if X is an FP constant that we can load into an SSE
5680 register without using memory. */
5681int
b96a374d 5682standard_sse_constant_p (rtx x)
2b04e52b 5683{
5684 enum machine_mode mode = GET_MODE (x);
5685
5686 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
0e67d460 5687 return 1;
5688 if (vector_all_ones_operand (x, mode)
5689 && standard_sse_mode_p (mode))
5690 return TARGET_SSE2 ? 2 : -1;
5691
5692 return 0;
5693}
5694
5695/* Return the opcode of the special instruction to be used to load
5696 the constant X. */
5697
5698const char *
5699standard_sse_constant_opcode (rtx insn, rtx x)
5700{
5701 switch (standard_sse_constant_p (x))
5702 {
5703 case 1:
5704 if (get_attr_mode (insn) == MODE_V4SF)
5705 return "xorps\t%0, %0";
5706 else if (get_attr_mode (insn) == MODE_V2DF)
5707 return "xorpd\t%0, %0";
5708 else
5709 return "pxor\t%0, %0";
5710 case 2:
5711 return "pcmpeqd\t%0, %0";
5712 }
5713 gcc_unreachable ();
5714}
5715
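/* For example (illustrative): an all-zero V4SF constant is emitted as
   "xorps %xmm0, %xmm0" (case 1 above), and an all-ones vector as
   "pcmpeqd %xmm0, %xmm0" (case 2, SSE2 only).  */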
5716/* Returns 1 if OP contains a symbol reference */
5717
5718int
b96a374d 5719symbolic_reference_mentioned_p (rtx op)
2a2ab3f9 5720{
5721 const char *fmt;
5722 int i;
5723
5724 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5725 return 1;
5726
5727 fmt = GET_RTX_FORMAT (GET_CODE (op));
5728 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5729 {
5730 if (fmt[i] == 'E')
5731 {
8d531ab9 5732 int j;
5733
5734 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5735 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5736 return 1;
5737 }
e9a25f70 5738
5739 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5740 return 1;
5741 }
5742
5743 return 0;
5744}
5745
5746/* Return 1 if it is appropriate to emit `ret' instructions in the
5747 body of a function. Do this only if the epilogue is simple, needing a
5748 couple of insns. Prior to reloading, we can't tell how many registers
5749 must be saved, so return 0 then. Return 0 if there is no frame
6e14af16 5750 marker to de-allocate. */
5751
5752int
b96a374d 5753ix86_can_use_return_insn_p (void)
32b5b1aa 5754{
4dd2ac2c 5755 struct ix86_frame frame;
9a7372d6 5756
5757 if (! reload_completed || frame_pointer_needed)
5758 return 0;
32b5b1aa 5759
5760 /* Don't allow more than 32768 bytes of popped arguments, since that's
5761 all we can handle with one instruction. */
5762 if (crtl->args.pops_args
5763 && crtl->args.size >= 32768)
e075ae69 5764 return 0;
32b5b1aa 5765
5766 ix86_compute_frame_layout (&frame);
5767 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 5768}
6189a572 5769\f
5770/* Value should be nonzero if functions must have frame pointers.
5771 Zero means the frame pointer need not be set up (and parms may
5772 be accessed via the stack pointer) in functions that seem suitable. */
5773
5774int
b96a374d 5775ix86_frame_pointer_required (void)
6fca22eb
RH
5776{
5777 /* If we accessed previous frames, then the generated code expects
5778 to be able to access the saved ebp value in our frame. */
5779 if (cfun->machine->accesses_prev_frame)
5780 return 1;
a4f31c00 5781
6fca22eb
RH
5782 /* Several x86 os'es need a frame pointer for other reasons,
5783 usually pertaining to setjmp. */
5784 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5785 return 1;
5786
5787 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5788 the frame pointer by default. Turn it back on now if we've not
5789 got a leaf function. */
a7943381 5790 if (TARGET_OMIT_LEAF_FRAME_POINTER
5bf5a10b
AO
5791 && (!current_function_is_leaf
5792 || ix86_current_function_calls_tls_descriptor))
55ba61f3
JH
5793 return 1;
5794
e3b5732b 5795 if (crtl->profile)
6fca22eb
RH
5796 return 1;
5797
5798 return 0;
5799}
5800
5801/* Record that the current function accesses previous call frames. */
5802
5803void
b96a374d 5804ix86_setup_frame_addresses (void)
6fca22eb
RH
5805{
5806 cfun->machine->accesses_prev_frame = 1;
5807}
e075ae69 5808\f
7d072037 5809#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
145aacc2
RH
5810# define USE_HIDDEN_LINKONCE 1
5811#else
5812# define USE_HIDDEN_LINKONCE 0
5813#endif
5814
bd09bdeb 5815static int pic_labels_used;
e9a25f70 5816
145aacc2
RH
5817/* Fills in the label name that should be used for a pc thunk for
5818 the given register. */
5819
5820static void
b96a374d 5821get_pc_thunk_name (char name[32], unsigned int regno)
145aacc2 5822{
f7288899
EC
5823 gcc_assert (!TARGET_64BIT);
5824
145aacc2
RH
5825 if (USE_HIDDEN_LINKONCE)
5826 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5827 else
5828 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5829}
5830
5831
e075ae69
RH
5832/* This function generates code for -fpic that loads %ebx with
5833 the return address of the caller and then returns. */
5834
5835void
b96a374d 5836ix86_file_end (void)
e075ae69
RH
5837{
5838 rtx xops[2];
bd09bdeb 5839 int regno;
32b5b1aa 5840
bd09bdeb 5841 for (regno = 0; regno < 8; ++regno)
7c262518 5842 {
145aacc2
RH
5843 char name[32];
5844
bd09bdeb
RH
5845 if (! ((pic_labels_used >> regno) & 1))
5846 continue;
5847
145aacc2 5848 get_pc_thunk_name (name, regno);
bd09bdeb 5849
7d072037
SH
5850#if TARGET_MACHO
5851 if (TARGET_MACHO)
5852 {
5853 switch_to_section (darwin_sections[text_coal_section]);
5854 fputs ("\t.weak_definition\t", asm_out_file);
5855 assemble_name (asm_out_file, name);
5856 fputs ("\n\t.private_extern\t", asm_out_file);
5857 assemble_name (asm_out_file, name);
5858 fputs ("\n", asm_out_file);
5859 ASM_OUTPUT_LABEL (asm_out_file, name);
5860 }
5861 else
5862#endif
145aacc2
RH
5863 if (USE_HIDDEN_LINKONCE)
5864 {
5865 tree decl;
5866
5867 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5868 error_mark_node);
5869 TREE_PUBLIC (decl) = 1;
5870 TREE_STATIC (decl) = 1;
5871 DECL_ONE_ONLY (decl) = 1;
5872
5873 (*targetm.asm_out.unique_section) (decl, 0);
d6b5193b 5874 switch_to_section (get_named_section (decl, NULL, 0));
145aacc2 5875
a5fe455b
ZW
5876 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5877 fputs ("\t.hidden\t", asm_out_file);
5878 assemble_name (asm_out_file, name);
5879 fputc ('\n', asm_out_file);
5880 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
145aacc2
RH
5881 }
5882 else
5883 {
d6b5193b 5884 switch_to_section (text_section);
a5fe455b 5885 ASM_OUTPUT_LABEL (asm_out_file, name);
145aacc2 5886 }
893125e0
KT
5887 if (TARGET_64BIT_MS_ABI)
5888 {
5889 xops[0] = gen_rtx_REG (Pmode, regno);
5890 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
5891 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
5892 output_asm_insn ("ret", xops);
5893 }
5894 else
5895 {
5896 xops[0] = gen_rtx_REG (SImode, regno);
5897 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5898 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5899 output_asm_insn ("ret", xops);
5900 }
7c262518 5901 }
3edc56a9 5902
a5fe455b
ZW
5903 if (NEED_INDICATE_EXEC_STACK)
5904 file_end_indicate_exec_stack ();
32b5b1aa 5905}
32b5b1aa 5906
c8c03509 5907/* Emit code for the SET_GOT patterns. */
32b5b1aa 5908
c8c03509 5909const char *
7d072037 5910output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
c8c03509
RH
5911{
5912 rtx xops[3];
0d7d98ee 5913
c8c03509 5914 xops[0] = dest;
170bdaba
RS
5915
5916 if (TARGET_VXWORKS_RTP && flag_pic)
5917 {
5918 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5919 xops[2] = gen_rtx_MEM (Pmode,
5920 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5921 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5922
5923 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5924 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5925 an unadorned address. */
5926 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5927 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5928 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5929 return "";
5930 }
5931
5fc0e5df 5932 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
32b5b1aa 5933
c8c03509 5934 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
32b5b1aa 5935 {
7d072037 5936 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
c8c03509
RH
5937
5938 if (!flag_pic)
5939 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5940 else
5941 output_asm_insn ("call\t%a2", xops);
5942
b069de3b 5943#if TARGET_MACHO
7d072037
SH
5944 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5945 is what will be referenced by the Mach-O PIC subsystem. */
5946 if (!label)
5947 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
b069de3b 5948#endif
7d072037 5949
4977bab6 5950 (*targetm.asm_out.internal_label) (asm_out_file, "L",
c8c03509
RH
5951 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5952
5953 if (flag_pic)
5954 output_asm_insn ("pop{l}\t%0", xops);
32b5b1aa 5955 }
e075ae69 5956 else
e5cb57e8 5957 {
145aacc2
RH
5958 char name[32];
5959 get_pc_thunk_name (name, REGNO (dest));
bd09bdeb 5960 pic_labels_used |= 1 << REGNO (dest);
f996902d 5961
145aacc2 5962 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
c8c03509
RH
5963 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5964 output_asm_insn ("call\t%X2", xops);
7d072037
SH
5965 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5966 is what will be referenced by the Mach-O PIC subsystem. */
5967#if TARGET_MACHO
5968 if (!label)
5969 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
9795d9fd
AP
5970 else
5971 targetm.asm_out.internal_label (asm_out_file, "L",
5972 CODE_LABEL_NUMBER (label));
7d072037 5973#endif
e5cb57e8 5974 }
e5cb57e8 5975
7d072037
SH
5976 if (TARGET_MACHO)
5977 return "";
5978
c8c03509
RH
5979 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5980 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
7d072037 5981 else
4a8ce6ce 5982 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
79325812 5983
c8c03509 5984 return "";
e9a25f70 5985}
8dfe5673 5986
0d7d98ee 5987/* Generate an "push" pattern for input ARG. */
e9a25f70 5988
e075ae69 5989static rtx
b96a374d 5990gen_push (rtx arg)
e9a25f70 5991{
c5c76735 5992 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
5993 gen_rtx_MEM (Pmode,
5994 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
5995 stack_pointer_rtx)),
5996 arg);
e9a25f70
JL
5997}
5998
bd09bdeb
RH
5999/* Return >= 0 if there is an unused call-clobbered register available
6000 for the entire function. */
6001
6002static unsigned int
b96a374d 6003ix86_select_alt_pic_regnum (void)
bd09bdeb 6004{
e3b5732b 6005 if (current_function_is_leaf && !crtl->profile
5bf5a10b 6006 && !ix86_current_function_calls_tls_descriptor)
bd09bdeb
RH
6007 {
6008 int i;
6009 for (i = 2; i >= 0; --i)
6fb5fa3c 6010 if (!df_regs_ever_live_p (i))
bd09bdeb
RH
6011 return i;
6012 }
6013
6014 return INVALID_REGNUM;
6015}
fce5a9f2 6016
4dd2ac2c
JH
6017/* Return 1 if we need to save REGNO. */
6018static int
b96a374d 6019ix86_save_reg (unsigned int regno, int maybe_eh_return)
1020a5ab 6020{
bd09bdeb
RH
6021 if (pic_offset_table_rtx
6022 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
6fb5fa3c 6023 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
e3b5732b
JH
6024 || crtl->profile
6025 || crtl->calls_eh_return
6026 || crtl->uses_const_pool))
bd09bdeb
RH
6027 {
6028 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
6029 return 0;
6030 return 1;
6031 }
1020a5ab 6032
e3b5732b 6033 if (crtl->calls_eh_return && maybe_eh_return)
1020a5ab
RH
6034 {
6035 unsigned i;
6036 for (i = 0; ; i++)
6037 {
b531087a 6038 unsigned test = EH_RETURN_DATA_REGNO (i);
1020a5ab
RH
6039 if (test == INVALID_REGNUM)
6040 break;
9b690711 6041 if (test == regno)
1020a5ab
RH
6042 return 1;
6043 }
6044 }
4dd2ac2c 6045
150cdc9e
RH
6046 if (cfun->machine->force_align_arg_pointer
6047 && regno == REGNO (cfun->machine->force_align_arg_pointer))
6048 return 1;
6049
6fb5fa3c 6050 return (df_regs_ever_live_p (regno)
1020a5ab
RH
6051 && !call_used_regs[regno]
6052 && !fixed_regs[regno]
6053 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4dd2ac2c
JH
6054}
6055
0903fcab
JH
6056/* Return number of registers to be saved on the stack. */
6057
6058static int
b96a374d 6059ix86_nsaved_regs (void)
0903fcab
JH
6060{
6061 int nregs = 0;
0903fcab
JH
6062 int regno;
6063
4dd2ac2c 6064 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 6065 if (ix86_save_reg (regno, true))
4dd2ac2c 6066 nregs++;
0903fcab
JH
6067 return nregs;
6068}
6069
6070/* Return the offset between two registers, one to be eliminated, and the other
6071 its replacement, at the start of a routine. */
6072
6073HOST_WIDE_INT
b96a374d 6074ix86_initial_elimination_offset (int from, int to)
0903fcab 6075{
4dd2ac2c
JH
6076 struct ix86_frame frame;
6077 ix86_compute_frame_layout (&frame);
564d80f4
JH
6078
6079 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 6080 return frame.hard_frame_pointer_offset;
564d80f4
JH
6081 else if (from == FRAME_POINTER_REGNUM
6082 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 6083 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
6084 else
6085 {
d0396b79
NS
6086 gcc_assert (to == STACK_POINTER_REGNUM);
6087
6088 if (from == ARG_POINTER_REGNUM)
4dd2ac2c 6089 return frame.stack_pointer_offset;
5656a184 6090
d0396b79
NS
6091 gcc_assert (from == FRAME_POINTER_REGNUM);
6092 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
6093 }
6094}
6095
4dd2ac2c 6096/* Fill structure ix86_frame about frame of currently computed function. */
0f290768 6097
4dd2ac2c 6098static void
b96a374d 6099ix86_compute_frame_layout (struct ix86_frame *frame)
65954bd8 6100{
65954bd8 6101 HOST_WIDE_INT total_size;
95899b34 6102 unsigned int stack_alignment_needed;
b19ee4bd 6103 HOST_WIDE_INT offset;
95899b34 6104 unsigned int preferred_alignment;
4dd2ac2c 6105 HOST_WIDE_INT size = get_frame_size ();
65954bd8 6106
4dd2ac2c 6107 frame->nregs = ix86_nsaved_regs ();
564d80f4 6108 total_size = size;
65954bd8 6109
cb91fab0
JH
6110 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6111 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
95899b34 6112
d7394366
JH
6113 /* During reload iteration the amount of registers saved can change.
6114 Recompute the value as needed. Do not recompute when amount of registers
aabcd309 6115 didn't change as reload does multiple calls to the function and does not
d7394366
JH
6116 expect the decision to change within single iteration. */
6117 if (!optimize_size
6118 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
d9b40e8d
JH
6119 {
6120 int count = frame->nregs;
6121
d7394366 6122 cfun->machine->use_fast_prologue_epilogue_nregs = count;
d9b40e8d
JH
6123 /* The fast prologue uses move instead of push to save registers. This
6124 is significantly longer, but also executes faster as modern hardware
6125 can execute the moves in parallel, but can't do that for push/pop.
b96a374d 6126
d9b40e8d
JH
6127 Be careful about choosing what prologue to emit: When function takes
6128 many instructions to execute we may use slow version as well as in
6129 case function is known to be outside hot spot (this is known with
6130 feedback only). Weight the size of function by number of registers
6131 to save as it is cheap to use one or two push instructions but very
6132 slow to use many of them. */
6133 if (count)
6134 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6135 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
6136 || (flag_branch_probabilities
6137 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
6138 cfun->machine->use_fast_prologue_epilogue = false;
6139 else
6140 cfun->machine->use_fast_prologue_epilogue
6141 = !expensive_function_p (count);
6142 }
6143 if (TARGET_PROLOGUE_USING_MOVE
6144 && cfun->machine->use_fast_prologue_epilogue)
6145 frame->save_regs_using_mov = true;
6146 else
6147 frame->save_regs_using_mov = false;
6148
6149
9ba81eaa 6150 /* Skip return address and saved base pointer. */
4dd2ac2c
JH
6151 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
6152
6153 frame->hard_frame_pointer_offset = offset;
564d80f4 6154
fcbfaa65
RK
6155 /* Do some sanity checking of stack_alignment_needed and
6156 preferred_alignment, since i386 port is the only using those features
f710504c 6157 that may break easily. */
564d80f4 6158
d0396b79
NS
6159 gcc_assert (!size || stack_alignment_needed);
6160 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6161 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6162 gcc_assert (stack_alignment_needed
6163 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
564d80f4 6164
4dd2ac2c
JH
6165 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
6166 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 6167
4dd2ac2c
JH
6168 /* Register save area */
6169 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 6170
8362f420
JH
6171 /* Va-arg area */
6172 if (ix86_save_varrargs_registers)
6173 {
6174 offset += X86_64_VARARGS_SIZE;
6175 frame->va_arg_size = X86_64_VARARGS_SIZE;
6176 }
6177 else
6178 frame->va_arg_size = 0;
6179
4dd2ac2c
JH
6180 /* Align start of frame for local function. */
6181 frame->padding1 = ((offset + stack_alignment_needed - 1)
6182 & -stack_alignment_needed) - offset;
f73ad30e 6183
4dd2ac2c 6184 offset += frame->padding1;
65954bd8 6185
4dd2ac2c
JH
6186 /* Frame pointer points here. */
6187 frame->frame_pointer_offset = offset;
54ff41b7 6188
4dd2ac2c 6189 offset += size;
65954bd8 6190
0b7ae565 6191 /* Add outgoing arguments area. Can be skipped if we eliminated
965514bd
JH
6192 all the function calls as dead code.
6193 Skipping is however impossible when function calls alloca. Alloca
38173d38 6194 expander assumes that last crtl->outgoing_args_size
965514bd
JH
6195 of stack frame are unused. */
6196 if (ACCUMULATE_OUTGOING_ARGS
e3b5732b 6197 && (!current_function_is_leaf || cfun->calls_alloca
5bf5a10b 6198 || ix86_current_function_calls_tls_descriptor))
4dd2ac2c 6199 {
38173d38
JH
6200 offset += crtl->outgoing_args_size;
6201 frame->outgoing_arguments_size = crtl->outgoing_args_size;
4dd2ac2c
JH
6202 }
6203 else
6204 frame->outgoing_arguments_size = 0;
564d80f4 6205
002ff5bc
RH
6206 /* Align stack boundary. Only needed if we're calling another function
6207 or using alloca. */
e3b5732b 6208 if (!current_function_is_leaf || cfun->calls_alloca
5bf5a10b 6209 || ix86_current_function_calls_tls_descriptor)
0b7ae565
RH
6210 frame->padding2 = ((offset + preferred_alignment - 1)
6211 & -preferred_alignment) - offset;
6212 else
6213 frame->padding2 = 0;
4dd2ac2c
JH
6214
6215 offset += frame->padding2;
6216
6217 /* We've reached the end of the stack frame. */
6218 frame->stack_pointer_offset = offset;
6219
6220 /* The size the prologue needs to allocate. */
6221 frame->to_allocate =
6222 (size + frame->padding1 + frame->padding2
8362f420 6223 + frame->outgoing_arguments_size + frame->va_arg_size);
4dd2ac2c 6224
b19ee4bd
JJ
6225 if ((!frame->to_allocate && frame->nregs <= 1)
6226 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
d9b40e8d
JH
6227 frame->save_regs_using_mov = false;
6228
a5b378d6 6229 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5bf5a10b
AO
6230 && current_function_is_leaf
6231 && !ix86_current_function_calls_tls_descriptor)
8362f420
JH
6232 {
6233 frame->red_zone_size = frame->to_allocate;
d9b40e8d
JH
6234 if (frame->save_regs_using_mov)
6235 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8362f420
JH
6236 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6237 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6238 }
6239 else
6240 frame->red_zone_size = 0;
6241 frame->to_allocate -= frame->red_zone_size;
6242 frame->stack_pointer_offset -= frame->red_zone_size;
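 /* Illustrative example (an assumption, using the usual x86_64 values
    RED_ZONE_SIZE == 128 and RED_ZONE_RESERVE == 8): a leaf function with
    to_allocate == 40 and no register saves gets red_zone_size == 40, so
    to_allocate drops to 0 and the prologue need not adjust %rsp at all. */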
4dd2ac2c 6243#if 0
7874f14d
MM
6244 fprintf (stderr, "\n");
6245 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
6246 fprintf (stderr, "size: %ld\n", (long)size);
6247 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
6248 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
6249 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
6250 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
6251 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
6252 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
6253 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
6254 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
6255 (long)frame->hard_frame_pointer_offset);
6256 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
6257 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
e3b5732b 6258 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7874f14d 6259 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
4dd2ac2c 6260#endif
65954bd8
JL
6261}
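/* Illustrative summary of the layout computed above (not part of the
   original source); entries run from the incoming stack pointer towards
   lower addresses, and each named offset is measured from the frame top
   down to the end of the marked row:

     return address
     saved frame pointer (if needed)    <- hard_frame_pointer_offset
     register save area (nregs words)
     va_arg save area
     padding1                           <- frame_pointer_offset
     local variables (size)
     outgoing arguments area
     padding2                           <- stack_pointer_offset  */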
6262
0903fcab
JH
6263/* Emit code to save registers in the prologue. */
6264
6265static void
b96a374d 6266ix86_emit_save_regs (void)
0903fcab 6267{
150cdc9e 6268 unsigned int regno;
0903fcab 6269 rtx insn;
0903fcab 6270
150cdc9e 6271 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
1020a5ab 6272 if (ix86_save_reg (regno, true))
0903fcab 6273 {
0d7d98ee 6274 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
0903fcab
JH
6275 RTX_FRAME_RELATED_P (insn) = 1;
6276 }
6277}
6278
c6036a37
JH
6279/* Emit code to save registers using MOV insns. The first register
6280 is saved at POINTER + OFFSET. */
6281static void
b96a374d 6282ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
c6036a37 6283{
150cdc9e 6284 unsigned int regno;
c6036a37
JH
6285 rtx insn;
6286
6287 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6288 if (ix86_save_reg (regno, true))
6289 {
b72f00af
RK
6290 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
6291 Pmode, offset),
c6036a37
JH
6292 gen_rtx_REG (Pmode, regno));
6293 RTX_FRAME_RELATED_P (insn) = 1;
6294 offset += UNITS_PER_WORD;
6295 }
6296}
6297
839a4992 6298/* Expand prologue or epilogue stack adjustment.
b19ee4bd
JJ
6299 The pattern exists to put a dependency on all ebp-based memory accesses.
6300 STYLE should be negative if the instructions should be marked as frame
6301 related, zero if the %r11 register is live and cannot be freely used, and
6302 positive otherwise. */
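/* Illustrative uses, mirroring calls made later in this file: the prologue
   allocates the frame with
     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				GEN_INT (-allocate), -1);
   (frame related), while the epilogue restores esp from ebp with
     pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
				const0_rtx, style);  */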
6303
6304static void
6305pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
6306{
6307 rtx insn;
6308
6309 if (! TARGET_64BIT)
6310 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
6311 else if (x86_64_immediate_operand (offset, DImode))
6312 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
6313 else
6314 {
6315 rtx r11;
6316 /* r11 is used by indirect sibcall return as well, set before the
6317 epilogue and used after the epilogue. At the moment an indirect sibcall
6318 shouldn't be used together with huge frame sizes in one
6319 function because of the frame_size check in sibcall.c. */
d0396b79 6320 gcc_assert (style);
3c4ace25 6321 r11 = gen_rtx_REG (DImode, R11_REG);
b19ee4bd
JJ
6322 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
6323 if (style < 0)
6324 RTX_FRAME_RELATED_P (insn) = 1;
6325 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
6326 offset));
6327 }
6328 if (style < 0)
6329 RTX_FRAME_RELATED_P (insn) = 1;
6330}
6331
150cdc9e
RH
6332/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6333
6334static rtx
6335ix86_internal_arg_pointer (void)
6336{
33932946
SH
6337 bool has_force_align_arg_pointer =
6338 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
6339 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
6340 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6341 && DECL_NAME (current_function_decl)
6342 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6343 && DECL_FILE_SCOPE_P (current_function_decl))
6344 || ix86_force_align_arg_pointer
6345 || has_force_align_arg_pointer)
150cdc9e 6346 {
33932946
SH
6347 /* Nested functions can't realign the stack due to a register
6348 conflict. */
6349 if (DECL_CONTEXT (current_function_decl)
6350 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
6351 {
6352 if (ix86_force_align_arg_pointer)
6353 warning (0, "-mstackrealign ignored for nested functions");
6354 if (has_force_align_arg_pointer)
6355 error ("%s not supported for nested functions",
6356 ix86_force_align_arg_pointer_string);
6357 return virtual_incoming_args_rtx;
6358 }
29b74761 6359 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
150cdc9e
RH
6360 return copy_to_reg (cfun->machine->force_align_arg_pointer);
6361 }
6362 else
6363 return virtual_incoming_args_rtx;
6364}
6365
6366/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6367 This is called from dwarf2out.c to emit call frame instructions
6368 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6369static void
6370ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6371{
6372 rtx unspec = SET_SRC (pattern);
6373 gcc_assert (GET_CODE (unspec) == UNSPEC);
6374
6375 switch (index)
6376 {
6377 case UNSPEC_REG_SAVE:
6378 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6379 SET_DEST (pattern));
6380 break;
6381 case UNSPEC_DEF_CFA:
6382 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6383 INTVAL (XVECEXP (unspec, 0, 0)));
6384 break;
6385 default:
6386 gcc_unreachable ();
6387 }
6388}
6389
0f290768 6390/* Expand the prologue into a bunch of separate insns. */
e075ae69
RH
6391
6392void
b96a374d 6393ix86_expand_prologue (void)
2a2ab3f9 6394{
564d80f4 6395 rtx insn;
bd09bdeb 6396 bool pic_reg_used;
4dd2ac2c 6397 struct ix86_frame frame;
c6036a37 6398 HOST_WIDE_INT allocate;
4dd2ac2c 6399
4977bab6 6400 ix86_compute_frame_layout (&frame);
79325812 6401
150cdc9e
RH
6402 if (cfun->machine->force_align_arg_pointer)
6403 {
6404 rtx x, y;
6405
6406 /* Grab the argument pointer. */
6407 x = plus_constant (stack_pointer_rtx, 4);
6408 y = cfun->machine->force_align_arg_pointer;
6409 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6410 RTX_FRAME_RELATED_P (insn) = 1;
6411
6412 /* The unwind info consists of two parts: install the fafp as the cfa,
6413 and record the fafp as the "save register" of the stack pointer.
6414 The latter is there so that the unwinder can see where it
6415 should restore the stack pointer across the 'and' insn. */
6416 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6417 x = gen_rtx_SET (VOIDmode, y, x);
6418 RTX_FRAME_RELATED_P (x) = 1;
6419 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6420 UNSPEC_REG_SAVE);
6421 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6422 RTX_FRAME_RELATED_P (y) = 1;
6423 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6424 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6425 REG_NOTES (insn) = x;
6426
6427 /* Align the stack. */
6428 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6429 GEN_INT (-16)));
6430
6431 /* And here we cheat like madmen with the unwind info. We force the
6432 cfa register back to sp+4, which is exactly what it was at the
6433 start of the function. Re-pushing the return address results in
5656a184 6434 the return at the same spot relative to the cfa, and thus is
150cdc9e
RH
6435 correct with respect to the unwind info. */
6436 x = cfun->machine->force_align_arg_pointer;
6437 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6438 insn = emit_insn (gen_push (x));
6439 RTX_FRAME_RELATED_P (insn) = 1;
6440
6441 x = GEN_INT (4);
6442 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6443 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6444 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6445 REG_NOTES (insn) = x;
6446 }
6447
e075ae69
RH
6448 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6449 slower on all targets. Also sdb doesn't like it. */
e9a25f70 6450
2a2ab3f9
JVA
6451 if (frame_pointer_needed)
6452 {
564d80f4 6453 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
e075ae69 6454 RTX_FRAME_RELATED_P (insn) = 1;
e9a25f70 6455
564d80f4 6456 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
e075ae69 6457 RTX_FRAME_RELATED_P (insn) = 1;
2a2ab3f9
JVA
6458 }
6459
c6036a37 6460 allocate = frame.to_allocate;
c6036a37 6461
d9b40e8d 6462 if (!frame.save_regs_using_mov)
c6036a37
JH
6463 ix86_emit_save_regs ();
6464 else
6465 allocate += frame.nregs * UNITS_PER_WORD;
564d80f4 6466
d9b40e8d 6467 /* When using the red zone we may start saving registers before allocating
6893e828
AN
6468 the stack frame, saving one cycle of the prologue. However, we
6469 avoid doing this if we are going to have to probe the stack, since
6470 at least on x86_64 the stack probe can turn into a call that clobbers
6471 a red zone location. */
6472 if (TARGET_RED_ZONE && frame.save_regs_using_mov
6473 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
d9b40e8d
JH
6474 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6475 : stack_pointer_rtx,
6476 -frame.nregs * UNITS_PER_WORD);
6477
c6036a37 6478 if (allocate == 0)
8dfe5673 6479 ;
e323735c 6480 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
b19ee4bd
JJ
6481 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6482 GEN_INT (-allocate), -1);
79325812 6483 else
8dfe5673 6484 {
fe9f516f 6485 /* Only valid for Win32 and the 64-bit MS ABI. */
29b74761 6486 rtx eax = gen_rtx_REG (Pmode, AX_REG);
ccf8e764 6487 bool eax_live;
5fc94ac4 6488 rtx t;
e9a25f70 6489
ccf8e764
RH
6490 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6491
6492 if (TARGET_64BIT_MS_ABI)
6493 eax_live = false;
6494 else
6495 eax_live = ix86_eax_live_at_start_p ();
e075ae69 6496
fe9f516f
RH
6497 if (eax_live)
6498 {
6499 emit_insn (gen_push (eax));
ccf8e764 6500 allocate -= UNITS_PER_WORD;
fe9f516f
RH
6501 }
6502
5fc94ac4 6503 emit_move_insn (eax, GEN_INT (allocate));
98417968 6504
ccf8e764
RH
6505 if (TARGET_64BIT)
6506 insn = gen_allocate_stack_worker_64 (eax);
6507 else
6508 insn = gen_allocate_stack_worker_32 (eax);
6509 insn = emit_insn (insn);
b1177d69 6510 RTX_FRAME_RELATED_P (insn) = 1;
5fc94ac4
RH
6511 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6512 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6513 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6514 t, REG_NOTES (insn));
fe9f516f
RH
6515
6516 if (eax_live)
6517 {
ea5f7a19
RS
6518 if (frame_pointer_needed)
6519 t = plus_constant (hard_frame_pointer_rtx,
6520 allocate
6521 - frame.to_allocate
6522 - frame.nregs * UNITS_PER_WORD);
6523 else
6524 t = plus_constant (stack_pointer_rtx, allocate);
ccf8e764 6525 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
fe9f516f 6526 }
e075ae69 6527 }
fe9f516f 6528
6893e828
AN
6529 if (frame.save_regs_using_mov
6530 && !(TARGET_RED_ZONE
6531 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
c6036a37
JH
6532 {
6533 if (!frame_pointer_needed || !frame.to_allocate)
6534 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6535 else
6536 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6537 -frame.nregs * UNITS_PER_WORD);
6538 }
e9a25f70 6539
bd09bdeb
RH
6540 pic_reg_used = false;
6541 if (pic_offset_table_rtx
6fb5fa3c 6542 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
e3b5732b 6543 || crtl->profile))
bd09bdeb
RH
6544 {
6545 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6546
6547 if (alt_pic_reg_used != INVALID_REGNUM)
6fb5fa3c 6548 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
bd09bdeb
RH
6549
6550 pic_reg_used = true;
6551 }
6552
e9a25f70 6553 if (pic_reg_used)
c8c03509 6554 {
7dcbf659 6555 if (TARGET_64BIT)
dc4d7240
JH
6556 {
6557 if (ix86_cmodel == CM_LARGE_PIC)
6558 {
29b74761 6559 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
dc4d7240
JH
6560 rtx label = gen_label_rtx ();
6561 emit_label (label);
6562 LABEL_PRESERVE_P (label) = 1;
6563 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6564 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
dc4d7240 6565 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
dc4d7240
JH
6566 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6567 pic_offset_table_rtx, tmp_reg));
6568 }
6569 else
6570 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6571 }
7dcbf659
JH
6572 else
6573 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
c8c03509 6574 }
77a989d1 6575
8660aaae 6576 /* Prevent function calls from being scheduled before the call to mcount.
66edd3b4 6577 In the pic_reg_used case, make sure that the got load isn't deleted. */
e3b5732b 6578 if (crtl->profile)
6fb5fa3c
DB
6579 {
6580 if (pic_reg_used)
6581 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6582 emit_insn (gen_blockage ());
6583 }
77a989d1
SC
6584}
6585
da2d1d3a
JH
6586/* Emit code to restore saved registers using MOV insns. First register
6587 is restored from POINTER + OFFSET. */
6588static void
72613dfa
JH
6589ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6590 int maybe_eh_return)
da2d1d3a
JH
6591{
6592 int regno;
72613dfa 6593 rtx base_address = gen_rtx_MEM (Pmode, pointer);
da2d1d3a 6594
4dd2ac2c 6595 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 6596 if (ix86_save_reg (regno, maybe_eh_return))
da2d1d3a 6597 {
72613dfa
JH
6598 /* Ensure that adjust_address won't be forced to produce a pointer
6599 outside the range allowed by the x86-64 instruction set. */
6600 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6601 {
6602 rtx r11;
6603
3c4ace25 6604 r11 = gen_rtx_REG (DImode, R11_REG);
72613dfa
JH
6605 emit_move_insn (r11, GEN_INT (offset));
6606 emit_insn (gen_adddi3 (r11, r11, pointer));
6607 base_address = gen_rtx_MEM (Pmode, r11);
6608 offset = 0;
6609 }
4dd2ac2c 6610 emit_move_insn (gen_rtx_REG (Pmode, regno),
72613dfa 6611 adjust_address (base_address, Pmode, offset));
4dd2ac2c 6612 offset += UNITS_PER_WORD;
da2d1d3a
JH
6613 }
6614}
6615
0f290768 6616/* Restore function stack, frame, and registers. */
e9a25f70 6617
2a2ab3f9 6618void
b96a374d 6619ix86_expand_epilogue (int style)
2a2ab3f9 6620{
1c71e60e 6621 int regno;
fdb8a883 6622 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4dd2ac2c 6623 struct ix86_frame frame;
65954bd8 6624 HOST_WIDE_INT offset;
4dd2ac2c
JH
6625
6626 ix86_compute_frame_layout (&frame);
2a2ab3f9 6627
a4f31c00 6628 /* Calculate start of saved registers relative to ebp. Special care
84e306b4
RH
6629 must be taken for the normal return case of a function using
6630 eh_return: the eax and edx registers are marked as saved, but not
6631 restored along this path. */
6632 offset = frame.nregs;
e3b5732b 6633 if (crtl->calls_eh_return && style != 2)
84e306b4
RH
6634 offset -= 2;
6635 offset *= -UNITS_PER_WORD;
2a2ab3f9 6636
fdb8a883
JW
6637 /* If we're only restoring one register and sp is not valid, then
6638 use a move instruction to restore the register, since it's
0f290768 6639 less work than reloading sp and popping the register.
da2d1d3a
JH
6640
6641 The default code results in a stack adjustment using an add/lea instruction,
6642 while this code results in a LEAVE instruction (or discrete equivalent),
6643 so it is profitable in some other cases as well, especially when there
6644 are no registers to restore. We also use this code when TARGET_USE_LEAVE
d1f87653 6645 is set and there is exactly one register to pop. This heuristic may need
da2d1d3a 6646 some tuning in the future. */
4dd2ac2c 6647 if ((!sp_valid && frame.nregs <= 1)
2ab0437e 6648 || (TARGET_EPILOGUE_USING_MOVE
d9b40e8d 6649 && cfun->machine->use_fast_prologue_epilogue
c6036a37 6650 && (frame.nregs > 1 || frame.to_allocate))
4dd2ac2c 6651 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2ab0437e 6652 || (frame_pointer_needed && TARGET_USE_LEAVE
d9b40e8d
JH
6653 && cfun->machine->use_fast_prologue_epilogue
6654 && frame.nregs == 1)
e3b5732b 6655 || crtl->calls_eh_return)
2a2ab3f9 6656 {
da2d1d3a
JH
6657 /* Restore registers. We can use ebp or esp to address the memory
6658 locations. If both are available, default to ebp, since offsets
6659 are known to be small. The only exception is esp pointing directly to the
6660 end of the block of saved registers, where we may simplify the addressing
6661 mode. */
6662
4dd2ac2c 6663 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
1020a5ab
RH
6664 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6665 frame.to_allocate, style == 2);
da2d1d3a 6666 else
1020a5ab
RH
6667 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6668 offset, style == 2);
6669
6670 /* eh_return epilogues need %ecx added to the stack pointer. */
6671 if (style == 2)
6672 {
6673 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
2a2ab3f9 6674
1020a5ab
RH
6675 if (frame_pointer_needed)
6676 {
6677 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6678 tmp = plus_constant (tmp, UNITS_PER_WORD);
6679 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6680
6681 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6682 emit_move_insn (hard_frame_pointer_rtx, tmp);
6683
b19ee4bd
JJ
6684 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6685 const0_rtx, style);
1020a5ab
RH
6686 }
6687 else
6688 {
6689 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6690 tmp = plus_constant (tmp, (frame.to_allocate
6691 + frame.nregs * UNITS_PER_WORD));
6692 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6693 }
6694 }
6695 else if (!frame_pointer_needed)
b19ee4bd
JJ
6696 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6697 GEN_INT (frame.to_allocate
6698 + frame.nregs * UNITS_PER_WORD),
6699 style);
0f290768 6700 /* If not an i386, mov & pop is faster than "leave". */
d9b40e8d
JH
6701 else if (TARGET_USE_LEAVE || optimize_size
6702 || !cfun->machine->use_fast_prologue_epilogue)
8362f420 6703 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
c8c5cb99 6704 else
2a2ab3f9 6705 {
b19ee4bd
JJ
6706 pro_epilogue_adjust_stack (stack_pointer_rtx,
6707 hard_frame_pointer_rtx,
6708 const0_rtx, style);
8362f420
JH
6709 if (TARGET_64BIT)
6710 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6711 else
6712 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
e9a25f70
JL
6713 }
6714 }
1c71e60e 6715 else
68f654ec 6716 {
1c71e60e
JH
6717 /* The first step is to deallocate the stack frame so that we can
6718 pop the registers. */
6719 if (!sp_valid)
6720 {
d0396b79 6721 gcc_assert (frame_pointer_needed);
b19ee4bd
JJ
6722 pro_epilogue_adjust_stack (stack_pointer_rtx,
6723 hard_frame_pointer_rtx,
6724 GEN_INT (offset), style);
1c71e60e 6725 }
4dd2ac2c 6726 else if (frame.to_allocate)
b19ee4bd
JJ
6727 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6728 GEN_INT (frame.to_allocate), style);
1c71e60e 6729
4dd2ac2c 6730 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 6731 if (ix86_save_reg (regno, false))
8362f420
JH
6732 {
6733 if (TARGET_64BIT)
6734 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6735 else
6736 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6737 }
4dd2ac2c 6738 if (frame_pointer_needed)
8362f420 6739 {
f5143c46 6740 /* Leave results in shorter dependency chains on CPUs that are
2ab0437e
JH
6741 able to grok it fast. */
6742 if (TARGET_USE_LEAVE)
6743 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6744 else if (TARGET_64BIT)
8362f420
JH
6745 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6746 else
6747 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6748 }
68f654ec 6749 }
68f654ec 6750
150cdc9e
RH
6751 if (cfun->machine->force_align_arg_pointer)
6752 {
6753 emit_insn (gen_addsi3 (stack_pointer_rtx,
6754 cfun->machine->force_align_arg_pointer,
6755 GEN_INT (-4)));
6756 }
6757
cbbf65e0 6758 /* Sibcall epilogues don't want a return instruction. */
1020a5ab 6759 if (style == 0)
cbbf65e0
RH
6760 return;
6761
38173d38 6762 if (crtl->args.pops_args && crtl->args.size)
2a2ab3f9 6763 {
38173d38 6764 rtx popc = GEN_INT (crtl->args.pops_args);
2a2ab3f9 6765
b8c752c8
UD
6766 /* i386 can only pop 64K bytes. If asked to pop more, pop
6767 return address, do explicit add, and jump indirectly to the
0f290768 6768 caller. */
2a2ab3f9 6769
38173d38 6770 if (crtl->args.pops_args >= 65536)
2a2ab3f9 6771 {
29b74761 6772 rtx ecx = gen_rtx_REG (SImode, CX_REG);
e9a25f70 6773
ccf8e764 6774 /* There is no "pascal" calling convention in any 64bit ABI. */
d0396b79 6775 gcc_assert (!TARGET_64BIT);
8362f420 6776
e075ae69
RH
6777 emit_insn (gen_popsi1 (ecx));
6778 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
11837777 6779 emit_jump_insn (gen_return_indirect_internal (ecx));
e9a25f70 6780 }
79325812 6781 else
e075ae69
RH
6782 emit_jump_insn (gen_return_pop_internal (popc));
6783 }
6784 else
6785 emit_jump_insn (gen_return_internal ());
6786}
bd09bdeb
RH
6787
6788/* Reset from the function's potential modifications. */
6789
6790static void
b96a374d
AJ
6791ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6792 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
bd09bdeb
RH
6793{
6794 if (pic_offset_table_rtx)
6fb5fa3c 6795 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
f8c2645c
AL
6796#if TARGET_MACHO
6797 /* Mach-O doesn't support labels at the end of objects, so if
6798 it looks like we might want one, insert a NOP. */
6799 {
6800 rtx insn = get_last_insn ();
6801 while (insn
6802 && NOTE_P (insn)
a38e7aa5 6803 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
f8c2645c
AL
6804 insn = PREV_INSN (insn);
6805 if (insn
6806 && (LABEL_P (insn)
6807 || (NOTE_P (insn)
a38e7aa5 6808 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
f8c2645c
AL
6809 fputs ("\tnop\n", file);
6810 }
6811#endif
6812
bd09bdeb 6813}
e075ae69
RH
6814\f
6815/* Extract the parts of an RTL expression that is a valid memory address
b446e5a2
JH
6816 for an instruction. Return 0 if the structure of the address is
6817 grossly off. Return -1 if the address contains ASHIFT, so it is not
74dc3e94 6818 strictly valid, but is still used for computing the length of an lea instruction. */
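/* Illustrative example (not in the original source): the 32-bit address
   12(%ebx,%esi,4), written in RTL as
     (plus (plus (mult (reg %esi) (const_int 4)) (reg %ebx)) (const_int 12)),
   decomposes into out->base = %ebx, out->index = %esi, out->scale = 4 and
   out->disp = (const_int 12), and the function returns 1. */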
e075ae69 6819
8fe75e43 6820int
8d531ab9 6821ix86_decompose_address (rtx addr, struct ix86_address *out)
e075ae69 6822{
7c93c2cc
PB
6823 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6824 rtx base_reg, index_reg;
e075ae69
RH
6825 HOST_WIDE_INT scale = 1;
6826 rtx scale_rtx = NULL_RTX;
b446e5a2 6827 int retval = 1;
74dc3e94 6828 enum ix86_address_seg seg = SEG_DEFAULT;
e075ae69 6829
7656aee4 6830 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
e075ae69
RH
6831 base = addr;
6832 else if (GET_CODE (addr) == PLUS)
6833 {
74dc3e94
RH
6834 rtx addends[4], op;
6835 int n = 0, i;
e075ae69 6836
74dc3e94
RH
6837 op = addr;
6838 do
e075ae69 6839 {
74dc3e94
RH
6840 if (n >= 4)
6841 return 0;
6842 addends[n++] = XEXP (op, 1);
6843 op = XEXP (op, 0);
2a2ab3f9 6844 }
74dc3e94
RH
6845 while (GET_CODE (op) == PLUS);
6846 if (n >= 4)
6847 return 0;
6848 addends[n] = op;
6849
6850 for (i = n; i >= 0; --i)
e075ae69 6851 {
74dc3e94
RH
6852 op = addends[i];
6853 switch (GET_CODE (op))
6854 {
6855 case MULT:
6856 if (index)
6857 return 0;
6858 index = XEXP (op, 0);
6859 scale_rtx = XEXP (op, 1);
6860 break;
6861
6862 case UNSPEC:
6863 if (XINT (op, 1) == UNSPEC_TP
6864 && TARGET_TLS_DIRECT_SEG_REFS
6865 && seg == SEG_DEFAULT)
6866 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6867 else
6868 return 0;
6869 break;
6870
6871 case REG:
6872 case SUBREG:
6873 if (!base)
6874 base = op;
6875 else if (!index)
6876 index = op;
6877 else
6878 return 0;
6879 break;
6880
6881 case CONST:
6882 case CONST_INT:
6883 case SYMBOL_REF:
6884 case LABEL_REF:
6885 if (disp)
6886 return 0;
6887 disp = op;
6888 break;
6889
6890 default:
6891 return 0;
6892 }
e075ae69 6893 }
e075ae69
RH
6894 }
6895 else if (GET_CODE (addr) == MULT)
6896 {
6897 index = XEXP (addr, 0); /* index*scale */
6898 scale_rtx = XEXP (addr, 1);
6899 }
6900 else if (GET_CODE (addr) == ASHIFT)
6901 {
6902 rtx tmp;
6903
6904 /* We're called for lea too, which implements ashift on occasion. */
6905 index = XEXP (addr, 0);
6906 tmp = XEXP (addr, 1);
7656aee4 6907 if (!CONST_INT_P (tmp))
b446e5a2 6908 return 0;
e075ae69
RH
6909 scale = INTVAL (tmp);
6910 if ((unsigned HOST_WIDE_INT) scale > 3)
b446e5a2 6911 return 0;
e075ae69 6912 scale = 1 << scale;
b446e5a2 6913 retval = -1;
2a2ab3f9 6914 }
2a2ab3f9 6915 else
e075ae69
RH
6916 disp = addr; /* displacement */
6917
6918 /* Extract the integral value of scale. */
6919 if (scale_rtx)
e9a25f70 6920 {
7656aee4 6921 if (!CONST_INT_P (scale_rtx))
b446e5a2 6922 return 0;
e075ae69 6923 scale = INTVAL (scale_rtx);
e9a25f70 6924 }
3b3c6a3f 6925
7c93c2cc
PB
6926 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6927 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6928
74dc3e94 6929 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
7c93c2cc
PB
6930 if (base_reg && index_reg && scale == 1
6931 && (index_reg == arg_pointer_rtx
6932 || index_reg == frame_pointer_rtx
6933 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
e075ae69 6934 {
7c93c2cc
PB
6935 rtx tmp;
6936 tmp = base, base = index, index = tmp;
6937 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
e075ae69
RH
6938 }
6939
6940 /* Special case: %ebp cannot be encoded as a base without a displacement. */
7c93c2cc
PB
6941 if ((base_reg == hard_frame_pointer_rtx
6942 || base_reg == frame_pointer_rtx
6943 || base_reg == arg_pointer_rtx) && !disp)
e075ae69
RH
6944 disp = const0_rtx;
6945
6946 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6947 Avoid this by transforming to [%esi+0]. */
8383d43c 6948 if (TARGET_K6 && !optimize_size
7c93c2cc
PB
6949 && base_reg && !index_reg && !disp
6950 && REG_P (base_reg)
6951 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
e075ae69
RH
6952 disp = const0_rtx;
6953
6954 /* Special case: encode reg+reg instead of reg*2. */
6955 if (!base && index && scale && scale == 2)
7c93c2cc 6956 base = index, base_reg = index_reg, scale = 1;
0f290768 6957
e075ae69
RH
6958 /* Special case: scaling cannot be encoded without base or displacement. */
6959 if (!base && !disp && index && scale != 1)
6960 disp = const0_rtx;
6961
6962 out->base = base;
6963 out->index = index;
6964 out->disp = disp;
6965 out->scale = scale;
74dc3e94 6966 out->seg = seg;
3b3c6a3f 6967
b446e5a2 6968 return retval;
e075ae69 6969}
01329426
JH
6970\f
6971/* Return cost of the memory address x.
6972 For i386, it is better to use a complex address than let gcc copy
6973 the address into a reg and make a new pseudo. But not if the address
6974 requires two regs - that would mean more pseudos with longer
6975 lifetimes. */
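/* Illustrative example (not in the original source): for an address such
   as (plus (reg pseudo_a) (reg pseudo_b)) with two distinct pseudo
   registers, the computation below yields 1 + 1 + 1 == 3. */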
dcefdf67 6976static int
b96a374d 6977ix86_address_cost (rtx x)
01329426
JH
6978{
6979 struct ix86_address parts;
6980 int cost = 1;
d0396b79 6981 int ok = ix86_decompose_address (x, &parts);
3b3c6a3f 6982
d0396b79 6983 gcc_assert (ok);
01329426 6984
7c93c2cc
PB
6985 if (parts.base && GET_CODE (parts.base) == SUBREG)
6986 parts.base = SUBREG_REG (parts.base);
6987 if (parts.index && GET_CODE (parts.index) == SUBREG)
6988 parts.index = SUBREG_REG (parts.index);
6989
01329426
JH
6990 /* Attempt to minimize number of registers in the address. */
6991 if ((parts.base
6992 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6993 || (parts.index
6994 && (!REG_P (parts.index)
6995 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6996 cost++;
6997
6998 if (parts.base
6999 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
7000 && parts.index
7001 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
7002 && parts.base != parts.index)
7003 cost++;
7004
7005 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
7006 since its predecode logic can't detect the length of such instructions
7007 and decoding degenerates to the vector decoder. Increase the cost of such
7008 addresses here. The penalty is at least 2 cycles. It may be worthwhile
0f290768 7009 to split such addresses or even to refuse such addresses at all.
01329426
JH
7010
7011 The following addressing modes are affected:
7012 [base+scale*index]
7013 [scale*index+disp]
7014 [base+index]
0f290768 7015
01329426
JH
7016 The first and last cases may be avoidable by explicitly coding the zero in
7017 the memory address, but I don't have an AMD-K6 machine handy to check this
7018 theory. */
7019
7020 if (TARGET_K6
7021 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
7022 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
7023 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
7024 cost += 10;
0f290768 7025
01329426
JH
7026 return cost;
7027}
7028\f
2ed941ec
RH
7029/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
7030 this is used to form addresses of local data when -fPIC is in
7031 use. */
828a4fe4
MS
7032
7033static bool
7034darwin_local_data_pic (rtx disp)
7035{
7036 if (GET_CODE (disp) == MINUS)
7037 {
7038 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
7039 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
7040 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
7041 {
7042 const char *sym_name = XSTR (XEXP (disp, 1), 0);
7043 if (! strcmp (sym_name, "<pic base>"))
7044 return true;
7045 }
7046 }
7047
7048 return false;
7049}
2ed941ec 7050
f996902d
RH
7051/* Determine if a given RTX is a valid constant. We already know this
7052 satisfies CONSTANT_P. */
7053
7054bool
b96a374d 7055legitimate_constant_p (rtx x)
f996902d 7056{
f996902d
RH
7057 switch (GET_CODE (x))
7058 {
f996902d 7059 case CONST:
1e19ac74 7060 x = XEXP (x, 0);
f996902d 7061
1e19ac74 7062 if (GET_CODE (x) == PLUS)
828a4fe4 7063 {
7656aee4 7064 if (!CONST_INT_P (XEXP (x, 1)))
828a4fe4 7065 return false;
1e19ac74 7066 x = XEXP (x, 0);
828a4fe4
MS
7067 }
7068
1e19ac74 7069 if (TARGET_MACHO && darwin_local_data_pic (x))
828a4fe4
MS
7070 return true;
7071
f996902d 7072 /* Only some unspecs are valid as "constants". */
1e19ac74
RH
7073 if (GET_CODE (x) == UNSPEC)
7074 switch (XINT (x, 1))
f996902d 7075 {
dc4d7240 7076 case UNSPEC_GOT:
7dcbf659 7077 case UNSPEC_GOTOFF:
dc4d7240 7078 case UNSPEC_PLTOFF:
7dcbf659 7079 return TARGET_64BIT;
f996902d 7080 case UNSPEC_TPOFF:
cb0e3e3f 7081 case UNSPEC_NTPOFF:
fd4aca96
RH
7082 x = XVECEXP (x, 0, 0);
7083 return (GET_CODE (x) == SYMBOL_REF
7084 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
cb0e3e3f 7085 case UNSPEC_DTPOFF:
fd4aca96
RH
7086 x = XVECEXP (x, 0, 0);
7087 return (GET_CODE (x) == SYMBOL_REF
7088 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
f996902d
RH
7089 default:
7090 return false;
7091 }
1e19ac74
RH
7092
7093 /* We must have drilled down to a symbol. */
fd4aca96
RH
7094 if (GET_CODE (x) == LABEL_REF)
7095 return true;
7096 if (GET_CODE (x) != SYMBOL_REF)
1e19ac74
RH
7097 return false;
7098 /* FALLTHRU */
7099
7100 case SYMBOL_REF:
7101 /* TLS symbols are never valid. */
fd4aca96 7102 if (SYMBOL_REF_TLS_MODEL (x))
1e19ac74 7103 return false;
da489f73
RH
7104
7105 /* DLLIMPORT symbols are never valid. */
7106 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
7107 && SYMBOL_REF_DLLIMPORT_P (x))
7108 return false;
f996902d
RH
7109 break;
7110
d0b89852
RS
7111 case CONST_DOUBLE:
7112 if (GET_MODE (x) == TImode
7113 && x != CONST0_RTX (TImode)
7114 && !TARGET_64BIT)
7115 return false;
7116 break;
7117
7118 case CONST_VECTOR:
7119 if (x == CONST0_RTX (GET_MODE (x)))
7120 return true;
7121 return false;
7122
f996902d
RH
7123 default:
7124 break;
7125 }
7126
7127 /* Otherwise we handle everything else in the move patterns. */
7128 return true;
7129}
7130
3a04ff64
RH
7131/* Determine if it's legal to put X into the constant pool. This
7132 is not possible for the address of thread-local symbols, which
7133 is checked above. */
7134
7135static bool
b96a374d 7136ix86_cannot_force_const_mem (rtx x)
3a04ff64 7137{
d0b89852
RS
7138 /* We can always put integral constants and vectors in memory. */
7139 switch (GET_CODE (x))
7140 {
7141 case CONST_INT:
7142 case CONST_DOUBLE:
7143 case CONST_VECTOR:
7144 return false;
7145
7146 default:
7147 break;
7148 }
3a04ff64
RH
7149 return !legitimate_constant_p (x);
7150}
7151
f996902d
RH
7152/* Determine if a given RTX is a valid constant address. */
7153
7154bool
b96a374d 7155constant_address_p (rtx x)
f996902d 7156{
a94f136b 7157 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
f996902d
RH
7158}
7159
7160/* Nonzero if the constant value X is a legitimate general operand
fce5a9f2 7161 when generating PIC code. It is given that flag_pic is on and
f996902d
RH
7162 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
7163
7164bool
b96a374d 7165legitimate_pic_operand_p (rtx x)
f996902d
RH
7166{
7167 rtx inner;
7168
7169 switch (GET_CODE (x))
7170 {
7171 case CONST:
7172 inner = XEXP (x, 0);
7dcbf659 7173 if (GET_CODE (inner) == PLUS
7656aee4 7174 && CONST_INT_P (XEXP (inner, 1)))
7dcbf659 7175 inner = XEXP (inner, 0);
f996902d
RH
7176
7177 /* Only some unspecs are valid as "constants". */
7178 if (GET_CODE (inner) == UNSPEC)
7179 switch (XINT (inner, 1))
7180 {
dc4d7240 7181 case UNSPEC_GOT:
7dcbf659 7182 case UNSPEC_GOTOFF:
dc4d7240 7183 case UNSPEC_PLTOFF:
7dcbf659 7184 return TARGET_64BIT;
f996902d 7185 case UNSPEC_TPOFF:
fd4aca96
RH
7186 x = XVECEXP (inner, 0, 0);
7187 return (GET_CODE (x) == SYMBOL_REF
7188 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
f996902d
RH
7189 default:
7190 return false;
7191 }
5efb1046 7192 /* FALLTHRU */
f996902d
RH
7193
7194 case SYMBOL_REF:
7195 case LABEL_REF:
7196 return legitimate_pic_address_disp_p (x);
7197
7198 default:
7199 return true;
7200 }
7201}
7202
e075ae69
RH
7203/* Determine if a given CONST RTX is a valid memory displacement
7204 in PIC mode. */
0f290768 7205
59be65f6 7206int
8d531ab9 7207legitimate_pic_address_disp_p (rtx disp)
91bb873f 7208{
f996902d
RH
7209 bool saw_plus;
7210
6eb791fc
JH
7211 /* In 64bit mode we can allow direct addresses of symbols and labels
7212 when they are not dynamic symbols. */
c05dbe81
JH
7213 if (TARGET_64BIT)
7214 {
fd4aca96
RH
7215 rtx op0 = disp, op1;
7216
7217 switch (GET_CODE (disp))
a132b6a8 7218 {
fd4aca96
RH
7219 case LABEL_REF:
7220 return true;
7221
7222 case CONST:
7223 if (GET_CODE (XEXP (disp, 0)) != PLUS)
7224 break;
7225 op0 = XEXP (XEXP (disp, 0), 0);
7226 op1 = XEXP (XEXP (disp, 0), 1);
7656aee4 7227 if (!CONST_INT_P (op1)
fd4aca96
RH
7228 || INTVAL (op1) >= 16*1024*1024
7229 || INTVAL (op1) < -16*1024*1024)
f7288899 7230 break;
fd4aca96
RH
7231 if (GET_CODE (op0) == LABEL_REF)
7232 return true;
7233 if (GET_CODE (op0) != SYMBOL_REF)
7234 break;
7235 /* FALLTHRU */
a132b6a8 7236
fd4aca96 7237 case SYMBOL_REF:
a132b6a8 7238 /* TLS references should always be enclosed in UNSPEC. */
fd4aca96
RH
7239 if (SYMBOL_REF_TLS_MODEL (op0))
7240 return false;
dc4d7240
JH
7241 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
7242 && ix86_cmodel != CM_LARGE_PIC)
fd4aca96
RH
7243 return true;
7244 break;
7245
7246 default:
7247 break;
a132b6a8 7248 }
c05dbe81 7249 }
91bb873f
RH
7250 if (GET_CODE (disp) != CONST)
7251 return 0;
7252 disp = XEXP (disp, 0);
7253
6eb791fc
JH
7254 if (TARGET_64BIT)
7255 {
7256 /* It is unsafe to allow PLUS expressions; this limits the allowed
7257 distance of GOT references. We should not need these anyway. */
7258 if (GET_CODE (disp) != UNSPEC
7dcbf659 7259 || (XINT (disp, 1) != UNSPEC_GOTPCREL
dc4d7240
JH
7260 && XINT (disp, 1) != UNSPEC_GOTOFF
7261 && XINT (disp, 1) != UNSPEC_PLTOFF))
6eb791fc
JH
7262 return 0;
7263
7264 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
7265 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
7266 return 0;
7267 return 1;
7268 }
7269
f996902d 7270 saw_plus = false;
91bb873f
RH
7271 if (GET_CODE (disp) == PLUS)
7272 {
7656aee4 7273 if (!CONST_INT_P (XEXP (disp, 1)))
91bb873f
RH
7274 return 0;
7275 disp = XEXP (disp, 0);
f996902d 7276 saw_plus = true;
91bb873f
RH
7277 }
7278
828a4fe4
MS
7279 if (TARGET_MACHO && darwin_local_data_pic (disp))
7280 return 1;
b069de3b 7281
8ee41eaf 7282 if (GET_CODE (disp) != UNSPEC)
91bb873f
RH
7283 return 0;
7284
623fe810
RH
7285 switch (XINT (disp, 1))
7286 {
8ee41eaf 7287 case UNSPEC_GOT:
f996902d
RH
7288 if (saw_plus)
7289 return false;
170bdaba
RS
7290 /* We need to check for both symbols and labels because VxWorks loads
7291 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7292 details. */
7293 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7294 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
8ee41eaf 7295 case UNSPEC_GOTOFF:
47efdea4
JH
7296 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7297 While the ABI also specifies a 32bit relocation, we don't produce it in the
7298 small PIC model at all. */
7299 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7300 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
7301 && !TARGET_64BIT)
170bdaba 7302 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
799b33a0 7303 return false;
f996902d 7304 case UNSPEC_GOTTPOFF:
dea73790
JJ
7305 case UNSPEC_GOTNTPOFF:
7306 case UNSPEC_INDNTPOFF:
f996902d
RH
7307 if (saw_plus)
7308 return false;
fd4aca96
RH
7309 disp = XVECEXP (disp, 0, 0);
7310 return (GET_CODE (disp) == SYMBOL_REF
7311 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
f996902d 7312 case UNSPEC_NTPOFF:
fd4aca96
RH
7313 disp = XVECEXP (disp, 0, 0);
7314 return (GET_CODE (disp) == SYMBOL_REF
7315 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
f996902d 7316 case UNSPEC_DTPOFF:
fd4aca96
RH
7317 disp = XVECEXP (disp, 0, 0);
7318 return (GET_CODE (disp) == SYMBOL_REF
7319 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
623fe810 7320 }
fce5a9f2 7321
623fe810 7322 return 0;
91bb873f
RH
7323}
7324
e075ae69
RH
7325/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7326 memory address for an instruction. The MODE argument is the machine mode
7327 for the MEM expression that wants to use this address.
7328
7329 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
7330 convert common non-canonical forms to canonical form so that they will
7331 be recognized. */
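/* Illustrative example (not in the original source): the canonical scaled
   form is (mult (reg) (const_int 4)); the equivalent shift form
   (ashift (reg) (const_int 2)) makes ix86_decompose_address return -1, so
   it is accepted only for lea length computation and rejected here. */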
7332
3b3c6a3f 7333int
ee2f65b4
RH
7334legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
7335 rtx addr, int strict)
3b3c6a3f 7336{
e075ae69
RH
7337 struct ix86_address parts;
7338 rtx base, index, disp;
7339 HOST_WIDE_INT scale;
7340 const char *reason = NULL;
7341 rtx reason_rtx = NULL_RTX;
3b3c6a3f 7342
b446e5a2 7343 if (ix86_decompose_address (addr, &parts) <= 0)
3b3c6a3f 7344 {
e075ae69 7345 reason = "decomposition failed";
50e60bc3 7346 goto report_error;
3b3c6a3f
MM
7347 }
7348
e075ae69
RH
7349 base = parts.base;
7350 index = parts.index;
7351 disp = parts.disp;
7352 scale = parts.scale;
91f0226f 7353
e075ae69 7354 /* Validate base register.
e9a25f70 7355
7c93c2cc
PB
7356 Don't allow SUBREGs that span more than a word here. It can lead to spill
7357 failures when the base is one word out of a two-word structure, which is
7358 represented internally as a DImode int. */
e9a25f70 7359
3b3c6a3f
MM
7360 if (base)
7361 {
7c93c2cc 7362 rtx reg;
e075ae69 7363 reason_rtx = base;
5656a184 7364
7c93c2cc
PB
7365 if (REG_P (base))
7366 reg = base;
7367 else if (GET_CODE (base) == SUBREG
7368 && REG_P (SUBREG_REG (base))
7369 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7370 <= UNITS_PER_WORD)
7371 reg = SUBREG_REG (base);
7372 else
3b3c6a3f 7373 {
e075ae69 7374 reason = "base is not a register";
50e60bc3 7375 goto report_error;
3b3c6a3f
MM
7376 }
7377
c954bd01
RH
7378 if (GET_MODE (base) != Pmode)
7379 {
e075ae69 7380 reason = "base is not in Pmode";
50e60bc3 7381 goto report_error;
c954bd01
RH
7382 }
7383
7c93c2cc
PB
7384 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7385 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
3b3c6a3f 7386 {
e075ae69 7387 reason = "base is not valid";
50e60bc3 7388 goto report_error;
3b3c6a3f
MM
7389 }
7390 }
7391
e075ae69 7392 /* Validate index register.
e9a25f70 7393
7c93c2cc 7394 Don't allow SUBREGs that span more than a word here -- same as above. */
e075ae69
RH
7395
7396 if (index)
3b3c6a3f 7397 {
7c93c2cc 7398 rtx reg;
e075ae69
RH
7399 reason_rtx = index;
7400
7c93c2cc
PB
7401 if (REG_P (index))
7402 reg = index;
7403 else if (GET_CODE (index) == SUBREG
7404 && REG_P (SUBREG_REG (index))
7405 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7406 <= UNITS_PER_WORD)
7407 reg = SUBREG_REG (index);
7408 else
3b3c6a3f 7409 {
e075ae69 7410 reason = "index is not a register";
50e60bc3 7411 goto report_error;
3b3c6a3f
MM
7412 }
7413
e075ae69 7414 if (GET_MODE (index) != Pmode)
c954bd01 7415 {
e075ae69 7416 reason = "index is not in Pmode";
50e60bc3 7417 goto report_error;
c954bd01
RH
7418 }
7419
7c93c2cc
PB
7420 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7421 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
3b3c6a3f 7422 {
e075ae69 7423 reason = "index is not valid";
50e60bc3 7424 goto report_error;
3b3c6a3f
MM
7425 }
7426 }
3b3c6a3f 7427
e075ae69
RH
7428 /* Validate scale factor. */
7429 if (scale != 1)
3b3c6a3f 7430 {
e075ae69
RH
7431 reason_rtx = GEN_INT (scale);
7432 if (!index)
3b3c6a3f 7433 {
e075ae69 7434 reason = "scale without index";
50e60bc3 7435 goto report_error;
3b3c6a3f
MM
7436 }
7437
e075ae69 7438 if (scale != 2 && scale != 4 && scale != 8)
3b3c6a3f 7439 {
e075ae69 7440 reason = "scale is not a valid multiplier";
50e60bc3 7441 goto report_error;
3b3c6a3f
MM
7442 }
7443 }
7444
91bb873f 7445 /* Validate displacement. */
3b3c6a3f
MM
7446 if (disp)
7447 {
e075ae69
RH
7448 reason_rtx = disp;
7449
f996902d
RH
7450 if (GET_CODE (disp) == CONST
7451 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7452 switch (XINT (XEXP (disp, 0), 1))
7453 {
47efdea4
JH
7454 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7455 used. While the ABI also specifies 32bit relocations, we don't produce
7456 them at all and use IP-relative addressing instead. */
f996902d
RH
7457 case UNSPEC_GOT:
7458 case UNSPEC_GOTOFF:
47efdea4
JH
7459 gcc_assert (flag_pic);
7460 if (!TARGET_64BIT)
7461 goto is_legitimate_pic;
7462 reason = "64bit address unspec";
7463 goto report_error;
5656a184 7464
f996902d 7465 case UNSPEC_GOTPCREL:
d0396b79 7466 gcc_assert (flag_pic);
f996902d
RH
7467 goto is_legitimate_pic;
7468
7469 case UNSPEC_GOTTPOFF:
dea73790
JJ
7470 case UNSPEC_GOTNTPOFF:
7471 case UNSPEC_INDNTPOFF:
f996902d
RH
7472 case UNSPEC_NTPOFF:
7473 case UNSPEC_DTPOFF:
7474 break;
7475
7476 default:
7477 reason = "invalid address unspec";
7478 goto report_error;
7479 }
7480
f7288899
EC
7481 else if (SYMBOLIC_CONST (disp)
7482 && (flag_pic
7483 || (TARGET_MACHO
b069de3b 7484#if TARGET_MACHO
f7288899
EC
7485 && MACHOPIC_INDIRECT
7486 && !machopic_operand_p (disp)
b069de3b 7487#endif
f7288899 7488 )))
3b3c6a3f 7489 {
f7288899 7490
f996902d 7491 is_legitimate_pic:
0d7d98ee
JH
7492 if (TARGET_64BIT && (index || base))
7493 {
75d38379
JJ
7494 /* foo@dtpoff(%rX) is ok. */
7495 if (GET_CODE (disp) != CONST
7496 || GET_CODE (XEXP (disp, 0)) != PLUS
7497 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7656aee4 7498 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
75d38379
JJ
7499 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7500 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7501 {
7502 reason = "non-constant pic memory reference";
7503 goto report_error;
7504 }
0d7d98ee 7505 }
75d38379 7506 else if (! legitimate_pic_address_disp_p (disp))
91bb873f 7507 {
e075ae69 7508 reason = "displacement is an invalid pic construct";
50e60bc3 7509 goto report_error;
91bb873f
RH
7510 }
7511
4e9efe54 7512 /* This code used to verify that a symbolic pic displacement
0f290768
KH
7513 includes the pic_offset_table_rtx register.
7514
4e9efe54
JH
7515 While this is a good idea, unfortunately these constructs may
7516 be created by "adds using lea" optimization for incorrect
7517 code like:
7518
7519 int a;
7520 int foo(int i)
7521 {
7522 return *(&a+i);
7523 }
7524
50e60bc3 7525 This code is nonsensical, but results in addressing the
4e9efe54 7526 GOT table with a pic_offset_table_rtx base. We can't
f710504c 7527 just refuse it easily, since it gets matched by the
4e9efe54
JH
7528 "addsi3" pattern, which later gets split to an lea when the
7529 output register differs from the input. While this could be
7530 handled by a separate addsi pattern for this case that never
7531 results in an lea, disabling this test seems to be the easier
7532 and correct fix for the crash. */
3b3c6a3f 7533 }
a94f136b 7534 else if (GET_CODE (disp) != LABEL_REF
7656aee4 7535 && !CONST_INT_P (disp)
a94f136b
JH
7536 && (GET_CODE (disp) != CONST
7537 || !legitimate_constant_p (disp))
7538 && (GET_CODE (disp) != SYMBOL_REF
7539 || !legitimate_constant_p (disp)))
f996902d
RH
7540 {
7541 reason = "displacement is not constant";
7542 goto report_error;
7543 }
8fe75e43
RH
7544 else if (TARGET_64BIT
7545 && !x86_64_immediate_operand (disp, VOIDmode))
c05dbe81
JH
7546 {
7547 reason = "displacement is out of range";
7548 goto report_error;
7549 }
3b3c6a3f
MM
7550 }
7551
e075ae69 7552 /* Everything looks valid. */
3b3c6a3f 7553 return TRUE;
e075ae69 7554
5bf0ebab 7555 report_error:
e075ae69 7556 return FALSE;
3b3c6a3f 7557}
3b3c6a3f 7558\f
569b7f6a 7559/* Return a unique alias set for the GOT. */
55efb413 7560
4862826d 7561static alias_set_type
b96a374d 7562ix86_GOT_alias_set (void)
55efb413 7563{
4862826d 7564 static alias_set_type set = -1;
5bf0ebab
RH
7565 if (set == -1)
7566 set = new_alias_set ();
7567 return set;
0f290768 7568}
55efb413 7569
3b3c6a3f
MM
7570/* Return a legitimate reference for ORIG (an address) using the
7571 register REG. If REG is 0, a new pseudo is generated.
7572
91bb873f 7573 There are two types of references that must be handled:
3b3c6a3f
MM
7574
7575 1. Global data references must load the address from the GOT, via
7576 the PIC reg. An insn is emitted to do this load, and the reg is
7577 returned.
7578
91bb873f
RH
7579 2. Static data references, constant pool addresses, and code labels
7580 compute the address as an offset from the GOT, whose base is in
2ae5ae57 7581 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
91bb873f
RH
7582 differentiate them from global data objects. The returned
7583 address is the PIC reg + an unspec constant.
3b3c6a3f
MM
7584
7585 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 7586 reg also appears in the address. */
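/* Illustrative sketch (not in the original source), matching the two cases
   above for a symbol foo:
     1. global:  (mem (plus (reg pic) (const (unspec [foo] UNSPEC_GOT))))
     2. local:   (plus (reg pic) (const (unspec [foo] UNSPEC_GOTOFF)))  */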
3b3c6a3f 7587
b39edae3 7588static rtx
b96a374d 7589legitimize_pic_address (rtx orig, rtx reg)
3b3c6a3f
MM
7590{
7591 rtx addr = orig;
9415ab7d 7592 rtx new_rtx = orig;
91bb873f 7593 rtx base;
3b3c6a3f 7594
b069de3b 7595#if TARGET_MACHO
f7288899
EC
7596 if (TARGET_MACHO && !TARGET_64BIT)
7597 {
7598 if (reg == 0)
7599 reg = gen_reg_rtx (Pmode);
7600 /* Use the generic Mach-O PIC machinery. */
7601 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7602 }
b069de3b
SS
7603#endif
7604
c05dbe81 7605 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9415ab7d 7606 new_rtx = addr;
7dcbf659
JH
7607 else if (TARGET_64BIT
7608 && ix86_cmodel != CM_SMALL_PIC
170bdaba 7609 && gotoff_operand (addr, Pmode))
7dcbf659
JH
7610 {
7611 rtx tmpreg;
7612 /* This symbol may be referenced via a displacement from the PIC
7613 base address (@GOTOFF). */
7614
7615 if (reload_in_progress)
6fb5fa3c 7616 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7dcbf659
JH
7617 if (GET_CODE (addr) == CONST)
7618 addr = XEXP (addr, 0);
7619 if (GET_CODE (addr) == PLUS)
7620 {
9415ab7d
TN
7621 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7622 UNSPEC_GOTOFF);
7623 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7dcbf659
JH
7624 }
7625 else
9415ab7d
TN
7626 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7627 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7dcbf659
JH
7628 if (!reg)
7629 tmpreg = gen_reg_rtx (Pmode);
7630 else
7631 tmpreg = reg;
9415ab7d 7632 emit_move_insn (tmpreg, new_rtx);
7dcbf659
JH
7633
7634 if (reg != 0)
7635 {
9415ab7d
TN
7636 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7637 tmpreg, 1, OPTAB_DIRECT);
7638 new_rtx = reg;
7dcbf659 7639 }
9415ab7d 7640 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7dcbf659 7641 }
170bdaba 7642 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
3b3c6a3f 7643 {
c05dbe81
JH
7644 /* This symbol may be referenced via a displacement from the PIC
7645 base address (@GOTOFF). */
3b3c6a3f 7646
c05dbe81 7647 if (reload_in_progress)
6fb5fa3c 7648 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
799b33a0
JH
7649 if (GET_CODE (addr) == CONST)
7650 addr = XEXP (addr, 0);
7651 if (GET_CODE (addr) == PLUS)
7652 {
9415ab7d
TN
7653 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7654 UNSPEC_GOTOFF);
7655 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
799b33a0
JH
7656 }
7657 else
9415ab7d
TN
7658 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7659 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7660 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
3b3c6a3f 7661
c05dbe81
JH
7662 if (reg != 0)
7663 {
9415ab7d
TN
7664 emit_move_insn (reg, new_rtx);
7665 new_rtx = reg;
c05dbe81 7666 }
3b3c6a3f 7667 }
170bdaba
RS
7668 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7669 /* We can't use @GOTOFF for text labels on VxWorks;
7670 see gotoff_operand. */
7671 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
3b3c6a3f 7672 {
8502420b
KT
7673 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7674 {
7675 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
7676 return legitimize_dllimport_symbol (addr, true);
7677 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
7678 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7679 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
7680 {
7681 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
7682 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
7683 }
7684 }
ccf8e764 7685
dc4d7240 7686 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
14f73b5a 7687 {
9415ab7d
TN
7688 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7689 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7690 new_rtx = gen_const_mem (Pmode, new_rtx);
7691 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
14f73b5a
JH
7692
7693 if (reg == 0)
7694 reg = gen_reg_rtx (Pmode);
7695 /* Use gen_movsi directly, otherwise the address is loaded
7696 into a register for CSE. We don't want to CSE these addresses;
7697 instead we CSE addresses from the GOT table, so skip this. */
9415ab7d
TN
7698 emit_insn (gen_movsi (reg, new_rtx));
7699 new_rtx = reg;
14f73b5a
JH
7700 }
7701 else
7702 {
7703 /* This symbol must be referenced via a load from the
7704 Global Offset Table (@GOT). */
3b3c6a3f 7705
66edd3b4 7706 if (reload_in_progress)
6fb5fa3c 7707 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9415ab7d
TN
7708 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7709 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
dc4d7240 7710 if (TARGET_64BIT)
9415ab7d
TN
7711 new_rtx = force_reg (Pmode, new_rtx);
7712 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7713 new_rtx = gen_const_mem (Pmode, new_rtx);
7714 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
3b3c6a3f 7715
14f73b5a
JH
7716 if (reg == 0)
7717 reg = gen_reg_rtx (Pmode);
9415ab7d
TN
7718 emit_move_insn (reg, new_rtx);
7719 new_rtx = reg;
14f73b5a 7720 }
0f290768 7721 }
91bb873f
RH
7722 else
7723 {
7656aee4 7724 if (CONST_INT_P (addr)
d8ff1871
JH
7725 && !x86_64_immediate_operand (addr, VOIDmode))
7726 {
7727 if (reg)
7728 {
7729 emit_move_insn (reg, addr);
9415ab7d 7730 new_rtx = reg;
d8ff1871
JH
7731 }
7732 else
9415ab7d 7733 new_rtx = force_reg (Pmode, addr);
d8ff1871
JH
7734 }
7735 else if (GET_CODE (addr) == CONST)
3b3c6a3f 7736 {
91bb873f 7737 addr = XEXP (addr, 0);
e3c8ea67
RH
7738
7739 /* We must match stuff we generate before. Assume the only
7740 unspecs that can get here are ours. Not that we could do
43f3a59d 7741 anything with them anyway.... */
e3c8ea67
RH
7742 if (GET_CODE (addr) == UNSPEC
7743 || (GET_CODE (addr) == PLUS
7744 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7745 return orig;
d0396b79 7746 gcc_assert (GET_CODE (addr) == PLUS);
3b3c6a3f 7747 }
91bb873f
RH
7748 if (GET_CODE (addr) == PLUS)
7749 {
7750 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
e9a25f70 7751
91bb873f
RH
7752 /* Check first to see if this is a constant offset from a @GOTOFF
7753 symbol reference. */
170bdaba 7754 if (gotoff_operand (op0, Pmode)
7656aee4 7755 && CONST_INT_P (op1))
91bb873f 7756 {
6eb791fc
JH
7757 if (!TARGET_64BIT)
7758 {
66edd3b4 7759 if (reload_in_progress)
6fb5fa3c 7760 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9415ab7d
TN
7761 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7762 UNSPEC_GOTOFF);
7763 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7764 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7765 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
91bb873f 7766
6eb791fc
JH
7767 if (reg != 0)
7768 {
9415ab7d
TN
7769 emit_move_insn (reg, new_rtx);
7770 new_rtx = reg;
6eb791fc
JH
7771 }
7772 }
7773 else
91bb873f 7774 {
75d38379
JJ
7775 if (INTVAL (op1) < -16*1024*1024
7776 || INTVAL (op1) >= 16*1024*1024)
a7297856
ILT
7777 {
7778 if (!x86_64_immediate_operand (op1, Pmode))
7779 op1 = force_reg (Pmode, op1);
9415ab7d 7780 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
a7297856 7781 }
91bb873f
RH
7782 }
7783 }
7784 else
7785 {
7786 base = legitimize_pic_address (XEXP (addr, 0), reg);
9415ab7d
TN
7787 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7788 base == reg ? NULL_RTX : reg);
91bb873f 7789
9415ab7d
TN
7790 if (CONST_INT_P (new_rtx))
7791 new_rtx = plus_constant (base, INTVAL (new_rtx));
91bb873f
RH
7792 else
7793 {
9415ab7d 7794 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
91bb873f 7795 {
9415ab7d
TN
7796 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7797 new_rtx = XEXP (new_rtx, 1);
91bb873f 7798 }
9415ab7d 7799 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
91bb873f
RH
7800 }
7801 }
7802 }
3b3c6a3f 7803 }
9415ab7d 7804 return new_rtx;
3b3c6a3f
MM
7805}
7806\f
74dc3e94 7807/* Load the thread pointer. If TO_REG is true, force it into a register. */
f996902d
RH
7808
7809static rtx
b96a374d 7810get_thread_pointer (int to_reg)
f996902d 7811{
74dc3e94 7812 rtx tp, reg, insn;
f996902d
RH
7813
7814 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
74dc3e94
RH
7815 if (!to_reg)
7816 return tp;
f996902d 7817
74dc3e94
RH
7818 reg = gen_reg_rtx (Pmode);
7819 insn = gen_rtx_SET (VOIDmode, reg, tp);
7820 insn = emit_insn (insn);
7821
7822 return reg;
7823}
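/* For orientation, a user-level sketch (an assumption about the GNU TLS
   ABI, where on x86-64 GNU/Linux %fs:0 holds the thread pointer itself;
   32-bit targets use %gs) of what the UNSPEC_TP load above becomes:

     void *tp;
     __asm__ ("mov %%fs:0, %0" : "=r" (tp));  */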
7824
7825/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7826 false if we expect this to be used for a memory address and true if
7827 we expect to load the address into a register. */
7828
7829static rtx
b96a374d 7830legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
74dc3e94 7831{
5bf5a10b 7832 rtx dest, base, off, pic, tp;
74dc3e94
RH
7833 int type;
7834
7835 switch (model)
7836 {
7837 case TLS_MODEL_GLOBAL_DYNAMIC:
7838 dest = gen_reg_rtx (Pmode);
5bf5a10b
AO
7839 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7840
7841 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
74dc3e94 7842 {
29b74761 7843 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
74dc3e94
RH
7844
7845 start_sequence ();
7846 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7847 insns = get_insns ();
7848 end_sequence ();
7849
2d79fde8 7850 CONST_OR_PURE_CALL_P (insns) = 1;
74dc3e94
RH
7851 emit_libcall_block (insns, dest, rax, x);
7852 }
5bf5a10b
AO
7853 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7854 emit_insn (gen_tls_global_dynamic_64 (dest, x));
74dc3e94
RH
7855 else
7856 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5bf5a10b
AO
7857
7858 if (TARGET_GNU2_TLS)
7859 {
7860 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7861
7862 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7863 }
74dc3e94
RH
7864 break;
7865
7866 case TLS_MODEL_LOCAL_DYNAMIC:
7867 base = gen_reg_rtx (Pmode);
5bf5a10b
AO
7868 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7869
7870 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
74dc3e94 7871 {
29b74761 7872 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
74dc3e94
RH
7873
7874 start_sequence ();
7875 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7876 insns = get_insns ();
7877 end_sequence ();
7878
7879 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7880 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
2d79fde8 7881 CONST_OR_PURE_CALL_P (insns) = 1;
74dc3e94
RH
7882 emit_libcall_block (insns, base, rax, note);
7883 }
5bf5a10b
AO
7884 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7885 emit_insn (gen_tls_local_dynamic_base_64 (base));
74dc3e94
RH
7886 else
7887 emit_insn (gen_tls_local_dynamic_base_32 (base));
7888
5bf5a10b
AO
7889 if (TARGET_GNU2_TLS)
7890 {
7891 rtx x = ix86_tls_module_base ();
7892
31ebc801
AO
7893 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7894 gen_rtx_MINUS (Pmode, x, tp));
5bf5a10b
AO
7895 }
7896
74dc3e94
RH
7897 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7898 off = gen_rtx_CONST (Pmode, off);
7899
5bf5a10b 7900 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
31ebc801
AO
7901
7902 if (TARGET_GNU2_TLS)
7903 {
7904 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7905
7906 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7907 }
7908
5bf5a10b 7909 break;
74dc3e94
RH
7910
7911 case TLS_MODEL_INITIAL_EXEC:
7912 if (TARGET_64BIT)
7913 {
7914 pic = NULL;
7915 type = UNSPEC_GOTNTPOFF;
7916 }
7917 else if (flag_pic)
7918 {
7919 if (reload_in_progress)
6fb5fa3c 7920 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
74dc3e94 7921 pic = pic_offset_table_rtx;
5bf5a10b 7922 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
74dc3e94 7923 }
5bf5a10b 7924 else if (!TARGET_ANY_GNU_TLS)
74dc3e94
RH
7925 {
7926 pic = gen_reg_rtx (Pmode);
7927 emit_insn (gen_set_got (pic));
7928 type = UNSPEC_GOTTPOFF;
7929 }
7930 else
7931 {
7932 pic = NULL;
7933 type = UNSPEC_INDNTPOFF;
7934 }
7935
7936 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7937 off = gen_rtx_CONST (Pmode, off);
7938 if (pic)
7939 off = gen_rtx_PLUS (Pmode, pic, off);
542a8afa 7940 off = gen_const_mem (Pmode, off);
74dc3e94
RH
7941 set_mem_alias_set (off, ix86_GOT_alias_set ());
7942
5bf5a10b 7943 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
74dc3e94
RH
7944 {
7945 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7946 off = force_reg (Pmode, off);
7947 return gen_rtx_PLUS (Pmode, base, off);
7948 }
7949 else
7950 {
7951 base = get_thread_pointer (true);
7952 dest = gen_reg_rtx (Pmode);
7953 emit_insn (gen_subsi3 (dest, base, off));
7954 }
7955 break;
7956
7957 case TLS_MODEL_LOCAL_EXEC:
7958 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5bf5a10b 7959 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
74dc3e94
RH
7960 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7961 off = gen_rtx_CONST (Pmode, off);
7962
5bf5a10b 7963 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
74dc3e94
RH
7964 {
7965 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7966 return gen_rtx_PLUS (Pmode, base, off);
7967 }
7968 else
7969 {
7970 base = get_thread_pointer (true);
7971 dest = gen_reg_rtx (Pmode);
7972 emit_insn (gen_subsi3 (dest, base, off));
7973 }
7974 break;
7975
7976 default:
d0396b79 7977 gcc_unreachable ();
74dc3e94
RH
7978 }
7979
7980 return dest;
f996902d 7981}
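/* A sketch, not authoritative, of the typical x86-64 GNU/Linux code
   sequences the four models above expand to (exact sequences depend on
   assembler/linker support and on TARGET_GNU2_TLS):

     global-dynamic:  leaq  x@tlsgd(%rip), %rdi
                      call  __tls_get_addr@PLT
     local-dynamic:   leaq  x@tlsld(%rip), %rdi
                      call  __tls_get_addr@PLT
                      leaq  x@dtpoff(%rax), %rax
     initial-exec:    movq  x@gottpoff(%rip), %rax
                      addq  %fs:0, %rax
     local-exec:      movq  %fs:0, %rax
                      leaq  x@tpoff(%rax), %rax  */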
fce5a9f2 7982
da489f73
RH
7983/* Create or return the unique __imp_DECL dllimport symbol corresponding
7984 to symbol DECL. */
7985
7986static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7987 htab_t dllimport_map;
7988
7989static tree
7990get_dllimport_decl (tree decl)
7991{
7992 struct tree_map *h, in;
7993 void **loc;
7994 const char *name;
7995 const char *prefix;
7996 size_t namelen, prefixlen;
7997 char *imp_name;
7998 tree to;
7999 rtx rtl;
8000
8001 if (!dllimport_map)
8002 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
8003
8004 in.hash = htab_hash_pointer (decl);
8005 in.base.from = decl;
8006 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9415ab7d 8007 h = (struct tree_map *) *loc;
da489f73
RH
8008 if (h)
8009 return h->to;
8010
9415ab7d 8011 *loc = h = GGC_NEW (struct tree_map);
da489f73
RH
8012 h->hash = in.hash;
8013 h->base.from = decl;
8014 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
8015 DECL_ARTIFICIAL (to) = 1;
8016 DECL_IGNORED_P (to) = 1;
8017 DECL_EXTERNAL (to) = 1;
8018 TREE_READONLY (to) = 1;
8019
8020 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
8021 name = targetm.strip_name_encoding (name);
e599ac2b 8022 prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
da489f73
RH
8023 namelen = strlen (name);
8024 prefixlen = strlen (prefix);
9415ab7d 8025 imp_name = (char *) alloca (namelen + prefixlen + 1);
da489f73
RH
8026 memcpy (imp_name, prefix, prefixlen);
8027 memcpy (imp_name + prefixlen, name, namelen + 1);
8028
8029 name = ggc_alloc_string (imp_name, namelen + prefixlen);
8030 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
8031 SET_SYMBOL_REF_DECL (rtl, to);
8032 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
8033
8034 rtl = gen_const_mem (Pmode, rtl);
8035 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
8036
8037 SET_DECL_RTL (to, rtl);
18ff3013 8038 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
da489f73
RH
8039
8040 return to;
8041}
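/* A standalone sketch (illustrative only; assumes FASTCALL_PREFIX is '@')
   of the name construction above: "bar" becomes "*__imp__bar", while a
   fastcall symbol "@bar@4" keeps a single underscore, "*__imp_@bar@4".  */
#include <stdio.h>

static void
make_imp_name (const char *name, char *buf, size_t bufsize)
{
  const char *prefix = name[0] == '@' ? "*__imp_" : "*__imp__";
  snprintf (buf, bufsize, "%s%s", prefix, name);
}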
8042
8043/* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
8044 true if we require the result be a register. */
8045
8046static rtx
8047legitimize_dllimport_symbol (rtx symbol, bool want_reg)
8048{
8049 tree imp_decl;
8050 rtx x;
8051
8052 gcc_assert (SYMBOL_REF_DECL (symbol));
8053 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
8054
8055 x = DECL_RTL (imp_decl);
8056 if (want_reg)
8057 x = force_reg (Pmode, x);
8058 return x;
8059}
8060
3b3c6a3f
MM
8061/* Try machine-dependent ways of modifying an illegitimate address
8062 to be legitimate. If we find one, return the new, valid address.
8063 This macro is used in only one place: `memory_address' in explow.c.
8064
8065 OLDX is the address as it was before break_out_memory_refs was called.
8066 In some cases it is useful to look at this to decide what needs to be done.
8067
8068 MODE and WIN are passed so that this macro can use
8069 GO_IF_LEGITIMATE_ADDRESS.
8070
8071 It is always safe for this macro to do nothing. It exists to recognize
8072 opportunities to optimize the output.
8073
8074 For the 80386, we handle X+REG by loading X into a register R and
8075 using R+REG. R will go in a general reg and indexing will be used.
8076 However, if REG is a broken-out memory address or multiplication,
8077 nothing needs to be done because REG can certainly go in a general reg.
8078
8079 When -fpic is used, special handling is needed for symbolic references.
8080 See comments by legitimize_pic_address in i386.c for details. */
8081
8082rtx
8d531ab9 8083legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
3b3c6a3f
MM
8084{
8085 int changed = 0;
8086 unsigned log;
8087
8fe75e43 8088 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
f996902d 8089 if (log)
9415ab7d 8090 return legitimize_tls_address (x, (enum tls_model) log, false);
b39edae3
RH
8091 if (GET_CODE (x) == CONST
8092 && GET_CODE (XEXP (x, 0)) == PLUS
8fe75e43
RH
8093 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8094 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
b39edae3 8095 {
9415ab7d
TN
8096 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
8097 (enum tls_model) log, false);
b39edae3
RH
8098 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
8099 }
f996902d 8100
da489f73
RH
8101 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
8102 {
8103 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
8104 return legitimize_dllimport_symbol (x, true);
8105 if (GET_CODE (x) == CONST
8106 && GET_CODE (XEXP (x, 0)) == PLUS
8107 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8108 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
8109 {
8110 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
8111 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
8112 }
8113 }
8114
ea2666ba
KT
8115 if (flag_pic && SYMBOLIC_CONST (x))
8116 return legitimize_pic_address (x, 0);
8117
3b3c6a3f
MM
8118 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
8119 if (GET_CODE (x) == ASHIFT
7656aee4 8120 && CONST_INT_P (XEXP (x, 1))
85b583d3 8121 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
3b3c6a3f
MM
8122 {
8123 changed = 1;
85b583d3 8124 log = INTVAL (XEXP (x, 1));
a269a03c
JC
8125 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
8126 GEN_INT (1 << log));
3b3c6a3f
MM
8127 }
8128
8129 if (GET_CODE (x) == PLUS)
8130 {
0f290768 8131 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
e9a25f70 8132
3b3c6a3f 8133 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7656aee4 8134 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
85b583d3 8135 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
3b3c6a3f
MM
8136 {
8137 changed = 1;
85b583d3 8138 log = INTVAL (XEXP (XEXP (x, 0), 1));
c5c76735
JL
8139 XEXP (x, 0) = gen_rtx_MULT (Pmode,
8140 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
8141 GEN_INT (1 << log));
3b3c6a3f
MM
8142 }
8143
8144 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7656aee4 8145 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
85b583d3 8146 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
3b3c6a3f
MM
8147 {
8148 changed = 1;
85b583d3 8149 log = INTVAL (XEXP (XEXP (x, 1), 1));
c5c76735
JL
8150 XEXP (x, 1) = gen_rtx_MULT (Pmode,
8151 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
8152 GEN_INT (1 << log));
3b3c6a3f
MM
8153 }
8154
0f290768 8155 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
8156 if (GET_CODE (XEXP (x, 1)) == MULT)
8157 {
8158 rtx tmp = XEXP (x, 0);
8159 XEXP (x, 0) = XEXP (x, 1);
8160 XEXP (x, 1) = tmp;
8161 changed = 1;
8162 }
8163
8164 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
8165 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
8166 created by virtual register instantiation, register elimination, and
8167 similar optimizations. */
8168 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
8169 {
8170 changed = 1;
c5c76735
JL
8171 x = gen_rtx_PLUS (Pmode,
8172 gen_rtx_PLUS (Pmode, XEXP (x, 0),
8173 XEXP (XEXP (x, 1), 0)),
8174 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
8175 }
8176
e9a25f70
JL
8177 /* Canonicalize
8178 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
8179 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
8180 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8181 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8182 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
8183 && CONSTANT_P (XEXP (x, 1)))
8184 {
00c79232
ML
8185 rtx constant;
8186 rtx other = NULL_RTX;
3b3c6a3f 8187
7656aee4 8188 if (CONST_INT_P (XEXP (x, 1)))
3b3c6a3f
MM
8189 {
8190 constant = XEXP (x, 1);
8191 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
8192 }
7656aee4 8193 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
3b3c6a3f
MM
8194 {
8195 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
8196 other = XEXP (x, 1);
8197 }
8198 else
8199 constant = 0;
8200
8201 if (constant)
8202 {
8203 changed = 1;
c5c76735
JL
8204 x = gen_rtx_PLUS (Pmode,
8205 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
8206 XEXP (XEXP (XEXP (x, 0), 1), 0)),
8207 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
8208 }
8209 }
8210
8211 if (changed && legitimate_address_p (mode, x, FALSE))
8212 return x;
8213
8214 if (GET_CODE (XEXP (x, 0)) == MULT)
8215 {
8216 changed = 1;
8217 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
8218 }
8219
8220 if (GET_CODE (XEXP (x, 1)) == MULT)
8221 {
8222 changed = 1;
8223 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
8224 }
8225
8226 if (changed
7656aee4
UB
8227 && REG_P (XEXP (x, 1))
8228 && REG_P (XEXP (x, 0)))
3b3c6a3f
MM
8229 return x;
8230
8231 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
8232 {
8233 changed = 1;
8234 x = legitimize_pic_address (x, 0);
8235 }
8236
8237 if (changed && legitimate_address_p (mode, x, FALSE))
8238 return x;
8239
7656aee4 8240 if (REG_P (XEXP (x, 0)))
3b3c6a3f 8241 {
8d531ab9
KH
8242 rtx temp = gen_reg_rtx (Pmode);
8243 rtx val = force_operand (XEXP (x, 1), temp);
3b3c6a3f
MM
8244 if (val != temp)
8245 emit_move_insn (temp, val);
8246
8247 XEXP (x, 1) = temp;
8248 return x;
8249 }
8250
7656aee4 8251 else if (REG_P (XEXP (x, 1)))
3b3c6a3f 8252 {
8d531ab9
KH
8253 rtx temp = gen_reg_rtx (Pmode);
8254 rtx val = force_operand (XEXP (x, 0), temp);
3b3c6a3f
MM
8255 if (val != temp)
8256 emit_move_insn (temp, val);
8257
8258 XEXP (x, 0) = temp;
8259 return x;
8260 }
8261 }
8262
8263 return x;
8264}
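/* A minimal standalone sketch (not from this file) of the shift-to-multiply
   canonicalization above: an address term (ashift r n) with n in 0..3 is
   rewritten as (mult r (1<<n)) so it matches the scaled-index part of the
   x86 addressing modes (scales 1, 2, 4 and 8).  */
#include <assert.h>

static int
shift_count_to_scale (int log2_scale)
{
  assert (log2_scale >= 0 && log2_scale < 4);
  return 1 << log2_scale;
}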
2a2ab3f9
JVA
8265\f
8266/* Print an integer constant expression in assembler syntax. Addition
8267 and subtraction are the only arithmetic that may appear in these
8268 expressions. FILE is the stdio stream to write to, X is the rtx, and
8269 CODE is the operand print code from the output string. */
8270
8271static void
b96a374d 8272output_pic_addr_const (FILE *file, rtx x, int code)
2a2ab3f9
JVA
8273{
8274 char buf[256];
8275
8276 switch (GET_CODE (x))
8277 {
8278 case PC:
d0396b79
NS
8279 gcc_assert (flag_pic);
8280 putc ('.', file);
2a2ab3f9
JVA
8281 break;
8282
8283 case SYMBOL_REF:
320ce1d3
MS
8284 if (! TARGET_MACHO || TARGET_64BIT)
8285 output_addr_const (file, x);
8286 else
8287 {
8288 const char *name = XSTR (x, 0);
8289
ccf8e764
RH
8290 /* Mark the decl as referenced so that cgraph will
8291 output the function. */
320ce1d3
MS
8292 if (SYMBOL_REF_DECL (x))
8293 mark_decl_referenced (SYMBOL_REF_DECL (x));
8294
320ce1d3 8295#if TARGET_MACHO
c88fc50c
MS
8296 if (MACHOPIC_INDIRECT
8297 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
320ce1d3 8298 name = machopic_indirection_name (x, /*stub_p=*/true);
c88fc50c 8299#endif
320ce1d3
MS
8300 assemble_name (file, name);
8301 }
ccf8e764
RH
8302 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
8303 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
91bb873f 8304 fputs ("@PLT", file);
2a2ab3f9
JVA
8305 break;
8306
91bb873f
RH
8307 case LABEL_REF:
8308 x = XEXP (x, 0);
5efb1046 8309 /* FALLTHRU */
2a2ab3f9
JVA
8310 case CODE_LABEL:
8311 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
8312 assemble_name (asm_out_file, buf);
8313 break;
8314
8315 case CONST_INT:
f64cecad 8316 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2a2ab3f9
JVA
8317 break;
8318
8319 case CONST:
8320 /* This used to output parentheses around the expression,
8321 but that does not work on the 386 (either ATT or BSD assembler). */
8322 output_pic_addr_const (file, XEXP (x, 0), code);
8323 break;
8324
8325 case CONST_DOUBLE:
8326 if (GET_MODE (x) == VOIDmode)
8327 {
8328 /* We can use %d if the number is <32 bits and positive. */
8329 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
f64cecad
JC
8330 fprintf (file, "0x%lx%08lx",
8331 (unsigned long) CONST_DOUBLE_HIGH (x),
8332 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 8333 else
f64cecad 8334 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2a2ab3f9
JVA
8335 }
8336 else
8337 /* We can't handle floating point constants;
8338 PRINT_OPERAND must handle them. */
8339 output_operand_lossage ("floating constant misused");
8340 break;
8341
8342 case PLUS:
e9a25f70 8343 /* Some assemblers need integer constants to appear first. */
7656aee4 8344 if (CONST_INT_P (XEXP (x, 0)))
2a2ab3f9 8345 {
2a2ab3f9 8346 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 8347 putc ('+', file);
e9a25f70 8348 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 8349 }
5656a184 8350 else
2a2ab3f9 8351 {
7656aee4 8352 gcc_assert (CONST_INT_P (XEXP (x, 1)));
2a2ab3f9 8353 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 8354 putc ('+', file);
e9a25f70 8355 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9
JVA
8356 }
8357 break;
8358
8359 case MINUS:
b069de3b
SS
8360 if (!TARGET_MACHO)
8361 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
2a2ab3f9 8362 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 8363 putc ('-', file);
2a2ab3f9 8364 output_pic_addr_const (file, XEXP (x, 1), code);
b069de3b
SS
8365 if (!TARGET_MACHO)
8366 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
2a2ab3f9
JVA
8367 break;
8368
91bb873f 8369 case UNSPEC:
d0396b79 8370 gcc_assert (XVECLEN (x, 0) == 1);
91bb873f
RH
8371 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8372 switch (XINT (x, 1))
77ebd435 8373 {
8ee41eaf 8374 case UNSPEC_GOT:
77ebd435
AJ
8375 fputs ("@GOT", file);
8376 break;
8ee41eaf 8377 case UNSPEC_GOTOFF:
77ebd435
AJ
8378 fputs ("@GOTOFF", file);
8379 break;
dc4d7240
JH
8380 case UNSPEC_PLTOFF:
8381 fputs ("@PLTOFF", file);
8382 break;
8ee41eaf 8383 case UNSPEC_GOTPCREL:
9ad5e54f
RIL
8384 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8385 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
6eb791fc 8386 break;
f996902d 8387 case UNSPEC_GOTTPOFF:
dea73790 8388 /* FIXME: This might be @TPOFF in Sun ld too. */
f996902d
RH
8389 fputs ("@GOTTPOFF", file);
8390 break;
8391 case UNSPEC_TPOFF:
8392 fputs ("@TPOFF", file);
8393 break;
8394 case UNSPEC_NTPOFF:
75d38379
JJ
8395 if (TARGET_64BIT)
8396 fputs ("@TPOFF", file);
8397 else
8398 fputs ("@NTPOFF", file);
f996902d
RH
8399 break;
8400 case UNSPEC_DTPOFF:
8401 fputs ("@DTPOFF", file);
8402 break;
dea73790 8403 case UNSPEC_GOTNTPOFF:
75d38379 8404 if (TARGET_64BIT)
9ad5e54f
RIL
8405 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8406 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
75d38379
JJ
8407 else
8408 fputs ("@GOTNTPOFF", file);
dea73790
JJ
8409 break;
8410 case UNSPEC_INDNTPOFF:
8411 fputs ("@INDNTPOFF", file);
8412 break;
77ebd435
AJ
8413 default:
8414 output_operand_lossage ("invalid UNSPEC as operand");
8415 break;
8416 }
91bb873f
RH
8417 break;
8418
2a2ab3f9
JVA
8419 default:
8420 output_operand_lossage ("invalid expression as operand");
8421 }
8422}
1865dbb5 8423
fdbe66f2 8424/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
b9203463
RH
8425 We need to emit DTP-relative relocations. */
8426
2ed941ec 8427static void ATTRIBUTE_UNUSED
b96a374d 8428i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
b9203463 8429{
75d38379
JJ
8430 fputs (ASM_LONG, file);
8431 output_addr_const (file, x);
8432 fputs ("@DTPOFF", file);
b9203463
RH
8433 switch (size)
8434 {
8435 case 4:
b9203463
RH
8436 break;
8437 case 8:
75d38379 8438 fputs (", 0", file);
b9203463 8439 break;
b9203463 8440 default:
d0396b79 8441 gcc_unreachable ();
b9203463 8442 }
b9203463
RH
8443}
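/* Example (a sketch) of what the routine above emits: for a 4-byte entry,
   ".long x@DTPOFF"; for an 8-byte entry, ".long x@DTPOFF, 0", since the
   relocation itself is only 32 bits wide.  */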
8444
1865dbb5 8445/* In the name of slightly smaller debug output, and to cater to
aabcd309 8446 general assembler lossage, recognize PIC+GOTOFF and turn it back
5656a184 8447 into a direct symbol reference.
dbde310d
GK
8448
8449 On Darwin, this is necessary to avoid a crash, because Darwin
8450 has a different PIC label for each routine but the DWARF debugging
8451 information is not associated with any particular routine, so it's
8452 necessary to remove references to the PIC label from RTL stored by
8453 the DWARF output code. */
1865dbb5 8454
69bd9368 8455static rtx
b96a374d 8456ix86_delegitimize_address (rtx orig_x)
1865dbb5 8457{
dbde310d
GK
8458 rtx x = orig_x;
8459 /* reg_addend is NULL or a multiple of some register. */
8460 rtx reg_addend = NULL_RTX;
8461 /* const_addend is NULL or a const_int. */
8462 rtx const_addend = NULL_RTX;
8463 /* This is the result, or NULL. */
8464 rtx result = NULL_RTX;
1865dbb5 8465
7656aee4 8466 if (MEM_P (x))
4c8c0dec
JJ
8467 x = XEXP (x, 0);
8468
6eb791fc
JH
8469 if (TARGET_64BIT)
8470 {
8471 if (GET_CODE (x) != CONST
8472 || GET_CODE (XEXP (x, 0)) != UNSPEC
8ee41eaf 8473 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7656aee4 8474 || !MEM_P (orig_x))
6eb791fc
JH
8475 return orig_x;
8476 return XVECEXP (XEXP (x, 0), 0, 0);
8477 }
8478
1865dbb5 8479 if (GET_CODE (x) != PLUS
1865dbb5
JM
8480 || GET_CODE (XEXP (x, 1)) != CONST)
8481 return orig_x;
8482
7656aee4 8483 if (REG_P (XEXP (x, 0))
ec65b2e3
JJ
8484 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8485 /* %ebx + GOT/GOTOFF */
dbde310d 8486 ;
ec65b2e3
JJ
8487 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8488 {
8489 /* %ebx + %reg * scale + GOT/GOTOFF */
dbde310d 8490 reg_addend = XEXP (x, 0);
7656aee4 8491 if (REG_P (XEXP (reg_addend, 0))
dbde310d
GK
8492 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8493 reg_addend = XEXP (reg_addend, 1);
7656aee4 8494 else if (REG_P (XEXP (reg_addend, 1))
dbde310d
GK
8495 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8496 reg_addend = XEXP (reg_addend, 0);
ec65b2e3
JJ
8497 else
8498 return orig_x;
7656aee4 8499 if (!REG_P (reg_addend)
dbde310d
GK
8500 && GET_CODE (reg_addend) != MULT
8501 && GET_CODE (reg_addend) != ASHIFT)
ec65b2e3
JJ
8502 return orig_x;
8503 }
8504 else
8505 return orig_x;
8506
1865dbb5 8507 x = XEXP (XEXP (x, 1), 0);
1865dbb5 8508 if (GET_CODE (x) == PLUS
7656aee4 8509 && CONST_INT_P (XEXP (x, 1)))
ec65b2e3 8510 {
dbde310d
GK
8511 const_addend = XEXP (x, 1);
8512 x = XEXP (x, 0);
ec65b2e3 8513 }
1865dbb5 8514
dbde310d 8515 if (GET_CODE (x) == UNSPEC
7656aee4
UB
8516 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8517 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
dbde310d
GK
8518 result = XVECEXP (x, 0, 0);
8519
7931b1be 8520 if (TARGET_MACHO && darwin_local_data_pic (x)
7656aee4 8521 && !MEM_P (orig_x))
dbde310d
GK
8522 result = XEXP (x, 0);
8523
8524 if (! result)
8525 return orig_x;
5656a184 8526
dbde310d
GK
8527 if (const_addend)
8528 result = gen_rtx_PLUS (Pmode, result, const_addend);
8529 if (reg_addend)
8530 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8531 return result;
1865dbb5 8532}
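/* A sketch of the 32-bit transformation above: an address such as

     (plus (reg:SI %ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))

   is turned back into plain (symbol_ref "x"), with any constant and
   register addends re-applied around the result, so the debug-info
   writers never see the PIC register.  */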
2ed941ec
RH
8533
8534/* If X is a machine specific address (i.e. a symbol or label being
8535 referenced as a displacement from the GOT implemented using an
8536 UNSPEC), then return the base term. Otherwise return X. */
8537
8538rtx
8539ix86_find_base_term (rtx x)
8540{
8541 rtx term;
8542
8543 if (TARGET_64BIT)
8544 {
8545 if (GET_CODE (x) != CONST)
8546 return x;
8547 term = XEXP (x, 0);
8548 if (GET_CODE (term) == PLUS
8549 && (CONST_INT_P (XEXP (term, 1))
8550 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8551 term = XEXP (term, 0);
8552 if (GET_CODE (term) != UNSPEC
8553 || XINT (term, 1) != UNSPEC_GOTPCREL)
8554 return x;
8555
8556 term = XVECEXP (term, 0, 0);
8557
8558 if (GET_CODE (term) != SYMBOL_REF
8559 && GET_CODE (term) != LABEL_REF)
8560 return x;
8561
8562 return term;
8563 }
8564
8565 term = ix86_delegitimize_address (x);
8566
8567 if (GET_CODE (term) != SYMBOL_REF
8568 && GET_CODE (term) != LABEL_REF)
8569 return x;
8570
8571 return term;
8572}
2a2ab3f9 8573\f
a269a03c 8574static void
b96a374d
AJ
8575put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8576 int fp, FILE *file)
a269a03c 8577{
a269a03c
JC
8578 const char *suffix;
8579
9a915772
JH
8580 if (mode == CCFPmode || mode == CCFPUmode)
8581 {
8582 enum rtx_code second_code, bypass_code;
8583 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
d0396b79 8584 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
9a915772
JH
8585 code = ix86_fp_compare_code_to_integer (code);
8586 mode = CCmode;
8587 }
a269a03c
JC
8588 if (reverse)
8589 code = reverse_condition (code);
e075ae69 8590
a269a03c
JC
8591 switch (code)
8592 {
8593 case EQ:
06f4e35d
L
8594 switch (mode)
8595 {
8596 case CCAmode:
8597 suffix = "a";
8598 break;
8599
8600 case CCCmode:
8601 suffix = "c";
8602 break;
8603
8604 case CCOmode:
8605 suffix = "o";
8606 break;
8607
8608 case CCSmode:
8609 suffix = "s";
8610 break;
8611
8612 default:
8613 suffix = "e";
8614 }
a269a03c 8615 break;
a269a03c 8616 case NE:
06f4e35d
L
8617 switch (mode)
8618 {
8619 case CCAmode:
8620 suffix = "na";
8621 break;
8622
8623 case CCCmode:
8624 suffix = "nc";
8625 break;
8626
8627 case CCOmode:
8628 suffix = "no";
8629 break;
8630
8631 case CCSmode:
8632 suffix = "ns";
8633 break;
8634
8635 default:
8636 suffix = "ne";
8637 }
a269a03c 8638 break;
a269a03c 8639 case GT:
d0396b79 8640 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
e075ae69 8641 suffix = "g";
a269a03c 8642 break;
a269a03c 8643 case GTU:
aabcd309
KH
8644 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8645 Those same assemblers have the same but opposite lossage on cmov. */
d39d658d
RIL
8646 if (mode == CCmode)
8647 suffix = fp ? "nbe" : "a";
8648 else if (mode == CCCmode)
8649 suffix = "b";
8650 else
8651 gcc_unreachable ();
a269a03c 8652 break;
a269a03c 8653 case LT:
d0396b79
NS
8654 switch (mode)
8655 {
8656 case CCNOmode:
8657 case CCGOCmode:
8658 suffix = "s";
8659 break;
8660
8661 case CCmode:
8662 case CCGCmode:
8663 suffix = "l";
8664 break;
8665
8666 default:
8667 gcc_unreachable ();
8668 }
a269a03c 8669 break;
a269a03c 8670 case LTU:
d39d658d 8671 gcc_assert (mode == CCmode || mode == CCCmode);
a269a03c
JC
8672 suffix = "b";
8673 break;
a269a03c 8674 case GE:
d0396b79
NS
8675 switch (mode)
8676 {
8677 case CCNOmode:
8678 case CCGOCmode:
8679 suffix = "ns";
8680 break;
8681
8682 case CCmode:
8683 case CCGCmode:
8684 suffix = "ge";
8685 break;
8686
8687 default:
8688 gcc_unreachable ();
8689 }
a269a03c 8690 break;
a269a03c 8691 case GEU:
e075ae69 8692 /* ??? As above. */
d39d658d 8693 gcc_assert (mode == CCmode || mode == CCCmode);
7e08e190 8694 suffix = fp ? "nb" : "ae";
a269a03c 8695 break;
a269a03c 8696 case LE:
d0396b79 8697 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
e075ae69 8698 suffix = "le";
a269a03c 8699 break;
a269a03c 8700 case LEU:
d39d658d
RIL
8701 /* ??? As above. */
8702 if (mode == CCmode)
8703 suffix = "be";
8704 else if (mode == CCCmode)
8705 suffix = fp ? "nb" : "ae";
8706 else
8707 gcc_unreachable ();
a269a03c 8708 break;
3a3677ff 8709 case UNORDERED:
9e7adcb3 8710 suffix = fp ? "u" : "p";
3a3677ff
RH
8711 break;
8712 case ORDERED:
9e7adcb3 8713 suffix = fp ? "nu" : "np";
3a3677ff 8714 break;
a269a03c 8715 default:
d0396b79 8716 gcc_unreachable ();
a269a03c
JC
8717 }
8718 fputs (suffix, file);
8719}
8720
a55f4481
RK
8721/* Print the name of register X to FILE based on its machine mode and number.
8722 If CODE is 'w', pretend the mode is HImode.
8723 If CODE is 'b', pretend the mode is QImode.
8724 If CODE is 'k', pretend the mode is SImode.
8725 If CODE is 'q', pretend the mode is DImode.
d0396b79 8726 If CODE is 'h', pretend the reg is the 'high' byte register.
a55f4481
RK
8727 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8728
e075ae69 8729void
b96a374d 8730print_reg (rtx x, int code, FILE *file)
e5cb57e8 8731{
9ad5e54f
RIL
8732 gcc_assert (x == pc_rtx
8733 || (REGNO (x) != ARG_POINTER_REGNUM
8734 && REGNO (x) != FRAME_POINTER_REGNUM
8735 && REGNO (x) != FLAGS_REG
8736 && REGNO (x) != FPSR_REG
8737 && REGNO (x) != FPCR_REG));
480feac0 8738
9ad5e54f 8739 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
8740 putc ('%', file);
8741
9ad5e54f
RIL
8742 if (x == pc_rtx)
8743 {
8744 gcc_assert (TARGET_64BIT);
8745 fputs ("rip", file);
8746 return;
8747 }
8748
ef6257cd 8749 if (code == 'w' || MMX_REG_P (x))
e075ae69
RH
8750 code = 2;
8751 else if (code == 'b')
8752 code = 1;
8753 else if (code == 'k')
8754 code = 4;
3f3f2124
JH
8755 else if (code == 'q')
8756 code = 8;
e075ae69
RH
8757 else if (code == 'y')
8758 code = 3;
8759 else if (code == 'h')
8760 code = 0;
8761 else
8762 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 8763
3f3f2124
JH
8764 /* Irritatingly, AMD extended registers use a different naming convention
8765 from the normal registers. */
8766 if (REX_INT_REG_P (x))
8767 {
d0396b79 8768 gcc_assert (TARGET_64BIT);
3f3f2124
JH
8769 switch (code)
8770 {
ef6257cd 8771 case 0:
c725bd79 8772 error ("extended registers have no high halves");
3f3f2124
JH
8773 break;
8774 case 1:
8775 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8776 break;
8777 case 2:
8778 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8779 break;
8780 case 4:
8781 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8782 break;
8783 case 8:
8784 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8785 break;
8786 default:
c725bd79 8787 error ("unsupported operand size for extended register");
3f3f2124
JH
8788 break;
8789 }
8790 return;
8791 }
e075ae69
RH
8792 switch (code)
8793 {
8794 case 3:
8795 if (STACK_TOP_P (x))
8796 {
8797 fputs ("st(0)", file);
8798 break;
8799 }
5efb1046 8800 /* FALLTHRU */
e075ae69 8801 case 8:
3f3f2124 8802 case 4:
e075ae69 8803 case 12:
446988df 8804 if (! ANY_FP_REG_P (x))
885a70fd 8805 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5efb1046 8806 /* FALLTHRU */
a7180f70 8807 case 16:
e075ae69 8808 case 2:
d4c32b6f 8809 normal:
e075ae69
RH
8810 fputs (hi_reg_name[REGNO (x)], file);
8811 break;
8812 case 1:
d4c32b6f
RH
8813 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8814 goto normal;
e075ae69
RH
8815 fputs (qi_reg_name[REGNO (x)], file);
8816 break;
8817 case 0:
d4c32b6f
RH
8818 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8819 goto normal;
e075ae69
RH
8820 fputs (qi_high_reg_name[REGNO (x)], file);
8821 break;
8822 default:
d0396b79 8823 gcc_unreachable ();
fe25fea3 8824 }
e5cb57e8
SC
8825}
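/* For reference, a sketch of the naming scheme handled above: one hardware
   register prints under different names depending on the size code, e.g.
   register 0 as "al"/"ax"/"eax"/"rax" for codes 'b'/'w'/'k'/'q', while an
   extended register prints as "r8b"/"r8w"/"r8d"/"r8" -- hence the separate
   "r%i" printing path for REX registers.  */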
8826
f996902d
RH
8827/* Locate some local-dynamic symbol still in use by this function
8828 so that we can print its name in some tls_local_dynamic_base
8829 pattern. */
8830
2ed941ec
RH
8831static int
8832get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8833{
8834 rtx x = *px;
8835
8836 if (GET_CODE (x) == SYMBOL_REF
8837 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8838 {
8839 cfun->machine->some_ld_name = XSTR (x, 0);
8840 return 1;
8841 }
8842
8843 return 0;
8844}
8845
f996902d 8846static const char *
b96a374d 8847get_some_local_dynamic_name (void)
f996902d
RH
8848{
8849 rtx insn;
8850
8851 if (cfun->machine->some_ld_name)
8852 return cfun->machine->some_ld_name;
8853
8854 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8855 if (INSN_P (insn)
8856 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8857 return cfun->machine->some_ld_name;
8858
d0396b79 8859 gcc_unreachable ();
f996902d
RH
8860}
8861
2a2ab3f9 8862/* Meaning of CODE:
fe25fea3 8863 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 8864 C -- print opcode suffix for set/cmov insn.
fe25fea3 8865 c -- like C, but print reversed condition
ef6257cd 8866 F,f -- likewise, but for floating-point.
f6f5dff2
RO
8867 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8868 otherwise nothing
2a2ab3f9
JVA
8869 R -- print the prefix for register names.
8870 z -- print the opcode suffix for the size of the current operand.
8871 * -- print a star (in certain assembler syntax)
fb204271 8872 A -- print an absolute memory reference.
2a2ab3f9 8873 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
8874 s -- print a shift double count, followed by the assemblers argument
8875 delimiter.
fe25fea3
SC
8876 b -- print the QImode name of the register for the indicated operand.
8877 %b0 would print %al if operands[0] is reg 0.
8878 w -- likewise, print the HImode name of the register.
8879 k -- likewise, print the SImode name of the register.
3f3f2124 8880 q -- likewise, print the DImode name of the register.
ef6257cd
JH
8881 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8882 y -- print "st(0)" instead of "st" as a register.
a46d1d38 8883 D -- print condition for SSE cmp instruction.
ef6257cd
JH
8884 P -- if PIC, print an @PLT suffix.
8885 X -- don't print any sort of PIC '@' suffix for a symbol.
f996902d 8886 & -- print some in-use local-dynamic symbol name.
ef719a44 8887 H -- print a memory address offset by 8; used for sse high-parts
04e1d06b 8888 Y -- print condition for SSE5 com* instruction.
c9d259cb
UB
8889 + -- print a branch hint as 'cs' or 'ds' prefix
8890 ; -- print a semicolon (after prefixes, due to a bug in older gas).
a46d1d38 8891 */
2a2ab3f9
JVA
8892
8893void
b96a374d 8894print_operand (FILE *file, rtx x, int code)
2a2ab3f9
JVA
8895{
8896 if (code)
8897 {
8898 switch (code)
8899 {
8900 case '*':
80f33d06 8901 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9
JVA
8902 putc ('*', file);
8903 return;
8904
f996902d
RH
8905 case '&':
8906 assemble_name (file, get_some_local_dynamic_name ());
8907 return;
8908
fb204271 8909 case 'A':
d0396b79 8910 switch (ASSEMBLER_DIALECT)
fb204271 8911 {
d0396b79
NS
8912 case ASM_ATT:
8913 putc ('*', file);
8914 break;
8915
8916 case ASM_INTEL:
fb204271
DN
8917 /* Intel syntax. For absolute addresses, registers should not
8918 be surrounded by braces. */
7656aee4 8919 if (!REG_P (x))
fb204271
DN
8920 {
8921 putc ('[', file);
8922 PRINT_OPERAND (file, x, 0);
8923 putc (']', file);
8924 return;
8925 }
d0396b79
NS
8926 break;
8927
8928 default:
8929 gcc_unreachable ();
fb204271
DN
8930 }
8931
8932 PRINT_OPERAND (file, x, 0);
8933 return;
8934
8935
2a2ab3f9 8936 case 'L':
80f33d06 8937 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 8938 putc ('l', file);
2a2ab3f9
JVA
8939 return;
8940
8941 case 'W':
80f33d06 8942 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 8943 putc ('w', file);
2a2ab3f9
JVA
8944 return;
8945
8946 case 'B':
80f33d06 8947 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 8948 putc ('b', file);
2a2ab3f9
JVA
8949 return;
8950
8951 case 'Q':
80f33d06 8952 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 8953 putc ('l', file);
2a2ab3f9
JVA
8954 return;
8955
8956 case 'S':
80f33d06 8957 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 8958 putc ('s', file);
2a2ab3f9
JVA
8959 return;
8960
5f1ec3e6 8961 case 'T':
80f33d06 8962 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 8963 putc ('t', file);
5f1ec3e6
JVA
8964 return;
8965
2a2ab3f9
JVA
8966 case 'z':
8967 /* 387 opcodes don't get size suffixes if the operands are
0f290768 8968 registers. */
2a2ab3f9
JVA
8969 if (STACK_REG_P (x))
8970 return;
8971
831c4e87
KC
8972 /* Likewise if using Intel opcodes. */
8973 if (ASSEMBLER_DIALECT == ASM_INTEL)
8974 return;
8975
8976 /* This is the size of op from size of operand. */
2a2ab3f9
JVA
8977 switch (GET_MODE_SIZE (GET_MODE (x)))
8978 {
37fc8424
UB
8979 case 1:
8980 putc ('b', file);
8981 return;
8982
2a2ab3f9 8983 case 2:
f3ba4235
UB
8984 if (MEM_P (x))
8985 {
155d8a47 8986#ifdef HAVE_GAS_FILDS_FISTS
f3ba4235 8987 putc ('s', file);
155d8a47 8988#endif
f3ba4235
UB
8989 return;
8990 }
8991 else
8992 putc ('w', file);
2a2ab3f9
JVA
8993 return;
8994
8995 case 4:
8996 if (GET_MODE (x) == SFmode)
8997 {
e075ae69 8998 putc ('s', file);
2a2ab3f9
JVA
8999 return;
9000 }
9001 else
e075ae69 9002 putc ('l', file);
2a2ab3f9
JVA
9003 return;
9004
5f1ec3e6 9005 case 12:
2b589241 9006 case 16:
e075ae69
RH
9007 putc ('t', file);
9008 return;
5f1ec3e6 9009
2a2ab3f9
JVA
9010 case 8:
9011 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
9012 {
9013#ifdef GAS_MNEMONICS
e075ae69 9014 putc ('q', file);
56c0e8fa 9015#else
e075ae69
RH
9016 putc ('l', file);
9017 putc ('l', file);
56c0e8fa
JVA
9018#endif
9019 }
e075ae69
RH
9020 else
9021 putc ('l', file);
2a2ab3f9 9022 return;
155d8a47
JW
9023
9024 default:
d0396b79 9025 gcc_unreachable ();
2a2ab3f9 9026 }
4af3895e
JVA
9027
9028 case 'b':
9029 case 'w':
9030 case 'k':
3f3f2124 9031 case 'q':
4af3895e
JVA
9032 case 'h':
9033 case 'y':
5cb6195d 9034 case 'X':
e075ae69 9035 case 'P':
4af3895e
JVA
9036 break;
9037
2d49677f 9038 case 's':
7656aee4 9039 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
2d49677f
SC
9040 {
9041 PRINT_OPERAND (file, x, 0);
e075ae69 9042 putc (',', file);
2d49677f 9043 }
a269a03c
JC
9044 return;
9045
a46d1d38
JH
9046 case 'D':
9047 /* A little bit of braindamage here: the SSE compare instructions
9048 use completely different names for the comparisons than the
9049 fp conditional moves do. */
9050 switch (GET_CODE (x))
9051 {
9052 case EQ:
9053 case UNEQ:
9054 fputs ("eq", file);
9055 break;
9056 case LT:
9057 case UNLT:
9058 fputs ("lt", file);
9059 break;
9060 case LE:
9061 case UNLE:
9062 fputs ("le", file);
9063 break;
9064 case UNORDERED:
9065 fputs ("unord", file);
9066 break;
9067 case NE:
9068 case LTGT:
9069 fputs ("neq", file);
9070 break;
9071 case UNGE:
9072 case GE:
9073 fputs ("nlt", file);
9074 break;
9075 case UNGT:
9076 case GT:
9077 fputs ("nle", file);
9078 break;
9079 case ORDERED:
9080 fputs ("ord", file);
9081 break;
9082 default:
d0396b79 9083 gcc_unreachable ();
a46d1d38
JH
9084 }
9085 return;
048b1c95 9086 case 'O':
f6f5dff2 9087#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
9088 if (ASSEMBLER_DIALECT == ASM_ATT)
9089 {
9090 switch (GET_MODE (x))
9091 {
9092 case HImode: putc ('w', file); break;
9093 case SImode:
9094 case SFmode: putc ('l', file); break;
9095 case DImode:
9096 case DFmode: putc ('q', file); break;
d0396b79 9097 default: gcc_unreachable ();
048b1c95
JJ
9098 }
9099 putc ('.', file);
9100 }
9101#endif
9102 return;
1853aadd 9103 case 'C':
e075ae69 9104 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 9105 return;
fe25fea3 9106 case 'F':
f6f5dff2 9107#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
9108 if (ASSEMBLER_DIALECT == ASM_ATT)
9109 putc ('.', file);
9110#endif
e075ae69 9111 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
9112 return;
9113
e9a25f70 9114 /* Like above, but reverse condition */
e075ae69 9115 case 'c':
fce5a9f2 9116 /* Check to see if argument to %c is really a constant
c1d5afc4 9117 and not a condition code which needs to be reversed. */
ec8e098d 9118 if (!COMPARISON_P (x))
c1d5afc4
CR
9119 {
9120 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
9121 return;
9122 }
e075ae69
RH
9123 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
9124 return;
fe25fea3 9125 case 'f':
f6f5dff2 9126#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
9127 if (ASSEMBLER_DIALECT == ASM_ATT)
9128 putc ('.', file);
9129#endif
e075ae69 9130 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 9131 return;
ef719a44
RH
9132
9133 case 'H':
9134 /* It doesn't actually matter what mode we use here, as we're
9135 only going to use this for printing. */
9136 x = adjust_address_nv (x, DImode, 8);
9137 break;
9138
ef6257cd
JH
9139 case '+':
9140 {
9141 rtx x;
e5cb57e8 9142
ef6257cd
JH
9143 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
9144 return;
a4f31c00 9145
ef6257cd
JH
9146 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
9147 if (x)
9148 {
9149 int pred_val = INTVAL (XEXP (x, 0));
9150
9151 if (pred_val < REG_BR_PROB_BASE * 45 / 100
9152 || pred_val > REG_BR_PROB_BASE * 55 / 100)
9153 {
9154 int taken = pred_val > REG_BR_PROB_BASE / 2;
9155 int cputaken = final_forward_branch_p (current_output_insn) == 0;
9156
9157 /* Emit hints only in cases where the default branch prediction
d1f87653 9158 heuristics would fail. */
ef6257cd
JH
9159 if (taken != cputaken)
9160 {
9161 /* We use 3e (DS) prefix for taken branches and
9162 2e (CS) prefix for not taken branches. */
9163 if (taken)
9164 fputs ("ds ; ", file);
9165 else
9166 fputs ("cs ; ", file);
9167 }
9168 }
9169 }
9170 return;
9171 }
c9d259cb 9172
04e1d06b
MM
9173 case 'Y':
9174 switch (GET_CODE (x))
9175 {
9176 case NE:
9177 fputs ("neq", file);
9178 break;
9179 case EQ:
9180 fputs ("eq", file);
9181 break;
9182 case GE:
9183 case GEU:
9184 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
9185 break;
9186 case GT:
9187 case GTU:
9188 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
9189 break;
9190 case LE:
9191 case LEU:
9192 fputs ("le", file);
9193 break;
9194 case LT:
9195 case LTU:
9196 fputs ("lt", file);
9197 break;
9198 case UNORDERED:
9199 fputs ("unord", file);
9200 break;
9201 case ORDERED:
9202 fputs ("ord", file);
9203 break;
9204 case UNEQ:
9205 fputs ("ueq", file);
9206 break;
9207 case UNGE:
9208 fputs ("nlt", file);
9209 break;
9210 case UNGT:
9211 fputs ("nle", file);
9212 break;
9213 case UNLE:
9214 fputs ("ule", file);
9215 break;
9216 case UNLT:
9217 fputs ("ult", file);
9218 break;
9219 case LTGT:
9220 fputs ("une", file);
9221 break;
9222 default:
9223 gcc_unreachable ();
9224 }
9225 return;
9226
c9d259cb
UB
9227 case ';':
9228#if TARGET_MACHO
9229 fputs (" ; ", file);
9230#else
9231 fputc (' ', file);
9232#endif
9233 return;
9234
4af3895e 9235 default:
9e637a26 9236 output_operand_lossage ("invalid operand code '%c'", code);
2a2ab3f9
JVA
9237 }
9238 }
e9a25f70 9239
7656aee4 9240 if (REG_P (x))
a55f4481 9241 print_reg (x, code, file);
e9a25f70 9242
7656aee4 9243 else if (MEM_P (x))
2a2ab3f9 9244 {
9ad5e54f
RIL
9245 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
9246 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
9247 && GET_MODE (x) != BLKmode)
2a2ab3f9 9248 {
69ddee61 9249 const char * size;
e075ae69
RH
9250 switch (GET_MODE_SIZE (GET_MODE (x)))
9251 {
9252 case 1: size = "BYTE"; break;
9253 case 2: size = "WORD"; break;
9254 case 4: size = "DWORD"; break;
9255 case 8: size = "QWORD"; break;
9256 case 12: size = "XWORD"; break;
9ad5e54f
RIL
9257 case 16:
9258 if (GET_MODE (x) == XFmode)
9259 size = "XWORD";
9260 else
9261 size = "XMMWORD";
9262 break;
e075ae69 9263 default:
d0396b79 9264 gcc_unreachable ();
e075ae69 9265 }
fb204271
DN
9266
9267 /* Check for explicit size override (codes 'b', 'w' and 'k') */
9268 if (code == 'b')
9269 size = "BYTE";
9270 else if (code == 'w')
9271 size = "WORD";
9272 else if (code == 'k')
9273 size = "DWORD";
9274
e075ae69
RH
9275 fputs (size, file);
9276 fputs (" PTR ", file);
2a2ab3f9 9277 }
e075ae69
RH
9278
9279 x = XEXP (x, 0);
0d7d98ee 9280 /* Avoid (%rip) for call operands. */
d10f5ecf 9281 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7656aee4 9282 && !CONST_INT_P (x))
0d7d98ee 9283 output_addr_const (file, x);
c8b94768
RH
9284 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
9285 output_operand_lossage ("invalid constraints for operand");
2a2ab3f9 9286 else
e075ae69 9287 output_address (x);
2a2ab3f9 9288 }
e9a25f70 9289
2a2ab3f9
JVA
9290 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
9291 {
e9a25f70
JL
9292 REAL_VALUE_TYPE r;
9293 long l;
9294
5f1ec3e6
JVA
9295 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9296 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69 9297
80f33d06 9298 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 9299 putc ('$', file);
3d57d7ce 9300 fprintf (file, "0x%08lx", (long unsigned int) l);
5f1ec3e6 9301 }
e9a25f70 9302
74dc3e94
RH
9303 /* These float cases don't actually occur as immediate operands. */
9304 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5f1ec3e6 9305 {
e9a25f70
JL
9306 char dstr[30];
9307
da6eec72 9308 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 9309 fprintf (file, "%s", dstr);
2a2ab3f9 9310 }
e9a25f70 9311
2b589241 9312 else if (GET_CODE (x) == CONST_DOUBLE
f8a1ebc6 9313 && GET_MODE (x) == XFmode)
2a2ab3f9 9314 {
e9a25f70
JL
9315 char dstr[30];
9316
da6eec72 9317 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 9318 fprintf (file, "%s", dstr);
2a2ab3f9 9319 }
f996902d 9320
79325812 9321 else
2a2ab3f9 9322 {
b4e82619
RH
9323 /* We have patterns that allow zero sets of memory, for instance.
9324 In 64-bit mode, we should probably support all 8-byte vectors,
9325 since we can in fact encode that into an immediate. */
9326 if (GET_CODE (x) == CONST_VECTOR)
9327 {
d0396b79
NS
9328 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
9329 x = const0_rtx;
b4e82619
RH
9330 }
9331
4af3895e 9332 if (code != 'P')
2a2ab3f9 9333 {
7656aee4 9334 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
e075ae69 9335 {
80f33d06 9336 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
9337 putc ('$', file);
9338 }
2a2ab3f9
JVA
9339 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
9340 || GET_CODE (x) == LABEL_REF)
e075ae69 9341 {
80f33d06 9342 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
9343 putc ('$', file);
9344 else
9345 fputs ("OFFSET FLAT:", file);
9346 }
2a2ab3f9 9347 }
7656aee4 9348 if (CONST_INT_P (x))
e075ae69
RH
9349 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9350 else if (flag_pic)
2a2ab3f9
JVA
9351 output_pic_addr_const (file, x, code);
9352 else
9353 output_addr_const (file, x);
9354 }
9355}
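/* Usage sketch: the codes above appear in insn templates, e.g.
   "mov%z0\t{%1, %0|%0, %1}" derives the size suffix from operand 0 and
   selects AT&T or Intel operand order, while "%b1"/"%w1"/"%k1" force the
   QImode/HImode/SImode name of the register in operand 1.  */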
9356\f
9357/* Print a memory operand whose address is ADDR. */
9358
9359void
8d531ab9 9360print_operand_address (FILE *file, rtx addr)
2a2ab3f9 9361{
e075ae69
RH
9362 struct ix86_address parts;
9363 rtx base, index, disp;
9364 int scale;
d0396b79 9365 int ok = ix86_decompose_address (addr, &parts);
e9a25f70 9366
d0396b79 9367 gcc_assert (ok);
e9a25f70 9368
e075ae69
RH
9369 base = parts.base;
9370 index = parts.index;
9371 disp = parts.disp;
9372 scale = parts.scale;
e9a25f70 9373
74dc3e94
RH
9374 switch (parts.seg)
9375 {
9376 case SEG_DEFAULT:
9377 break;
9378 case SEG_FS:
9379 case SEG_GS:
9ad5e54f 9380 if (ASSEMBLER_DIALECT == ASM_ATT)
74dc3e94
RH
9381 putc ('%', file);
9382 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
9383 break;
9384 default:
d0396b79 9385 gcc_unreachable ();
74dc3e94
RH
9386 }
9387
9ad5e54f
RIL
9388 /* Use one byte shorter RIP relative addressing for 64bit mode. */
9389 if (TARGET_64BIT && !base && !index)
9390 {
9391 rtx symbol = disp;
9392
9393 if (GET_CODE (disp) == CONST
9394 && GET_CODE (XEXP (disp, 0)) == PLUS
9395 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9396 symbol = XEXP (XEXP (disp, 0), 0);
9397
9398 if (GET_CODE (symbol) == LABEL_REF
9399 || (GET_CODE (symbol) == SYMBOL_REF
9400 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
9401 base = pc_rtx;
9402 }
e075ae69
RH
9403 if (!base && !index)
9404 {
9405 /* Displacement only requires special attention. */
e9a25f70 9406
7656aee4 9407 if (CONST_INT_P (disp))
2a2ab3f9 9408 {
74dc3e94 9409 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
9ad5e54f 9410 fputs ("ds:", file);
74dc3e94 9411 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
2a2ab3f9 9412 }
e075ae69 9413 else if (flag_pic)
74dc3e94 9414 output_pic_addr_const (file, disp, 0);
e075ae69 9415 else
74dc3e94 9416 output_addr_const (file, disp);
e075ae69
RH
9417 }
9418 else
9419 {
80f33d06 9420 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9 9421 {
e075ae69 9422 if (disp)
2a2ab3f9 9423 {
c399861d 9424 if (flag_pic)
e075ae69
RH
9425 output_pic_addr_const (file, disp, 0);
9426 else if (GET_CODE (disp) == LABEL_REF)
9427 output_asm_label (disp);
2a2ab3f9 9428 else
e075ae69 9429 output_addr_const (file, disp);
2a2ab3f9
JVA
9430 }
9431
e075ae69
RH
9432 putc ('(', file);
9433 if (base)
a55f4481 9434 print_reg (base, 0, file);
e075ae69 9435 if (index)
2a2ab3f9 9436 {
e075ae69 9437 putc (',', file);
a55f4481 9438 print_reg (index, 0, file);
e075ae69
RH
9439 if (scale != 1)
9440 fprintf (file, ",%d", scale);
2a2ab3f9 9441 }
e075ae69 9442 putc (')', file);
2a2ab3f9 9443 }
2a2ab3f9
JVA
9444 else
9445 {
e075ae69 9446 rtx offset = NULL_RTX;
e9a25f70 9447
e075ae69
RH
9448 if (disp)
9449 {
9450 /* Pull out the offset of a symbol; print any symbol itself. */
9451 if (GET_CODE (disp) == CONST
9452 && GET_CODE (XEXP (disp, 0)) == PLUS
7656aee4 9453 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
e075ae69
RH
9454 {
9455 offset = XEXP (XEXP (disp, 0), 1);
9456 disp = gen_rtx_CONST (VOIDmode,
9457 XEXP (XEXP (disp, 0), 0));
9458 }
ce193852 9459
e075ae69
RH
9460 if (flag_pic)
9461 output_pic_addr_const (file, disp, 0);
9462 else if (GET_CODE (disp) == LABEL_REF)
9463 output_asm_label (disp);
7656aee4 9464 else if (CONST_INT_P (disp))
e075ae69
RH
9465 offset = disp;
9466 else
9467 output_addr_const (file, disp);
9468 }
e9a25f70 9469
e075ae69
RH
9470 putc ('[', file);
9471 if (base)
a8620236 9472 {
a55f4481 9473 print_reg (base, 0, file);
e075ae69
RH
9474 if (offset)
9475 {
9476 if (INTVAL (offset) >= 0)
9477 putc ('+', file);
9478 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9479 }
a8620236 9480 }
e075ae69
RH
9481 else if (offset)
9482 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 9483 else
e075ae69 9484 putc ('0', file);
e9a25f70 9485
e075ae69
RH
9486 if (index)
9487 {
9488 putc ('+', file);
a55f4481 9489 print_reg (index, 0, file);
e075ae69
RH
9490 if (scale != 1)
9491 fprintf (file, "*%d", scale);
9492 }
9493 putc (']', file);
9494 }
2a2ab3f9
JVA
9495 }
9496}
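/* A sketch of the two dialects produced above for the same address
   base + index*scale + displacement:

     AT&T:   -4(%ebp,%eax,4)
     Intel:  [ebp+eax*4-4]  */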
f996902d
RH
9497
9498bool
b96a374d 9499output_addr_const_extra (FILE *file, rtx x)
f996902d
RH
9500{
9501 rtx op;
9502
9503 if (GET_CODE (x) != UNSPEC)
9504 return false;
9505
9506 op = XVECEXP (x, 0, 0);
9507 switch (XINT (x, 1))
9508 {
9509 case UNSPEC_GOTTPOFF:
9510 output_addr_const (file, op);
dea73790 9511 /* FIXME: This might be @TPOFF in Sun ld. */
f996902d
RH
9512 fputs ("@GOTTPOFF", file);
9513 break;
9514 case UNSPEC_TPOFF:
9515 output_addr_const (file, op);
9516 fputs ("@TPOFF", file);
9517 break;
9518 case UNSPEC_NTPOFF:
9519 output_addr_const (file, op);
75d38379
JJ
9520 if (TARGET_64BIT)
9521 fputs ("@TPOFF", file);
9522 else
9523 fputs ("@NTPOFF", file);
f996902d
RH
9524 break;
9525 case UNSPEC_DTPOFF:
9526 output_addr_const (file, op);
9527 fputs ("@DTPOFF", file);
9528 break;
dea73790
JJ
9529 case UNSPEC_GOTNTPOFF:
9530 output_addr_const (file, op);
75d38379 9531 if (TARGET_64BIT)
9ad5e54f
RIL
9532 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9533 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
75d38379
JJ
9534 else
9535 fputs ("@GOTNTPOFF", file);
dea73790
JJ
9536 break;
9537 case UNSPEC_INDNTPOFF:
9538 output_addr_const (file, op);
9539 fputs ("@INDNTPOFF", file);
9540 break;
f996902d
RH
9541
9542 default:
9543 return false;
9544 }
9545
9546 return true;
9547}
2a2ab3f9
JVA
9548\f
9549/* Split one or more DImode RTL references into pairs of SImode
9550 references. The RTL can be REG, offsettable MEM, integer constant, or
9551 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9552 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 9553 that parallel "operands". */
2a2ab3f9
JVA
9554
9555void
b96a374d 9556split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
2a2ab3f9
JVA
9557{
9558 while (num--)
9559 {
57dbca5e 9560 rtx op = operands[num];
b932f770
JH
9561
9562 /* simplify_subreg refuses to split volatile memory addresses,
9563 but we still have to handle them. */
7656aee4 9564 if (MEM_P (op))
2a2ab3f9 9565 {
f4ef873c 9566 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 9567 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
9568 }
9569 else
b932f770 9570 {
38ca929b
JH
9571 lo_half[num] = simplify_gen_subreg (SImode, op,
9572 GET_MODE (op) == VOIDmode
9573 ? DImode : GET_MODE (op), 0);
9574 hi_half[num] = simplify_gen_subreg (SImode, op,
9575 GET_MODE (op) == VOIDmode
9576 ? DImode : GET_MODE (op), 4);
b932f770 9577 }
2a2ab3f9
JVA
9578 }
9579}
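/* A minimal standalone sketch (not from this file) of the constant case
   above: a 64-bit value splits into low and high 32-bit words, matching
   the little-endian layout that adjust_address exposes for MEMs.  */
#include <stdint.h>

static void
split_di_constant (uint64_t val, uint32_t *lo, uint32_t *hi)
{
  *lo = (uint32_t) val;			/* bytes 0..3 */
  *hi = (uint32_t) (val >> 32);		/* bytes 4..7 */
}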
28356f52 9580/* Split one or more TImode RTL references into pairs of DImode
44cf5b6a
JH
9581 references. The RTL can be REG, offsettable MEM, integer constant, or
9582 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9583 split and "num" is its length. lo_half and hi_half are output arrays
9584 that parallel "operands". */
9585
9586void
b96a374d 9587split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
44cf5b6a
JH
9588{
9589 while (num--)
9590 {
9591 rtx op = operands[num];
b932f770
JH
9592
9593 /* simplify_subreg refuses to split volatile memory addresses, but we
9594 still have to handle them. */
7656aee4 9595 if (MEM_P (op))
44cf5b6a
JH
9596 {
9597 lo_half[num] = adjust_address (op, DImode, 0);
9598 hi_half[num] = adjust_address (op, DImode, 8);
9599 }
9600 else
b932f770
JH
9601 {
9602 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9603 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9604 }
44cf5b6a
JH
9605 }
9606}
2a2ab3f9 9607\f
2a2ab3f9
JVA
9608/* Output code to perform a 387 binary operation in INSN, one of PLUS,
9609 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9610 is the expression of the binary operation. The output may either be
9611 emitted here, or returned to the caller, like all output_* functions.
9612
9613 There is no guarantee that the operands are the same mode, as they
0f290768 9614 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 9615
e3c2afab
AM
9616#ifndef SYSV386_COMPAT
9617/* Set to 1 for compatibility with brain-damaged assemblers. No-one
9618 wants to fix the assemblers because that causes incompatibility
9619 with gcc. No-one wants to fix gcc because that causes
9620 incompatibility with assemblers... You can use the option of
9621 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9622#define SYSV386_COMPAT 1
9623#endif
9624
69ddee61 9625const char *
b96a374d 9626output_387_binary_op (rtx insn, rtx *operands)
2a2ab3f9 9627{
e3c2afab 9628 static char buf[30];
69ddee61 9629 const char *p;
1deaa899 9630 const char *ssep;
89b17498 9631 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
2a2ab3f9 9632
e3c2afab
AM
9633#ifdef ENABLE_CHECKING
9634 /* Even if we do not want to check the inputs, this documents the input
9635 constraints, which helps in understanding the following code. */
9636 if (STACK_REG_P (operands[0])
9637 && ((REG_P (operands[1])
9638 && REGNO (operands[0]) == REGNO (operands[1])
7656aee4 9639 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
e3c2afab
AM
9640 || (REG_P (operands[2])
9641 && REGNO (operands[0]) == REGNO (operands[2])
7656aee4 9642 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
e3c2afab
AM
9643 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9644 ; /* ok */
d0396b79
NS
9645 else
9646 gcc_assert (is_sse);
e3c2afab
AM
9647#endif
9648
2a2ab3f9
JVA
9649 switch (GET_CODE (operands[3]))
9650 {
9651 case PLUS:
e075ae69
RH
9652 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9653 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9654 p = "fiadd";
9655 else
9656 p = "fadd";
1deaa899 9657 ssep = "add";
2a2ab3f9
JVA
9658 break;
9659
9660 case MINUS:
e075ae69
RH
9661 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9662 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9663 p = "fisub";
9664 else
9665 p = "fsub";
1deaa899 9666 ssep = "sub";
2a2ab3f9
JVA
9667 break;
9668
9669 case MULT:
e075ae69
RH
9670 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9671 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9672 p = "fimul";
9673 else
9674 p = "fmul";
1deaa899 9675 ssep = "mul";
2a2ab3f9
JVA
9676 break;
9677
9678 case DIV:
e075ae69
RH
9679 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9680 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9681 p = "fidiv";
9682 else
9683 p = "fdiv";
1deaa899 9684 ssep = "div";
2a2ab3f9
JVA
9685 break;
9686
9687 default:
d0396b79 9688 gcc_unreachable ();
2a2ab3f9
JVA
9689 }
9690
1deaa899
JH
9691 if (is_sse)
9692 {
9693 strcpy (buf, ssep);
9694 if (GET_MODE (operands[0]) == SFmode)
9695 strcat (buf, "ss\t{%2, %0|%0, %2}");
9696 else
9697 strcat (buf, "sd\t{%2, %0|%0, %2}");
9698 return buf;
9699 }
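  /* E.g. a PLUS on SFmode SSE operands yields "addss\t{%2, %0|%0, %2}";
     DFmode gives "addsd\t..." instead.  (Illustrative note.)  */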
e075ae69 9700 strcpy (buf, p);
2a2ab3f9
JVA
9701
9702 switch (GET_CODE (operands[3]))
9703 {
9704 case MULT:
9705 case PLUS:
9706 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9707 {
e3c2afab 9708 rtx temp = operands[2];
2a2ab3f9
JVA
9709 operands[2] = operands[1];
9710 operands[1] = temp;
9711 }
9712
e3c2afab
AM
9713 /* know operands[0] == operands[1]. */
9714
7656aee4 9715 if (MEM_P (operands[2]))
e075ae69
RH
9716 {
9717 p = "%z2\t%2";
9718 break;
9719 }
2a2ab3f9
JVA
9720
9721 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
9722 {
9723 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
9724 /* How is it that we are storing to a dead operand[2]?
9725 Well, presumably operands[1] is dead too. We can't
9726 store the result to st(0) as st(0) gets popped on this
9727 instruction. Instead store to operands[2] (which I
9728 think has to be st(1)). st(1) will be popped later.
9729 gcc <= 2.8.1 didn't have this check and generated
9730 assembly code that the Unixware assembler rejected. */
9731 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 9732 else
e3c2afab 9733 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 9734 break;
6b28fd63 9735 }
2a2ab3f9
JVA
9736
9737 if (STACK_TOP_P (operands[0]))
e3c2afab 9738 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 9739 else
e3c2afab 9740 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 9741 break;
2a2ab3f9
JVA
9742
9743 case MINUS:
9744 case DIV:
7656aee4 9745 if (MEM_P (operands[1]))
e075ae69
RH
9746 {
9747 p = "r%z1\t%1";
9748 break;
9749 }
2a2ab3f9 9750
7656aee4 9751 if (MEM_P (operands[2]))
e075ae69
RH
9752 {
9753 p = "%z2\t%2";
9754 break;
9755 }
2a2ab3f9 9756
2a2ab3f9 9757 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 9758 {
e3c2afab
AM
9759#if SYSV386_COMPAT
9760 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9761 derived assemblers, confusingly reverse the direction of
9762 the operation for fsub{r} and fdiv{r} when the
9763 destination register is not st(0). The Intel assembler
9764 doesn't have this brain damage. Read !SYSV386_COMPAT to
9765 figure out what the hardware really does. */
9766 if (STACK_TOP_P (operands[0]))
9767 p = "{p\t%0, %2|rp\t%2, %0}";
9768 else
9769 p = "{rp\t%2, %0|p\t%0, %2}";
9770#else
6b28fd63 9771 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
9772 /* As above for fmul/fadd, we can't store to st(0). */
9773 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 9774 else
e3c2afab
AM
9775 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9776#endif
e075ae69 9777 break;
6b28fd63 9778 }
2a2ab3f9
JVA
9779
9780 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 9781 {
e3c2afab 9782#if SYSV386_COMPAT
6b28fd63 9783 if (STACK_TOP_P (operands[0]))
e3c2afab 9784 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 9785 else
e3c2afab
AM
9786 p = "{p\t%1, %0|rp\t%0, %1}";
9787#else
9788 if (STACK_TOP_P (operands[0]))
9789 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9790 else
9791 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9792#endif
e075ae69 9793 break;
6b28fd63 9794 }
2a2ab3f9
JVA
9795
9796 if (STACK_TOP_P (operands[0]))
9797 {
9798 if (STACK_TOP_P (operands[1]))
e3c2afab 9799 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 9800 else
e3c2afab 9801 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 9802 break;
2a2ab3f9
JVA
9803 }
9804 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
9805 {
9806#if SYSV386_COMPAT
9807 p = "{\t%1, %0|r\t%0, %1}";
9808#else
9809 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9810#endif
9811 }
2a2ab3f9 9812 else
e3c2afab
AM
9813 {
9814#if SYSV386_COMPAT
9815 p = "{r\t%2, %0|\t%0, %2}";
9816#else
9817 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9818#endif
9819 }
e075ae69 9820 break;
2a2ab3f9
JVA
9821
9822 default:
d0396b79 9823 gcc_unreachable ();
2a2ab3f9 9824 }
e075ae69
RH
9825
9826 strcat (buf, p);
9827 return buf;
2a2ab3f9 9828}
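
/* An illustrative walk-through of the code above (not an additional
   case): for (set (reg:DF st0) (plus:DF (reg:DF st0) (mem:DF ...)))
   the first switch picks p = "fadd", the second switch sees a memory
   operands[2] and picks "%z2\t%2", so the returned template is
   "fadd%z2\t%2", i.e. a memory-operand fadd whose %z2 suffix expands
   according to the memory operand's mode.  */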

/* Return needed mode for entity in optimize_mode_switching pass.  */

int
ix86_mode_needed (int entity, rtx insn)
{
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store the control word after a
     function call or ASM pattern.  The mode ANY specifies that the
     function has no requirements on the control word and makes no
     changes in the bits we are interested in.  */

  if (CALL_P (insn)
      || (NONJUMP_INSN_P (insn)
	  && (asm_noperands (PATTERN (insn)) >= 0
	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

  switch (entity)
    {
    case I387_TRUNC:
      if (mode == I387_CW_TRUNC)
	return mode;
      break;

    case I387_FLOOR:
      if (mode == I387_CW_FLOOR)
	return mode;
      break;

    case I387_CEIL:
      if (mode == I387_CW_CEIL)
	return mode;
      break;

    case I387_MASK_PM:
      if (mode == I387_CW_MASK_PM)
	return mode;
      break;

    default:
      gcc_unreachable ();
    }

  return I387_CW_ANY;
}
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  MODE selects the required rounding mode; the
   prepared control word is stored in the stack slot that corresponds
   to MODE.  */

void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
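
/* The magic numbers above are bits of the x87 control word: bits 10-11
   are the rounding control (00 = to nearest, 01 = down, 10 = up,
   11 = toward zero), so 0x0c00, 0x0400 and 0x0800 select truncation,
   floor and ceiling respectively, and bit 5 (0x0020) masks the
   precision exception.  The gen_movsi_insv_1 variants store the whole
   high byte of the control word at once (value << 8), which sets the
   same rounding bits; presumably the remaining high-byte bits do not
   matter for the integer-store patterns that consume these control
   words.  */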

/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (rtx insn, rtx *operands, int fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

  if (fisttp)
      output_asm_insn ("fisttp%z0\t%0", operands);
  else
    {
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
	output_asm_insn ("fistp%z0\t%0", operands);
      else
	output_asm_insn ("fist%z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%2", operands);
    }

  return "";
}
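
/* With a non-default rounding mode the emitted sequence is thus

	fldcw	%3	; control word for the required rounding mode
	fistp	%0	; store (and pop) as integer
	fldcw	%2	; restore the original control word

   where operands 2 and 3 name the stack slots prepared by
   emit_i387_cw_initialization above.  (Illustrative only; the
   fist/fistp/fisttp choice follows the conditions in the code.)  */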

/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#if HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    {
      static char retval[] = ".word\t0xc_df";
      int regno = REGNO (operands[opno]);

      gcc_assert (FP_REGNO_P (regno));

      retval[9] = '0' + (regno - FIRST_STACK_REG);
      return retval;
    }
#endif

  return opno ? "fstp\t%y1" : "fstp\t%y0";
}
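
/* The raw .word above hand-assembles ffreep %st(i) for assemblers that
   do not know the mnemonic: the instruction encodes as the two bytes
   0xDF 0xC0+i, and emitting the little-endian word 0xc<i>df (i = 0..7,
   patched in at retval[9]) produces exactly that byte sequence.  */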

/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

const char *
output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "ucomiss\t{%1, %0|%0, %1}";
	else
	  return "comiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "ucomisd\t{%1, %0|%0, %1}";
	else
	  return "comisd\t{%1, %0|%0, %1}";
    }

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
	{
	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
	  return output_387_ffreep (operands, 1);
	}
      else
	return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If the top of the 387 stack dies, and the other operand is
	 also a stack register that dies, then this must be a `fcompp'
	 float compare.  */

      if (eflags_p)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return output_387_ffreep (operands, 0);
	}
      else
	{
	  if (unordered_p)
	    return "fucompp\n\tfnstsw\t%0";
	  else
	    return "fcompp\n\tfnstsw\t%0";
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
      {
	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
      ret = alt[mask];
      gcc_assert (ret);

      return ret;
    }
}

void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif
  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    fprintf (file, "%s%s%d-%s%d\n",
	     directive, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
      machopic_output_function_base_name (file);
      fprintf (file, "\n");
    }
#endif
  else
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}

/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));
  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
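
/* The xor form is preferred when allowed because "xorl %eax, %eax" is
   two bytes against five for "movl $0, %eax"; the cost is the flags
   clobber, which is why the parallel built above makes that clobber
   explicit.  */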

/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}

void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  if (GET_CODE (op1) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
	{
	  op1 = legitimize_tls_address (op1, model, true);
	  op1 = force_operand (op1, op0);
	  if (op1 == op0)
	    return;
	}
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	       && SYMBOL_REF_DLLIMPORT_P (op1))
	op1 = legitimize_dllimport_symbol (op1, false);
    }
  else if (GET_CODE (op1) == CONST
	   && GET_CODE (XEXP (op1, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      rtx addend = XEXP (XEXP (op1, 0), 1);
      rtx symbol = XEXP (XEXP (op1, 0), 0);
      rtx tmp = NULL;

      model = SYMBOL_REF_TLS_MODEL (symbol);
      if (model)
	tmp = legitimize_tls_address (symbol, model, true);
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	       && SYMBOL_REF_DLLIMPORT_P (symbol))
	tmp = legitimize_dllimport_symbol (symbol, true);

      if (tmp)
	{
	  tmp = force_operand (tmp, NULL);
	  tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
				     op0, 1, OPTAB_DIRECT);
	  if (tmp == op0)
	    return;
	}
    }

  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
      if (TARGET_MACHO && !TARGET_64BIT)
	{
#if TARGET_MACHO
	  if (MACHOPIC_PURE)
	    {
	      rtx temp = ((reload_in_progress
			   || ((op0 && REG_P (op0))
			       && mode == Pmode))
			  ? op0 : gen_reg_rtx (Pmode));
	      op1 = machopic_indirect_data_reference (op1, temp);
	      op1 = machopic_legitimize_pic_address (op1, mode,
						     temp == op1 ? 0 : temp);
	    }
	  else if (MACHOPIC_INDIRECT)
	    op1 = machopic_indirect_data_reference (op1, 0);
	  if (op0 == op1)
	    return;
#endif
	}
      else
	{
	  if (MEM_P (op0))
	    op1 = force_reg (Pmode, op1);
	  else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
	    {
	      rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
	      op1 = legitimize_pic_address (op1, reg);
	      if (op0 == op1)
		return;
	    }
	}
    }
  else
    {
      if (MEM_P (op0)
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && MEM_P (op1))
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (can_create_pseudo_p ()
	  && (mode == DImode) && TARGET_64BIT
	  && immediate_operand (op1, mode)
	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
	  && !register_operand (op0, mode)
	  && optimize)
	op1 = copy_to_mode_reg (mode, op1);

      if (can_create_pseudo_p ()
	  && FLOAT_MODE_P (mode)
	  && GET_CODE (op1) == CONST_DOUBLE)
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  op1 = validize_mem (force_const_mem (mode, op1));
	  if (!register_operand (op0, mode))
	    {
	      rtx temp = gen_reg_rtx (mode);
	      emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
	      emit_move_insn (op0, temp);
	      return;
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}

void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if (can_create_pseudo_p ()
      && register_operand (op0, mode)
      && (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
      && standard_sse_constant_p (op1) <= 0)
    op1 = validize_mem (force_const_mem (mode, op1));

  /* TDmode values are passed as TImode on the stack.  TImode values
     are moved via xmm registers, and moving them to the stack can
     result in unaligned memory access.  Use
     ix86_expand_vector_move_misalign() if the memory operand is not
     aligned correctly.  */
  if (can_create_pseudo_p ()
      && (mode == TImode) && !TARGET_64BIT
      && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
	  || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
    {
      rtx tmp[2];

      /* ix86_expand_vector_move_misalign() does not like constants ... */
      if (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
	op1 = validize_mem (force_const_mem (mode, op1));

      /* ... nor both arguments in memory.  */
      if (!register_operand (op0, mode)
	  && !register_operand (op1, mode))
	op1 = force_reg (mode, op1);

      tmp[0] = op0; tmp[1] = op1;
      ix86_expand_vector_move_misalign (mode, tmp);
      return;
    }

  /* Make operand1 a register if it isn't already.  */
  if (can_create_pseudo_p ()
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}

/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg

     if (x86_sse_partial_reg_dependency == true)
       {
	 xorps  reg, reg
	 movlps mem, reg
	 movhps mem+8, reg
       }
     else
       {
	 movlps mem, reg
	 movhps mem+8, reg
       }

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg

     if (x86_sse_split_regs == true)
       {
	 movlpd mem, reg
	 movhpd mem+8, reg
       }
     else
       {
	 movsd  mem, reg
	 movhpd mem+8, reg
       }
 */

void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  if (MEM_P (op1))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? If we have typed data, then it would appear that using
	 movdqu is the only way to get unaligned data loaded with
	 integer type.  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  rtx zero;

	  if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
	    {
	      op0 = gen_lowpart (V2DFmode, op0);
	      op1 = gen_lowpart (V2DFmode, op1);
	      emit_insn (gen_sse2_movupd (op0, op1));
	      return;
	    }

	  /* When SSE registers are split into halves, we can avoid
	     writing to the top half twice.  */
	  if (TARGET_SSE_SPLIT_REGS)
	    {
	      emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
	      zero = op0;
	    }
	  else
	    {
	      /* ??? Not sure about the best option for the Intel chips.
		 The following would seem to satisfy; the register is
		 entirely cleared, breaking the dependency chain.  We
		 then store to the upper half, with a dependency depth
		 of one.  A rumor has it that Intel recommends two movsd
		 followed by an unpacklpd, but this is unconfirmed.  And
		 given that the dependency depth of the unpacklpd would
		 still be one, I'm not sure why this would be better.  */
	      zero = CONST0_RTX (V2DFmode);
	    }

	  m = adjust_address (op1, DFmode, 0);
	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
	  m = adjust_address (op1, DFmode, 8);
	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
	}
      else
	{
	  if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
	    {
	      op0 = gen_lowpart (V4SFmode, op0);
	      op1 = gen_lowpart (V4SFmode, op1);
	      emit_insn (gen_sse_movups (op0, op1));
	      return;
	    }

	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
	    emit_move_insn (op0, CONST0_RTX (mode));
	  else
	    emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));

	  if (mode != V4SFmode)
	    op0 = gen_lowpart (V4SFmode, op0);
	  m = adjust_address (op1, V2SFmode, 0);
	  emit_insn (gen_sse_loadlps (op0, op0, m));
	  m = adjust_address (op1, V2SFmode, 8);
	  emit_insn (gen_sse_loadhps (op0, op0, m));
	}
    }
  else if (MEM_P (op0))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? Similar to above, only less clear because of quote
	 typeless stores unquote.  */
      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  m = adjust_address (op0, DFmode, 0);
	  emit_insn (gen_sse2_storelpd (m, op1));
	  m = adjust_address (op0, DFmode, 8);
	  emit_insn (gen_sse2_storehpd (m, op1));
	}
      else
	{
	  if (mode != V4SFmode)
	    op1 = gen_lowpart (V4SFmode, op1);
	  m = adjust_address (op0, V2SFmode, 0);
	  emit_insn (gen_sse_storelps (m, op1));
	  m = adjust_address (op0, V2SFmode, 8);
	  emit_insn (gen_sse_storehps (m, op1));
	}
    }
  else
    gcc_unreachable ();
}

/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
			     GEN_INT (-GET_MODE_SIZE (mode)),
			     stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
  emit_move_insn (tmp, x);
}
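
/* I.e. the push is open-coded as a stack-pointer adjustment followed
   by a plain store, schematically

	sub	$size, %sp
	mov	x, (%sp)

   (a sketch only; the actual instructions come from whatever the PLUS
   and move expanders above produce).  */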

/* Helper function of ix86_fixup_binary_operands to canonicalize
   operand order.  Returns true if the operands should be swapped.  */

static bool
ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* If the operation is not commutative, we can't do anything.  */
  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return false;

  /* Highest priority is that src1 should match dst.  */
  if (rtx_equal_p (dst, src1))
    return false;
  if (rtx_equal_p (dst, src2))
    return true;

  /* Next highest priority is that immediate constants come second.  */
  if (immediate_operand (src2, mode))
    return false;
  if (immediate_operand (src1, mode))
    return true;

  /* Lowest priority is that memory references should come second.  */
  if (MEM_P (src2))
    return false;
  if (MEM_P (src1))
    return true;

  return false;
}
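
/* For instance, (plus:SI (mem:SI A) (reg:SI B)) with a destination
   matching neither source is swapped to put the memory reference
   second, while (plus:SI (reg:SI B) (mem:SI A)) is left alone --
   the operand-2 position is the one a two-address x86 instruction can
   take from memory once operand 1 is tied to the destination.  */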

/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Canonicalize operand order.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    {
      /* Optimization: Only read from memory once.  */
      if (rtx_equal_p (src1, src2))
	{
	  src2 = force_reg (mode, src2);
	  src1 = src2;
	}
      else
	src2 = force_reg (mode, src2);
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    dst = gen_reg_rtx (mode);

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    src1 = force_reg (mode, src1);

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    src1 = force_reg (mode, src1);

  operands[1] = src1;
  operands[2] = src2;
  return dst;
}

/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
				    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}

/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

int
ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
			 rtx operands[3])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    return 0;

  /* Canonicalize operand order for commutative operators.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, we must have a matching source operand.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    return 0;

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    return 0;

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    return 0;

  return 1;
}

/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of operands is memory, source and destination must match.  */
  if ((MEM_P (operands[0])
       || MEM_P (operands[1]))
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}

/* Post-reload splitter for converting an SF or DFmode value in an
   SSE register into an unsigned SImode.  */

void
ix86_split_convert_uns_si_sse (rtx operands[])
{
  enum machine_mode vecmode;
  rtx value, large, zero_or_two31, input, two31, x;

  large = operands[1];
  zero_or_two31 = operands[2];
  input = operands[3];
  two31 = operands[4];
  vecmode = GET_MODE (large);
  value = gen_rtx_REG (vecmode, REGNO (operands[0]));

  /* Load up the value into the low element.  We must ensure that the other
     elements are valid floats -- zero is the easiest such value.  */
  if (MEM_P (input))
    {
      if (vecmode == V4SFmode)
	emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
      else
	emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
    }
  else
    {
      input = gen_rtx_REG (vecmode, REGNO (input));
      emit_move_insn (value, CONST0_RTX (vecmode));
      if (vecmode == V4SFmode)
	emit_insn (gen_sse_movss (value, value, input));
      else
	emit_insn (gen_sse2_movsd (value, value, input));
    }

  emit_move_insn (large, two31);
  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);

  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
  emit_insn (gen_rtx_SET (VOIDmode, large, x));

  x = gen_rtx_AND (vecmode, zero_or_two31, large);
  emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));

  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
  emit_insn (gen_rtx_SET (VOIDmode, value, x));

  large = gen_rtx_REG (V4SImode, REGNO (large));
  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));

  x = gen_rtx_REG (V4SImode, REGNO (value));
  if (vecmode == V4SFmode)
    emit_insn (gen_sse2_cvttps2dq (x, value));
  else
    emit_insn (gen_sse2_cvttpd2dq (x, value));
  value = x;

  emit_insn (gen_xorv4si3 (value, value, large));
}
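
/* The sequence above computes, elementwise,

     unsigned (x) = x < 2^31 ? (int) x
		  : (int) (x - 2^31) ^ 0x80000000

   LARGE becomes the x >= 2^31 comparison mask, ZERO_OR_TWO31 the value
   to subtract (0 or 2^31), and the final xor with the mask shifted
   into the sign bit adds 2^31 back after the signed conversion.  */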

/* Convert an unsigned DImode value into a DFmode, using only SSE.
   Expects the 64-bit DImode to be supplied in a pair of integral
   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
   -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
  rtx int_xmm, fp_xmm;
  rtx biases, exponents;
  rtx x;

  int_xmm = gen_reg_rtx (V4SImode);
  if (TARGET_INTER_UNIT_MOVES)
    emit_insn (gen_movdi_to_sse (int_xmm, input));
  else if (TARGET_SSE_SPLIT_REGS)
    {
      emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
    }
  else
    {
      x = gen_reg_rtx (V2DImode);
      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
    }

  x = gen_rtx_CONST_VECTOR (V4SImode,
			    gen_rtvec (4, GEN_INT (0x43300000UL),
				       GEN_INT (0x45300000UL),
				       const0_rtx, const0_rtx));
  exponents = validize_mem (force_const_mem (V4SImode, x));

  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
  emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));

  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
     (0x1.0p84 + double(fp_value_hi_xmm)).
     Note these exponents differ by 32.  */

  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));

  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
  real_ldexp (&bias_lo_rvt, &dconst1, 52);
  real_ldexp (&bias_hi_rvt, &dconst1, 84);
  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
  x = const_double_from_real_value (bias_hi_rvt, DFmode);
  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
  biases = validize_mem (force_const_mem (V2DFmode, biases));
  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));

  /* Add the upper and lower DFmode values together.  */
  if (TARGET_SSE3)
    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
  else
    {
      x = copy_to_mode_reg (V2DFmode, fp_xmm);
      emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
    }

  ix86_expand_vector_extract (false, target, fp_xmm, 0);
}

/* Not used, but eases macroization of patterns.  */
void
ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
				  rtx input ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}

/* Convert an unsigned SImode value into a DFmode.  Only currently used
   for SSE, but applicable anywhere.  */

void
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO31r;
  rtx x, fp;

  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
			   NULL, 1, OPTAB_DIRECT);

  fp = gen_reg_rtx (DFmode);
  emit_insn (gen_floatsidf2 (fp, x));

  real_ldexp (&TWO31r, &dconst1, 31);
  x = const_double_from_real_value (TWO31r, DFmode);

  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
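
/* This works because adding -2^31 with wraparound reinterprets the
   unsigned input u as the signed value u - 2^31, which the signed
   conversion represents exactly, and adding the DFmode constant 2^31
   back is also exact (the result fits easily in DFmode's 53-bit
   significand), so u is recovered without a 64-bit conversion.  */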

/* Convert a signed DImode value into a DFmode.  Only used for SSE in
   32-bit mode; otherwise we have a direct convert instruction.  */

void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO32r;
  rtx fp_lo, fp_hi, x;

  fp_lo = gen_reg_rtx (DFmode);
  fp_hi = gen_reg_rtx (DFmode);

  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);
  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);

  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));

  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
			   0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}

/* Convert an unsigned SImode value into a SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.  */
void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE ONE16r;
  rtx fp_hi, fp_lo, int_hi, int_lo, x;

  real_ldexp (&ONE16r, &dconst1, 16);
  x = const_double_from_real_value (ONE16r, SFmode);
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
				NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
				NULL, 0, OPTAB_DIRECT);
  fp_hi = gen_reg_rtx (SFmode);
  fp_lo = gen_reg_rtx (SFmode);
  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
			       0, OPTAB_DIRECT);
  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
			       0, OPTAB_DIRECT);
  if (!rtx_equal_p (target, fp_hi))
    emit_move_insn (target, fp_hi);
}

/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
{
  rtvec v;
  switch (mode)
    {
    case SImode:
      gcc_assert (vect);
      v = gen_rtvec (4, value, value, value, value);
      return gen_rtx_CONST_VECTOR (V4SImode, v);

    case DImode:
      gcc_assert (vect);
      v = gen_rtvec (2, value, value);
      return gen_rtx_CONST_VECTOR (V2DImode, v);

    case SFmode:
      if (vect)
	v = gen_rtvec (4, value, value, value, value);
      else
	v = gen_rtvec (4, value, CONST0_RTX (SFmode),
		       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
      return gen_rtx_CONST_VECTOR (V4SFmode, v);

    case DFmode:
      if (vect)
	v = gen_rtvec (2, value, value);
      else
	v = gen_rtvec (2, value, CONST0_RTX (DFmode));
      return gen_rtx_CONST_VECTOR (V2DFmode, v);

    default:
      gcc_unreachable ();
    }
}

/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode, imode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtx v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  switch (mode)
    {
    case SImode:
    case SFmode:
      imode = SImode;
      vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
      lo = 0x80000000, hi = lo < 0;
      break;

    case DImode:
    case DFmode:
      imode = DImode;
      vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
	lo = (HOST_WIDE_INT)1 << shift, hi = -1;
      else
	lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
      break;

    case TImode:
    case TFmode:
      imode = TImode;
      vec_mode = VOIDmode;
      gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
      lo = 0, hi = (HOST_WIDE_INT)1 << shift;
      break;

    default:
      gcc_unreachable ();
    }

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, imode);
  mask = gen_lowpart (mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (mode, mask);

  v = ix86_build_const_vector (mode, vect, mask);
  return force_reg (vec_mode, v);
}
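
/* E.g. for DFmode this yields a V2DFmode register whose low element is
   -0.0 (the bare sign bit), replicated to both elements when VECT is
   true, or the complemented pattern when INVERT is true; the absneg
   and copysign expanders below combine with it using and/xor/ior.  */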

/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
				rtx operands[])
{
  rtx mask, set, use, clob, dst, src;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode elt_mode = mode;

  if (vector_mode)
    {
      elt_mode = GET_MODE_INNER (mode);
      use_sse = true;
    }
  else if (mode == TFmode)
    use_sse = true;
  else if (TARGET_SSE_MATH)
    use_sse = SSE_FLOAT_MODE_P (mode);

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  if (vector_mode)
    {
      set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
      set = gen_rtx_SET (VOIDmode, dst, set);
      emit_insn (set);
    }
  else
    {
      set = gen_rtx_fmt_e (code, mode, src);
      set = gen_rtx_SET (VOIDmode, dst, set);
      if (mask)
	{
	  use = gen_rtx_USE (VOIDmode, mask);
	  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
	  emit_insn (gen_rtx_PARALLEL (VOIDmode,
				       gen_rtvec (3, set, use, clob)));
	}
      else
	emit_insn (set);
    }
}
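
/* In mask terms: NEG is implemented as x ^ signbit_mask and ABS as
   x & ~signbit_mask (the INVERT argument of ix86_build_signbit_mask
   selects the inverted mask for ABS), so neither form raises floating
   point exceptions or disturbs NaN payload bits.  */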

/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);
  vmode = mode == SFmode ? V4SFmode : V2DFmode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx);

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
	op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (mode == SFmode || mode == DFmode)
	{
	  if (op0 == CONST0_RTX (mode))
	    op0 = CONST0_RTX (vmode);
	  else
	    {
	      rtvec v;

	      if (mode == SFmode)
		v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
			       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
	      else
		v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
	      op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
	    }
	}

      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_const;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_const;
      else
	copysign_insn = gen_copysigntf3_const;

      emit_insn (copysign_insn (dest, op0, op1, mask));
    }
  else
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);

      nmask = ix86_build_signbit_mask (mode, 0, 1);
      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_var;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_var;
      else
	copysign_insn = gen_copysigntf3_var;

      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
    }
}

/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, x;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
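
/* I.e. the split computes dest = (dest & signbit_mask) | op0, which is
   copysign given that the insn pattern ties op1 (the sign source) to
   dest via a matching constraint, and that op0 was forced nonnegative
   in ix86_expand_copysign above, so only its magnitude survives the
   inclusive or.  */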

/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
	 we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
	{
	  x = gen_rtx_AND (vmode, scratch, mask);
	}
      else						/* alternative 2,4 */
	{
	  gcc_assert (REGNO (mask) == REGNO (scratch));
	  op1 = simplify_gen_subreg (vmode, op1, mode, 0);
	  x = gen_rtx_AND (vmode, scratch, op1);
	}
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
	{
	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, nmask);
	}
      else						/* alternative 3,4 */
	{
	  gcc_assert (REGNO (nmask) == REGNO (dest));
	  dest = nmask;
	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, op0);
	}
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}

/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}
11446
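/* Editorial note (illustrative, not in the original source): for example,
   a compare that sets CCZmode satisfies a CCNOmode request, being at least
   as constrained, while a plain CCmode compare does not; conversely a
   CCNOmode compare satisfies a CCmode request only when the comparison is
   against zero.  */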
e075ae69
RH
11447/* Generate insn patterns to do an integer compare of OPERANDS. */
11448
11449static rtx
b96a374d 11450ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
e075ae69
RH
11451{
11452 enum machine_mode cmpmode;
11453 rtx tmp, flags;
11454
11455 cmpmode = SELECT_CC_MODE (code, op0, op1);
11456 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11457
11458 /* This is very simple, but making the interface the same as in the
11459 FP case makes the rest of the code easier. */
11460 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11461 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11462
11463 /* Return the test that should be put into the flags user, i.e.
11464 the bcc, scc, or cmov instruction. */
11465 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11466}
11467
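/* Editorial usage sketch (illustrative only; op0, op1 and label are
   placeholders): the returned test is meant to be fed to a flags user,
   e.g. a conditional jump, along these lines.  */
#if 0
  rtx test = ix86_expand_int_compare (GT, op0, op1);
  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
			       gen_rtx_IF_THEN_ELSE (VOIDmode, test,
						     gen_rtx_LABEL_REF (VOIDmode, label),
						     pc_rtx)));
#endif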
3a3677ff
RH
11468/* Figure out whether to use ordered or unordered fp comparisons.
11469 Return the appropriate mode to use. */
e075ae69 11470
b1cdafbb 11471enum machine_mode
b96a374d 11472ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
e075ae69 11473{
9e7adcb3
JH
11474 /* ??? In order to make all comparisons reversible, we do all comparisons
11475 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11476 all forms of trapping and nontrapping comparisons, we can make inequality
11477 comparisons trapping again, since it results in better code when using
11478 FCOM based compares. */
11479 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
11480}
11481
9076b9c1 11482enum machine_mode
b96a374d 11483ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9076b9c1 11484{
27ac40e2
UB
11485 enum machine_mode mode = GET_MODE (op0);
11486
11487 if (SCALAR_FLOAT_MODE_P (mode))
11488 {
11489 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11490 return ix86_fp_compare_mode (code);
11491 }
11492
9076b9c1
JH
11493 switch (code)
11494 {
11495 /* Only zero flag is needed. */
11496 case EQ: /* ZF=0 */
11497 case NE: /* ZF!=0 */
11498 return CCZmode;
11499 /* Codes needing carry flag. */
265dab10 11500 case GEU: /* CF=0 */
7e08e190 11501 case LTU: /* CF=1 */
d39d658d
RIL
11502 /* Detect overflow checks. They need just the carry flag. */
11503 if (GET_CODE (op0) == PLUS
11504 && rtx_equal_p (op1, XEXP (op0, 0)))
11505 return CCCmode;
11506 else
11507 return CCmode;
11508 case GTU: /* CF=0 & ZF=0 */
7e08e190 11509 case LEU: /* CF=1 | ZF=1 */
d39d658d
RIL
11510 /* Detect overflow checks. They need just the carry flag. */
11511 if (GET_CODE (op0) == MINUS
11512 && rtx_equal_p (op1, XEXP (op0, 0)))
11513 return CCCmode;
11514 else
11515 return CCmode;
9076b9c1
JH
11516 /* Codes possibly doable only with sign flag when
11517 comparing against zero. */
11518 case GE: /* SF=OF or SF=0 */
7e08e190 11519 case LT: /* SF<>OF or SF=1 */
9076b9c1
JH
11520 if (op1 == const0_rtx)
11521 return CCGOCmode;
11522 else
11523 /* For other cases Carry flag is not required. */
11524 return CCGCmode;
11525 /* Codes doable only with the sign flag when comparing
11526 against zero, but there is no jump instruction for that,
4aae8a9a 11527 so we must use relational tests against the overflow
9076b9c1
JH
11528 flag, which therefore needs to be zero. */
11529 case GT: /* ZF=0 & SF=OF */
11530 case LE: /* ZF=1 | SF<>OF */
11531 if (op1 == const0_rtx)
11532 return CCNOmode;
11533 else
11534 return CCGCmode;
7fcd7218
JH
11535 /* The strcmp pattern does (use flags), and combine may ask us for the
11536 proper mode. */
11537 case USE:
11538 return CCmode;
9076b9c1 11539 default:
d0396b79 11540 gcc_unreachable ();
9076b9c1
JH
11541 }
11542}
11543
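/* Editorial worked examples (illustrative, not in the original source):
     ix86_cc_mode (EQ, a, b)               => CCZmode, only ZF is needed;
     ix86_cc_mode (LTU, (plus a b), a)     => CCCmode, the overflow check
                                              "a + b < a" needs only CF;
     ix86_cc_mode (LT, a, const0_rtx)      => CCGOCmode, sign test against
                                              zero, CF irrelevant.  */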
e129d93a
ILT
11544/* Return the fixed registers used for condition codes. */
11545
11546static bool
11547ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11548{
11549 *p1 = FLAGS_REG;
11550 *p2 = FPSR_REG;
11551 return true;
11552}
11553
11554/* If two condition code modes are compatible, return a condition code
11555 mode which is compatible with both. Otherwise, return
11556 VOIDmode. */
11557
11558static enum machine_mode
11559ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11560{
11561 if (m1 == m2)
11562 return m1;
11563
11564 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11565 return VOIDmode;
11566
11567 if ((m1 == CCGCmode && m2 == CCGOCmode)
11568 || (m1 == CCGOCmode && m2 == CCGCmode))
11569 return CCGCmode;
11570
11571 switch (m1)
11572 {
11573 default:
d0396b79 11574 gcc_unreachable ();
e129d93a
ILT
11575
11576 case CCmode:
11577 case CCGCmode:
11578 case CCGOCmode:
11579 case CCNOmode:
06f4e35d
L
11580 case CCAmode:
11581 case CCCmode:
11582 case CCOmode:
11583 case CCSmode:
e129d93a
ILT
11584 case CCZmode:
11585 switch (m2)
11586 {
11587 default:
11588 return VOIDmode;
11589
11590 case CCmode:
11591 case CCGCmode:
11592 case CCGOCmode:
11593 case CCNOmode:
06f4e35d
L
11594 case CCAmode:
11595 case CCCmode:
11596 case CCOmode:
11597 case CCSmode:
e129d93a
ILT
11598 case CCZmode:
11599 return CCmode;
11600 }
11601
11602 case CCFPmode:
11603 case CCFPUmode:
11604 /* These are only compatible with themselves, which we already
11605 checked above. */
11606 return VOIDmode;
11607 }
11608}
11609
c0c102a9
JH
11610/* Split comparison code CODE into comparisons we can do using branch
11611 instructions. BYPASS_CODE is the comparison code for a branch that will
11612 branch around FIRST_CODE and SECOND_CODE. If one of the branches
f822d252 11613 is not required, its code is set to UNKNOWN.
c0c102a9 11614 We never require more than two branches. */
8fe75e43
RH
11615
11616void
b96a374d
AJ
11617ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11618 enum rtx_code *first_code,
11619 enum rtx_code *second_code)
c0c102a9
JH
11620{
11621 *first_code = code;
f822d252
ZW
11622 *bypass_code = UNKNOWN;
11623 *second_code = UNKNOWN;
c0c102a9
JH
11624
11625 /* The fcomi comparison sets flags as follows:
11626
11627 cmp ZF PF CF
11628 > 0 0 0
11629 < 0 0 1
11630 = 1 0 0
11631 un 1 1 1 */
11632
11633 switch (code)
11634 {
11635 case GT: /* GTU - CF=0 & ZF=0 */
11636 case GE: /* GEU - CF=0 */
11637 case ORDERED: /* PF=0 */
11638 case UNORDERED: /* PF=1 */
11639 case UNEQ: /* EQ - ZF=1 */
11640 case UNLT: /* LTU - CF=1 */
11641 case UNLE: /* LEU - CF=1 | ZF=1 */
11642 case LTGT: /* EQ - ZF=0 */
11643 break;
11644 case LT: /* LTU - CF=1 - fails on unordered */
11645 *first_code = UNLT;
11646 *bypass_code = UNORDERED;
11647 break;
11648 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11649 *first_code = UNLE;
11650 *bypass_code = UNORDERED;
11651 break;
11652 case EQ: /* EQ - ZF=1 - fails on unordered */
11653 *first_code = UNEQ;
11654 *bypass_code = UNORDERED;
11655 break;
11656 case NE: /* NE - ZF=0 - fails on unordered */
11657 *first_code = LTGT;
11658 *second_code = UNORDERED;
11659 break;
11660 case UNGE: /* GEU - CF=0 - fails on unordered */
11661 *first_code = GE;
11662 *second_code = UNORDERED;
11663 break;
11664 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11665 *first_code = GT;
11666 *second_code = UNORDERED;
11667 break;
11668 default:
d0396b79 11669 gcc_unreachable ();
c0c102a9
JH
11670 }
11671 if (!TARGET_IEEE_FP)
11672 {
f822d252
ZW
11673 *second_code = UNKNOWN;
11674 *bypass_code = UNKNOWN;
c0c102a9
JH
11675 }
11676}
11677
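/* Editorial example (illustrative): with TARGET_IEEE_FP, LE splits into
   *first_code = UNLE plus *bypass_code = UNORDERED -- branch around the
   UNLE test for unordered operands, where UNLE would wrongly hold -- while
   NE splits into LTGT plus a *second_code = UNORDERED branch to the same
   target, since NE must hold for NaNs.  */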
9e7adcb3 11678/* Return the cost of a comparison done with fcom + arithmetic operations on AX.
5bdc5878 11679 All of the following functions use the number of instructions as the cost metric.
9e7adcb3
JH
11680 In the future this should be tweaked to compute bytes for optimize_size and
11681 to take into account the performance of various instructions on various CPUs. */
11682static int
b96a374d 11683ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9e7adcb3
JH
11684{
11685 if (!TARGET_IEEE_FP)
11686 return 4;
11687 /* The cost of code output by ix86_expand_fp_compare. */
11688 switch (code)
11689 {
11690 case UNLE:
11691 case UNLT:
11692 case LTGT:
11693 case GT:
11694 case GE:
11695 case UNORDERED:
11696 case ORDERED:
11697 case UNEQ:
11698 return 4;
11700 case LT:
11701 case NE:
11702 case EQ:
11703 case UNGE:
11704 return 5;
11706 case LE:
11707 case UNGT:
11708 return 6;
11710 default:
d0396b79 11711 gcc_unreachable ();
9e7adcb3
JH
11712 }
11713}
11714
11715/* Return cost of comparison done using fcomi operation.
11716 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11717static int
b96a374d 11718ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9e7adcb3
JH
11719{
11720 enum rtx_code bypass_code, first_code, second_code;
d1f87653 11721 /* Return arbitrarily high cost when instruction is not supported - this
9e7adcb3
JH
11722 prevents gcc from using it. */
11723 if (!TARGET_CMOVE)
11724 return 1024;
11725 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
f822d252 11726 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
9e7adcb3
JH
11727}
11728
11729/* Return cost of comparison done using sahf operation.
11730 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11731static int
b96a374d 11732ix86_fp_comparison_sahf_cost (enum rtx_code code)
9e7adcb3
JH
11733{
11734 enum rtx_code bypass_code, first_code, second_code;
d1f87653 11735 /* Return arbitrarily high cost when instruction is not preferred - this
9e7adcb3 11736 prevents gcc from using it. */
3c2d980c 11737 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
9e7adcb3
JH
11738 return 1024;
11739 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
f822d252 11740 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
9e7adcb3
JH
11741}
11742
11743/* Compute cost of the comparison done using any method.
11744 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11745static int
b96a374d 11746ix86_fp_comparison_cost (enum rtx_code code)
9e7adcb3
JH
11747{
11748 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11749 int min;
11750
11751 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11752 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11753
11754 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11755 if (min > sahf_cost)
11756 min = sahf_cost;
11757 if (min > fcomi_cost)
11758 min = fcomi_cost;
11759 return min;
11760}
c0c102a9 11761
2ed941ec
RH
11762/* Return true if we should use an FCOMI instruction for this
11763 fp comparison. */
11764
11765int
11766ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11767{
11768 enum rtx_code swapped_code = swap_condition (code);
11769
11770 return ((ix86_fp_comparison_cost (code)
11771 == ix86_fp_comparison_fcomi_cost (code))
11772 || (ix86_fp_comparison_cost (swapped_code)
11773 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11774}
11775
11776/* Swap, force into registers, or otherwise massage the two operands
11777 to a fp comparison. The operands are updated in place; the new
11778 comparison code is returned. */
11779
11780static enum rtx_code
11781ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11782{
11783 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11784 rtx op0 = *pop0, op1 = *pop1;
11785 enum machine_mode op_mode = GET_MODE (op0);
11786 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11787
11788 /* All of the unordered compare instructions only work on registers.
11789 The same is true of the fcomi compare instructions. The XFmode
11790 compare instructions require registers except when comparing
11791 against zero or when converting operand 1 from fixed point to
11792 floating point. */
11793
11794 if (!is_sse
11795 && (fpcmp_mode == CCFPUmode
11796 || (op_mode == XFmode
11797 && ! (standard_80387_constant_p (op0) == 1
11798 || standard_80387_constant_p (op1) == 1)
11799 && GET_CODE (op1) != FLOAT)
11800 || ix86_use_fcomi_compare (code)))
11801 {
11802 op0 = force_reg (op_mode, op0);
11803 op1 = force_reg (op_mode, op1);
11804 }
11805 else
11806 {
11807 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11808 things around if they appear profitable, otherwise force op0
11809 into a register. */
11810
11811 if (standard_80387_constant_p (op0) == 0
11812 || (MEM_P (op0)
11813 && ! (standard_80387_constant_p (op1) == 0
11814 || MEM_P (op1))))
11815 {
11816 rtx tmp;
11817 tmp = op0, op0 = op1, op1 = tmp;
11818 code = swap_condition (code);
11819 }
11820
11821 if (!REG_P (op0))
11822 op0 = force_reg (op_mode, op0);
11823
11824 if (CONSTANT_P (op1))
11825 {
11826 int tmp = standard_80387_constant_p (op1);
11827 if (tmp == 0)
11828 op1 = validize_mem (force_const_mem (op_mode, op1));
11829 else if (tmp == 1)
11830 {
11831 if (TARGET_CMOVE)
11832 op1 = force_reg (op_mode, op1);
11833 }
11834 else
11835 op1 = force_reg (op_mode, op1);
11836 }
11837 }
11838
11839 /* Try to rearrange the comparison to make it cheaper. */
11840 if (ix86_fp_comparison_cost (code)
11841 > ix86_fp_comparison_cost (swap_condition (code))
b3a13419 11842 && (REG_P (op1) || can_create_pseudo_p ()))
2ed941ec
RH
11843 {
11844 rtx tmp;
11845 tmp = op0, op0 = op1, op1 = tmp;
11846 code = swap_condition (code);
11847 if (!REG_P (op0))
11848 op0 = force_reg (op_mode, op0);
11849 }
11850
11851 *pop0 = op0;
11852 *pop1 = op1;
11853 return code;
11854}
11855
11856/* Convert comparison codes we use to represent FP comparison to integer
11857 code that will result in proper branch. Return UNKNOWN if no such code
11858 is available. */
11859
11860enum rtx_code
11861ix86_fp_compare_code_to_integer (enum rtx_code code)
11862{
11863 switch (code)
11864 {
11865 case GT:
11866 return GTU;
11867 case GE:
11868 return GEU;
11869 case ORDERED:
11870 case UNORDERED:
11871 return code;
11873 case UNEQ:
11874 return EQ;
11876 case UNLT:
11877 return LTU;
11879 case UNLE:
11880 return LEU;
11882 case LTGT:
11883 return NE;
11885 default:
11886 return UNKNOWN;
11887 }
11888}
11889
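/* Editorial note (illustrative): this mapping works because after fcomi or
   fnstsw/sahf the FP result sits in EFLAGS exactly as if produced by an
   unsigned integer compare, hence GT -> GTU, UNLT -> LTU and so on; codes
   with no single-condition encoding come back as UNKNOWN.  */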
3a3677ff
RH
11890/* Generate insn patterns to do a floating point compare of OPERANDS. */
11891
9e7adcb3 11892static rtx
b96a374d
AJ
11893ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11894 rtx *second_test, rtx *bypass_test)
3a3677ff
RH
11895{
11896 enum machine_mode fpcmp_mode, intcmp_mode;
c0c102a9 11897 rtx tmp, tmp2;
9e7adcb3 11898 int cost = ix86_fp_comparison_cost (code);
c0c102a9 11899 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
11900
11901 fpcmp_mode = ix86_fp_compare_mode (code);
11902 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11903
9e7adcb3
JH
11904 if (second_test)
11905 *second_test = NULL_RTX;
11906 if (bypass_test)
11907 *bypass_test = NULL_RTX;
11908
c0c102a9
JH
11909 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11910
9e7adcb3 11911 /* Do fcomi/sahf based test when profitable. */
1406ee90 11912 if (ix86_fp_comparison_arithmetics_cost (code) > cost
3c2d980c 11913 && (bypass_code == UNKNOWN || bypass_test)
1406ee90 11914 && (second_code == UNKNOWN || second_test))
32b5b1aa 11915 {
1406ee90
UB
11916 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11917 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11918 tmp);
c0c102a9 11919 if (TARGET_CMOVE)
1406ee90 11920 emit_insn (tmp);
c0c102a9
JH
11921 else
11922 {
1406ee90
UB
11923 gcc_assert (TARGET_SAHF);
11924
bf71a4f8
JH
11925 if (!scratch)
11926 scratch = gen_reg_rtx (HImode);
1406ee90
UB
11927 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
11928
11929 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
c0c102a9 11930 }
e075ae69
RH
11931
11932 /* The FP codes work out to act like unsigned. */
9a915772 11933 intcmp_mode = fpcmp_mode;
9e7adcb3 11934 code = first_code;
f822d252 11935 if (bypass_code != UNKNOWN)
9e7adcb3
JH
11936 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11937 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11938 const0_rtx);
f822d252 11939 if (second_code != UNKNOWN)
9e7adcb3
JH
11940 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11941 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11942 const0_rtx);
e075ae69
RH
11943 }
11944 else
11945 {
11946 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e075ae69 11947 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 11948 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
11949 if (!scratch)
11950 scratch = gen_reg_rtx (HImode);
3a3677ff 11951 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 11952
9a915772
JH
11953 /* In the unordered case, we have to check C2 for NaNs, which
11954 doesn't work out to anything nice combination-wise.
11955 So do some bit twiddling on the value we've got in AH to come
11956 up with an appropriate set of condition codes. */
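/* Editorial note (illustrative): once the status word is in AH, the FPU
   condition bits are C0 = 0x01, C2 = 0x04 and C3 = 0x40, so the constants
   below are combinations of them: 0x45 is C3|C2|C0, 0x05 is C2|C0, and
   0x40 / 0x44 pick out C3 without or with C2.  */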
e075ae69 11957
9a915772
JH
11958 intcmp_mode = CCNOmode;
11959 switch (code)
32b5b1aa 11960 {
9a915772
JH
11961 case GT:
11962 case UNGT:
11963 if (code == GT || !TARGET_IEEE_FP)
32b5b1aa 11964 {
3a3677ff 11965 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69 11966 code = EQ;
9a915772
JH
11967 }
11968 else
11969 {
11970 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11971 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11972 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11973 intcmp_mode = CCmode;
11974 code = GEU;
11975 }
11976 break;
11977 case LT:
11978 case UNLT:
11979 if (code == LT && TARGET_IEEE_FP)
11980 {
3a3677ff
RH
11981 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11982 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
e075ae69
RH
11983 intcmp_mode = CCmode;
11984 code = EQ;
9a915772
JH
11985 }
11986 else
11987 {
11988 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11989 code = NE;
11990 }
11991 break;
11992 case GE:
11993 case UNGE:
11994 if (code == GE || !TARGET_IEEE_FP)
11995 {
3a3677ff 11996 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69 11997 code = EQ;
9a915772
JH
11998 }
11999 else
12000 {
12001 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12002 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12003 GEN_INT (0x01)));
12004 code = NE;
12005 }
12006 break;
12007 case LE:
12008 case UNLE:
12009 if (code == LE && TARGET_IEEE_FP)
12010 {
3a3677ff
RH
12011 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12012 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
12013 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
12014 intcmp_mode = CCmode;
12015 code = LTU;
9a915772
JH
12016 }
12017 else
12018 {
12019 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
12020 code = NE;
12021 }
12022 break;
12023 case EQ:
12024 case UNEQ:
12025 if (code == EQ && TARGET_IEEE_FP)
12026 {
3a3677ff
RH
12027 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12028 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
12029 intcmp_mode = CCmode;
12030 code = EQ;
9a915772
JH
12031 }
12032 else
12033 {
3a3677ff
RH
12034 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
12035 code = NE;
12036 break;
9a915772
JH
12037 }
12038 break;
12039 case NE:
12040 case LTGT:
12041 if (code == NE && TARGET_IEEE_FP)
12042 {
3a3677ff 12043 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9a915772
JH
12044 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12045 GEN_INT (0x40)));
3a3677ff 12046 code = NE;
9a915772
JH
12047 }
12048 else
12049 {
3a3677ff
RH
12050 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
12051 code = EQ;
32b5b1aa 12052 }
9a915772
JH
12053 break;
12054
12055 case UNORDERED:
12056 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12057 code = NE;
12058 break;
12059 case ORDERED:
12060 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12061 code = EQ;
12062 break;
12063
12064 default:
d0396b79 12065 gcc_unreachable ();
32b5b1aa 12066 }
32b5b1aa 12067 }
e075ae69
RH
12068
12069 /* Return the test that should be put into the flags user, i.e.
12070 the bcc, scc, or cmov instruction. */
12071 return gen_rtx_fmt_ee (code, VOIDmode,
12072 gen_rtx_REG (intcmp_mode, FLAGS_REG),
12073 const0_rtx);
12074}
12075
9e3e266c 12076rtx
b96a374d 12077ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
e075ae69
RH
12078{
12079 rtx op0, op1, ret;
12080 op0 = ix86_compare_op0;
12081 op1 = ix86_compare_op1;
12082
a1b8572c
JH
12083 if (second_test)
12084 *second_test = NULL_RTX;
12085 if (bypass_test)
12086 *bypass_test = NULL_RTX;
12087
1ef45b77
RH
12088 if (ix86_compare_emitted)
12089 {
12090 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
12091 ix86_compare_emitted = NULL_RTX;
12092 }
ebb109ad 12093 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
27ac40e2
UB
12094 {
12095 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
12096 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12097 second_test, bypass_test);
12098 }
32b5b1aa 12099 else
e075ae69
RH
12100 ret = ix86_expand_int_compare (code, op0, op1);
12101
12102 return ret;
12103}
12104
03598dea
JH
12105/* Return true if the CODE will result in nontrivial jump sequence. */
12106bool
b96a374d 12107ix86_fp_jump_nontrivial_p (enum rtx_code code)
03598dea
JH
12108{
12109 enum rtx_code bypass_code, first_code, second_code;
12110 if (!TARGET_CMOVE)
12111 return true;
12112 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
f822d252 12113 return bypass_code != UNKNOWN || second_code != UNKNOWN;
03598dea
JH
12114}
12115
e075ae69 12116void
b96a374d 12117ix86_expand_branch (enum rtx_code code, rtx label)
e075ae69 12118{
3a3677ff 12119 rtx tmp;
e075ae69 12120
3d763bcf
KH
12121 /* If we have emitted a compare insn, go straight to simple.
12122 ix86_expand_compare won't emit anything if ix86_compare_emitted
12123 is non NULL. */
12124 if (ix86_compare_emitted)
12125 goto simple;
12126
3a3677ff 12127 switch (GET_MODE (ix86_compare_op0))
32b5b1aa 12128 {
3a3677ff
RH
12129 case QImode:
12130 case HImode:
12131 case SImode:
0d7d98ee 12132 simple:
a1b8572c 12133 tmp = ix86_expand_compare (code, NULL, NULL);
e075ae69
RH
12134 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12135 gen_rtx_LABEL_REF (VOIDmode, label),
12136 pc_rtx);
12137 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa 12138 return;
e075ae69 12139
3a3677ff
RH
12140 case SFmode:
12141 case DFmode:
0f290768 12142 case XFmode:
3a3677ff
RH
12143 {
12144 rtvec vec;
12145 int use_fcomi;
03598dea 12146 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
12147
12148 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
12149 &ix86_compare_op1);
fce5a9f2 12150
03598dea
JH
12151 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12152
12153 /* Check whether we will use the natural sequence with one jump. If
12154 so, we can expand jump early. Otherwise delay expansion by
12155 creating compound insn to not confuse optimizers. */
1406ee90 12156 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
03598dea
JH
12157 {
12158 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
12159 gen_rtx_LABEL_REF (VOIDmode, label),
7c82106f 12160 pc_rtx, NULL_RTX, NULL_RTX);
03598dea
JH
12161 }
12162 else
12163 {
12164 tmp = gen_rtx_fmt_ee (code, VOIDmode,
12165 ix86_compare_op0, ix86_compare_op1);
12166 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12167 gen_rtx_LABEL_REF (VOIDmode, label),
12168 pc_rtx);
12169 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
12170
12171 use_fcomi = ix86_use_fcomi_compare (code);
12172 vec = rtvec_alloc (3 + !use_fcomi);
12173 RTVEC_ELT (vec, 0) = tmp;
12174 RTVEC_ELT (vec, 1)
d02cb675 12175 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
03598dea 12176 RTVEC_ELT (vec, 2)
d02cb675 12177 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
03598dea
JH
12178 if (! use_fcomi)
12179 RTVEC_ELT (vec, 3)
12180 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
12181
12182 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
12183 }
3a3677ff
RH
12184 return;
12185 }
32b5b1aa 12186
3a3677ff 12187 case DImode:
0d7d98ee
JH
12188 if (TARGET_64BIT)
12189 goto simple;
28356f52 12190 case TImode:
3a3677ff
RH
12191 /* Expand DImode branch into multiple compare+branch. */
12192 {
12193 rtx lo[2], hi[2], label2;
12194 enum rtx_code code1, code2, code3;
28356f52 12195 enum machine_mode submode;
32b5b1aa 12196
3a3677ff
RH
12197 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
12198 {
12199 tmp = ix86_compare_op0;
12200 ix86_compare_op0 = ix86_compare_op1;
12201 ix86_compare_op1 = tmp;
12202 code = swap_condition (code);
12203 }
28356f52
JB
12204 if (GET_MODE (ix86_compare_op0) == DImode)
12205 {
12206 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
12207 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
12208 submode = SImode;
12209 }
12210 else
12211 {
12212 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
12213 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
12214 submode = DImode;
12215 }
32b5b1aa 12216
3a3677ff
RH
12217 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12218 avoid two branches. This costs one extra insn, so disable when
12219 optimizing for size. */
32b5b1aa 12220
3a3677ff
RH
12221 if ((code == EQ || code == NE)
12222 && (!optimize_size
12223 || hi[1] == const0_rtx || lo[1] == const0_rtx))
12224 {
12225 rtx xor0, xor1;
32b5b1aa 12226
3a3677ff
RH
12227 xor1 = hi[0];
12228 if (hi[1] != const0_rtx)
28356f52 12229 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
3a3677ff 12230 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 12231
3a3677ff
RH
12232 xor0 = lo[0];
12233 if (lo[1] != const0_rtx)
28356f52 12234 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
3a3677ff 12235 NULL_RTX, 0, OPTAB_WIDEN);
e075ae69 12236
28356f52 12237 tmp = expand_binop (submode, ior_optab, xor1, xor0,
3a3677ff 12238 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 12239
3a3677ff
RH
12240 ix86_compare_op0 = tmp;
12241 ix86_compare_op1 = const0_rtx;
12242 ix86_expand_branch (code, label);
12243 return;
12244 }
e075ae69 12245
1f9124e4
JJ
12246 /* Otherwise, if we are doing less-than or greater-or-equal-than,
12247 op1 is a constant and the low word is zero, then we can just
18117c05
JJ
12248 examine the high word. Similarly for low word -1 and
12249 less-or-equal-than or greater-than. */
32b5b1aa 12250
18117c05 12251 if (CONST_INT_P (hi[1]))
1f9124e4
JJ
12252 switch (code)
12253 {
12254 case LT: case LTU: case GE: case GEU:
18117c05
JJ
12255 if (lo[1] == const0_rtx)
12256 {
12257 ix86_compare_op0 = hi[0];
12258 ix86_compare_op1 = hi[1];
12259 ix86_expand_branch (code, label);
12260 return;
12261 }
c754abbf 12262 break;
18117c05
JJ
12263 case LE: case LEU: case GT: case GTU:
12264 if (lo[1] == constm1_rtx)
12265 {
12266 ix86_compare_op0 = hi[0];
12267 ix86_compare_op1 = hi[1];
12268 ix86_expand_branch (code, label);
12269 return;
12270 }
c754abbf 12271 break;
1f9124e4
JJ
12272 default:
12273 break;
12274 }
e075ae69 12275
3a3677ff 12276 /* Otherwise, we need two or three jumps. */
e075ae69 12277
3a3677ff 12278 label2 = gen_label_rtx ();
e075ae69 12279
3a3677ff
RH
12280 code1 = code;
12281 code2 = swap_condition (code);
12282 code3 = unsigned_condition (code);
e075ae69 12283
3a3677ff
RH
12284 switch (code)
12285 {
12286 case LT: case GT: case LTU: case GTU:
12287 break;
e075ae69 12288
3a3677ff
RH
12289 case LE: code1 = LT; code2 = GT; break;
12290 case GE: code1 = GT; code2 = LT; break;
12291 case LEU: code1 = LTU; code2 = GTU; break;
12292 case GEU: code1 = GTU; code2 = LTU; break;
e075ae69 12293
f822d252
ZW
12294 case EQ: code1 = UNKNOWN; code2 = NE; break;
12295 case NE: code2 = UNKNOWN; break;
e075ae69 12296
3a3677ff 12297 default:
d0396b79 12298 gcc_unreachable ();
3a3677ff 12299 }
e075ae69 12300
3a3677ff
RH
12301 /*
12302 * a < b =>
12303 * if (hi(a) < hi(b)) goto true;
12304 * if (hi(a) > hi(b)) goto false;
12305 * if (lo(a) < lo(b)) goto true;
12306 * false:
12307 */
12308
12309 ix86_compare_op0 = hi[0];
12310 ix86_compare_op1 = hi[1];
12311
f822d252 12312 if (code1 != UNKNOWN)
3a3677ff 12313 ix86_expand_branch (code1, label);
f822d252 12314 if (code2 != UNKNOWN)
3a3677ff
RH
12315 ix86_expand_branch (code2, label2);
12316
12317 ix86_compare_op0 = lo[0];
12318 ix86_compare_op1 = lo[1];
12319 ix86_expand_branch (code3, label);
12320
f822d252 12321 if (code2 != UNKNOWN)
3a3677ff
RH
12322 emit_label (label2);
12323 return;
12324 }
e075ae69 12325
3a3677ff 12326 default:
d0396b79 12327 gcc_unreachable ();
3a3677ff 12328 }
32b5b1aa 12329}
e075ae69 12330
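/* Editorial example (illustrative, -m32): for unsigned DImode "a < b" the
   three-jump path above comes out roughly as

	cmpl	hi(b), hi(a)
	jb	.Ltrue		# code1 = LTU on the high words
	ja	.Lskip		# code2 = GTU, result is known false
	cmpl	lo(b), lo(a)
	jb	.Ltrue		# code3 = LTU on the low words
   .Lskip:

   whereas EQ/NE use the xor/or trick above to get by with one branch.  */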
9e7adcb3
JH
12331/* Split branch based on floating point condition. */
12332void
b96a374d 12333ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
7c82106f 12334 rtx target1, rtx target2, rtx tmp, rtx pushed)
9e7adcb3
JH
12335{
12336 rtx second, bypass;
12337 rtx label = NULL_RTX;
03598dea 12338 rtx condition;
6b24c259
JH
12339 int bypass_probability = -1, second_probability = -1, probability = -1;
12340 rtx i;
9e7adcb3
JH
12341
12342 if (target2 != pc_rtx)
12343 {
12344 rtx tmp = target2;
12345 code = reverse_condition_maybe_unordered (code);
12346 target2 = target1;
12347 target1 = tmp;
12348 }
12349
12350 condition = ix86_expand_fp_compare (code, op1, op2,
12351 tmp, &second, &bypass);
6b24c259 12352
7c82106f
UB
12353 /* Remove pushed operand from stack. */
12354 if (pushed)
12355 ix86_free_from_memory (GET_MODE (pushed));
12356
6b24c259
JH
12357 if (split_branch_probability >= 0)
12358 {
12359 /* Distribute the probabilities across the jumps.
12360 Assume that BYPASS and SECOND always test
12361 for UNORDERED. */
12362 probability = split_branch_probability;
12363
d6a7951f 12364 /* A value of 1 is low enough that there is no need for the probability
6b24c259
JH
12365 to be updated. Later we may run some experiments and see
12366 if unordered values are more frequent in practice. */
12367 if (bypass)
12368 bypass_probability = 1;
12369 if (second)
12370 second_probability = 1;
12371 }
9e7adcb3
JH
12372 if (bypass != NULL_RTX)
12373 {
12374 label = gen_label_rtx ();
6b24c259
JH
12375 i = emit_jump_insn (gen_rtx_SET
12376 (VOIDmode, pc_rtx,
12377 gen_rtx_IF_THEN_ELSE (VOIDmode,
12378 bypass,
12379 gen_rtx_LABEL_REF (VOIDmode,
12380 label),
12381 pc_rtx)));
12382 if (bypass_probability >= 0)
12383 REG_NOTES (i)
12384 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12385 GEN_INT (bypass_probability),
12386 REG_NOTES (i));
12387 }
12388 i = emit_jump_insn (gen_rtx_SET
9e7adcb3
JH
12389 (VOIDmode, pc_rtx,
12390 gen_rtx_IF_THEN_ELSE (VOIDmode,
6b24c259
JH
12391 condition, target1, target2)));
12392 if (probability >= 0)
12393 REG_NOTES (i)
12394 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12395 GEN_INT (probability),
12396 REG_NOTES (i));
12397 if (second != NULL_RTX)
9e7adcb3 12398 {
6b24c259
JH
12399 i = emit_jump_insn (gen_rtx_SET
12400 (VOIDmode, pc_rtx,
12401 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
12402 target2)));
12403 if (second_probability >= 0)
12404 REG_NOTES (i)
12405 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12406 GEN_INT (second_probability),
12407 REG_NOTES (i));
9e7adcb3 12408 }
9e7adcb3
JH
12409 if (label != NULL_RTX)
12410 emit_label (label);
12411}
12412
32b5b1aa 12413int
b96a374d 12414ix86_expand_setcc (enum rtx_code code, rtx dest)
32b5b1aa 12415{
3a627503 12416 rtx ret, tmp, tmpreg, equiv;
a1b8572c 12417 rtx second_test, bypass_test;
e075ae69 12418
28356f52 12419 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
e075ae69
RH
12420 return 0; /* FAIL */
12421
d0396b79 12422 gcc_assert (GET_MODE (dest) == QImode);
e075ae69 12423
a1b8572c 12424 ret = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
12425 PUT_MODE (ret, QImode);
12426
12427 tmp = dest;
a1b8572c 12428 tmpreg = dest;
32b5b1aa 12429
e075ae69 12430 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
a1b8572c
JH
12431 if (bypass_test || second_test)
12432 {
12433 rtx test = second_test;
12434 int bypass = 0;
12435 rtx tmp2 = gen_reg_rtx (QImode);
12436 if (bypass_test)
12437 {
d0396b79 12438 gcc_assert (!second_test);
a1b8572c
JH
12439 test = bypass_test;
12440 bypass = 1;
12441 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12442 }
12443 PUT_MODE (test, QImode);
12444 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12445
12446 if (bypass)
12447 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12448 else
12449 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12450 }
e075ae69 12451
3a627503 12452 /* Attach a REG_EQUAL note describing the comparison result. */
1ef45b77
RH
12453 if (ix86_compare_op0 && ix86_compare_op1)
12454 {
12455 equiv = simplify_gen_relational (code, QImode,
12456 GET_MODE (ix86_compare_op0),
12457 ix86_compare_op0, ix86_compare_op1);
12458 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12459 }
3a627503 12460
e075ae69 12461 return 1; /* DONE */
32b5b1aa 12462}
e075ae69 12463
c35d187f
RH
12464/* Expand comparison setting or clearing carry flag. Return true when
12465 successful and set pop for the operation. */
12466static bool
b96a374d 12467ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
4977bab6
ZW
12468{
12469 enum machine_mode mode =
12470 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12471
d02cb675 12472 /* Do not handle DImode compares, which go through a special path. */
28356f52 12473 if (mode == (TARGET_64BIT ? TImode : DImode))
e6e81735 12474 return false;
27ac40e2
UB
12475
12476 if (SCALAR_FLOAT_MODE_P (mode))
e6e81735
JH
12477 {
12478 rtx second_test = NULL, bypass_test = NULL;
12479 rtx compare_op, compare_seq;
12480
27ac40e2
UB
12481 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12482
12483 /* Shortcut: following common codes never translate
12484 into carry flag compares. */
e6e81735
JH
12485 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12486 || code == ORDERED || code == UNORDERED)
12487 return false;
12488
12489 /* These comparisons require zero flag; swap operands so they won't. */
12490 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12491 && !TARGET_IEEE_FP)
12492 {
12493 rtx tmp = op0;
12494 op0 = op1;
12495 op1 = tmp;
12496 code = swap_condition (code);
12497 }
12498
d02cb675
UB
12499 /* Try to expand the comparison and verify that we end up with
12500 a carry flag based comparison. This fails only in the
12501 uncommon case where we decide to expand the comparison
12502 using arithmetic. */
e6e81735
JH
12503 start_sequence ();
12504 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12505 &second_test, &bypass_test);
12506 compare_seq = get_insns ();
12507 end_sequence ();
12508
12509 if (second_test || bypass_test)
12510 return false;
d02cb675 12511
e6e81735
JH
12512 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12513 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12514 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12515 else
12516 code = GET_CODE (compare_op);
d02cb675 12517
e6e81735
JH
12518 if (code != LTU && code != GEU)
12519 return false;
d02cb675 12520
e6e81735
JH
12521 emit_insn (compare_seq);
12522 *pop = compare_op;
12523 return true;
12524 }
d02cb675 12525
e6e81735 12526 if (!INTEGRAL_MODE_P (mode))
4977bab6 12527 return false;
d02cb675 12528
4977bab6
ZW
12529 switch (code)
12530 {
12531 case LTU:
12532 case GEU:
12533 break;
12534
12535 /* Convert a==0 into (unsigned)a<1. */
12536 case EQ:
12537 case NE:
12538 if (op1 != const0_rtx)
12539 return false;
12540 op1 = const1_rtx;
12541 code = (code == EQ ? LTU : GEU);
12542 break;
12543
12544 /* Convert a>b into b<a or a>=b-1. */
12545 case GTU:
12546 case LEU:
7656aee4 12547 if (CONST_INT_P (op1))
4977bab6
ZW
12548 {
12549 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12550 /* Bail out on overflow. We could still swap the operands, but
43f3a59d 12551 that would force loading the constant into a register. */
4977bab6
ZW
12552 if (op1 == const0_rtx
12553 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12554 return false;
12555 code = (code == GTU ? GEU : LTU);
12556 }
12557 else
12558 {
12559 rtx tmp = op1;
12560 op1 = op0;
12561 op0 = tmp;
12562 code = (code == GTU ? LTU : GEU);
12563 }
12564 break;
12565
ccea753c 12566 /* Convert a>=0 into (unsigned)a<0x80000000. */
4977bab6
ZW
12567 case LT:
12568 case GE:
12569 if (mode == DImode || op1 != const0_rtx)
12570 return false;
ccea753c 12571 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
4977bab6
ZW
12572 code = (code == LT ? GEU : LTU);
12573 break;
12574 case LE:
12575 case GT:
12576 if (mode == DImode || op1 != constm1_rtx)
12577 return false;
ccea753c 12578 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
4977bab6
ZW
12579 code = (code == LE ? GEU : LTU);
12580 break;
12581
12582 default:
12583 return false;
12584 }
ebe75517
JH
12585 /* Swapping operands may cause constant to appear as first operand. */
12586 if (!nonimmediate_operand (op0, VOIDmode))
12587 {
b3a13419 12588 if (!can_create_pseudo_p ())
ebe75517
JH
12589 return false;
12590 op0 = force_reg (mode, op0);
12591 }
4977bab6
ZW
12592 ix86_compare_op0 = op0;
12593 ix86_compare_op1 = op1;
12594 *pop = ix86_expand_compare (code, NULL, NULL);
d0396b79 12595 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
4977bab6
ZW
12596 return true;
12597}
12598
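/* Editorial example (illustrative): "x == 0" is rewritten above into
   (unsigned) x < 1, which sets the carry flag exactly when x is zero, so
   a caller can materialize the result without branching:

	cmpl	$1, x
	sbbl	%eax, %eax	# -1 if x == 0, else 0

   and then adjust with and/add, as ix86_expand_int_movcc does below.  */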
32b5b1aa 12599int
b96a374d 12600ix86_expand_int_movcc (rtx operands[])
32b5b1aa 12601{
e075ae69
RH
12602 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12603 rtx compare_seq, compare_op;
a1b8572c 12604 rtx second_test, bypass_test;
635559ab 12605 enum machine_mode mode = GET_MODE (operands[0]);
4977bab6 12606 bool sign_bit_compare_p = false;
3a3677ff 12607
e075ae69 12608 start_sequence ();
a1b8572c 12609 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
2f937369 12610 compare_seq = get_insns ();
e075ae69
RH
12611 end_sequence ();
12612
12613 compare_code = GET_CODE (compare_op);
12614
4977bab6
ZW
12615 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12616 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12617 sign_bit_compare_p = true;
12618
e075ae69
RH
12619 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12620 HImode insns, we'd be swallowed in word prefix ops. */
12621
4977bab6 12622 if ((mode != HImode || TARGET_FAST_PREFIX)
28356f52 12623 && (mode != (TARGET_64BIT ? TImode : DImode))
7656aee4
UB
12624 && CONST_INT_P (operands[2])
12625 && CONST_INT_P (operands[3]))
e075ae69
RH
12626 {
12627 rtx out = operands[0];
12628 HOST_WIDE_INT ct = INTVAL (operands[2]);
12629 HOST_WIDE_INT cf = INTVAL (operands[3]);
12630 HOST_WIDE_INT diff;
12631
4977bab6
ZW
12632 diff = ct - cf;
12633 /* Sign bit compares are better done using shifts than by using
b96a374d 12634 sbb. */
4977bab6
ZW
12635 if (sign_bit_compare_p
12636 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12637 ix86_compare_op1, &compare_op))
e075ae69 12638 {
e075ae69
RH
12639 /* Detect overlap between destination and compare sources. */
12640 rtx tmp = out;
12641
4977bab6 12642 if (!sign_bit_compare_p)
36583fea 12643 {
e6e81735
JH
12644 bool fpcmp = false;
12645
4977bab6
ZW
12646 compare_code = GET_CODE (compare_op);
12647
e6e81735
JH
12648 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12649 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12650 {
12651 fpcmp = true;
12652 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12653 }
12654
4977bab6
ZW
12655 /* To simplify rest of code, restrict to the GEU case. */
12656 if (compare_code == LTU)
12657 {
12658 HOST_WIDE_INT tmp = ct;
12659 ct = cf;
12660 cf = tmp;
12661 compare_code = reverse_condition (compare_code);
12662 code = reverse_condition (code);
12663 }
e6e81735
JH
12664 else
12665 {
12666 if (fpcmp)
12667 PUT_CODE (compare_op,
12668 reverse_condition_maybe_unordered
12669 (GET_CODE (compare_op)));
12670 else
12671 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12672 }
4977bab6 12673 diff = ct - cf;
36583fea 12674
4977bab6
ZW
12675 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12676 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12677 tmp = gen_reg_rtx (mode);
e075ae69 12678
4977bab6 12679 if (mode == DImode)
e6e81735 12680 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
4977bab6 12681 else
e6e81735 12682 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
4977bab6 12683 }
14f73b5a 12684 else
4977bab6
ZW
12685 {
12686 if (code == GT || code == GE)
12687 code = reverse_condition (code);
12688 else
12689 {
12690 HOST_WIDE_INT tmp = ct;
12691 ct = cf;
12692 cf = tmp;
5fb48685 12693 diff = ct - cf;
4977bab6
ZW
12694 }
12695 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12696 ix86_compare_op1, VOIDmode, 0, -1);
12697 }
e075ae69 12698
36583fea
JH
12699 if (diff == 1)
12700 {
12701 /*
12702 * cmpl op0,op1
12703 * sbbl dest,dest
12704 * [addl dest, ct]
12705 *
12706 * Size 5 - 8.
12707 */
12708 if (ct)
b96a374d 12709 tmp = expand_simple_binop (mode, PLUS,
635559ab 12710 tmp, GEN_INT (ct),
4977bab6 12711 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
12712 }
12713 else if (cf == -1)
12714 {
12715 /*
12716 * cmpl op0,op1
12717 * sbbl dest,dest
12718 * orl $ct, dest
12719 *
12720 * Size 8.
12721 */
635559ab
JH
12722 tmp = expand_simple_binop (mode, IOR,
12723 tmp, GEN_INT (ct),
4977bab6 12724 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
12725 }
12726 else if (diff == -1 && ct)
12727 {
12728 /*
12729 * cmpl op0,op1
12730 * sbbl dest,dest
06ec023f 12731 * notl dest
36583fea
JH
12732 * [addl dest, cf]
12733 *
12734 * Size 8 - 11.
12735 */
4977bab6 12736 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
635559ab 12737 if (cf)
b96a374d 12738 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
12739 copy_rtx (tmp), GEN_INT (cf),
12740 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
12741 }
12742 else
12743 {
12744 /*
12745 * cmpl op0,op1
12746 * sbbl dest,dest
06ec023f 12747 * [notl dest]
36583fea
JH
12748 * andl cf - ct, dest
12749 * [addl dest, ct]
12750 *
12751 * Size 8 - 11.
12752 */
06ec023f
RB
12753
12754 if (cf == 0)
12755 {
12756 cf = ct;
12757 ct = 0;
4977bab6 12758 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
06ec023f
RB
12759 }
12760
635559ab 12761 tmp = expand_simple_binop (mode, AND,
4977bab6 12762 copy_rtx (tmp),
d8bf17f9 12763 gen_int_mode (cf - ct, mode),
4977bab6 12764 copy_rtx (tmp), 1, OPTAB_DIRECT);
635559ab 12765 if (ct)
b96a374d 12766 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
12767 copy_rtx (tmp), GEN_INT (ct),
12768 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea 12769 }
e075ae69 12770
4977bab6
ZW
12771 if (!rtx_equal_p (tmp, out))
12772 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
e075ae69
RH
12773
12774 return 1; /* DONE */
12775 }
12776
e075ae69
RH
12777 if (diff < 0)
12778 {
27ac40e2
UB
12779 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12780
e075ae69
RH
12781 HOST_WIDE_INT tmp;
12782 tmp = ct, ct = cf, cf = tmp;
12783 diff = -diff;
27ac40e2
UB
12784
12785 if (SCALAR_FLOAT_MODE_P (cmp_mode))
734dba19 12786 {
27ac40e2
UB
12787 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12788
734dba19
JH
12789 /* We may be reversing unordered compare to normal compare, that
12790 is not valid in general (we may convert non-trapping condition
12791 to trapping one), however on i386 we currently emit all
12792 comparisons unordered. */
12793 compare_code = reverse_condition_maybe_unordered (compare_code);
12794 code = reverse_condition_maybe_unordered (code);
12795 }
12796 else
12797 {
12798 compare_code = reverse_condition (compare_code);
12799 code = reverse_condition (code);
12800 }
e075ae69 12801 }
0f2a3457 12802
f822d252 12803 compare_code = UNKNOWN;
0f2a3457 12804 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
7656aee4 12805 && CONST_INT_P (ix86_compare_op1))
0f2a3457
JJ
12806 {
12807 if (ix86_compare_op1 == const0_rtx
12808 && (code == LT || code == GE))
12809 compare_code = code;
12810 else if (ix86_compare_op1 == constm1_rtx)
12811 {
12812 if (code == LE)
12813 compare_code = LT;
12814 else if (code == GT)
12815 compare_code = GE;
12816 }
12817 }
12818
12819 /* Optimize dest = (op0 < 0) ? -1 : cf. */
f822d252 12820 if (compare_code != UNKNOWN
0f2a3457
JJ
12821 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12822 && (cf == -1 || ct == -1))
12823 {
12824 /* If lea code below could be used, only optimize
12825 if it results in a 2 insn sequence. */
12826
12827 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12828 || diff == 3 || diff == 5 || diff == 9)
12829 || (compare_code == LT && ct == -1)
12830 || (compare_code == GE && cf == -1))
12831 {
12832 /*
12833 * notl op1 (if necessary)
12834 * sarl $31, op1
12835 * orl cf, op1
12836 */
12837 if (ct != -1)
12838 {
12839 cf = ct;
b96a374d 12840 ct = -1;
0f2a3457
JJ
12841 code = reverse_condition (code);
12842 }
12843
12844 out = emit_store_flag (out, code, ix86_compare_op0,
12845 ix86_compare_op1, VOIDmode, 0, -1);
12846
12847 out = expand_simple_binop (mode, IOR,
12848 out, GEN_INT (cf),
12849 out, 1, OPTAB_DIRECT);
12850 if (out != operands[0])
12851 emit_move_insn (operands[0], out);
12852
12853 return 1; /* DONE */
12854 }
12855 }
12856
4977bab6 12857
635559ab
JH
12858 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12859 || diff == 3 || diff == 5 || diff == 9)
4977bab6 12860 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
8fe75e43
RH
12861 && (mode != DImode
12862 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
e075ae69
RH
12863 {
12864 /*
12865 * xorl dest,dest
12866 * cmpl op1,op2
12867 * setcc dest
12868 * lea cf(dest*(ct-cf)),dest
12869 *
12870 * Size 14.
12871 *
12872 * This also catches the degenerate setcc-only case.
12873 */
12874
12875 rtx tmp;
12876 int nops;
12877
12878 out = emit_store_flag (out, code, ix86_compare_op0,
12879 ix86_compare_op1, VOIDmode, 0, 1);
12880
12881 nops = 0;
97f51ac4
RB
12882 /* On x86_64 the lea instruction operates on Pmode, so we need
12883 to get the arithmetic done in the proper mode to match. */
e075ae69 12884 if (diff == 1)
068f5dea 12885 tmp = copy_rtx (out);
e075ae69
RH
12886 else
12887 {
885a70fd 12888 rtx out1;
068f5dea 12889 out1 = copy_rtx (out);
635559ab 12890 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
e075ae69
RH
12891 nops++;
12892 if (diff & 1)
12893 {
635559ab 12894 tmp = gen_rtx_PLUS (mode, tmp, out1);
e075ae69
RH
12895 nops++;
12896 }
12897 }
12898 if (cf != 0)
12899 {
635559ab 12900 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
e075ae69
RH
12901 nops++;
12902 }
4977bab6 12903 if (!rtx_equal_p (tmp, out))
e075ae69 12904 {
14f73b5a 12905 if (nops == 1)
a5cf80f0 12906 out = force_operand (tmp, copy_rtx (out));
e075ae69 12907 else
4977bab6 12908 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
e075ae69 12909 }
4977bab6 12910 if (!rtx_equal_p (out, operands[0]))
1985ef90 12911 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
12912
12913 return 1; /* DONE */
12914 }
12915
12916 /*
12917 * General case: Jumpful:
12918 * xorl dest,dest cmpl op1, op2
12919 * cmpl op1, op2 movl ct, dest
12920 * setcc dest jcc 1f
12921 * decl dest movl cf, dest
12922 * andl (cf-ct),dest 1:
12923 * addl ct,dest
0f290768 12924 *
e075ae69
RH
12925 * Size 20. Size 14.
12926 *
12927 * This is reasonably steep, but branch mispredict costs are
12928 * high on modern cpus, so consider failing only if optimizing
12929 * for space.
e075ae69
RH
12930 */
12931
4977bab6
ZW
12932 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12933 && BRANCH_COST >= 2)
e075ae69 12934 {
97f51ac4 12935 if (cf == 0)
e075ae69 12936 {
27ac40e2
UB
12937 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12938
97f51ac4
RB
12939 cf = ct;
12940 ct = 0;
27ac40e2
UB
12941
12942 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12943 {
12944 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12945
12946 /* We may be reversing unordered compare to normal compare,
12947 that is not valid in general (we may convert non-trapping
12948 condition to trapping one), however on i386 we currently
12949 emit all comparisons unordered. */
12950 code = reverse_condition_maybe_unordered (code);
12951 }
0f2a3457
JJ
12952 else
12953 {
12954 code = reverse_condition (code);
f822d252 12955 if (compare_code != UNKNOWN)
0f2a3457
JJ
12956 compare_code = reverse_condition (compare_code);
12957 }
12958 }
12959
f822d252 12960 if (compare_code != UNKNOWN)
0f2a3457
JJ
12961 {
12962 /* notl op1 (if needed)
12963 sarl $31, op1
12964 andl (cf-ct), op1
b96a374d 12965 addl ct, op1
0f2a3457
JJ
12966
12967 For x < 0 (resp. x <= -1) there will be no notl,
12968 so if possible swap the constants to get rid of the
12969 complement.
12970 True/false will be -1/0 while code below (store flag
12971 followed by decrement) is 0/-1, so the constants need
12972 to be exchanged once more. */
12973
12974 if (compare_code == GE || !cf)
734dba19 12975 {
b96a374d 12976 code = reverse_condition (code);
0f2a3457 12977 compare_code = LT;
734dba19
JH
12978 }
12979 else
12980 {
0f2a3457 12981 HOST_WIDE_INT tmp = cf;
b96a374d 12982 cf = ct;
0f2a3457 12983 ct = tmp;
734dba19 12984 }
0f2a3457
JJ
12985
12986 out = emit_store_flag (out, code, ix86_compare_op0,
12987 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 12988 }
0f2a3457
JJ
12989 else
12990 {
12991 out = emit_store_flag (out, code, ix86_compare_op0,
12992 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 12993
4977bab6
ZW
12994 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12995 copy_rtx (out), 1, OPTAB_DIRECT);
0f2a3457 12996 }
e075ae69 12997
4977bab6 12998 out = expand_simple_binop (mode, AND, copy_rtx (out),
d8bf17f9 12999 gen_int_mode (cf - ct, mode),
4977bab6 13000 copy_rtx (out), 1, OPTAB_DIRECT);
97f51ac4 13001 if (ct)
4977bab6
ZW
13002 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
13003 copy_rtx (out), 1, OPTAB_DIRECT);
13004 if (!rtx_equal_p (out, operands[0]))
13005 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
13006
13007 return 1; /* DONE */
13008 }
13009 }
13010
4977bab6 13011 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
e075ae69
RH
13012 {
13013 /* Try a few things more with specific constants and a variable. */
13014
78a0d70c 13015 optab op;
e075ae69
RH
13016 rtx var, orig_out, out, tmp;
13017
4977bab6 13018 if (BRANCH_COST <= 2)
e075ae69
RH
13019 return 0; /* FAIL */
13020
0f290768 13021 /* If one of the two operands is an interesting constant, load a
e075ae69 13022 constant with the above and mask it in with a logical operation. */
0f290768 13023
7656aee4 13024 if (CONST_INT_P (operands[2]))
e075ae69
RH
13025 {
13026 var = operands[3];
4977bab6 13027 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
e075ae69 13028 operands[3] = constm1_rtx, op = and_optab;
4977bab6 13029 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
e075ae69 13030 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
13031 else
13032 return 0; /* FAIL */
e075ae69 13033 }
7656aee4 13034 else if (CONST_INT_P (operands[3]))
e075ae69
RH
13035 {
13036 var = operands[2];
4977bab6 13037 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
e075ae69 13038 operands[2] = constm1_rtx, op = and_optab;
4977bab6 13039 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
e075ae69 13040 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
13041 else
13042 return 0; /* FAIL */
e075ae69 13043 }
78a0d70c 13044 else
e075ae69
RH
13045 return 0; /* FAIL */
13046
13047 orig_out = operands[0];
635559ab 13048 tmp = gen_reg_rtx (mode);
e075ae69
RH
13049 operands[0] = tmp;
13050
13051 /* Recurse to get the constant loaded. */
13052 if (ix86_expand_int_movcc (operands) == 0)
13053 return 0; /* FAIL */
13054
13055 /* Mask in the interesting variable. */
635559ab 13056 out = expand_binop (mode, op, var, tmp, orig_out, 0,
e075ae69 13057 OPTAB_WIDEN);
4977bab6
ZW
13058 if (!rtx_equal_p (out, orig_out))
13059 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
e075ae69
RH
13060
13061 return 1; /* DONE */
13062 }
13063
13064 /*
13065 * For comparison with above,
13066 *
13067 * movl cf,dest
13068 * movl ct,tmp
13069 * cmpl op1,op2
13070 * cmovcc tmp,dest
13071 *
13072 * Size 15.
13073 */
13074
635559ab
JH
13075 if (! nonimmediate_operand (operands[2], mode))
13076 operands[2] = force_reg (mode, operands[2]);
13077 if (! nonimmediate_operand (operands[3], mode))
13078 operands[3] = force_reg (mode, operands[3]);
e075ae69 13079
a1b8572c
JH
13080 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13081 {
635559ab 13082 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
13083 emit_move_insn (tmp, operands[3]);
13084 operands[3] = tmp;
13085 }
13086 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13087 {
635559ab 13088 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
13089 emit_move_insn (tmp, operands[2]);
13090 operands[2] = tmp;
13091 }
4977bab6 13092
c9682caf 13093 if (! register_operand (operands[2], VOIDmode)
b96a374d 13094 && (mode == QImode
4977bab6 13095 || ! register_operand (operands[3], VOIDmode)))
635559ab 13096 operands[2] = force_reg (mode, operands[2]);
a1b8572c 13097
4977bab6
ZW
13098 if (mode == QImode
13099 && ! register_operand (operands[3], VOIDmode))
13100 operands[3] = force_reg (mode, operands[3]);
13101
e075ae69
RH
13102 emit_insn (compare_seq);
13103 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 13104 gen_rtx_IF_THEN_ELSE (mode,
e075ae69
RH
13105 compare_op, operands[2],
13106 operands[3])));
a1b8572c 13107 if (bypass_test)
4977bab6 13108 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 13109 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 13110 bypass_test,
4977bab6
ZW
13111 copy_rtx (operands[3]),
13112 copy_rtx (operands[0]))));
a1b8572c 13113 if (second_test)
4977bab6 13114 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 13115 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 13116 second_test,
4977bab6
ZW
13117 copy_rtx (operands[2]),
13118 copy_rtx (operands[0]))));
e075ae69
RH
13119
13120 return 1; /* DONE */
e9a25f70 13121}
e075ae69 13122
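/* Editorial worked example (illustrative): "r = (a < b) ? 7 : 12" with
   unsigned operands takes the sbb path above and comes out roughly as

	cmpl	b, a
	sbbl	%eax, %eax	# -1 if a < b, else 0
	andl	$-5, %eax	# masked with ct - cf = 7 - 12
	addl	$12, %eax	# plus cf: yields 7 or 12

   the branch-free mask-and-add shape described in the size comments.  */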
ab8efbd8
RH
13123/* Swap, force into registers, or otherwise massage the two operands
13124 to an sse comparison with a mask result. Thus we differ a bit from
13125 ix86_prepare_fp_compare_args which expects to produce a flags result.
13126
13127 The DEST operand exists to help determine whether to commute commutative
13128 operators. The POP0/POP1 operands are updated in place. The new
13129 comparison code is returned, or UNKNOWN if not implementable. */
13130
13131static enum rtx_code
13132ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
13133 rtx *pop0, rtx *pop1)
13134{
13135 rtx tmp;
13136
13137 switch (code)
13138 {
13139 case LTGT:
13140 case UNEQ:
13141 /* We have no LTGT as an operator. We could implement it with
13142 NE & ORDERED, but this requires an extra temporary. It's
13143 not clear that it's worth it. */
13144 return UNKNOWN;
13145
13146 case LT:
13147 case LE:
13148 case UNGT:
13149 case UNGE:
13150 /* These are supported directly. */
13151 break;
13152
13153 case EQ:
13154 case NE:
13155 case UNORDERED:
13156 case ORDERED:
13157 /* For commutative operators, try to canonicalize the destination
13158 operand to be first in the comparison - this helps reload to
13159 avoid extra moves. */
13160 if (!dest || !rtx_equal_p (dest, *pop1))
13161 break;
13162 /* FALLTHRU */
13163
13164 case GE:
13165 case GT:
13166 case UNLE:
13167 case UNLT:
13168 /* These are not supported directly. Swap the comparison operands
13169 to transform into something that is supported. */
13170 tmp = *pop0;
13171 *pop0 = *pop1;
13172 *pop1 = tmp;
13173 code = swap_condition (code);
13174 break;
13175
13176 default:
13177 gcc_unreachable ();
13178 }
13179
13180 return code;
13181}
13182
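/* Editorial example (illustrative): SSE compare instructions encode only
   EQ/LT/LE/UNORD and their negations, so "a > b" is handled above by
   swapping the operands into "b < a"; LTGT and UNEQ come back as UNKNOWN
   because they would need two masks combined through an extra temporary.  */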
/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
			   rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  bool is_min;
  rtx tmp;

  if (code == LT)
    ;
  else if (code == UNGE)
    {
      tmp = if_true;
      if_true = if_false;
      if_false = tmp;
    }
  else
    return false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
  return true;
}

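/* Illustrative sketch (not part of the compiler): a scalar model of the
   asymmetric SSE min semantics that make the transformation above IEEE
   safe only while the operand order is preserved.  The function name is
   hypothetical, for demonstration only.  */
#if 0
#include <assert.h>
#include <math.h>

/* SSE minss-style semantics: when the comparison is false (NaN involved,
   or -0.0 vs +0.0 comparing equal), the second operand is returned.  */
static double sse_min (double a, double b)
{
  return a < b ? a : b;
}

int main (void)
{
  double q = nan ("");

  /* The result depends on operand order when a NaN is involved...  */
  assert (isnan (sse_min (0.0, q)));	/* a < NaN is false -> returns b.  */
  assert (!isnan (sse_min (q, 0.0)));	/* NaN < 0 is false -> returns b.  */

  /* ...and when -0.0 meets +0.0, since -0.0 < +0.0 is false.  */
  assert (signbit (sse_min (0.0, -0.0)));
  assert (!signbit (sse_min (-0.0, 0.0)));
  return 0;
}
#endif
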
/* Expand an sse vector comparison.  Return the register with the result.  */

static rtx
ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
		     rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx x;

  cmp_op0 = force_reg (mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, mode))
    cmp_op1 = force_reg (mode, cmp_op1);

  if (optimize
      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return dest;
}

/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx t2, t3, x;

  if (TARGET_SSE5)
    {
      rtx pcmov = gen_rtx_SET (mode, dest,
			       gen_rtx_IF_THEN_ELSE (mode, cmp,
						     op_true,
						     op_false));
      emit_insn (pcmov);
    }
  else if (op_false == CONST0_RTX (mode))
    {
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (op_true == CONST0_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      op_true = force_reg (mode, op_true);
      op_false = force_reg (mode, op_false);

      t2 = gen_reg_rtx (mode);
      if (optimize)
	t3 = gen_reg_rtx (mode);
      else
	t3 = dest;

      x = gen_rtx_AND (mode, op_true, cmp);
      emit_insn (gen_rtx_SET (VOIDmode, t2, x));

      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, t3, x));

      x = gen_rtx_IOR (mode, t3, t2);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}

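/* Illustrative sketch (not part of the compiler): the scalar analogue of
   the AND/ANDNOT/OR blend emitted above.  CMP is assumed to be an
   all-ones/all-zeros mask, as produced by the SSE compare instructions.  */
#if 0
#include <assert.h>
#include <stdint.h>

static uint32_t blend (uint32_t mask, uint32_t t, uint32_t f)
{
  /* dest = (mask & t) | (~mask & f); mask must be 0 or ~0.  */
  return (mask & t) | (~mask & f);
}

int main (void)
{
  assert (blend (~0u, 1, 2) == 1);	/* comparison true: pick T.  */
  assert (blend (0u, 1, 2) == 2);	/* comparison false: pick F.  */
  return 0;
}
#endif
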
/* Expand a floating-point conditional move.  Return true if successful.  */

int
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op, second_test, bypass_test;

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
	 allocation just to gain access to it.  Deny movcc when the
	 comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (ix86_compare_op0);
      if (cmode == VOIDmode)
	cmode = GET_MODE (ix86_compare_op1);
      if (cmode != mode)
	return 0;

      code = ix86_prepare_sse_fp_compare_args (operands[0], code,
					       &ix86_compare_op0,
					       &ix86_compare_op1);
      if (code == UNKNOWN)
	return 0;

      if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
				     ix86_compare_op1, operands[2],
				     operands[3]))
	return 1;

      tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
				 ix86_compare_op1, operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      gcc_assert (!second_test && !bypass_test);
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
						operands[2], operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode, bypass_test,
						  operands[3], operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode, second_test,
						  operands[2], operands[0])));

  return 1;
}

/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
					   &operands[4], &operands[5]);
  if (code == UNKNOWN)
    return false;

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
				 operands[5], operands[1], operands[2]))
    return true;

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
			     operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}

/* Expand a signed/unsigned integral vector conditional move.  */

bool
ix86_expand_int_vcond (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;
  rtx x, cop0, cop1;

  cop0 = operands[4];
  cop1 = operands[5];

  /* Canonicalize the comparison to EQ, GT, GTU.  */
  switch (code)
    {
    case EQ:
    case GT:
    case GTU:
      break;

    case NE:
    case LE:
    case LEU:
      code = reverse_condition (code);
      negate = true;
      break;

    case GE:
    case GEU:
      code = reverse_condition (code);
      negate = true;
      /* FALLTHRU */

    case LT:
    case LTU:
      code = swap_condition (code);
      x = cop0, cop0 = cop1, cop1 = x;
      break;

    default:
      gcc_unreachable ();
    }

  /* Only SSE4.1/SSE4.2 supports V2DImode.  */
  if (mode == V2DImode)
    {
      switch (code)
	{
	case EQ:
	  /* SSE4.1 supports EQ.  */
	  if (!TARGET_SSE4_1)
	    return false;
	  break;

	case GT:
	case GTU:
	  /* SSE4.2 supports GT/GTU.  */
	  if (!TARGET_SSE4_2)
	    return false;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Unsigned parallel compare is not supported by the hardware.  Play some
     tricks to turn this into a signed comparison against 0.  */
  if (code == GTU)
    {
      cop0 = force_reg (mode, cop0);

      switch (mode)
	{
	case V4SImode:
	case V2DImode:
	  {
	    rtx t1, t2, mask;

	    /* Perform a parallel modulo subtraction.  */
	    t1 = gen_reg_rtx (mode);
	    emit_insn ((mode == V4SImode
			? gen_subv4si3
			: gen_subv2di3) (t1, cop0, cop1));

	    /* Extract the original sign bit of op0.  */
	    mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
					    true, false);
	    t2 = gen_reg_rtx (mode);
	    emit_insn ((mode == V4SImode
			? gen_andv4si3
			: gen_andv2di3) (t2, cop0, mask));

	    /* XOR it back into the result of the subtraction.  This results
	       in the sign bit set iff we saw unsigned underflow.  */
	    x = gen_reg_rtx (mode);
	    emit_insn ((mode == V4SImode
			? gen_xorv4si3
			: gen_xorv2di3) (x, t1, t2));

	    code = GT;
	  }
	  break;

	case V16QImode:
	case V8HImode:
	  /* Perform a parallel unsigned saturating subtraction.  */
	  x = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, x,
				  gen_rtx_US_MINUS (mode, cop0, cop1)));

	  code = EQ;
	  negate = !negate;
	  break;

	default:
	  gcc_unreachable ();
	}

      cop0 = x;
      cop1 = CONST0_RTX (mode);
    }

  x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
			   operands[1+negate], operands[2-negate]);

  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
			 operands[2-negate]);
  return true;
}

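/* Illustrative sketch (not part of the compiler): a scalar model of the
   saturating-subtraction trick used above for the V16QI/V8HI cases.
   (a - b) with unsigned saturation is zero exactly when a <= b, so
   "a >u b" becomes "(a -us b) != 0", i.e. an EQ compare plus negation.  */
#if 0
#include <assert.h>
#include <stdint.h>

static uint8_t usat_sub (uint8_t a, uint8_t b)
{
  return a > b ? a - b : 0;	/* like PSUBUSB on one element */
}

int main (void)
{
  unsigned a, b;
  for (a = 0; a < 256; a++)
    for (b = 0; b < 256; b++)
      assert ((a > b) == (usat_sub (a, b) != 0));
  return 0;
}
#endif
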
/* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
   true if we should do zero extension, else sign extension.  HIGH_P is
   true if we want the N/2 high elements, else the low elements.  */

void
ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
{
  enum machine_mode imode = GET_MODE (operands[1]);
  rtx (*unpack)(rtx, rtx, rtx);
  rtx se, dest;

  switch (imode)
    {
    case V16QImode:
      if (high_p)
	unpack = gen_vec_interleave_highv16qi;
      else
	unpack = gen_vec_interleave_lowv16qi;
      break;
    case V8HImode:
      if (high_p)
	unpack = gen_vec_interleave_highv8hi;
      else
	unpack = gen_vec_interleave_lowv8hi;
      break;
    case V4SImode:
      if (high_p)
	unpack = gen_vec_interleave_highv4si;
      else
	unpack = gen_vec_interleave_lowv4si;
      break;
    default:
      gcc_unreachable ();
    }

  dest = gen_lowpart (imode, operands[0]);

  if (unsigned_p)
    se = force_reg (imode, CONST0_RTX (imode));
  else
    se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
			      operands[1], pc_rtx, pc_rtx);

  emit_insn (unpack (dest, operands[1], se));
}

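/* Illustrative sketch (not part of the compiler): widening by interleave.
   Sign extension interleaves each element with a lanewise "0 > x" compare
   result (all ones for negative elements); zero extension interleaves with
   zero.  A scalar model for one 8-to-16 bit element:  */
#if 0
#include <assert.h>
#include <stdint.h>

static uint16_t widen8to16 (uint8_t x, int unsigned_p)
{
  /* The high byte is the "se" lane: 0, or ~0 when sign-extending a
     negative value (the PCMPGTB result).  */
  uint8_t se = (!unsigned_p && (x & 0x80)) ? 0xff : 0x00;
  return (uint16_t) ((se << 8) | x);	/* punpcklbw x, se */
}

int main (void)
{
  assert (widen8to16 (0x80, 0) == 0xff80);	/* sign extend -128 */
  assert (widen8to16 (0x80, 1) == 0x0080);	/* zero extend 128 */
  return 0;
}
#endif
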
/* This function performs the same task as ix86_expand_sse_unpack,
   but with SSE4.1 instructions.  */

void
ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
{
  enum machine_mode imode = GET_MODE (operands[1]);
  rtx (*unpack)(rtx, rtx);
  rtx src, dest;

  switch (imode)
    {
    case V16QImode:
      if (unsigned_p)
	unpack = gen_sse4_1_zero_extendv8qiv8hi2;
      else
	unpack = gen_sse4_1_extendv8qiv8hi2;
      break;
    case V8HImode:
      if (unsigned_p)
	unpack = gen_sse4_1_zero_extendv4hiv4si2;
      else
	unpack = gen_sse4_1_extendv4hiv4si2;
      break;
    case V4SImode:
      if (unsigned_p)
	unpack = gen_sse4_1_zero_extendv2siv2di2;
      else
	unpack = gen_sse4_1_extendv2siv2di2;
      break;
    default:
      gcc_unreachable ();
    }

  dest = operands[0];
  if (high_p)
    {
      /* Shift higher 8 bytes to lower 8 bytes.  */
      src = gen_reg_rtx (imode);
      emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
				   gen_lowpart (TImode, operands[1]),
				   GEN_INT (64)));
    }
  else
    src = operands[1];

  emit_insn (unpack (dest, src));
}

/* This function performs the same task as ix86_expand_sse_unpack,
   but with amdfam15 (SSE5) instructions.  */

#define PPERM_SRC	0x00		/* copy source */
#define PPERM_INVERT	0x20		/* invert source */
#define PPERM_REVERSE	0x40		/* bit reverse source */
#define PPERM_REV_INV	0x60		/* bit reverse & invert src */
#define PPERM_ZERO	0x80		/* all 0's */
#define PPERM_ONES	0xa0		/* all 1's */
#define PPERM_SIGN	0xc0		/* propagate sign bit */
#define PPERM_INV_SIGN	0xe0		/* invert & propagate sign */

#define PPERM_SRC1	0x00		/* use first source byte */
#define PPERM_SRC2	0x10		/* use second source byte */

void
ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
{
  enum machine_mode imode = GET_MODE (operands[1]);
  int pperm_bytes[16];
  int i;
  int h = (high_p) ? 8 : 0;
  int h2;
  int sign_extend;
  rtvec v = rtvec_alloc (16);
  rtvec vs;
  rtx x, p;
  rtx op0 = operands[0], op1 = operands[1];

  switch (imode)
    {
    case V16QImode:
      vs = rtvec_alloc (8);
      h2 = (high_p) ? 8 : 0;
      for (i = 0; i < 8; i++)
	{
	  pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
	  pperm_bytes[2*i+1] = ((unsigned_p)
				? PPERM_ZERO
				: PPERM_SIGN | PPERM_SRC2 | i | h);
	}

      for (i = 0; i < 16; i++)
	RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);

      for (i = 0; i < 8; i++)
	RTVEC_ELT (vs, i) = GEN_INT (i + h2);

      p = gen_rtx_PARALLEL (VOIDmode, vs);
      x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
      if (unsigned_p)
	emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
      else
	emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
      break;

    case V8HImode:
      vs = rtvec_alloc (4);
      h2 = (high_p) ? 4 : 0;
      for (i = 0; i < 4; i++)
	{
	  sign_extend = ((unsigned_p)
			 ? PPERM_ZERO
			 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
	  pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
	  pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
	  pperm_bytes[4*i+2] = sign_extend;
	  pperm_bytes[4*i+3] = sign_extend;
	}

      for (i = 0; i < 16; i++)
	RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);

      for (i = 0; i < 4; i++)
	RTVEC_ELT (vs, i) = GEN_INT (i + h2);

      p = gen_rtx_PARALLEL (VOIDmode, vs);
      x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
      if (unsigned_p)
	emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
      else
	emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
      break;

    case V4SImode:
      vs = rtvec_alloc (2);
      h2 = (high_p) ? 2 : 0;
      for (i = 0; i < 2; i++)
	{
	  sign_extend = ((unsigned_p)
			 ? PPERM_ZERO
			 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
	  pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
	  pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
	  pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
	  pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
	  pperm_bytes[8*i+4] = sign_extend;
	  pperm_bytes[8*i+5] = sign_extend;
	  pperm_bytes[8*i+6] = sign_extend;
	  pperm_bytes[8*i+7] = sign_extend;
	}

      for (i = 0; i < 16; i++)
	RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);

      for (i = 0; i < 2; i++)
	RTVEC_ELT (vs, i) = GEN_INT (i + h2);

      p = gen_rtx_PARALLEL (VOIDmode, vs);
      x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
      if (unsigned_p)
	emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
      else
	emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
      break;

    default:
      gcc_unreachable ();
    }

  return;
}

/* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
   next narrower integer vector type.  */
void
ix86_expand_sse5_pack (rtx operands[3])
{
  enum machine_mode imode = GET_MODE (operands[0]);
  int pperm_bytes[16];
  int i;
  rtvec v = rtvec_alloc (16);
  rtx x;
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];

  switch (imode)
    {
    case V16QImode:
      for (i = 0; i < 8; i++)
	{
	  pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
	  pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
	}

      for (i = 0; i < 16; i++)
	RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);

      x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
      emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
      break;

    case V8HImode:
      for (i = 0; i < 4; i++)
	{
	  pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
	  pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
	  pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
	  pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
	}

      for (i = 0; i < 16; i++)
	RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);

      x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
      emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
      break;

    case V4SImode:
      for (i = 0; i < 2; i++)
	{
	  pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
	  pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
	  pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
	  pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
	  pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
	  pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
	  pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
	  pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
	}

      for (i = 0; i < 16; i++)
	RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);

      x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
      emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
      break;

    default:
      gcc_unreachable ();
    }

  return;
}

13847
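/* Illustrative sketch (not part of the compiler): how a PPERM selector
   byte is decoded.  Each of the 16 selector bytes picks one byte out of
   the 32-byte concatenation of the two sources (bit 4 selects the source,
   bits 0-3 the byte index) and applies the operation encoded in the high
   bits (copy, zero, sign-propagate, ...).  A scalar model of the "copy"
   case, with hypothetical names:  */
#if 0
#include <assert.h>
#include <stdint.h>

static uint8_t pperm_copy_byte (const uint8_t src1[16], const uint8_t src2[16],
				uint8_t sel)
{
  const uint8_t *src = (sel & 0x10) ? src2 : src1;	/* PPERM_SRC2 bit */
  return src[sel & 0x0f];				/* byte index */
}

int main (void)
{
  uint8_t a[16] = { 1, 2, 3 }, b[16] = { 9, 8, 7 };
  assert (pperm_copy_byte (a, b, 0x02) == 3);	/* src1, byte 2 */
  assert (pperm_copy_byte (a, b, 0x12) == 7);	/* src2, byte 2 */
  return 0;
}
#endif
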
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
int
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode = GET_MODE (operands[0]);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return 0;
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
				       ix86_compare_op1, &compare_op))
    return 0;
  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		    (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }
  PUT_MODE (compare_op, mode);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (GET_MODE (operands[0]))
	{
	case QImode:
	  emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case HImode:
	  emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case SImode:
	  emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case DImode:
	  emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (GET_MODE (operands[0]))
	{
	case QImode:
	  emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case HImode:
	  emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case SImode:
	  emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case DImode:
	  emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  return 1; /* DONE */
}

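/* Illustrative sketch (not part of the compiler): the branch-free pattern
   this expander targets.  A conditional increment "r = a + (x < y)" maps
   to cmp + adc (add the carry flag); the decrement form maps to sbb.  The
   scalar equivalent just uses the comparison result as a 0/1 value.  */
#if 0
#include <assert.h>

static unsigned cond_inc (unsigned a, unsigned x, unsigned y)
{
  return a + (x < y);	/* cmp y, x ; adc $0, a */
}

int main (void)
{
  assert (cond_inc (10, 1, 2) == 11);
  assert (cond_inc (10, 2, 1) == 10);
  return 0;
}
#endif
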
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  if (!TARGET_64BIT)
    size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 3);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (MEM_P (operand) && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (MEM_P (operand) && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
	 the operand may actually have a different mode now.  That's
	 ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
	      if (size == 3)
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  gcc_unreachable ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }
  else
    {
      if (mode == TImode)
	split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      real_to_target (l, &r, mode);

	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);

	      if (upper_mode == SImode)
		parts[1] = gen_int_mode (l[2], SImode);
	      else if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[1]
		  = gen_int_mode
		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
		       DImode);
	      else
		parts[1] = immed_double_const (l[2], l[3], DImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }

  return size;
}

/* Emit insns to perform a move or push of DI, DF, and XF values.
   Operands 2-4 contain the input values in the correct order;
   operands 5-7 contain the output values.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (MEM_P (operands[1])
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (!MEM_P (operands[0])
		|| offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && MEM_P (operands[1])
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && MEM_P (part[1][0]))
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  part[1][1] = replace_equiv_address (part[1][1],
					      plus_constant (base, UNITS_PER_WORD));
	  if (nparts == 3)
	    part[1][2] = replace_equiv_address (part[1][2],
						plus_constant (base, 8));
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  If this is
	     a register, that is OK - we just use the larger counterpart.
	     We also retype memory - this comes from an attempt to avoid the
	     REX prefix on moving the second half of a TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      switch (GET_CODE (part[1][1]))
		{
		case MEM:
		  part[1][1] = adjust_address (part[1][1], DImode, 0);
		  break;

		case REG:
		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
		  break;

		default:
		  gcc_unreachable ();
		}

	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      if (nparts == 3)
	{
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];
	}
      else
	{
	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
	}
    }
  else
    {
      if (nparts == 3)
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];
	}
      else
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	}
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_size)
    {
      if (CONST_INT_P (operands[5])
	  && operands[5] != const0_rtx
	  && REG_P (operands[2]))
	{
	  if (CONST_INT_P (operands[6])
	      && INTVAL (operands[6]) == INTVAL (operands[5]))
	    operands[6] = operands[2];

	  if (nparts == 3
	      && CONST_INT_P (operands[7])
	      && INTVAL (operands[7]) == INTVAL (operands[5]))
	    operands[7] = operands[2];
	}

      if (nparts == 3
	  && CONST_INT_P (operands[6])
	  && operands[6] != const0_rtx
	  && REG_P (operands[3])
	  && CONST_INT_P (operands[7])
	  && INTVAL (operands[7]) == INTVAL (operands[6]))
	operands[7] = operands[3];
    }

  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}

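/* Illustrative sketch (not part of the compiler): the ordering hazard the
   splitter guards against.  Copying a multi-word value through word-sized
   moves must pick an order that does not clobber a register still feeding
   the source address.  A scalar model of the reordering decision:  */
#if 0
#include <assert.h>
#include <stdint.h>

static void split_move64 (uint32_t dst[2], const uint32_t src[2],
			  int dst0_overlaps_src_addr)
{
  if (dst0_overlaps_src_addr)
    {				/* copy the high part first */
      dst[1] = src[1];
      dst[0] = src[0];
    }
  else
    {				/* normal order */
      dst[0] = src[0];
      dst[1] = src[1];
    }
}

int main (void)
{
  uint32_t s[2] = { 1, 2 }, d[2] = { 0, 0 };
  split_move64 (d, s, 0);
  assert (d[0] == 1 && d[1] == 2);
  return 0;
}
#endif
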
/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

static void
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
  if (count == 1)
    {
      emit_insn ((mode == DImode
		  ? gen_addsi3
		  : gen_adddi3) (operand, operand, operand));
    }
  else if (!optimize_size
	   && count * ix86_cost->add <= ix86_cost->shift_const)
    {
      int i;
      for (i = 0; i < count; i++)
	{
	  emit_insn ((mode == DImode
		      ? gen_addsi3
		      : gen_adddi3) (operand, operand, operand));
	}
    }
  else
    emit_insn ((mode == DImode
		? gen_ashlsi3
		: gen_ashldi3) (operand, operand, GEN_INT (count)));
}

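/* Illustrative sketch (not part of the compiler): a left shift by a small
   constant is a sequence of doublings, which is what the add sequence
   above exploits when adds are cheaper than a constant shift.  */
#if 0
#include <assert.h>
#include <stdint.h>

static uint32_t shl_by_adds (uint32_t x, int count)
{
  int i;
  for (i = 0; i < count; i++)
    x += x;			/* x <<= 1 */
  return x;
}

int main (void)
{
  assert (shl_by_adds (5, 3) == 5u << 3);
  return 0;
}
#endif
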
void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ? 32 : 64;

  if (CONST_INT_P (operands[2]))
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count >= single_width)
	{
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > single_width)
	    ix86_expand_ashl_const (high[0], count - single_width, mode);
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shld_1
		      : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
	  ix86_expand_ashl_const (low[0], count, mode);
	}
      return;
    }

  (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen QImode-capable registers, then 1 << N
	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
	{
	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

	  ix86_expand_clear (low[0]);
	  ix86_expand_clear (high[0]);
	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));

	  d = gen_lowpart (QImode, low[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));

	  d = gen_lowpart (QImode, high[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_NE (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
	}

      /* Otherwise, we can get the same results by manually performing
	 a bit extract operation on bit 5/6, and then performing the two
	 shifts.  The two methods of getting 0/1 into low/high are exactly
	 the same size.  Avoiding the shift in the bit extract case helps
	 pentium4 a bit; no one else seems to care much either way.  */
      else
	{
	  rtx x;

	  if (TARGET_PARTIAL_REG_STALL && !optimize_size)
	    x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
	  else
	    x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

	  emit_insn ((mode == DImode
		      ? gen_lshrsi3
		      : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
	  emit_insn ((mode == DImode
		      ? gen_andsi3
		      : gen_anddi3) (high[0], high[0], GEN_INT (1)));
	  emit_move_insn (low[0], high[0]);
	  emit_insn ((mode == DImode
		      ? gen_xorsi3
		      : gen_xordi3) (low[0], low[0], GEN_INT (1)));
	}

      emit_insn ((mode == DImode
		  ? gen_ashlsi3
		  : gen_ashldi3) (low[0], low[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_ashlsi3
		  : gen_ashldi3) (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
	 know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_size)
	emit_move_insn (high[0], low[0]);
      else
	emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
      emit_insn ((mode == DImode
		  ? gen_x86_shld_1
		  : gen_x86_64_shld) (high[0], low[0], operands[2]));
    }

  emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      ix86_expand_clear (scratch);
      emit_insn ((mode == DImode
		  ? gen_x86_shift_adj_1
		  : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
    }
  else
    emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
}

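/* Illustrative sketch (not part of the compiler): the double-word left
   shift built from single-word operations, i.e. what SHLD plus the
   adjustment for counts >= the word width computes.  */
#if 0
#include <assert.h>
#include <stdint.h>

static uint64_t shl64 (uint32_t lo, uint32_t hi, unsigned n)
{
  n &= 63;
  if (n == 0)
    ;
  else if (n < 32)
    {
      hi = (hi << n) | (lo >> (32 - n));   /* shld */
      lo <<= n;
    }
  else
    {				/* count >= 32: high gets low, low is 0 */
      hi = lo << (n - 32);
      lo = 0;
    }
  return ((uint64_t) hi << 32) | lo;
}

int main (void)
{
  assert (shl64 (0x89abcdefu, 0x01234567u, 8) == 0x23456789abcdef00ull);
  assert (shl64 (0x89abcdefu, 0x01234567u, 40) == 0xabcdef0000000000ull);
  return 0;
}
#endif
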
void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ? 32 : 64;

  if (CONST_INT_P (operands[2]))
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count == single_width * 2 - 1)
	{
	  emit_move_insn (high[0], high[1]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0],
				      GEN_INT (single_width - 1)));
	  emit_move_insn (low[0], high[0]);
	}
      else if (count >= single_width)
	{
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], low[0]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0],
				      GEN_INT (single_width - 1)));
	  if (count > single_width)
	    emit_insn ((mode == DImode
			? gen_ashrsi3
			: gen_ashrdi3) (low[0], low[0],
					GEN_INT (count - single_width)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shrd_1
		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

      emit_insn ((mode == DImode
		  ? gen_x86_shrd_1
		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_ashrsi3
		  : gen_ashrdi3) (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  emit_move_insn (scratch, high[0]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (scratch, scratch,
				      GEN_INT (single_width - 1)));
	  emit_insn ((mode == DImode
		      ? gen_x86_shift_adj_1
		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
					       scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}

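/* Illustrative sketch (not part of the compiler): the arithmetic variant.
   For counts >= the word width the high word degenerates to a sign fill
   (an arithmetic shift by width-1), which is the special case above.
   Assumes arithmetic right shift on signed ints, as x86 compilers do.  */
#if 0
#include <assert.h>
#include <stdint.h>

static int64_t ashr64 (uint32_t lo, int32_t hi, unsigned n)
{
  n &= 63;
  if (n > 0 && n < 32)
    {
      lo = (lo >> n) | ((uint32_t) hi << (32 - n));   /* shrd */
      hi >>= n;					      /* sar */
    }
  else if (n >= 32)
    {
      lo = (uint32_t) (hi >> (n - 32));
      hi >>= 31;		/* fill the high word with the sign bit */
    }
  return (int64_t) (((uint64_t) (uint32_t) hi << 32) | lo);
}

int main (void)
{
  assert (ashr64 (0xffffffffu, 0, 4) == 0x0fffffff);	/* positive value */
  assert (ashr64 (0, -1, 63) == -1);			/* sign fill */
  return 0;
}
#endif
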
void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ? 32 : 64;

  if (CONST_INT_P (operands[2]))
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count >= single_width)
	{
	  emit_move_insn (low[0], high[1]);
	  ix86_expand_clear (high[0]);

	  if (count > single_width)
	    emit_insn ((mode == DImode
			? gen_lshrsi3
			: gen_lshrdi3) (low[0], low[0],
					GEN_INT (count - single_width)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shrd_1
		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
	  emit_insn ((mode == DImode
		      ? gen_lshrsi3
		      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

      emit_insn ((mode == DImode
		  ? gen_x86_shrd_1
		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_lshrsi3
		  : gen_lshrdi3) (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && scratch)
	{
	  ix86_expand_clear (scratch);
	  emit_insn ((mode == DImode
		      ? gen_x86_shift_adj_1
		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
					       scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}

/* Predict just emitted jump instruction to be taken with probability PROB.  */
static void
predict_jump (int prob)
{
  rtx insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  REG_NOTES (insn)
    = gen_rtx_EXPR_LIST (REG_BR_PROB,
			 GEN_INT (prob),
			 REG_NOTES (insn));
}

/* Helper function for the string operations below.  Test whether VARIABLE
   is aligned to VALUE bytes; if so, jump to the label.  */
static rtx
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}

/* Decrement COUNTER by VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
}

/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}

/* Divide COUNTREG by SCALE.  */
static rtx
scale_counter (rtx countreg, int scale)
{
  rtx sc;

  if (scale == 1)
    return countreg;
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
			    GEN_INT (exact_log2 (scale)),
			    NULL, 1, OPTAB_DIRECT);
  return sc;
}

/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */

static enum machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (GET_CODE (count_exp) != CONST_INT)
    return Pmode;
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}

/* When SRCPTR is non-NULL, output a simple loop to copy memory from SRCPTR
   to DESTPTR via chunks of MODE unrolled UNROLL times; the overall size is
   COUNT, specified in bytes.  When SRCPTR is NULL, output an equivalent
   loop that sets memory to VALUE (which is expected to be in MODE).

   The size is rounded down to a whole number of chunks moved at once.
   SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */

static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
			       rtx destptr, rtx srcptr, rtx value,
			       rtx count, enum machine_mode mode, int unroll,
			       int expected_size)
{
  rtx out_label, top_label, iter, tmp;
  enum machine_mode iter_mode = counter_mode (count);
  rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
  rtx size;
  rtx x_addr;
  rtx y_addr;
  int i;

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
			      NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
    {
      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
			       true, out_label);
      predict_jump (REG_BR_PROB_BASE * 10 / 100);
    }
  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  tmp = convert_modes (Pmode, iter_mode, iter, true);
  x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
  destmem = change_address (destmem, mode, x_addr);

  if (srcmem)
    {
      y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
      srcmem = change_address (srcmem, mode, y_addr);

      /* When unrolling for chips that reorder memory reads and writes,
	 we can save registers by using a single temporary.
	 Also, using 4 temporaries is overkill in 32bit mode.  */
      if (!TARGET_64BIT && 0)
	{
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		{
		  destmem =
		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
		  srcmem =
		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
		}
	      emit_move_insn (destmem, srcmem);
	    }
	}
      else
	{
	  rtx tmpreg[4];
	  gcc_assert (unroll <= 4);
	  for (i = 0; i < unroll; i++)
	    {
	      tmpreg[i] = gen_reg_rtx (mode);
	      if (i)
		{
		  srcmem =
		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
		}
	      emit_move_insn (tmpreg[i], srcmem);
	    }
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		{
		  destmem =
		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
		}
	      emit_move_insn (destmem, tmpreg[i]);
	    }
	}
    }
  else
    for (i = 0; i < unroll; i++)
      {
	if (i)
	  destmem =
	    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
	emit_move_insn (destmem, value);
      }

  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != iter)
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
			   true, top_label);
  if (expected_size != -1)
    {
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
	predict_jump (0);
      else if (expected_size > REG_BR_PROB_BASE)
	predict_jump (REG_BR_PROB_BASE - 1);
      else
	predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
    }
  else
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
  if (srcptr)
    {
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
				 true, OPTAB_LIB_WIDEN);
      if (tmp != srcptr)
	emit_move_insn (srcptr, tmp);
    }
  emit_label (out_label);
}
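
/* Illustrative sketch (not part of the compiler): the shape of the loop
   emitted above, as plain C for a 4-byte chunk unrolled twice.  The size
   is first rounded down to a multiple of the chunk step; any remainder is
   handled separately by the epilogue code.  */
#if 0
#include <assert.h>
#include <stddef.h>
#include <string.h>

static void copy_loop (char *dst, const char *src, size_t count)
{
  size_t iter, size = count & ~(size_t) 7;	/* 4-byte chunks x 2 unrolls */
  for (iter = 0; iter < size; iter += 8)
    {
      memcpy (dst + iter, src + iter, 4);		/* chunk 0 */
      memcpy (dst + iter + 4, src + iter + 4, 4);	/* chunk 1 */
    }
  /* dst/src pointers would now be advanced by SIZE; the tail
     (count & 7 bytes) is left to the epilogue.  */
}

int main (void)
{
  char a[16] = "0123456789abcde", b[16] = { 0 };
  copy_loop (b, a, 13);		/* copies 8 bytes */
  assert (memcmp (a, b, 8) == 0 && b[8] == 0);
  return 0;
}
#endif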

/* Output "rep; mov" instruction.
   Arguments have same meaning as for previous function.  */
static void
expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
			   rtx destptr, rtx srcptr,
			   rtx count,
			   enum machine_mode mode)
{
  rtx destexp;
  rtx srcexp;
  rtx countreg;

  /* If the size is known and divisible by 4, it is shorter to move
     dwords than bytes.  */
  if (mode == QImode && CONST_INT_P (count)
      && !(INTVAL (count) & 3))
    mode = SImode;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
    srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
      srcexp = gen_rtx_ASHIFT (Pmode, countreg,
			       GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
    }
  else
    {
      destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
    }
  emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
			  destexp, srcexp));
}
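
/* Illustrative sketch (not part of the compiler): the counter scaling done
   for rep movs.  Moving in SImode chunks divides the byte count by 4, and
   "ptr + (countreg << 2)" describes the final pointer value the rep
   instruction leaves behind.  */
#if 0
#include <assert.h>
#include <stddef.h>

int main (void)
{
  size_t bytes = 64;
  size_t countreg = bytes >> 2;			/* scale_counter (count, 4) */
  size_t dest = 0x1000;
  assert (countreg == 16);
  assert (dest + (countreg << 2) == 0x1040);	/* final destination pointer */
  return 0;
}
#endif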

/* Output "rep; stos" instruction.
   Arguments have same meaning as for previous function.  */
static void
expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
			    rtx count,
			    enum machine_mode mode)
{
  rtx destexp;
  rtx countreg;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  value = force_reg (mode, gen_lowpart (mode, value));
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
    }
  else
    destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
  emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
}

static void
emit_strmov (rtx destmem, rtx srcmem,
	     rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
{
  rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
  rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
  emit_insn (gen_strmov (destptr, dest, srcptr, src));
}

14912/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
14913static void
14914expand_movmem_epilogue (rtx destmem, rtx srcmem,
14915 rtx destptr, rtx srcptr, rtx count, int max_size)
14916{
14917 rtx src, dest;
7656aee4 14918 if (CONST_INT_P (count))
8c996513
JH
14919 {
14920 HOST_WIDE_INT countval = INTVAL (count);
14921 int offset = 0;
14922
73013054 14923 if ((countval & 0x10) && max_size > 16)
8c996513
JH
14924 {
14925 if (TARGET_64BIT)
14926 {
14927 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14928 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
95935e2d 14929 }
8c996513
JH
14930 else
14931 gcc_unreachable ();
14932 offset += 16;
14933 }
14934 if ((countval & 0x08) && max_size > 8)
14935 {
14936 if (TARGET_64BIT)
14937 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
95935e2d
DV
14938 else
14939 {
bd8d4d19
JH
14940 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14941 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
95935e2d 14942 }
8c996513 14943 offset += 8;
0945b39d 14944 }
8c996513 14945 if ((countval & 0x04) && max_size > 4)
4e44c1ef 14946 {
8c996513 14947 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
4e44c1ef
JJ
14948 offset += 4;
14949 }
8c996513 14950 if ((countval & 0x02) && max_size > 2)
4e44c1ef 14951 {
8c996513 14952 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
4e44c1ef
JJ
14953 offset += 2;
14954 }
8c996513 14955 if ((countval & 0x01) && max_size > 1)
4e44c1ef 14956 {
8c996513
JH
14957 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
14958 offset += 1;
4e44c1ef 14959 }
8c996513 14960 return;
0945b39d 14961 }
8c996513 14962 if (max_size > 8)
0945b39d 14963 {
8c996513
JH
14964 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14965 count, 1, OPTAB_DIRECT);
14966 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14967 count, QImode, 1, 4);
14968 return;
14969 }
0945b39d 14970
8c996513
JH
14971 /* When single stringop instructions are available, we can cheaply advance
14972 the dest and src pointers. Otherwise we save code size by maintaining an
2f8e468b 14973 offset (zero is readily available from the preceding rep operation) and
8c996513
JH
14974 using x86 addressing modes. */
14975 if (TARGET_SINGLE_STRINGOP)
14976 {
14977 if (max_size > 4)
0945b39d 14978 {
8c996513
JH
14979 rtx label = ix86_expand_aligntest (count, 4, true);
14980 src = change_address (srcmem, SImode, srcptr);
14981 dest = change_address (destmem, SImode, destptr);
14982 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14983 emit_label (label);
14984 LABEL_NUSES (label) = 1;
0945b39d 14985 }
8c996513 14986 if (max_size > 2)
0945b39d 14987 {
8c996513
JH
14988 rtx label = ix86_expand_aligntest (count, 2, true);
14989 src = change_address (srcmem, HImode, srcptr);
14990 dest = change_address (destmem, HImode, destptr);
14991 emit_insn (gen_strmov (destptr, dest, srcptr, src));
0945b39d
JH
14992 emit_label (label);
14993 LABEL_NUSES (label) = 1;
14994 }
8c996513 14995 if (max_size > 1)
0945b39d 14996 {
8c996513
JH
14997 rtx label = ix86_expand_aligntest (count, 1, true);
14998 src = change_address (srcmem, QImode, srcptr);
14999 dest = change_address (destmem, QImode, destptr);
15000 emit_insn (gen_strmov (destptr, dest, srcptr, src));
0945b39d
JH
15001 emit_label (label);
15002 LABEL_NUSES (label) = 1;
15003 }
8c996513
JH
15004 }
15005 else
15006 {
15007 rtx offset = force_reg (Pmode, const0_rtx);
15008 rtx tmp;
15009
15010 if (max_size > 4)
0945b39d 15011 {
8c996513
JH
15012 rtx label = ix86_expand_aligntest (count, 4, true);
15013 src = change_address (srcmem, SImode, srcptr);
15014 dest = change_address (destmem, SImode, destptr);
15015 emit_move_insn (dest, src);
15016 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
15017 true, OPTAB_LIB_WIDEN);
15018 if (tmp != offset)
15019 emit_move_insn (offset, tmp);
0945b39d
JH
15020 emit_label (label);
15021 LABEL_NUSES (label) = 1;
15022 }
8c996513
JH
15023 if (max_size > 2)
15024 {
15025 rtx label = ix86_expand_aligntest (count, 2, true);
15026 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15027 src = change_address (srcmem, HImode, tmp);
15028 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15029 dest = change_address (destmem, HImode, tmp);
15030 emit_move_insn (dest, src);
15031 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
15032 true, OPTAB_LIB_WIDEN);
15033 if (tmp != offset)
15034 emit_move_insn (offset, tmp);
15035 emit_label (label);
15036 LABEL_NUSES (label) = 1;
15037 }
15038 if (max_size > 1)
37ad04a5 15039 {
8c996513
JH
15040 rtx label = ix86_expand_aligntest (count, 1, true);
15041 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15042 src = change_address (srcmem, QImode, tmp);
15043 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15044 dest = change_address (destmem, QImode, tmp);
15045 emit_move_insn (dest, src);
37ad04a5
JH
15046 emit_label (label);
15047 LABEL_NUSES (label) = 1;
37ad04a5 15048 }
8c996513
JH
15049 }
15050}
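
/* A scalar sketch (illustrative only, never compiled; assumes
   <string.h>) of the constant-count branch above: each set bit of the
   residual count selects exactly one fixed-size copy, largest chunk
   first, so at most one copy per power of two is emitted.  The real
   code additionally handles a 16-byte chunk on 64-bit targets.  */
#if 0
static void
copy_tail (char *dst, const char *src, size_t count)
{
  size_t off = 0;
  if (count & 8) { memcpy (dst + off, src + off, 8); off += 8; }
  if (count & 4) { memcpy (dst + off, src + off, 4); off += 4; }
  if (count & 2) { memcpy (dst + off, src + off, 2); off += 2; }
  if (count & 1)
    dst[off] = src[off];
}
#endif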
15051
15052/* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
15053static void
15054expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
15055 rtx count, int max_size)
15056{
15057 count =
bd8d4d19
JH
15058 expand_simple_binop (counter_mode (count), AND, count,
15059 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
8c996513
JH
15060 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
15061 gen_lowpart (QImode, value), count, QImode,
15062 1, max_size / 2);
15063}
15064
15065/* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
15066static void
15067expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
15068{
15069 rtx dest;
2a4f771a 15070
7656aee4 15071 if (CONST_INT_P (count))
8c996513
JH
15072 {
15073 HOST_WIDE_INT countval = INTVAL (count);
15074 int offset = 0;
15075
73013054 15076 if ((countval & 0x10) && max_size > 16)
0945b39d 15077 {
8c996513
JH
15078 if (TARGET_64BIT)
15079 {
15080 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15081 emit_insn (gen_strset (destptr, dest, value));
15082 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
15083 emit_insn (gen_strset (destptr, dest, value));
15084 }
15085 else
15086 gcc_unreachable ();
15087 offset += 16;
0945b39d 15088 }
8c996513 15089 if ((countval & 0x08) && max_size > 8)
0945b39d 15090 {
8c996513
JH
15091 if (TARGET_64BIT)
15092 {
15093 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15094 emit_insn (gen_strset (destptr, dest, value));
15095 }
15096 else
15097 {
15098 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15099 emit_insn (gen_strset (destptr, dest, value));
15100 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
15101 emit_insn (gen_strset (destptr, dest, value));
15102 }
15103 offset += 8;
0945b39d 15104 }
8c996513 15105 if ((countval & 0x04) && max_size > 4)
0945b39d 15106 {
8c996513
JH
15107 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15108 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15109 offset += 4;
0945b39d 15110 }
8c996513 15111 if ((countval & 0x02) && max_size > 2)
4e44c1ef 15112 {
8c996513
JH
15113 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
15114 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15115 offset += 2;
4e44c1ef 15116 }
8c996513 15117 if ((countval & 0x01) && max_size > 1)
0945b39d 15118 {
8c996513
JH
15119 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
15120 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15121 offset += 1;
0945b39d 15122 }
8c996513
JH
15123 return;
15124 }
15125 if (max_size > 32)
15126 {
15127 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
15128 return;
15129 }
15130 if (max_size > 16)
15131 {
15132 rtx label = ix86_expand_aligntest (count, 16, true);
15133 if (TARGET_64BIT)
4e44c1ef 15134 {
8c996513
JH
15135 dest = change_address (destmem, DImode, destptr);
15136 emit_insn (gen_strset (destptr, dest, value));
15137 emit_insn (gen_strset (destptr, dest, value));
4e44c1ef 15138 }
8c996513 15139 else
0945b39d 15140 {
8c996513
JH
15141 dest = change_address (destmem, SImode, destptr);
15142 emit_insn (gen_strset (destptr, dest, value));
15143 emit_insn (gen_strset (destptr, dest, value));
15144 emit_insn (gen_strset (destptr, dest, value));
15145 emit_insn (gen_strset (destptr, dest, value));
0945b39d 15146 }
8c996513
JH
15147 emit_label (label);
15148 LABEL_NUSES (label) = 1;
15149 }
15150 if (max_size > 8)
15151 {
15152 rtx label = ix86_expand_aligntest (count, 8, true);
15153 if (TARGET_64BIT)
4e44c1ef 15154 {
8c996513
JH
15155 dest = change_address (destmem, DImode, destptr);
15156 emit_insn (gen_strset (destptr, dest, value));
4e44c1ef 15157 }
8c996513 15158 else
0945b39d 15159 {
8c996513
JH
15160 dest = change_address (destmem, SImode, destptr);
15161 emit_insn (gen_strset (destptr, dest, value));
15162 emit_insn (gen_strset (destptr, dest, value));
0945b39d 15163 }
8c996513
JH
15164 emit_label (label);
15165 LABEL_NUSES (label) = 1;
15166 }
15167 if (max_size > 4)
15168 {
15169 rtx label = ix86_expand_aligntest (count, 4, true);
15170 dest = change_address (destmem, SImode, destptr);
15171 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15172 emit_label (label);
15173 LABEL_NUSES (label) = 1;
0945b39d 15174 }
8c996513
JH
15175 if (max_size > 2)
15176 {
15177 rtx label = ix86_expand_aligntest (count, 2, true);
15178 dest = change_address (destmem, HImode, destptr);
15179 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15180 emit_label (label);
15181 LABEL_NUSES (label) = 1;
15182 }
15183 if (max_size > 1)
15184 {
15185 rtx label = ix86_expand_aligntest (count, 1, true);
15186 dest = change_address (destmem, QImode, destptr);
15187 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15188 emit_label (label);
15189 LABEL_NUSES (label) = 1;
15190 }
15191}
0945b39d 15192
8c996513
JH
15193/* Copy enough bytes from SRC to DEST to align DEST, known to be aligned
15194 by ALIGN, to DESIRED_ALIGNMENT. */
15195static void
15196expand_movmem_prologue (rtx destmem, rtx srcmem,
15197 rtx destptr, rtx srcptr, rtx count,
15198 int align, int desired_alignment)
15199{
15200 if (align <= 1 && desired_alignment > 1)
15201 {
15202 rtx label = ix86_expand_aligntest (destptr, 1, false);
15203 srcmem = change_address (srcmem, QImode, srcptr);
15204 destmem = change_address (destmem, QImode, destptr);
15205 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15206 ix86_adjust_counter (count, 1);
15207 emit_label (label);
15208 LABEL_NUSES (label) = 1;
15209 }
15210 if (align <= 2 && desired_alignment > 2)
15211 {
15212 rtx label = ix86_expand_aligntest (destptr, 2, false);
15213 srcmem = change_address (srcmem, HImode, srcptr);
15214 destmem = change_address (destmem, HImode, destptr);
15215 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15216 ix86_adjust_counter (count, 2);
15217 emit_label (label);
15218 LABEL_NUSES (label) = 1;
15219 }
15220 if (align <= 4 && desired_alignment > 4)
15221 {
15222 rtx label = ix86_expand_aligntest (destptr, 4, false);
15223 srcmem = change_address (srcmem, SImode, srcptr);
15224 destmem = change_address (destmem, SImode, destptr);
15225 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15226 ix86_adjust_counter (count, 4);
15227 emit_label (label);
15228 LABEL_NUSES (label) = 1;
15229 }
15230 gcc_assert (desired_alignment <= 8);
0945b39d
JH
15231}
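
/* A scalar sketch (illustrative only, never compiled; assumes
   <stdint.h> and <string.h>) of the prologue above: peel one 1-, 2-
   and 4-byte copy, each guarded by a test of the corresponding low
   bit of the destination pointer, until DST is aligned to DESIRED
   (at most 8).  */
#if 0
static void
align_dest (char **dst, const char **src, size_t *count, int desired)
{
  if (desired > 1 && ((uintptr_t) *dst & 1))
    { *(*dst)++ = *(*src)++; --*count; }
  if (desired > 2 && ((uintptr_t) *dst & 2))
    { memcpy (*dst, *src, 2); *dst += 2; *src += 2; *count -= 2; }
  if (desired > 4 && ((uintptr_t) *dst & 4))
    { memcpy (*dst, *src, 4); *dst += 4; *src += 4; *count -= 4; }
}
#endif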
15232
8c996513
JH
15233/* Set enough bytes at DEST to align DEST, known to be aligned
15234 by ALIGN, to DESIRED_ALIGNMENT. */
15235static void
15236expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
15237 int align, int desired_alignment)
15238{
15239 if (align <= 1 && desired_alignment > 1)
15240 {
15241 rtx label = ix86_expand_aligntest (destptr, 1, false);
15242 destmem = change_address (destmem, QImode, destptr);
15243 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
15244 ix86_adjust_counter (count, 1);
15245 emit_label (label);
15246 LABEL_NUSES (label) = 1;
15247 }
15248 if (align <= 2 && desired_alignment > 2)
15249 {
15250 rtx label = ix86_expand_aligntest (destptr, 2, false);
15251 destmem = change_address (destmem, HImode, destptr);
15252 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
15253 ix86_adjust_counter (count, 2);
15254 emit_label (label);
15255 LABEL_NUSES (label) = 1;
15256 }
15257 if (align <= 4 && desired_alignment > 4)
15258 {
15259 rtx label = ix86_expand_aligntest (destptr, 4, false);
15260 destmem = change_address (destmem, SImode, destptr);
15261 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
15262 ix86_adjust_counter (count, 4);
15263 emit_label (label);
15264 LABEL_NUSES (label) = 1;
15265 }
15266 gcc_assert (desired_alignment <= 8);
15267}
15268
15269/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
15270static enum stringop_alg
15271decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
15272 int *dynamic_check)
15273{
15274 const struct stringop_algs * algs;
3c285765
NF
15275 /* Algorithms using the rep prefix want at least edi and ecx;
15276 additionally, memset wants eax and memcpy wants esi. Don't
15277 consider such algorithms if the user has appropriated those
15278 registers for their own purposes. */
ec382b8c
UB
15279 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
15280 || (memset
15281 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
3c285765
NF
15282
15283#define ALG_USABLE_P(alg) (rep_prefix_usable \
15284 || (alg != rep_prefix_1_byte \
15285 && alg != rep_prefix_4_byte \
15286 && alg != rep_prefix_8_byte))
8c996513
JH
15287
15288 *dynamic_check = -1;
15289 if (memset)
15290 algs = &ix86_cost->memset[TARGET_64BIT != 0];
15291 else
15292 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
3c285765 15293 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
8c996513
JH
15294 return stringop_alg;
15295 /* rep; movq or rep; movl is the smallest variant. */
15296 else if (optimize_size)
15297 {
15298 if (!count || (count & 3))
3c285765 15299 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
8c996513 15300 else
3c285765 15301 return rep_prefix_usable ? rep_prefix_4_byte : loop;
8c996513
JH
15302 }
15303 /* Very tiny blocks are best handled via the loop; REP is expensive to set
15304 up. */
15305 else if (expected_size != -1 && expected_size < 4)
15306 return loop_1_byte;
15307 else if (expected_size != -1)
15308 {
15309 unsigned int i;
15310 enum stringop_alg alg = libcall;
15311 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
15312 {
3c285765
NF
15313 /* We get here if the algorithms that were not libcall-based
15314 were rep-prefix based and we are unable to use rep prefixes
15315 based on global register usage. Break out of the loop and
15316 use the heuristic below. */
15317 if (algs->size[i].max == 0)
15318 break;
8c996513
JH
15319 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
15320 {
3c285765
NF
15321 enum stringop_alg candidate = algs->size[i].alg;
15322
15323 if (candidate != libcall && ALG_USABLE_P (candidate))
15324 alg = candidate;
8c996513 15325 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
3c285765 15326 last non-libcall inline algorithm. */
8c996513
JH
15327 if (TARGET_INLINE_ALL_STRINGOPS)
15328 {
cc0faf9d 15329 /* When the current size is best to be copied by a libcall,
3c285765 15330 but we are still forced to inline, run the heuristic below
cc0faf9d
JH
15331 that will pick code for medium sized blocks. */
15332 if (alg != libcall)
15333 return alg;
15334 break;
8c996513 15335 }
3c285765
NF
15336 else if (ALG_USABLE_P (candidate))
15337 return candidate;
8c996513
JH
15338 }
15339 }
3c285765 15340 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
8c996513
JH
15341 }
15342 /* When asked to inline the call anyway, try to pick a meaningful choice.
15343 We look for the maximal size of block that is faster to copy by hand and
15344 take blocks of at most that size, guessing that the average size will
54a88090 15345 be roughly half of the maximum.
8c996513
JH
15346
15347 If this turns out to be bad, we might simply specify the preferred
15348 choice in ix86_costs. */
15349 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
3c285765 15350 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
8c996513
JH
15351 {
15352 int max = -1;
15353 enum stringop_alg alg;
15354 int i;
3c285765 15355 bool any_alg_usable_p = true;
8c996513
JH
15356
15357 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
3c285765
NF
15358 {
15359 enum stringop_alg candidate = algs->size[i].alg;
15360 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
15361
15362 if (candidate != libcall && candidate
15363 && ALG_USABLE_P (candidate))
15364 max = algs->size[i].max;
15365 }
15366 /* If there aren't any usable algorithms, then recursing on
15367 smaller sizes isn't going to find anything. Just return the
15368 simple byte-at-a-time copy loop. */
15369 if (!any_alg_usable_p)
15370 {
15371 /* Pick something reasonable. */
15372 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15373 *dynamic_check = 128;
15374 return loop_1_byte;
15375 }
8c996513
JH
15376 if (max == -1)
15377 max = 4096;
15378 alg = decide_alg (count, max / 2, memset, dynamic_check);
15379 gcc_assert (*dynamic_check == -1);
15380 gcc_assert (alg != libcall);
15381 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15382 *dynamic_check = max;
15383 return alg;
15384 }
3c285765
NF
15385 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
15386#undef ALG_USABLE_P
8c996513
JH
15387}
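
/* A sketch (illustrative only, never compiled) of the size-table walk
   above, ignoring the rep-prefix usability and libcall handling: pick
   the algorithm of the first entry whose MAX covers the expected
   size, MAX == -1 meaning "unbounded".  */
#if 0
static enum stringop_alg
pick_by_size (const struct stringop_algs *algs, HOST_WIDE_INT expected)
{
  unsigned int i;
  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1 || algs->size[i].max >= expected)
      return algs->size[i].alg;
  return libcall;
}
#endif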
15388
15389/* Decide on alignment. We know that the operand is already aligned to ALIGN
15390 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
15391static int
15392decide_alignment (int align,
15393 enum stringop_alg alg,
15394 int expected_size)
15395{
15396 int desired_align = 0;
15397 switch (alg)
15398 {
15399 case no_stringop:
15400 gcc_unreachable ();
15401 case loop:
15402 case unrolled_loop:
15403 desired_align = GET_MODE_SIZE (Pmode);
15404 break;
15405 case rep_prefix_8_byte:
15406 desired_align = 8;
15407 break;
15408 case rep_prefix_4_byte:
15409 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
15410 copying a whole cache line at once. */
15411 if (TARGET_PENTIUMPRO)
15412 desired_align = 8;
15413 else
15414 desired_align = 4;
15415 break;
15416 case rep_prefix_1_byte:
15417 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
15418 copying a whole cache line at once. */
15419 if (TARGET_PENTIUMPRO)
15420 desired_align = 8;
15421 else
15422 desired_align = 1;
15423 break;
15424 case loop_1_byte:
15425 desired_align = 1;
15426 break;
15427 case libcall:
15428 return 0;
15429 }
15430
15431 if (optimize_size)
15432 desired_align = 1;
15433 if (desired_align < align)
15434 desired_align = align;
15435 if (expected_size != -1 && expected_size < 4)
15436 desired_align = align;
15437 return desired_align;
15438}
15439
2e226e66 15440/* Return the smallest power of 2 strictly greater than VAL, e.g. 16 for 8. */
2a4f771a
JH
15441static int
15442smallest_pow2_greater_than (int val)
15443{
15444 int ret = 1;
15445 while (ret <= val)
15446 ret <<= 1;
15447 return ret;
15448}
15449
8c996513 15450/* Expand string move (memcpy) operation. Use i386 string operations when
1b22c72e 15451 profitable. expand_setmem contains similar code. The code depends upon
2a4f771a
JH
15452 architecture, block size and alignment, but always has the same
15453 overall structure:
15454
15455 1) Prologue guard: A conditional that jumps ahead to the epilogue for
15456 small blocks that can be handled by the epilogue alone. This is faster
15457 but also needed for correctness, since the prologue assumes the block is
2e226e66 15458 larger than the desired alignment.
2a4f771a
JH
15459
15460 Optional dynamic check for size and libcall for large
15461 blocks is emitted here too, with -minline-stringops-dynamically.
15462
15463 2) Prologue: copy the first few bytes in order to get the destination
15464 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less than
15465 DESIRED_ALIGN; up to DESIRED_ALIGN - ALIGN bytes can be copied.
15466 We emit either a jump tree on power-of-two sized blocks, or a byte loop.
15467
15468 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15469 with the specified algorithm.
15470
15471 4) Epilogue: code copying the tail of the block that is too small to be
15472 handled by the main body (or up to the size guarded by the prologue guard). */
54a88090 15473
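
/* The overall shape of the emitted code, as a scalar C sketch
   (illustrative only, never compiled; assumes <string.h>, a
   power-of-two EPILOGUE_SIZE_NEEDED, and memcpy standing in for the
   main loop selected by decide_alg; the real epilogue copies in
   chunks rather than bytes).  */
#if 0
static void
movmem_shape (char *dst, const char *src, size_t count,
              size_t epilogue_size_needed)
{
  if (count >= epilogue_size_needed)      /* 1) prologue guard  */
    {
      /* 2) the alignment prologue would adjust dst/src/count here.  */
      size_t main_part = count & ~(epilogue_size_needed - 1);
      memcpy (dst, src, main_part);       /* 3) main loop       */
      dst += main_part;
      src += main_part;
      count &= epilogue_size_needed - 1;
    }
  while (count--)                         /* 4) epilogue        */
    *dst++ = *src++;
}
#endif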
0945b39d 15474int
8c996513
JH
15475ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
15476 rtx expected_align_exp, rtx expected_size_exp)
0945b39d 15477{
8c996513
JH
15478 rtx destreg;
15479 rtx srcreg;
15480 rtx label = NULL;
15481 rtx tmp;
15482 rtx jump_around_label = NULL;
15483 HOST_WIDE_INT align = 1;
0945b39d 15484 unsigned HOST_WIDE_INT count = 0;
8c996513 15485 HOST_WIDE_INT expected_size = -1;
2a4f771a 15486 int size_needed = 0, epilogue_size_needed;
8c996513
JH
15487 int desired_align = 0;
15488 enum stringop_alg alg;
15489 int dynamic_check;
0945b39d 15490
7656aee4 15491 if (CONST_INT_P (align_exp))
0945b39d 15492 align = INTVAL (align_exp);
2f8e468b 15493 /* i386 can do misaligned accesses at reasonably increased cost. */
7656aee4 15494 if (CONST_INT_P (expected_align_exp)
8c996513
JH
15495 && INTVAL (expected_align_exp) > align)
15496 align = INTVAL (expected_align_exp);
7656aee4 15497 if (CONST_INT_P (count_exp))
8c996513 15498 count = expected_size = INTVAL (count_exp);
7656aee4 15499 if (CONST_INT_P (expected_size_exp) && count == 0)
2a4f771a
JH
15500 expected_size = INTVAL (expected_size_exp);
15501
1b22c72e
EB
15502 /* Make sure we don't need to care about overflow later on. */
15503 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15504 return 0;
15505
2a4f771a
JH
15506 /* Step 0: Decide on preferred algorithm, desired alignment and
15507 size of chunks to be copied by main loop. */
0945b39d 15508
8c996513
JH
15509 alg = decide_alg (count, expected_size, false, &dynamic_check);
15510 desired_align = decide_alignment (align, alg, expected_size);
d0a5295a 15511
0945b39d 15512 if (!TARGET_ALIGN_STRINGOPS)
8c996513 15513 align = desired_align;
0945b39d 15514
8c996513
JH
15515 if (alg == libcall)
15516 return 0;
15517 gcc_assert (alg != no_stringop);
15518 if (!count)
15519 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
15520 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15521 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
15522 switch (alg)
26771da7 15523 {
8c996513
JH
15524 case libcall:
15525 case no_stringop:
15526 gcc_unreachable ();
15527 case loop:
15528 size_needed = GET_MODE_SIZE (Pmode);
15529 break;
15530 case unrolled_loop:
15531 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
15532 break;
15533 case rep_prefix_8_byte:
15534 size_needed = 8;
15535 break;
15536 case rep_prefix_4_byte:
15537 size_needed = 4;
15538 break;
15539 case rep_prefix_1_byte:
15540 case loop_1_byte:
15541 size_needed = 1;
15542 break;
26771da7 15543 }
0945b39d 15544
2a4f771a
JH
15545 epilogue_size_needed = size_needed;
15546
15547 /* Step 1: Prologue guard. */
15548
8c996513 15549 /* Alignment code needs count to be in register. */
7656aee4 15550 if (CONST_INT_P (count_exp) && desired_align > align)
097f5e21 15551 count_exp = force_reg (counter_mode (count_exp), count_exp);
8c996513 15552 gcc_assert (desired_align >= 1 && align >= 1);
2a4f771a 15553
8c996513 15554 /* Ensure that alignment prologue won't copy past end of block. */
bd8d4d19 15555 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
8c996513 15556 {
2a4f771a 15557 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
2a4f771a
JH
15558 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15559 Make sure it is power of 2. */
15560 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
90c56b45 15561
097f5e21
MM
15562 if (CONST_INT_P (count_exp))
15563 {
15564 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
15565 goto epilogue;
15566 }
8c996513 15567 else
097f5e21
MM
15568 {
15569 label = gen_label_rtx ();
15570 emit_cmp_and_jump_insns (count_exp,
15571 GEN_INT (epilogue_size_needed),
15572 LTU, 0, counter_mode (count_exp), 1, label);
15573 if (expected_size == -1 || expected_size < epilogue_size_needed)
15574 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15575 else
15576 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15577 }
8c996513 15578 }
097f5e21 15579
8c996513
JH
15580 /* Emit code to decide at runtime whether a library call or inline code
15581 should be used. */
15582 if (dynamic_check != -1)
15583 {
097f5e21
MM
15584 if (CONST_INT_P (count_exp))
15585 {
15586 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
15587 {
15588 emit_block_move_via_libcall (dst, src, count_exp, false);
15589 count_exp = const0_rtx;
15590 goto epilogue;
15591 }
15592 }
15593 else
15594 {
15595 rtx hot_label = gen_label_rtx ();
15596 jump_around_label = gen_label_rtx ();
15597 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15598 LEU, 0, GET_MODE (count_exp), 1, hot_label);
15599 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15600 emit_block_move_via_libcall (dst, src, count_exp, false);
15601 emit_jump (jump_around_label);
15602 emit_label (hot_label);
15603 }
8c996513 15604 }
0945b39d 15605
2a4f771a 15606 /* Step 2: Alignment prologue. */
0945b39d 15607
8c996513 15608 if (desired_align > align)
0945b39d 15609 {
8c996513
JH
15610 /* Except for the first move in the epilogue, we no longer know
15611 the constant offset in the aliasing info. It doesn't seem worth
15612 the pain to maintain it for the first move, so throw away
15613 the info early. */
15614 src = change_address (src, BLKmode, srcreg);
15615 dst = change_address (dst, BLKmode, destreg);
8c996513
JH
15616 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
15617 desired_align);
0945b39d 15618 }
8c996513 15619 if (label && size_needed == 1)
0945b39d 15620 {
8c996513
JH
15621 emit_label (label);
15622 LABEL_NUSES (label) = 1;
15623 label = NULL;
15624 }
4e44c1ef 15625
2a4f771a
JH
15626 /* Step 3: Main loop. */
15627
8c996513
JH
15628 switch (alg)
15629 {
15630 case libcall:
15631 case no_stringop:
15632 gcc_unreachable ();
15633 case loop_1_byte:
15634 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15635 count_exp, QImode, 1, expected_size);
15636 break;
15637 case loop:
15638 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15639 count_exp, Pmode, 1, expected_size);
15640 break;
15641 case unrolled_loop:
15642 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
15643 registers for 4 temporaries anyway. */
15644 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15645 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
15646 expected_size);
15647 break;
15648 case rep_prefix_8_byte:
8c996513
JH
15649 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15650 DImode);
15651 break;
15652 case rep_prefix_4_byte:
8c996513
JH
15653 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15654 SImode);
15655 break;
15656 case rep_prefix_1_byte:
8c996513
JH
15657 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15658 QImode);
15659 break;
15660 }
15661 /* Properly adjust the offsets of the src and dest memory for aliasing. */
7656aee4 15662 if (CONST_INT_P (count_exp))
8c996513
JH
15663 {
15664 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
15665 (count / size_needed) * size_needed);
15666 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15667 (count / size_needed) * size_needed);
15668 }
15669 else
15670 {
15671 src = change_address (src, BLKmode, srcreg);
15672 dst = change_address (dst, BLKmode, destreg);
15673 }
6b32b628 15674
2a4f771a 15675 /* Step 4: Epilogue to copy the remaining bytes. */
097f5e21 15676 epilogue:
8c996513
JH
15677 if (label)
15678 {
2a4f771a
JH
15679 /* When the main loop is done, COUNT_EXP might hold the original count,
15680 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
15681 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
15682 bytes. Compensate if needed. */
54a88090 15683
2a4f771a 15684 if (size_needed < epilogue_size_needed)
0945b39d 15685 {
8c996513 15686 tmp =
bd8d4d19 15687 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
8c996513
JH
15688 GEN_INT (size_needed - 1), count_exp, 1,
15689 OPTAB_DIRECT);
8c996513
JH
15690 if (tmp != count_exp)
15691 emit_move_insn (count_exp, tmp);
15692 }
15693 emit_label (label);
15694 LABEL_NUSES (label) = 1;
15695 }
2a4f771a
JH
15696
15697 if (count_exp != const0_rtx && epilogue_size_needed > 1)
90c56b45 15698 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
2a4f771a 15699 epilogue_size_needed);
8c996513
JH
15700 if (jump_around_label)
15701 emit_label (jump_around_label);
15702 return 1;
15703}
6b32b628 15704
8c996513
JH
15705/* Helper function for memset. For the QImode value 0xXY produce
15706 0xXYXYXYXY of the width specified by MODE. This is essentially
15707 a multiplication by 0x01010101, but we can do slightly better than
15708 synth_mult by unwinding the sequence by hand on CPUs with
15709 slow multiply. */
15710static rtx
15711promote_duplicated_reg (enum machine_mode mode, rtx val)
15712{
15713 enum machine_mode valmode = GET_MODE (val);
15714 rtx tmp;
15715 int nops = mode == DImode ? 3 : 2;
6b32b628 15716
8c996513
JH
15717 gcc_assert (mode == SImode || mode == DImode);
15718 if (val == const0_rtx)
15719 return copy_to_mode_reg (mode, const0_rtx);
7656aee4 15720 if (CONST_INT_P (val))
8c996513
JH
15721 {
15722 HOST_WIDE_INT v = INTVAL (val) & 255;
6b32b628 15723
8c996513
JH
15724 v |= v << 8;
15725 v |= v << 16;
15726 if (mode == DImode)
15727 v |= (v << 16) << 16;
15728 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15729 }
15730
15731 if (valmode == VOIDmode)
15732 valmode = QImode;
15733 if (valmode != QImode)
15734 val = gen_lowpart (QImode, val);
15735 if (mode == QImode)
15736 return val;
15737 if (!TARGET_PARTIAL_REG_STALL)
15738 nops--;
15739 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15740 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15741 <= (ix86_cost->shift_const + ix86_cost->add) * nops
15742 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15743 {
15744 rtx reg = convert_modes (mode, QImode, val, true);
15745 tmp = promote_duplicated_reg (mode, const1_rtx);
15746 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
15747 OPTAB_DIRECT);
0945b39d
JH
15748 }
15749 else
15750 {
8c996513 15751 rtx reg = convert_modes (mode, QImode, val, true);
0945b39d 15752
8c996513
JH
15753 if (!TARGET_PARTIAL_REG_STALL)
15754 if (mode == SImode)
15755 emit_insn (gen_movsi_insv_1 (reg, reg));
15756 else
15757 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15758 else
0945b39d 15759 {
8c996513
JH
15760 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15761 NULL, 1, OPTAB_DIRECT);
15762 reg =
15763 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
0945b39d 15764 }
8c996513
JH
15765 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15766 NULL, 1, OPTAB_DIRECT);
15767 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15768 if (mode == SImode)
15769 return reg;
15770 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15771 NULL, 1, OPTAB_DIRECT);
15772 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15773 return reg;
15774 }
15775}
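
/* The scalar equivalent (illustrative only, never compiled) of the
   shift-and-or sequence emitted above for a 32-bit word; the 64-bit
   case adds one more shift-by-32 step.  The result equals
   (x & 0xff) * 0x01010101.  */
#if 0
static unsigned int
replicate_low_byte (unsigned int x)
{
  x &= 0xff;      /* 0x000000XY */
  x |= x << 8;    /* 0x0000XYXY */
  x |= x << 16;   /* 0xXYXYXYXY */
  return x;
}
#endif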
0945b39d 15776
2a4f771a
JH
15777/* Duplicate the value VAL using promote_duplicated_reg into the maximal size
15778 that will be needed by the main loop copying SIZE_NEEDED chunks and by the
15779 prologue raising the alignment from ALIGN to DESIRED_ALIGN. */
15780static rtx
15781promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15782{
15783 rtx promoted_val;
15784
15785 if (TARGET_64BIT
15786 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15787 promoted_val = promote_duplicated_reg (DImode, val);
15788 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15789 promoted_val = promote_duplicated_reg (SImode, val);
15790 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15791 promoted_val = promote_duplicated_reg (HImode, val);
15792 else
15793 promoted_val = val;
15794
15795 return promoted_val;
15796}
15797
8c996513 15798/* Expand string clear operation (bzero). Use i386 string operations when
2a4f771a 15799 profitable. See expand_movmem comment for explanation of individual
2e226e66 15800 steps performed. */
8c996513
JH
15801int
15802ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15803 rtx expected_align_exp, rtx expected_size_exp)
15804{
15805 rtx destreg;
15806 rtx label = NULL;
15807 rtx tmp;
15808 rtx jump_around_label = NULL;
15809 HOST_WIDE_INT align = 1;
15810 unsigned HOST_WIDE_INT count = 0;
15811 HOST_WIDE_INT expected_size = -1;
2a4f771a 15812 int size_needed = 0, epilogue_size_needed;
8c996513
JH
15813 int desired_align = 0;
15814 enum stringop_alg alg;
2a4f771a 15815 rtx promoted_val = NULL;
8c996513
JH
15816 bool force_loopy_epilogue = false;
15817 int dynamic_check;
37ad04a5 15818
7656aee4 15819 if (CONST_INT_P (align_exp))
8c996513 15820 align = INTVAL (align_exp);
2f8e468b 15821 /* i386 can do misaligned accesses at reasonably increased cost. */
7656aee4 15822 if (CONST_INT_P (expected_align_exp)
8c996513
JH
15823 && INTVAL (expected_align_exp) > align)
15824 align = INTVAL (expected_align_exp);
7656aee4 15825 if (CONST_INT_P (count_exp))
8c996513 15826 count = expected_size = INTVAL (count_exp);
7656aee4 15827 if (CONST_INT_P (expected_size_exp) && count == 0)
8c996513 15828 expected_size = INTVAL (expected_size_exp);
4e44c1ef 15829
1b22c72e
EB
15830 /* Make sure we don't need to care about overflow later on. */
15831 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15832 return 0;
15833
2a4f771a
JH
15834 /* Step 0: Decide on preferred algorithm, desired alignment and
15835 size of chunks to be copied by main loop. */
15836
8c996513
JH
15837 alg = decide_alg (count, expected_size, true, &dynamic_check);
15838 desired_align = decide_alignment (align, alg, expected_size);
37ad04a5 15839
8c996513
JH
15840 if (!TARGET_ALIGN_STRINGOPS)
15841 align = desired_align;
15842
15843 if (alg == libcall)
15844 return 0;
15845 gcc_assert (alg != no_stringop);
15846 if (!count)
bd8d4d19 15847 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
8c996513
JH
15848 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15849 switch (alg)
15850 {
15851 case libcall:
15852 case no_stringop:
15853 gcc_unreachable ();
15854 case loop:
15855 size_needed = GET_MODE_SIZE (Pmode);
15856 break;
15857 case unrolled_loop:
15858 size_needed = GET_MODE_SIZE (Pmode) * 4;
15859 break;
15860 case rep_prefix_8_byte:
15861 size_needed = 8;
15862 break;
15863 case rep_prefix_4_byte:
15864 size_needed = 4;
15865 break;
15866 case rep_prefix_1_byte:
15867 case loop_1_byte:
15868 size_needed = 1;
15869 break;
15870 }
2a4f771a
JH
15871 epilogue_size_needed = size_needed;
15872
15873 /* Step 1: Prologue guard. */
15874
8c996513 15875 /* Alignment code needs count to be in register. */
7656aee4 15876 if (CONST_INT_P (count_exp) && desired_align > align)
8c996513
JH
15877 {
15878 enum machine_mode mode = SImode;
15879 if (TARGET_64BIT && (count & ~0xffffffff))
15880 mode = DImode;
15881 count_exp = force_reg (mode, count_exp);
15882 }
54a88090 15883 /* Do the cheap promotion to allow better CSE across the
2a4f771a
JH
15884 main loop and epilogue (i.e. one load of the big constant in
15885 front of all the code). */
7656aee4 15886 if (CONST_INT_P (val_exp))
2a4f771a
JH
15887 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15888 desired_align, align);
8c996513 15889 /* Ensure that alignment prologue won't copy past end of block. */
bd8d4d19 15890 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
8c996513 15891 {
2a4f771a 15892 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
2a4f771a
JH
15893 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15894 Make sure it is power of 2. */
15895 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15896
15897 /* To improve performance of small blocks, we jump around the VAL
15898 promoting code. This means that if the promoted VAL is not constant,
15899 we might not use it in the epilogue and have to use the byte
15900 loop variant. */
15901 if (epilogue_size_needed > 2 && !promoted_val)
15902 force_loopy_epilogue = true;
8c996513
JH
15903 label = gen_label_rtx ();
15904 emit_cmp_and_jump_insns (count_exp,
2a4f771a 15905 GEN_INT (epilogue_size_needed),
bd8d4d19
JH
15906 LTU, 0, counter_mode (count_exp), 1, label);
15907 if (GET_CODE (count_exp) == CONST_INT)
15908 ;
15909 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
8c996513
JH
15910 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15911 else
15912 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15913 }
15914 if (dynamic_check != -1)
15915 {
15916 rtx hot_label = gen_label_rtx ();
15917 jump_around_label = gen_label_rtx ();
15918 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
bd8d4d19 15919 LEU, 0, counter_mode (count_exp), 1, hot_label);
8c996513
JH
15920 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15921 set_storage_via_libcall (dst, count_exp, val_exp, false);
15922 emit_jump (jump_around_label);
15923 emit_label (hot_label);
15924 }
2a4f771a
JH
15925
15926 /* Step 2: Alignment prologue. */
15927
15928 /* Do the expensive promotion once we branched off the small blocks. */
15929 if (!promoted_val)
15930 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15931 desired_align, align);
8c996513 15932 gcc_assert (desired_align >= 1 && align >= 1);
90c56b45 15933
8c996513
JH
15934 if (desired_align > align)
15935 {
15936 /* Except for the first move in epilogue, we no longer know
15937 constant offset in aliasing info. It don't seems to worth
15938 the pain to maintain it for the first move, so throw away
15939 the info early. */
15940 dst = change_address (dst, BLKmode, destreg);
8c996513
JH
15941 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15942 desired_align);
15943 }
15944 if (label && size_needed == 1)
15945 {
15946 emit_label (label);
15947 LABEL_NUSES (label) = 1;
15948 label = NULL;
15949 }
2a4f771a
JH
15950
15951 /* Step 3: Main loop. */
15952
8c996513
JH
15953 switch (alg)
15954 {
15955 case libcall:
15956 case no_stringop:
15957 gcc_unreachable ();
15958 case loop_1_byte:
15959 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15960 count_exp, QImode, 1, expected_size);
15961 break;
15962 case loop:
15963 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15964 count_exp, Pmode, 1, expected_size);
15965 break;
15966 case unrolled_loop:
15967 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15968 count_exp, Pmode, 4, expected_size);
15969 break;
15970 case rep_prefix_8_byte:
8c996513
JH
15971 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15972 DImode);
15973 break;
15974 case rep_prefix_4_byte:
8c996513
JH
15975 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15976 SImode);
15977 break;
15978 case rep_prefix_1_byte:
8c996513
JH
15979 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15980 QImode);
15981 break;
15982 }
15983 /* Properly adjust the offset of the dest memory for aliasing. */
7656aee4 15984 if (CONST_INT_P (count_exp))
8c996513
JH
15985 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15986 (count / size_needed) * size_needed);
15987 else
15988 dst = change_address (dst, BLKmode, destreg);
15989
2a4f771a
JH
15990 /* Step 4: Epilogue to copy the remaining bytes. */
15991
8c996513
JH
15992 if (label)
15993 {
2a4f771a
JH
15994 /* When the main loop is done, COUNT_EXP might hold the original count,
15995 while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
15996 Epilogue code will actually set COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
15997 bytes. Compensate if needed. */
15998
8c996513 15999 if (size_needed < desired_align - align)
0945b39d 16000 {
8c996513 16001 tmp =
bd8d4d19 16002 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
8c996513
JH
16003 GEN_INT (size_needed - 1), count_exp, 1,
16004 OPTAB_DIRECT);
16005 size_needed = desired_align - align + 1;
16006 if (tmp != count_exp)
16007 emit_move_insn (count_exp, tmp);
0945b39d 16008 }
8c996513
JH
16009 emit_label (label);
16010 LABEL_NUSES (label) = 1;
16011 }
2a4f771a 16012 if (count_exp != const0_rtx && epilogue_size_needed > 1)
8c996513
JH
16013 {
16014 if (force_loopy_epilogue)
16015 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
16016 size_needed);
16017 else
90c56b45
UB
16018 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
16019 size_needed);
0945b39d 16020 }
8c996513
JH
16021 if (jump_around_label)
16022 emit_label (jump_around_label);
0945b39d
JH
16023 return 1;
16024}
4e44c1ef 16025
e075ae69
RH
16026/* Expand the appropriate insns for doing strlen if not just doing
16027 repnz; scasb
16028
16029 out = result, initialized with the start address
16030 align_rtx = alignment of the address.
16031 scratch = scratch register, initialized with the start address when
77ebd435 16032 not aligned, otherwise undefined
3f803cd9 16033
39e3f58c 16034 This is just the body. It needs the initializations mentioned above and
3f803cd9
SC
16035 some address computing at the end. These things are done in i386.md. */
16036
0945b39d 16037static void
4e44c1ef 16038ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
3f803cd9 16039{
e075ae69
RH
16040 int align;
16041 rtx tmp;
16042 rtx align_2_label = NULL_RTX;
16043 rtx align_3_label = NULL_RTX;
16044 rtx align_4_label = gen_label_rtx ();
16045 rtx end_0_label = gen_label_rtx ();
e075ae69 16046 rtx mem;
e2e52e1b 16047 rtx tmpreg = gen_reg_rtx (SImode);
0945b39d 16048 rtx scratch = gen_reg_rtx (SImode);
e6e81735 16049 rtx cmp;
e075ae69
RH
16050
16051 align = 0;
7656aee4 16052 if (CONST_INT_P (align_rtx))
e075ae69 16053 align = INTVAL (align_rtx);
3f803cd9 16054
e9a25f70 16055 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
3f803cd9 16056
e9a25f70 16057 /* Is there a known alignment and is it less than 4? */
e075ae69 16058 if (align < 4)
3f803cd9 16059 {
0945b39d
JH
16060 rtx scratch1 = gen_reg_rtx (Pmode);
16061 emit_move_insn (scratch1, out);
e9a25f70 16062 /* Is there a known alignment and is it not 2? */
e075ae69 16063 if (align != 2)
3f803cd9 16064 {
e075ae69
RH
16065 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
16066 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
16067
16068 /* Leave just the 3 lower bits. */
0945b39d 16069 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
e075ae69
RH
16070 NULL_RTX, 0, OPTAB_WIDEN);
16071
9076b9c1 16072 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 16073 Pmode, 1, align_4_label);
60c81c89 16074 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
d43e0b7d 16075 Pmode, 1, align_2_label);
60c81c89 16076 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
d43e0b7d 16077 Pmode, 1, align_3_label);
3f803cd9
SC
16078 }
16079 else
16080 {
e9a25f70
JL
16081 /* Since the alignment is 2, we have to check 2 or 0 bytes;
16082 check whether it is aligned to 4 bytes. */
e9a25f70 16083
60c81c89 16084 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
e075ae69
RH
16085 NULL_RTX, 0, OPTAB_WIDEN);
16086
9076b9c1 16087 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 16088 Pmode, 1, align_4_label);
3f803cd9
SC
16089 }
16090
4e44c1ef 16091 mem = change_address (src, QImode, out);
e9a25f70 16092
e075ae69 16093 /* Now compare the bytes. */
e9a25f70 16094
0f290768 16095 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
9076b9c1 16096 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
d43e0b7d 16097 QImode, 1, end_0_label);
3f803cd9 16098
0f290768 16099 /* Increment the address. */
0945b39d
JH
16100 if (TARGET_64BIT)
16101 emit_insn (gen_adddi3 (out, out, const1_rtx));
16102 else
16103 emit_insn (gen_addsi3 (out, out, const1_rtx));
e9a25f70 16104
e075ae69
RH
16105 /* Not needed with an alignment of 2 */
16106 if (align != 2)
16107 {
16108 emit_label (align_2_label);
3f803cd9 16109
d43e0b7d
RK
16110 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16111 end_0_label);
e075ae69 16112
0945b39d
JH
16113 if (TARGET_64BIT)
16114 emit_insn (gen_adddi3 (out, out, const1_rtx));
16115 else
16116 emit_insn (gen_addsi3 (out, out, const1_rtx));
e075ae69
RH
16117
16118 emit_label (align_3_label);
16119 }
16120
d43e0b7d
RK
16121 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16122 end_0_label);
e075ae69 16123
0945b39d
JH
16124 if (TARGET_64BIT)
16125 emit_insn (gen_adddi3 (out, out, const1_rtx));
16126 else
16127 emit_insn (gen_addsi3 (out, out, const1_rtx));
3f803cd9
SC
16128 }
16129
e075ae69
RH
16130/* Generate a loop to check 4 bytes at a time. It is not a good idea to
16131 align this loop. It only enlarges the program and does not help
16132 speed. */
16133 emit_label (align_4_label);
3f803cd9 16134
4e44c1ef 16135 mem = change_address (src, SImode, out);
e075ae69 16136 emit_move_insn (scratch, mem);
0945b39d
JH
16137 if (TARGET_64BIT)
16138 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
16139 else
16140 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
e075ae69 16141
e2e52e1b
JH
16142 /* This formula yields a nonzero result iff one of the bytes is zero.
16143 This saves three branches inside loop and many cycles. */
16144
16145 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
16146 emit_insn (gen_one_cmplsi2 (scratch, scratch));
16147 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
7471a1f0 16148 emit_insn (gen_andsi3 (tmpreg, tmpreg,
d8bf17f9 16149 gen_int_mode (0x80808080, SImode)));
d43e0b7d
RK
16150 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
16151 align_4_label);
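  /* Scalar form of the zero-byte test just emitted (an illustrative
     sketch, never compiled): the value is nonzero exactly when some
     byte of X is zero, e.g. nonzero for 0x12340078 and zero for
     0x12345678.  */
#if 0
  {
    unsigned int x = 0x12340078;
    int some_byte_zero = ((x - 0x01010101U) & ~x & 0x80808080U) != 0;
  }
#endif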
e2e52e1b
JH
16152
16153 if (TARGET_CMOVE)
16154 {
16155 rtx reg = gen_reg_rtx (SImode);
0945b39d 16156 rtx reg2 = gen_reg_rtx (Pmode);
e2e52e1b
JH
16157 emit_move_insn (reg, tmpreg);
16158 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
16159
0f290768 16160 /* If zero is not in the first two bytes, move two bytes forward. */
16189740 16161 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
16162 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16163 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16164 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
16165 gen_rtx_IF_THEN_ELSE (SImode, tmp,
77ebd435
AJ
16166 reg,
16167 tmpreg)));
e2e52e1b 16168 /* Emit lea manually to avoid clobbering the flags. */
0945b39d 16169 emit_insn (gen_rtx_SET (SImode, reg2,
60c81c89 16170 gen_rtx_PLUS (Pmode, out, const2_rtx)));
e2e52e1b
JH
16171
16172 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16173 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16174 emit_insn (gen_rtx_SET (VOIDmode, out,
0945b39d 16175 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
a4f31c00
AJ
16176 reg2,
16177 out)));
e2e52e1b
JH
16178
16179 }
16180 else
16181 {
16182 rtx end_2_label = gen_label_rtx ();
16183 /* Is zero in the first two bytes? */
16184
16189740 16185 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
16186 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16187 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
16188 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16189 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
16190 pc_rtx);
16191 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16192 JUMP_LABEL (tmp) = end_2_label;
16193
0f290768 16194 /* Not in the first two. Move two bytes forward. */
e2e52e1b 16195 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
0945b39d 16196 if (TARGET_64BIT)
60c81c89 16197 emit_insn (gen_adddi3 (out, out, const2_rtx));
0945b39d 16198 else
60c81c89 16199 emit_insn (gen_addsi3 (out, out, const2_rtx));
e2e52e1b
JH
16200
16201 emit_label (end_2_label);
16202
16203 }
16204
0f290768 16205 /* Avoid branch in fixing the byte. */
e2e52e1b 16206 tmpreg = gen_lowpart (QImode, tmpreg);
7e08e190 16207 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
d02cb675 16208 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
0945b39d 16209 if (TARGET_64BIT)
e6e81735 16210 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
0945b39d 16211 else
e6e81735 16212 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
e075ae69
RH
16213
16214 emit_label (end_0_label);
16215}
0e07aff3 16216
2ed941ec
RH
16217/* Expand strlen. */
16218
16219int
16220ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
16221{
16222 rtx addr, scratch1, scratch2, scratch3, scratch4;
16223
16224 /* The generic case of the strlen expander is long. Avoid expanding
16225 it unless TARGET_INLINE_ALL_STRINGOPS. */
16226
16227 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16228 && !TARGET_INLINE_ALL_STRINGOPS
16229 && !optimize_size
16230 && (!CONST_INT_P (align) || INTVAL (align) < 4))
16231 return 0;
16232
16233 addr = force_reg (Pmode, XEXP (src, 0));
16234 scratch1 = gen_reg_rtx (Pmode);
16235
16236 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16237 && !optimize_size)
16238 {
16239 /* Well it seems that some optimizer does not combine a call like
16240 foo(strlen(bar), strlen(bar));
16241 when the move and the subtraction are done here. It does calculate
16242 the length just once when these instructions are done inside of
16243 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
16244 often used and I use one fewer register for the lifetime of
16245 output_strlen_unroll() this is better. */
16246
16247 emit_move_insn (out, addr);
16248
16249 ix86_expand_strlensi_unroll_1 (out, src, align);
16250
16251 /* strlensi_unroll_1 returns the address of the zero at the end of
16252 the string, like memchr(), so compute the length by subtracting
16253 the start address. */
16254 if (TARGET_64BIT)
16255 emit_insn (gen_subdi3 (out, out, addr));
16256 else
16257 emit_insn (gen_subsi3 (out, out, addr));
16258 }
16259 else
16260 {
16261 rtx unspec;
3c285765
NF
16262
16263 /* Can't use this if the user has appropriated eax, ecx, or edi. */
ec382b8c 16264 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
3c285765
NF
16265 return false;
16266
2ed941ec
RH
16267 scratch2 = gen_reg_rtx (Pmode);
16268 scratch3 = gen_reg_rtx (Pmode);
16269 scratch4 = force_reg (Pmode, constm1_rtx);
16270
16271 emit_move_insn (scratch3, addr);
16272 eoschar = force_reg (QImode, eoschar);
16273
16274 src = replace_equiv_address_nv (src, scratch3);
16275
16276 /* If .md starts supporting :P, this can be done in .md. */
16277 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
16278 scratch4), UNSPEC_SCAS);
16279 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
16280 if (TARGET_64BIT)
16281 {
16282 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
16283 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
16284 }
16285 else
16286 {
16287 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
16288 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
16289 }
16290 }
16291 return 1;
16292}
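
/* A sketch (illustrative only, never compiled) of the arithmetic in
   the repnz/scasb branch above, assuming ECX starts at -1 and is
   decremented once per byte scanned including the terminator: after
   the scan ECX == -len - 2, and the NOT plus ADD -1 pair recovers
   len == ~ECX - 1.  */
#if 0
static long
strlen_from_scas_count (long ecx_after_scan)
{
  /* E.g. for "abc": 4 bytes scanned, ECX == -5, ~(-5) - 1 == 3.  */
  return ~ecx_after_scan - 1;
}
#endif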
16293
dc4d7240
JH
16294/* For a given symbol (function), construct code to compute the address of
16295 its PLT entry in the large x86-64 PIC model. */
16296rtx
16297construct_plt_address (rtx symbol)
16298{
16299 rtx tmp = gen_reg_rtx (Pmode);
16300 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
16301
16302 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
16303 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
16304
16305 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
16306 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
16307 return tmp;
16308}
16309
0e07aff3 16310void
0f901c4c
SH
16311ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
16312 rtx callarg2 ATTRIBUTE_UNUSED,
b96a374d 16313 rtx pop, int sibcall)
0e07aff3
RH
16314{
16315 rtx use = NULL, call;
16316
16317 if (pop == const0_rtx)
16318 pop = NULL;
d0396b79 16319 gcc_assert (!TARGET_64BIT || !pop);
0e07aff3 16320
f7288899
EC
16321 if (TARGET_MACHO && !TARGET_64BIT)
16322 {
b069de3b 16323#if TARGET_MACHO
f7288899
EC
16324 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
16325 fnaddr = machopic_indirect_call_target (fnaddr);
16326#endif
16327 }
16328 else
16329 {
16330 /* Static functions and indirect calls don't need the pic register. */
dc4d7240 16331 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
f7288899
EC
16332 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16333 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
16334 use_reg (&use, pic_offset_table_rtx);
16335 }
0e07aff3
RH
16336
16337 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
16338 {
29b74761 16339 rtx al = gen_rtx_REG (QImode, AX_REG);
0e07aff3
RH
16340 emit_move_insn (al, callarg2);
16341 use_reg (&use, al);
16342 }
16343
dc4d7240
JH
16344 if (ix86_cmodel == CM_LARGE_PIC
16345 && GET_CODE (fnaddr) == MEM
16346 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16347 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
16348 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
16349 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
0e07aff3
RH
16350 {
16351 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16352 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16353 }
4977bab6
ZW
16354 if (sibcall && TARGET_64BIT
16355 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
16356 {
16357 rtx addr;
16358 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
3c4ace25 16359 fnaddr = gen_rtx_REG (Pmode, R11_REG);
4977bab6
ZW
16360 emit_move_insn (fnaddr, addr);
16361 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16362 }
0e07aff3
RH
16363
16364 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
16365 if (retval)
16366 call = gen_rtx_SET (VOIDmode, retval, call);
16367 if (pop)
16368 {
16369 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
16370 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
16371 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
16372 }
16373
16374 call = emit_call_insn (call);
16375 if (use)
16376 CALL_INSN_FUNCTION_USAGE (call) = use;
16377}
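
/* Rough shape of the RTL constructed above (an illustrative sketch;
   the SET wrapper and the PARALLEL appear only when RETVAL resp. POP
   is non-null):

     (call_insn (parallel [(set RETVAL
                                (call (mem:QI FNADDR) CALLARG1))
                           (set (reg sp)
                                (plus (reg sp) POP))]))  */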
fce5a9f2 16378
e075ae69 16379\f
e075ae69
RH
16380/* Clear stack slot assignments remembered from previous functions.
16381 This is called from INIT_EXPANDERS once before RTL is emitted for each
16382 function. */
16383
e2500fed 16384static struct machine_function *
b96a374d 16385ix86_init_machine_status (void)
37b15744 16386{
d7394366
JH
16387 struct machine_function *f;
16388
9415ab7d 16389 f = GGC_CNEW (struct machine_function);
d7394366 16390 f->use_fast_prologue_epilogue_nregs = -1;
5bf5a10b 16391 f->tls_descriptor_call_expanded_p = 0;
8330e2c6
AJ
16392
16393 return f;
1526a060
BS
16394}
16395
e075ae69
RH
16396/* Return a MEM corresponding to a stack slot with mode MODE.
16397 Allocate a new slot if necessary.
16398
16399 The RTL for a function can have several slots available: N is
16400 which slot to use. */
16401
16402rtx
ff680eb1 16403assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
e075ae69 16404{
ddb0ae00
ZW
16405 struct stack_local_entry *s;
16406
ff680eb1 16407 gcc_assert (n < MAX_386_STACK_LOCALS);
e075ae69 16408
80dcd3aa
UB
16409 /* Virtual slot is valid only before vregs are instantiated. */
16410 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
16411
ddb0ae00
ZW
16412 for (s = ix86_stack_locals; s; s = s->next)
16413 if (s->mode == mode && s->n == n)
3e916873 16414 return copy_rtx (s->rtl);
ddb0ae00
ZW
16415
16416 s = (struct stack_local_entry *)
16417 ggc_alloc (sizeof (struct stack_local_entry));
16418 s->n = n;
16419 s->mode = mode;
808e68bb 16420 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
e075ae69 16421
ddb0ae00
ZW
16422 s->next = ix86_stack_locals;
16423 ix86_stack_locals = s;
16424 return s->rtl;
e075ae69 16425}
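
/* Editorial aside -- a minimal, standalone sketch of the slot-caching
   pattern used by assign_386_stack_local above, with plain malloc
   standing in for ggc_alloc and an int id standing in for the RTX.
   All names below are illustrative, not part of GCC.  */

#include <stdio.h>
#include <stdlib.h>

struct slot_entry
{
  int mode, n, id;
  struct slot_entry *next;
};

static struct slot_entry *slots;
static int next_id;

static int
get_slot (int mode, int n)
{
  struct slot_entry *s;

  /* Reuse a previously assigned slot with the same (mode, n) key.  */
  for (s = slots; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return s->id;

  /* Otherwise allocate a fresh entry and push it on the list.  */
  s = (struct slot_entry *) malloc (sizeof (struct slot_entry));
  s->mode = mode;
  s->n = n;
  s->id = next_id++;
  s->next = slots;
  slots = s;
  return s->id;
}

int
main (void)
{
  int a = get_slot (2, 0);
  int b = get_slot (3, 0);
  int c = get_slot (2, 0);
  printf ("%d %d %d\n", a, b, c);	/* "0 1 0": third request reuses the first slot.  */
  return 0;
}
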
f996902d
RH
16426
16427/* Construct the SYMBOL_REF for the tls_get_addr function. */
16428
e2500fed 16429static GTY(()) rtx ix86_tls_symbol;
f996902d 16430rtx
b96a374d 16431ix86_tls_get_addr (void)
f996902d 16432{
e2500fed 16434 if (!ix86_tls_symbol)
f996902d 16435 {
75d38379 16436 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
5bf5a10b
AO
16437 (TARGET_ANY_GNU_TLS
16438 && !TARGET_64BIT)
75d38379
JJ
16439 ? "___tls_get_addr"
16440 : "__tls_get_addr");
f996902d
RH
16441 }
16442
e2500fed 16443 return ix86_tls_symbol;
f996902d 16444}
5bf5a10b
AO
16445
16446/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16447
16448static GTY(()) rtx ix86_tls_module_base_symbol;
16449rtx
16450ix86_tls_module_base (void)
16451{
16453 if (!ix86_tls_module_base_symbol)
16454 {
16455 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
16456 "_TLS_MODULE_BASE_");
16457 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16458 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16459 }
16460
16461 return ix86_tls_module_base_symbol;
16462}
e075ae69
RH
16463\f
16464/* Calculate the length of the memory address in the instruction
16465 encoding. Does not include the one-byte modrm, opcode, or prefix. */
16466
8fe75e43 16467int
b96a374d 16468memory_address_length (rtx addr)
e075ae69
RH
16469{
16470 struct ix86_address parts;
16471 rtx base, index, disp;
16472 int len;
d0396b79 16473 int ok;
e075ae69
RH
16474
16475 if (GET_CODE (addr) == PRE_DEC
0d7d98ee
JH
16476 || GET_CODE (addr) == POST_INC
16477 || GET_CODE (addr) == PRE_MODIFY
16478 || GET_CODE (addr) == POST_MODIFY)
e075ae69 16479 return 0;
3f803cd9 16480
d0396b79
NS
16481 ok = ix86_decompose_address (addr, &parts);
16482 gcc_assert (ok);
3f803cd9 16483
7c93c2cc
PB
16484 if (parts.base && GET_CODE (parts.base) == SUBREG)
16485 parts.base = SUBREG_REG (parts.base);
16486 if (parts.index && GET_CODE (parts.index) == SUBREG)
16487 parts.index = SUBREG_REG (parts.index);
16488
e075ae69
RH
16489 base = parts.base;
16490 index = parts.index;
16491 disp = parts.disp;
16492 len = 0;
3f803cd9 16493
7b65ed54
EB
16494 /* Rule of thumb:
16495 - esp as the base always wants an index,
16496 - ebp as the base always wants a displacement. */
16497
e075ae69
RH
16498 /* Register Indirect. */
16499 if (base && !index && !disp)
16500 {
7b65ed54
EB
16501 /* esp (for its index) and ebp (for its displacement) need
16502 the two-byte modrm form. */
e075ae69
RH
16503 if (addr == stack_pointer_rtx
16504 || addr == arg_pointer_rtx
564d80f4
JH
16505 || addr == frame_pointer_rtx
16506 || addr == hard_frame_pointer_rtx)
e075ae69 16507 len = 1;
3f803cd9 16508 }
e9a25f70 16509
e075ae69
RH
16510 /* Direct Addressing. */
16511 else if (disp && !base && !index)
16512 len = 4;
16513
3f803cd9
SC
16514 else
16515 {
e075ae69
RH
16516 /* Find the length of the displacement constant. */
16517 if (disp)
16518 {
f38840db 16519 if (base && satisfies_constraint_K (disp))
e075ae69
RH
16520 len = 1;
16521 else
16522 len = 4;
16523 }
7b65ed54
EB
16524 /* ebp always wants a displacement. */
16525 else if (base == hard_frame_pointer_rtx)
16526 len = 1;
3f803cd9 16527
43f3a59d 16528 /* An index requires the two-byte modrm form.... */
7b65ed54
EB
16529 if (index
16530 /* ...like esp, which always wants an index. */
16531 || base == stack_pointer_rtx
16532 || base == arg_pointer_rtx
16533 || base == frame_pointer_rtx)
e075ae69 16534 len += 1;
3f803cd9
SC
16535 }
16536
e075ae69
RH
16537 return len;
16538}
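
/* Editorial aside -- worked examples for the length rules above, as a
   standalone sketch.  Addressing forms are encoded as plain flags
   rather than RTL; the function mirrors memory_address_length in
   returning the bytes needed beyond the opcode and one-byte modrm.  */

#include <assert.h>

static int
addr_extra_bytes (int has_base, int base_is_esp, int base_is_ebp,
		  int has_index, int has_disp, int disp_fits_8bit)
{
  int len = 0;

  if (has_disp)
    len = (has_base && disp_fits_8bit) ? 1 : 4;	/* disp8 vs. disp32.  */
  else if (base_is_ebp)
    len = 1;		/* ebp always wants a displacement.  */

  if (has_index || base_is_esp)
    len += 1;		/* SIB byte; esp always wants an index.  */

  return len;
}

int
main (void)
{
  assert (addr_extra_bytes (1, 0, 0, 0, 0, 0) == 0);	/* (%eax)        */
  assert (addr_extra_bytes (1, 1, 0, 0, 0, 0) == 1);	/* (%esp)        */
  assert (addr_extra_bytes (1, 0, 1, 0, 0, 0) == 1);	/* (%ebp)        */
  assert (addr_extra_bytes (0, 0, 0, 0, 1, 0) == 4);	/* disp32        */
  assert (addr_extra_bytes (1, 0, 0, 0, 1, 1) == 1);	/* 8(%ebx)       */
  assert (addr_extra_bytes (1, 0, 0, 1, 0, 0) == 1);	/* (%ebx,%ecx,4) */
  return 0;
}
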
79325812 16539
5bf0ebab
RH
16540/* Compute default value for "length_immediate" attribute. When SHORTFORM
 16541 is set, expect that the insn has an 8-bit immediate alternative. */
e075ae69 16542int
b96a374d 16543ix86_attr_length_immediate_default (rtx insn, int shortform)
e075ae69 16544{
6ef67412
JH
16545 int len = 0;
16546 int i;
6c698a6d 16547 extract_insn_cached (insn);
6ef67412
JH
16548 for (i = recog_data.n_operands - 1; i >= 0; --i)
16549 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 16550 {
d0396b79 16551 gcc_assert (!len);
f38840db 16552 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
6ef67412
JH
16553 len = 1;
16554 else
16555 {
16556 switch (get_attr_mode (insn))
16557 {
16558 case MODE_QI:
16559 len += 1;
16560 break;
16561 case MODE_HI:
16562 len += 2;
16563 break;
16564 case MODE_SI:
16565 len += 4;
16566 break;
14f73b5a
JH
16567 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
16568 case MODE_DI:
16569 len += 4;
16570 break;
6ef67412 16571 default:
c725bd79 16572 fatal_insn ("unknown insn mode", insn);
6ef67412
JH
16573 }
16574 }
3071fab5 16575 }
6ef67412
JH
16576 return len;
16577}
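
/* Editorial aside -- a standalone restatement of the immediate-size
   rule above (illustrative only; imm_len is not a GCC function).  An
   insn with an 8-bit alternative costs one byte for small constants;
   otherwise the immediate takes the full operand width, capped at
   four bytes because DImode immediates are 32-bit sign-extended.  */

#include <assert.h>

static int
imm_len (long long val, int shortform, int mode_bytes)
{
  if (shortform && val >= -128 && val <= 127)
    return 1;				/* matches constraint 'K'.  */
  return mode_bytes > 4 ? 4 : mode_bytes;
}

int
main (void)
{
  assert (imm_len (3, 1, 4) == 1);	/* addl $3, %eax    */
  assert (imm_len (300, 1, 4) == 4);	/* addl $300, %eax  */
  assert (imm_len (3, 0, 8) == 4);	/* DImode: imm32    */
  return 0;
}
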
16578/* Compute default value for "length_address" attribute. */
16579int
b96a374d 16580ix86_attr_length_address_default (rtx insn)
6ef67412
JH
16581{
16582 int i;
9b73c90a
EB
16583
16584 if (get_attr_type (insn) == TYPE_LEA)
16585 {
16586 rtx set = PATTERN (insn);
d0396b79
NS
16587
16588 if (GET_CODE (set) == PARALLEL)
9b73c90a 16589 set = XVECEXP (set, 0, 0);
d0396b79
NS
16590
16591 gcc_assert (GET_CODE (set) == SET);
9b73c90a
EB
16592
16593 return memory_address_length (SET_SRC (set));
16594 }
16595
6c698a6d 16596 extract_insn_cached (insn);
1ccbefce 16597 for (i = recog_data.n_operands - 1; i >= 0; --i)
7656aee4 16598 if (MEM_P (recog_data.operand[i]))
e075ae69 16599 {
6ef67412 16600 return memory_address_length (XEXP (recog_data.operand[i], 0));
16602 }
6ef67412 16603 return 0;
3f803cd9 16604}
e075ae69
RH
16605\f
16606/* Return the maximum number of instructions a cpu can issue. */
b657fc39 16607
c237e94a 16608static int
b96a374d 16609ix86_issue_rate (void)
b657fc39 16610{
9e555526 16611 switch (ix86_tune)
b657fc39 16612 {
e075ae69
RH
16613 case PROCESSOR_PENTIUM:
16614 case PROCESSOR_K6:
16615 return 2;
79325812 16616
e075ae69 16617 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
16618 case PROCESSOR_PENTIUM4:
16619 case PROCESSOR_ATHLON:
4977bab6 16620 case PROCESSOR_K8:
21efb4d4 16621 case PROCESSOR_AMDFAM10:
89c43c0a 16622 case PROCESSOR_NOCONA:
d326eaf0
JH
16623 case PROCESSOR_GENERIC32:
16624 case PROCESSOR_GENERIC64:
e075ae69 16625 return 3;
b657fc39 16626
05f85dbb
VM
16627 case PROCESSOR_CORE2:
16628 return 4;
16629
b657fc39 16630 default:
e075ae69 16631 return 1;
b657fc39 16632 }
b657fc39
L
16633}
16634
e075ae69
RH
16635/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
 16636 set by DEP_INSN and nothing else that DEP_INSN sets. */
b657fc39 16637
e075ae69 16638static int
d1c78882 16639ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
e075ae69
RH
16640{
16641 rtx set, set2;
b657fc39 16642
e075ae69
RH
16643 /* Simplify the test for uninteresting insns. */
16644 if (insn_type != TYPE_SETCC
16645 && insn_type != TYPE_ICMOV
16646 && insn_type != TYPE_FCMOV
16647 && insn_type != TYPE_IBR)
16648 return 0;
b657fc39 16649
e075ae69
RH
16650 if ((set = single_set (dep_insn)) != 0)
16651 {
16652 set = SET_DEST (set);
16653 set2 = NULL_RTX;
16654 }
16655 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
16656 && XVECLEN (PATTERN (dep_insn), 0) == 2
16657 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
16658 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
16659 {
16660 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16661 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
16662 }
78a0d70c
ZW
16663 else
16664 return 0;
b657fc39 16665
7656aee4 16666 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
78a0d70c 16667 return 0;
b657fc39 16668
f5143c46 16669 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
16670 not any other potentially set register. */
16671 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
16672 return 0;
16673
16674 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
16675 return 0;
16676
16677 return 1;
e075ae69 16678}
b657fc39 16679
e075ae69
RH
16680/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16681 address with operands set by DEP_INSN. */
16682
16683static int
d1c78882 16684ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
e075ae69
RH
16685{
16686 rtx addr;
16687
6ad48e84
JH
16688 if (insn_type == TYPE_LEA
16689 && TARGET_PENTIUM)
5fbdde42
RH
16690 {
16691 addr = PATTERN (insn);
d0396b79
NS
16692
16693 if (GET_CODE (addr) == PARALLEL)
5fbdde42 16694 addr = XVECEXP (addr, 0, 0);
5656a184 16695
d0396b79 16696 gcc_assert (GET_CODE (addr) == SET);
5656a184 16697
5fbdde42
RH
16698 addr = SET_SRC (addr);
16699 }
e075ae69
RH
16700 else
16701 {
16702 int i;
6c698a6d 16703 extract_insn_cached (insn);
1ccbefce 16704 for (i = recog_data.n_operands - 1; i >= 0; --i)
7656aee4 16705 if (MEM_P (recog_data.operand[i]))
e075ae69 16706 {
1ccbefce 16707 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
16708 goto found;
16709 }
16710 return 0;
16711 found:;
b657fc39
L
16712 }
16713
e075ae69 16714 return modified_in_p (addr, dep_insn);
b657fc39 16715}
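
/* Editorial aside -- what ix86_agi_dependent is looking for, as a
   concrete i386 fragment (illustrative only; build with -m32).  The
   lea writes the register that the very next instruction uses for
   address generation, the pattern that costs Pentium an extra cycle.  */

static int
agi_demo (int *p)
{
  int r;
  __asm__ volatile ("leal 4(%1), %1\n\t"	/* %1 written here...   */
		    "movl (%1), %0"		/* ...used as address.  */
		    : "=r" (r), "+r" (p) : : "memory");
  return r;
}

int
main (void)
{
  int buf[2] = { 1, 2 };
  return agi_demo (buf) == 2 ? 0 : 1;	/* loads buf[1] via p + 4.  */
}
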
a269a03c 16716
c237e94a 16717static int
b96a374d 16718ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
a269a03c 16719{
e075ae69 16720 enum attr_type insn_type, dep_insn_type;
8695f61e 16721 enum attr_memory memory;
e075ae69 16722 rtx set, set2;
9b00189f 16723 int dep_insn_code_number;
a269a03c 16724
d1f87653 16725 /* Anti and output dependencies have zero cost on all CPUs. */
e075ae69 16726 if (REG_NOTE_KIND (link) != 0)
309ada50 16727 return 0;
a269a03c 16728
9b00189f
JH
16729 dep_insn_code_number = recog_memoized (dep_insn);
16730
e075ae69 16731 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 16732 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 16733 return cost;
a269a03c 16734
1c71e60e
JH
16735 insn_type = get_attr_type (insn);
16736 dep_insn_type = get_attr_type (dep_insn);
9b00189f 16737
9e555526 16738 switch (ix86_tune)
a269a03c
JC
16739 {
16740 case PROCESSOR_PENTIUM:
e075ae69 16741 /* Address Generation Interlock adds a cycle of latency. */
d1c78882 16742 if (ix86_agi_dependent (insn, dep_insn, insn_type))
e075ae69
RH
16743 cost += 1;
16744
16745 /* ??? Compares pair with jump/setcc. */
d1c78882 16746 if (ix86_flags_dependent (insn, dep_insn, insn_type))
e075ae69
RH
16747 cost = 0;
16748
d1f87653 16749 /* Floating point stores require the value to be ready one cycle earlier. */
0f290768 16750 if (insn_type == TYPE_FMOV
e075ae69 16751 && get_attr_memory (insn) == MEMORY_STORE
d1c78882 16752 && !ix86_agi_dependent (insn, dep_insn, insn_type))
e075ae69
RH
16753 cost += 1;
16754 break;
a269a03c 16755
e075ae69 16756 case PROCESSOR_PENTIUMPRO:
6ad48e84 16757 memory = get_attr_memory (insn);
e075ae69
RH
16758
16759 /* INT->FP conversion is expensive. */
16760 if (get_attr_fp_int_src (dep_insn))
16761 cost += 5;
16762
16763 /* There is one cycle extra latency between an FP op and a store. */
16764 if (insn_type == TYPE_FMOV
16765 && (set = single_set (dep_insn)) != NULL_RTX
16766 && (set2 = single_set (insn)) != NULL_RTX
16767 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
7656aee4 16768 && MEM_P (SET_DEST (set2)))
e075ae69 16769 cost += 1;
6ad48e84
JH
16770
16771 /* Show the ability of the reorder buffer to hide the latency of a load
 16772 by executing it in parallel with the previous instruction when the
 16773 previous instruction is not needed to compute the address. */
16774 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
d1c78882 16775 && !ix86_agi_dependent (insn, dep_insn, insn_type))
b96a374d 16776 {
6ad48e84
JH
16777 /* Claim that moves take one cycle, as the core can issue one load
 16778 at a time and the next load can start a cycle later. */
16779 if (dep_insn_type == TYPE_IMOV
16780 || dep_insn_type == TYPE_FMOV)
16781 cost = 1;
16782 else if (cost > 1)
16783 cost--;
16784 }
e075ae69 16785 break;
a269a03c 16786
e075ae69 16787 case PROCESSOR_K6:
6ad48e84 16788 memory = get_attr_memory (insn);
8695f61e 16789
e075ae69
RH
16790 /* The esp dependency is resolved before the instruction is really
16791 finished. */
16792 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16793 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16794 return 1;
a269a03c 16795
e075ae69
RH
16796 /* INT->FP conversion is expensive. */
16797 if (get_attr_fp_int_src (dep_insn))
16798 cost += 5;
6ad48e84
JH
16799
16800 /* Show the ability of the reorder buffer to hide the latency of a load
 16801 by executing it in parallel with the previous instruction when the
 16802 previous instruction is not needed to compute the address. */
16803 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
d1c78882 16804 && !ix86_agi_dependent (insn, dep_insn, insn_type))
b96a374d 16805 {
6ad48e84
JH
16806 /* Claim that moves take one cycle, as the core can issue one load
 16807 at a time and the next load can start a cycle later. */
16808 if (dep_insn_type == TYPE_IMOV
16809 || dep_insn_type == TYPE_FMOV)
16810 cost = 1;
16811 else if (cost > 2)
16812 cost -= 2;
16813 else
16814 cost = 1;
16815 }
a14003ee 16816 break;
e075ae69 16817
309ada50 16818 case PROCESSOR_ATHLON:
4977bab6 16819 case PROCESSOR_K8:
21efb4d4 16820 case PROCESSOR_AMDFAM10:
d326eaf0
JH
16821 case PROCESSOR_GENERIC32:
16822 case PROCESSOR_GENERIC64:
6ad48e84 16823 memory = get_attr_memory (insn);
6ad48e84 16824
6ad48e84
JH
16825 /* Show the ability of the reorder buffer to hide the latency of a load
 16826 by executing it in parallel with the previous instruction when the
 16827 previous instruction is not needed to compute the address. */
16828 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
d1c78882 16829 && !ix86_agi_dependent (insn, dep_insn, insn_type))
b96a374d 16830 {
26f74aa3
JH
16831 enum attr_unit unit = get_attr_unit (insn);
16832 int loadcost = 3;
16833
16834 /* Because of the difference between the length of integer and
16835 floating unit pipeline preparation stages, the memory operands
b96a374d 16836 for floating point are cheaper.
26f74aa3 16837
c51e6d85 16838 ??? For Athlon the difference is most probably 2. */
26f74aa3
JH
16839 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16840 loadcost = 3;
16841 else
16842 loadcost = TARGET_ATHLON ? 2 : 0;
16843
16844 if (cost >= loadcost)
16845 cost -= loadcost;
6ad48e84
JH
16846 else
16847 cost = 0;
16848 }
309ada50 16849
a269a03c 16850 default:
a269a03c
JC
16851 break;
16852 }
16853
16854 return cost;
16855}
0a726ef1 16856
9b690711
RH
16857/* How many alternative schedules to try. This should be as wide as the
16858 scheduling freedom in the DFA, but no wider. Making this value too
16859 large results in extra work for the scheduler. */
16860
16861static int
b96a374d 16862ia32_multipass_dfa_lookahead (void)
9b690711 16863{
8383d43c
UB
16864 switch (ix86_tune)
16865 {
16866 case PROCESSOR_PENTIUM:
16867 return 2;
56bab446 16868
8383d43c
UB
16869 case PROCESSOR_PENTIUMPRO:
16870 case PROCESSOR_K6:
16871 return 1;
56bab446 16872
8383d43c
UB
16873 default:
16874 return 0;
16875 }
9b690711
RH
16876}
16877
0e4970d7 16878\f
a7180f70
BS
16879/* Compute the alignment given to a constant that is being placed in memory.
16880 EXP is the constant and ALIGN is the alignment that the object would
16881 ordinarily have.
16882 The value of this function is used instead of that alignment to align
16883 the object. */
16884
16885int
b96a374d 16886ix86_constant_alignment (tree exp, int align)
a7180f70 16887{
be3d4789
UB
16888 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
16889 || TREE_CODE (exp) == INTEGER_CST)
a7180f70
BS
16890 {
16891 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16892 return 64;
16893 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16894 return 128;
16895 }
4137ba7a
JJ
16896 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16897 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16898 return BITS_PER_WORD;
a7180f70
BS
16899
16900 return align;
16901}
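
/* Editorial aside -- the rules above condensed into a standalone
   sketch (type kinds reduced to an enum; const_align is illustrative,
   not GCC code).  A double constant is raised to 64-bit alignment, a
   128-bit vector constant to 128 bits, and a sufficiently long string
   to the word size so that fast string functions can assume aligned
   data.  */

#include <assert.h>

enum ckind { C_DOUBLE, C_VEC128, C_LONG_STRING, C_OTHER };

static int
const_align (enum ckind k, int align)
{
  switch (k)
    {
    case C_DOUBLE:      return align < 64 ? 64 : align;
    case C_VEC128:      return align < 128 ? 128 : align;
    case C_LONG_STRING: return align < 32 ? 32 : align;	/* BITS_PER_WORD on ia32.  */
    default:            return align;
    }
}

int
main (void)
{
  assert (const_align (C_DOUBLE, 32) == 64);
  assert (const_align (C_VEC128, 64) == 128);
  assert (const_align (C_OTHER, 8) == 8);
  return 0;
}
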
16902
16903/* Compute the alignment for a static variable.
16904 TYPE is the data type, and ALIGN is the alignment that
16905 the object would ordinarily have. The value of this function is used
16906 instead of that alignment to align the object. */
16907
16908int
b96a374d 16909ix86_data_alignment (tree type, int align)
a7180f70 16910{
bf69f9d2 16911 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
6c23a1f2 16912
a7180f70 16913 if (AGGREGATE_TYPE_P (type)
6c23a1f2
JB
16914 && TYPE_SIZE (type)
16915 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16916 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16917 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16918 && align < max_align)
16919 align = max_align;
a7180f70 16920
0d7d98ee
JH
16921 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16922 to a 16-byte boundary. */
16923 if (TARGET_64BIT)
16924 {
16925 if (AGGREGATE_TYPE_P (type)
16926 && TYPE_SIZE (type)
16927 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16928 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16929 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16930 return 128;
16931 }
16932
a7180f70
BS
16933 if (TREE_CODE (type) == ARRAY_TYPE)
16934 {
16935 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16936 return 64;
16937 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16938 return 128;
16939 }
16940 else if (TREE_CODE (type) == COMPLEX_TYPE)
16941 {
a7180f70
BS
16943 if (TYPE_MODE (type) == DCmode && align < 64)
16944 return 64;
16945 if (TYPE_MODE (type) == XCmode && align < 128)
16946 return 128;
16947 }
16948 else if ((TREE_CODE (type) == RECORD_TYPE
16949 || TREE_CODE (type) == UNION_TYPE
16950 || TREE_CODE (type) == QUAL_UNION_TYPE)
16951 && TYPE_FIELDS (type))
16952 {
16953 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16954 return 64;
16955 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16956 return 128;
16957 }
16958 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16959 || TREE_CODE (type) == INTEGER_TYPE)
16960 {
16961 if (TYPE_MODE (type) == DFmode && align < 64)
16962 return 64;
16963 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16964 return 128;
16965 }
16966
16967 return align;
16968}
16969
16970/* Compute the alignment for a local variable.
16971 TYPE is the data type, and ALIGN is the alignment that
16972 the object would ordinarily have. The value of this function is used
16973 instead of that alignment to align the object. */
16974
16975int
b96a374d 16976ix86_local_alignment (tree type, int align)
a7180f70 16977{
0d7d98ee
JH
16978 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16979 to a 16-byte boundary. */
16980 if (TARGET_64BIT)
16981 {
16982 if (AGGREGATE_TYPE_P (type)
16983 && TYPE_SIZE (type)
16984 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16985 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
16986 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16987 return 128;
16988 }
a7180f70
BS
16989 if (TREE_CODE (type) == ARRAY_TYPE)
16990 {
16991 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16992 return 64;
16993 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16994 return 128;
16995 }
16996 else if (TREE_CODE (type) == COMPLEX_TYPE)
16997 {
16998 if (TYPE_MODE (type) == DCmode && align < 64)
16999 return 64;
17000 if (TYPE_MODE (type) == XCmode && align < 128)
17001 return 128;
17002 }
17003 else if ((TREE_CODE (type) == RECORD_TYPE
17004 || TREE_CODE (type) == UNION_TYPE
17005 || TREE_CODE (type) == QUAL_UNION_TYPE)
17006 && TYPE_FIELDS (type))
17007 {
17008 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17009 return 64;
17010 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17011 return 128;
17012 }
17013 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
17014 || TREE_CODE (type) == INTEGER_TYPE)
17015 {
a7180f70
BS
17017 if (TYPE_MODE (type) == DFmode && align < 64)
17018 return 64;
17019 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17020 return 128;
17021 }
17022 return align;
17023}
0ed08620
JH
17024\f
17025/* Emit RTL insns to initialize the variable parts of a trampoline.
17026 FNADDR is an RTX for the address of the function's pure code.
17027 CXT is an RTX for the static chain value for the function. */
17028void
b96a374d 17029x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
0ed08620
JH
17030{
17031 if (!TARGET_64BIT)
17032 {
17033 /* Compute offset from the end of the jmp to the target function. */
17034 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
17035 plus_constant (tramp, 10),
17036 NULL_RTX, 1, OPTAB_DIRECT);
17037 emit_move_insn (gen_rtx_MEM (QImode, tramp),
d8bf17f9 17038 gen_int_mode (0xb9, QImode));
0ed08620
JH
17039 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
17040 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
d8bf17f9 17041 gen_int_mode (0xe9, QImode));
0ed08620
JH
17042 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
17043 }
17044 else
17045 {
17046 int offset = 0;
17047 /* Try to load the address using the shorter movl instead of movabs.
 17048 We may want to support movq for kernel mode, but the kernel does not use
17049 trampolines at the moment. */
8fe75e43 17050 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
0ed08620
JH
17051 {
17052 fnaddr = copy_to_mode_reg (DImode, fnaddr);
17053 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 17054 gen_int_mode (0xbb41, HImode));
0ed08620
JH
17055 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
17056 gen_lowpart (SImode, fnaddr));
17057 offset += 6;
17058 }
17059 else
17060 {
17061 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 17062 gen_int_mode (0xbb49, HImode));
0ed08620
JH
17063 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17064 fnaddr);
17065 offset += 10;
17066 }
17067 /* Load static chain using movabs to r10. */
17068 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 17069 gen_int_mode (0xba49, HImode));
0ed08620
JH
17070 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17071 cxt);
17072 offset += 10;
17073 /* Jump to r11. */
17074 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 17075 gen_int_mode (0xff49, HImode));
0ed08620 17076 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
d8bf17f9 17077 gen_int_mode (0xe3, QImode));
0ed08620 17078 offset += 3;
d0396b79 17079 gcc_assert (offset <= TRAMPOLINE_SIZE);
0ed08620 17080 }
5791cc29 17081
e7a742ec 17082#ifdef ENABLE_EXECUTE_STACK
f84d109f 17083 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
5791cc29
JT
17084 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
17085#endif
0ed08620 17086}
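
/* Editorial aside -- the 32-bit trampoline emitted above, spelled out
   as raw bytes (an illustrative sketch; offsets match the
   plus_constant calls): a movl of the static chain into %ecx followed
   by a relative jmp to the target.  struct tramp32 is hypothetical.  */

#include <stdio.h>
#include <string.h>

struct tramp32
{
  unsigned char mov_ecx;	/* 0xb9: movl $cxt, %ecx  */
  unsigned int cxt;		/* static chain value     */
  unsigned char jmp_rel;	/* 0xe9: jmp fnaddr       */
  unsigned int disp;		/* fnaddr - (tramp + 10)  */
} __attribute__ ((packed));

int
main (void)
{
  struct tramp32 t = { 0xb9, 0xdeadbeefu, 0xe9, 0 };
  unsigned char buf[sizeof t];

  memcpy (buf, &t, sizeof t);
  printf ("opcode bytes: %02x at 0, %02x at 5; total %u bytes\n",
	  buf[0], buf[5], (unsigned) sizeof t);	/* b9 ... e9; 10 bytes.  */
  return 0;
}
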
eeb06b1b 17087\f
eb701deb
RH
17088/* Codes for all the SSE/MMX builtins. */
17089enum ix86_builtins
17090{
17091 IX86_BUILTIN_ADDPS,
17092 IX86_BUILTIN_ADDSS,
17093 IX86_BUILTIN_DIVPS,
17094 IX86_BUILTIN_DIVSS,
17095 IX86_BUILTIN_MULPS,
17096 IX86_BUILTIN_MULSS,
17097 IX86_BUILTIN_SUBPS,
17098 IX86_BUILTIN_SUBSS,
17099
17100 IX86_BUILTIN_CMPEQPS,
17101 IX86_BUILTIN_CMPLTPS,
17102 IX86_BUILTIN_CMPLEPS,
17103 IX86_BUILTIN_CMPGTPS,
17104 IX86_BUILTIN_CMPGEPS,
17105 IX86_BUILTIN_CMPNEQPS,
17106 IX86_BUILTIN_CMPNLTPS,
17107 IX86_BUILTIN_CMPNLEPS,
17108 IX86_BUILTIN_CMPNGTPS,
17109 IX86_BUILTIN_CMPNGEPS,
17110 IX86_BUILTIN_CMPORDPS,
17111 IX86_BUILTIN_CMPUNORDPS,
eb701deb
RH
17112 IX86_BUILTIN_CMPEQSS,
17113 IX86_BUILTIN_CMPLTSS,
17114 IX86_BUILTIN_CMPLESS,
17115 IX86_BUILTIN_CMPNEQSS,
17116 IX86_BUILTIN_CMPNLTSS,
17117 IX86_BUILTIN_CMPNLESS,
17118 IX86_BUILTIN_CMPNGTSS,
17119 IX86_BUILTIN_CMPNGESS,
17120 IX86_BUILTIN_CMPORDSS,
17121 IX86_BUILTIN_CMPUNORDSS,
eb701deb
RH
17122
17123 IX86_BUILTIN_COMIEQSS,
17124 IX86_BUILTIN_COMILTSS,
17125 IX86_BUILTIN_COMILESS,
17126 IX86_BUILTIN_COMIGTSS,
17127 IX86_BUILTIN_COMIGESS,
17128 IX86_BUILTIN_COMINEQSS,
17129 IX86_BUILTIN_UCOMIEQSS,
17130 IX86_BUILTIN_UCOMILTSS,
17131 IX86_BUILTIN_UCOMILESS,
17132 IX86_BUILTIN_UCOMIGTSS,
17133 IX86_BUILTIN_UCOMIGESS,
17134 IX86_BUILTIN_UCOMINEQSS,
17135
17136 IX86_BUILTIN_CVTPI2PS,
17137 IX86_BUILTIN_CVTPS2PI,
17138 IX86_BUILTIN_CVTSI2SS,
17139 IX86_BUILTIN_CVTSI642SS,
17140 IX86_BUILTIN_CVTSS2SI,
17141 IX86_BUILTIN_CVTSS2SI64,
17142 IX86_BUILTIN_CVTTPS2PI,
17143 IX86_BUILTIN_CVTTSS2SI,
17144 IX86_BUILTIN_CVTTSS2SI64,
17145
17146 IX86_BUILTIN_MAXPS,
17147 IX86_BUILTIN_MAXSS,
17148 IX86_BUILTIN_MINPS,
17149 IX86_BUILTIN_MINSS,
17150
17151 IX86_BUILTIN_LOADUPS,
17152 IX86_BUILTIN_STOREUPS,
17153 IX86_BUILTIN_MOVSS,
17154
17155 IX86_BUILTIN_MOVHLPS,
17156 IX86_BUILTIN_MOVLHPS,
17157 IX86_BUILTIN_LOADHPS,
17158 IX86_BUILTIN_LOADLPS,
17159 IX86_BUILTIN_STOREHPS,
17160 IX86_BUILTIN_STORELPS,
17161
17162 IX86_BUILTIN_MASKMOVQ,
17163 IX86_BUILTIN_MOVMSKPS,
17164 IX86_BUILTIN_PMOVMSKB,
17165
17166 IX86_BUILTIN_MOVNTPS,
17167 IX86_BUILTIN_MOVNTQ,
17168
17169 IX86_BUILTIN_LOADDQU,
17170 IX86_BUILTIN_STOREDQU,
eb701deb
RH
17171
17172 IX86_BUILTIN_PACKSSWB,
17173 IX86_BUILTIN_PACKSSDW,
17174 IX86_BUILTIN_PACKUSWB,
17175
17176 IX86_BUILTIN_PADDB,
17177 IX86_BUILTIN_PADDW,
17178 IX86_BUILTIN_PADDD,
17179 IX86_BUILTIN_PADDQ,
17180 IX86_BUILTIN_PADDSB,
17181 IX86_BUILTIN_PADDSW,
17182 IX86_BUILTIN_PADDUSB,
17183 IX86_BUILTIN_PADDUSW,
17184 IX86_BUILTIN_PSUBB,
17185 IX86_BUILTIN_PSUBW,
17186 IX86_BUILTIN_PSUBD,
17187 IX86_BUILTIN_PSUBQ,
17188 IX86_BUILTIN_PSUBSB,
17189 IX86_BUILTIN_PSUBSW,
17190 IX86_BUILTIN_PSUBUSB,
17191 IX86_BUILTIN_PSUBUSW,
17192
17193 IX86_BUILTIN_PAND,
17194 IX86_BUILTIN_PANDN,
17195 IX86_BUILTIN_POR,
17196 IX86_BUILTIN_PXOR,
17197
17198 IX86_BUILTIN_PAVGB,
17199 IX86_BUILTIN_PAVGW,
17200
17201 IX86_BUILTIN_PCMPEQB,
17202 IX86_BUILTIN_PCMPEQW,
17203 IX86_BUILTIN_PCMPEQD,
17204 IX86_BUILTIN_PCMPGTB,
17205 IX86_BUILTIN_PCMPGTW,
17206 IX86_BUILTIN_PCMPGTD,
17207
17208 IX86_BUILTIN_PMADDWD,
17209
17210 IX86_BUILTIN_PMAXSW,
17211 IX86_BUILTIN_PMAXUB,
17212 IX86_BUILTIN_PMINSW,
17213 IX86_BUILTIN_PMINUB,
17214
17215 IX86_BUILTIN_PMULHUW,
17216 IX86_BUILTIN_PMULHW,
17217 IX86_BUILTIN_PMULLW,
17218
17219 IX86_BUILTIN_PSADBW,
17220 IX86_BUILTIN_PSHUFW,
17221
17222 IX86_BUILTIN_PSLLW,
17223 IX86_BUILTIN_PSLLD,
17224 IX86_BUILTIN_PSLLQ,
17225 IX86_BUILTIN_PSRAW,
17226 IX86_BUILTIN_PSRAD,
17227 IX86_BUILTIN_PSRLW,
17228 IX86_BUILTIN_PSRLD,
17229 IX86_BUILTIN_PSRLQ,
17230 IX86_BUILTIN_PSLLWI,
17231 IX86_BUILTIN_PSLLDI,
17232 IX86_BUILTIN_PSLLQI,
17233 IX86_BUILTIN_PSRAWI,
17234 IX86_BUILTIN_PSRADI,
17235 IX86_BUILTIN_PSRLWI,
17236 IX86_BUILTIN_PSRLDI,
17237 IX86_BUILTIN_PSRLQI,
17238
17239 IX86_BUILTIN_PUNPCKHBW,
17240 IX86_BUILTIN_PUNPCKHWD,
17241 IX86_BUILTIN_PUNPCKHDQ,
17242 IX86_BUILTIN_PUNPCKLBW,
17243 IX86_BUILTIN_PUNPCKLWD,
17244 IX86_BUILTIN_PUNPCKLDQ,
17245
17246 IX86_BUILTIN_SHUFPS,
17247
17248 IX86_BUILTIN_RCPPS,
17249 IX86_BUILTIN_RCPSS,
17250 IX86_BUILTIN_RSQRTPS,
3dc0f23a 17251 IX86_BUILTIN_RSQRTPS_NR,
eb701deb 17252 IX86_BUILTIN_RSQRTSS,
6b889d89 17253 IX86_BUILTIN_RSQRTF,
eb701deb 17254 IX86_BUILTIN_SQRTPS,
3dc0f23a 17255 IX86_BUILTIN_SQRTPS_NR,
eb701deb
RH
17256 IX86_BUILTIN_SQRTSS,
17257
17258 IX86_BUILTIN_UNPCKHPS,
17259 IX86_BUILTIN_UNPCKLPS,
17260
17261 IX86_BUILTIN_ANDPS,
17262 IX86_BUILTIN_ANDNPS,
17263 IX86_BUILTIN_ORPS,
17264 IX86_BUILTIN_XORPS,
17265
17266 IX86_BUILTIN_EMMS,
17267 IX86_BUILTIN_LDMXCSR,
17268 IX86_BUILTIN_STMXCSR,
17269 IX86_BUILTIN_SFENCE,
17270
17271 /* 3DNow! Original */
17272 IX86_BUILTIN_FEMMS,
17273 IX86_BUILTIN_PAVGUSB,
17274 IX86_BUILTIN_PF2ID,
17275 IX86_BUILTIN_PFACC,
17276 IX86_BUILTIN_PFADD,
17277 IX86_BUILTIN_PFCMPEQ,
17278 IX86_BUILTIN_PFCMPGE,
17279 IX86_BUILTIN_PFCMPGT,
17280 IX86_BUILTIN_PFMAX,
17281 IX86_BUILTIN_PFMIN,
17282 IX86_BUILTIN_PFMUL,
17283 IX86_BUILTIN_PFRCP,
17284 IX86_BUILTIN_PFRCPIT1,
17285 IX86_BUILTIN_PFRCPIT2,
17286 IX86_BUILTIN_PFRSQIT1,
17287 IX86_BUILTIN_PFRSQRT,
17288 IX86_BUILTIN_PFSUB,
17289 IX86_BUILTIN_PFSUBR,
17290 IX86_BUILTIN_PI2FD,
17291 IX86_BUILTIN_PMULHRW,
17292
17293 /* 3DNow! Athlon Extensions */
17294 IX86_BUILTIN_PF2IW,
17295 IX86_BUILTIN_PFNACC,
17296 IX86_BUILTIN_PFPNACC,
17297 IX86_BUILTIN_PI2FW,
17298 IX86_BUILTIN_PSWAPDSI,
17299 IX86_BUILTIN_PSWAPDSF,
17300
17301 /* SSE2 */
17302 IX86_BUILTIN_ADDPD,
17303 IX86_BUILTIN_ADDSD,
17304 IX86_BUILTIN_DIVPD,
17305 IX86_BUILTIN_DIVSD,
17306 IX86_BUILTIN_MULPD,
17307 IX86_BUILTIN_MULSD,
17308 IX86_BUILTIN_SUBPD,
17309 IX86_BUILTIN_SUBSD,
17310
17311 IX86_BUILTIN_CMPEQPD,
17312 IX86_BUILTIN_CMPLTPD,
17313 IX86_BUILTIN_CMPLEPD,
17314 IX86_BUILTIN_CMPGTPD,
17315 IX86_BUILTIN_CMPGEPD,
17316 IX86_BUILTIN_CMPNEQPD,
17317 IX86_BUILTIN_CMPNLTPD,
17318 IX86_BUILTIN_CMPNLEPD,
17319 IX86_BUILTIN_CMPNGTPD,
17320 IX86_BUILTIN_CMPNGEPD,
17321 IX86_BUILTIN_CMPORDPD,
17322 IX86_BUILTIN_CMPUNORDPD,
eb701deb
RH
17323 IX86_BUILTIN_CMPEQSD,
17324 IX86_BUILTIN_CMPLTSD,
17325 IX86_BUILTIN_CMPLESD,
17326 IX86_BUILTIN_CMPNEQSD,
17327 IX86_BUILTIN_CMPNLTSD,
17328 IX86_BUILTIN_CMPNLESD,
17329 IX86_BUILTIN_CMPORDSD,
17330 IX86_BUILTIN_CMPUNORDSD,
eb701deb
RH
17331
17332 IX86_BUILTIN_COMIEQSD,
17333 IX86_BUILTIN_COMILTSD,
17334 IX86_BUILTIN_COMILESD,
17335 IX86_BUILTIN_COMIGTSD,
17336 IX86_BUILTIN_COMIGESD,
17337 IX86_BUILTIN_COMINEQSD,
17338 IX86_BUILTIN_UCOMIEQSD,
17339 IX86_BUILTIN_UCOMILTSD,
17340 IX86_BUILTIN_UCOMILESD,
17341 IX86_BUILTIN_UCOMIGTSD,
17342 IX86_BUILTIN_UCOMIGESD,
17343 IX86_BUILTIN_UCOMINEQSD,
17344
17345 IX86_BUILTIN_MAXPD,
17346 IX86_BUILTIN_MAXSD,
17347 IX86_BUILTIN_MINPD,
17348 IX86_BUILTIN_MINSD,
17349
17350 IX86_BUILTIN_ANDPD,
17351 IX86_BUILTIN_ANDNPD,
17352 IX86_BUILTIN_ORPD,
17353 IX86_BUILTIN_XORPD,
17354
17355 IX86_BUILTIN_SQRTPD,
17356 IX86_BUILTIN_SQRTSD,
17357
17358 IX86_BUILTIN_UNPCKHPD,
17359 IX86_BUILTIN_UNPCKLPD,
17360
17361 IX86_BUILTIN_SHUFPD,
17362
17363 IX86_BUILTIN_LOADUPD,
17364 IX86_BUILTIN_STOREUPD,
17365 IX86_BUILTIN_MOVSD,
17366
17367 IX86_BUILTIN_LOADHPD,
17368 IX86_BUILTIN_LOADLPD,
17369
17370 IX86_BUILTIN_CVTDQ2PD,
17371 IX86_BUILTIN_CVTDQ2PS,
17372
17373 IX86_BUILTIN_CVTPD2DQ,
17374 IX86_BUILTIN_CVTPD2PI,
17375 IX86_BUILTIN_CVTPD2PS,
17376 IX86_BUILTIN_CVTTPD2DQ,
17377 IX86_BUILTIN_CVTTPD2PI,
17378
17379 IX86_BUILTIN_CVTPI2PD,
17380 IX86_BUILTIN_CVTSI2SD,
17381 IX86_BUILTIN_CVTSI642SD,
17382
17383 IX86_BUILTIN_CVTSD2SI,
17384 IX86_BUILTIN_CVTSD2SI64,
17385 IX86_BUILTIN_CVTSD2SS,
17386 IX86_BUILTIN_CVTSS2SD,
17387 IX86_BUILTIN_CVTTSD2SI,
17388 IX86_BUILTIN_CVTTSD2SI64,
17389
17390 IX86_BUILTIN_CVTPS2DQ,
17391 IX86_BUILTIN_CVTPS2PD,
17392 IX86_BUILTIN_CVTTPS2DQ,
17393
17394 IX86_BUILTIN_MOVNTI,
17395 IX86_BUILTIN_MOVNTPD,
17396 IX86_BUILTIN_MOVNTDQ,
17397
17398 /* SSE2 MMX */
17399 IX86_BUILTIN_MASKMOVDQU,
17400 IX86_BUILTIN_MOVMSKPD,
17401 IX86_BUILTIN_PMOVMSKB128,
eb701deb
RH
17402
17403 IX86_BUILTIN_PACKSSWB128,
17404 IX86_BUILTIN_PACKSSDW128,
17405 IX86_BUILTIN_PACKUSWB128,
17406
17407 IX86_BUILTIN_PADDB128,
17408 IX86_BUILTIN_PADDW128,
17409 IX86_BUILTIN_PADDD128,
17410 IX86_BUILTIN_PADDQ128,
17411 IX86_BUILTIN_PADDSB128,
17412 IX86_BUILTIN_PADDSW128,
17413 IX86_BUILTIN_PADDUSB128,
17414 IX86_BUILTIN_PADDUSW128,
17415 IX86_BUILTIN_PSUBB128,
17416 IX86_BUILTIN_PSUBW128,
17417 IX86_BUILTIN_PSUBD128,
17418 IX86_BUILTIN_PSUBQ128,
17419 IX86_BUILTIN_PSUBSB128,
17420 IX86_BUILTIN_PSUBSW128,
17421 IX86_BUILTIN_PSUBUSB128,
17422 IX86_BUILTIN_PSUBUSW128,
17423
17424 IX86_BUILTIN_PAND128,
17425 IX86_BUILTIN_PANDN128,
17426 IX86_BUILTIN_POR128,
17427 IX86_BUILTIN_PXOR128,
17428
17429 IX86_BUILTIN_PAVGB128,
17430 IX86_BUILTIN_PAVGW128,
17431
17432 IX86_BUILTIN_PCMPEQB128,
17433 IX86_BUILTIN_PCMPEQW128,
17434 IX86_BUILTIN_PCMPEQD128,
17435 IX86_BUILTIN_PCMPGTB128,
17436 IX86_BUILTIN_PCMPGTW128,
17437 IX86_BUILTIN_PCMPGTD128,
17438
17439 IX86_BUILTIN_PMADDWD128,
17440
17441 IX86_BUILTIN_PMAXSW128,
17442 IX86_BUILTIN_PMAXUB128,
17443 IX86_BUILTIN_PMINSW128,
17444 IX86_BUILTIN_PMINUB128,
17445
17446 IX86_BUILTIN_PMULUDQ,
17447 IX86_BUILTIN_PMULUDQ128,
17448 IX86_BUILTIN_PMULHUW128,
17449 IX86_BUILTIN_PMULHW128,
17450 IX86_BUILTIN_PMULLW128,
17451
17452 IX86_BUILTIN_PSADBW128,
17453 IX86_BUILTIN_PSHUFHW,
17454 IX86_BUILTIN_PSHUFLW,
17455 IX86_BUILTIN_PSHUFD,
17456
eb701deb
RH
17457 IX86_BUILTIN_PSLLDQI128,
17458 IX86_BUILTIN_PSLLWI128,
17459 IX86_BUILTIN_PSLLDI128,
17460 IX86_BUILTIN_PSLLQI128,
17461 IX86_BUILTIN_PSRAWI128,
17462 IX86_BUILTIN_PSRADI128,
17463 IX86_BUILTIN_PSRLDQI128,
17464 IX86_BUILTIN_PSRLWI128,
17465 IX86_BUILTIN_PSRLDI128,
17466 IX86_BUILTIN_PSRLQI128,
17467
24bfafbc
RH
17468 IX86_BUILTIN_PSLLDQ128,
17469 IX86_BUILTIN_PSLLW128,
17470 IX86_BUILTIN_PSLLD128,
17471 IX86_BUILTIN_PSLLQ128,
17472 IX86_BUILTIN_PSRAW128,
17473 IX86_BUILTIN_PSRAD128,
17474 IX86_BUILTIN_PSRLW128,
17475 IX86_BUILTIN_PSRLD128,
17476 IX86_BUILTIN_PSRLQ128,
17477
eb701deb
RH
17478 IX86_BUILTIN_PUNPCKHBW128,
17479 IX86_BUILTIN_PUNPCKHWD128,
17480 IX86_BUILTIN_PUNPCKHDQ128,
17481 IX86_BUILTIN_PUNPCKHQDQ128,
17482 IX86_BUILTIN_PUNPCKLBW128,
17483 IX86_BUILTIN_PUNPCKLWD128,
17484 IX86_BUILTIN_PUNPCKLDQ128,
17485 IX86_BUILTIN_PUNPCKLQDQ128,
17486
17487 IX86_BUILTIN_CLFLUSH,
17488 IX86_BUILTIN_MFENCE,
17489 IX86_BUILTIN_LFENCE,
17490
17491 /* Prescott New Instructions. */
17492 IX86_BUILTIN_ADDSUBPS,
17493 IX86_BUILTIN_HADDPS,
17494 IX86_BUILTIN_HSUBPS,
17495 IX86_BUILTIN_MOVSHDUP,
17496 IX86_BUILTIN_MOVSLDUP,
17497 IX86_BUILTIN_ADDSUBPD,
17498 IX86_BUILTIN_HADDPD,
17499 IX86_BUILTIN_HSUBPD,
17500 IX86_BUILTIN_LDDQU,
17501
17502 IX86_BUILTIN_MONITOR,
17503 IX86_BUILTIN_MWAIT,
17504
b1875f52
L
17505 /* SSSE3. */
17506 IX86_BUILTIN_PHADDW,
17507 IX86_BUILTIN_PHADDD,
17508 IX86_BUILTIN_PHADDSW,
17509 IX86_BUILTIN_PHSUBW,
17510 IX86_BUILTIN_PHSUBD,
17511 IX86_BUILTIN_PHSUBSW,
17512 IX86_BUILTIN_PMADDUBSW,
17513 IX86_BUILTIN_PMULHRSW,
17514 IX86_BUILTIN_PSHUFB,
17515 IX86_BUILTIN_PSIGNB,
17516 IX86_BUILTIN_PSIGNW,
17517 IX86_BUILTIN_PSIGND,
17518 IX86_BUILTIN_PALIGNR,
17519 IX86_BUILTIN_PABSB,
17520 IX86_BUILTIN_PABSW,
17521 IX86_BUILTIN_PABSD,
17522
17523 IX86_BUILTIN_PHADDW128,
17524 IX86_BUILTIN_PHADDD128,
17525 IX86_BUILTIN_PHADDSW128,
17526 IX86_BUILTIN_PHSUBW128,
17527 IX86_BUILTIN_PHSUBD128,
17528 IX86_BUILTIN_PHSUBSW128,
17529 IX86_BUILTIN_PMADDUBSW128,
17530 IX86_BUILTIN_PMULHRSW128,
17531 IX86_BUILTIN_PSHUFB128,
17532 IX86_BUILTIN_PSIGNB128,
17533 IX86_BUILTIN_PSIGNW128,
17534 IX86_BUILTIN_PSIGND128,
17535 IX86_BUILTIN_PALIGNR128,
17536 IX86_BUILTIN_PABSB128,
17537 IX86_BUILTIN_PABSW128,
17538 IX86_BUILTIN_PABSD128,
17539
21efb4d4
HJ
17540 /* AMDFAM10 - SSE4A New Instructions. */
17541 IX86_BUILTIN_MOVNTSD,
17542 IX86_BUILTIN_MOVNTSS,
17543 IX86_BUILTIN_EXTRQI,
17544 IX86_BUILTIN_EXTRQ,
17545 IX86_BUILTIN_INSERTQI,
17546 IX86_BUILTIN_INSERTQ,
17547
9a5cee02
L
17548 /* SSE4.1. */
17549 IX86_BUILTIN_BLENDPD,
17550 IX86_BUILTIN_BLENDPS,
17551 IX86_BUILTIN_BLENDVPD,
17552 IX86_BUILTIN_BLENDVPS,
17553 IX86_BUILTIN_PBLENDVB128,
17554 IX86_BUILTIN_PBLENDW128,
17555
17556 IX86_BUILTIN_DPPD,
17557 IX86_BUILTIN_DPPS,
17558
17559 IX86_BUILTIN_INSERTPS128,
17560
17561 IX86_BUILTIN_MOVNTDQA,
17562 IX86_BUILTIN_MPSADBW128,
17563 IX86_BUILTIN_PACKUSDW128,
17564 IX86_BUILTIN_PCMPEQQ,
17565 IX86_BUILTIN_PHMINPOSUW128,
17566
17567 IX86_BUILTIN_PMAXSB128,
17568 IX86_BUILTIN_PMAXSD128,
17569 IX86_BUILTIN_PMAXUD128,
17570 IX86_BUILTIN_PMAXUW128,
17571
17572 IX86_BUILTIN_PMINSB128,
17573 IX86_BUILTIN_PMINSD128,
17574 IX86_BUILTIN_PMINUD128,
17575 IX86_BUILTIN_PMINUW128,
17576
17577 IX86_BUILTIN_PMOVSXBW128,
17578 IX86_BUILTIN_PMOVSXBD128,
17579 IX86_BUILTIN_PMOVSXBQ128,
17580 IX86_BUILTIN_PMOVSXWD128,
17581 IX86_BUILTIN_PMOVSXWQ128,
17582 IX86_BUILTIN_PMOVSXDQ128,
17583
17584 IX86_BUILTIN_PMOVZXBW128,
17585 IX86_BUILTIN_PMOVZXBD128,
17586 IX86_BUILTIN_PMOVZXBQ128,
17587 IX86_BUILTIN_PMOVZXWD128,
17588 IX86_BUILTIN_PMOVZXWQ128,
17589 IX86_BUILTIN_PMOVZXDQ128,
17590
17591 IX86_BUILTIN_PMULDQ128,
17592 IX86_BUILTIN_PMULLD128,
17593
17594 IX86_BUILTIN_ROUNDPD,
17595 IX86_BUILTIN_ROUNDPS,
17596 IX86_BUILTIN_ROUNDSD,
17597 IX86_BUILTIN_ROUNDSS,
17598
17599 IX86_BUILTIN_PTESTZ,
17600 IX86_BUILTIN_PTESTC,
17601 IX86_BUILTIN_PTESTNZC,
17602
eb701deb
RH
17603 IX86_BUILTIN_VEC_INIT_V2SI,
17604 IX86_BUILTIN_VEC_INIT_V4HI,
17605 IX86_BUILTIN_VEC_INIT_V8QI,
17606 IX86_BUILTIN_VEC_EXT_V2DF,
17607 IX86_BUILTIN_VEC_EXT_V2DI,
17608 IX86_BUILTIN_VEC_EXT_V4SF,
ed9b5396 17609 IX86_BUILTIN_VEC_EXT_V4SI,
eb701deb 17610 IX86_BUILTIN_VEC_EXT_V8HI,
0f2698d0 17611 IX86_BUILTIN_VEC_EXT_V2SI,
eb701deb 17612 IX86_BUILTIN_VEC_EXT_V4HI,
9a5cee02
L
17613 IX86_BUILTIN_VEC_EXT_V16QI,
17614 IX86_BUILTIN_VEC_SET_V2DI,
17615 IX86_BUILTIN_VEC_SET_V4SF,
17616 IX86_BUILTIN_VEC_SET_V4SI,
eb701deb
RH
17617 IX86_BUILTIN_VEC_SET_V8HI,
17618 IX86_BUILTIN_VEC_SET_V4HI,
9a5cee02 17619 IX86_BUILTIN_VEC_SET_V16QI,
eb701deb 17620
b40c4f68
UB
17621 IX86_BUILTIN_VEC_PACK_SFIX,
17622
3b8dd071
L
17623 /* SSE4.2. */
17624 IX86_BUILTIN_CRC32QI,
17625 IX86_BUILTIN_CRC32HI,
17626 IX86_BUILTIN_CRC32SI,
17627 IX86_BUILTIN_CRC32DI,
17628
06f4e35d
L
17629 IX86_BUILTIN_PCMPESTRI128,
17630 IX86_BUILTIN_PCMPESTRM128,
17631 IX86_BUILTIN_PCMPESTRA128,
17632 IX86_BUILTIN_PCMPESTRC128,
17633 IX86_BUILTIN_PCMPESTRO128,
17634 IX86_BUILTIN_PCMPESTRS128,
17635 IX86_BUILTIN_PCMPESTRZ128,
17636 IX86_BUILTIN_PCMPISTRI128,
17637 IX86_BUILTIN_PCMPISTRM128,
17638 IX86_BUILTIN_PCMPISTRA128,
17639 IX86_BUILTIN_PCMPISTRC128,
17640 IX86_BUILTIN_PCMPISTRO128,
17641 IX86_BUILTIN_PCMPISTRS128,
17642 IX86_BUILTIN_PCMPISTRZ128,
17643
3b8dd071
L
17644 IX86_BUILTIN_PCMPGTQ,
17645
8b96a312
L
17646 /* AES instructions */
17647 IX86_BUILTIN_AESENC128,
17648 IX86_BUILTIN_AESENCLAST128,
17649 IX86_BUILTIN_AESDEC128,
17650 IX86_BUILTIN_AESDECLAST128,
17651 IX86_BUILTIN_AESIMC128,
17652 IX86_BUILTIN_AESKEYGENASSIST128,
17653
17654 /* PCLMUL instruction */
17655 IX86_BUILTIN_PCLMULQDQ128,
17656
edc5bbcd
UB
17657 /* TFmode support builtins. */
17658 IX86_BUILTIN_INFQ,
17659 IX86_BUILTIN_FABSQ,
17660 IX86_BUILTIN_COPYSIGNQ,
17661
04e1d06b
MM
17662 /* SSE5 instructions */
17663 IX86_BUILTIN_FMADDSS,
17664 IX86_BUILTIN_FMADDSD,
17665 IX86_BUILTIN_FMADDPS,
17666 IX86_BUILTIN_FMADDPD,
17667 IX86_BUILTIN_FMSUBSS,
17668 IX86_BUILTIN_FMSUBSD,
17669 IX86_BUILTIN_FMSUBPS,
17670 IX86_BUILTIN_FMSUBPD,
17671 IX86_BUILTIN_FNMADDSS,
17672 IX86_BUILTIN_FNMADDSD,
17673 IX86_BUILTIN_FNMADDPS,
17674 IX86_BUILTIN_FNMADDPD,
17675 IX86_BUILTIN_FNMSUBSS,
17676 IX86_BUILTIN_FNMSUBSD,
17677 IX86_BUILTIN_FNMSUBPS,
17678 IX86_BUILTIN_FNMSUBPD,
17679 IX86_BUILTIN_PCMOV_V2DI,
17680 IX86_BUILTIN_PCMOV_V4SI,
17681 IX86_BUILTIN_PCMOV_V8HI,
17682 IX86_BUILTIN_PCMOV_V16QI,
17683 IX86_BUILTIN_PCMOV_V4SF,
17684 IX86_BUILTIN_PCMOV_V2DF,
17685 IX86_BUILTIN_PPERM,
17686 IX86_BUILTIN_PERMPS,
17687 IX86_BUILTIN_PERMPD,
17688 IX86_BUILTIN_PMACSSWW,
17689 IX86_BUILTIN_PMACSWW,
17690 IX86_BUILTIN_PMACSSWD,
17691 IX86_BUILTIN_PMACSWD,
17692 IX86_BUILTIN_PMACSSDD,
17693 IX86_BUILTIN_PMACSDD,
17694 IX86_BUILTIN_PMACSSDQL,
17695 IX86_BUILTIN_PMACSSDQH,
17696 IX86_BUILTIN_PMACSDQL,
17697 IX86_BUILTIN_PMACSDQH,
17698 IX86_BUILTIN_PMADCSSWD,
17699 IX86_BUILTIN_PMADCSWD,
17700 IX86_BUILTIN_PHADDBW,
17701 IX86_BUILTIN_PHADDBD,
17702 IX86_BUILTIN_PHADDBQ,
17703 IX86_BUILTIN_PHADDWD,
17704 IX86_BUILTIN_PHADDWQ,
17705 IX86_BUILTIN_PHADDDQ,
17706 IX86_BUILTIN_PHADDUBW,
17707 IX86_BUILTIN_PHADDUBD,
17708 IX86_BUILTIN_PHADDUBQ,
17709 IX86_BUILTIN_PHADDUWD,
17710 IX86_BUILTIN_PHADDUWQ,
17711 IX86_BUILTIN_PHADDUDQ,
17712 IX86_BUILTIN_PHSUBBW,
17713 IX86_BUILTIN_PHSUBWD,
17714 IX86_BUILTIN_PHSUBDQ,
17715 IX86_BUILTIN_PROTB,
17716 IX86_BUILTIN_PROTW,
17717 IX86_BUILTIN_PROTD,
17718 IX86_BUILTIN_PROTQ,
17719 IX86_BUILTIN_PROTB_IMM,
17720 IX86_BUILTIN_PROTW_IMM,
17721 IX86_BUILTIN_PROTD_IMM,
17722 IX86_BUILTIN_PROTQ_IMM,
17723 IX86_BUILTIN_PSHLB,
17724 IX86_BUILTIN_PSHLW,
17725 IX86_BUILTIN_PSHLD,
17726 IX86_BUILTIN_PSHLQ,
17727 IX86_BUILTIN_PSHAB,
17728 IX86_BUILTIN_PSHAW,
17729 IX86_BUILTIN_PSHAD,
17730 IX86_BUILTIN_PSHAQ,
17731 IX86_BUILTIN_FRCZSS,
17732 IX86_BUILTIN_FRCZSD,
17733 IX86_BUILTIN_FRCZPS,
17734 IX86_BUILTIN_FRCZPD,
17735 IX86_BUILTIN_CVTPH2PS,
17736 IX86_BUILTIN_CVTPS2PH,
17737
17738 IX86_BUILTIN_COMEQSS,
17739 IX86_BUILTIN_COMNESS,
17740 IX86_BUILTIN_COMLTSS,
17741 IX86_BUILTIN_COMLESS,
17742 IX86_BUILTIN_COMGTSS,
17743 IX86_BUILTIN_COMGESS,
17744 IX86_BUILTIN_COMUEQSS,
17745 IX86_BUILTIN_COMUNESS,
17746 IX86_BUILTIN_COMULTSS,
17747 IX86_BUILTIN_COMULESS,
17748 IX86_BUILTIN_COMUGTSS,
17749 IX86_BUILTIN_COMUGESS,
17750 IX86_BUILTIN_COMORDSS,
17751 IX86_BUILTIN_COMUNORDSS,
17752 IX86_BUILTIN_COMFALSESS,
17753 IX86_BUILTIN_COMTRUESS,
17754
17755 IX86_BUILTIN_COMEQSD,
17756 IX86_BUILTIN_COMNESD,
17757 IX86_BUILTIN_COMLTSD,
17758 IX86_BUILTIN_COMLESD,
17759 IX86_BUILTIN_COMGTSD,
17760 IX86_BUILTIN_COMGESD,
17761 IX86_BUILTIN_COMUEQSD,
17762 IX86_BUILTIN_COMUNESD,
17763 IX86_BUILTIN_COMULTSD,
17764 IX86_BUILTIN_COMULESD,
17765 IX86_BUILTIN_COMUGTSD,
17766 IX86_BUILTIN_COMUGESD,
17767 IX86_BUILTIN_COMORDSD,
17768 IX86_BUILTIN_COMUNORDSD,
17769 IX86_BUILTIN_COMFALSESD,
17770 IX86_BUILTIN_COMTRUESD,
17771
17772 IX86_BUILTIN_COMEQPS,
17773 IX86_BUILTIN_COMNEPS,
17774 IX86_BUILTIN_COMLTPS,
17775 IX86_BUILTIN_COMLEPS,
17776 IX86_BUILTIN_COMGTPS,
17777 IX86_BUILTIN_COMGEPS,
17778 IX86_BUILTIN_COMUEQPS,
17779 IX86_BUILTIN_COMUNEPS,
17780 IX86_BUILTIN_COMULTPS,
17781 IX86_BUILTIN_COMULEPS,
17782 IX86_BUILTIN_COMUGTPS,
17783 IX86_BUILTIN_COMUGEPS,
17784 IX86_BUILTIN_COMORDPS,
17785 IX86_BUILTIN_COMUNORDPS,
17786 IX86_BUILTIN_COMFALSEPS,
17787 IX86_BUILTIN_COMTRUEPS,
17788
17789 IX86_BUILTIN_COMEQPD,
17790 IX86_BUILTIN_COMNEPD,
17791 IX86_BUILTIN_COMLTPD,
17792 IX86_BUILTIN_COMLEPD,
17793 IX86_BUILTIN_COMGTPD,
17794 IX86_BUILTIN_COMGEPD,
17795 IX86_BUILTIN_COMUEQPD,
17796 IX86_BUILTIN_COMUNEPD,
17797 IX86_BUILTIN_COMULTPD,
17798 IX86_BUILTIN_COMULEPD,
17799 IX86_BUILTIN_COMUGTPD,
17800 IX86_BUILTIN_COMUGEPD,
17801 IX86_BUILTIN_COMORDPD,
17802 IX86_BUILTIN_COMUNORDPD,
17803 IX86_BUILTIN_COMFALSEPD,
17804 IX86_BUILTIN_COMTRUEPD,
17805
17806 IX86_BUILTIN_PCOMEQUB,
17807 IX86_BUILTIN_PCOMNEUB,
17808 IX86_BUILTIN_PCOMLTUB,
17809 IX86_BUILTIN_PCOMLEUB,
17810 IX86_BUILTIN_PCOMGTUB,
17811 IX86_BUILTIN_PCOMGEUB,
17812 IX86_BUILTIN_PCOMFALSEUB,
17813 IX86_BUILTIN_PCOMTRUEUB,
17814 IX86_BUILTIN_PCOMEQUW,
17815 IX86_BUILTIN_PCOMNEUW,
17816 IX86_BUILTIN_PCOMLTUW,
17817 IX86_BUILTIN_PCOMLEUW,
17818 IX86_BUILTIN_PCOMGTUW,
17819 IX86_BUILTIN_PCOMGEUW,
17820 IX86_BUILTIN_PCOMFALSEUW,
17821 IX86_BUILTIN_PCOMTRUEUW,
17822 IX86_BUILTIN_PCOMEQUD,
17823 IX86_BUILTIN_PCOMNEUD,
17824 IX86_BUILTIN_PCOMLTUD,
17825 IX86_BUILTIN_PCOMLEUD,
17826 IX86_BUILTIN_PCOMGTUD,
17827 IX86_BUILTIN_PCOMGEUD,
17828 IX86_BUILTIN_PCOMFALSEUD,
17829 IX86_BUILTIN_PCOMTRUEUD,
17830 IX86_BUILTIN_PCOMEQUQ,
17831 IX86_BUILTIN_PCOMNEUQ,
17832 IX86_BUILTIN_PCOMLTUQ,
17833 IX86_BUILTIN_PCOMLEUQ,
17834 IX86_BUILTIN_PCOMGTUQ,
17835 IX86_BUILTIN_PCOMGEUQ,
17836 IX86_BUILTIN_PCOMFALSEUQ,
17837 IX86_BUILTIN_PCOMTRUEUQ,
17838
17839 IX86_BUILTIN_PCOMEQB,
17840 IX86_BUILTIN_PCOMNEB,
17841 IX86_BUILTIN_PCOMLTB,
17842 IX86_BUILTIN_PCOMLEB,
17843 IX86_BUILTIN_PCOMGTB,
17844 IX86_BUILTIN_PCOMGEB,
17845 IX86_BUILTIN_PCOMFALSEB,
17846 IX86_BUILTIN_PCOMTRUEB,
17847 IX86_BUILTIN_PCOMEQW,
17848 IX86_BUILTIN_PCOMNEW,
17849 IX86_BUILTIN_PCOMLTW,
17850 IX86_BUILTIN_PCOMLEW,
17851 IX86_BUILTIN_PCOMGTW,
17852 IX86_BUILTIN_PCOMGEW,
17853 IX86_BUILTIN_PCOMFALSEW,
17854 IX86_BUILTIN_PCOMTRUEW,
17855 IX86_BUILTIN_PCOMEQD,
17856 IX86_BUILTIN_PCOMNED,
17857 IX86_BUILTIN_PCOMLTD,
17858 IX86_BUILTIN_PCOMLED,
17859 IX86_BUILTIN_PCOMGTD,
17860 IX86_BUILTIN_PCOMGED,
17861 IX86_BUILTIN_PCOMFALSED,
17862 IX86_BUILTIN_PCOMTRUED,
17863 IX86_BUILTIN_PCOMEQQ,
17864 IX86_BUILTIN_PCOMNEQ,
17865 IX86_BUILTIN_PCOMLTQ,
17866 IX86_BUILTIN_PCOMLEQ,
17867 IX86_BUILTIN_PCOMGTQ,
17868 IX86_BUILTIN_PCOMGEQ,
17869 IX86_BUILTIN_PCOMFALSEQ,
17870 IX86_BUILTIN_PCOMTRUEQ,
17871
eb701deb
RH
17872 IX86_BUILTIN_MAX
17873};
17874
4f7d8547
RG
17875/* Table for the ix86 builtin decls. */
17876static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
17877
110abdbc 17878/* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
4f7d8547
RG
 17879 only if the target_flags include one of MASK. Stores the function decl
 17880 in the ix86_builtins array. Returns the function decl, or NULL_TREE
 17881 if the builtin was not added. */
17882
17883static inline tree
17884def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
17885{
17886 tree decl = NULL_TREE;
17887
0a1c5e55 17888 if (mask & ix86_isa_flags
853a33f3 17889 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
4f7d8547
RG
17890 {
17891 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
17892 NULL, NULL_TREE);
17893 ix86_builtins[(int) code] = decl;
17894 }
17895
17896 return decl;
17897}
17898
17899/* Like def_builtin, but also marks the function decl "const". */
17900
17901static inline tree
17902def_builtin_const (int mask, const char *name, tree type,
17903 enum ix86_builtins code)
17904{
17905 tree decl = def_builtin (mask, name, type, code);
17906 if (decl)
17907 TREE_READONLY (decl) = 1;
17908 return decl;
17909}
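
/* Editorial aside -- representative use of the two helpers above, in
   the style of the builtin-initialization code later in this file
   (the type node name v4sf_ftype_v4sf is illustrative):

     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps",
			v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);

   The decl is recorded in ix86_builtins[IX86_BUILTIN_SQRTPS] only when
   the SSE bit is present in ix86_isa_flags, so lookups on targets
   without SSE see NULL_TREE.  */
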
bd793c65 17910
e358acde
RH
17911/* Bits for builtin_description.flag. */
17912
17913/* Set when we don't support the comparison natively, and should
17914 swap_comparison in order to support it. */
17915#define BUILTIN_DESC_SWAP_OPERANDS 1
17916
bd793c65
BS
17917struct builtin_description
17918{
8b60264b
KG
17919 const unsigned int mask;
17920 const enum insn_code icode;
17921 const char *const name;
17922 const enum ix86_builtins code;
17923 const enum rtx_code comparison;
06f4e35d 17924 const int flag;
bd793c65
BS
17925};
17926
8b60264b 17927static const struct builtin_description bdesc_comi[] =
bd793c65 17928{
853a33f3
UB
17929 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
17930 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
17931 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
17932 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
17933 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
17934 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
17935 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
17936 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
17937 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
17938 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
17939 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
17940 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
17941 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
17942 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
17943 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
17944 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
17945 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
17946 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
17947 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
17948 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
17949 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
17950 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
17951 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
17952 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
bd793c65
BS
17953};
17954
9a5cee02
L
17955static const struct builtin_description bdesc_ptest[] =
17956{
17957 /* SSE4.1 */
04e1d06b
MM
17958 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
17959 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
17960 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
9a5cee02
L
17961};
17962
06f4e35d
L
17963static const struct builtin_description bdesc_pcmpestr[] =
17964{
17965 /* SSE4.2 */
9415ab7d
TN
17966 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
17967 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
17968 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
17969 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
17970 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
17971 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
17972 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
06f4e35d
L
17973};
17974
17975static const struct builtin_description bdesc_pcmpistr[] =
17976{
17977 /* SSE4.2 */
9415ab7d
TN
17978 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
17979 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
17980 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
17981 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
17982 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
17983 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
17984 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
06f4e35d
L
17985};
17986
3b8dd071
L
17987static const struct builtin_description bdesc_crc32[] =
17988{
17989 /* SSE4.2 */
9415ab7d
TN
17990 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
17991 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
17992 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
17993 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
3b8dd071
L
17994};
17995
ec2e9a15
L
17996/* SSE */
17997enum sse_builtin_type
17998{
17999 SSE_CTYPE_UNKNOWN,
18000 V4SF_FTYPE_V4SF_INT,
18001 V2DI_FTYPE_V2DI_INT,
18002 V2DF_FTYPE_V2DF_INT,
18003 V16QI_FTYPE_V16QI_V16QI_V16QI,
18004 V4SF_FTYPE_V4SF_V4SF_V4SF,
18005 V2DF_FTYPE_V2DF_V2DF_V2DF,
18006 V16QI_FTYPE_V16QI_V16QI_INT,
18007 V8HI_FTYPE_V8HI_V8HI_INT,
18008 V4SI_FTYPE_V4SI_V4SI_INT,
18009 V4SF_FTYPE_V4SF_V4SF_INT,
18010 V2DI_FTYPE_V2DI_V2DI_INT,
18011 V2DF_FTYPE_V2DF_V2DF_INT
18012};
18013
18014/* SSE builtins with variable number of arguments. */
18015static const struct builtin_description bdesc_sse_args[] =
9a5cee02 18016{
abe08645 18017 /* SSE */
ec2e9a15 18018 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
abe08645
L
18019
18020 /* SSE2 */
ec2e9a15 18021 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
abe08645 18022
9a5cee02 18023 /* SSE4.1 */
ec2e9a15
L
18024 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18025 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18026 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
18027 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
18028 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18029 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18030 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18031 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
18032 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
18033 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
18034
18035 /* SSE4.1 and SSE5 */
18036 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
18037 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
18038 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18039 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18040
18041 /* AES */
18042 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
8b96a312
L
18043
18044 /* PCLMUL */
ec2e9a15 18045 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
9a5cee02
L
18046};

static const struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },

  /* Special. */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },

  /* SSE3 MMX */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },

  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },

  /* AES */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, 0 },
};

static const struct builtin_description bdesc_1arg[] =
{
  /* SSE */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS_NR, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },

  /* AES */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, 0 },
};
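
/* A minimal usage sketch (illustration only; the function name below
   is hypothetical).  bdesc_1arg entries bind single-operand builtins;
   with -mssse3 the "__builtin_ia32_pabsb128" entry above provides the
   primitive used by _mm_abs_epi8 in tmmintrin.h:

	typedef char v16qi __attribute__ ((vector_size (16)));

	v16qi
	abs_epi8 (v16qi x)
	{
	  return __builtin_ia32_pabsb128 (x);
	}
*/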

/* SSE5 */
enum multi_arg_type {
  MULTI_ARG_UNKNOWN,
  MULTI_ARG_3_SF,
  MULTI_ARG_3_DF,
  MULTI_ARG_3_DI,
  MULTI_ARG_3_SI,
  MULTI_ARG_3_SI_DI,
  MULTI_ARG_3_HI,
  MULTI_ARG_3_HI_SI,
  MULTI_ARG_3_QI,
  MULTI_ARG_3_PERMPS,
  MULTI_ARG_3_PERMPD,
  MULTI_ARG_2_SF,
  MULTI_ARG_2_DF,
  MULTI_ARG_2_DI,
  MULTI_ARG_2_SI,
  MULTI_ARG_2_HI,
  MULTI_ARG_2_QI,
  MULTI_ARG_2_DI_IMM,
  MULTI_ARG_2_SI_IMM,
  MULTI_ARG_2_HI_IMM,
  MULTI_ARG_2_QI_IMM,
  MULTI_ARG_2_SF_CMP,
  MULTI_ARG_2_DF_CMP,
  MULTI_ARG_2_DI_CMP,
  MULTI_ARG_2_SI_CMP,
  MULTI_ARG_2_HI_CMP,
  MULTI_ARG_2_QI_CMP,
  MULTI_ARG_2_DI_TF,
  MULTI_ARG_2_SI_TF,
  MULTI_ARG_2_HI_TF,
  MULTI_ARG_2_QI_TF,
  MULTI_ARG_2_SF_TF,
  MULTI_ARG_2_DF_TF,
  MULTI_ARG_1_SF,
  MULTI_ARG_1_DF,
  MULTI_ARG_1_DI,
  MULTI_ARG_1_SI,
  MULTI_ARG_1_HI,
  MULTI_ARG_1_QI,
  MULTI_ARG_1_SI_DI,
  MULTI_ARG_1_HI_DI,
  MULTI_ARG_1_HI_SI,
  MULTI_ARG_1_QI_DI,
  MULTI_ARG_1_QI_SI,
  MULTI_ARG_1_QI_HI,
  MULTI_ARG_1_PH2PS,
  MULTI_ARG_1_PS2PH
};
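
/* A rough decoding of the MULTI_ARG_* names (see
   ix86_expand_multi_arg_builtin for the authoritative mapping): the
   digit gives the operand count, and the suffixes give the vector
   element mode(s).  A single mode suffix (e.g. MULTI_ARG_3_SF: three
   V4SFmode operands) means all operands share that mode; two suffixes
   (e.g. MULTI_ARG_1_QI_HI, used by the widening horizontal adds
   below) give the source and destination modes; _IMM marks a final
   immediate operand; _CMP and _TF mark comparison-style entries whose
   rtx code comes from the comparison field of the table.  */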

static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi", IX86_BUILTIN_PCMOV_V16QI, 0, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3, "__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3, "__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3, "__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
18670
18671 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
18672 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
18673 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
18674 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
18675 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
18676 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
18677 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
18678 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
18679
18680 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18681 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18682 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18683 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18684 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18685 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18686 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18687 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18688
18689 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18690 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18691 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18692 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18693 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18694 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18695 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18696 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18697};
18698
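/* Usage sketch (illustrative, not part of the compiler itself): once the
   table above has been registered, user code compiled with -msse5 can
   invoke the comparisons directly; each equal byte lane yields all-ones
   in the result mask:

     typedef char __v16qi __attribute__ ((__vector_size__ (16)));

     __v16qi
     byte_eq_mask (__v16qi a, __v16qi b)
     {
       return __builtin_ia32_pcomeqb (a, b);
     }

   The pcomneq* names are deliberate aliases: they reuse the
   IX86_BUILTIN_PCOMNE* codes, so both spellings resolve to the same NE
   comparison.  */
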
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  size_t i;

  tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V1DI_type_node
    = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
  tree V2DI_type_node
    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);

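  /* For reference (illustrative): these tree nodes describe the same vector
     types a user would spell with the vector_size attribute, e.g.
     V4SF_type_node corresponds to

       typedef float __v4sf __attribute__ ((__vector_size__ (16)));

     and V1DI_type_node to the single-element

       typedef long long __v1di __attribute__ ((__vector_size__ (8)));  */
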
  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node = build_pointer_type (
			     build_type_variant (char_type_node, 1, 0));
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pcfloat_type_node = build_pointer_type (
			     build_type_variant (float_type_node, 1, 0));
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node,
				V4SF_type_node, NULL_TREE);
  tree int64_ftype_v4sf
    = build_function_type_list (long_long_integer_type_node,
				V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int64
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V2SI_type_node, NULL_TREE);

  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node,
				integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree v2si_ftype_v2si_int
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, integer_type_node, NULL_TREE);
  tree v1di_ftype_v1di_int
    = build_function_type_list (V1DI_type_node,
				V1DI_type_node, integer_type_node, NULL_TREE);

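  /* Illustrative: build_function_type_list takes the return type first and
     a NULL_TREE-terminated list of argument types, so v4hi_ftype_v4hi_int
     above describes the user-level signature

       __v4hi f (__v4hi, int);

     which is the shape used later for builtins such as
     __builtin_ia32_psllwi.  */
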
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_unsigned_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node,
				unsigned_type_node, NULL_TREE);
  tree void_ftype_pcvoid_unsigned_unsigned
    = build_function_type_list (void_type_node, const_ptr_type_node,
				unsigned_type_node, unsigned_type_node,
				NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
				V8QI_type_node, V8QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node,
				pv2si_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
				pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
				pdi_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
				pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi
    = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi
    = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si
    = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi
    = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi
    = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v1di_ftype_v1di_v1di
    = build_function_type_list (V1DI_type_node,
				V1DI_type_node, V1DI_type_node, NULL_TREE);

  tree di_ftype_di_di_int
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				integer_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
				V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
				V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node = build_pointer_type (
			      build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);

  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4si_ftype_v2df_v2df
    = build_function_type_list (V4SI_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node,
				V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node,
				integer_type_node,
				NULL_TREE);
  tree v2df_ftype_v2df_pcdouble
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
				pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
				pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
				V16QI_type_node, V16QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
				V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, integer_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di_int
    = build_function_type_list (V2DI_type_node, V2DI_type_node,
				V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
				V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v1di_ftype_v8qi_v8qi
    = build_function_type_list (V1DI_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v1di_ftype_v2si_v2si
    = build_function_type_list (V1DI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
				V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v2di_ftype_v4si_v4si
    = build_function_type_list (V2DI_type_node,
				V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
				pchar_type_node, V16QI_type_node, NULL_TREE);

  tree v2di_ftype_v2di_unsigned_unsigned
    = build_function_type_list (V2DI_type_node, V2DI_type_node,
				unsigned_type_node, unsigned_type_node,
				NULL_TREE);
  tree v2di_ftype_v2di_v2di_unsigned_unsigned
    = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
				unsigned_type_node, unsigned_type_node,
				NULL_TREE);
  tree v2di_ftype_v2di_v16qi
    = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
				NULL_TREE);
  tree v2df_ftype_v2df_v2df_v2df
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node,
				V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node,
				V4SF_type_node, NULL_TREE);
  tree v8hi_ftype_v16qi
    = build_function_type_list (V8HI_type_node, V16QI_type_node,
				NULL_TREE);
  tree v4si_ftype_v16qi
    = build_function_type_list (V4SI_type_node, V16QI_type_node,
				NULL_TREE);
  tree v2di_ftype_v16qi
    = build_function_type_list (V2DI_type_node, V16QI_type_node,
				NULL_TREE);
  tree v4si_ftype_v8hi
    = build_function_type_list (V4SI_type_node, V8HI_type_node,
				NULL_TREE);
  tree v2di_ftype_v8hi
    = build_function_type_list (V2DI_type_node, V8HI_type_node,
				NULL_TREE);
  tree v2di_ftype_v4si
    = build_function_type_list (V2DI_type_node, V4SI_type_node,
				NULL_TREE);
  tree v2di_ftype_pv2di
    = build_function_type_list (V2DI_type_node, pv2di_type_node,
				NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi_int
    = build_function_type_list (V16QI_type_node, V16QI_type_node,
				V16QI_type_node, integer_type_node,
				NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi_v16qi
    = build_function_type_list (V16QI_type_node, V16QI_type_node,
				V16QI_type_node, V16QI_type_node,
				NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi_int
    = build_function_type_list (V8HI_type_node, V8HI_type_node,
				V8HI_type_node, integer_type_node,
				NULL_TREE);
  tree v4si_ftype_v4si_v4si_int
    = build_function_type_list (V4SI_type_node, V4SI_type_node,
				V4SI_type_node, integer_type_node,
				NULL_TREE);
  tree int_ftype_v2di_v2di
    = build_function_type_list (integer_type_node,
				V2DI_type_node, V2DI_type_node,
				NULL_TREE);
  tree int_ftype_v16qi_int_v16qi_int_int
    = build_function_type_list (integer_type_node,
				V16QI_type_node, integer_type_node,
				V16QI_type_node, integer_type_node,
				integer_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_int_v16qi_int_int
    = build_function_type_list (V16QI_type_node,
				V16QI_type_node, integer_type_node,
				V16QI_type_node, integer_type_node,
				integer_type_node, NULL_TREE);
  tree int_ftype_v16qi_v16qi_int
    = build_function_type_list (integer_type_node,
				V16QI_type_node, V16QI_type_node,
				integer_type_node, NULL_TREE);

  /* SSE5 instructions */
  tree v2di_ftype_v2di_v2di_v2di
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, V2DI_type_node,
				V2DI_type_node, NULL_TREE);

  tree v4si_ftype_v4si_v4si_v4si
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, V4SI_type_node,
				V4SI_type_node, NULL_TREE);

  tree v4si_ftype_v4si_v4si_v2di
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, V4SI_type_node,
				V2DI_type_node, NULL_TREE);

  tree v8hi_ftype_v8hi_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, V8HI_type_node,
				V8HI_type_node, NULL_TREE);

  tree v8hi_ftype_v8hi_v8hi_v4si
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, V8HI_type_node,
				V4SI_type_node, NULL_TREE);

  tree v2df_ftype_v2df_v2df_v16qi
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node,
				V16QI_type_node, NULL_TREE);

  tree v4sf_ftype_v4sf_v4sf_v16qi
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node,
				V16QI_type_node, NULL_TREE);

  tree v2di_ftype_v2di_si
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, integer_type_node,
				NULL_TREE);

  tree v4si_ftype_v4si_si
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, integer_type_node,
				NULL_TREE);

  tree v8hi_ftype_v8hi_si
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, integer_type_node,
				NULL_TREE);

  tree v16qi_ftype_v16qi_si
    = build_function_type_list (V16QI_type_node,
				V16QI_type_node, integer_type_node,
				NULL_TREE);
  tree v4sf_ftype_v4hi
    = build_function_type_list (V4SF_type_node, V4HI_type_node,
				NULL_TREE);

  tree v4hi_ftype_v4sf
    = build_function_type_list (V4HI_type_node, V4SF_type_node,
				NULL_TREE);

  tree v2di_ftype_v2di
    = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);

  tree ftype;

  /* The __float80 type.  */
  if (TYPE_MODE (long_double_type_node) == XFmode)
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
					       "__float80");
  else
    {
      /* long double does not have XFmode here, so build a distinct
	 80-bit REAL_TYPE for __float80.  */
      tree float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
      (*lang_hooks.types.register_builtin_type) (float80_type_node,
						 "__float80");
    }

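  /* Usage sketch (user-level, illustrative): after this registration the
     type is spellable in C whether or not long double shares XFmode:

       __float80 x = 1.0;

     with the precision and layout of the 80-bit extended format.  */
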
  if (TARGET_64BIT)
    {
      tree float128_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float128_type_node) = 128;
      layout_type (float128_type_node);
      (*lang_hooks.types.register_builtin_type) (float128_type_node,
						 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type (float128_type_node,
				   void_list_node);
      def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);

      ftype = build_function_type_list (float128_type_node,
					float128_type_node,
					NULL_TREE);
      def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);

      ftype = build_function_type_list (float128_type_node,
					float128_type_node,
					float128_type_node,
					NULL_TREE);
      def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
    }

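  /* Usage sketch (user-level, illustrative, 64-bit targets only):

       __float128 q = __builtin_infq ();
       __float128 a = __builtin_fabsq (q);
       __float128 s = __builtin_copysignq (a, (__float128) -1.0);

     fabsq and copysignq are registered const since they have no side
     effects, while infq is a plain builtin.  */
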
  /* Add all SSE builtins with variable number of operands.  */
  for (i = 0, d = bdesc_sse_args;
       i < ARRAY_SIZE (bdesc_sse_args);
       i++, d++)
    {
      tree type;

      if (d->name == 0)
	continue;

      switch ((enum sse_builtin_type) d->flag)
	{
	case V4SF_FTYPE_V4SF_INT:
	  type = v4sf_ftype_v4sf_int;
	  break;
	case V2DI_FTYPE_V2DI_INT:
	  type = v2di_ftype_v2di_int;
	  break;
	case V2DF_FTYPE_V2DF_INT:
	  type = v2df_ftype_v2df_int;
	  break;
	case V16QI_FTYPE_V16QI_V16QI_V16QI:
	  type = v16qi_ftype_v16qi_v16qi_v16qi;
	  break;
	case V4SF_FTYPE_V4SF_V4SF_V4SF:
	  type = v4sf_ftype_v4sf_v4sf_v4sf;
	  break;
	case V2DF_FTYPE_V2DF_V2DF_V2DF:
	  type = v2df_ftype_v2df_v2df_v2df;
	  break;
	case V16QI_FTYPE_V16QI_V16QI_INT:
	  type = v16qi_ftype_v16qi_v16qi_int;
	  break;
	case V8HI_FTYPE_V8HI_V8HI_INT:
	  type = v8hi_ftype_v8hi_v8hi_int;
	  break;
	case V4SI_FTYPE_V4SI_V4SI_INT:
	  type = v4si_ftype_v4si_v4si_int;
	  break;
	case V4SF_FTYPE_V4SF_V4SF_INT:
	  type = v4sf_ftype_v4sf_v4sf_int;
	  break;
	case V2DI_FTYPE_V2DI_V2DI_INT:
	  type = v2di_ftype_v2di_v2di_int;
	  break;
	case V2DF_FTYPE_V2DF_V2DF_INT:
	  type = v2df_ftype_v2df_v2df_int;
	  break;
	default:
	  gcc_unreachable ();
	}

      def_builtin_const (d->mask, d->name, type, d->code);
    }

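  /* Illustrative: each bdesc_sse_args row (defined earlier in this file)
     carries its sse_builtin_type tag in the flag field.  A row of the shape

       { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd,
	 "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD,
	 UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },

     (shown here only as an example) is matched by the V2DF_FTYPE_V2DF_INT
     case above and registered with the v2df_ftype_v2df_int signature.  */
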
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi_v16qi;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi_v8hi;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si_v4si;
	  break;
	case V2DImode:
	  type = v2di_ftype_v2di_v2di;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df_v2df;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case V1DImode:
	  type = v1di_ftype_v1di_v1di;
	  break;

	default:
	  gcc_unreachable ();
	}

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
	  || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
	type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
	  || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
	type = v2di_ftype_v2df_v2df;

      if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
	type = v4si_ftype_v2df_v2df;

      def_builtin_const (d->mask, d->name, type, d->code);
    }

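  /* Illustrative: the prototype is keyed off operand 1 of the insn
     pattern, so a two-operand descriptor whose pattern works on V16QImode
     (for instance the one for __builtin_ia32_pminub128) is registered as

       __v16qi __builtin_ia32_pminub128 (__v16qi, __v16qi);

     while the mask-generating compares are overridden just above to
     return an integer-element vector of the same width.  */
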
  /* Add all builtins that are more or less simple operations on 1 operand.  */
  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    {
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si;
	  break;

	default:
	  gcc_unreachable ();
	}

      def_builtin_const (d->mask, d->name, type, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
      else
	ftype = int_ftype_v16qi_int_v16qi_int_int;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = v16qi_ftype_v16qi_v16qi_int;
      else
	ftype = int_ftype_v16qi_v16qi_int;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

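  /* Usage sketch (user-level, illustrative, -msse4.2): the int-returning
     form registered here backs the string-compare intrinsics, e.g.

       typedef char __v16qi __attribute__ ((__vector_size__ (16)));

       int
       first_match_index (__v16qi a, __v16qi b)
       {
	 return __builtin_ia32_pcmpistri128 (a, b, 0x00);
       }

     where the immediate selects source format, aggregation and polarity.  */
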
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSLLWI);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSLLDI);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSLLQI);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSLLW);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSLLD);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSLLQ);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRLWI);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRLDI);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSRLQI);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSRLW);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSRLD);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSRLQ);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrawi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRAWI);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psradi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRADI);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSRAW);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSRAD);

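  /* Usage sketch (user-level, illustrative): these are the registrations
     that mmintrin.h wraps; e.g. an _mm_slli_pi16-style shift reduces to

       typedef short __v4hi __attribute__ ((__vector_size__ (8)));

       __v4hi
       shift_left_words (__v4hi a)
       {
	 return __builtin_ia32_psllwi (a, 3);
       }

     with the count passed through the int operand of v4hi_ftype_v4hi_int.  */
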
e41ef486
UB
19417 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
19418 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
0f290768 19419
bd793c65 19420 /* comi/ucomi insns. */
ca7558fc 19421 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
853a33f3 19422 if (d->mask == OPTION_MASK_ISA_SSE2)
e41ef486 19423 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
fbe5eb6d 19424 else
e41ef486 19425 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 19426
9a5cee02
L
19427 /* ptest insns. */
19428 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
e41ef486 19429 def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);
9a5cee02 19430
e41ef486
UB
19431 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
19432 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
19433 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
bd793c65 19434
853a33f3
UB
19435 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
19436 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
19437 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
19438 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
19439 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
19440 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
19441 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
19442 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
19443 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
19444 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
19445 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
37f22004 19446
853a33f3 19447 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
37f22004 19448
853a33f3
UB
19449 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
19450 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
37f22004 19451
853a33f3
UB
19452 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
19453 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
19454 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
19455 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
37f22004 19456
e41ef486
UB
19457 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
19458 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
853a33f3
UB
19459 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
19460 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
37f22004 19461
853a33f3 19462 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
37f22004 19463
ab555a5b 19464 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", v1di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
37f22004 19465
e41ef486
UB
19466 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
19467 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
8d364104 19468 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
3dc0f23a 19469 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps_nr", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS_NR);
8d364104 19470 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
6b889d89
UB
19471 ftype = build_function_type_list (float_type_node,
19472 float_type_node,
19473 NULL_TREE);
19474 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF);
853a33f3 19475 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
3dc0f23a 19476 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps_nr", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS_NR);
853a33f3 19477 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
37f22004 19478
47f339cf 19479 /* Original 3DNow! */
853a33f3 19480 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
e41ef486
UB
19481 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
19482 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
19483 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
19484 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
19485 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
19486 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
19487 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
19488 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
19489 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
19490 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
19491 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
19492 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
19493 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
19494 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
19495 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
19496 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
19497 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
19498 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
19499 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
47f339cf
BS
19500
19501 /* 3DNow! extension as used in the Athlon CPU. */
e41ef486
UB
19502 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
19503 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
19504 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
19505 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
19506 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
19507 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
47f339cf 19508
fbe5eb6d 19509 /* SSE2 */
853a33f3 19510 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
fbe5eb6d 19511
853a33f3
UB
19512 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
19513 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
fbe5eb6d 19514
853a33f3
UB
19515 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
19516 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
fbe5eb6d 19517
e41ef486
UB
19518 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
19519 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
853a33f3
UB
19520 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
19521 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
19522 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
fbe5eb6d 19523
e41ef486
UB
19524 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
19525 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
19526 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
19527 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
fbe5eb6d 19528
853a33f3
UB
19529 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
19530 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
fbe5eb6d 19531
853a33f3
UB
19532 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
19533 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
fbe5eb6d 19534
853a33f3
UB
19535 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
19536 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
19537 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
19538 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
19539 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
fbe5eb6d 19540
853a33f3 19541 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
fbe5eb6d 19542
853a33f3
UB
19543 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
19544 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
19545 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
19546 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
fbe5eb6d 19547
853a33f3
UB
19548 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
19549 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
19550 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
fbe5eb6d 19551
853a33f3
UB
19552 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
19553 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
19554 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
19555 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
fbe5eb6d 19556
853a33f3
UB
19557 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
19558 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
79f5e442 19559 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
916b60b7 19560
853a33f3
UB
19561 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
19562 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
f02e1358 19563
ab555a5b 19564 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", v1di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
e41ef486 19565 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
9e9fb0ce 19566
e41ef486
UB
19567 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
19568 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
19569 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
19570 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
19571 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
19572 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
19573 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
916b60b7 19574
e41ef486
UB
19575 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
19576 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
19577 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
19578 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
19579 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
19580 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
19581 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
916b60b7 19582
e41ef486
UB
19583 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
19584 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
19585 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
19586 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
916b60b7 19587
e41ef486 19588 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
22c7c85e
L
19589
19590 /* Prescott New Instructions. */
853a33f3
UB
19591 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
19592 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
19593 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
eb701deb 19594
b1875f52 19595 /* SSSE3. */
e41ef486
UB
19596 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
19597 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
b1875f52 19598
9a5cee02 19599 /* SSE4.1. */
853a33f3 19600 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
e41ef486
UB
19601 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
19602 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
19603 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
19604 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
19605 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
19606 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
19607 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
19608 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
19609 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
19610 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
19611 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
19612 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
19613 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
04e1d06b 19614
  /* SSE4.2.  */
  ftype = build_function_type_list (unsigned_type_node,
				    unsigned_type_node,
				    unsigned_char_type_node,
				    NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);

  ftype = build_function_type_list (unsigned_type_node,
				    unsigned_type_node,
				    short_unsigned_type_node,
				    NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);

  ftype = build_function_type_list (unsigned_type_node,
				    unsigned_type_node,
				    unsigned_type_node,
				    NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);

  ftype = build_function_type_list (long_long_unsigned_type_node,
				    long_long_unsigned_type_node,
				    long_long_unsigned_type_node,
				    NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);

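  /* Illustrative note (not in the original source): with -msse4.2 these
     are reached from <smmintrin.h>-style wrappers, e.g. roughly

	 unsigned int crc = 0;
	 crc = __builtin_ia32_crc32qi (crc, byte);

     where "byte" stands for any unsigned char value; the actual
     expansion is done by ix86_expand_crc32 below.  */
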
  /* AES.  */
  if (TARGET_AES)
    {
      /* Define AES built-in functions only if AES is enabled.  */
      def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
      def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
      def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
      def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
      def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
      def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
    }

  /* PCLMUL.  */
  if (TARGET_PCLMUL)
    {
      /* Define PCLMUL built-in function only if PCLMUL is enabled.  */
      def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
    }

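  /* Illustrative note (not in the original source): because the AES and
     PCLMUL definitions above are additionally guarded by TARGET_AES and
     TARGET_PCLMUL, a translation unit compiled without -maes sees no
     __builtin_ia32_aesenc128 at all, so e.g.

	 __v2di r = __builtin_ia32_aesenc128 (state, roundkey);

     compiles only when -maes (with SSE2) is in effect.  */
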
  /* AMDFAM10 SSE4A new built-ins.  */
  def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
  def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
  def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
  def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
  def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
  def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);

  /* Access to the vec_init patterns.  */
  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);

  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);

  ftype = build_function_type_list (V8QI_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
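
  /* Illustrative note (not in the original source): mmintrin.h-style
     wrappers are expected to expand to these, e.g. something like

	 __v2si v = __builtin_ia32_vec_init_v2si (lo, hi);

     which ix86_expand_vec_init_builtin below turns into an MMX vector
     initialization without language-level vector syntax.  */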

  /* Access to the vec_extract patterns.  */
  ftype = build_function_type_list (double_type_node, V2DF_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);

  ftype = build_function_type_list (long_long_integer_type_node,
				    V2DI_type_node, integer_type_node,
				    NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);

  ftype = build_function_type_list (float_type_node, V4SF_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);

  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);

  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);

  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);

  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);

  ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
				    intDI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);

  ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
				    float_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);

  ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
				    intSI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);

  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);

  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);

  ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
				    intQI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);

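  /* Illustrative note (not in the original source): the extract/set pairs
     behave functionally, e.g. roughly

	 short e = __builtin_ia32_vec_ext_v8hi (v, 3);
	 __v8hi w = __builtin_ia32_vec_set_v8hi (v, e, 0);

     where the selector must be a compile-time constant in range (this is
     checked by get_element_number below) and the source vector is left
     unmodified.  */
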
  /* Add SSE5 multi-argument instructions.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      tree mtype = NULL_TREE;

      if (d->name == 0)
	continue;

      switch ((enum multi_arg_type)d->flag)
	{
	case MULTI_ARG_3_SF:     mtype = v4sf_ftype_v4sf_v4sf_v4sf;     break;
	case MULTI_ARG_3_DF:     mtype = v2df_ftype_v2df_v2df_v2df;     break;
	case MULTI_ARG_3_DI:     mtype = v2di_ftype_v2di_v2di_v2di;     break;
	case MULTI_ARG_3_SI:     mtype = v4si_ftype_v4si_v4si_v4si;     break;
	case MULTI_ARG_3_SI_DI:  mtype = v4si_ftype_v4si_v4si_v2di;     break;
	case MULTI_ARG_3_HI:     mtype = v8hi_ftype_v8hi_v8hi_v8hi;     break;
	case MULTI_ARG_3_HI_SI:  mtype = v8hi_ftype_v8hi_v8hi_v4si;     break;
	case MULTI_ARG_3_QI:     mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
	case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi;    break;
	case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi;    break;
	case MULTI_ARG_2_SF:     mtype = v4sf_ftype_v4sf_v4sf;          break;
	case MULTI_ARG_2_DF:     mtype = v2df_ftype_v2df_v2df;          break;
	case MULTI_ARG_2_DI:     mtype = v2di_ftype_v2di_v2di;          break;
	case MULTI_ARG_2_SI:     mtype = v4si_ftype_v4si_v4si;          break;
	case MULTI_ARG_2_HI:     mtype = v8hi_ftype_v8hi_v8hi;          break;
	case MULTI_ARG_2_QI:     mtype = v16qi_ftype_v16qi_v16qi;       break;
	case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si;            break;
	case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si;            break;
	case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si;            break;
	case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si;          break;
	case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf;          break;
	case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df;          break;
	case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di;          break;
	case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si;          break;
	case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi;          break;
	case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi;       break;
	case MULTI_ARG_2_SF_TF:  mtype = v4sf_ftype_v4sf_v4sf;          break;
	case MULTI_ARG_2_DF_TF:  mtype = v2df_ftype_v2df_v2df;          break;
	case MULTI_ARG_2_DI_TF:  mtype = v2di_ftype_v2di_v2di;          break;
	case MULTI_ARG_2_SI_TF:  mtype = v4si_ftype_v4si_v4si;          break;
	case MULTI_ARG_2_HI_TF:  mtype = v8hi_ftype_v8hi_v8hi;          break;
	case MULTI_ARG_2_QI_TF:  mtype = v16qi_ftype_v16qi_v16qi;       break;
	case MULTI_ARG_1_SF:     mtype = v4sf_ftype_v4sf;               break;
	case MULTI_ARG_1_DF:     mtype = v2df_ftype_v2df;               break;
	case MULTI_ARG_1_DI:     mtype = v2di_ftype_v2di;               break;
	case MULTI_ARG_1_SI:     mtype = v4si_ftype_v4si;               break;
	case MULTI_ARG_1_HI:     mtype = v8hi_ftype_v8hi;               break;
	case MULTI_ARG_1_QI:     mtype = v16qi_ftype_v16qi;             break;
	case MULTI_ARG_1_SI_DI:  mtype = v2di_ftype_v4si;               break;
	case MULTI_ARG_1_HI_DI:  mtype = v2di_ftype_v8hi;               break;
	case MULTI_ARG_1_HI_SI:  mtype = v4si_ftype_v8hi;               break;
	case MULTI_ARG_1_QI_DI:  mtype = v2di_ftype_v16qi;              break;
	case MULTI_ARG_1_QI_SI:  mtype = v4si_ftype_v16qi;              break;
	case MULTI_ARG_1_QI_HI:  mtype = v8hi_ftype_v16qi;              break;
	case MULTI_ARG_1_PH2PS:  mtype = v4sf_ftype_v4hi;               break;
	case MULTI_ARG_1_PS2PH:  mtype = v4hi_ftype_v4sf;               break;
	case MULTI_ARG_UNKNOWN:
	default:
	  gcc_unreachable ();
	}

      if (mtype)
	def_builtin_const (d->mask, d->name, mtype, d->code);
    }
}

static void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of SSE insns with
   variable number of operands.  */

static rtx
ix86_expand_sse_operands_builtin (enum insn_code icode, tree exp,
				  enum sse_builtin_type type,
				  rtx target)
{
  rtx pat;
  unsigned int i, nargs;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[3];
  bool last_arg_constant = false;
  const struct insn_data *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;

  switch (type)
    {
    case V4SF_FTYPE_V4SF_INT:
    case V2DI_FTYPE_V2DI_INT:
    case V2DF_FTYPE_V2DF_INT:
      nargs = 2;
      last_arg_constant = true;
      break;
    case V16QI_FTYPE_V16QI_V16QI_V16QI:
    case V4SF_FTYPE_V4SF_V4SF_V4SF:
    case V2DF_FTYPE_V2DF_V2DF_V2DF:
      nargs = 3;
      break;
    case V16QI_FTYPE_V16QI_V16QI_INT:
    case V8HI_FTYPE_V8HI_V8HI_INT:
    case V4SI_FTYPE_V4SI_V4SI_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT:
    case V2DI_FTYPE_V2DI_V2DI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
      nargs = 3;
      last_arg_constant = true;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (optimize
      || target == 0
      || GET_MODE (target) != tmode
      || ! (*insn_p->operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = (*insn_p->operand[i + 1].predicate) (op, mode);

      if (last_arg_constant && (i + 1) == nargs)
	{
	  if (!match)
	    switch (icode)
	      {
	      case CODE_FOR_sse4_1_roundpd:
	      case CODE_FOR_sse4_1_roundps:
	      case CODE_FOR_sse4_1_roundsd:
	      case CODE_FOR_sse4_1_roundss:
	      case CODE_FOR_sse4_1_blendps:
		error ("the last argument must be a 4-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sse4_1_blendpd:
		error ("the last argument must be a 2-bit immediate");
		return const0_rtx;

	      default:
		error ("the last argument must be an 8-bit immediate");
		return const0_rtx;
	      }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to
	     be generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  gcc_assert (GET_MODE (op) == mode
		      || GET_MODE (op) == VOIDmode);

	  if (optimize || !match || num_memory > 1)
	    op = copy_to_mode_reg (mode, op);
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			     args[2].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}

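/* Illustrative note (not in the original source): under the scheme above,
   a call classified as V4SF_FTYPE_V4SF_V4SF_INT, e.g. roughly

       __v4sf r = __builtin_ia32_blendps (a, b, 0x5);

   must supply a 4-bit immediate as its last operand; a non-constant mask
   is rejected with the error above instead of being forced into a
   register.  */
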
/* Subroutine of ix86_expand_builtin to take care of crc32 insns.  */

static rtx
ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (optimize
      || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    {
      op1 = copy_to_reg (op1);
      op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns
   with an immediate.  */

static rtx
ix86_expand_binop_imm_builtin (enum insn_code icode, tree exp,
			       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    {
      op0 = copy_to_reg (op0);
      op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
    }

  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    {
      error ("the last operand must be an immediate");
      return const0_rtx;
    }

  target = gen_reg_rtx (V2DImode);
  pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target,
					      V2DImode, 0),
			 op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

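/* Illustrative note (not in the original source): the wrappers in
   emmintrin.h are expected to scale byte shifts into bit counts before
   calling these builtins, e.g. something like

       #define _mm_slli_si128(A, N) \
	 ((__m128i) __builtin_ia32_pslldqi128 ((__m128i)(A), (N) * 8))

   so the immediate that reaches sse2_ashlti3 is already a bit count.  */
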
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

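/* Illustrative note (not in the original source): most two-operand
   builtins funnel through this helper; e.g. the 3DNow! case further
   down does

       return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);

   for IX86_BUILTIN_PAVGUSB, leaving it to the insn predicates to decide
   which operands must be copied into registers.  */
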
/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
			       enum multi_arg_type m_type,
			       enum insn_code sub_code)
{
  rtx pat;
  int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_PERMPS:
    case MULTI_ARG_3_PERMPD:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
    case MULTI_ARG_1_PH2PS:
    case MULTI_ARG_1_PS2PH:
      nargs = 1;
      break;

    case MULTI_ARG_2_SF_CMP:
    case MULTI_ARG_2_DF_CMP:
    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    case MULTI_ARG_UNKNOWN:
    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs-1)
	{
	  if (GET_CODE (op) != CONST_INT)
	    {
	      error ("last argument must be an immediate");
	      return gen_reg_rtx (tmode);
	    }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to be
	     generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

	  if (optimize
	      || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
	      || num_memory > 1)
	    op = force_reg (mode, op);
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			       GEN_INT ((int)sub_code));
      else if (! comparison_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
	{
	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
				       args[0].op,
				       args[1].op);

	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
	}
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree exp)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

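/* Illustrative note (not in the original source): the non-temporal and
   unaligned store builtins route through this helper, e.g. the case
   below for IX86_BUILTIN_MOVNTPS does

       return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);

   with the first builtin argument supplying the destination address.  */
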
/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree exp,
			  rtx target, int do_load)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if ((optimize && !register_operand (op0, mode0))
	  || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
			 rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}

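/* Illustrative note (not in the original source): a comi builtin such as
   __builtin_ia32_comieq compares the low elements and the code above then
   materializes the flags result as a 0/1 value, so e.g.

       int eq = __builtin_ia32_comieq (a, b);

   yields an integer register holding the comparison outcome.  */
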
/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}

/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}


/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}

/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static int
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}

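/* Illustrative note (not in the original source): this is why e.g.

       float f = __builtin_ia32_vec_ext_v4sf (v, i);

   with a non-constant or out-of-range "i" draws the "selector must be an
   integer constant in the range 0..3" error instead of expanding.  */
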
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}

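/* Illustrative note (not in the original source): the copy made above
   gives these builtins value semantics, so after e.g. roughly

       __v4hi w = __builtin_ia32_vec_set_v4hi (v, x, 2);

   "v" still holds its original contents and only "w" carries the
   replaced element.  */
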
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2, arg3;
  rtx op0, op1, op2, op3, pat;
  enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_mmx_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sse_sfence ());
      return 0;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? CODE_FOR_mmx_maskmovq
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = CALL_EXPR_ARG (exp, 0);
      arg2 = CALL_EXPR_ARG (exp, 1);
      arg0 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      op0 = force_reg (Pmode, op0);
      op0 = gen_rtx_MEM (mode1, op0);

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_RSQRTF:
      return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2, exp, target);

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
	       : CODE_FOR_sse2_loadlpd);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = force_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || !register_operand (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
	       : CODE_FOR_sse_storelps);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      op1 = force_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return const0_rtx;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
	       : CODE_FOR_mmx_pshufw);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLW:
    case IX86_BUILTIN_PSLLWI:
      icode = CODE_FOR_mmx_ashlv4hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSLLD:
    case IX86_BUILTIN_PSLLDI:
      icode = CODE_FOR_mmx_ashlv2si3;
      goto do_pshift;
    case IX86_BUILTIN_PSLLQ:
    case IX86_BUILTIN_PSLLQI:
      icode = CODE_FOR_mmx_ashlv1di3;
      goto do_pshift;
    case IX86_BUILTIN_PSRAW:
    case IX86_BUILTIN_PSRAWI:
      icode = CODE_FOR_mmx_ashrv4hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSRAD:
    case IX86_BUILTIN_PSRADI:
      icode = CODE_FOR_mmx_ashrv2si3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLW:
    case IX86_BUILTIN_PSRLWI:
      icode = CODE_FOR_mmx_lshrv4hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLD:
    case IX86_BUILTIN_PSRLDI:
      icode = CODE_FOR_mmx_lshrv2si3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLQ:
    case IX86_BUILTIN_PSRLQI:
      icode = CODE_FOR_mmx_lshrv1di3;
      goto do_pshift;

    case IX86_BUILTIN_PSLLW128:
    case IX86_BUILTIN_PSLLWI128:
      icode = CODE_FOR_ashlv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSLLD128:
    case IX86_BUILTIN_PSLLDI128:
      icode = CODE_FOR_ashlv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSLLQ128:
    case IX86_BUILTIN_PSLLQI128:
      icode = CODE_FOR_ashlv2di3;
      goto do_pshift;
    case IX86_BUILTIN_PSRAW128:
    case IX86_BUILTIN_PSRAWI128:
      icode = CODE_FOR_ashrv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSRAD128:
    case IX86_BUILTIN_PSRADI128:
      icode = CODE_FOR_ashrv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLW128:
    case IX86_BUILTIN_PSRLWI128:
      icode = CODE_FOR_lshrv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLD128:
    case IX86_BUILTIN_PSRLDI128:
      icode = CODE_FOR_lshrv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLQ128:
    case IX86_BUILTIN_PSRLQI128:
      icode = CODE_FOR_lshrv2di3;

    do_pshift:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);

      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_reg (op0);

      if (!CONST_INT_P (op1))
	op1 = simplify_gen_subreg (SImode, op1, GET_MODE (op1), 0);

      if (! (*insn_data[icode].operand[2].predicate) (op1, SImode))
	op1 = copy_to_reg (op1);

      target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;

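      /* Illustrative note (not in the original source): both the
	 immediate and the register-count forms share the path above;
	 e.g. roughly

	     __v8hi a = __builtin_ia32_psllwi128 (x, 5);

	 arrives with a CONST_INT count and is emitted directly, while a
	 variable count is first narrowed to SImode and copied into a
	 register.  */
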
    case IX86_BUILTIN_PSLLDQI128:
      return ix86_expand_binop_imm_builtin (CODE_FOR_sse2_ashlti3,
					    exp, target);

    case IX86_BUILTIN_PSRLDQI128:
      return ix86_expand_binop_imm_builtin (CODE_FOR_sse2_lshrti3,
					    exp, target);

47f339cf 21111 case IX86_BUILTIN_FEMMS:
80e8bb90 21112 emit_insn (gen_mmx_femms ());
47f339cf
BS
21113 return NULL_RTX;
21114
21115 case IX86_BUILTIN_PAVGUSB:
5039610b 21116 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
47f339cf
BS
21117
21118 case IX86_BUILTIN_PF2ID:
5039610b 21119 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
47f339cf
BS
21120
21121 case IX86_BUILTIN_PFACC:
5039610b 21122 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
47f339cf
BS
21123
21124 case IX86_BUILTIN_PFADD:
5039610b 21125 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
47f339cf
BS
21126
21127 case IX86_BUILTIN_PFCMPEQ:
5039610b 21128 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
47f339cf
BS
21129
21130 case IX86_BUILTIN_PFCMPGE:
5039610b 21131 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
47f339cf
BS
21132
21133 case IX86_BUILTIN_PFCMPGT:
5039610b 21134 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
47f339cf
BS
21135
21136 case IX86_BUILTIN_PFMAX:
5039610b 21137 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
47f339cf
BS
21138
21139 case IX86_BUILTIN_PFMIN:
5039610b 21140 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
47f339cf
BS
21141
21142 case IX86_BUILTIN_PFMUL:
5039610b 21143 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
47f339cf
BS
21144
21145 case IX86_BUILTIN_PFRCP:
5039610b 21146 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
47f339cf
BS
21147
21148 case IX86_BUILTIN_PFRCPIT1:
5039610b 21149 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
47f339cf
BS
21150
21151 case IX86_BUILTIN_PFRCPIT2:
5039610b 21152 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
47f339cf
BS
21153
21154 case IX86_BUILTIN_PFRSQIT1:
5039610b 21155 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
47f339cf
BS
21156
21157 case IX86_BUILTIN_PFRSQRT:
5039610b 21158 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
47f339cf
BS
21159
21160 case IX86_BUILTIN_PFSUB:
5039610b 21161 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
47f339cf
BS
21162
21163 case IX86_BUILTIN_PFSUBR:
5039610b 21164 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
47f339cf
BS
21165
21166 case IX86_BUILTIN_PI2FD:
5039610b 21167 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
21168
21169 case IX86_BUILTIN_PMULHRW:
5039610b 21170 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
47f339cf 21171
47f339cf 21172 case IX86_BUILTIN_PF2IW:
5039610b 21173 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
21174
21175 case IX86_BUILTIN_PFNACC:
5039610b 21176 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
21177
21178 case IX86_BUILTIN_PFPNACC:
5039610b 21179 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
21180
21181 case IX86_BUILTIN_PI2FW:
5039610b 21182 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
21183
21184 case IX86_BUILTIN_PSWAPDSI:
5039610b 21185 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
21186
21187 case IX86_BUILTIN_PSWAPDSF:
5039610b 21188 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
47f339cf 21189
fbe5eb6d 21190 case IX86_BUILTIN_SQRTSD:
5039610b 21191 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
fbe5eb6d 21192 case IX86_BUILTIN_LOADUPD:
5039610b 21193 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
fbe5eb6d 21194 case IX86_BUILTIN_STOREUPD:
5039610b 21195 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
fbe5eb6d 21196
21197 case IX86_BUILTIN_MFENCE:
21198 emit_insn (gen_sse2_mfence ());
21199 return 0;
21200 case IX86_BUILTIN_LFENCE:
21201 emit_insn (gen_sse2_lfence ());
21202 return 0;
21203
21204 case IX86_BUILTIN_CLFLUSH:
5039610b 21205 arg0 = CALL_EXPR_ARG (exp, 0);
84217346 21206 op0 = expand_normal (arg0);
fbe5eb6d 21207 icode = CODE_FOR_sse2_clflush;
21208 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
21209 op0 = copy_to_mode_reg (Pmode, op0);
21210
21211 emit_insn (gen_sse2_clflush (op0));
21212 return 0;
21213
21214 case IX86_BUILTIN_MOVNTPD:
5039610b 21215 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
fbe5eb6d 21216 case IX86_BUILTIN_MOVNTDQ:
5039610b 21217 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
fbe5eb6d 21218 case IX86_BUILTIN_MOVNTI:
5039610b 21219 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
fbe5eb6d 21220
f02e1358 21221 case IX86_BUILTIN_LOADDQU:
5039610b 21222 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
f02e1358 21223 case IX86_BUILTIN_STOREDQU:
5039610b 21224 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
f02e1358 21225
22c7c85e 21226 case IX86_BUILTIN_MONITOR:
21227 arg0 = CALL_EXPR_ARG (exp, 0);
21228 arg1 = CALL_EXPR_ARG (exp, 1);
21229 arg2 = CALL_EXPR_ARG (exp, 2);
21230 op0 = expand_normal (arg0);
21231 op1 = expand_normal (arg1);
21232 op2 = expand_normal (arg2);
22c7c85e 21233 if (!REG_P (op0))
546be535 21234 op0 = copy_to_mode_reg (Pmode, op0);
21235 if (!REG_P (op1))
21236 op1 = copy_to_mode_reg (SImode, op1);
21237 if (!REG_P (op2))
21238 op2 = copy_to_mode_reg (SImode, op2);
21239 if (!TARGET_64BIT)
21240 emit_insn (gen_sse3_monitor (op0, op1, op2));
21241 else
21242 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
21243 return 0;
21244
21245 case IX86_BUILTIN_MWAIT:
21246 arg0 = CALL_EXPR_ARG (exp, 0);
21247 arg1 = CALL_EXPR_ARG (exp, 1);
21248 op0 = expand_normal (arg0);
21249 op1 = expand_normal (arg1);
21250 if (!REG_P (op0))
21251 op0 = copy_to_mode_reg (SImode, op0);
21252 if (!REG_P (op1))
21253 op1 = copy_to_mode_reg (SImode, op1);
ef719a44 21254 emit_insn (gen_sse3_mwait (op0, op1));
21255 return 0;
21256
22c7c85e 21257 case IX86_BUILTIN_LDDQU:
5039610b 21258 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
21259 target, 1);
21260
21261 case IX86_BUILTIN_PALIGNR:
21262 case IX86_BUILTIN_PALIGNR128:
21263 if (fcode == IX86_BUILTIN_PALIGNR)
21264 {
21265 icode = CODE_FOR_ssse3_palignrdi;
21266 mode = DImode;
21267 }
21268 else
21269 {
21270 icode = CODE_FOR_ssse3_palignrti;
21271 mode = V2DImode;
21272 }
21273 arg0 = CALL_EXPR_ARG (exp, 0);
21274 arg1 = CALL_EXPR_ARG (exp, 1);
21275 arg2 = CALL_EXPR_ARG (exp, 2);
21276 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21277 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21278 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21279 tmode = insn_data[icode].operand[0].mode;
21280 mode1 = insn_data[icode].operand[1].mode;
21281 mode2 = insn_data[icode].operand[2].mode;
21282 mode3 = insn_data[icode].operand[3].mode;
21283
21284 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21285 {
21286 op0 = copy_to_reg (op0);
21287 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
21288 }
21289 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21290 {
21291 op1 = copy_to_reg (op1);
21292 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
21293 }
21294 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21295 {
21296 error ("shift must be an immediate");
21297 return const0_rtx;
21298 }
21299 target = gen_reg_rtx (mode);
21300 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
21301 op0, op1, op2);
21302 if (! pat)
21303 return 0;
21304 emit_insn (pat);
21305 return target;
21306
21307 case IX86_BUILTIN_MOVNTDQA:
21308 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
21309 target, 1);
21310
21efb4d4 21311 case IX86_BUILTIN_MOVNTSD:
5039610b 21312 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
21313
21314 case IX86_BUILTIN_MOVNTSS:
5039610b 21315 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
21316
21317 case IX86_BUILTIN_INSERTQ:
21318 case IX86_BUILTIN_EXTRQ:
21319 icode = (fcode == IX86_BUILTIN_EXTRQ
21320 ? CODE_FOR_sse4a_extrq
21321 : CODE_FOR_sse4a_insertq);
21322 arg0 = CALL_EXPR_ARG (exp, 0);
21323 arg1 = CALL_EXPR_ARG (exp, 1);
21324 op0 = expand_normal (arg0);
21325 op1 = expand_normal (arg1);
21326 tmode = insn_data[icode].operand[0].mode;
21327 mode1 = insn_data[icode].operand[1].mode;
21328 mode2 = insn_data[icode].operand[2].mode;
21329 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21330 op0 = copy_to_mode_reg (mode1, op0);
21331 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21332 op1 = copy_to_mode_reg (mode2, op1);
21333 if (optimize || target == 0
21334 || GET_MODE (target) != tmode
21335 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21336 target = gen_reg_rtx (tmode);
21337 pat = GEN_FCN (icode) (target, op0, op1);
21338 if (! pat)
21339 return NULL_RTX;
21340 emit_insn (pat);
21341 return target;
21342
21343 case IX86_BUILTIN_EXTRQI:
21344 icode = CODE_FOR_sse4a_extrqi;
21345 arg0 = CALL_EXPR_ARG (exp, 0);
21346 arg1 = CALL_EXPR_ARG (exp, 1);
21347 arg2 = CALL_EXPR_ARG (exp, 2);
21348 op0 = expand_normal (arg0);
21349 op1 = expand_normal (arg1);
21350 op2 = expand_normal (arg2);
21351 tmode = insn_data[icode].operand[0].mode;
21352 mode1 = insn_data[icode].operand[1].mode;
21353 mode2 = insn_data[icode].operand[2].mode;
21354 mode3 = insn_data[icode].operand[3].mode;
21355 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21356 op0 = copy_to_mode_reg (mode1, op0);
21357 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21358 {
21359 error ("index mask must be an immediate");
21360 return gen_reg_rtx (tmode);
21361 }
21362 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21363 {
21364 error ("length mask must be an immediate");
21365 return gen_reg_rtx (tmode);
21366 }
21367 if (optimize || target == 0
21368 || GET_MODE (target) != tmode
21369 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21370 target = gen_reg_rtx (tmode);
21371 pat = GEN_FCN (icode) (target, op0, op1, op2);
21372 if (! pat)
21373 return NULL_RTX;
21374 emit_insn (pat);
21375 return target;
21376
21377 case IX86_BUILTIN_INSERTQI:
21378 icode = CODE_FOR_sse4a_insertqi;
21379 arg0 = CALL_EXPR_ARG (exp, 0);
21380 arg1 = CALL_EXPR_ARG (exp, 1);
21381 arg2 = CALL_EXPR_ARG (exp, 2);
21382 arg3 = CALL_EXPR_ARG (exp, 3);
21383 op0 = expand_normal (arg0);
21384 op1 = expand_normal (arg1);
21385 op2 = expand_normal (arg2);
21386 op3 = expand_normal (arg3);
21387 tmode = insn_data[icode].operand[0].mode;
21388 mode1 = insn_data[icode].operand[1].mode;
21389 mode2 = insn_data[icode].operand[2].mode;
21390 mode3 = insn_data[icode].operand[3].mode;
21391 mode4 = insn_data[icode].operand[4].mode;
21392
21393 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21394 op0 = copy_to_mode_reg (mode1, op0);
21395
21396 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21397 op1 = copy_to_mode_reg (mode2, op1);
21398
21399 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21400 {
21401 error ("index mask must be an immediate");
21402 return gen_reg_rtx (tmode);
21403 }
21404 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
21405 {
21406 error ("length mask must be an immediate");
21407 return gen_reg_rtx (tmode);
21408 }
21409 if (optimize || target == 0
21410 || GET_MODE (target) != tmode
21411 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21412 target = gen_reg_rtx (tmode);
21413 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
21414 if (! pat)
21415 return NULL_RTX;
21416 emit_insn (pat);
21417 return target;
21418
21419 case IX86_BUILTIN_VEC_INIT_V2SI:
21420 case IX86_BUILTIN_VEC_INIT_V4HI:
21421 case IX86_BUILTIN_VEC_INIT_V8QI:
5039610b 21422 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
21423
21424 case IX86_BUILTIN_VEC_EXT_V2DF:
21425 case IX86_BUILTIN_VEC_EXT_V2DI:
21426 case IX86_BUILTIN_VEC_EXT_V4SF:
ed9b5396 21427 case IX86_BUILTIN_VEC_EXT_V4SI:
eb701deb 21428 case IX86_BUILTIN_VEC_EXT_V8HI:
0f2698d0 21429 case IX86_BUILTIN_VEC_EXT_V2SI:
eb701deb 21430 case IX86_BUILTIN_VEC_EXT_V4HI:
9a5cee02 21431 case IX86_BUILTIN_VEC_EXT_V16QI:
5039610b 21432 return ix86_expand_vec_ext_builtin (exp, target);
eb701deb 21433
21434 case IX86_BUILTIN_VEC_SET_V2DI:
21435 case IX86_BUILTIN_VEC_SET_V4SF:
21436 case IX86_BUILTIN_VEC_SET_V4SI:
21437 case IX86_BUILTIN_VEC_SET_V8HI:
21438 case IX86_BUILTIN_VEC_SET_V4HI:
9a5cee02 21439 case IX86_BUILTIN_VEC_SET_V16QI:
5039610b 21440 return ix86_expand_vec_set_builtin (exp);
22c7c85e 21441
21442 case IX86_BUILTIN_INFQ:
21443 {
21444 REAL_VALUE_TYPE inf;
21445 rtx tmp;
21446
21447 real_inf (&inf);
21448 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
21449
21450 tmp = validize_mem (force_const_mem (mode, tmp));
21451
21452 if (target == 0)
21453 target = gen_reg_rtx (mode);
21454
21455 emit_move_insn (target, tmp);
21456 return target;
21457 }
21458
21459 case IX86_BUILTIN_FABSQ:
21460 return ix86_expand_unop_builtin (CODE_FOR_abstf2, exp, target, 0);
21461
21462 case IX86_BUILTIN_COPYSIGNQ:
21463 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3, exp, target);
21464
21465 default:
21466 break;
21467 }
21468
21469 for (i = 0, d = bdesc_sse_args;
21470 i < ARRAY_SIZE (bdesc_sse_args);
21471 i++, d++)
21472 if (d->code == fcode)
21473 {
21474 enum sse_builtin_type type = (enum sse_builtin_type) d->flag;
21475 return ix86_expand_sse_operands_builtin (d->icode, exp,
21476 type, target);
21477 }
9a5cee02 21478
ca7558fc 21479 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21480 if (d->code == fcode)
21481 {
21482 /* Compares are treated specially. */
21483 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
21484 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
21485 || d->icode == CODE_FOR_sse2_maskcmpv2df3
21486 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
5039610b 21487 return ix86_expand_sse_compare (d, exp, target);
bd793c65 21488
5039610b 21489 return ix86_expand_binop_builtin (d->icode, exp, target);
21490 }
21491
ca7558fc 21492 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
bd793c65 21493 if (d->code == fcode)
5039610b 21494 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
0f290768 21495
ca7558fc 21496 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
bd793c65 21497 if (d->code == fcode)
5039610b 21498 return ix86_expand_sse_comi (d, exp, target);
0f290768 21499
21500 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
21501 if (d->code == fcode)
21502 return ix86_expand_sse_ptest (d, exp, target);
21503
21504 for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
21505 if (d->code == fcode)
21506 return ix86_expand_crc32 (d->icode, exp, target);
21507
21508 for (i = 0, d = bdesc_pcmpestr;
21509 i < ARRAY_SIZE (bdesc_pcmpestr);
21510 i++, d++)
21511 if (d->code == fcode)
21512 return ix86_expand_sse_pcmpestr (d, exp, target);
21513
21514 for (i = 0, d = bdesc_pcmpistr;
21515 i < ARRAY_SIZE (bdesc_pcmpistr);
21516 i++, d++)
21517 if (d->code == fcode)
21518 return ix86_expand_sse_pcmpistr (d, exp, target);
21519
21520 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21521 if (d->code == fcode)
21522 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
21523 (enum multi_arg_type)d->flag,
21524 d->comparison);
21525
ed9b5396 21526 gcc_unreachable ();
bd793c65 21527}
4211a8fb 21528
21529/* Returns a function decl for a vectorized version of the builtin function
21530 with builtin function code FN and the result vector type TYPE, or NULL_TREE
21531 if it is not available. */
21532
21533static tree
9415ab7d 21534ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
b95becfc 21535 tree type_in)
db3cf6bd 21536{
21537 enum machine_mode in_mode, out_mode;
21538 int in_n, out_n;
db3cf6bd 21539
21540 if (TREE_CODE (type_out) != VECTOR_TYPE
21541 || TREE_CODE (type_in) != VECTOR_TYPE)
21542 return NULL_TREE;
21543
21544 out_mode = TYPE_MODE (TREE_TYPE (type_out));
21545 out_n = TYPE_VECTOR_SUBPARTS (type_out);
21546 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21547 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21548
21549 switch (fn)
21550 {
21551 case BUILT_IN_SQRT:
21552 if (out_mode == DFmode && out_n == 2
21553 && in_mode == DFmode && in_n == 2)
db3cf6bd 21554 return ix86_builtins[IX86_BUILTIN_SQRTPD];
a5ea943c 21555 break;
21556
21557 case BUILT_IN_SQRTF:
21558 if (out_mode == SFmode && out_n == 4
21559 && in_mode == SFmode && in_n == 4)
3dc0f23a 21560 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
a5ea943c 21561 break;
db3cf6bd 21562
21563 case BUILT_IN_LRINT:
21564 if (out_mode == SImode && out_n == 4
21565 && in_mode == DFmode && in_n == 2)
21566 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
a5ea943c 21567 break;
b40c4f68 21568
21569 case BUILT_IN_LRINTF:
21570 if (out_mode == SImode && out_n == 4
21571 && in_mode == SFmode && in_n == 4)
21572 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
a5ea943c 21573 break;
b95becfc 21574
21575 default:
21576 ;
21577 }
21578
21579 /* Dispatch to a handler for a vectorization library. */
21580 if (ix86_veclib_handler)
21581 return (*ix86_veclib_handler)(fn, type_out, type_in);
21582
21583 return NULL_TREE;
21584}
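/* Example (illustrative only, not part of GCC): the hook above lets the
   vectorizer replace a scalar sqrt () loop over doubles with the V2DF
   sqrtpd builtin (IX86_BUILTIN_SQRTPD).  A hand-written equivalent of
   the vectorized form, assuming SSE2 and <emmintrin.h>, is sketched
   below.  */
#if 0
#include <emmintrin.h>

void
sqrt_array (double *a, const double *b, int n)
{
  int i;
  /* Two doubles per iteration, which is what the sqrtpd builtin does.  */
  for (i = 0; i + 2 <= n; i += 2)
    _mm_storeu_pd (&a[i], _mm_sqrt_pd (_mm_loadu_pd (&b[i])));
}
#endif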
21585
21586/* Handler for an SVML-style interface to
21587 a library with vectorized intrinsics. */
21588
21589static tree
21590ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
21591{
21592 char name[20];
21593 tree fntype, new_fndecl, args;
21594 unsigned arity;
21595 const char *bname;
21596 enum machine_mode el_mode, in_mode;
21597 int n, in_n;
21598
21599 /* The SVML is suitable for unsafe math only. */
21600 if (!flag_unsafe_math_optimizations)
21601 return NULL_TREE;
21602
21603 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21604 n = TYPE_VECTOR_SUBPARTS (type_out);
21605 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21606 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21607 if (el_mode != in_mode
21608 || n != in_n)
21609 return NULL_TREE;
21610
21611 switch (fn)
21612 {
21613 case BUILT_IN_EXP:
21614 case BUILT_IN_LOG:
21615 case BUILT_IN_LOG10:
21616 case BUILT_IN_POW:
21617 case BUILT_IN_TANH:
21618 case BUILT_IN_TAN:
21619 case BUILT_IN_ATAN:
21620 case BUILT_IN_ATAN2:
21621 case BUILT_IN_ATANH:
21622 case BUILT_IN_CBRT:
21623 case BUILT_IN_SINH:
21624 case BUILT_IN_SIN:
21625 case BUILT_IN_ASINH:
21626 case BUILT_IN_ASIN:
21627 case BUILT_IN_COSH:
21628 case BUILT_IN_COS:
21629 case BUILT_IN_ACOSH:
21630 case BUILT_IN_ACOS:
21631 if (el_mode != DFmode || n != 2)
21632 return NULL_TREE;
21633 break;
21634
21635 case BUILT_IN_EXPF:
21636 case BUILT_IN_LOGF:
21637 case BUILT_IN_LOG10F:
21638 case BUILT_IN_POWF:
21639 case BUILT_IN_TANHF:
21640 case BUILT_IN_TANF:
21641 case BUILT_IN_ATANF:
21642 case BUILT_IN_ATAN2F:
21643 case BUILT_IN_ATANHF:
21644 case BUILT_IN_CBRTF:
21645 case BUILT_IN_SINHF:
21646 case BUILT_IN_SINF:
21647 case BUILT_IN_ASINHF:
21648 case BUILT_IN_ASINF:
21649 case BUILT_IN_COSHF:
21650 case BUILT_IN_COSF:
21651 case BUILT_IN_ACOSHF:
21652 case BUILT_IN_ACOSF:
21653 if (el_mode != SFmode || n != 4)
21654 return NULL_TREE;
21655 break;
21656
21657 default:
21658 return NULL_TREE;
21659 }
21660
21661 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21662
21663 if (fn == BUILT_IN_LOGF)
21664 strcpy (name, "vmlsLn4");
21665 else if (fn == BUILT_IN_LOG)
21666 strcpy (name, "vmldLn2");
21667 else if (n == 4)
21668 {
21669 sprintf (name, "vmls%s", bname+10);
21670 name[strlen (name)-1] = '4';
21671 }
21672 else
21673 sprintf (name, "vmld%s2", bname+10);
21674
21675 /* Convert to uppercase. */
21676 name[4] &= ~0x20;
21677
21678 arity = 0;
21679 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21680 args = TREE_CHAIN (args))
21681 arity++;
21682
21683 if (arity == 1)
21684 fntype = build_function_type_list (type_out, type_in, NULL);
21685 else
21686 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21687
21688 /* Build a function declaration for the vectorized function. */
21689 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21690 TREE_PUBLIC (new_fndecl) = 1;
21691 DECL_EXTERNAL (new_fndecl) = 1;
21692 DECL_IS_NOVOPS (new_fndecl) = 1;
21693 TREE_READONLY (new_fndecl) = 1;
21694
21695 return new_fndecl;
21696}
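/* Worked example of the name mangling above (a standalone sketch, not
   GCC code): "__builtin_sinf" with four SFmode lanes becomes "vmlsSin4",
   "__builtin_sin" with two DFmode lanes becomes "vmldSin2", and log gets
   the special-cased "vmlsLn4"/"vmldLn2" names.  */
#if 0
#include <stdio.h>
#include <string.h>

static void
svml_mangle (char name[20], const char *bname, int n)
{
  if (strcmp (bname, "__builtin_logf") == 0)
    strcpy (name, "vmlsLn4");
  else if (strcmp (bname, "__builtin_log") == 0)
    strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      sprintf (name, "vmls%s", bname + 10);	/* Skip "__builtin_".  */
      name[strlen (name) - 1] = '4';		/* 'f' suffix -> lane count.  */
    }
  else
    sprintf (name, "vmld%s2", bname + 10);
  name[4] &= ~0x20;				/* Uppercase the first letter.  */
}

int
main (void)
{
  char buf[20];
  svml_mangle (buf, "__builtin_sinf", 4);
  puts (buf);		/* Prints "vmlsSin4".  */
  return 0;
}
#endif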
21697
21698/* Handler for an ACML-style interface to
21699 a library with vectorized intrinsics. */
21700
21701static tree
21702ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
21703{
21704 char name[20] = "__vr.._";
21705 tree fntype, new_fndecl, args;
21706 unsigned arity;
21707 const char *bname;
21708 enum machine_mode el_mode, in_mode;
21709 int n, in_n;
21710
 21711 /* The ACML is 64-bit only and suitable for unsafe math only, as
 21712 it does not correctly support parts of IEEE with the required
 21713 precision, such as denormals. */
21714 if (!TARGET_64BIT
21715 || !flag_unsafe_math_optimizations)
21716 return NULL_TREE;
21717
21718 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21719 n = TYPE_VECTOR_SUBPARTS (type_out);
21720 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21721 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21722 if (el_mode != in_mode
21723 || n != in_n)
21724 return NULL_TREE;
21725
21726 switch (fn)
21727 {
21728 case BUILT_IN_SIN:
21729 case BUILT_IN_COS:
21730 case BUILT_IN_EXP:
21731 case BUILT_IN_LOG:
21732 case BUILT_IN_LOG2:
21733 case BUILT_IN_LOG10:
21734 name[4] = 'd';
21735 name[5] = '2';
21736 if (el_mode != DFmode
21737 || n != 2)
21738 return NULL_TREE;
21739 break;
21740
21741 case BUILT_IN_SINF:
21742 case BUILT_IN_COSF:
21743 case BUILT_IN_EXPF:
21744 case BUILT_IN_POWF:
21745 case BUILT_IN_LOGF:
21746 case BUILT_IN_LOG2F:
21747 case BUILT_IN_LOG10F:
21748 name[4] = 's';
21749 name[5] = '4';
21750 if (el_mode != SFmode
21751 || n != 4)
21752 return NULL_TREE;
21753 break;
4f3f76e6 21754
21755 default:
21756 return NULL_TREE;
21757 }
21758
21759 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21760 sprintf (name + 7, "%s", bname+10);
21761
21762 arity = 0;
21763 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21764 args = TREE_CHAIN (args))
21765 arity++;
21766
21767 if (arity == 1)
21768 fntype = build_function_type_list (type_out, type_in, NULL);
21769 else
21770 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21771
21772 /* Build a function declaration for the vectorized function. */
21773 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21774 TREE_PUBLIC (new_fndecl) = 1;
21775 DECL_EXTERNAL (new_fndecl) = 1;
21776 DECL_IS_NOVOPS (new_fndecl) = 1;
21777 TREE_READONLY (new_fndecl) = 1;
21778
21779 return new_fndecl;
21780}
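/* Worked example (illustrative only): for BUILT_IN_COSF with four SFmode
   lanes the "__vr.._" template becomes "__vrs4_", and appending
   "__builtin_cosf" + 10 yields "__vrs4_cosf"; the two-lane DFmode
   BUILT_IN_LOG2 comes out as "__vrd2_log2".  */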
21781
21782
21783/* Returns a decl of a function that implements conversion of the
21784 input vector of type TYPE, or NULL_TREE if it is not available. */
21785
21786static tree
6b889d89 21787ix86_vectorize_builtin_conversion (unsigned int code, tree type)
21788{
21789 if (TREE_CODE (type) != VECTOR_TYPE)
21790 return NULL_TREE;
54a88090 21791
21792 switch (code)
21793 {
21794 case FLOAT_EXPR:
21795 switch (TYPE_MODE (type))
21796 {
21797 case V4SImode:
21798 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
21799 default:
21800 return NULL_TREE;
21801 }
21802
21803 case FIX_TRUNC_EXPR:
21804 switch (TYPE_MODE (type))
21805 {
21806 case V4SFmode:
21807 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
21808 default:
21809 return NULL_TREE;
21810 }
21811 default:
21812 return NULL_TREE;
21813
21814 }
21815}
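/* Example (illustrative only, not part of GCC): with this hook, an
   elementwise int-to-float loop can be vectorized with cvtdq2ps and a
   float-to-int truncation loop with cvttps2dq; the intrinsic
   equivalents, assuming SSE2 and <emmintrin.h>, are:  */
#if 0
#include <emmintrin.h>

__m128 int_to_float (__m128i v) { return _mm_cvtepi32_ps (v); }   /* FLOAT_EXPR */
__m128i float_to_int (__m128 v) { return _mm_cvttps_epi32 (v); }  /* FIX_TRUNC_EXPR */
#endif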
21816
 21817/* Returns the decl of a target-specific builtin that implements the
 21818 reciprocal of the function, or NULL_TREE if not available. */
21819
21820static tree
21821ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
21822 bool sqrt ATTRIBUTE_UNUSED)
6b889d89 21823{
4d869718 21824 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
21825 && flag_finite_math_only && !flag_trapping_math
21826 && flag_unsafe_math_optimizations))
21827 return NULL_TREE;
21828
21829 if (md_fn)
21830 /* Machine dependent builtins. */
21831 switch (fn)
21832 {
21833 /* Vectorized version of sqrt to rsqrt conversion. */
21834 case IX86_BUILTIN_SQRTPS_NR:
21835 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
6b889d89 21836
21837 default:
21838 return NULL_TREE;
21839 }
21840 else
21841 /* Normal builtins. */
21842 switch (fn)
21843 {
21844 /* Sqrt to rsqrt conversion. */
21845 case BUILT_IN_SQRTF:
21846 return ix86_builtins[IX86_BUILTIN_RSQRTF];
6b889d89 21847
21848 default:
21849 return NULL_TREE;
21850 }
21851}
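/* Sketch (illustrative only) of the sqrt-to-rsqrt rewrite this hook
   enables: 1/sqrt(a) is approximated by rsqrtps (roughly 12 bits of
   precision) followed by one Newton-Raphson step,
   x1 = 0.5 * x0 * (3 - a * x0 * x0), which is what the _NR builtin
   variants stand for.  Assumes SSE and <xmmintrin.h>.  */
#if 0
#include <xmmintrin.h>

static __m128
rsqrt_nr (__m128 a)
{
  __m128 x0 = _mm_rsqrt_ps (a);		/* Coarse hardware estimate.  */
  __m128 half = _mm_set1_ps (0.5f);
  __m128 three = _mm_set1_ps (3.0f);
  /* One Newton-Raphson refinement step.  */
  return _mm_mul_ps (_mm_mul_ps (half, x0),
		     _mm_sub_ps (three,
				 _mm_mul_ps (a, _mm_mul_ps (x0, x0))));
}
#endif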
21852
4211a8fb 21853/* Store OPERAND to the memory after reload is completed. This means
f710504c 21854 that we can't easily use assign_stack_local. */
4211a8fb 21855rtx
b96a374d 21856ix86_force_to_memory (enum machine_mode mode, rtx operand)
4211a8fb 21857{
898d374d 21858 rtx result;
5656a184 21859
d0396b79 21860 gcc_assert (reload_completed);
a5b378d6 21861 if (TARGET_RED_ZONE)
21862 {
21863 result = gen_rtx_MEM (mode,
21864 gen_rtx_PLUS (Pmode,
21865 stack_pointer_rtx,
21866 GEN_INT (-RED_ZONE_SIZE)));
21867 emit_move_insn (result, operand);
21868 }
a5b378d6 21869 else if (!TARGET_RED_ZONE && TARGET_64BIT)
4211a8fb 21870 {
898d374d 21871 switch (mode)
4211a8fb 21872 {
21873 case HImode:
21874 case SImode:
21875 operand = gen_lowpart (DImode, operand);
5efb1046 21876 /* FALLTHRU */
898d374d 21877 case DImode:
4211a8fb 21878 emit_insn (
21879 gen_rtx_SET (VOIDmode,
21880 gen_rtx_MEM (DImode,
21881 gen_rtx_PRE_DEC (DImode,
21882 stack_pointer_rtx)),
21883 operand));
21884 break;
21885 default:
d0396b79 21886 gcc_unreachable ();
21887 }
21888 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21889 }
21890 else
21891 {
21892 switch (mode)
21893 {
21894 case DImode:
21895 {
21896 rtx operands[2];
21897 split_di (&operand, 1, operands, operands + 1);
21898 emit_insn (
21899 gen_rtx_SET (VOIDmode,
21900 gen_rtx_MEM (SImode,
21901 gen_rtx_PRE_DEC (Pmode,
21902 stack_pointer_rtx)),
21903 operands[1]));
21904 emit_insn (
21905 gen_rtx_SET (VOIDmode,
21906 gen_rtx_MEM (SImode,
21907 gen_rtx_PRE_DEC (Pmode,
21908 stack_pointer_rtx)),
21909 operands[0]));
21910 }
21911 break;
21912 case HImode:
21913 /* Store HImodes as SImodes. */
21914 operand = gen_lowpart (SImode, operand);
5efb1046 21915 /* FALLTHRU */
898d374d 21916 case SImode:
4211a8fb 21917 emit_insn (
21918 gen_rtx_SET (VOIDmode,
21919 gen_rtx_MEM (GET_MODE (operand),
21920 gen_rtx_PRE_DEC (SImode,
21921 stack_pointer_rtx)),
21922 operand));
21923 break;
21924 default:
d0396b79 21925 gcc_unreachable ();
4211a8fb 21926 }
898d374d 21927 result = gen_rtx_MEM (mode, stack_pointer_rtx);
4211a8fb 21928 }
898d374d 21929 return result;
21930}
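/* Illustrative summary of the cases above: with a red zone the operand is
   simply stored below the stack pointer at -RED_ZONE_SIZE(%rsp) without
   adjusting %rsp; on 64-bit without a red zone it is pushed via a DImode
   pre-decrement store; and on 32-bit a DImode operand is split by
   split_di and pushed as two SImode words, high word first.  */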
21931
21932/* Free operand from the memory. */
21933void
b96a374d 21934ix86_free_from_memory (enum machine_mode mode)
4211a8fb 21935{
a5b378d6 21936 if (!TARGET_RED_ZONE)
21937 {
21938 int size;
21939
21940 if (mode == DImode || TARGET_64BIT)
21941 size = 8;
21942 else
21943 size = 4;
 21944 /* Use LEA to deallocate stack space. In peephole2 it will be converted
 21945 to a pop or add instruction if registers are available. */
21946 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21947 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
21948 GEN_INT (size))));
21949 }
4211a8fb 21950}
a946dd00 21951
21952/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
21953 QImode must go into class Q_REGS.
21954 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
892a2d68 21955 movdf to do mem-to-mem moves through integer regs. */
f84aa48a 21956enum reg_class
9415ab7d 21957ix86_preferred_reload_class (rtx x, enum reg_class regclass)
f84aa48a 21958{
21959 enum machine_mode mode = GET_MODE (x);
21960
5656a184 21961 /* We're only allowed to return a subclass of CLASS. Many of the
51df7179 21962 following checks fail for NO_REGS, so eliminate that early. */
9415ab7d 21963 if (regclass == NO_REGS)
f75959a6 21964 return NO_REGS;
21965
21966 /* All classes can load zeros. */
b5c82fa1 21967 if (x == CONST0_RTX (mode))
9415ab7d 21968 return regclass;
51df7179 21969
917f1b7e 21970 /* Force constants into memory if we are loading a (nonzero) constant into
21971 an MMX or SSE register. This is because there are no MMX/SSE instructions
21972 to load from a constant. */
21973 if (CONSTANT_P (x)
9415ab7d 21974 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
21975 return NO_REGS;
21976
21977 /* Prefer SSE regs only, if we can use them for math. */
21978 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
9415ab7d 21979 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
b5c82fa1 21980
51df7179 21981 /* Floating-point constants need more complex checks. */
21982 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
21983 {
f84aa48a 21984 /* General regs can load everything. */
21985 if (reg_class_subset_p (regclass, GENERAL_REGS))
21986 return regclass;
21987
21988 /* Floats can load 0 and 1 plus some others. Note that we eliminated
21989 zero above. We only want to wind up preferring 80387 registers if
21990 we plan on doing computation with them. */
21991 if (TARGET_80387
21992 && standard_80387_constant_p (x))
21993 {
21994 /* Limit class to non-sse. */
9415ab7d 21995 if (regclass == FLOAT_SSE_REGS)
51df7179 21996 return FLOAT_REGS;
9415ab7d 21997 if (regclass == FP_TOP_SSE_REGS)
51df7179 21998 return FP_TOP_REG;
9415ab7d 21999 if (regclass == FP_SECOND_SSE_REGS)
51df7179 22000 return FP_SECOND_REG;
22001 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
22002 return regclass;
22003 }
22004
22005 return NO_REGS;
f84aa48a 22006 }
22007
22008 /* Generally when we see PLUS here, it's the function invariant
22009 (plus soft-fp const_int). Which can only be computed into general
22010 regs. */
22011 if (GET_CODE (x) == PLUS)
9415ab7d 22012 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
22013
22014 /* QImode constants are easy to load, but non-constant QImode data
22015 must go into Q_REGS. */
22016 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
22017 {
22018 if (reg_class_subset_p (regclass, Q_REGS))
22019 return regclass;
22020 if (reg_class_subset_p (Q_REGS, regclass))
22021 return Q_REGS;
22022 return NO_REGS;
22023 }
22024
9415ab7d 22025 return regclass;
22026}
22027
22028/* Discourage putting floating-point values in SSE registers unless
22029 SSE math is being used, and likewise for the 387 registers. */
22030enum reg_class
9415ab7d 22031ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
22032{
22033 enum machine_mode mode = GET_MODE (x);
22034
22035 /* Restrict the output reload class to the register bank that we are doing
22036 math on. If we would like not to return a subset of CLASS, reject this
22037 alternative: if reload cannot do this, it will still use its choice. */
22039 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
9415ab7d 22040 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
b5c82fa1 22041
27ac40e2 22042 if (X87_FLOAT_MODE_P (mode))
b5c82fa1 22043 {
9415ab7d 22044 if (regclass == FP_TOP_SSE_REGS)
b5c82fa1 22045 return FP_TOP_REG;
9415ab7d 22046 else if (regclass == FP_SECOND_SSE_REGS)
22047 return FP_SECOND_REG;
22048 else
9415ab7d 22049 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
22050 }
22051
9415ab7d 22052 return regclass;
22053}
22054
22055/* If we are copying between general and FP registers, we need a memory
22056 location. The same is true for SSE and MMX registers.
22057
22058 To optimize register_move_cost performance, allow inline variant.
22059
 22060 The macro can't work reliably when one of the CLASSES is a class containing
22061 registers from multiple units (SSE, MMX, integer). We avoid this by never
22062 combining those units in single alternative in the machine description.
22063 Ensure that this constraint holds to avoid unexpected surprises.
22064
22065 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
22066 enforce these sanity checks. */
f75959a6 22067
22068static inline int
22069inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
b96a374d 22070 enum machine_mode mode, int strict)
22071{
22072 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
22073 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
22074 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
22075 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
22076 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
22077 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
22078 {
d0396b79 22079 gcc_assert (!strict);
f75959a6 22080 return true;
f84aa48a 22081 }
22082
22083 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
22084 return true;
22085
22086 /* ??? This is a lie. We do have moves between mmx/general, and for
22087 mmx/sse2. But by saying we need secondary memory we discourage the
22088 register allocator from using the mmx registers unless needed. */
22089 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
22090 return true;
22091
22092 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22093 {
22094 /* SSE1 doesn't have any direct moves from other classes. */
22095 if (!TARGET_SSE2)
22096 return true;
22097
5656a184 22098 /* If the target says that inter-unit moves are more expensive
f75959a6 22099 than moving through memory, then don't generate them. */
ed69105c 22100 if (!TARGET_INTER_UNIT_MOVES)
22101 return true;
22102
22103 /* Between SSE and general, we have moves no larger than word size. */
22104 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22105 return true;
22106 }
22107
22108 return false;
f84aa48a 22109}
f75959a6 22110
22111int
22112ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
22113 enum machine_mode mode, int strict)
22114{
22115 return inline_secondary_memory_needed (class1, class2, mode, strict);
22116}
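/* For instance (illustrative): an SImode copy between GENERAL_REGS and
   SSE_REGS needs no secondary memory on an SSE2 target with inter-unit
   moves enabled, but any copy where exactly one side is an MMX class is
   reported as needing memory, which deliberately steers the register
   allocator away from the MMX registers.  */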
22117
22118/* Return true if the registers in CLASS cannot represent the change from
22119 modes FROM to TO. */
22120
22121bool
22122ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9415ab7d 22123 enum reg_class regclass)
22124{
22125 if (from == to)
22126 return false;
22127
0fa2e4df 22128 /* x87 registers can't do subreg at all, as all values are reformatted
1272914c 22129 to extended precision. */
9415ab7d 22130 if (MAYBE_FLOAT_CLASS_P (regclass))
22131 return true;
22132
9415ab7d 22133 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
22134 {
22135 /* Vector registers do not support QI or HImode loads. If we don't
22136 disallow a change to these modes, reload will assume it's ok to
22137 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
22138 the vec_dupv4hi pattern. */
22139 if (GET_MODE_SIZE (from) < 4)
22140 return true;
22141
22142 /* Vector registers do not support subreg with nonzero offsets, which
5656a184 22143 are otherwise valid for integer registers. Since we can't see
22144 whether we have a nonzero offset from here, prohibit all
22145 nonparadoxical subregs changing size. */
22146 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
22147 return true;
22148 }
22149
22150 return false;
22151}
22152
22153/* Return the cost of moving data of mode M between a
22154 register and memory. A value of 2 is the default; this cost is
22155 relative to those in `REGISTER_MOVE_COST'.
22156
 22157 This function is used extensively by register_move_cost, which is used
 22158 to build tables at startup; keep it inline in that case.
 22159 When IN is 2, return the maximum of the in and out move costs.
22160
22161 If moving between registers and memory is more expensive than
22162 between two registers, you should define this macro to express the
22163 relative cost.
22164
 22165 Also model the increased cost of moving QImode registers in
 22166 non-Q_REGS classes.
22167 */
22168static inline int
22169inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
22170 int in)
22171{
22172 int cost;
22173 if (FLOAT_CLASS_P (regclass))
22174 {
22175 int index;
22176 switch (mode)
22177 {
22178 case SFmode:
22179 index = 0;
22180 break;
22181 case DFmode:
22182 index = 1;
22183 break;
22184 case XFmode:
22185 index = 2;
22186 break;
22187 default:
22188 return 100;
22189 }
22190 if (in == 2)
22191 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
22192 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
22193 }
22194 if (SSE_CLASS_P (regclass))
22195 {
22196 int index;
22197 switch (GET_MODE_SIZE (mode))
22198 {
22199 case 4:
22200 index = 0;
22201 break;
22202 case 8:
22203 index = 1;
22204 break;
22205 case 16:
22206 index = 2;
22207 break;
22208 default:
22209 return 100;
22210 }
22211 if (in == 2)
22212 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
22213 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
22214 }
22215 if (MMX_CLASS_P (regclass))
22216 {
22217 int index;
22218 switch (GET_MODE_SIZE (mode))
22219 {
22220 case 4:
22221 index = 0;
22222 break;
22223 case 8:
22224 index = 1;
22225 break;
22226 default:
22227 return 100;
22228 }
 22229 if (in == 2)
22230 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
22231 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
22232 }
22233 switch (GET_MODE_SIZE (mode))
22234 {
22235 case 1:
22236 if (Q_CLASS_P (regclass) || TARGET_64BIT)
22237 {
22238 if (!in)
22239 return ix86_cost->int_store[0];
22240 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
22241 cost = ix86_cost->movzbl_load;
22242 else
22243 cost = ix86_cost->int_load[0];
22244 if (in == 2)
22245 return MAX (cost, ix86_cost->int_store[0]);
22246 return cost;
22247 }
22248 else
22249 {
22250 if (in == 2)
22251 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
22252 if (in)
22253 return ix86_cost->movzbl_load;
22254 else
22255 return ix86_cost->int_store[0] + 4;
22256 }
22257 break;
22258 case 2:
22259 if (in == 2)
22260 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
22261 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
22262 default:
 22263 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
22264 if (mode == TFmode)
22265 mode = XFmode;
22266 if (in == 2)
22267 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
22268 else if (in)
22269 cost = ix86_cost->int_load[2];
22270 else
22271 cost = ix86_cost->int_store[2];
22272 return (cost * (((int) GET_MODE_SIZE (mode)
22273 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
22274 }
22275}
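/* Worked example (illustrative): for DFmode in FLOAT_REGS the index is 1,
   so IN == 1 yields ix86_cost->fp_load[1], IN == 0 yields
   ix86_cost->fp_store[1], and IN == 2 yields the maximum of the two.  A
   QImode value outside Q_REGS on 32-bit pays a flat penalty of 4 on the
   store path.  */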
22276
22277int
22278ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
22279{
22280 return inline_memory_move_cost (mode, regclass, in);
22281}
22282
22283
f84aa48a 22284/* Return the cost of moving data from a register in class CLASS1 to
a4f31c00 22285 one in class CLASS2.
22286
22287 It is not required that the cost always equal 2 when FROM is the same as TO;
22288 on some machines it is expensive to move between registers if they are not
22289 general registers. */
f75959a6 22290
f84aa48a 22291int
22292ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
22293 enum reg_class class2)
22294{
22295 /* In case we require secondary memory, compute cost of the store followed
b96a374d 22296 by load. In order to avoid bad register allocation choices, we need
22297 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
22298
6232eadc 22299 if (inline_secondary_memory_needed (class1, class2, mode, 0))
f84aa48a 22300 {
22301 int cost = 1;
22302
22303 cost += inline_memory_move_cost (mode, class1, 2);
22304 cost += inline_memory_move_cost (mode, class2, 2);
b96a374d 22305
 22306 /* In case of copying from a general purpose register we may emit
 22307 multiple stores followed by a single load, causing a memory size
 22308 mismatch stall. Count this as an arbitrarily high cost of 20. */
62415523 22309 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
22310 cost += 20;
22311
22312 /* In the case of FP/MMX moves, the registers actually overlap, and we
22313 have to switch modes in order to treat them differently. */
22314 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
22315 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
22316 cost += 20;
22317
22318 return cost;
f84aa48a 22319 }
d631b80a 22320
92d0fb09 22321 /* Moves between SSE/MMX and integer unit are expensive. */
22322 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
22323 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22324
 22325 /* ??? By keeping the returned value relatively high, we limit the
 22326 number of moves between integer and MMX/SSE registers for all targets.
 22327 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
 22328 where integer modes in MMX/SSE registers are not tieable
 22329 because of missing QImode and HImode moves to, from or between
 22330 MMX/SSE registers. */
979c67a5 22331 return MAX (8, ix86_cost->mmxsse_to_integer);
147bbdd0 22332
22333 if (MAYBE_FLOAT_CLASS_P (class1))
22334 return ix86_cost->fp_move;
22335 if (MAYBE_SSE_CLASS_P (class1))
22336 return ix86_cost->sse_move;
22337 if (MAYBE_MMX_CLASS_P (class1))
22338 return ix86_cost->mmx_move;
22339 return 2;
22340}
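/* Worked example (illustrative): copying DImode from GENERAL_REGS to
   SSE_REGS on a target without direct inter-unit moves goes through
   memory, so the cost is 1 plus both symmetric memory move costs, plus
   the extra 20 because the general side occupies two registers against
   the SSE side's one.  */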
22341
a946dd00 22342/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
22343
22344bool
b96a374d 22345ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
22346{
22347 /* Flags and only flags can only hold CCmode values. */
22348 if (CC_REGNO_P (regno))
22349 return GET_MODE_CLASS (mode) == MODE_CC;
22350 if (GET_MODE_CLASS (mode) == MODE_CC
22351 || GET_MODE_CLASS (mode) == MODE_RANDOM
22352 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
22353 return 0;
22354 if (FP_REGNO_P (regno))
22355 return VALID_FP_MODE_P (mode);
22356 if (SSE_REGNO_P (regno))
dcbca208 22357 {
22358 /* We implement the move patterns for all vector modes into and
22359 out of SSE registers, even when no operation instructions
22360 are available. */
22361 return (VALID_SSE_REG_MODE (mode)
22362 || VALID_SSE2_REG_MODE (mode)
22363 || VALID_MMX_REG_MODE (mode)
22364 || VALID_MMX_REG_MODE_3DNOW (mode));
dcbca208 22365 }
a946dd00 22366 if (MMX_REGNO_P (regno))
dcbca208 22367 {
22368 /* We implement the move patterns for 3DNOW modes even in MMX mode,
22369 so if the register is available at all, then we can move data of
22370 the given mode into or out of it. */
22371 return (VALID_MMX_REG_MODE (mode)
22372 || VALID_MMX_REG_MODE_3DNOW (mode));
dcbca208 22373 }
22374
22375 if (mode == QImode)
22376 {
22377 /* Take care for QImode values - they can be in non-QI regs,
22378 but then they do cause partial register stalls. */
22379 if (regno < 4 || TARGET_64BIT)
22380 return 1;
22381 if (!TARGET_PARTIAL_REG_STALL)
22382 return 1;
22383 return reload_in_progress || reload_completed;
22384 }
22385 /* We handle both integer and floats in the general purpose registers. */
22386 else if (VALID_INT_MODE_P (mode))
22387 return 1;
22388 else if (VALID_FP_MODE_P (mode))
22389 return 1;
22390 else if (VALID_DFP_MODE_P (mode))
22391 return 1;
b4e82619 22392 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
5656a184 22393 on to use that value in smaller contexts, this can easily force a
b4e82619
RH
22394 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
22395 supporting DImode, allow it. */
22396 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
a946dd00 22397 return 1;
22398
22399 return 0;
a946dd00 22400}
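/* Examples (illustrative): QImode fits in %eax everywhere, but in %esi
   only on 64-bit targets or when partial register stalls are tolerated;
   16-byte vector modes are OK in SSE registers because the move patterns
   exist even without operation instructions; and MMX registers accept
   the MMX and 3DNOW vector modes.  */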
fa79946e 22401
5656a184 22402/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
22403 tieable integer mode. */
22404
22405static bool
22406ix86_tieable_integer_mode_p (enum machine_mode mode)
22407{
22408 switch (mode)
22409 {
22410 case HImode:
22411 case SImode:
22412 return true;
22413
22414 case QImode:
22415 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
22416
22417 case DImode:
22418 return TARGET_64BIT;
22419
22420 default:
22421 return false;
22422 }
22423}
22424
22425/* Return true if MODE1 is accessible in a register that can hold MODE2
22426 without copying. That is, all register classes that can hold MODE2
22427 can also hold MODE1. */
22428
22429bool
22430ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22431{
22432 if (mode1 == mode2)
22433 return true;
22434
22435 if (ix86_tieable_integer_mode_p (mode1)
22436 && ix86_tieable_integer_mode_p (mode2))
22437 return true;
22438
22439 /* MODE2 being XFmode implies fp stack or general regs, which means we
22440 can tie any smaller floating point modes to it. Note that we do not
22441 tie this with TFmode. */
22442 if (mode2 == XFmode)
22443 return mode1 == SFmode || mode1 == DFmode;
22444
22445 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
22446 that we can tie it with SFmode. */
22447 if (mode2 == DFmode)
22448 return mode1 == SFmode;
22449
5656a184 22450 /* If MODE2 is only appropriate for an SSE register, then tie with
c1c5b5e3 22451 any other mode acceptable to SSE registers. */
8ab93332 22452 if (GET_MODE_SIZE (mode2) == 16
b4e82619 22453 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
22454 return (GET_MODE_SIZE (mode1) == 16
22455 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
c1c5b5e3 22456
8ab93332 22457 /* If MODE2 is appropriate for an MMX register, then tie
c1c5b5e3 22458 with any other mode acceptable to MMX registers. */
22459 if (GET_MODE_SIZE (mode2) == 8
22460 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
93de7743 22461 return (GET_MODE_SIZE (mode1) == 8
8ab93332 22462 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
22463
22464 return false;
22465}
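/* For example (illustrative): SFmode ties to DFmode and XFmode because
   every register class holding the wider float mode can also hold
   SFmode, while V4SFmode ties only to other 16-byte SSE modes such as
   V2DFmode, and DImode ties to SImode only on 64-bit targets.  */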
22466
22467/* Compute a (partial) cost for rtx X. Return true if the complete
22468 cost has been computed, and false if subexpressions should be
22469 scanned. In either case, *TOTAL contains the cost result. */
22470
22471static bool
9415ab7d 22472ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
3c50106f 22473{
9415ab7d 22474 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22475 enum machine_mode mode = GET_MODE (x);
22476
22477 switch (code)
22478 {
22479 case CONST_INT:
22480 case CONST:
22481 case LABEL_REF:
22482 case SYMBOL_REF:
8fe75e43 22483 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
3c50106f 22484 *total = 3;
8fe75e43 22485 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
3c50106f 22486 *total = 2;
22487 else if (flag_pic && SYMBOLIC_CONST (x)
22488 && (!TARGET_64BIT
 22489 || (GET_CODE (x) != LABEL_REF
22490 && (GET_CODE (x) != SYMBOL_REF
12969f45 22491 || !SYMBOL_REF_LOCAL_P (x)))))
22492 *total = 1;
22493 else
22494 *total = 0;
22495 return true;
22496
22497 case CONST_DOUBLE:
22498 if (mode == VOIDmode)
22499 *total = 0;
22500 else
22501 switch (standard_80387_constant_p (x))
22502 {
22503 case 1: /* 0.0 */
22504 *total = 1;
22505 break;
881b2a96 22506 default: /* Other constants */
22507 *total = 2;
22508 break;
22509 case 0:
22510 case -1:
22511 /* Start with (MEM (SYMBOL_REF)), since that's where
22512 it'll probably end up. Add a penalty for size. */
22513 *total = (COSTS_N_INSNS (1)
3504dad3 22514 + (flag_pic != 0 && !TARGET_64BIT)
22515 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
22516 break;
22517 }
22518 return true;
22519
22520 case ZERO_EXTEND:
 22521 /* Zero extension is often completely free on x86_64, so make
 22522 it as cheap as possible. */
22523 if (TARGET_64BIT && mode == DImode
22524 && GET_MODE (XEXP (x, 0)) == SImode)
22525 *total = 1;
22526 else if (TARGET_ZERO_EXTEND_WITH_AND)
a9cc9cc6 22527 *total = ix86_cost->add;
3c50106f 22528 else
a9cc9cc6 22529 *total = ix86_cost->movzx;
22530 return false;
22531
22532 case SIGN_EXTEND:
a9cc9cc6 22533 *total = ix86_cost->movsx;
22534 return false;
22535
22536 case ASHIFT:
7656aee4 22537 if (CONST_INT_P (XEXP (x, 1))
22538 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
22539 {
22540 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22541 if (value == 1)
22542 {
a9cc9cc6 22543 *total = ix86_cost->add;
22544 return false;
22545 }
22546 if ((value == 2 || value == 3)
22547 && ix86_cost->lea <= ix86_cost->shift_const)
22548 {
a9cc9cc6 22549 *total = ix86_cost->lea;
22550 return false;
22551 }
22552 }
5efb1046 22553 /* FALLTHRU */
22554
22555 case ROTATE:
22556 case ASHIFTRT:
22557 case LSHIFTRT:
22558 case ROTATERT:
22559 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
22560 {
7656aee4 22561 if (CONST_INT_P (XEXP (x, 1)))
22562 {
22563 if (INTVAL (XEXP (x, 1)) > 32)
a9cc9cc6 22564 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
3c50106f 22565 else
a9cc9cc6 22566 *total = ix86_cost->shift_const * 2;
22567 }
22568 else
22569 {
22570 if (GET_CODE (XEXP (x, 1)) == AND)
a9cc9cc6 22571 *total = ix86_cost->shift_var * 2;
3c50106f 22572 else
a9cc9cc6 22573 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
22574 }
22575 }
22576 else
22577 {
7656aee4 22578 if (CONST_INT_P (XEXP (x, 1)))
a9cc9cc6 22579 *total = ix86_cost->shift_const;
3c50106f 22580 else
a9cc9cc6 22581 *total = ix86_cost->shift_var;
22582 }
22583 return false;
22584
22585 case MULT:
22586 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22587 {
22588 /* ??? SSE scalar cost should be used here. */
22589 *total = ix86_cost->fmul;
22590 return false;
22591 }
22592 else if (X87_FLOAT_MODE_P (mode))
3c50106f 22593 {
a9cc9cc6 22594 *total = ix86_cost->fmul;
4a5eab38 22595 return false;
3c50106f 22596 }
27ac40e2
UB
22597 else if (FLOAT_MODE_P (mode))
22598 {
22599 /* ??? SSE vector cost should be used here. */
22600 *total = ix86_cost->fmul;
22601 return false;
22602 }
22603 else
22604 {
22605 rtx op0 = XEXP (x, 0);
22606 rtx op1 = XEXP (x, 1);
22607 int nbits;
7656aee4 22608 if (CONST_INT_P (XEXP (x, 1)))
22609 {
22610 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22611 for (nbits = 0; value != 0; value &= value - 1)
22612 nbits++;
22613 }
22614 else
22615 /* This is arbitrary. */
22616 nbits = 7;
22617
22618 /* Compute costs correctly for widening multiplication. */
e3dafdf4 22619 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22620 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22621 == GET_MODE_SIZE (mode))
22622 {
22623 int is_mulwiden = 0;
22624 enum machine_mode inner_mode = GET_MODE (op0);
22625
22626 if (GET_CODE (op0) == GET_CODE (op1))
22627 is_mulwiden = 1, op1 = XEXP (op1, 0);
7656aee4 22628 else if (CONST_INT_P (op1))
22629 {
22630 if (GET_CODE (op0) == SIGN_EXTEND)
22631 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22632 == INTVAL (op1);
22633 else
22634 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22635 }
22636
22637 if (is_mulwiden)
22638 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22639 }
f676971a 22640
22641 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
22642 + nbits * ix86_cost->mult_bit
22643 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
22644
22645 return true;
3c50106f 22646 }
22647
22648 case DIV:
22649 case UDIV:
22650 case MOD:
22651 case UMOD:
22652 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22653 /* ??? SSE cost should be used here. */
22654 *total = ix86_cost->fdiv;
22655 else if (X87_FLOAT_MODE_P (mode))
22656 *total = ix86_cost->fdiv;
22657 else if (FLOAT_MODE_P (mode))
22658 /* ??? SSE vector cost should be used here. */
a9cc9cc6 22659 *total = ix86_cost->fdiv;
3c50106f 22660 else
a9cc9cc6 22661 *total = ix86_cost->divide[MODE_INDEX (mode)];
22662 return false;
22663
22664 case PLUS:
27ac40e2 22665 if (GET_MODE_CLASS (mode) == MODE_INT
22666 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
22667 {
22668 if (GET_CODE (XEXP (x, 0)) == PLUS
22669 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7656aee4 22670 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22671 && CONSTANT_P (XEXP (x, 1)))
22672 {
22673 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22674 if (val == 2 || val == 4 || val == 8)
22675 {
a9cc9cc6 22676 *total = ix86_cost->lea;
22677 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22678 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
22679 outer_code);
22680 *total += rtx_cost (XEXP (x, 1), outer_code);
22681 return true;
22682 }
22683 }
22684 else if (GET_CODE (XEXP (x, 0)) == MULT
7656aee4 22685 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22686 {
22687 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22688 if (val == 2 || val == 4 || val == 8)
22689 {
a9cc9cc6 22690 *total = ix86_cost->lea;
22691 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22692 *total += rtx_cost (XEXP (x, 1), outer_code);
22693 return true;
22694 }
22695 }
22696 else if (GET_CODE (XEXP (x, 0)) == PLUS)
22697 {
a9cc9cc6 22698 *total = ix86_cost->lea;
22699 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22700 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22701 *total += rtx_cost (XEXP (x, 1), outer_code);
22702 return true;
22703 }
22704 }
5efb1046 22705 /* FALLTHRU */
22706
22707 case MINUS:
22708 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22709 {
22710 /* ??? SSE cost should be used here. */
22711 *total = ix86_cost->fadd;
22712 return false;
22713 }
22714 else if (X87_FLOAT_MODE_P (mode))
22715 {
22716 *total = ix86_cost->fadd;
22717 return false;
22718 }
22719 else if (FLOAT_MODE_P (mode))
3c50106f 22720 {
27ac40e2 22721 /* ??? SSE vector cost should be used here. */
a9cc9cc6 22722 *total = ix86_cost->fadd;
22723 return false;
22724 }
5efb1046 22725 /* FALLTHRU */
22726
22727 case AND:
22728 case IOR:
22729 case XOR:
22730 if (!TARGET_64BIT && mode == DImode)
22731 {
a9cc9cc6 22732 *total = (ix86_cost->add * 2
3c50106f
RH
22733 + (rtx_cost (XEXP (x, 0), outer_code)
22734 << (GET_MODE (XEXP (x, 0)) != DImode))
22735 + (rtx_cost (XEXP (x, 1), outer_code)
b96a374d 22736 << (GET_MODE (XEXP (x, 1)) != DImode)));
22737 return true;
22738 }
5efb1046 22739 /* FALLTHRU */
22740
22741 case NEG:
22742 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22743 {
22744 /* ??? SSE cost should be used here. */
22745 *total = ix86_cost->fchs;
22746 return false;
22747 }
22748 else if (X87_FLOAT_MODE_P (mode))
22749 {
22750 *total = ix86_cost->fchs;
22751 return false;
22752 }
22753 else if (FLOAT_MODE_P (mode))
3c50106f 22754 {
27ac40e2 22755 /* ??? SSE vector cost should be used here. */
a9cc9cc6 22756 *total = ix86_cost->fchs;
22757 return false;
22758 }
5efb1046 22759 /* FALLTHRU */
22760
22761 case NOT:
22762 if (!TARGET_64BIT && mode == DImode)
a9cc9cc6 22763 *total = ix86_cost->add * 2;
3c50106f 22764 else
a9cc9cc6 22765 *total = ix86_cost->add;
22766 return false;
22767
22768 case COMPARE:
22769 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
22770 && XEXP (XEXP (x, 0), 1) == const1_rtx
7656aee4 22771 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
22772 && XEXP (x, 1) == const0_rtx)
22773 {
22774 /* This kind of construct is implemented using test[bwl].
22775 Treat it as if we had an AND. */
a9cc9cc6 22776 *total = (ix86_cost->add
22777 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
22778 + rtx_cost (const1_rtx, outer_code));
22779 return true;
22780 }
22781 return false;
22782
3c50106f 22783 case FLOAT_EXTEND:
27ac40e2 22784 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
ce7d4645 22785 *total = 0;
22786 return false;
22787
22788 case ABS:
22789 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22790 /* ??? SSE cost should be used here. */
22791 *total = ix86_cost->fabs;
22792 else if (X87_FLOAT_MODE_P (mode))
22793 *total = ix86_cost->fabs;
22794 else if (FLOAT_MODE_P (mode))
22795 /* ??? SSE vector cost should be used here. */
a9cc9cc6 22796 *total = ix86_cost->fabs;
22797 return false;
22798
22799 case SQRT:
22800 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22801 /* ??? SSE cost should be used here. */
22802 *total = ix86_cost->fsqrt;
22803 else if (X87_FLOAT_MODE_P (mode))
22804 *total = ix86_cost->fsqrt;
22805 else if (FLOAT_MODE_P (mode))
22806 /* ??? SSE vector cost should be used here. */
a9cc9cc6 22807 *total = ix86_cost->fsqrt;
22808 return false;
22809
22810 case UNSPEC:
22811 if (XINT (x, 1) == UNSPEC_TP)
22812 *total = 0;
22813 return false;
22814
22815 default:
22816 return false;
22817 }
22818}
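/* Worked example (illustrative): with these cost tables (reg << 1) is
   priced as an add, (reg << 2) as an lea when lea is no dearer than a
   constant shift, and (base + index * 4 + symbol) matches the
   scaled-index PLUS pattern, so it is priced as one lea plus the costs
   of its operands.  */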
22819
22820#if TARGET_MACHO
22821
22822static int current_machopic_label_num;
22823
22824/* Given a symbol name and its associated stub, write out the
22825 definition of the stub. */
22826
22827void
b96a374d 22828machopic_output_stub (FILE *file, const char *symb, const char *stub)
22829{
22830 unsigned int length;
22831 char *binder_name, *symbol_name, lazy_ptr_name[32];
22832 int label = ++current_machopic_label_num;
22833
22834 /* For 64-bit we shouldn't get here. */
22835 gcc_assert (!TARGET_64BIT);
22836
22837 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22838 symb = (*targetm.strip_name_encoding) (symb);
22839
22840 length = strlen (stub);
22841 binder_name = alloca (length + 32);
22842 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
22843
22844 length = strlen (symb);
22845 symbol_name = alloca (length + 32);
22846 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
22847
22848 sprintf (lazy_ptr_name, "L%d$lz", label);
22849
22850 if (MACHOPIC_PURE)
56c779bc 22851 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
b069de3b 22852 else
56c779bc 22853 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
b069de3b
SS
22854
22855 fprintf (file, "%s:\n", stub);
22856 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22857
22858 if (MACHOPIC_PURE)
22859 {
4b1c1f6f
EC
22860 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
22861 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
22862 fprintf (file, "\tjmp\t*%%edx\n");
b069de3b
SS
22863 }
22864 else
4b1c1f6f 22865 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
b96a374d 22866
b069de3b 22867 fprintf (file, "%s:\n", binder_name);
b96a374d 22868
b069de3b
SS
22869 if (MACHOPIC_PURE)
22870 {
4b1c1f6f
EC
22871 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
22872 fprintf (file, "\tpushl\t%%eax\n");
b069de3b
SS
22873 }
22874 else
4b1c1f6f 22875 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
b069de3b 22876
4b1c1f6f 22877 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
b069de3b 22878
56c779bc 22879 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
b069de3b
SS
22880 fprintf (file, "%s:\n", lazy_ptr_name);
22881 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22882 fprintf (file, "\t.long %s\n", binder_name);
22883}
7d072037
SH
22884
22885void
22886darwin_x86_file_end (void)
22887{
22888 darwin_file_end ();
22889 ix86_file_end ();
22890}
b069de3b
SS
22891#endif /* TARGET_MACHO */
22892
162f023b
JH
22893/* Order the registers for register allocator. */
22894
22895void
b96a374d 22896x86_order_regs_for_local_alloc (void)
162f023b
JH
22897{
22898 int pos = 0;
22899 int i;
22900
22901 /* First allocate the local general purpose registers. */
22902 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22903 if (GENERAL_REGNO_P (i) && call_used_regs[i])
22904 reg_alloc_order [pos++] = i;
22905
22906 /* Global general purpose registers. */
22907 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22908 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
22909 reg_alloc_order [pos++] = i;
22910
22911 /* x87 registers come first in case we are doing FP math
22912 using them. */
22913 if (!TARGET_SSE_MATH)
22914 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22915 reg_alloc_order [pos++] = i;
fce5a9f2 22916
162f023b
JH
22917 /* SSE registers. */
22918 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
22919 reg_alloc_order [pos++] = i;
22920 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
22921 reg_alloc_order [pos++] = i;
22922
d1f87653 22923 /* x87 registers. */
162f023b
JH
22924 if (TARGET_SSE_MATH)
22925 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22926 reg_alloc_order [pos++] = i;
22927
22928 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
22929 reg_alloc_order [pos++] = i;
22930
 22931	  /* Initialize the rest of the array, as we do not allocate some registers
22932 at all. */
22933 while (pos < FIRST_PSEUDO_REGISTER)
22934 reg_alloc_order [pos++] = 0;
22935}
194734e9 22936
fe77449a
DR
22937/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
22938 struct attribute_spec.handler. */
22939static tree
b96a374d
AJ
22940ix86_handle_struct_attribute (tree *node, tree name,
22941 tree args ATTRIBUTE_UNUSED,
22942 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
fe77449a
DR
22943{
22944 tree *type = NULL;
22945 if (DECL_P (*node))
22946 {
22947 if (TREE_CODE (*node) == TYPE_DECL)
22948 type = &TREE_TYPE (*node);
22949 }
22950 else
22951 type = node;
22952
22953 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
22954 || TREE_CODE (*type) == UNION_TYPE)))
22955 {
5c498b10
DD
22956 warning (OPT_Wattributes, "%qs attribute ignored",
22957 IDENTIFIER_POINTER (name));
fe77449a
DR
22958 *no_add_attrs = true;
22959 }
22960
22961 else if ((is_attribute_p ("ms_struct", name)
22962 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
22963 || ((is_attribute_p ("gcc_struct", name)
22964 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
22965 {
5c498b10 22966 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
fe77449a
DR
22967 IDENTIFIER_POINTER (name));
22968 *no_add_attrs = true;
22969 }
22970
22971 return NULL_TREE;
22972}
22973
4977bab6 22974static bool
3101faab 22975ix86_ms_bitfield_layout_p (const_tree record_type)
4977bab6 22976{
6ac49599 22977 return (TARGET_MS_BITFIELD_LAYOUT &&
021bad8e 22978 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
fe77449a 22979 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
4977bab6
ZW
22980}
22981
483ab821
MM
22982/* Returns an expression indicating where the this parameter is
22983 located on entry to the FUNCTION. */
22984
22985static rtx
b96a374d 22986x86_this_parameter (tree function)
483ab821
MM
22987{
22988 tree type = TREE_TYPE (function);
ccf8e764 22989 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
34614fff 22990 int nregs;
483ab821 22991
3961e8fe
RH
22992 if (TARGET_64BIT)
22993 {
ccf8e764
RH
22994 const int *parm_regs;
22995
22996 if (TARGET_64BIT_MS_ABI)
22997 parm_regs = x86_64_ms_abi_int_parameter_registers;
22998 else
22999 parm_regs = x86_64_int_parameter_registers;
23000 return gen_rtx_REG (DImode, parm_regs[aggr]);
3961e8fe
RH
23001 }
23002
34614fff
MP
23003 nregs = ix86_function_regparm (type, function);
23004
23005 if (nregs > 0 && !stdarg_p (type))
483ab821 23006 {
34614fff
MP
23007 int regno;
23008
ccf8e764 23009 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
34614fff
MP
23010 regno = aggr ? DX_REG : CX_REG;
23011 else
23012 {
23013 regno = AX_REG;
23014 if (aggr)
23015 {
23016 regno = DX_REG;
23017 if (nregs == 1)
23018 return gen_rtx_MEM (SImode,
23019 plus_constant (stack_pointer_rtx, 4));
23020 }
23021 }
ccf8e764 23022 return gen_rtx_REG (SImode, regno);
483ab821
MM
23023 }
23024
ccf8e764 23025 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
483ab821
MM
23026}
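/* Illustrative summary of the 32-bit cases above (64-bit always uses the
   first integer argument register): fastcall puts this in %ecx, or %edx
   when a hidden aggregate-return pointer occupies %ecx; regparm puts this
   in %eax, in %edx when the hidden pointer occupies %eax, or at 4(%esp)
   when only one register is available; otherwise this lives on the stack
   at 4(%esp), or at 8(%esp) past the hidden pointer.  */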
23027
3961e8fe
RH
23028/* Determine whether x86_output_mi_thunk can succeed. */
23029
23030static bool
3101faab 23031x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
b96a374d 23032 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
3101faab 23033 HOST_WIDE_INT vcall_offset, const_tree function)
3961e8fe
RH
23034{
23035 /* 64-bit can handle anything. */
23036 if (TARGET_64BIT)
23037 return true;
23038
23039 /* For 32-bit, everything's fine if we have one free register. */
e767b5be 23040 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
3961e8fe
RH
23041 return true;
23042
23043 /* Need a free register for vcall_offset. */
23044 if (vcall_offset)
23045 return false;
23046
23047 /* Need a free register for GOT references. */
23048 if (flag_pic && !(*targetm.binds_local_p) (function))
23049 return false;
23050
23051 /* Otherwise ok. */
23052 return true;
23053}
23054
23055/* Output the assembler code for a thunk function. THUNK_DECL is the
23056 declaration for the thunk function itself, FUNCTION is the decl for
23057 the target function. DELTA is an immediate constant offset to be
272d0bee 23058 added to THIS. If VCALL_OFFSET is nonzero, the word at
3961e8fe 23059 *(*this + vcall_offset) should be added to THIS. */
483ab821 23060
c590b625 23061static void
b96a374d
AJ
23062x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
23063 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
23064 HOST_WIDE_INT vcall_offset, tree function)
194734e9 23065{
194734e9 23066 rtx xops[3];
9415ab7d 23067 rtx this_param = x86_this_parameter (function);
3961e8fe 23068 rtx this_reg, tmp;
194734e9 23069
3961e8fe
RH
23070 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
23071 pull it in now and let DELTA benefit. */
9415ab7d
TN
23072 if (REG_P (this_param))
23073 this_reg = this_param;
3961e8fe
RH
23074 else if (vcall_offset)
23075 {
23076 /* Put the this parameter into %eax. */
9415ab7d 23077 xops[0] = this_param;
29b74761 23078 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
3961e8fe
RH
23079 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
23080 }
23081 else
23082 this_reg = NULL_RTX;
23083
23084 /* Adjust the this parameter by a fixed constant. */
23085 if (delta)
194734e9 23086 {
483ab821 23087 xops[0] = GEN_INT (delta);
9415ab7d 23088 xops[1] = this_reg ? this_reg : this_param;
3961e8fe 23089 if (TARGET_64BIT)
194734e9 23090 {
3961e8fe
RH
23091 if (!x86_64_general_operand (xops[0], DImode))
23092 {
3c4ace25 23093 tmp = gen_rtx_REG (DImode, R10_REG);
3961e8fe
RH
23094 xops[1] = tmp;
23095 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
23096 xops[0] = tmp;
9415ab7d 23097 xops[1] = this_param;
3961e8fe
RH
23098 }
23099 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
194734e9
JH
23100 }
23101 else
3961e8fe 23102 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
194734e9 23103 }
3961e8fe
RH
23104
23105 /* Adjust the this parameter by a value stored in the vtable. */
23106 if (vcall_offset)
194734e9 23107 {
3961e8fe 23108 if (TARGET_64BIT)
3c4ace25 23109 tmp = gen_rtx_REG (DImode, R10_REG);
3961e8fe 23110 else
e767b5be 23111 {
29b74761 23112 int tmp_regno = CX_REG;
e767b5be 23113 if (lookup_attribute ("fastcall",
ccf8e764 23114 TYPE_ATTRIBUTES (TREE_TYPE (function))))
29b74761 23115 tmp_regno = AX_REG;
e767b5be
JH
23116 tmp = gen_rtx_REG (SImode, tmp_regno);
23117 }
483ab821 23118
3961e8fe
RH
23119 xops[0] = gen_rtx_MEM (Pmode, this_reg);
23120 xops[1] = tmp;
23121 if (TARGET_64BIT)
23122 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23123 else
23124 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
483ab821 23125
3961e8fe
RH
23126 /* Adjust the this parameter. */
23127 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
23128 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
23129 {
3c4ace25 23130 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
3961e8fe
RH
23131 xops[0] = GEN_INT (vcall_offset);
23132 xops[1] = tmp2;
23133 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23134 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
483ab821 23135 }
3961e8fe
RH
23136 xops[1] = this_reg;
23137 if (TARGET_64BIT)
23138 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
23139 else
23140 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
23141 }
194734e9 23142
3961e8fe 23143 /* If necessary, drop THIS back to its stack slot. */
9415ab7d 23144 if (this_reg && this_reg != this_param)
3961e8fe
RH
23145 {
23146 xops[0] = this_reg;
9415ab7d 23147 xops[1] = this_param;
3961e8fe
RH
23148 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
23149 }
194734e9 23150
89ce1c8f 23151 xops[0] = XEXP (DECL_RTL (function), 0);
3961e8fe
RH
23152 if (TARGET_64BIT)
23153 {
23154 if (!flag_pic || (*targetm.binds_local_p) (function))
23155 output_asm_insn ("jmp\t%P0", xops);
ccf8e764
RH
23156 /* All thunks should be in the same object as their target,
23157 and thus binds_local_p should be true. */
23158 else if (TARGET_64BIT_MS_ABI)
23159 gcc_unreachable ();
3961e8fe 23160 else
fcbe3b89 23161 {
89ce1c8f 23162 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
fcbe3b89
RH
23163 tmp = gen_rtx_CONST (Pmode, tmp);
23164 tmp = gen_rtx_MEM (QImode, tmp);
23165 xops[0] = tmp;
23166 output_asm_insn ("jmp\t%A0", xops);
23167 }
3961e8fe
RH
23168 }
23169 else
23170 {
23171 if (!flag_pic || (*targetm.binds_local_p) (function))
23172 output_asm_insn ("jmp\t%P0", xops);
194734e9 23173 else
21ff35fb 23174#if TARGET_MACHO
095fa594
SH
23175 if (TARGET_MACHO)
23176 {
11abc112 23177 rtx sym_ref = XEXP (DECL_RTL (function), 0);
f676971a
EC
23178 tmp = (gen_rtx_SYMBOL_REF
23179 (Pmode,
11abc112 23180 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
095fa594
SH
23181 tmp = gen_rtx_MEM (QImode, tmp);
23182 xops[0] = tmp;
23183 output_asm_insn ("jmp\t%0", xops);
23184 }
23185 else
23186#endif /* TARGET_MACHO */
194734e9 23187 {
29b74761 23188 tmp = gen_rtx_REG (SImode, CX_REG);
7d072037 23189 output_set_got (tmp, NULL_RTX);
3961e8fe
RH
23190
23191 xops[1] = tmp;
23192 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
23193 output_asm_insn ("jmp\t{*}%1", xops);
194734e9
JH
23194 }
23195 }
23196}
e2500fed 23197
1bc7c5b6 23198static void
b96a374d 23199x86_file_start (void)
1bc7c5b6
ZW
23200{
23201 default_file_start ();
192d0f89
GK
23202#if TARGET_MACHO
23203 darwin_file_start ();
23204#endif
1bc7c5b6
ZW
23205 if (X86_FILE_START_VERSION_DIRECTIVE)
23206 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
23207 if (X86_FILE_START_FLTUSED)
23208 fputs ("\t.global\t__fltused\n", asm_out_file);
23209 if (ix86_asm_dialect == ASM_INTEL)
9ad5e54f 23210 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
1bc7c5b6
ZW
23211}
23212
e932b21b 23213int
b96a374d 23214x86_field_alignment (tree field, int computed)
e932b21b
JH
23215{
23216 enum machine_mode mode;
ad9335eb
JJ
23217 tree type = TREE_TYPE (field);
23218
23219 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
e932b21b 23220 return computed;
ad9335eb
JJ
23221 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
23222 ? get_inner_array_type (type) : type);
39e3a681
JJ
23223 if (mode == DFmode || mode == DCmode
23224 || GET_MODE_CLASS (mode) == MODE_INT
23225 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
e932b21b
JH
23226 return MIN (32, computed);
23227 return computed;
23228}
23229
a5fa1ecd
JH
23230/* Output assembler code to FILE to increment profiler label # LABELNO
23231 for profiling a function entry. */
23232void
b96a374d 23233x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
a5fa1ecd
JH
23234{
23235 if (TARGET_64BIT)
ccf8e764 23236 {
a5fa1ecd 23237#ifndef NO_PROFILE_COUNTERS
ccf8e764 23238 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
a5fa1ecd 23239#endif
ccf8e764
RH
23240
23241 if (!TARGET_64BIT_MS_ABI && flag_pic)
a5fa1ecd 23242 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
ccf8e764 23243 else
a5fa1ecd 23244 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
ccf8e764 23245 }
a5fa1ecd
JH
23246 else if (flag_pic)
23247 {
23248#ifndef NO_PROFILE_COUNTERS
23249 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
23250 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
23251#endif
23252 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
23253 }
23254 else
23255 {
23256#ifndef NO_PROFILE_COUNTERS
ff6e2d3e 23257 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
a5fa1ecd
JH
23258 PROFILE_COUNT_REGISTER);
23259#endif
23260 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
23261 }
23262}
23263
d2c49530
JH
23264/* We don't have exact information about the insn sizes, but we may assume
 23265   quite safely that we are informed about all 1-byte insns and memory
c51e6d85 23266 address sizes. This is enough to eliminate unnecessary padding in
d2c49530
JH
23267 99% of cases. */
23268
23269static int
b96a374d 23270min_insn_size (rtx insn)
d2c49530
JH
23271{
23272 int l = 0;
23273
23274 if (!INSN_P (insn) || !active_insn_p (insn))
23275 return 0;
23276
 23277	  /* Discard alignments we've emitted and jump instructions.  */
23278 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
23279 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
23280 return 0;
7656aee4 23281 if (JUMP_P (insn)
d2c49530
JH
23282 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
23283 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
23284 return 0;
23285
23286 /* Important case - calls are always 5 bytes.
 23287      It is common to have many calls in a row.  */
7656aee4 23288 if (CALL_P (insn)
d2c49530
JH
23289 && symbolic_reference_mentioned_p (PATTERN (insn))
23290 && !SIBLING_CALL_P (insn))
23291 return 5;
23292 if (get_attr_length (insn) <= 1)
23293 return 1;
23294
 23295	  /* For normal instructions we may rely on the sizes of addresses
 23296	     and the presence of a symbol to require 4 bytes of encoding.
 23297	     This is not the case for jumps, where references are PC-relative.  */
7656aee4 23298 if (!JUMP_P (insn))
d2c49530
JH
23299 {
23300 l = get_attr_length_address (insn);
23301 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
23302 l = 4;
23303 }
23304 if (l)
23305 return 1+l;
23306 else
23307 return 2;
23308}
23309
c51e6d85 23310/* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte
d2c49530
JH
23311 window. */
23312
23313static void
be04394b 23314ix86_avoid_jump_misspredicts (void)
d2c49530
JH
23315{
23316 rtx insn, start = get_insns ();
23317 int nbytes = 0, njumps = 0;
23318 int isjump = 0;
23319
 23320	  /* Look for all minimal intervals of instructions containing 4 jumps.
 23321	     The intervals are bounded by START and INSN.  NBYTES is the total
 23322	     size of instructions in the interval including INSN and not including
 23323	     START.  When NBYTES is smaller than 16 bytes, it is possible
 23324	     that the end of START and the end of INSN land in the same 16-byte window.
 23325
 23326	     The smallest offset in the window at which INSN can start is the case
 23327	     where START ends at offset 0.  The offset of INSN is then NBYTES
 23328	     - sizeof (INSN).  We add a p2align to the 16-byte window with maxskip
 23329	     17 - NBYTES + sizeof (INSN).  */
23330 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23331 {
23332
23333 nbytes += min_insn_size (insn);
c263766c
RH
23334 if (dump_file)
 23335	        fprintf (dump_file, "Insn %i estimated to %i bytes\n",
d2c49530 23336 INSN_UID (insn), min_insn_size (insn));
7656aee4 23337 if ((JUMP_P (insn)
d2c49530
JH
23338 && GET_CODE (PATTERN (insn)) != ADDR_VEC
23339 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
7656aee4 23340 || CALL_P (insn))
d2c49530
JH
23341 njumps++;
23342 else
23343 continue;
23344
23345 while (njumps > 3)
23346 {
23347 start = NEXT_INSN (start);
7656aee4 23348 if ((JUMP_P (start)
d2c49530
JH
23349 && GET_CODE (PATTERN (start)) != ADDR_VEC
23350 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
7656aee4 23351 || CALL_P (start))
d2c49530
JH
23352 njumps--, isjump = 1;
23353 else
23354 isjump = 0;
23355 nbytes -= min_insn_size (start);
23356 }
d0396b79 23357 gcc_assert (njumps >= 0);
c263766c
RH
23358 if (dump_file)
23359 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
d2c49530
JH
23360 INSN_UID (start), INSN_UID (insn), nbytes);
23361
23362 if (njumps == 3 && isjump && nbytes < 16)
23363 {
23364 int padsize = 15 - nbytes + min_insn_size (insn);
23365
c263766c
RH
23366 if (dump_file)
23367 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
23368 INSN_UID (insn), padsize);
d2c49530
JH
23369 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
23370 }
23371 }
23372}
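/* A minimal self-contained sketch (not GCC code; the struct and names are
   hypothetical) of the sliding-window bookkeeping used above: count jumps
   in a trailing window, shrink the window while it holds more than three
   jumps, and flag the spot where a fourth jump would fall within 16 bytes.
   The real pass additionally tracks whether the insn that left the window
   was itself a jump before padding.  */
struct insn_rec { int size; int is_jump; };

static void
scan_jump_windows (const struct insn_rec *insn, int n)
{
  int start = 0, nbytes = 0, njumps = 0;
  int i;

  for (i = 0; i < n; i++)
    {
      nbytes += insn[i].size;
      if (!insn[i].is_jump)
	continue;
      njumps++;
      while (njumps > 3)
	{
	  if (insn[start].is_jump)
	    njumps--;
	  nbytes -= insn[start].size;
	  start++;
	}
      if (njumps == 3 && nbytes < 16)
	{
	  /* Pad by 15 - nbytes + insn[i].size, as in the pass above.  */
	}
    }
}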
23373
be04394b 23374/* AMD Athlon works faster
d1f87653 23375   when RET is not the destination of a conditional jump or directly preceded
2a500b9e
JH
 23376   by another jump instruction.  We avoid the penalty by inserting a NOP just
 23377   before the RET instruction in such cases.  */
18dbd950 23378static void
be04394b 23379ix86_pad_returns (void)
2a500b9e
JH
23380{
23381 edge e;
628f6a4e 23382 edge_iterator ei;
2a500b9e 23383
628f6a4e
BE
23384 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
23385 {
23386 basic_block bb = e->src;
23387 rtx ret = BB_END (bb);
23388 rtx prev;
23389 bool replace = false;
23390
7656aee4 23391 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
628f6a4e
BE
23392 || !maybe_hot_bb_p (bb))
23393 continue;
23394 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
7656aee4 23395 if (active_insn_p (prev) || LABEL_P (prev))
628f6a4e 23396 break;
7656aee4 23397 if (prev && LABEL_P (prev))
628f6a4e
BE
23398 {
23399 edge e;
23400 edge_iterator ei;
23401
23402 FOR_EACH_EDGE (e, ei, bb->preds)
23403 if (EDGE_FREQUENCY (e) && e->src->index >= 0
23404 && !(e->flags & EDGE_FALLTHRU))
23405 replace = true;
23406 }
23407 if (!replace)
23408 {
23409 prev = prev_active_insn (ret);
23410 if (prev
7656aee4
UB
23411 && ((JUMP_P (prev) && any_condjump_p (prev))
23412 || CALL_P (prev)))
253c7a00 23413 replace = true;
628f6a4e
BE
 23414	  /* Empty functions get a branch mispredict even when the jump destination
23415 is not visible to us. */
23416 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
23417 replace = true;
23418 }
23419 if (replace)
23420 {
23421 emit_insn_before (gen_return_internal_long (), ret);
23422 delete_insn (ret);
23423 }
23424 }
be04394b
JH
23425}
23426
23427/* Implement machine specific optimizations. We implement padding of returns
23428 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
23429static void
23430ix86_reorg (void)
23431{
d326eaf0 23432 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
be04394b
JH
23433 ix86_pad_returns ();
23434 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
23435 ix86_avoid_jump_misspredicts ();
2a500b9e
JH
23436}
23437
4977bab6
ZW
 23438/* Return nonzero when a QImode register that must be represented via a REX
 23439   prefix is used.  */
23440bool
b96a374d 23441x86_extended_QIreg_mentioned_p (rtx insn)
4977bab6
ZW
23442{
23443 int i;
23444 extract_insn_cached (insn);
23445 for (i = 0; i < recog_data.n_operands; i++)
23446 if (REG_P (recog_data.operand[i])
23447 && REGNO (recog_data.operand[i]) >= 4)
23448 return true;
23449 return false;
23450}
23451
 23452/* Return nonzero when P points to a register encoded via a REX prefix.
23453 Called via for_each_rtx. */
23454static int
b96a374d 23455extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
4977bab6
ZW
23456{
23457 unsigned int regno;
23458 if (!REG_P (*p))
23459 return 0;
23460 regno = REGNO (*p);
23461 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
23462}
23463
 23464/* Return true when INSN mentions a register that must be encoded using
 23465   a REX prefix.  */
23466bool
b96a374d 23467x86_extended_reg_mentioned_p (rtx insn)
4977bab6
ZW
23468{
23469 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
23470}
23471
1d6ba901 23472/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
8d705469
JH
23473 optabs would emit if we didn't have TFmode patterns. */
23474
23475void
b96a374d 23476x86_emit_floatuns (rtx operands[2])
8d705469
JH
23477{
23478 rtx neglab, donelab, i0, i1, f0, in, out;
1d6ba901
ZD
23479 enum machine_mode mode, inmode;
23480
23481 inmode = GET_MODE (operands[1]);
d0396b79 23482 gcc_assert (inmode == SImode || inmode == DImode);
8d705469
JH
23483
23484 out = operands[0];
1d6ba901 23485 in = force_reg (inmode, operands[1]);
8d705469
JH
23486 mode = GET_MODE (out);
23487 neglab = gen_label_rtx ();
23488 donelab = gen_label_rtx ();
8d705469
JH
23489 f0 = gen_reg_rtx (mode);
23490
ebff937c
SH
23491 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23492
23493 expand_float (out, in, 0);
8d705469 23494
8d705469
JH
23495 emit_jump_insn (gen_jump (donelab));
23496 emit_barrier ();
23497
23498 emit_label (neglab);
23499
ebff937c
SH
23500 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23501 1, OPTAB_DIRECT);
23502 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23503 1, OPTAB_DIRECT);
23504 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23505
8d705469 23506 expand_float (f0, i0, 0);
ebff937c 23507
8d705469
JH
23508 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
23509
23510 emit_label (donelab);
23511}
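/* A minimal C sketch (not GCC code; u64_to_double is a hypothetical name)
   of the trick the expander above emits for a value whose sign bit may be
   set: halve the input while folding the low bit back in so rounding is
   preserved, convert as signed, then double the result.  */
#include <stdint.h>

static double
u64_to_double (uint64_t in)
{
  if ((int64_t) in >= 0)
    return (double) (int64_t) in;	/* plain signed conversion */

  uint64_t i0 = (in >> 1) | (in & 1);	/* LSHIFTRT, then IOR with low bit */
  double f0 = (double) (int64_t) i0;	/* expand_float on the halved value */
  return f0 + f0;			/* out = f0 + f0 */
}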
eb701deb
RH
23512\f
23513/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23514 with all elements equal to VAR. Return true if successful. */
23515
23516static bool
23517ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
23518 rtx target, rtx val)
23519{
23520 enum machine_mode smode, wsmode, wvmode;
23521 rtx x;
23522
23523 switch (mode)
23524 {
23525 case V2SImode:
23526 case V2SFmode:
12b3553f 23527 if (!mmx_ok)
eb701deb
RH
23528 return false;
23529 /* FALLTHRU */
23530
23531 case V2DFmode:
23532 case V2DImode:
23533 case V4SFmode:
23534 case V4SImode:
23535 val = force_reg (GET_MODE_INNER (mode), val);
23536 x = gen_rtx_VEC_DUPLICATE (mode, val);
23537 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23538 return true;
23539
23540 case V4HImode:
23541 if (!mmx_ok)
23542 return false;
f817d5d4
RH
23543 if (TARGET_SSE || TARGET_3DNOW_A)
23544 {
23545 val = gen_lowpart (SImode, val);
23546 x = gen_rtx_TRUNCATE (HImode, val);
23547 x = gen_rtx_VEC_DUPLICATE (mode, x);
23548 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23549 return true;
23550 }
23551 else
23552 {
23553 smode = HImode;
23554 wsmode = SImode;
23555 wvmode = V2SImode;
23556 goto widen;
23557 }
eb701deb
RH
23558
23559 case V8QImode:
23560 if (!mmx_ok)
23561 return false;
23562 smode = QImode;
23563 wsmode = HImode;
23564 wvmode = V4HImode;
23565 goto widen;
23566 case V8HImode:
2ff61948
RS
23567 if (TARGET_SSE2)
23568 {
23569 rtx tmp1, tmp2;
23570 /* Extend HImode to SImode using a paradoxical SUBREG. */
23571 tmp1 = gen_reg_rtx (SImode);
23572 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23573 /* Insert the SImode value as low element of V4SImode vector. */
23574 tmp2 = gen_reg_rtx (V4SImode);
23575 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23576 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23577 CONST0_RTX (V4SImode),
23578 const1_rtx);
23579 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23580 /* Cast the V4SImode vector back to a V8HImode vector. */
23581 tmp1 = gen_reg_rtx (V8HImode);
23582 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
23583 /* Duplicate the low short through the whole low SImode word. */
23584 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
23585 /* Cast the V8HImode vector back to a V4SImode vector. */
23586 tmp2 = gen_reg_rtx (V4SImode);
23587 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23588 /* Replicate the low element of the V4SImode vector. */
23589 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23590 /* Cast the V2SImode back to V8HImode, and store in target. */
23591 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
23592 return true;
23593 }
eb701deb
RH
23594 smode = HImode;
23595 wsmode = SImode;
23596 wvmode = V4SImode;
23597 goto widen;
23598 case V16QImode:
2ff61948
RS
23599 if (TARGET_SSE2)
23600 {
23601 rtx tmp1, tmp2;
23602 /* Extend QImode to SImode using a paradoxical SUBREG. */
23603 tmp1 = gen_reg_rtx (SImode);
23604 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23605 /* Insert the SImode value as low element of V4SImode vector. */
23606 tmp2 = gen_reg_rtx (V4SImode);
23607 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23608 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23609 CONST0_RTX (V4SImode),
23610 const1_rtx);
23611 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23612 /* Cast the V4SImode vector back to a V16QImode vector. */
23613 tmp1 = gen_reg_rtx (V16QImode);
23614 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
23615 /* Duplicate the low byte through the whole low SImode word. */
23616 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23617 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23618 /* Cast the V16QImode vector back to a V4SImode vector. */
23619 tmp2 = gen_reg_rtx (V4SImode);
23620 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23621 /* Replicate the low element of the V4SImode vector. */
23622 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
 23623	  /* Cast the V4SImode vector back to V16QImode, and store in target.  */
23624 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
23625 return true;
23626 }
eb701deb
RH
23627 smode = QImode;
23628 wsmode = HImode;
23629 wvmode = V8HImode;
23630 goto widen;
23631 widen:
23632 /* Replicate the value once into the next wider mode and recurse. */
23633 val = convert_modes (wsmode, smode, val, true);
23634 x = expand_simple_binop (wsmode, ASHIFT, val,
23635 GEN_INT (GET_MODE_BITSIZE (smode)),
23636 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23637 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
23638
23639 x = gen_reg_rtx (wvmode);
23640 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
23641 gcc_unreachable ();
23642 emit_move_insn (target, gen_lowpart (mode, x));
23643 return true;
23644
23645 default:
23646 return false;
23647 }
23648}
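/* A minimal sketch (not GCC code) of the "widen" strategy above, shown on
   a plain integer: double the element width with shift+or until the scalar
   fills a word, i.e. broadcast a byte through 32 bits.  */
#include <stdint.h>

static uint32_t
broadcast_byte (uint8_t v)
{
  uint32_t x = v;
  x |= x << 8;			/* QImode pair -> HImode element */
  x |= x << 16;			/* HImode pair -> SImode element */
  return x;			/* all four bytes now equal to v */
}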
23649
23650/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
acef130f 23651 whose ONE_VAR element is VAR, and other elements are zero. Return true
eb701deb
RH
23652 if successful. */
23653
23654static bool
acef130f
RS
23655ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
23656 rtx target, rtx var, int one_var)
eb701deb
RH
23657{
23658 enum machine_mode vsimode;
acef130f
RS
23659 rtx new_target;
23660 rtx x, tmp;
eb701deb
RH
23661
23662 switch (mode)
23663 {
23664 case V2SFmode:
23665 case V2SImode:
12b3553f 23666 if (!mmx_ok)
eb701deb
RH
23667 return false;
23668 /* FALLTHRU */
23669
23670 case V2DFmode:
23671 case V2DImode:
acef130f
RS
23672 if (one_var != 0)
23673 return false;
eb701deb
RH
23674 var = force_reg (GET_MODE_INNER (mode), var);
23675 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
23676 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23677 return true;
23678
23679 case V4SFmode:
23680 case V4SImode:
acef130f
RS
23681 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
23682 new_target = gen_reg_rtx (mode);
23683 else
23684 new_target = target;
eb701deb
RH
23685 var = force_reg (GET_MODE_INNER (mode), var);
23686 x = gen_rtx_VEC_DUPLICATE (mode, var);
23687 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
acef130f
RS
23688 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
23689 if (one_var != 0)
23690 {
23691 /* We need to shuffle the value to the correct position, so
23692 create a new pseudo to store the intermediate result. */
23693
23694 /* With SSE2, we can use the integer shuffle insns. */
23695 if (mode != V4SFmode && TARGET_SSE2)
23696 {
23697 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
23698 GEN_INT (1),
23699 GEN_INT (one_var == 1 ? 0 : 1),
23700 GEN_INT (one_var == 2 ? 0 : 1),
23701 GEN_INT (one_var == 3 ? 0 : 1)));
23702 if (target != new_target)
23703 emit_move_insn (target, new_target);
23704 return true;
23705 }
23706
23707 /* Otherwise convert the intermediate result to V4SFmode and
23708 use the SSE1 shuffle instructions. */
23709 if (mode != V4SFmode)
23710 {
23711 tmp = gen_reg_rtx (V4SFmode);
23712 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
23713 }
23714 else
23715 tmp = new_target;
23716
23717 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
23718 GEN_INT (1),
23719 GEN_INT (one_var == 1 ? 0 : 1),
23720 GEN_INT (one_var == 2 ? 0+4 : 1+4),
23721 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
23722
23723 if (mode != V4SFmode)
23724 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
23725 else if (tmp != target)
23726 emit_move_insn (target, tmp);
23727 }
23728 else if (target != new_target)
23729 emit_move_insn (target, new_target);
eb701deb
RH
23730 return true;
23731
23732 case V8HImode:
23733 case V16QImode:
23734 vsimode = V4SImode;
23735 goto widen;
23736 case V4HImode:
23737 case V8QImode:
23738 if (!mmx_ok)
23739 return false;
23740 vsimode = V2SImode;
23741 goto widen;
23742 widen:
acef130f
RS
23743 if (one_var != 0)
23744 return false;
23745
eb701deb
RH
23746 /* Zero extend the variable element to SImode and recurse. */
23747 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
23748
23749 x = gen_reg_rtx (vsimode);
acef130f
RS
23750 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
23751 var, one_var))
eb701deb
RH
23752 gcc_unreachable ();
23753
23754 emit_move_insn (target, gen_lowpart (mode, x));
23755 return true;
23756
23757 default:
23758 return false;
23759 }
23760}
23761
23762/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23763 consisting of the values in VALS. It is known that all elements
23764 except ONE_VAR are constants. Return true if successful. */
23765
23766static bool
23767ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
23768 rtx target, rtx vals, int one_var)
23769{
23770 rtx var = XVECEXP (vals, 0, one_var);
23771 enum machine_mode wmode;
23772 rtx const_vec, x;
23773
9fc5fa7b
GK
23774 const_vec = copy_rtx (vals);
23775 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
277fc67e 23776 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
eb701deb
RH
23777
23778 switch (mode)
23779 {
23780 case V2DFmode:
23781 case V2DImode:
23782 case V2SFmode:
23783 case V2SImode:
23784 /* For the two element vectors, it's just as easy to use
23785 the general case. */
23786 return false;
23787
23788 case V4SFmode:
23789 case V4SImode:
23790 case V8HImode:
23791 case V4HImode:
23792 break;
23793
23794 case V16QImode:
23795 wmode = V8HImode;
23796 goto widen;
23797 case V8QImode:
23798 wmode = V4HImode;
23799 goto widen;
23800 widen:
23801 /* There's no way to set one QImode entry easily. Combine
23802 the variable value with its adjacent constant value, and
23803 promote to an HImode set. */
23804 x = XVECEXP (vals, 0, one_var ^ 1);
23805 if (one_var & 1)
23806 {
23807 var = convert_modes (HImode, QImode, var, true);
23808 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
23809 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23810 x = GEN_INT (INTVAL (x) & 0xff);
23811 }
23812 else
23813 {
23814 var = convert_modes (HImode, QImode, var, true);
23815 x = gen_int_mode (INTVAL (x) << 8, HImode);
23816 }
23817 if (x != const0_rtx)
23818 var = expand_simple_binop (HImode, IOR, var, x, var,
23819 1, OPTAB_LIB_WIDEN);
23820
23821 x = gen_reg_rtx (wmode);
23822 emit_move_insn (x, gen_lowpart (wmode, const_vec));
ceda96fc 23823 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
eb701deb
RH
23824
23825 emit_move_insn (target, gen_lowpart (mode, x));
23826 return true;
23827
23828 default:
23829 return false;
23830 }
23831
23832 emit_move_insn (target, const_vec);
23833 ix86_expand_vector_set (mmx_ok, target, var, one_var);
23834 return true;
23835}
23836
23837/* A subroutine of ix86_expand_vector_init. Handle the most general case:
23838 all values variable, and none identical. */
23839
23840static void
23841ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
23842 rtx target, rtx vals)
23843{
23844 enum machine_mode half_mode = GET_MODE_INNER (mode);
23845 rtx op0 = NULL, op1 = NULL;
23846 bool use_vec_concat = false;
23847
23848 switch (mode)
23849 {
23850 case V2SFmode:
23851 case V2SImode:
23852 if (!mmx_ok && !TARGET_SSE)
23853 break;
23854 /* FALLTHRU */
23855
23856 case V2DFmode:
23857 case V2DImode:
23858 /* For the two element vectors, we always implement VEC_CONCAT. */
23859 op0 = XVECEXP (vals, 0, 0);
23860 op1 = XVECEXP (vals, 0, 1);
23861 use_vec_concat = true;
23862 break;
23863
23864 case V4SFmode:
23865 half_mode = V2SFmode;
23866 goto half;
23867 case V4SImode:
23868 half_mode = V2SImode;
23869 goto half;
23870 half:
23871 {
23872 rtvec v;
23873
23874 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
23875 Recurse to load the two halves. */
23876
23877 op0 = gen_reg_rtx (half_mode);
23878 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
23879 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
23880
23881 op1 = gen_reg_rtx (half_mode);
23882 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
23883 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
23884
23885 use_vec_concat = true;
23886 }
23887 break;
23888
23889 case V8HImode:
23890 case V16QImode:
23891 case V4HImode:
23892 case V8QImode:
23893 break;
23894
23895 default:
23896 gcc_unreachable ();
23897 }
23898
23899 if (use_vec_concat)
23900 {
23901 if (!register_operand (op0, half_mode))
23902 op0 = force_reg (half_mode, op0);
23903 if (!register_operand (op1, half_mode))
23904 op1 = force_reg (half_mode, op1);
23905
5656a184 23906 emit_insn (gen_rtx_SET (VOIDmode, target,
eb701deb
RH
23907 gen_rtx_VEC_CONCAT (mode, op0, op1)));
23908 }
23909 else
23910 {
23911 int i, j, n_elts, n_words, n_elt_per_word;
23912 enum machine_mode inner_mode;
23913 rtx words[4], shift;
23914
23915 inner_mode = GET_MODE_INNER (mode);
23916 n_elts = GET_MODE_NUNITS (mode);
23917 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
23918 n_elt_per_word = n_elts / n_words;
23919 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
23920
23921 for (i = 0; i < n_words; ++i)
23922 {
23923 rtx word = NULL_RTX;
23924
23925 for (j = 0; j < n_elt_per_word; ++j)
23926 {
23927 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
23928 elt = convert_modes (word_mode, inner_mode, elt, true);
23929
23930 if (j == 0)
23931 word = elt;
23932 else
23933 {
23934 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
23935 word, 1, OPTAB_LIB_WIDEN);
23936 word = expand_simple_binop (word_mode, IOR, word, elt,
23937 word, 1, OPTAB_LIB_WIDEN);
23938 }
23939 }
23940
23941 words[i] = word;
23942 }
23943
23944 if (n_words == 1)
23945 emit_move_insn (target, gen_lowpart (mode, words[0]));
23946 else if (n_words == 2)
23947 {
23948 rtx tmp = gen_reg_rtx (mode);
23949 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
23950 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
23951 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
23952 emit_move_insn (target, tmp);
23953 }
23954 else if (n_words == 4)
23955 {
23956 rtx tmp = gen_reg_rtx (V4SImode);
23957 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
ceda96fc 23958 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
eb701deb
RH
23959 emit_move_insn (target, gen_lowpart (mode, tmp));
23960 }
23961 else
23962 gcc_unreachable ();
23963 }
23964}
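/* A minimal sketch (not GCC code) of the word-building loop above: pack
   four QImode elements into one SImode word with shift+or, highest-indexed
   element first so element 0 lands in the low bits.  */
#include <stdint.h>

static uint32_t
pack_word (const uint8_t elt[4])
{
  uint32_t word = 0;
  int j;

  for (j = 0; j < 4; j++)
    word = (word << 8) | elt[3 - j];	/* index (i+1)*n_elt_per_word-j-1 */
  return word;
}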
23965
5656a184 23966/* Initialize vector TARGET via VALS. Suppress the use of MMX
eb701deb 23967 instructions unless MMX_OK is true. */
8d705469 23968
997404de 23969void
eb701deb 23970ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
997404de
JH
23971{
23972 enum machine_mode mode = GET_MODE (target);
eb701deb
RH
23973 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23974 int n_elts = GET_MODE_NUNITS (mode);
23975 int n_var = 0, one_var = -1;
23976 bool all_same = true, all_const_zero = true;
997404de 23977 int i;
eb701deb 23978 rtx x;
f676971a 23979
eb701deb
RH
23980 for (i = 0; i < n_elts; ++i)
23981 {
23982 x = XVECEXP (vals, 0, i);
fcc44808
UB
23983 if (!(CONST_INT_P (x)
23984 || GET_CODE (x) == CONST_DOUBLE
23985 || GET_CODE (x) == CONST_FIXED))
eb701deb
RH
23986 n_var++, one_var = i;
23987 else if (x != CONST0_RTX (inner_mode))
23988 all_const_zero = false;
23989 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
23990 all_same = false;
23991 }
997404de 23992
eb701deb
RH
23993 /* Constants are best loaded from the constant pool. */
23994 if (n_var == 0)
997404de
JH
23995 {
23996 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
23997 return;
23998 }
23999
eb701deb
RH
24000 /* If all values are identical, broadcast the value. */
24001 if (all_same
24002 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
24003 XVECEXP (vals, 0, 0)))
24004 return;
24005
24006 /* Values where only one field is non-constant are best loaded from
 24007     the pool and overwritten via a move later.  */
24008 if (n_var == 1)
997404de 24009 {
acef130f
RS
24010 if (all_const_zero
24011 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
24012 XVECEXP (vals, 0, one_var),
24013 one_var))
eb701deb
RH
24014 return;
24015
24016 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
24017 return;
24018 }
24019
24020 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
24021}
24022
24023void
24024ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
24025{
24026 enum machine_mode mode = GET_MODE (target);
24027 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24028 bool use_vec_merge = false;
24029 rtx tmp;
24030
24031 switch (mode)
24032 {
24033 case V2SFmode:
24034 case V2SImode:
0f2698d0
RH
24035 if (mmx_ok)
24036 {
24037 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
24038 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
24039 if (elt == 0)
24040 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
24041 else
24042 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
24043 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24044 return;
24045 }
24046 break;
eb701deb 24047
eb701deb 24048 case V2DImode:
9a5cee02
L
24049 use_vec_merge = TARGET_SSE4_1;
24050 if (use_vec_merge)
24051 break;
24052
24053 case V2DFmode:
eb701deb
RH
24054 {
24055 rtx op0, op1;
24056
24057 /* For the two element vectors, we implement a VEC_CONCAT with
24058 the extraction of the other element. */
24059
24060 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
24061 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
24062
24063 if (elt == 0)
24064 op0 = val, op1 = tmp;
24065 else
24066 op0 = tmp, op1 = val;
24067
24068 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
24069 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24070 }
24071 return;
1c47af84 24072
eb701deb 24073 case V4SFmode:
9a5cee02
L
24074 use_vec_merge = TARGET_SSE4_1;
24075 if (use_vec_merge)
24076 break;
24077
eb701deb 24078 switch (elt)
997404de 24079 {
eb701deb
RH
24080 case 0:
24081 use_vec_merge = true;
1c47af84
RH
24082 break;
24083
eb701deb 24084 case 1:
125886c7 24085 /* tmp = target = A B C D */
eb701deb 24086 tmp = copy_to_reg (target);
125886c7 24087 /* target = A A B B */
eb701deb 24088 emit_insn (gen_sse_unpcklps (target, target, target));
125886c7 24089 /* target = X A B B */
eb701deb 24090 ix86_expand_vector_set (false, target, val, 0);
125886c7 24091 /* target = A X C D */
eb701deb
RH
24092 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24093 GEN_INT (1), GEN_INT (0),
b100079f 24094 GEN_INT (2+4), GEN_INT (3+4)));
eb701deb
RH
24095 return;
24096
24097 case 2:
125886c7 24098 /* tmp = target = A B C D */
eb701deb 24099 tmp = copy_to_reg (target);
125886c7
JJ
24100 /* tmp = X B C D */
24101 ix86_expand_vector_set (false, tmp, val, 0);
24102 /* target = A B X D */
eb701deb
RH
24103 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24104 GEN_INT (0), GEN_INT (1),
b100079f 24105 GEN_INT (0+4), GEN_INT (3+4)));
eb701deb
RH
24106 return;
24107
24108 case 3:
125886c7 24109 /* tmp = target = A B C D */
eb701deb 24110 tmp = copy_to_reg (target);
125886c7
JJ
24111 /* tmp = X B C D */
24112 ix86_expand_vector_set (false, tmp, val, 0);
 24113	  /* target = A B C X */
eb701deb
RH
24114 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24115 GEN_INT (0), GEN_INT (1),
b100079f 24116 GEN_INT (2+4), GEN_INT (0+4)));
eb701deb 24117 return;
1c47af84
RH
24118
24119 default:
eb701deb
RH
24120 gcc_unreachable ();
24121 }
24122 break;
24123
24124 case V4SImode:
9a5cee02
L
24125 use_vec_merge = TARGET_SSE4_1;
24126 if (use_vec_merge)
24127 break;
24128
eb701deb
RH
24129 /* Element 0 handled by vec_merge below. */
24130 if (elt == 0)
24131 {
24132 use_vec_merge = true;
1c47af84 24133 break;
997404de 24134 }
eb701deb
RH
24135
24136 if (TARGET_SSE2)
24137 {
24138 /* With SSE2, use integer shuffles to swap element 0 and ELT,
24139 store into element 0, then shuffle them back. */
24140
24141 rtx order[4];
24142
24143 order[0] = GEN_INT (elt);
24144 order[1] = const1_rtx;
24145 order[2] = const2_rtx;
24146 order[3] = GEN_INT (3);
24147 order[elt] = const0_rtx;
24148
24149 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24150 order[1], order[2], order[3]));
24151
24152 ix86_expand_vector_set (false, target, val, 0);
24153
24154 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24155 order[1], order[2], order[3]));
24156 }
24157 else
24158 {
24159 /* For SSE1, we have to reuse the V4SF code. */
24160 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
24161 gen_lowpart (SFmode, val), elt);
24162 }
997404de 24163 return;
eb701deb
RH
24164
24165 case V8HImode:
24166 use_vec_merge = TARGET_SSE2;
24167 break;
24168 case V4HImode:
24169 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24170 break;
24171
24172 case V16QImode:
9a5cee02
L
24173 use_vec_merge = TARGET_SSE4_1;
24174 break;
24175
eb701deb
RH
24176 case V8QImode:
24177 default:
24178 break;
997404de
JH
24179 }
24180
eb701deb 24181 if (use_vec_merge)
997404de 24182 {
eb701deb
RH
24183 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
24184 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
24185 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24186 }
24187 else
24188 {
24189 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24190
24191 emit_move_insn (mem, target);
24192
24193 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24194 emit_move_insn (tmp, val);
24195
24196 emit_move_insn (target, mem);
24197 }
24198}
24199
24200void
24201ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
24202{
24203 enum machine_mode mode = GET_MODE (vec);
24204 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24205 bool use_vec_extr = false;
24206 rtx tmp;
24207
24208 switch (mode)
24209 {
24210 case V2SImode:
24211 case V2SFmode:
24212 if (!mmx_ok)
24213 break;
24214 /* FALLTHRU */
24215
24216 case V2DFmode:
24217 case V2DImode:
24218 use_vec_extr = true;
24219 break;
24220
24221 case V4SFmode:
9a5cee02
L
24222 use_vec_extr = TARGET_SSE4_1;
24223 if (use_vec_extr)
24224 break;
24225
eb701deb 24226 switch (elt)
997404de 24227 {
eb701deb
RH
24228 case 0:
24229 tmp = vec;
24230 break;
997404de 24231
eb701deb
RH
24232 case 1:
24233 case 3:
24234 tmp = gen_reg_rtx (mode);
24235 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
24236 GEN_INT (elt), GEN_INT (elt),
b100079f 24237 GEN_INT (elt+4), GEN_INT (elt+4)));
eb701deb
RH
24238 break;
24239
24240 case 2:
24241 tmp = gen_reg_rtx (mode);
24242 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
24243 break;
24244
24245 default:
24246 gcc_unreachable ();
997404de 24247 }
eb701deb
RH
24248 vec = tmp;
24249 use_vec_extr = true;
ed9b5396 24250 elt = 0;
eb701deb
RH
24251 break;
24252
24253 case V4SImode:
9a5cee02
L
24254 use_vec_extr = TARGET_SSE4_1;
24255 if (use_vec_extr)
24256 break;
24257
eb701deb 24258 if (TARGET_SSE2)
997404de 24259 {
eb701deb
RH
24260 switch (elt)
24261 {
24262 case 0:
24263 tmp = vec;
24264 break;
24265
24266 case 1:
24267 case 3:
24268 tmp = gen_reg_rtx (mode);
24269 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
24270 GEN_INT (elt), GEN_INT (elt),
24271 GEN_INT (elt), GEN_INT (elt)));
24272 break;
24273
24274 case 2:
24275 tmp = gen_reg_rtx (mode);
24276 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
24277 break;
24278
24279 default:
24280 gcc_unreachable ();
24281 }
24282 vec = tmp;
24283 use_vec_extr = true;
ed9b5396 24284 elt = 0;
997404de 24285 }
eb701deb
RH
24286 else
24287 {
24288 /* For SSE1, we have to reuse the V4SF code. */
24289 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
24290 gen_lowpart (V4SFmode, vec), elt);
24291 return;
24292 }
24293 break;
24294
24295 case V8HImode:
24296 use_vec_extr = TARGET_SSE2;
24297 break;
24298 case V4HImode:
24299 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24300 break;
24301
24302 case V16QImode:
9a5cee02
L
24303 use_vec_extr = TARGET_SSE4_1;
24304 break;
24305
eb701deb
RH
24306 case V8QImode:
24307 /* ??? Could extract the appropriate HImode element and shift. */
24308 default:
24309 break;
997404de 24310 }
997404de 24311
eb701deb
RH
24312 if (use_vec_extr)
24313 {
24314 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
24315 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
24316
24317 /* Let the rtl optimizers know about the zero extension performed. */
9a5cee02 24318 if (inner_mode == QImode || inner_mode == HImode)
eb701deb
RH
24319 {
24320 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
24321 target = gen_lowpart (SImode, target);
24322 }
24323
24324 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24325 }
24326 else
24327 {
24328 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24329
24330 emit_move_insn (mem, vec);
24331
24332 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24333 emit_move_insn (target, tmp);
24334 }
24335}
2ab1754e 24336
536fa7b7 24337/* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
2ab1754e
RH
24338 pattern to reduce; DEST is the destination; IN is the input vector. */
24339
24340void
24341ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
24342{
24343 rtx tmp1, tmp2, tmp3;
24344
24345 tmp1 = gen_reg_rtx (V4SFmode);
24346 tmp2 = gen_reg_rtx (V4SFmode);
24347 tmp3 = gen_reg_rtx (V4SFmode);
24348
24349 emit_insn (gen_sse_movhlps (tmp1, in, in));
24350 emit_insn (fn (tmp2, tmp1, in));
24351
24352 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
24353 GEN_INT (1), GEN_INT (1),
24354 GEN_INT (1+4), GEN_INT (1+4)));
24355 emit_insn (fn (dest, tmp2, tmp3));
24356}
eb701deb 24357\f
a81083b2
BE
24358/* Target hook for scalar_mode_supported_p. */
24359static bool
24360ix86_scalar_mode_supported_p (enum machine_mode mode)
24361{
24362 if (DECIMAL_FLOAT_MODE_P (mode))
24363 return true;
27735edb
UB
24364 else if (mode == TFmode)
24365 return TARGET_64BIT;
a81083b2
BE
24366 else
24367 return default_scalar_mode_supported_p (mode);
24368}
24369
f676971a
EC
24370/* Implements target hook vector_mode_supported_p. */
24371static bool
24372ix86_vector_mode_supported_p (enum machine_mode mode)
24373{
dcbca208 24374 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
f676971a 24375 return true;
dcbca208 24376 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
f676971a 24377 return true;
dcbca208 24378 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
f676971a 24379 return true;
dcbca208
RH
24380 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
24381 return true;
24382 return false;
f676971a
EC
24383}
24384
c77cd3d1
UB
24385/* Target hook for c_mode_for_suffix. */
24386static enum machine_mode
24387ix86_c_mode_for_suffix (char suffix)
24388{
24389 if (TARGET_64BIT && suffix == 'q')
24390 return TFmode;
24391 if (TARGET_MMX && suffix == 'w')
24392 return XFmode;
24393
24394 return VOIDmode;
24395}
24396
67dfe110
KH
24397/* Worker function for TARGET_MD_ASM_CLOBBERS.
24398
24399 We do this in the new i386 backend to maintain source compatibility
24400 with the old cc0-based compiler. */
24401
24402static tree
61158923
HPN
24403ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
24404 tree inputs ATTRIBUTE_UNUSED,
24405 tree clobbers)
67dfe110 24406{
f676971a
EC
24407 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
24408 clobbers);
24409 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
24410 clobbers);
67dfe110
KH
24411 return clobbers;
24412}
24413
7fa7289d 24414/* Implements the target hook targetm.asm.encode_section_info.  This
2ed941ec 24415   is not used by NetWare.  */
7dcbf659 24416
2ed941ec 24417static void ATTRIBUTE_UNUSED
7dcbf659
JH
24418ix86_encode_section_info (tree decl, rtx rtl, int first)
24419{
24420 default_encode_section_info (decl, rtl, first);
24421
24422 if (TREE_CODE (decl) == VAR_DECL
24423 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
24424 && ix86_in_large_data_p (decl))
24425 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
24426}
24427
3c5cb3e4
KH
24428/* Worker function for REVERSE_CONDITION. */
24429
24430enum rtx_code
24431ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
24432{
24433 return (mode != CCFPmode && mode != CCFPUmode
24434 ? reverse_condition (code)
24435 : reverse_condition_maybe_unordered (code));
24436}
24437
5ea9cb6e
RS
24438/* Output code to perform an x87 FP register move, from OPERANDS[1]
24439 to OPERANDS[0]. */
24440
24441const char *
24442output_387_reg_move (rtx insn, rtx *operands)
24443{
d869c351 24444 if (REG_P (operands[0]))
5ea9cb6e 24445 {
d869c351
UB
24446 if (REG_P (operands[1])
24447 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24448 {
24449 if (REGNO (operands[0]) == FIRST_STACK_REG)
24450 return output_387_ffreep (operands, 0);
24451 return "fstp\t%y0";
24452 }
24453 if (STACK_TOP_P (operands[0]))
24454 return "fld%z1\t%y1";
24455 return "fst\t%y0";
5ea9cb6e 24456 }
d869c351
UB
24457 else if (MEM_P (operands[0]))
24458 {
24459 gcc_assert (REG_P (operands[1]));
24460 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24461 return "fstp%z0\t%y0";
24462 else
24463 {
24464 /* There is no non-popping store to memory for XFmode.
24465 So if we need one, follow the store with a load. */
24466 if (GET_MODE (operands[0]) == XFmode)
24467 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
24468 else
24469 return "fst%z0\t%y0";
24470 }
24471 }
24472 else
 24473    gcc_unreachable ();
5ea9cb6e
RS
24474}
24475
5ae27cfa
UB
 24476/* Output code to perform a conditional jump to LABEL, if the C2 flag in
 24477   the FP status register is set.  */
24478
24479void
24480ix86_emit_fp_unordered_jump (rtx label)
24481{
24482 rtx reg = gen_reg_rtx (HImode);
24483 rtx temp;
24484
24485 emit_insn (gen_x86_fnstsw_1 (reg));
2484cc35 24486
3c2d980c 24487 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
2484cc35
UB
24488 {
24489 emit_insn (gen_x86_sahf_1 (reg));
24490
f676971a 24491 temp = gen_rtx_REG (CCmode, FLAGS_REG);
2484cc35
UB
24492 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
24493 }
24494 else
24495 {
24496 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
24497
f676971a 24498 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
2484cc35
UB
24499 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
24500 }
f676971a 24501
5ae27cfa
UB
24502 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
24503 gen_rtx_LABEL_REF (VOIDmode, label),
24504 pc_rtx);
24505 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
79cd820a 24506
5ae27cfa 24507 emit_jump_insn (temp);
79cd820a 24508 predict_jump (REG_BR_PROB_BASE * 10 / 100);
5ae27cfa
UB
24509}
24510
c2fcfa4f
UB
24511/* Output code to perform a log1p XFmode calculation. */
24512
24513void ix86_emit_i387_log1p (rtx op0, rtx op1)
24514{
24515 rtx label1 = gen_label_rtx ();
24516 rtx label2 = gen_label_rtx ();
24517
24518 rtx tmp = gen_reg_rtx (XFmode);
24519 rtx tmp2 = gen_reg_rtx (XFmode);
24520
24521 emit_insn (gen_absxf2 (tmp, op1));
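  /* The constant below is 1 - sqrt(2)/2; fyl2xp1 is only specified for
     |op1| smaller than this bound, so larger inputs take the fyl2x path
     with an explicit 1.0 addition.  */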
24522 emit_insn (gen_cmpxf (tmp,
24523 CONST_DOUBLE_FROM_REAL_VALUE (
24524 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
24525 XFmode)));
24526 emit_jump_insn (gen_bge (label1));
24527
24528 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
0ac45694 24529 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
c2fcfa4f
UB
24530 emit_jump (label2);
24531
24532 emit_label (label1);
24533 emit_move_insn (tmp, CONST1_RTX (XFmode));
24534 emit_insn (gen_addxf3 (tmp, op1, tmp));
24535 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
0ac45694 24536 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
c2fcfa4f
UB
24537
24538 emit_label (label2);
24539}
f676971a 24540
6b889d89
UB
 24541/* Output code to perform a Newton-Raphson approximation of a single precision
24542 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
24543
24544void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
24545{
24546 rtx x0, x1, e0, e1, two;
24547
24548 x0 = gen_reg_rtx (mode);
24549 e0 = gen_reg_rtx (mode);
24550 e1 = gen_reg_rtx (mode);
24551 x1 = gen_reg_rtx (mode);
24552
24553 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
24554
24555 if (VECTOR_MODE_P (mode))
24556 two = ix86_build_const_vector (SFmode, true, two);
24557
24558 two = force_reg (mode, two);
24559
24560 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
24561
43db7979 24562 /* x0 = rcp(b) estimate */
6b889d89
UB
24563 emit_insn (gen_rtx_SET (VOIDmode, x0,
24564 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
24565 UNSPEC_RCP)));
24566 /* e0 = x0 * b */
24567 emit_insn (gen_rtx_SET (VOIDmode, e0,
24568 gen_rtx_MULT (mode, x0, b)));
24569 /* e1 = 2. - e0 */
24570 emit_insn (gen_rtx_SET (VOIDmode, e1,
24571 gen_rtx_MINUS (mode, two, e0)));
24572 /* x1 = x0 * e1 */
24573 emit_insn (gen_rtx_SET (VOIDmode, x1,
24574 gen_rtx_MULT (mode, x0, e1)));
24575 /* res = a * x1 */
24576 emit_insn (gen_rtx_SET (VOIDmode, res,
24577 gen_rtx_MULT (mode, a, x1)));
24578}
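/* A minimal C sketch (not GCC code) of the Newton-Raphson step the expander
   above emits.  rcp_estimate stands in for the ~12-bit-accurate rcpss
   hardware estimate; here it is faked by truncating 1/b to 12 fractional
   bits, purely for illustration.  */
#include <math.h>

static float
rcp_estimate (float b)			/* stand-in for rcpss */
{
  return ldexpf (truncf (ldexpf (1.0f / b, 12)), -12);
}

static float
swdiv (float a, float b)		/* a / b = a * x0 * (2 - b * x0) */
{
  float x0 = rcp_estimate (b);		/* x0 = rcp(b) estimate */
  float e0 = x0 * b;			/* e0 = x0 * b */
  float e1 = 2.0f - e0;			/* e1 = 2. - e0 */
  float x1 = x0 * e1;			/* x1 = x0 * e1: one refinement */
  return a * x1;			/* res = a * x1 */
}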
24579
 24580/* Output code to perform a Newton-Raphson approximation of a
24581 single precision floating point [reciprocal] square root. */
24582
24583void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
24584 bool recip)
24585{
43db7979
UB
24586 rtx x0, e0, e1, e2, e3, mthree, mhalf;
24587 REAL_VALUE_TYPE r;
6b889d89
UB
24588
24589 x0 = gen_reg_rtx (mode);
24590 e0 = gen_reg_rtx (mode);
24591 e1 = gen_reg_rtx (mode);
24592 e2 = gen_reg_rtx (mode);
24593 e3 = gen_reg_rtx (mode);
24594
aefa9d43 24595 real_from_integer (&r, VOIDmode, -3, -1, 0);
43db7979 24596 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
670a8526 24597
43db7979
UB
24598 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
24599 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
6b889d89
UB
24600
24601 if (VECTOR_MODE_P (mode))
24602 {
43db7979
UB
24603 mthree = ix86_build_const_vector (SFmode, true, mthree);
24604 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
6b889d89
UB
24605 }
24606
43db7979
UB
24607 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
24608 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
6b889d89 24609
43db7979 24610 /* x0 = rsqrt(a) estimate */
6b889d89
UB
24611 emit_insn (gen_rtx_SET (VOIDmode, x0,
24612 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
24613 UNSPEC_RSQRT)));
43db7979
UB
24614
24615 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
5a37a60c 24616 if (!recip)
43db7979
UB
24617 {
24618 rtx zero, mask;
24619
24620 zero = gen_reg_rtx (mode);
24621 mask = gen_reg_rtx (mode);
24622
24623 zero = force_reg (mode, CONST0_RTX(mode));
24624 emit_insn (gen_rtx_SET (VOIDmode, mask,
24625 gen_rtx_NE (mode, zero, a)));
24626
24627 emit_insn (gen_rtx_SET (VOIDmode, x0,
24628 gen_rtx_AND (mode, x0, mask)));
24629 }
24630
6b889d89
UB
24631 /* e0 = x0 * a */
24632 emit_insn (gen_rtx_SET (VOIDmode, e0,
24633 gen_rtx_MULT (mode, x0, a)));
24634 /* e1 = e0 * x0 */
24635 emit_insn (gen_rtx_SET (VOIDmode, e1,
24636 gen_rtx_MULT (mode, e0, x0)));
43db7979
UB
24637
24638 /* e2 = e1 - 3. */
24639 mthree = force_reg (mode, mthree);
6b889d89 24640 emit_insn (gen_rtx_SET (VOIDmode, e2,
43db7979
UB
24641 gen_rtx_PLUS (mode, e1, mthree)));
24642
24643 mhalf = force_reg (mode, mhalf);
6b889d89 24644 if (recip)
43db7979 24645 /* e3 = -.5 * x0 */
6b889d89 24646 emit_insn (gen_rtx_SET (VOIDmode, e3,
43db7979 24647 gen_rtx_MULT (mode, x0, mhalf)));
6b889d89 24648 else
43db7979 24649 /* e3 = -.5 * e0 */
6b889d89 24650 emit_insn (gen_rtx_SET (VOIDmode, e3,
43db7979 24651 gen_rtx_MULT (mode, e0, mhalf)));
6b889d89
UB
24652 /* ret = e2 * e3 */
24653 emit_insn (gen_rtx_SET (VOIDmode, res,
24654 gen_rtx_MULT (mode, e2, e3)));
24655}
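
/* A scalar model of the sequence above, assuming an rsqrtss-style
   estimate good to about 12 bits: with x0 ~= 1/sqrt(a), one
   Newton-Raphson step is -0.5 * x0 * (a * x0 * x0 - 3.0), and
   multiplying by a first yields sqrt(a) instead of rsqrt(a).
   rsqrt_estimate_model is a hypothetical stand-in; illustration
   only.  */
static float ATTRIBUTE_UNUSED
rsqrt_estimate_model (float a)
{
  return 1.0f / __builtin_sqrtf (a);	/* hardware gives ~12 bits */
}

static float ATTRIBUTE_UNUSED
swsqrt_model (float a, bool recip)
{
  float x0, e0, e1, e2, e3;

  x0 = rsqrt_estimate_model (a);
  if (!recip && a == 0.0f)
    x0 = 0.0f;				/* mask the infinite estimate */
  e0 = x0 * a;
  e1 = e0 * x0;
  e2 = e1 - 3.0f;
  e3 = (recip ? x0 : e0) * -0.5f;
  return e2 * e3;
}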

/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void ATTRIBUTE_UNUSED
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  default_elf_asm_named_section (name, flags, decl);
}

/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}
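
/* For example, under the Itanium C++ ABI the manglings above make
   void f (__float128)  encode as _Z1fg, and
   void f (long double) encode as _Z1fe.  */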

/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to set up any
   PIC register, so it is better to call __stack_chk_fail directly.  */

static tree
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
	 ? default_external_stack_protect_fail ()
	 : default_hidden_stack_protect_fail ();
}

/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
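
/* For example, 32-bit -fpic code gets
   DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4 (0x9b) for
   global symbols and DW_EH_PE_pcrel | DW_EH_PE_sdata4 (0x1b) for
   local ones, while non-PIC small-model code uses plain udata4.  */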
\f
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it is a mask that clears
   the sign bit.  */
static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
	{
	  /* We need to generate a scalar mode mask in this case.  */
	  rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
	  tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
	  mask = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
	}
    }
  else
    mask = gen_rtx_NOT (mode, mask);
  emit_insn (gen_rtx_SET (VOIDmode, sgn,
			  gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
			  gen_rtx_IOR (mode, abs_value, sgn)));
}
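
/* A scalar model of the masking above: keep only the sign bit of
   SIGN and OR it into the nonnegative ABS_VALUE.  Illustration only,
   assuming IEEE double.  */
static double ATTRIBUTE_UNUSED
copysign_to_positive_model (double abs_value, double sign)
{
  unsigned long long a, s;
  __builtin_memcpy (&a, &abs_value, sizeof (a));
  __builtin_memcpy (&s, &sign, sizeof (s));
  a |= s & 0x8000000000000000ULL;
  __builtin_memcpy (&abs_value, &a, sizeof (a));
  return abs_value;
}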

/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */
static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    }
  emit_insn (gen_rtx_SET (VOIDmode, xa,
			  gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}

/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */
static rtx
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
				  bool swap_operands)
{
  rtx label, tmp;

  if (swap_operands)
    {
      tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  label = gen_label_rtx ();
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}

/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
			      bool swap_operands)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (swap_operands)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  if (mode == DFmode)
    emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
				    gen_rtx_fmt_ee (code, mode, op0, op1)));
  else
    emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
				   gen_rtx_fmt_ee (code, mode, op0, op1)));

  return mask;
}

/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
static rtx
ix86_gen_TWO52 (enum machine_mode mode)
{
  REAL_VALUE_TYPE TWO52r;
  rtx TWO52;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);

  return TWO52;
}
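
/* A scalar model of the trick the expanders below build on, assuming
   round-to-nearest double arithmetic: for 0 <= xa < 2**52, adding and
   then subtracting 2**52 flushes the fraction bits, rounding xa to an
   integer (ties to even) with no float<->int conversion.  Illustration
   only.  */
static double ATTRIBUTE_UNUSED
two52_round_model (double xa)
{
  const double two52 = 4503599627370496.0;	/* 2**52 */
  return (xa + two52) - two52;
}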

/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
	tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
	return (long)tmp;
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
}
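
/* A scalar model of lround above, illustration only: adding
   copysign (nextafter (0.5, 0.0), x) and truncating implements
   round-half-away-from-zero; the value just below 0.5 is used so
   that the largest double below 0.5 is not dragged up to 1 when the
   addition itself rounds to nearest.  */
static long ATTRIBUTE_UNUSED
lround_model (double x)
{
  double adj = __builtin_copysign (__builtin_nextafter (0.5, 0.0), x);
  return (long) (x + adj);
}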

/* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
   storing into OPERAND0.  */
void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
	xi = (long)op1;
	xi -= (double)xi > op1 ? 1 : 0;
	return xi;
   */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label, tmp;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
					    freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
			     ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}

/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */
void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	xa = fabs (operand1);
	if (!isless (xa, 2**52))
	  return operand1;
	xa = xa + 2**52 - 2**52;
	return copysign (xa, operand1);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
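
/* A scalar model of rint above, illustration only, assuming
   round-to-nearest: the 2**52 trick needs a nonnegative input, so
   |x| is rounded and the sign restored afterwards, which also keeps
   -0.0 -> -0.0.  */
static double ATTRIBUTE_UNUSED
rint_model (double x)
{
  const double two52 = 4503599627370496.0;
  double xa = __builtin_fabs (x);
  if (!(xa < two52))
    return x;			/* already integral, or NaN */
  xa = (xa + two52) - two52;
  return __builtin_copysign (xa, x);
}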

/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa = xa + TWO52 - TWO52;
	x2 = copysign (xa, x);
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 -= -1;
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
		   const_double_from_real_value (do_floor
						 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
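
/* A scalar model of the floor variant above, illustration only,
   assuming |x| < 2**52 on the taken path: truncation toward zero can
   land above the input (negative fractional x), so subtract one
   exactly when it does.  */
static double ATTRIBUTE_UNUSED
floor_model (double x)
{
  double x2 = (double) (long long) x;	/* truncate toward zero */
  if (x2 > x)
    x2 -= 1.0;
  return x2;
}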

/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), xa2, x2;
	if (!isless (xa, TWO52))
	  return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
	xa2 = xa + TWO52 - TWO52;
     Compensate.
	dxa = xa2 - xa;
	if (dxa <= -0.5)
	  xa2 += 1;
	else if (dxa > 0.5)
	  xa2 -= 1;
	x2 = copysign (xa2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; */
  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; */
  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
			       0, OPTAB_DIRECT);

  /* Compensate.  */
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
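
/* A scalar model of the compensation above, illustration only,
   assuming |x| < 2**52 on the taken path: the 2**52 trick rounds to
   nearest-even, so dxa = xa2 - xa lies in [-0.5, 0.5], and a
   half-way case that went down is pushed back up, giving
   round-half-away-from-zero on the magnitude.  */
static double ATTRIBUTE_UNUSED
round_model (double x)
{
  const double two52 = 4503599627370496.0;
  double xa = __builtin_fabs (x);
  double xa2 = (xa + two52) - two52;
  double dxa = xa2 - xa;
  if (dxa <= -0.5)
    xa2 += 1.0;
  else if (dxa > 0.5)
    xa2 -= 1.0;
  return __builtin_copysign (xa2, x);
}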

/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa2 = xa + TWO52 - TWO52;
     Compensate:
	if (xa2 > xa)
	  xa2 -= 1.0;
	x2 = copysign (xa2, x);
	return x2;
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
			  gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
			     res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	double xa = fabs (x);
	if (!isless (xa, TWO52))
	  return x;
	xa = (double)(long)(xa + nextafter (0.5, 0.0));
	return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

\f
/* Check whether an SSE5 instruction is valid or not.
   OPERANDS is the array of operands.
   NUM is the number of operands.
   USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
   NUM_MEMORY is the maximum number of memory operands to accept.  */
bool
ix86_sse5_valid_op_p (rtx operands[], rtx insn, int num, bool uses_oc0,
		      int num_memory)
{
  int mem_mask;
  int mem_count;
  int i;

  /* Count the number of memory arguments.  */
  mem_mask = 0;
  mem_count = 0;
  for (i = 0; i < num; i++)
    {
      enum machine_mode mode = GET_MODE (operands[i]);
      if (register_operand (operands[i], mode))
	;

      else if (memory_operand (operands[i], mode))
	{
	  mem_mask |= (1 << i);
	  mem_count++;
	}

      else
	{
	  rtx pattern = PATTERN (insn);

	  /* allow 0 for pcmov */
	  if (GET_CODE (pattern) != SET
	      || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
	      || i < 2
	      || operands[i] != CONST0_RTX (mode))
	    return false;
	}
    }

  /* If there were no memory operations, allow the insn.  */
  if (mem_mask == 0)
    return true;

  /* Do not allow the destination register to be a memory operand.  */
  else if (mem_mask & (1 << 0))
    return false;

  /* If there are too many memory operations, disallow the instruction.
     While the hardware only allows one memory reference, before register
     allocation we sometimes allow two memory operations for certain insns
     so that code like the following can be optimized:

	float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }

     or similar cases that are vectorized into using the fmaddss
     instruction.  */
  else if (mem_count > num_memory)
    return false;

  /* Don't allow more than one memory operation if not optimizing.  */
  else if (mem_count > 1 && !optimize)
    return false;

  else if (num == 4 && mem_count == 1)
    {
      /* formats (destination is the first argument), example fmaddss:
	 xmm1, xmm1, xmm2, xmm3/mem
	 xmm1, xmm1, xmm2/mem, xmm3
	 xmm1, xmm2, xmm3/mem, xmm1
	 xmm1, xmm2/mem, xmm3, xmm1 */
      if (uses_oc0)
	return ((mem_mask == (1 << 1))
		|| (mem_mask == (1 << 2))
		|| (mem_mask == (1 << 3)));

      /* format, example pmacsdd:
	 xmm1, xmm2, xmm3/mem, xmm1 */
      else
	return (mem_mask == (1 << 2));
    }

  else if (num == 4 && num_memory == 2)
    {
      /* If there are two memory operations, we can load one of the memory
	 ops into the destination register.  This is for optimizing the
	 multiply/add ops for which the combiner has given both the multiply
	 and the add insns a memory operand.  We have to be careful that the
	 destination doesn't overlap with the inputs.  */
      rtx op0 = operands[0];

      if (reg_mentioned_p (op0, operands[1])
	  || reg_mentioned_p (op0, operands[2])
	  || reg_mentioned_p (op0, operands[3]))
	return false;

      /* formats (destination is the first argument), example fmaddss:
	 xmm1, xmm1, xmm2, xmm3/mem
	 xmm1, xmm1, xmm2/mem, xmm3
	 xmm1, xmm2, xmm3/mem, xmm1
	 xmm1, xmm2/mem, xmm3, xmm1

	 For the oc0 case, we will load either operands[1] or operands[3]
	 into operands[0], so any combination of 2 memory operands is ok.  */
      if (uses_oc0)
	return true;

      /* format, example pmacsdd:
	 xmm1, xmm2, xmm3/mem, xmm1

	 For the integer multiply/add instructions be more restrictive and
	 require operands[2] and operands[3] to be the memory operands.  */
      else
	return (mem_mask == ((1 << 2) | (1 << 3)));
    }

  else if (num == 3 && num_memory == 1)
    {
      /* formats, example protb:
	 xmm1, xmm2, xmm3/mem
	 xmm1, xmm2/mem, xmm3 */
      if (uses_oc0)
	return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));

      /* format, example comeq:
	 xmm1, xmm2, xmm3/mem */
      else
	return (mem_mask == (1 << 2));
    }

  else
    gcc_unreachable ();

  return false;
}
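
/* For example, fmaddss with operands { xmm1, xmm1, xmm2, mem } has
   mem_mask == (1 << 3) and mem_count == 1, which the uses_oc0 case
   above accepts, while { mem, xmm1, xmm2, xmm3 } sets bit 0 of
   mem_mask and is rejected because the destination may not be a
   memory operand.  */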

\f
/* Fix up an SSE5 instruction that has 2 memory input references into a form
   the hardware will allow by using the destination register to load one of
   the memory operations.  Presently this is used by the multiply/add routines
   to allow 2 memory references.  */

void
ix86_expand_sse5_multiple_memory (rtx operands[],
				  int num,
				  enum machine_mode mode)
{
  rtx op0 = operands[0];
  if (num != 4
      || memory_operand (op0, mode)
      || reg_mentioned_p (op0, operands[1])
      || reg_mentioned_p (op0, operands[2])
      || reg_mentioned_p (op0, operands[3]))
    gcc_unreachable ();

  /* For 2 memory operands, pick either operands[1] or operands[3] to move
     into the destination register.  */
  if (memory_operand (operands[1], mode))
    {
      emit_move_insn (op0, operands[1]);
      operands[1] = op0;
    }
  else if (memory_operand (operands[3], mode))
    {
      emit_move_insn (op0, operands[3]);
      operands[3] = op0;
    }
  else
    gcc_unreachable ();
}

\f
/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true, true, ix86_handle_cconv_attribute },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
  { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL, 0, 0, false, false, false, NULL }
};

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
x86_builtin_vectorization_cost (bool runtime_test)
{
  /* If the branch of the runtime test is taken, i.e. the vectorized
     version is skipped, this incurs a misprediction cost (because the
     vectorized version is expected to be the fall-through).  So we subtract
     the latency of a mispredicted branch from the costs that are incurred
     when the vectorized version is executed.

     TODO: The values in individual target tables have to be tuned or new
     fields may be needed.  E.g. on K8, the default branch path is the
     not-taken path.  If the taken path is predicted correctly, the minimum
     penalty of going down the taken-path is 1 cycle.  If the taken-path is
     not predicted correctly, then the minimum penalty is 10 cycles.  */

  if (runtime_test)
    return -(ix86_cost->cond_taken_branch_cost);
  else
    return 0;
}

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS	\
  (TARGET_DEFAULT			\
   | TARGET_SUBTARGET_DEFAULT		\
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)

#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost

struct gcc_target targetm = TARGET_INITIALIZER;
\f
#include "gt-i386.h"