/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "tree-gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)

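/* Illustrative sketch (not part of the port proper): MODE_INDEX collapses
   the scalar integer modes onto rows of the per-mode cost arrays below,
   e.g. MODE_INDEX (SImode) == 2, and any other mode falls into the
   trailing "other" slot 4.  A typical consumer would look like the
   hypothetical helper here; the field name `mult_init' is an assumption
   about the struct processor_costs layout in i386.h.  */
#if 0
static int
example_mult_cost (const struct processor_costs *cost, enum machine_mode mode)
{
  return cost->mult_init[MODE_INDEX (mode)];
}
#endif
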
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

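/* A worked example of the scale: with COSTS_N_INSNS (N) defined as (N) * 4,
   one add-sized instruction costs 4 units, and COSTS_N_BYTES (2) == 4 makes
   a 2-byte instruction cost the same -- so when tuning for size the table
   below charges by encoding length instead of latency.  */
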
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}

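/* How to read the stringop descriptors below (a sketch inferred from the
   table shapes in this file, not a normative definition): each entry pairs
   a fallback algorithm with a list of {max_size, algorithm} stops, where
   the first stop whose max_size covers the block size wins and -1 acts as
   "infinity".  So, for example,

     {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}

   means blocks up to 256 bytes use the 4-byte rep prefix (rep movsl) and
   anything larger, or of unknown size, goes through the library call.
   DUMMY_STRINGOP_ALGS marks the 32bit/64bit variant that can never be
   selected for a given table.  */
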
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache */
  0,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/* HI */
   COSTS_N_INSNS (6),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (23),			/* SI */
   COSTS_N_INSNS (23),			/* DI */
   COSTS_N_INSNS (23)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache */
  0,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/* HI */
   COSTS_N_INSNS (12),			/* SI */
   COSTS_N_INSNS (12),			/* DI */
   COSTS_N_INSNS (12)},			/* other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/* HI */
   COSTS_N_INSNS (40),			/* SI */
   COSTS_N_INSNS (40),			/* DI */
   COSTS_N_INSNS (40)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  4,					/* size of l1 cache.  486 has 8kB cache
					   shared for code and data, so 4kB is
					   not really precise.  */
  4,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS}
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/* HI */
   COSTS_N_INSNS (11),			/* SI */
   COSTS_N_INSNS (11),			/* DI */
   COSTS_N_INSNS (11)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/* HI */
   COSTS_N_INSNS (25),			/* SI */
   COSTS_N_INSNS (25),			/* DI */
   COSTS_N_INSNS (25)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  8,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS}
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (4)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/* HI */
   COSTS_N_INSNS (17),			/* SI */
   COSTS_N_INSNS (17),			/* DI */
   COSTS_N_INSNS (17)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in
     the CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS}
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (7),			/* SI */
   COSTS_N_INSNS (7),			/* DI */
   COSTS_N_INSNS (7)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (39),			/* SI */
   COSTS_N_INSNS (39),			/* DI */
   COSTS_N_INSNS (39)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  1,					/* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */

  1,					/* cost of moving MMX register */
  {1, 1},				/* cost of loading MMX registers
					   in SImode and DImode */
  {1, 1},				/* cost of storing MMX registers
					   in SImode and DImode */
  1,					/* cost of moving SSE register */
  {1, 1, 1},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {1, 1, 1},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  1,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  128,					/* size of l2 cache.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS}
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (3),			/* DI */
   COSTS_N_INSNS (3)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/* HI */
   COSTS_N_INSNS (18),			/* SI */
   COSTS_N_INSNS (18),			/* DI */
   COSTS_N_INSNS (18)},			/* other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  32,					/* size of l2 cache.  Some models
					   have integrated l2 cache, but
					   optimizing for k6 is not important
					   enough to worry about that.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS}
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/* HI */
   COSTS_N_INSNS (5),			/* SI */
   COSTS_N_INSNS (5),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS}
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea to leave the number of prefetches entirely unlimited, since
     their execution also takes some time).  */
  100,					/* number of parallel prefetches */
  5,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};

struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
					/* On K8:
					     MOVD reg64, xmmreg  Double  FSTORE 4
					     MOVD reg32, xmmreg  Double  FSTORE 4
					   On AMDFAM10:
					     MOVD reg64, xmmreg  Double  FADD 3
							       1/1  1/1
					     MOVD reg32, xmmreg  Double  FADD 3
							       1/1  1/1 */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea to leave the number of prefetches entirely unlimited, since
     their execution also takes some time).  */
  100,					/* number of parallel prefetches */
  5,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (3),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (4),			/* constant shift costs */
  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),			/* HI */
   COSTS_N_INSNS (15),			/* SI */
   COSTS_N_INSNS (15),			/* DI */
   COSTS_N_INSNS (15)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),			/* HI */
   COSTS_N_INSNS (56),			/* SI */
   COSTS_N_INSNS (56),			/* DI */
   COSTS_N_INSNS (56)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),			/* HI */
   COSTS_N_INSNS (10),			/* SI */
   COSTS_N_INSNS (10),			/* DI */
   COSTS_N_INSNS (10)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),			/* HI */
   COSTS_N_INSNS (66),			/* SI */
   COSTS_N_INSNS (66),			/* DI */
   COSTS_N_INSNS (66)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  3,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  6,					/* cost of moving MMX register */
  {12, 12},				/* cost of loading MMX registers
					   in SImode and DImode */
  {12, 12},				/* cost of storing MMX registers
					   in SImode and DImode */
  6,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {12, 12, 12},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  8,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  1024,					/* size of l2 cache.  */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};

static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (3),			/* DI */
   COSTS_N_INSNS (3)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),			/* HI */
   COSTS_N_INSNS (22),			/* SI */
   COSTS_N_INSNS (22),			/* DI */
   COSTS_N_INSNS (22)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  16,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {6, 6, 6},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {6, 6},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {6, 6, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),			/* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};

/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (2)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};

/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona
   and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (2)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};

const struct processor_costs *ix86_cost = &pentium_cost;

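/* Illustrative only: the rest of the port consults the active cost table
   through this pointer (re-pointed at the selected CPU's table during
   option processing), along the lines of the hypothetical snippet below.
   The field names `add' and `lea' are assumptions about struct
   processor_costs as declared in i386.h.  */
#if 0
static int
example_lea_no_worse_than_add (void)
{
  return ix86_cost->lea <= ix86_cost->add;
}
#endif
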
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)

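/* Illustrative sketch of how these masks get used: each entry of the tune
   and arch tables below is a bitmask over PROCESSOR_* values, so checking
   whether the active CPU opts into a feature is a single AND against the
   bit of the currently selected processor.  The helper and parameter names
   here are hypothetical.  */
#if 0
static int
example_tune_enabled_p (unsigned int feature_mask,
			enum processor_type active_cpu)
{
  return (feature_mask & (1u << active_cpu)) != 0;
}
/* e.g. example_tune_enabled_p (m_K6_GEODE | m_PENT, PROCESSOR_K6) != 0.  */
#endif
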
/* Feature tests against the various tunings.  */
unsigned int ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling it for Generic64 seems like a good code size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro based chips.  */
  m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */
  m_486 | m_PENT,

  /* X86_TUNE_USE_BIT_TEST */
  m_386,

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,

  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
  m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation results.  But after P4 was made, no performance benefit
     was observed with branch hints.  They also increase the code size.
     As a result, icc never generates branch hints.  */
  0,

  /* X86_TUNE_DOUBLE_WITH_ADD */
  ~m_386,

  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on the Generic32 compilation setting as well.  However
     in the current implementation the partial register stalls are not
     eliminated very well - they can be introduced via subregs synthesized
     by combine and can happen in caller/callee saving sequences.  Because
     this option pays back little on PPro based chips and is in conflict
     with the partial reg dependencies used by Athlon/P4 based chips, it is
     better to leave it off for generic32 for now.  */
  m_PPRO,

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */
  m_K6,

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
  m_PENT4,

  /* X86_TUNE_SPLIT_LONG_MOVES */
  m_PPRO,

  /* X86_TUNE_READ_MODIFY_WRITE */
  ~m_PENT,

  /* X86_TUNE_READ_MODIFY */
  ~(m_PENT | m_PPRO),

  /* X86_TUNE_PROMOTE_QIMODE */
  m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
  | m_GENERIC /* | m_PENT4 ? */,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_PENT | m_486 | m_386),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_PENT4 | m_NOCONA,

  /* X86_TUNE_QIMODE_MATH */
  ~0,

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls were more effective.  */
  ~m_PPRO,

  /* X86_TUNE_PROMOTE_QI_REGS */
  0,

  /* X86_TUNE_PROMOTE_HI_REGS */
  m_PPRO,

  /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop.  */
  m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ADD_ESP_8 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
  | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_4 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_8 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies.  */
  ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
    | m_GENERIC | m_GEODE),

  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here between PPro/Pentium4 based chips that treat 128bit
     SSE registers as single units and K8 based chips that divide SSE
     registers into two 64bit halves.  This knob promotes all store
     destinations to be 128bit to allow register renaming on 128bit SSE
     units, but usually results in one extra microop on 64bit SSE units.
     Experimental results show that disabling this option on P4 brings over
     20% SPECfp regression, while enabling it on K8 brings roughly 2.4%
     regression that can be partly masked by careful scheduling of moves.  */
  m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,

  /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
  m_AMDFAM10,

  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just the lower part of scalar values in proper format, leaving
     the upper part undefined.  */
  m_ATHLON_K8,

  /* X86_TUNE_SSE_TYPELESS_STORES */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_PENT4 | m_NOCONA,

  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SHIFT1 */
  ~m_486,

  /* X86_TUNE_USE_FFREEP */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_INTER_UNIT_MOVES */
  ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),

  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in the 16 byte window.  */
  m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SCHEDULE */
  m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_BT */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_USE_INCDEC */
  ~(m_PENT4 | m_NOCONA | m_GENERIC),

  /* X86_TUNE_PAD_RETURNS */
  m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SHORTEN_X87_SSE */
  ~m_K8,

  /* X86_TUNE_AVOID_VECTOR_DECODE */
  m_K8 | m_GENERIC64,

  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
     HImode and SImode multiply, but 386 and 486 do HImode multiply
     faster.  */
  ~(m_386 | m_486),

  /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
     vector path on AMD machines.  */
  m_K8 | m_GENERIC64 | m_AMDFAM10,

  /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
     machines.  */
  m_K8 | m_GENERIC64 | m_AMDFAM10,

  /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
     than a MOV.  */
  m_PENT,

  /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
     but one byte longer.  */
  m_PENT,

  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
     operand that cannot be represented using a modRM byte.  The XOR
     replacement is long decoded, so this split helps here as well.  */
  m_K6,
};

/* Feature tests against the various architecture variations.  */
unsigned int ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro.  */
  ~(m_386 | m_486 | m_PENT | m_K6),

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
  ~m_386,

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium.  */
  ~(m_386 | m_486),

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
  ~m_386,

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
  ~m_386,
};

static const unsigned int x86_accumulate_outgoing_args
  = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;

static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
    | m_NOCONA | m_CORE2 | m_GENERIC;

static enum stringop_alg stringop_alg = no_stringop;

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* REX registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
c572e5ba 1329
3d117b30 1330/* The "default" register map used in 32bit mode. */
83774849 1331
0f290768 1332int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
1333{
1334 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1335 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
b0d95de8 1336 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
a7180f70
BS
1337 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1338 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
3d117b30
JH
1339 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1340 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
1341};
1342
5bf0ebab
RH
1343static int const x86_64_int_parameter_registers[6] =
1344{
1345 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1346 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1347};
1348
ccf8e764
RH
1349static int const x86_64_ms_abi_int_parameter_registers[4] =
1350{
1351 2 /*RCX*/, 1 /*RDX*/,
1352 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1353};
1354
5bf0ebab
RH
1355static int const x86_64_int_return_registers[4] =
1356{
ccf8e764 1357 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
5bf0ebab 1358};
53c17031 1359
0f7fa3d0
JH
1360/* The "default" register map used in 64bit mode. */
1361int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1362{
1363 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
3de72741 1364 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
b0d95de8 1365 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
0f7fa3d0
JH
1366 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1367 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1368 8,9,10,11,12,13,14,15, /* extended integer registers */
1369 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1370};
1371
83774849
RH
1372/* Define the register numbers to be used in Dwarf debugging information.
1373 The SVR4 reference port C compiler uses the following register numbers
1374 in its Dwarf output code:
1375 0 for %eax (gcc regno = 0)
1376 1 for %ecx (gcc regno = 2)
1377 2 for %edx (gcc regno = 1)
1378 3 for %ebx (gcc regno = 3)
1379 4 for %esp (gcc regno = 7)
1380 5 for %ebp (gcc regno = 6)
1381 6 for %esi (gcc regno = 4)
1382 7 for %edi (gcc regno = 5)
1383 The following three DWARF register numbers are never generated by
1384 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1385 believes these numbers have these meanings.
1386 8 for %eip (no gcc equivalent)
1387 9 for %eflags (gcc regno = 17)
1388 10 for %trapno (no gcc equivalent)
1389 It is not at all clear how we should number the FP stack registers
1390 for the x86 architecture. If the version of SDB on x86/svr4 were
1391 a bit less brain dead with respect to floating-point then we would
1392 have a precedent to follow with respect to DWARF register numbers
1393 for x86 FP registers, but the SDB on x86/svr4 is so completely
1394 broken with respect to FP registers that it is hardly worth thinking
1395 of it as something to strive for compatibility with.
1396 The version of x86/svr4 SDB I have at the moment does (partially)
1397 seem to believe that DWARF register number 11 is associated with
1398 the x86 register %st(0), but that's about all. Higher DWARF
1399 register numbers don't seem to be associated with anything in
1400 particular, and even for DWARF regno 11, SDB only seems to under-
1401 stand that it should say that a variable lives in %st(0) (when
1402 asked via an `=' command) if we said it was in DWARF regno 11,
1403 but SDB still prints garbage when asked for the value of the
1404 variable in question (via a `/' command).
1405 (Also note that the labels SDB prints for various FP stack regs
1406 when doing an `x' command are all wrong.)
1407 Note that these problems generally don't affect the native SVR4
1408 C compiler because it doesn't allow the use of -O with -g and
1409 because when it is *not* optimizing, it allocates a memory
1410 location for each floating-point variable, and the memory
1411 location is what gets described in the DWARF AT_location
1412 attribute for the variable in question.
1413 Regardless of the severe mental illness of the x86/svr4 SDB, we
1414 do something sensible here and we use the following DWARF
1415 register numbers. Note that these are all stack-top-relative
1416 numbers.
1417 11 for %st(0) (gcc regno = 8)
1418 12 for %st(1) (gcc regno = 9)
1419 13 for %st(2) (gcc regno = 10)
1420 14 for %st(3) (gcc regno = 11)
1421 15 for %st(4) (gcc regno = 12)
1422 16 for %st(5) (gcc regno = 13)
1423 17 for %st(6) (gcc regno = 14)
1424 18 for %st(7) (gcc regno = 15)
1425*/
0f290768 1426int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
1427{
1428 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1429 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
b0d95de8 1430 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
a7180f70
BS
1431 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1432 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
d1f87653
KH
1433 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1434 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
1435};
1436
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;

/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
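
/* Worked size (illustrative, assuming the 64-bit ABI values
   REGPARM_MAX == 6, UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8):
   6*8 + 8*16 = 176 bytes, the familiar size of the x86-64 varargs
   register save area.  */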

/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					<- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					<- HARD_FRAME_POINTER
   [saved regs]

   [padding1]            \
			  )
   [va_arg registers]    (
			  > to_allocate <- FRAME_POINTER
   [frame]               (
			  )
   [padding2]            /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};

/* Code model option.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialects.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* -mstackrealign option */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */

int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

/* Fence to use after loop using movnt.  */
tree x86_mfence;

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
{
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};

#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;

\f
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (tree, tree, bool);
static int ix86_function_regparm (tree, tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
						 rtx, rtx, int);

\f
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Bit flags that specify the ISA we are compiling for.  */
int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;

/* A mask of ix86_isa_flags that includes bit X if X
   was set or cleared on the command line.  */
static int ix86_isa_flags_explicit;

/* Define a set of ISAs which aren't available for a given ISA.  MMX
   and SSE ISAs are handled separately.  */

#define OPTION_MASK_ISA_MMX_UNSET \
  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
#define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A

#define OPTION_MASK_ISA_SSE_UNSET \
  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
#define OPTION_MASK_ISA_SSE2_UNSET \
  (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
#define OPTION_MASK_ISA_SSE3_UNSET \
  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
#define OPTION_MASK_ISA_SSSE3_UNSET \
  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
#define OPTION_MASK_ISA_SSE4_1_UNSET \
  (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
#define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A

/* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same
   as -msse4.1 -msse4.2.  -mno-sse4 should be the same as -mno-sse4.1.  */
#define OPTION_MASK_ISA_SSE4 \
  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET

#define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
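
/* Worked example of the cascade (illustrative): because the _UNSET
   macros chain into one another, OPTION_MASK_ISA_SSE2_UNSET expands to
   the union of the SSE3, SSSE3, SSE4.1, SSE4.2 and SSE4A masks, so
   -mno-sse2 also turns off every later SSE extension.  */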

/* Implement TARGET_HANDLE_OPTION.  */

static bool
ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
{
  switch (code)
    {
    case OPT_mmmx:
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
      if (!value)
	{
	  ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
	}
      return true;

    case OPT_m3dnow:
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
      if (!value)
	{
	  ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
	}
      return true;

    case OPT_m3dnowa:
      return false;

    case OPT_msse:
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
      if (!value)
	{
	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
	}
      return true;

    case OPT_msse2:
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
      if (!value)
	{
	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
	}
      return true;

    case OPT_msse3:
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
      if (!value)
	{
	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
	}
      return true;

    case OPT_mssse3:
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
      if (!value)
	{
	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
	}
      return true;

    case OPT_msse4_1:
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
      if (!value)
	{
	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
	}
      return true;

    case OPT_msse4_2:
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
      if (!value)
	{
	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
	}
      return true;

    case OPT_msse4:
      ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
      return true;

    case OPT_mno_sse4:
      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
      return true;

    case OPT_msse4a:
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
      if (!value)
	{
	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
	}
      return true;

    default:
      return true;
    }
}
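
/* Behavior sketch (illustrative): "-mno-sse3" enters the OPT_msse3 case
   with value == 0, clears SSSE3/SSE4.1/SSE4.2/SSE4A along with SSE3 in
   ix86_isa_flags, and records all of those bits in
   ix86_isa_flags_explicit so that the -march defaults applied in
   override_options below cannot silently re-enable them.  */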

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options (void)
{
  int i;
  int ix86_tune_defaulted = 0;
  int ix86_arch_specified = 0;
  unsigned int ix86_arch_mask, ix86_tune_mask;

  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;  /* Processor costs */
      const int align_loop;                /* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 4, 3, 4, 3, 4},
      {&i486_cost, 16, 15, 16, 15, 16},
      {&pentium_cost, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 16, 15, 16, 10, 16},
      {&geode_cost, 0, 0, 0, 0, 0},
      {&k6_cost, 32, 7, 32, 7, 32},
      {&athlon_cost, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0},
      {&k8_cost, 16, 7, 16, 7, 16},
      {&nocona_cost, 0, 0, 0, 0, 0},
      {&core2_cost, 16, 10, 16, 10, 16},
      {&generic32_cost, 16, 7, 16, 7, 16},
      {&generic64_cost, 16, 10, 16, 10, 16},
      {&amdfam10_cost, 32, 24, 32, 7, 32}
    };

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  enum pta_flags
    {
      PTA_SSE = 1 << 0,
      PTA_SSE2 = 1 << 1,
      PTA_SSE3 = 1 << 2,
      PTA_MMX = 1 << 3,
      PTA_PREFETCH_SSE = 1 << 4,
      PTA_3DNOW = 1 << 5,
      PTA_3DNOW_A = 1 << 6,
      PTA_64BIT = 1 << 7,
      PTA_SSSE3 = 1 << 8,
      PTA_CX16 = 1 << 9,
      PTA_POPCNT = 1 << 10,
      PTA_ABM = 1 << 11,
      PTA_SSE4A = 1 << 12,
      PTA_NO_SAHF = 1 << 13,
      PTA_SSE4_1 = 1 << 14,
      PTA_SSE4_2 = 1 << 15
    };

  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const unsigned /*enum pta_flags*/ flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
      {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
      {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
      {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
      {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
      {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
				    | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
				    | PTA_CX16 | PTA_NO_SAHF)},
      {"core2", PROCESSOR_CORE2, (PTA_64BIT
				  | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
				  | PTA_SSSE3
				  | PTA_CX16)},
      {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
				  | PTA_PREFETCH_SSE)},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
				    | PTA_PREFETCH_SSE)},
      {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
					  | PTA_PREFETCH_SSE)},
      {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
				      | PTA_SSE)},
      {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
				       | PTA_SSE)},
      {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
				       | PTA_SSE)},
      {"x86-64", PROCESSOR_K8, (PTA_64BIT
				| PTA_MMX | PTA_SSE | PTA_SSE2
				| PTA_NO_SAHF)},
      {"k8", PROCESSOR_K8, (PTA_64BIT
			    | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
			    | PTA_SSE | PTA_SSE2
			    | PTA_NO_SAHF)},
      {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
				 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
				 | PTA_SSE | PTA_SSE2 | PTA_SSE3
				 | PTA_NO_SAHF)},
      {"opteron", PROCESSOR_K8, (PTA_64BIT
				 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
				 | PTA_SSE | PTA_SSE2
				 | PTA_NO_SAHF)},
      {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
				      | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
				      | PTA_SSE | PTA_SSE2 | PTA_SSE3
				      | PTA_NO_SAHF)},
      {"athlon64", PROCESSOR_K8, (PTA_64BIT
				  | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
				  | PTA_SSE | PTA_SSE2
				  | PTA_NO_SAHF)},
      {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
				       | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
				       | PTA_SSE | PTA_SSE2 | PTA_SSE3
				       | PTA_NO_SAHF)},
      {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
				   | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
				   | PTA_SSE | PTA_SSE2
				   | PTA_NO_SAHF)},
      {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
					| PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
					| PTA_SSE | PTA_SSE2 | PTA_SSE3
					| PTA_SSE4A
					| PTA_CX16 | PTA_ABM)},
      {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
					 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
					 | PTA_SSE | PTA_SSE2 | PTA_SSE3
					 | PTA_SSE4A
					 | PTA_CX16 | PTA_ABM)},
      {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch.  */ },
      {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch.  */ },
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* -fPIC is the default for x86_64.  */
  if (TARGET_MACHO && TARGET_64BIT)
    flag_pic = 2;

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      /* Mach-O doesn't support omitting the frame pointer for now.  */
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }

  /* Need to check -mtune=generic first.  */
  if (ix86_tune_string)
    {
      if (!strcmp (ix86_tune_string, "generic")
	  || !strcmp (ix86_tune_string, "i686")
	  /* As special support for cross compilers we read -mtune=native
	     as -mtune=generic.  With native compilers we won't see the
	     -mtune=native, as it was changed by the driver.  */
	  || !strcmp (ix86_tune_string, "native"))
	{
	  if (TARGET_64BIT)
	    ix86_tune_string = "generic64";
	  else
	    ix86_tune_string = "generic32";
	}
      else if (!strncmp (ix86_tune_string, "generic", 7))
	error ("bad value (%s) for -mtune= switch", ix86_tune_string);
    }
  else
    {
      if (ix86_arch_string)
	ix86_tune_string = ix86_arch_string;
      if (!ix86_tune_string)
	{
	  ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
	  ix86_tune_defaulted = 1;
	}

      /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
	 need to use a sensible tune option.  */
      if (!strcmp (ix86_tune_string, "generic")
	  || !strcmp (ix86_tune_string, "x86-64")
	  || !strcmp (ix86_tune_string, "i686"))
	{
	  if (TARGET_64BIT)
	    ix86_tune_string = "generic64";
	  else
	    ix86_tune_string = "generic32";
	}
    }
  if (ix86_stringop_string)
    {
      if (!strcmp (ix86_stringop_string, "rep_byte"))
	stringop_alg = rep_prefix_1_byte;
      else if (!strcmp (ix86_stringop_string, "libcall"))
	stringop_alg = libcall;
      else if (!strcmp (ix86_stringop_string, "rep_4byte"))
	stringop_alg = rep_prefix_4_byte;
      else if (!strcmp (ix86_stringop_string, "rep_8byte"))
	stringop_alg = rep_prefix_8_byte;
      else if (!strcmp (ix86_stringop_string, "byte_loop"))
	stringop_alg = loop_1_byte;
      else if (!strcmp (ix86_stringop_string, "loop"))
	stringop_alg = loop;
      else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
	stringop_alg = unrolled_loop;
      else
	error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
    }
  if (!strcmp (ix86_tune_string, "x86-64"))
    warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated.  Use -mtune=k8 or "
	     "-mtune=generic instead as appropriate.");

  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
  else
    ix86_arch_specified = 1;

  if (!strcmp (ix86_arch_string, "generic"))
    error ("generic CPU can be used only for -mtune= switch");
  if (!strncmp (ix86_arch_string, "generic", 7))
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (!strcmp (ix86_cmodel_string, "medium"))
	ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large"))
	ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
      else if (flag_pic)
	error ("code model %s does not support PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
	 use of rip-relative addressing.  This eliminates fixups that
	 would otherwise be needed if this object is to be placed in a
	 DLL, and is essentially just as efficient as direct addressing.  */
      if (TARGET_64BIT_MS_ABI)
	ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
      else if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else
	ix86_cmodel = CM_32;
    }
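
  /* Defaulting examples for the code above (illustrative): a 64-bit
     non-PIC compile gets CM_SMALL, -fPIC alone gets CM_SMALL_PIC, and
     -mcmodel=kernel is accepted only when -fPIC is not in use.  */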
  if (ix86_asm_string != 0)
    {
      if (! TARGET_MACHO
	  && !strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model %qs not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;

	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 "
		 "instruction set");

	if (processor_alias_table[i].flags & PTA_MMX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
	  ix86_isa_flags |= OPTION_MASK_ISA_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
	if (processor_alias_table[i].flags & PTA_SSSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
	if (processor_alias_table[i].flags & PTA_SSE4_1
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
	if (processor_alias_table[i].flags & PTA_SSE4_2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
	if (processor_alias_table[i].flags & PTA_SSE4A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;

	if (processor_alias_table[i].flags & PTA_ABM)
	  x86_abm = true;
	if (processor_alias_table[i].flags & PTA_CX16)
	  x86_cmpxchg16b = true;
	if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
	  x86_popcnt = true;
	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
	  x86_prefetch_sse = true;
	if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
	  x86_sahf = true;

	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  ix86_arch_mask = 1u << ix86_arch;
  for (i = 0; i < X86_ARCH_LAST; ++i)
    ix86_arch_features[i] &= ix86_arch_mask;

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  {
	    if (ix86_tune_defaulted)
	      {
		ix86_tune_string = "x86-64";
		for (i = 0; i < pta_size; i++)
		  if (! strcmp (ix86_tune_string,
				processor_alias_table[i].name))
		    break;
		ix86_tune = processor_alias_table[i].processor;
	      }
	    else
	      error ("CPU you selected does not support x86-64 "
		     "instruction set");
	  }
	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOVE
	    && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
	  x86_prefetch_sse = true;
	break;
      }
  if (i == pta_size)
    error ("bad value (%s) for -mtune= switch", ix86_tune_string);

  ix86_tune_mask = 1u << ix86_tune;
  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] &= ix86_tune_mask;

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      if (TARGET_64BIT)
	warning (0, "-mregparm is ignored in 64-bit mode");
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning (0, "-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning (0, "-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning (0, "-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }
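
  /* Worked example (illustrative): -malign-functions=4 yields
     align_functions = 1 << 4 = 16, i.e. function entries aligned to a
     16-byte boundary, the same as -falign-functions=16.  */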

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_tune].align_func;
    }

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = ix86_cost->branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }
  if (ix86_section_threshold_string)
    {
      i = atoi (ix86_section_threshold_string);
      if (i < 0)
	error ("-mlarge-data-threshold=%d is negative", i);
      else
	ix86_section_threshold = i;
    }

  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU2;
      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
	ix86_tls_dialect = TLS_DIALECT_SUN;
      else
	error ("bad value (%s) for -mtls-dialect= switch",
	       ix86_tls_dialect_string);
    }

  if (ix87_precision_string)
    {
      i = atoi (ix87_precision_string);
      if (i != 32 && i != 64 && i != 80)
	error ("pc%d is not a valid precision setting (32, 64 or 80)", i);
    }

  if (TARGET_64BIT)
    {
      target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
	       | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);

      if (TARGET_RTD)
	warning (0, "-mrtd is ignored in 64bit mode");
    }
  else
    {
      target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;

      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;

      /* The i386 ABI does not specify a red zone.  It still makes sense to
	 use one when the programmer takes care to keep the stack from
	 being destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }

  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_finite_math_only)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;

  /* Turn on SSE4.1 builtins for -msse4.2.  */
  if (TARGET_SSE4_2)
    ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;

  /* Turn on SSSE3 builtins for -msse4.1.  */
  if (TARGET_SSE4_1)
    ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;

  /* Turn on SSE3 builtins for -mssse3.  */
  if (TARGET_SSSE3)
    ix86_isa_flags |= OPTION_MASK_ISA_SSE3;

  /* Turn on SSE3 builtins for -msse4a.  */
  if (TARGET_SSE4A)
    ix86_isa_flags |= OPTION_MASK_ISA_SSE3;

  /* Turn on SSE2 builtins for -msse3.  */
  if (TARGET_SSE3)
    ix86_isa_flags |= OPTION_MASK_ISA_SSE2;

  /* Turn on SSE builtins for -msse2.  */
  if (TARGET_SSE2)
    ix86_isa_flags |= OPTION_MASK_ISA_SSE;

  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    {
      ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
      x86_prefetch_sse = true;
    }

  /* Turn on MMX builtins for 3Dnow.  */
  if (TARGET_3DNOW)
    ix86_isa_flags |= OPTION_MASK_ISA_MMX;

  /* Turn on popcnt instruction for -msse4.2 or -mabm.  */
  if (TARGET_SSE4_2 || TARGET_ABM)
    x86_popcnt = true;

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128.  We can't
     change it because of optimize_size.  Otherwise, we can't mix object
     files compiled with -Os and -On.  */
  ix86_preferred_stack_boundary = 128;
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
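
  /* Worked example (illustrative): the 128-bit default corresponds to
     -mpreferred-stack-boundary=4, since (1 << 4) * BITS_PER_UNIT
     = 16 * 8 = 128 bits, i.e. a 16-byte aligned stack.  */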

  /* Accept -msseregparm only if at least SSE support is enabled.  */
  if (TARGET_SSEREGPARM
      && ! TARGET_SSE)
    error ("-msseregparm used without SSE enabled");

  ix86_fpmath = TARGET_FPMATH_DEFAULT;
  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning (0, "387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }

  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;

  if ((x86_accumulate_outgoing_args & ix86_tune_mask)
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* ??? Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "unwind tables currently require either a frame pointer "
		 "or -maccumulate-outgoing-args for correctness");
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }

  /* For sane SSE instruction set generation we need fcomi instruction.
     It is safe to enable all CMOVE instructions.  */
  if (TARGET_SSE)
    TARGET_CMOVE = 1;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }

  /* When scheduling description is not available, disable scheduler pass
     so it won't slow down the compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;

  if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
    set_param_value ("simultaneous-prefetches",
		     ix86_cost->simultaneous_prefetches);
  if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
    set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
  if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
    set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
  if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
    set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
}
\f
/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
	return true;
    }

  return false;
}
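
/* Example (illustrative): under -mcmodel=medium with the default
   ix86_section_threshold of 65536, a definition such as

     static char big_buf[100000];

   is large data and ends up in .lbss rather than .bss.  */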

/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;

static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for the medium model.  Place them into
	     default sections and hope for the best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}

/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for the medium model.  Place them into
	     default sections and hope for the best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name;
	  size_t nlen, plen;
	  char *string;
	  plen = strlen (prefix);

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);
	  nlen = strlen (name);

	  string = (char *) alloca (nlen + plen + 1);
	  memcpy (string, prefix, plen);
	  memcpy (string + plen, name, nlen + 1);

	  DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}

#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For the medium model x86-64 we need to use the .largecomm directive
   for large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int) ix86_section_threshold)
    fprintf (file, ".largecomm\t");
  else
    fprintf (file, "%s", COMMON_ASM_OP);
  assemble_name (file, name);
  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
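
/* Example output (illustrative): a 100000-byte common object "big_buf"
   with align == 32 bits under -mcmodel=medium is emitted as

     .largecomm	big_buf,100000,4

   while smaller objects use the plain COMMON_ASM_OP form.  */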

/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int) ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* The standard thing is to just output a label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
\f
void
optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  if (TARGET_MACHO)
    /* The Darwin libraries never set errno, so we might as well
       avoid calling them when that's the only reason we would.  */
    flag_errno_math = 0;

  /* The default values of these switches depend on TARGET_64BIT,
     which is not known at this moment.  Mark these values with 2 and
     let the user override them.  If no command line option specifies
     them, we will set the defaults in override_options.  */
  if (optimize >= 1)
    flag_omit_frame_pointer = 2;
  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
  SUBTARGET_OPTIMIZATION_OPTIONS;
#endif
}
\f
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree func;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  */
  if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
    return false;

  if (decl)
    func = decl;
  else
    {
      func = TREE_TYPE (CALL_EXPR_FN (exp));
      if (POINTER_TYPE_P (func))
	func = TREE_TYPE (func);
    }

  /* Check that the return value locations are the same.  For example,
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), func, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    ;
  else if (!rtx_equal_p (a, b))
    return false;

  /* If this call is indirect, we'll need to be able to use a call-clobbered
     register for the address of the target function.  Make sure that all
     such registers are not used for passing parameters.  */
  if (!decl && !TARGET_64BIT)
    {
      tree type;

      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */

      if (ix86_function_regparm (type, NULL) >= 3)
	{
	  /* ??? Need to count the actual number of registers to be used,
	     not the possible number of registers.  Fix later.  */
	  return false;
	}
    }

  /* Dllimport'd functions are also called indirectly.  */
  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
      && decl && DECL_DLLIMPORT_P (decl)
      && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
    return false;

  /* If we force-aligned the stack, then sibcalling would unalign the
     stack, which may break the called function.  */
  if (cfun->machine->force_align_arg_pointer)
    return false;

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
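
/* Illustrative case for the PIC restriction above: with -m32 -fPIC,

     extern int bar (int);
     int foo (int x) { return bar (x); }

   cannot be compiled as a sibcall, because the call to bar goes through
   the PLT and the PLT sequence needs %ebx to hold the GOT pointer.  */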
2785
fa283935
UB
2786/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2787 calling convention attributes;
91d231cb 2788 arguments as in struct attribute_spec.handler. */
b08de47e 2789
91d231cb 2790static tree
2f84b963
RG
2791ix86_handle_cconv_attribute (tree *node, tree name,
2792 tree args,
2793 int flags ATTRIBUTE_UNUSED,
2794 bool *no_add_attrs)
91d231cb
JM
2795{
2796 if (TREE_CODE (*node) != FUNCTION_TYPE
2797 && TREE_CODE (*node) != METHOD_TYPE
2798 && TREE_CODE (*node) != FIELD_DECL
2799 && TREE_CODE (*node) != TYPE_DECL)
2800 {
5c498b10 2801 warning (OPT_Wattributes, "%qs attribute only applies to functions",
91d231cb
JM
2802 IDENTIFIER_POINTER (name));
2803 *no_add_attrs = true;
2f84b963 2804 return NULL_TREE;
91d231cb 2805 }
2f84b963
RG
2806
2807 /* Can combine regparm with all attributes but fastcall. */
2808 if (is_attribute_p ("regparm", name))
91d231cb
JM
2809 {
2810 tree cst;
b08de47e 2811
2f84b963
RG
2812 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2813 {
2814 error ("fastcall and regparm attributes are not compatible");
2815 }
2816
91d231cb
JM
2817 cst = TREE_VALUE (args);
2818 if (TREE_CODE (cst) != INTEGER_CST)
2819 {
5c498b10
DD
2820 warning (OPT_Wattributes,
2821 "%qs attribute requires an integer constant argument",
91d231cb
JM
2822 IDENTIFIER_POINTER (name));
2823 *no_add_attrs = true;
2824 }
2825 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2826 {
5c498b10 2827 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
91d231cb
JM
2828 IDENTIFIER_POINTER (name), REGPARM_MAX);
2829 *no_add_attrs = true;
2830 }
e91f04de 2831
33932946
SH
2832 if (!TARGET_64BIT
2833 && lookup_attribute (ix86_force_align_arg_pointer_string,
2834 TYPE_ATTRIBUTES (*node))
2835 && compare_tree_int (cst, REGPARM_MAX-1))
2836 {
2837 error ("%s functions limited to %d register parameters",
2838 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2839 }
2840
2f84b963
RG
2841 return NULL_TREE;
2842 }
2843
2844 if (TARGET_64BIT)
2845 {
ccf8e764
RH
2846 /* Do not warn when emulating the MS ABI. */
2847 if (!TARGET_64BIT_MS_ABI)
2848 warning (OPT_Wattributes, "%qs attribute ignored",
2849 IDENTIFIER_POINTER (name));
2f84b963
RG
2850 *no_add_attrs = true;
2851 return NULL_TREE;
2852 }
2853
fa283935 2854 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2f84b963
RG
2855 if (is_attribute_p ("fastcall", name))
2856 {
2857 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2858 {
2859 error ("fastcall and cdecl attributes are not compatible");
2860 }
2861 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2862 {
2863 error ("fastcall and stdcall attributes are not compatible");
2864 }
2865 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2866 {
e767b5be
JH
2867 error ("fastcall and regparm attributes are not compatible");
2868 }
b08de47e
MM
2869 }
2870
fa283935
UB
2871 /* Can combine stdcall with fastcall (redundant), regparm and
2872 sseregparm. */
2f84b963
RG
2873 else if (is_attribute_p ("stdcall", name))
2874 {
2875 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2876 {
2877 error ("stdcall and cdecl attributes are not compatible");
2878 }
2879 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2880 {
2881 error ("stdcall and fastcall attributes are not compatible");
2882 }
2883 }
2884
fa283935 2885 /* Can combine cdecl with regparm and sseregparm. */
2f84b963
RG
2886 else if (is_attribute_p ("cdecl", name))
2887 {
2888 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2889 {
2890 error ("stdcall and cdecl attributes are not compatible");
2891 }
2892 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2893 {
2894 error ("fastcall and cdecl attributes are not compatible");
2895 }
2896 }
2897
fa283935 2898 /* Can combine sseregparm with all attributes. */
2f84b963 2899
91d231cb 2900 return NULL_TREE;
b08de47e
MM
2901}
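/* Editor's sketch, not part of GCC: how these calling-convention
   attributes appear in user code.  The accepted combinations mirror
   the checks above: regparm mixes with stdcall and cdecl, but not with
   fastcall.  All function names are hypothetical.  */
#if 0
int __attribute__ ((regparm (3))) add3 (int a, int b, int c); /* EAX, EDX, ECX */
int __attribute__ ((fastcall)) fc (int a, int b);             /* ECX, EDX */
int __attribute__ ((stdcall, regparm (2))) sr (int a, int b); /* accepted */
int __attribute__ ((fastcall, regparm (2))) bad (int, int);   /* error above */
#endif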
2902
2903/* Return 0 if the attributes for two types are incompatible, 1 if they
2904 are compatible, and 2 if they are nearly compatible (which causes a
2905 warning to be generated). */
2906
8d8e52be 2907static int
b96a374d 2908ix86_comp_type_attributes (tree type1, tree type2)
b08de47e 2909{
0f290768 2910 /* Check for mismatch of non-default calling convention. */
27c38fbe 2911 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
2912
2913 if (TREE_CODE (type1) != FUNCTION_TYPE)
2914 return 1;
2915
2f84b963
RG
2916 /* Check for mismatched fastcall/regparm types. */
2917 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2918 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2919 || (ix86_function_regparm (type1, NULL)
2920 != ix86_function_regparm (type2, NULL)))
2921 return 0;
2922
2923 /* Check for mismatched sseregparm types. */
2924 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2925 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
b96a374d 2926 return 0;
e91f04de 2927
afcfe58c 2928 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
2929 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2930 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
265d94ac 2931 return 0;
2f84b963 2932
b08de47e
MM
2933 return 1;
2934}
b08de47e 2935\f
0fa2e4df 2936/* Return the regparm value for a function with the indicated TYPE and DECL.
e767b5be 2937 DECL may be NULL when calling a function indirectly
839a4992 2938 or considering a libcall. */
483ab821
MM
2939
2940static int
e767b5be 2941ix86_function_regparm (tree type, tree decl)
483ab821
MM
2942{
2943 tree attr;
e767b5be 2944 int regparm = ix86_regparm;
483ab821 2945
ee2f65b4
RH
2946 if (TARGET_64BIT)
2947 return regparm;
2948
2949 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2950 if (attr)
2951 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2952
2953 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2954 return 2;
2955
2956 /* Use register calling convention for local functions when possible. */
ac97d816
UB
2957 if (decl && TREE_CODE (decl) == FUNCTION_DECL
2958 && flag_unit_at_a_time && !profile_flag)
e767b5be 2959 {
ee2f65b4
RH
2960 struct cgraph_local_info *i = cgraph_local_info (decl);
2961 if (i && i->local)
e767b5be 2962 {
ee2f65b4
RH
2963 int local_regparm, globals = 0, regno;
2964 struct function *f;
e767b5be 2965
ee2f65b4
RH
2966 /* Make sure no regparm register is taken by a
2967 global register variable. */
2968 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2969 if (global_regs[local_regparm])
2970 break;
e767b5be 2971
ee2f65b4
RH
 2972 /* We can't use regparm(3) for nested functions, as these pass
 2973 the static chain pointer in the third argument. */
2974 if (local_regparm == 3
f2f0a960
HMC
2975 && (decl_function_context (decl)
2976 || ix86_force_align_arg_pointer)
ee2f65b4
RH
2977 && !DECL_NO_STATIC_CHAIN (decl))
2978 local_regparm = 2;
2979
 2980 /* If the function realigns its stack pointer, the prologue will
2981 clobber %ecx. If we've already generated code for the callee,
2982 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
2983 scanning the attributes for the self-realigning property. */
2984 f = DECL_STRUCT_FUNCTION (decl);
2985 if (local_regparm == 3
2986 && (f ? !!f->machine->force_align_arg_pointer
2987 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
2988 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2989 local_regparm = 2;
2990
 2991 /* Each global register variable increases register pressure,
 2992 so the more global reg vars there are, the less the regparm
 2993 optimization can be used, unless requested explicitly by the user. */
2994 for (regno = 0; regno < 6; regno++)
2995 if (global_regs[regno])
2996 globals++;
2997 local_regparm
2998 = globals < local_regparm ? local_regparm - globals : 0;
2999
3000 if (local_regparm > regparm)
3001 regparm = local_regparm;
e767b5be
JH
3002 }
3003 }
ee2f65b4 3004
e767b5be 3005 return regparm;
483ab821
MM
3006}
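/* Editor's sketch, not part of GCC: the local-function case above in
   practice.  A static function whose address never escapes is marked
   i->local by cgraph, so up to three integer arguments may be promoted
   to registers with no attribute at all; each global register variable
   among the first six integer registers then costs one slot.  The
   names below are hypothetical.  */
#if 0
register int g_counter asm ("ebx");  /* counts against local regparm */

static int
sum3 (int a, int b, int c)           /* eligible for implicit regparm */
{
  return a + b + c + g_counter;
}
#endif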
3007
3e0a5abd
UB
 3008/* Return 1 or 2 if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3009 DFmode (2) arguments in SSE registers for a function with the
3010 indicated TYPE and DECL. DECL may be NULL when calling function
3011 indirectly or considering a libcall. Otherwise return 0. */
2f84b963
RG
3012
3013static int
3014ix86_function_sseregparm (tree type, tree decl)
3015{
ee2f65b4
RH
3016 gcc_assert (!TARGET_64BIT);
3017
2f84b963
RG
3018 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3019 by the sseregparm attribute. */
1f97667f 3020 if (TARGET_SSEREGPARM
ee2f65b4 3021 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2f84b963
RG
3022 {
3023 if (!TARGET_SSE)
3024 {
3025 if (decl)
3026 error ("Calling %qD with attribute sseregparm without "
3027 "SSE/SSE2 enabled", decl);
3028 else
3029 error ("Calling %qT with attribute sseregparm without "
3030 "SSE/SSE2 enabled", type);
3031 return 0;
3032 }
3033
3034 return 2;
3035 }
3036
56829cae 3037 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
ee2f65b4
RH
3038 (and DFmode for SSE2) arguments in SSE registers. */
3039 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2f84b963
RG
3040 {
3041 struct cgraph_local_info *i = cgraph_local_info (decl);
3042 if (i && i->local)
3043 return TARGET_SSE2 ? 2 : 1;
3044 }
3045
3046 return 0;
3047}
3048
f676971a 3049/* Return true if EAX is live at the start of the function. Used by
fe9f516f
RH
3050 ix86_expand_prologue to determine if we need special help before
3051 calling allocate_stack_worker. */
3052
3053static bool
3054ix86_eax_live_at_start_p (void)
3055{
3056 /* Cheat. Don't bother working forward from ix86_function_regparm
3057 to the function type to whether an actual argument is located in
3058 eax. Instead just look at cfg info, which is still close enough
3059 to correct at this point. This gives false positives for broken
3060 functions that might use uninitialized data that happens to be
3061 allocated in eax, but who cares? */
eaf7f7e7 3062 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
fe9f516f
RH
3063}
3064
ee2f65b4
RH
3065/* Return true if TYPE has a variable argument list. */
3066
3067static bool
3068type_has_variadic_args_p (tree type)
3069{
c7a2139b 3070 tree n, t = TYPE_ARG_TYPES (type);
ee2f65b4 3071
c7a2139b
RH
3072 if (t == NULL)
3073 return false;
3074
3075 while ((n = TREE_CHAIN (t)) != NULL)
3076 t = n;
3077
3078 return TREE_VALUE (t) != void_type_node;
ee2f65b4
RH
3079}
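/* Editor's note, not part of GCC: TYPE_ARG_TYPES is a TREE_LIST that a
   prototype terminates with void_type_node, so the walk above ends on
   void for a fixed signature and on a real type for a variadic one.
   Hypothetical examples:

     int f (int);       last TREE_VALUE == void_type_node  -> false
     int g (int, ...);  last TREE_VALUE == integer type    -> true
     int h ();          TYPE_ARG_TYPES == NULL             -> false  */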
3080
b08de47e
MM
3081/* Value is the number of bytes of arguments automatically
3082 popped when returning from a subroutine call.
3083 FUNDECL is the declaration node of the function (as a tree),
3084 FUNTYPE is the data type of the function (as a tree),
3085 or for a library call it is an identifier node for the subroutine name.
3086 SIZE is the number of bytes of arguments passed on the stack.
3087
3088 On the 80386, the RTD insn may be used to pop them if the number
3089 of args is fixed, but if the number is variable then the caller
3090 must pop them all. RTD can't be used for library calls now
3091 because the library is compiled with the Unix compiler.
3092 Use of RTD is a selectable option, since it is incompatible with
3093 standard Unix calling sequences. If the option is not selected,
3094 the caller must always pop the args.
3095
3096 The attribute stdcall is equivalent to RTD on a per module basis. */
3097
3098int
b96a374d 3099ix86_return_pops_args (tree fundecl, tree funtype, int size)
79325812 3100{
ee2f65b4
RH
3101 int rtd;
3102
3103 /* None of the 64-bit ABIs pop arguments. */
3104 if (TARGET_64BIT)
3105 return 0;
3106
3107 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 3108
43f3a59d 3109 /* Cdecl functions override -mrtd, and never pop the stack. */
ee2f65b4
RH
3110 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3111 {
3112 /* Stdcall and fastcall functions will pop the stack if not
3113 variable args. */
3114 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3115 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3116 rtd = 1;
3117
3118 if (rtd && ! type_has_variadic_args_p (funtype))
3119 return size;
3120 }
79325812 3121
232b8f52 3122 /* Lose any fake structure return argument if it is passed on the stack. */
61f71b34 3123 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
61fec9ff 3124 && !KEEP_AGGREGATE_RETURN_POINTER)
232b8f52 3125 {
e767b5be 3126 int nregs = ix86_function_regparm (funtype, fundecl);
ee2f65b4 3127 if (nregs == 0)
232b8f52
JJ
3128 return GET_MODE_SIZE (Pmode);
3129 }
3130
3131 return 0;
b08de47e 3132}
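/* Editor's sketch, not part of GCC: the resulting pop sizes.  Given a
   hypothetical prototype

     void __attribute__ ((stdcall)) s (int a, int b);

   the callee returns with "ret 8" (two 4-byte stack arguments), while
   a variadic stdcall function or any cdecl function uses a plain "ret"
   and leaves the popping to the caller.  */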
b08de47e
MM
3133\f
3134/* Argument support functions. */
3135
53c17031
JH
3136/* Return true when register may be used to pass function parameters. */
3137bool
b96a374d 3138ix86_function_arg_regno_p (int regno)
53c17031
JH
3139{
3140 int i;
ccf8e764 3141 const int *parm_regs;
ee2f65b4 3142
53c17031 3143 if (!TARGET_64BIT)
88c6f101
HMC
3144 {
3145 if (TARGET_MACHO)
3146 return (regno < REGPARM_MAX
3147 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3148 else
3149 return (regno < REGPARM_MAX
3150 || (TARGET_MMX && MMX_REGNO_P (regno)
3151 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3152 || (TARGET_SSE && SSE_REGNO_P (regno)
3153 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3154 }
3155
3156 if (TARGET_MACHO)
3157 {
3158 if (SSE_REGNO_P (regno) && TARGET_SSE)
3159 return true;
3160 }
3161 else
3162 {
3163 if (TARGET_SSE && SSE_REGNO_P (regno)
3164 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3165 return true;
3166 }
ee2f65b4 3167
53c17031 3168 /* RAX is used as a hidden argument to va_arg functions. */
ccf8e764 3169 if (!TARGET_64BIT_MS_ABI && regno == 0)
53c17031 3170 return true;
ee2f65b4 3171
ccf8e764
RH
3172 if (TARGET_64BIT_MS_ABI)
3173 parm_regs = x86_64_ms_abi_int_parameter_registers;
3174 else
3175 parm_regs = x86_64_int_parameter_registers;
53c17031 3176 for (i = 0; i < REGPARM_MAX; i++)
ccf8e764 3177 if (regno == parm_regs[i])
53c17031
JH
3178 return true;
3179 return false;
3180}
3181
fe984136
RH
 3182/* Return true if we do not know how to pass TYPE solely in registers. */
3183
3184static bool
3185ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3186{
3187 if (must_pass_in_stack_var_size_or_pad (mode, type))
3188 return true;
dcbca208
RH
3189
3190 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3191 The layout_type routine is crafty and tries to trick us into passing
3192 currently unsupported vector types on the stack by using TImode. */
3193 return (!TARGET_64BIT && mode == TImode
3194 && type && TREE_CODE (type) != VECTOR_TYPE);
fe984136
RH
3195}
3196
b08de47e
MM
3197/* Initialize a variable CUM of type CUMULATIVE_ARGS
3198 for a call to a function whose data type is FNTYPE.
3199 For a library call, FNTYPE is 0. */
3200
3201void
b96a374d
AJ
3202init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3203 tree fntype, /* tree ptr for function decl */
3204 rtx libname, /* SYMBOL_REF of library name or 0 */
3205 tree fndecl)
b08de47e 3206{
ee2f65b4 3207 memset (cum, 0, sizeof (*cum));
b08de47e
MM
3208
3209 /* Set up the number of registers to use for passing arguments. */
2f84b963 3210 cum->nregs = ix86_regparm;
78fbfc4b
JB
3211 if (TARGET_SSE)
3212 cum->sse_nregs = SSE_REGPARM_MAX;
3213 if (TARGET_MMX)
3214 cum->mmx_nregs = MMX_REGPARM_MAX;
e1be55d0
JH
3215 cum->warn_sse = true;
3216 cum->warn_mmx = true;
f8024378
L
3217 cum->maybe_vaarg = (fntype
3218 ? (!TYPE_ARG_TYPES (fntype)
3219 || type_has_variadic_args_p (fntype))
3220 : !libname);
b08de47e 3221
ee2f65b4 3222 if (!TARGET_64BIT)
e91f04de 3223 {
ee2f65b4
RH
3224 /* If there are variable arguments, then we won't pass anything
3225 in registers in 32-bit mode. */
3226 if (cum->maybe_vaarg)
e91f04de 3227 {
ee2f65b4
RH
3228 cum->nregs = 0;
3229 cum->sse_nregs = 0;
3230 cum->mmx_nregs = 0;
3231 cum->warn_sse = 0;
3232 cum->warn_mmx = 0;
3233 return;
e91f04de 3234 }
2f84b963 3235
ee2f65b4
RH
 3236 /* Use ecx and edx registers if the function has the fastcall attribute,
3237 else look for regparm information. */
3238 if (fntype)
b08de47e 3239 {
ee2f65b4 3240 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
53c17031 3241 {
ee2f65b4
RH
3242 cum->nregs = 2;
3243 cum->fastcall = 1;
53c17031 3244 }
ee2f65b4
RH
3245 else
3246 cum->nregs = ix86_function_regparm (fntype, fndecl);
b08de47e 3247 }
f19e3a64 3248
ee2f65b4
RH
3249 /* Set up the number of SSE registers used for passing SFmode
3250 and DFmode arguments. Warn for mismatching ABI. */
3251 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3252 }
b08de47e
MM
3253}
3254
6c4ccfd8
RH
3255/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3256 But in the case of vector types, it is some vector mode.
3257
 3258 When only some of our vector ISA extensions are enabled, there
3259 are some modes for which vector_mode_supported_p is false. For these
3260 modes, the generic vector support in gcc will choose some non-vector mode
5656a184 3261 in order to implement the type. By computing the natural mode, we'll
6c4ccfd8
RH
3262 select the proper ABI location for the operand and not depend on whatever
3263 the middle-end decides to do with these vector types. */
3264
3265static enum machine_mode
3266type_natural_mode (tree type)
3267{
3268 enum machine_mode mode = TYPE_MODE (type);
3269
3270 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3271 {
3272 HOST_WIDE_INT size = int_size_in_bytes (type);
3273 if ((size == 8 || size == 16)
3274 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3275 && TYPE_VECTOR_SUBPARTS (type) > 1)
3276 {
3277 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3278
3279 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3280 mode = MIN_MODE_VECTOR_FLOAT;
3281 else
3282 mode = MIN_MODE_VECTOR_INT;
3283
3284 /* Get the mode which has this inner mode and number of units. */
3285 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3286 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3287 && GET_MODE_INNER (mode) == innermode)
3288 return mode;
3289
d0396b79 3290 gcc_unreachable ();
6c4ccfd8
RH
3291 }
3292 }
3293
3294 return mode;
3295}
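/* Editor's sketch, not part of GCC: why the natural mode matters.
   Given a hypothetical generic vector type

     typedef int v2si __attribute__ ((vector_size (8)));

   the middle end may lay v2si out in DImode when MMX is disabled, but
   type_natural_mode still reports V2SImode, so the argument keeps a
   stable ABI location no matter which vector ISA is enabled.  */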
3296
3297/* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3298 this may not agree with the mode that the type system has chosen for the
3299 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3300 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3301
3302static rtx
3303gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3304 unsigned int regno)
3305{
3306 rtx tmp;
3307
3308 if (orig_mode != BLKmode)
3309 tmp = gen_rtx_REG (orig_mode, regno);
3310 else
3311 {
3312 tmp = gen_rtx_REG (mode, regno);
3313 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3314 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3315 }
3316
3317 return tmp;
3318}
3319
d1f87653 3320/* x86-64 register passing implementation. See the x86-64 ABI for details. The
f710504c 3321 goal of this code is to classify each eightbyte of an incoming argument by the register
53c17031
JH
3322 class and assign registers accordingly. */
3323
3324/* Return the union class of CLASS1 and CLASS2.
3325 See the x86-64 PS ABI for details. */
3326
3327static enum x86_64_reg_class
b96a374d 3328merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
53c17031
JH
3329{
3330 /* Rule #1: If both classes are equal, this is the resulting class. */
3331 if (class1 == class2)
3332 return class1;
3333
3334 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3335 the other class. */
3336 if (class1 == X86_64_NO_CLASS)
3337 return class2;
3338 if (class2 == X86_64_NO_CLASS)
3339 return class1;
3340
3341 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3342 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3343 return X86_64_MEMORY_CLASS;
3344
3345 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3346 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3347 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3348 return X86_64_INTEGERSI_CLASS;
3349 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3350 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3351 return X86_64_INTEGER_CLASS;
3352
499accd7
JB
3353 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3354 MEMORY is used. */
3355 if (class1 == X86_64_X87_CLASS
3356 || class1 == X86_64_X87UP_CLASS
3357 || class1 == X86_64_COMPLEX_X87_CLASS
3358 || class2 == X86_64_X87_CLASS
3359 || class2 == X86_64_X87UP_CLASS
3360 || class2 == X86_64_COMPLEX_X87_CLASS)
53c17031
JH
3361 return X86_64_MEMORY_CLASS;
3362
3363 /* Rule #6: Otherwise class SSE is used. */
3364 return X86_64_SSE_CLASS;
3365}
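/* Editor's worked example, not part of GCC: merging for a hypothetical
   struct { int i; float f; }.  Both fields share one eightbyte; i
   classifies as INTEGERSI and f (at bit offset 32) as SSE, and rule #4
   above merges the pair to INTEGER, so the whole struct travels in a
   single general-purpose register.  */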
3366
3367/* Classify the argument of type TYPE and mode MODE.
3368 CLASSES will be filled by the register class used to pass each word
3369 of the operand. The number of words is returned. In case the parameter
3370 should be passed in memory, 0 is returned. As a special case for zero
3371 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3372
 3373 BIT_OFFSET is used internally for handling records and specifies the
 3374 offset in bits modulo 256 to avoid overflow cases.
3375
3376 See the x86-64 PS ABI for details.
3377*/
3378
3379static int
b96a374d
AJ
3380classify_argument (enum machine_mode mode, tree type,
3381 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
53c17031 3382{
296e4ae8 3383 HOST_WIDE_INT bytes =
53c17031 3384 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
23327dae 3385 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
53c17031 3386
c60ee6f5
JH
3387 /* Variable sized entities are always passed/returned in memory. */
3388 if (bytes < 0)
3389 return 0;
3390
dafc5b82 3391 if (mode != VOIDmode
fe984136 3392 && targetm.calls.must_pass_in_stack (mode, type))
dafc5b82
JH
3393 return 0;
3394
53c17031
JH
3395 if (type && AGGREGATE_TYPE_P (type))
3396 {
3397 int i;
3398 tree field;
3399 enum x86_64_reg_class subclasses[MAX_CLASSES];
3400
3401 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3402 if (bytes > 16)
3403 return 0;
3404
3405 for (i = 0; i < words; i++)
3406 classes[i] = X86_64_NO_CLASS;
3407
 3408 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
 3409 signal the memory class, so handle it as a special case. */
3410 if (!words)
3411 {
3412 classes[0] = X86_64_NO_CLASS;
3413 return 1;
3414 }
3415
3416 /* Classify each field of record and merge classes. */
d0396b79 3417 switch (TREE_CODE (type))
53c17031 3418 {
d0396b79 3419 case RECORD_TYPE:
43f3a59d 3420 /* And now merge the fields of the structure. */
53c17031
JH
3421 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3422 {
3423 if (TREE_CODE (field) == FIELD_DECL)
3424 {
3425 int num;
3426
f7360901
VR
3427 if (TREE_TYPE (field) == error_mark_node)
3428 continue;
3429
53c17031
JH
3430 /* Bitfields are always classified as integer. Handle them
3431 early, since later code would consider them to be
3432 misaligned integers. */
3433 if (DECL_BIT_FIELD (field))
3434 {
9286af97
JH
3435 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3436 i < ((int_bit_position (field) + (bit_offset % 64))
53c17031 3437 + tree_low_cst (DECL_SIZE (field), 0)
b96a374d 3438 + 63) / 8 / 8; i++)
53c17031
JH
3439 classes[i] =
3440 merge_classes (X86_64_INTEGER_CLASS,
3441 classes[i]);
3442 }
3443 else
3444 {
3445 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3446 TREE_TYPE (field), subclasses,
3447 (int_bit_position (field)
3448 + bit_offset) % 256);
3449 if (!num)
3450 return 0;
3451 for (i = 0; i < num; i++)
3452 {
3453 int pos =
db01f480 3454 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
53c17031
JH
3455 classes[i + pos] =
3456 merge_classes (subclasses[i], classes[i + pos]);
3457 }
3458 }
3459 }
3460 }
d0396b79 3461 break;
91ea38f9 3462
d0396b79
NS
3463 case ARRAY_TYPE:
3464 /* Arrays are handled as small records. */
3465 {
3466 int num;
3467 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3468 TREE_TYPE (type), subclasses, bit_offset);
3469 if (!num)
3470 return 0;
91ea38f9 3471
d0396b79
NS
3472 /* The partial classes are now full classes. */
3473 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3474 subclasses[0] = X86_64_SSE_CLASS;
3475 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3476 subclasses[0] = X86_64_INTEGER_CLASS;
5656a184 3477
d0396b79
NS
3478 for (i = 0; i < words; i++)
3479 classes[i] = subclasses[i % num];
5656a184 3480
d0396b79
NS
3481 break;
3482 }
3483 case UNION_TYPE:
3484 case QUAL_UNION_TYPE:
 3485 /* Unions are similar to RECORD_TYPE, but the offset is
 3486 always 0. */
53c17031
JH
3487 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3488 {
3489 if (TREE_CODE (field) == FIELD_DECL)
3490 {
3491 int num;
118ed72a
VR
3492
3493 if (TREE_TYPE (field) == error_mark_node)
3494 continue;
3495
53c17031
JH
3496 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3497 TREE_TYPE (field), subclasses,
3498 bit_offset);
3499 if (!num)
3500 return 0;
3501 for (i = 0; i < num; i++)
3502 classes[i] = merge_classes (subclasses[i], classes[i]);
3503 }
3504 }
d0396b79
NS
3505 break;
3506
3507 default:
3508 gcc_unreachable ();
53c17031 3509 }
53c17031
JH
3510
3511 /* Final merger cleanup. */
3512 for (i = 0; i < words; i++)
3513 {
3514 /* If one class is MEMORY, everything should be passed in
3515 memory. */
3516 if (classes[i] == X86_64_MEMORY_CLASS)
3517 return 0;
3518
d6a7951f 3519 /* The X86_64_SSEUP_CLASS should always be preceded by
53c17031
JH
3520 X86_64_SSE_CLASS. */
3521 if (classes[i] == X86_64_SSEUP_CLASS
3522 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3523 classes[i] = X86_64_SSE_CLASS;
3524
d6a7951f 3525 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
3526 if (classes[i] == X86_64_X87UP_CLASS
3527 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3528 classes[i] = X86_64_SSE_CLASS;
3529 }
3530 return words;
3531 }
3532
 3533 /* Compute the alignment needed. We align all types to their natural
 3534 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
3535 if (mode != VOIDmode && mode != BLKmode)
3536 {
3537 int mode_alignment = GET_MODE_BITSIZE (mode);
3538
3539 if (mode == XFmode)
3540 mode_alignment = 128;
3541 else if (mode == XCmode)
3542 mode_alignment = 256;
2c6b27c3
JH
3543 if (COMPLEX_MODE_P (mode))
3544 mode_alignment /= 2;
f5143c46 3545 /* Misaligned fields are always returned in memory. */
53c17031
JH
3546 if (bit_offset % mode_alignment)
3547 return 0;
3548 }
3549
9e9fb0ce
JB
 3550 /* For V1xx modes, just use the base mode. */
3551 if (VECTOR_MODE_P (mode)
3552 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3553 mode = GET_MODE_INNER (mode);
3554
53c17031
JH
3555 /* Classification of atomic types. */
3556 switch (mode)
3557 {
a81083b2
BE
3558 case SDmode:
3559 case DDmode:
3560 classes[0] = X86_64_SSE_CLASS;
3561 return 1;
3562 case TDmode:
3563 classes[0] = X86_64_SSE_CLASS;
3564 classes[1] = X86_64_SSEUP_CLASS;
3565 return 2;
53c17031
JH
3566 case DImode:
3567 case SImode:
3568 case HImode:
3569 case QImode:
3570 case CSImode:
3571 case CHImode:
3572 case CQImode:
3573 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3574 classes[0] = X86_64_INTEGERSI_CLASS;
3575 else
3576 classes[0] = X86_64_INTEGER_CLASS;
3577 return 1;
3578 case CDImode:
3579 case TImode:
3580 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3581 return 2;
3582 case CTImode:
9e9fb0ce 3583 return 0;
53c17031
JH
3584 case SFmode:
3585 if (!(bit_offset % 64))
3586 classes[0] = X86_64_SSESF_CLASS;
3587 else
3588 classes[0] = X86_64_SSE_CLASS;
3589 return 1;
3590 case DFmode:
3591 classes[0] = X86_64_SSEDF_CLASS;
3592 return 1;
f8a1ebc6 3593 case XFmode:
53c17031
JH
3594 classes[0] = X86_64_X87_CLASS;
3595 classes[1] = X86_64_X87UP_CLASS;
3596 return 2;
f8a1ebc6 3597 case TFmode:
9e9fb0ce
JB
3598 classes[0] = X86_64_SSE_CLASS;
3599 classes[1] = X86_64_SSEUP_CLASS;
53c17031
JH
3600 return 2;
3601 case SCmode:
3602 classes[0] = X86_64_SSE_CLASS;
3603 return 1;
9e9fb0ce
JB
3604 case DCmode:
3605 classes[0] = X86_64_SSEDF_CLASS;
3606 classes[1] = X86_64_SSEDF_CLASS;
3607 return 2;
3608 case XCmode:
499accd7
JB
3609 classes[0] = X86_64_COMPLEX_X87_CLASS;
3610 return 1;
9e9fb0ce 3611 case TCmode:
499accd7 3612 /* This mode is larger than 16 bytes. */
9e9fb0ce 3613 return 0;
e95d6b23
JH
3614 case V4SFmode:
3615 case V4SImode:
495333a6
JH
3616 case V16QImode:
3617 case V8HImode:
3618 case V2DFmode:
3619 case V2DImode:
e95d6b23
JH
3620 classes[0] = X86_64_SSE_CLASS;
3621 classes[1] = X86_64_SSEUP_CLASS;
3622 return 2;
3623 case V2SFmode:
3624 case V2SImode:
3625 case V4HImode:
3626 case V8QImode:
9e9fb0ce
JB
3627 classes[0] = X86_64_SSE_CLASS;
3628 return 1;
53c17031 3629 case BLKmode:
e95d6b23 3630 case VOIDmode:
53c17031
JH
3631 return 0;
3632 default:
d0396b79 3633 gcc_assert (VECTOR_MODE_P (mode));
5656a184 3634
d0396b79
NS
3635 if (bytes > 16)
3636 return 0;
5656a184 3637
d0396b79 3638 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5656a184 3639
d0396b79
NS
3640 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3641 classes[0] = X86_64_INTEGERSI_CLASS;
3642 else
3643 classes[0] = X86_64_INTEGER_CLASS;
3644 classes[1] = X86_64_INTEGER_CLASS;
3645 return 1 + (bytes > 8);
53c17031
JH
3646 }
3647}
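/* Editor's worked example, not part of GCC: classifying a hypothetical
   struct { double d; int i; }.  It spans two eightbytes: d yields
   classes[0] = X86_64_SSEDF_CLASS and i yields classes[1] =
   X86_64_INTEGER_CLASS, so as a first argument the struct is split
   between %xmm0 and %rdi.  */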
3648
 3649/* Examine the argument and set the number of registers required in each
f5143c46 3650 class. Return 0 iff the parameter should be passed in memory. */
53c17031 3651static int
b96a374d
AJ
3652examine_argument (enum machine_mode mode, tree type, int in_return,
3653 int *int_nregs, int *sse_nregs)
53c17031 3654{
9415ab7d
TN
3655 enum x86_64_reg_class regclass[MAX_CLASSES];
3656 int n = classify_argument (mode, type, regclass, 0);
53c17031
JH
3657
3658 *int_nregs = 0;
3659 *sse_nregs = 0;
3660 if (!n)
3661 return 0;
3662 for (n--; n >= 0; n--)
9415ab7d 3663 switch (regclass[n])
53c17031
JH
3664 {
3665 case X86_64_INTEGER_CLASS:
3666 case X86_64_INTEGERSI_CLASS:
3667 (*int_nregs)++;
3668 break;
3669 case X86_64_SSE_CLASS:
3670 case X86_64_SSESF_CLASS:
3671 case X86_64_SSEDF_CLASS:
3672 (*sse_nregs)++;
3673 break;
3674 case X86_64_NO_CLASS:
3675 case X86_64_SSEUP_CLASS:
3676 break;
3677 case X86_64_X87_CLASS:
3678 case X86_64_X87UP_CLASS:
3679 if (!in_return)
3680 return 0;
3681 break;
499accd7
JB
3682 case X86_64_COMPLEX_X87_CLASS:
3683 return in_return ? 2 : 0;
53c17031 3684 case X86_64_MEMORY_CLASS:
d0396b79 3685 gcc_unreachable ();
53c17031
JH
3686 }
3687 return 1;
3688}
6c4ccfd8 3689
53c17031
JH
 3690/* Construct a container for the argument as used by the GCC interface. See
3691 FUNCTION_ARG for the detailed description. */
6c4ccfd8 3692
53c17031 3693static rtx
6c4ccfd8
RH
3694construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3695 tree type, int in_return, int nintregs, int nsseregs,
3696 const int *intreg, int sse_regno)
53c17031 3697{
94e76332
RS
3698 /* The following variables hold the static issued_error state. */
3699 static bool issued_sse_arg_error;
3700 static bool issued_sse_ret_error;
3701 static bool issued_x87_ret_error;
3702
53c17031
JH
3703 enum machine_mode tmpmode;
3704 int bytes =
3705 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
9415ab7d 3706 enum x86_64_reg_class regclass[MAX_CLASSES];
53c17031
JH
3707 int n;
3708 int i;
3709 int nexps = 0;
3710 int needed_sseregs, needed_intregs;
3711 rtx exp[MAX_CLASSES];
3712 rtx ret;
3713
9415ab7d 3714 n = classify_argument (mode, type, regclass, 0);
53c17031
JH
3715 if (!n)
3716 return NULL;
6c4ccfd8
RH
3717 if (!examine_argument (mode, type, in_return, &needed_intregs,
3718 &needed_sseregs))
53c17031
JH
3719 return NULL;
3720 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3721 return NULL;
3722
a5370cf0
RH
3723 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3724 some less clueful developer tries to use floating-point anyway. */
3725 if (needed_sseregs && !TARGET_SSE)
3726 {
94e76332 3727 if (in_return)
a5370cf0 3728 {
94e76332
RS
3729 if (!issued_sse_ret_error)
3730 {
3731 error ("SSE register return with SSE disabled");
3732 issued_sse_ret_error = true;
3733 }
3734 }
3735 else if (!issued_sse_arg_error)
3736 {
3737 error ("SSE register argument with SSE disabled");
3738 issued_sse_arg_error = true;
a5370cf0
RH
3739 }
3740 return NULL;
3741 }
3742
94e76332
RS
3743 /* Likewise, error if the ABI requires us to return values in the
3744 x87 registers and the user specified -mno-80387. */
3745 if (!TARGET_80387 && in_return)
3746 for (i = 0; i < n; i++)
9415ab7d
TN
3747 if (regclass[i] == X86_64_X87_CLASS
3748 || regclass[i] == X86_64_X87UP_CLASS
3749 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
94e76332
RS
3750 {
3751 if (!issued_x87_ret_error)
3752 {
3753 error ("x87 register return with x87 disabled");
3754 issued_x87_ret_error = true;
3755 }
3756 return NULL;
3757 }
3758
53c17031
JH
3759 /* First construct simple cases. Avoid SCmode, since we want to use
 3760 a single register to pass this type. */
3761 if (n == 1 && mode != SCmode)
9415ab7d 3762 switch (regclass[0])
53c17031
JH
3763 {
3764 case X86_64_INTEGER_CLASS:
3765 case X86_64_INTEGERSI_CLASS:
3766 return gen_rtx_REG (mode, intreg[0]);
3767 case X86_64_SSE_CLASS:
3768 case X86_64_SSESF_CLASS:
3769 case X86_64_SSEDF_CLASS:
6c4ccfd8 3770 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
53c17031 3771 case X86_64_X87_CLASS:
499accd7 3772 case X86_64_COMPLEX_X87_CLASS:
53c17031
JH
3773 return gen_rtx_REG (mode, FIRST_STACK_REG);
3774 case X86_64_NO_CLASS:
3775 /* Zero sized array, struct or class. */
3776 return NULL;
3777 default:
d0396b79 3778 gcc_unreachable ();
53c17031 3779 }
9415ab7d
TN
3780 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
3781 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
e95d6b23 3782 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
ee2f65b4 3783
53c17031 3784 if (n == 2
9415ab7d 3785 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
f8a1ebc6 3786 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
9415ab7d
TN
3787 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
3788 && regclass[1] == X86_64_INTEGER_CLASS
f8a1ebc6 3789 && (mode == CDImode || mode == TImode || mode == TFmode)
53c17031
JH
3790 && intreg[0] + 1 == intreg[1])
3791 return gen_rtx_REG (mode, intreg[0]);
53c17031
JH
3792
3793 /* Otherwise figure out the entries of the PARALLEL. */
3794 for (i = 0; i < n; i++)
3795 {
9415ab7d 3796 switch (regclass[i])
53c17031
JH
3797 {
3798 case X86_64_NO_CLASS:
3799 break;
3800 case X86_64_INTEGER_CLASS:
3801 case X86_64_INTEGERSI_CLASS:
d1f87653 3802 /* Merge TImodes on aligned occasions here too. */
53c17031
JH
3803 if (i * 8 + 8 > bytes)
3804 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
9415ab7d 3805 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
53c17031
JH
3806 tmpmode = SImode;
3807 else
3808 tmpmode = DImode;
 3809 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
3810 if (tmpmode == BLKmode)
3811 tmpmode = DImode;
3812 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3813 gen_rtx_REG (tmpmode, *intreg),
3814 GEN_INT (i*8));
3815 intreg++;
3816 break;
3817 case X86_64_SSESF_CLASS:
3818 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3819 gen_rtx_REG (SFmode,
3820 SSE_REGNO (sse_regno)),
3821 GEN_INT (i*8));
3822 sse_regno++;
3823 break;
3824 case X86_64_SSEDF_CLASS:
3825 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3826 gen_rtx_REG (DFmode,
3827 SSE_REGNO (sse_regno)),
3828 GEN_INT (i*8));
3829 sse_regno++;
3830 break;
3831 case X86_64_SSE_CLASS:
9415ab7d 3832 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
12f5c45e 3833 tmpmode = TImode;
53c17031
JH
3834 else
3835 tmpmode = DImode;
3836 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3837 gen_rtx_REG (tmpmode,
3838 SSE_REGNO (sse_regno)),
3839 GEN_INT (i*8));
12f5c45e
JH
3840 if (tmpmode == TImode)
3841 i++;
53c17031
JH
3842 sse_regno++;
3843 break;
3844 default:
d0396b79 3845 gcc_unreachable ();
53c17031
JH
3846 }
3847 }
1b803355
JJ
3848
3849 /* Empty aligned struct, union or class. */
3850 if (nexps == 0)
3851 return NULL;
3852
53c17031
JH
3853 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3854 for (i = 0; i < nexps; i++)
3855 XVECEXP (ret, 0, i) = exp [i];
3856 return ret;
3857}
3858
ee2f65b4
RH
3859/* Update the data in CUM to advance over an argument of mode MODE
3860 and data type TYPE. (TYPE is null for libcalls where that information
3861 may not be available.) */
b08de47e 3862
ee2f65b4
RH
3863static void
3864function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3865 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
b08de47e 3866{
ee2f65b4
RH
3867 switch (mode)
3868 {
3869 default:
3870 break;
b08de47e 3871
ee2f65b4
RH
3872 case BLKmode:
3873 if (bytes < 0)
3874 break;
3875 /* FALLTHRU */
b3a1ca49 3876
ee2f65b4
RH
3877 case DImode:
3878 case SImode:
3879 case HImode:
3880 case QImode:
3881 cum->words += words;
3882 cum->nregs -= words;
3883 cum->regno += words;
b3a1ca49 3884
ee2f65b4 3885 if (cum->nregs <= 0)
82a127a9 3886 {
ee2f65b4
RH
3887 cum->nregs = 0;
3888 cum->regno = 0;
82a127a9 3889 }
ee2f65b4 3890 break;
b3a1ca49 3891
ee2f65b4
RH
3892 case DFmode:
3893 if (cum->float_in_sse < 2)
3894 break;
3895 case SFmode:
3896 if (cum->float_in_sse < 1)
3897 break;
3898 /* FALLTHRU */
f19e3a64 3899
ee2f65b4
RH
3900 case TImode:
3901 case V16QImode:
3902 case V8HImode:
3903 case V4SImode:
3904 case V2DImode:
3905 case V4SFmode:
3906 case V2DFmode:
3907 if (!type || !AGGREGATE_TYPE_P (type))
3908 {
3909 cum->sse_words += words;
3910 cum->sse_nregs -= 1;
3911 cum->sse_regno += 1;
3912 if (cum->sse_nregs <= 0)
b3a1ca49 3913 {
ee2f65b4
RH
3914 cum->sse_nregs = 0;
3915 cum->sse_regno = 0;
b3a1ca49 3916 }
ee2f65b4
RH
3917 }
3918 break;
b3a1ca49 3919
ee2f65b4
RH
3920 case V8QImode:
3921 case V4HImode:
3922 case V2SImode:
3923 case V2SFmode:
3924 if (!type || !AGGREGATE_TYPE_P (type))
3925 {
3926 cum->mmx_words += words;
3927 cum->mmx_nregs -= 1;
3928 cum->mmx_regno += 1;
3929 if (cum->mmx_nregs <= 0)
b3a1ca49 3930 {
ee2f65b4
RH
3931 cum->mmx_nregs = 0;
3932 cum->mmx_regno = 0;
b3a1ca49 3933 }
82a127a9 3934 }
ee2f65b4 3935 break;
82a127a9 3936 }
b08de47e
MM
3937}
3938
ee2f65b4
RH
3939static void
3940function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3941 tree type, HOST_WIDE_INT words)
3942{
3943 int int_nregs, sse_nregs;
3944
3945 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3946 cum->words += words;
3947 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3948 {
3949 cum->nregs -= int_nregs;
3950 cum->sse_nregs -= sse_nregs;
3951 cum->regno += int_nregs;
3952 cum->sse_regno += sse_nregs;
3953 }
3954 else
3955 cum->words += words;
3956}
3957
ccf8e764
RH
3958static void
3959function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3960 HOST_WIDE_INT words)
3961{
 3962 /* Otherwise, this should be passed indirectly. */
3963 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3964
3965 cum->words += words;
3966 if (cum->nregs > 0)
3967 {
3968 cum->nregs -= 1;
3969 cum->regno += 1;
3970 }
3971}
3972
ee2f65b4
RH
3973void
3974function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3975 tree type, int named ATTRIBUTE_UNUSED)
3976{
3977 HOST_WIDE_INT bytes, words;
3978
3979 if (mode == BLKmode)
3980 bytes = int_size_in_bytes (type);
3981 else
3982 bytes = GET_MODE_SIZE (mode);
3983 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3984
3985 if (type)
3986 mode = type_natural_mode (type);
3987
ccf8e764
RH
3988 if (TARGET_64BIT_MS_ABI)
3989 function_arg_advance_ms_64 (cum, bytes, words);
3990 else if (TARGET_64BIT)
ee2f65b4
RH
3991 function_arg_advance_64 (cum, mode, type, words);
3992 else
3993 function_arg_advance_32 (cum, mode, type, bytes, words);
3994}
3995
b08de47e
MM
3996/* Define where to put the arguments to a function.
3997 Value is zero to push the argument on the stack,
3998 or a hard register in which to store the argument.
3999
4000 MODE is the argument's machine mode.
4001 TYPE is the data type of the argument (as a tree).
4002 This is null for libcalls where that information may
4003 not be available.
4004 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4005 the preceding args and about the function being called.
4006 NAMED is nonzero if this argument is a named parameter
4007 (otherwise it is an extra parameter matching an ellipsis). */
4008
ee2f65b4
RH
4009static rtx
4010function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4011 enum machine_mode orig_mode, tree type,
4012 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
b08de47e 4013{
bcf17554 4014 static bool warnedsse, warnedmmx;
b08de47e 4015
ee2f65b4 4016 /* Avoid the AL settings for the Unix64 ABI. */
32ee7d1d 4017 if (mode == VOIDmode)
ee2f65b4
RH
4018 return constm1_rtx;
4019
4020 switch (mode)
b08de47e 4021 {
ee2f65b4
RH
4022 default:
4023 break;
4024
4025 case BLKmode:
4026 if (bytes < 0)
53c17031 4027 break;
ee2f65b4
RH
4028 /* FALLTHRU */
4029 case DImode:
4030 case SImode:
4031 case HImode:
4032 case QImode:
4033 if (words <= cum->nregs)
4034 {
4035 int regno = cum->regno;
53c17031 4036
ee2f65b4
RH
4037 /* Fastcall allocates the first two DWORD (SImode) or
4038 smaller arguments to ECX and EDX. */
4039 if (cum->fastcall)
4040 {
4041 if (mode == BLKmode || mode == DImode)
4042 break;
b96a374d 4043
ee2f65b4
RH
 4044 /* ECX, not EAX, is the first allocated register. */
4045 if (regno == 0)
4046 regno = 2;
4047 }
4048 return gen_rtx_REG (mode, regno);
4049 }
4050 break;
b96a374d 4051
ee2f65b4
RH
4052 case DFmode:
4053 if (cum->float_in_sse < 2)
bcf17554 4054 break;
ee2f65b4
RH
4055 case SFmode:
4056 if (cum->float_in_sse < 1)
53c17031 4057 break;
ee2f65b4
RH
4058 /* FALLTHRU */
4059 case TImode:
4060 case V16QImode:
4061 case V8HImode:
4062 case V4SImode:
4063 case V2DImode:
4064 case V4SFmode:
4065 case V2DFmode:
4066 if (!type || !AGGREGATE_TYPE_P (type))
4067 {
4068 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4069 {
4070 warnedsse = true;
4071 warning (0, "SSE vector argument without SSE enabled "
4072 "changes the ABI");
4073 }
4074 if (cum->sse_nregs)
4075 return gen_reg_or_parallel (mode, orig_mode,
4076 cum->sse_regno + FIRST_SSE_REG);
4077 }
4078 break;
b08de47e 4079
ee2f65b4
RH
4080 case V8QImode:
4081 case V4HImode:
4082 case V2SImode:
4083 case V2SFmode:
4084 if (!type || !AGGREGATE_TYPE_P (type))
4085 {
4086 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4087 {
4088 warnedmmx = true;
4089 warning (0, "MMX vector argument without MMX enabled "
4090 "changes the ABI");
4091 }
4092 if (cum->mmx_nregs)
4093 return gen_reg_or_parallel (mode, orig_mode,
4094 cum->mmx_regno + FIRST_MMX_REG);
4095 }
4096 break;
4097 }
b08de47e 4098
ee2f65b4
RH
4099 return NULL_RTX;
4100}
b08de47e 4101
ee2f65b4
RH
4102static rtx
4103function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4104 enum machine_mode orig_mode, tree type)
4105{
 4106 /* Handle a hidden AL argument containing the number of registers
4107 for varargs x86-64 functions. */
4108 if (mode == VOIDmode)
4109 return GEN_INT (cum->maybe_vaarg
4110 ? (cum->sse_nregs < 0
4111 ? SSE_REGPARM_MAX
4112 : cum->sse_regno)
4113 : -1);
4114
4115 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4116 cum->sse_nregs,
4117 &x86_64_int_parameter_registers [cum->regno],
4118 cum->sse_regno);
4119}
b08de47e 4120
ccf8e764
RH
4121static rtx
4122function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4123 enum machine_mode orig_mode, int named)
4124{
4125 unsigned int regno;
4126
4127 /* Avoid the AL settings for the Unix64 ABI. */
4128 if (mode == VOIDmode)
4129 return constm1_rtx;
4130
4131 /* If we've run out of registers, it goes on the stack. */
4132 if (cum->nregs == 0)
4133 return NULL_RTX;
4134
4135 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4136
4137 /* Only floating point modes are passed in anything but integer regs. */
4138 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4139 {
4140 if (named)
4141 regno = cum->regno + FIRST_SSE_REG;
4142 else
4143 {
4144 rtx t1, t2;
4145
4146 /* Unnamed floating parameters are passed in both the
4147 SSE and integer registers. */
4148 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4149 t2 = gen_rtx_REG (mode, regno);
4150 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4151 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4152 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4153 }
4154 }
4155
4156 return gen_reg_or_parallel (mode, orig_mode, regno);
4157}
4158
ee2f65b4
RH
4159rtx
4160function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
ccf8e764 4161 tree type, int named)
ee2f65b4
RH
4162{
4163 enum machine_mode mode = omode;
4164 HOST_WIDE_INT bytes, words;
4165
4166 if (mode == BLKmode)
4167 bytes = int_size_in_bytes (type);
4168 else
4169 bytes = GET_MODE_SIZE (mode);
4170 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4171
4172 /* To simplify the code below, represent vector types with a vector mode
4173 even if MMX/SSE are not active. */
4174 if (type && TREE_CODE (type) == VECTOR_TYPE)
4175 mode = type_natural_mode (type);
4176
ccf8e764
RH
4177 if (TARGET_64BIT_MS_ABI)
4178 return function_arg_ms_64 (cum, mode, omode, named);
4179 else if (TARGET_64BIT)
ee2f65b4
RH
4180 return function_arg_64 (cum, mode, omode, type);
4181 else
4182 return function_arg_32 (cum, mode, omode, type, bytes, words);
b08de47e 4183}
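/* Editor's sketch, not part of GCC: where 32-bit fastcall arguments
   land under function_arg_32 above.  For a hypothetical

     void __attribute__ ((fastcall)) f (int a, int b, int c);

   a goes in %ecx, b in %edx, and c is pushed on the stack once the
   register count in CUM is exhausted.  */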
53c17031 4184
09b2e78d
ZD
4185/* A C expression that indicates when an argument must be passed by
4186 reference. If nonzero for an argument, a copy of that argument is
4187 made in memory and a pointer to the argument is passed instead of
4188 the argument itself. The pointer is passed in whatever way is
4189 appropriate for passing a pointer to that type. */
4190
8cd5a4e0
RH
4191static bool
4192ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4193 enum machine_mode mode ATTRIBUTE_UNUSED,
4194 tree type, bool named ATTRIBUTE_UNUSED)
09b2e78d 4195{
ccf8e764
RH
4196 if (TARGET_64BIT_MS_ABI)
4197 {
4198 if (type)
4199 {
4200 /* Arrays are passed by reference. */
4201 if (TREE_CODE (type) == ARRAY_TYPE)
4202 return true;
4203
4204 if (AGGREGATE_TYPE_P (type))
4205 {
4206 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4207 are passed by reference. */
4208 int el2 = exact_log2 (int_size_in_bytes (type));
4209 return !(el2 >= 0 && el2 <= 3);
4210 }
4211 }
4212
4213 /* __m128 is passed by reference. */
4214 /* ??? How to handle complex? For now treat them as structs,
4215 and pass them by reference if they're too large. */
4216 if (GET_MODE_SIZE (mode) > 8)
4217 return true;
4218 }
4219 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
ee2f65b4 4220 return 1;
09b2e78d
ZD
4221
4222 return 0;
4223}
4224
8b978a57 4225/* Return true when TYPE should be 128bit aligned for 32bit argument passing
90d5887b 4226 ABI. Only called if TARGET_SSE. */
8b978a57 4227static bool
b96a374d 4228contains_128bit_aligned_vector_p (tree type)
8b978a57
JH
4229{
4230 enum machine_mode mode = TYPE_MODE (type);
4231 if (SSE_REG_MODE_P (mode)
4232 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4233 return true;
4234 if (TYPE_ALIGN (type) < 128)
4235 return false;
4236
4237 if (AGGREGATE_TYPE_P (type))
4238 {
2a43945f 4239 /* Walk the aggregates recursively. */
d0396b79 4240 switch (TREE_CODE (type))
8b978a57 4241 {
d0396b79
NS
4242 case RECORD_TYPE:
4243 case UNION_TYPE:
4244 case QUAL_UNION_TYPE:
4245 {
4246 tree field;
5656a184 4247
1faf92ae 4248 /* Walk all the structure fields. */
d0396b79
NS
4249 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4250 {
4251 if (TREE_CODE (field) == FIELD_DECL
4252 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
fa743e8c 4253 return true;
d0396b79
NS
4254 }
4255 break;
4256 }
4257
4258 case ARRAY_TYPE:
 4259 /* Just in case some language passes arrays by value. */
8b978a57
JH
4260 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4261 return true;
5139c66b 4262 break;
5656a184 4263
d0396b79
NS
4264 default:
4265 gcc_unreachable ();
8b978a57 4266 }
8b978a57
JH
4267 }
4268 return false;
4269}
4270
bb498ea3
AH
4271/* Gives the alignment boundary, in bits, of an argument with the
4272 specified mode and type. */
53c17031
JH
4273
4274int
b96a374d 4275ix86_function_arg_boundary (enum machine_mode mode, tree type)
53c17031
JH
4276{
4277 int align;
53c17031
JH
4278 if (type)
4279 align = TYPE_ALIGN (type);
4280 else
4281 align = GET_MODE_ALIGNMENT (mode);
4282 if (align < PARM_BOUNDARY)
4283 align = PARM_BOUNDARY;
8b978a57
JH
4284 if (!TARGET_64BIT)
4285 {
 4286 /* The i386 ABI defines all arguments to be 4-byte aligned. We have to
 4287 make an exception for SSE modes since these require 128-bit
b96a374d 4288 alignment.
8b978a57
JH
4289
4290 The handling here differs from field_alignment. ICC aligns MMX
4291 arguments to 4 byte boundaries, while structure fields are aligned
4292 to 8 byte boundaries. */
78fbfc4b
JB
4293 if (!TARGET_SSE)
4294 align = PARM_BOUNDARY;
4295 else if (!type)
8b978a57
JH
4296 {
4297 if (!SSE_REG_MODE_P (mode))
4298 align = PARM_BOUNDARY;
4299 }
4300 else
4301 {
4302 if (!contains_128bit_aligned_vector_p (type))
4303 align = PARM_BOUNDARY;
4304 }
8b978a57 4305 }
53c17031
JH
4306 if (align > 128)
4307 align = 128;
4308 return align;
4309}
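/* Editor's sketch, not part of GCC: boundaries this returns on ia32
   with SSE enabled.  A plain int stays at PARM_BOUNDARY (32 bits),
   while a hypothetical 16-byte vector type

     typedef float v4sf __attribute__ ((vector_size (16)));

   reports 128, forcing the 16-byte stack alignment that SSE modes
   require.  */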
4310
 4311/* Return true if N is a possible register number for a function value. */
ee2f65b4 4312
53c17031 4313bool
b96a374d 4314ix86_function_value_regno_p (int regno)
53c17031 4315{
ee2f65b4 4316 switch (regno)
88c6f101 4317 {
ee2f65b4
RH
4318 case 0:
4319 return true;
aa941a60 4320
ee2f65b4 4321 case FIRST_FLOAT_REG:
ccf8e764
RH
4322 if (TARGET_64BIT_MS_ABI)
4323 return false;
ee2f65b4 4324 return TARGET_FLOAT_RETURNS_IN_80387;
aa941a60 4325
ee2f65b4
RH
4326 case FIRST_SSE_REG:
4327 return TARGET_SSE;
4328
4329 case FIRST_MMX_REG:
4330 if (TARGET_MACHO || TARGET_64BIT)
4331 return false;
4332 return TARGET_MMX;
88c6f101 4333 }
ee2f65b4
RH
4334
4335 return false;
53c17031
JH
4336}
4337
4338/* Define how to find the value returned by a function.
4339 VALTYPE is the data type of the value (as a tree).
4340 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4341 otherwise, FUNC is 0. */
ee2f65b4
RH
4342
4343static rtx
4344function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4345 tree fntype, tree fn)
53c17031 4346{
ee2f65b4 4347 unsigned int regno;
b3a1ca49 4348
ee2f65b4
RH
4349 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4350 we normally prevent this case when mmx is not available. However
 4351 some ABIs may require the result to be returned as DImode. */
4352 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4353 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4354
4355 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4356 we prevent this case when sse is not available. However some ABIs
 4357 may require the result to be returned as integer TImode. */
4358 else if (mode == TImode
4359 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4360 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4361
27ac40e2
UB
4362 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4363 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4364 regno = FIRST_FLOAT_REG;
4365 else
4366 /* Most things go in %eax. */
ee2f65b4 4367 regno = 0;
27ac40e2
UB
4368
4369 /* Override FP return register with %xmm0 for local functions when
ee2f65b4 4370 SSE math is enabled or for functions with the sseregparm attribute. */
27ac40e2 4371 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
53c17031 4372 {
27ac40e2
UB
4373 int sse_level = ix86_function_sseregparm (fntype, fn);
4374 if ((sse_level >= 1 && mode == SFmode)
4375 || (sse_level == 2 && mode == DFmode))
4376 regno = FIRST_SSE_REG;
53c17031 4377 }
ee2f65b4
RH
4378
4379 return gen_rtx_REG (orig_mode, regno);
4380}
4381
4382static rtx
4383function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4384 tree valtype)
4385{
4386 rtx ret;
4387
4388 /* Handle libcalls, which don't provide a type node. */
4389 if (valtype == NULL)
cb1119b7 4390 {
ee2f65b4
RH
4391 switch (mode)
4392 {
4393 case SFmode:
4394 case SCmode:
4395 case DFmode:
4396 case DCmode:
4397 case TFmode:
4398 case SDmode:
4399 case DDmode:
4400 case TDmode:
4401 return gen_rtx_REG (mode, FIRST_SSE_REG);
4402 case XFmode:
4403 case XCmode:
4404 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4405 case TCmode:
4406 return NULL;
4407 default:
4408 return gen_rtx_REG (mode, 0);
4409 }
cb1119b7 4410 }
ee2f65b4
RH
4411
4412 ret = construct_container (mode, orig_mode, valtype, 1,
4413 REGPARM_MAX, SSE_REGPARM_MAX,
4414 x86_64_int_return_registers, 0);
4415
4416 /* For zero sized structures, construct_container returns NULL, but we
 4417 need to keep the rest of the compiler happy by returning a meaningful value. */
4418 if (!ret)
4419 ret = gen_rtx_REG (orig_mode, 0);
4420
4421 return ret;
53c17031
JH
4422}
4423
ccf8e764
RH
4424static rtx
4425function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4426{
4427 unsigned int regno = 0;
4428
4429 if (TARGET_SSE)
4430 {
4431 if (mode == SFmode || mode == DFmode)
4432 regno = FIRST_SSE_REG;
4433 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4434 regno = FIRST_SSE_REG;
4435 }
4436
4437 return gen_rtx_REG (orig_mode, regno);
4438}
4439
ee2f65b4
RH
4440static rtx
4441ix86_function_value_1 (tree valtype, tree fntype_or_decl,
4442 enum machine_mode orig_mode, enum machine_mode mode)
53c17031 4443{
ee2f65b4
RH
4444 tree fn, fntype;
4445
4446 fn = NULL_TREE;
4447 if (fntype_or_decl && DECL_P (fntype_or_decl))
4448 fn = fntype_or_decl;
4449 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
a30b6839 4450
ccf8e764
RH
4451 if (TARGET_64BIT_MS_ABI)
4452 return function_value_ms_64 (orig_mode, mode);
4453 else if (TARGET_64BIT)
ee2f65b4
RH
4454 return function_value_64 (orig_mode, mode, valtype);
4455 else
4456 return function_value_32 (orig_mode, mode, fntype, fn);
4457}
4458
4459static rtx
4460ix86_function_value (tree valtype, tree fntype_or_decl,
4461 bool outgoing ATTRIBUTE_UNUSED)
4462{
4463 enum machine_mode mode, orig_mode;
4464
4465 orig_mode = TYPE_MODE (valtype);
4466 mode = type_natural_mode (valtype);
4467 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4468}
4469
4470rtx
4471ix86_libcall_value (enum machine_mode mode)
4472{
4473 return ix86_function_value_1 (NULL, NULL, mode, mode);
4474}
4475
 4476/* Return true iff TYPE is returned in memory. */
4477
4478static int
4479return_in_memory_32 (tree type, enum machine_mode mode)
4480{
4481 HOST_WIDE_INT size;
a30b6839
RH
4482
4483 if (mode == BLKmode)
4484 return 1;
4485
4486 size = int_size_in_bytes (type);
4487
4488 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4489 return 0;
4490
4491 if (VECTOR_MODE_P (mode) || mode == TImode)
53c17031 4492 {
a30b6839
RH
4493 /* User-created vectors small enough to fit in EAX. */
4494 if (size < 8)
5e062767 4495 return 0;
a30b6839 4496
74c4a88a
UB
4497 /* MMX/3dNow values are returned in MM0,
 4498 except when it doesn't exist. */
a30b6839 4499 if (size == 8)
74c4a88a 4500 return (TARGET_MMX ? 0 : 1);
a30b6839 4501
0397ac35 4502 /* SSE values are returned in XMM0, except when it doesn't exist. */
a30b6839 4503 if (size == 16)
0397ac35 4504 return (TARGET_SSE ? 0 : 1);
53c17031 4505 }
a30b6839 4506
cf2348cb 4507 if (mode == XFmode)
a30b6839 4508 return 0;
f8a1ebc6 4509
a81083b2
BE
4510 if (mode == TDmode)
4511 return 1;
4512
a30b6839
RH
4513 if (size > 12)
4514 return 1;
4515 return 0;
53c17031
JH
4516}
4517
ee2f65b4
RH
4518static int
4519return_in_memory_64 (tree type, enum machine_mode mode)
4520{
4521 int needed_intregs, needed_sseregs;
4522 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4523}
4524
ccf8e764
RH
4525static int
4526return_in_memory_ms_64 (tree type, enum machine_mode mode)
4527{
4528 HOST_WIDE_INT size = int_size_in_bytes (type);
4529
4530 /* __m128 and friends are returned in xmm0. */
4531 if (size == 16 && VECTOR_MODE_P (mode))
4532 return 0;
4533
 4534 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
4535 return (size != 1 && size != 2 && size != 4 && size != 8);
4536}
4537
ee2f65b4
RH
4538int
4539ix86_return_in_memory (tree type)
4540{
4541 enum machine_mode mode = type_natural_mode (type);
4542
ccf8e764
RH
4543 if (TARGET_64BIT_MS_ABI)
4544 return return_in_memory_ms_64 (type, mode);
4545 else if (TARGET_64BIT)
ee2f65b4
RH
4546 return return_in_memory_64 (type, mode);
4547 else
4548 return return_in_memory_32 (type, mode);
4549}
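/* Editor's sketch, not part of GCC: ia32 outcomes of the 32-bit path
   above.  An 8-byte struct of two ints comes back in registers only
   when MS_AGGREGATE_RETURN holds, a 16-byte user vector comes back in
   %xmm0 only if SSE is enabled, and anything larger than 12 bytes
   (XFmode aside) is returned in memory.  */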
4550
29173496
RS
4551/* Return false iff TYPE is returned in memory. This version is used
4552 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4553 but differs notably in that when MMX is available, 8-byte vectors
4554 are returned in memory, rather than in MMX registers. */
4555
4556int
4557ix86_sol10_return_in_memory (tree type)
4558{
e797f7e1 4559 int size;
29173496
RS
4560 enum machine_mode mode = type_natural_mode (type);
4561
4562 if (TARGET_64BIT)
4563 return return_in_memory_64 (type, mode);
4564
4565 if (mode == BLKmode)
4566 return 1;
4567
4568 size = int_size_in_bytes (type);
4569
4570 if (VECTOR_MODE_P (mode))
4571 {
4572 /* Return in memory only if MMX registers *are* available. This
4573 seems backwards, but it is consistent with the existing
4574 Solaris x86 ABI. */
4575 if (size == 8)
4576 return TARGET_MMX;
4577 if (size == 16)
4578 return !TARGET_SSE;
4579 }
4580 else if (mode == TImode)
4581 return !TARGET_SSE;
4582 else if (mode == XFmode)
4583 return 0;
4584
4585 return size > 12;
4586}

/* When returning SSE vector types, we have a choice of either
     (1) being ABI incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

static rtx
ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
{
  static bool warnedsse, warnedmmx;

  if (!TARGET_64BIT && type)
    {
      /* Look at the return type of the function, not the function type.  */
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (!TARGET_SSE && !warnedsse)
	{
	  if (mode == TImode
	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
	    {
	      warnedsse = true;
	      warning (0, "SSE vector return without SSE enabled "
		       "changes the ABI");
	    }
	}

      if (!TARGET_MMX && !warnedmmx)
	{
	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector return without MMX enabled "
		       "changes the ABI");
	    }
	}
    }

  return NULL;
}

\f
/* Create the va_list data type.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
    return build_pointer_type (char_type_node);

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  TREE_CHAIN (f_gpr) = f_fpr;
  TREE_CHAIN (f_fpr) = f_ovf;
  TREE_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
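
/* For illustration: the record built above matches the C-level va_list
   layout documented by the x86-64 psABI.  A rough C sketch (not compiled
   here, just a reading aid):

     typedef struct __va_list_tag {
       unsigned int gp_offset;         offset into reg_save_area for GPRs
       unsigned int fp_offset;         offset into reg_save_area for SSE regs
       void *overflow_arg_area;        next argument passed on the stack
       void *reg_save_area;            base of the prologue's register save block
     } va_list[1];
*/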

/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  rtx label;
  rtx label_ref;
  rtx tmp_reg;
  rtx nsse_reg;
  int set;
  int i;

  if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
    return;

  /* Indicate to allocate space on the stack for varargs save area.  */
  ix86_save_varrargs_registers = 1;
  cfun->stack_alignment_needed = 128;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  for (i = cum->regno;
       i < ix86_regparm
       && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
       i++)
    {
      mem = gen_rtx_MEM (Pmode,
			 plus_constant (save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
					x86_64_int_parameter_registers[i]));
    }

  if (cum->sse_nregs && cfun->va_list_fpr_size)
    {
      /* Now emit code to save SSE registers.  The AX parameter contains the
	 number of SSE parameter registers used to call this function.  We use
	 the sse_prologue_save insn template, which produces a computed jump
	 across the SSE saves.  We need some preparation work to get this
	 working.  */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute the address to jump to:
	   label - eax*4 + nnamed_sse_arguments*4.  */
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      gen_rtx_MULT (Pmode, nsse_reg,
					    GEN_INT (4))));
      if (cum->sse_regno)
	emit_move_insn
	  (nsse_reg,
	   gen_rtx_CONST (DImode,
			  gen_rtx_PLUS (DImode,
					label_ref,
					GEN_INT (cum->sse_regno * 4))));
      else
	emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute the address of the memory block we save into.  We always
	 use a pointer pointing 127 bytes after the first byte to store,
	 which keeps each save instruction within a 4-byte encoding.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      plus_constant (save_area,
					     8 * REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
					GEN_INT (cum->sse_regno), label));
    }
}
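
/* Illustration of the computed-jump arithmetic above (assuming, as the
   code does, that every SSE save insn in the sse_prologue_save template
   is encoded in exactly 4 bytes): the jump target is

     label + cum->sse_regno * 4 - %eax * 4

   so when %al equals the number of named SSE arguments (no varargs in
   SSE registers) the jump lands exactly on `label' and skips every save,
   and each additional register reported in %al moves the entry point one
   4-byte save insn earlier, executing one more save.  */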

static void
setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
{
  int set = get_varargs_alias_set ();
  int i;

  for (i = cum->regno; i < REGPARM_MAX; i++)
    {
      rtx reg, mem;

      mem = gen_rtx_MEM (Pmode,
			 plus_constant (virtual_incoming_args_rtx,
					i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);

      reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
      emit_move_insn (mem, reg);
    }
}

static void
ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
			     int no_rtl)
{
  CUMULATIVE_ARGS next_cum;
  tree fntype;
  int stdarg_p;

  /* This argument doesn't appear to be used anymore.  Which is good,
     because the old code here didn't suppress rtl generation.  */
  gcc_assert (!no_rtl);

  if (!TARGET_64BIT)
    return;

  fntype = TREE_TYPE (current_function_decl);
  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
		  != void_type_node));

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p)
    function_arg_advance (&next_cum, mode, type, 1);

  if (TARGET_64BIT_MS_ABI)
    setup_incoming_varargs_ms_64 (&next_cum);
  else
    setup_incoming_varargs_64 (&next_cum);
}

/* Implement va_start.  */

void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;

  /* Only the 64bit target needs something special.  */
  if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Count the number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
		  build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8 * REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  t = make_tree (type, virtual_incoming_args_rtx);
  if (words != 0)
    t = build2 (POINTER_PLUS_EXPR, type, t,
		size_int (words * UNITS_PER_WORD));
  t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
    {
      /* Find the register save area.  The function prologue saves it
	 right above the stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
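
/* For illustration (assuming the usual x86-64 values REGPARM_MAX == 6 and
   16-byte SSE save slots): after a call such as f (int a, double b, ...),
   va_start leaves gp_offset = 1 * 8 = 8 and fp_offset = 48 + 1 * 16 = 64,
   so the next integer vararg is fetched from reg_save_area + 8 and the
   next floating-point vararg from reg_save_area + 64.  */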

/* Implement va_arg.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;

  /* Only the 64bit target needs something special.  */
  if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build_va_arg_indirect_ref (valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type);
  container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");
  DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label ();
      lab_over = create_artificial_label ();

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* If we are passing a structure, verify that it occupies a
	 consecutive block of the register save area.  If not, we need
	 to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive.  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = 1;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = 1;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	  DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build_int_cst (TREE_TYPE (fpr),
			     (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_convert (sizetype, gpr);
	  t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
	  t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_convert (sizetype, fpr);
	  t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
	  t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (need_temp)
	{
	  int i;
	  tree temp = create_tmp_var (type, "va_arg_tmp");

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
	  gimplify_and_add (t, pre_p);

	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      tree addr_type = build_pointer_type (piece_type);
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
				      size_int (src_offset));
	      src = build_va_arg_indirect_ref (src_addr);

	      dest_addr = fold_convert (addr_type, addr);
	      dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
				       size_int (INTVAL (XEXP (slot, 1))));
	      dest = build_va_arg_indirect_ref (dest_addr);

	      t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
	      gimplify_and_add (t, pre_p);
	    }
	}

      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
	  gimplify_and_add (t, pre_p);
	}

      t = build1 (GOTO_EXPR, void_type_node, lab_over);
      gimplify_and_add (t, pre_p);

      t = build1 (LABEL_EXPR, void_type_node, lab_false);
      append_to_statement_list (t, pre_p);
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
      || integer_zerop (TYPE_SIZE (type)))
    t = ovf;
  else
    {
      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
      t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
		  size_int (align - 1));
      t = fold_convert (sizetype, t);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  size_int (-align));
      t = fold_convert (TREE_TYPE (ovf), t);
    }
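  /* Illustration of the rounding above: for a 16-byte-aligned type,
     align == 16 and t becomes (ovf + 15) & -16, e.g. an overflow
     pointer of 0x...28 is rounded up to 0x...30.  */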
  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);

  t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
  gimplify_and_add (t2, pre_p);

  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
	      size_int (rsize * UNITS_PER_WORD));
  t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
  gimplify_and_add (t, pre_p);

  if (container)
    {
      t = build1 (LABEL_EXPR, void_type_node, lab_over);
      append_to_statement_list (t, pre_p);
    }

  ptrtype = build_pointer_type (type);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
\f
/* Return nonzero if OPNUM's MEM should be matched
   in movabs* patterns.  */

int
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (MEM_P (mem));
  return (volatile_ok || !MEM_VOLATILE_P (mem));
}
\f
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}

/* Return true if the constant is something that can be loaded with
   a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  REAL_VALUE_TYPE r;

  if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
    return -1;

  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_size || TARGET_EXT_80387_CONSTANTS))
    {
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
	if (real_identical (&r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  /* A load of the constant -0.0 or -1.0 will be split into an
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (&r))
    return 8;
  if (real_identical (&r, &dconstm1))
    return 9;

  return 0;
}
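
/* Summary of the return values above (matching the switch in
   standard_80387_constant_opcode below): -1 = not an 80387 constant,
   0 = no special instruction, 1 = fldz, 2 = fld1, 3..7 = index into
   ext_80387_constants_table plus 3 (fldlg2, fldln2, fldl2e, fldl2t,
   fldpi), and 8/9 = split into fldz;fchs / fld1;fchs.  */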

/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    case 8:
    case 9:
      return "#";
    default:
      gcc_unreachable ();
    }
}

/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
				       XFmode);
}

/* Return 1 if MODE is a valid mode for SSE.  */
static int
standard_sse_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      return 1;

    default:
      return 0;
    }
}

/* Return 1 if X is an FP constant that we can load into an SSE register
   without using memory.  */
int
standard_sse_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (vector_all_ones_operand (x, mode)
      && standard_sse_mode_p (mode))
    return TARGET_SSE2 ? 2 : -1;

  return 0;
}

/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_sse_constant_opcode (rtx insn, rtx x)
{
  switch (standard_sse_constant_p (x))
    {
    case 1:
      if (get_attr_mode (insn) == MODE_V4SF)
	return "xorps\t%0, %0";
      else if (get_attr_mode (insn) == MODE_V2DF)
	return "xorpd\t%0, %0";
      else
	return "pxor\t%0, %0";
    case 2:
      return "pcmpeqd\t%0, %0";
    }
  gcc_unreachable ();
}

/* Return 1 if OP contains a symbol reference.  */

int
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}

/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.  */

int
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32768 bytes of popped arguments, since
     that's all we can do with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}
\f
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

int
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return 1;

  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return 1;

  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
     the frame pointer by default.  Turn it back on now if we've not
     got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf
	  || ix86_current_function_calls_tls_descriptor))
    return 1;

  if (current_function_profile)
    return 1;

  return 0;
}

/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}
\f
#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}

/* Emit the pc thunks, one for each PIC register that was used; each
   thunk loads its register with the return address of the caller and
   then returns.  */

void
ix86_file_end (void)
{
  rtx xops[2];
  int regno;

  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];

      if (! ((pic_labels_used >> regno) & 1))
	continue;

      get_pc_thunk_name (name, regno);

#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  switch_to_section (darwin_sections[text_coal_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n", asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}
      else
#endif
      if (USE_HIDDEN_LINKONCE)
	{
	  tree decl;

	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
			     error_mark_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  DECL_ONE_ONLY (decl) = 1;

	  (*targetm.asm_out.unique_section) (decl, 0);
	  switch_to_section (get_named_section (decl, NULL, 0));

	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  switch_to_section (text_section);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}
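
/* For illustration, the thunk emitted above for %ebx (AT&T syntax,
   assuming USE_HIDDEN_LINKONCE) looks like:

     __i686.get_pc_thunk.bx:
	     movl	(%esp), %ebx
	     ret

   i.e. it copies its own return address, which is the address of the
   instruction following the `call', into the PIC register.  */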

/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
	 an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());

      if (!flag_pic)
	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      else
	output_asm_insn ("call\t%a2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
	 is what will be referenced by the Mach-O PIC subsystem.  */
      if (!label)
	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
#endif

      (*targetm.asm_out.internal_label) (asm_out_file, "L",
					 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));

      if (flag_pic)
	output_asm_insn ("pop{l}\t%0", xops);
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
	 is what will be referenced by the Mach-O PIC subsystem.  */
#if TARGET_MACHO
      if (!label)
	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
      else
	targetm.asm_out.internal_label (asm_out_file, "L",
					CODE_LABEL_NUMBER (label));
#endif
    }

  if (TARGET_MACHO)
    return "";

  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
  else
    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);

  return "";
}
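
/* For illustration, the two ELF sequences emitted above to load the GOT
   address into %ebx (AT&T syntax; GOT_SYMBOL_NAME is normally
   _GLOBAL_OFFSET_TABLE_, and the .L2 label name is illustrative):

   with deep branch prediction (thunk variant):
	   call	__i686.get_pc_thunk.bx
	   addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   without it (inline call/pop variant):
	   call	.L2
   .L2:	   popl	%ebx
	   addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx
*/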

/* Generate a "push" pattern for input ARG.  */

static rtx
gen_push (rtx arg)
{
  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (Pmode,
				   gen_rtx_PRE_DEC (Pmode,
						    stack_pointer_rtx)),
		      arg);
}

/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (current_function_is_leaf && !current_function_profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i;
      for (i = 2; i >= 0; --i)
	if (!df_regs_ever_live_p (i))
	  return i;
    }

  return INVALID_REGNUM;
}

/* Return 1 if we need to save REGNO.  */
static int
ix86_save_reg (unsigned int regno, int maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	  || current_function_profile
	  || current_function_calls_eh_return
	  || current_function_uses_const_pool))
    {
      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
	return 0;
      return 1;
    }

  if (current_function_calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return 1;
	}
    }

  if (cfun->machine->force_align_arg_pointer
      && regno == REGNO (cfun->machine->force_align_arg_pointer))
    return 1;

  return (df_regs_ever_live_p (regno)
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}

/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      nregs++;
  return nregs;
}

/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}

/* Fill the structure ix86_frame describing the frame of the currently
   computed function.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  HOST_WIDE_INT total_size;
  unsigned int stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned int preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();
  total_size = size;

  stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;

  /* During reload iteration the number of registers saved can change.
     Recompute the value as needed.  Do not recompute when the number of
     registers didn't change, as reload does multiple calls to this
     function and does not expect the decision to change within a single
     iteration.  */
  if (!optimize_size
      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;

      cfun->machine->use_fast_prologue_epilogue_nregs = count;
      /* The fast prologue uses move instead of push to save registers.  This
	 is significantly longer, but also executes faster as modern hardware
	 can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing which prologue to emit:  when a function
	 takes many instructions to execute, we may use the slow version,
	 as well as when the function is known to be outside a hot spot
	 (this is known with feedback only).  Weight the size of the
	 function by the number of registers to save, as it is cheap to use
	 one or two push instructions but very slow to use many of them.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
	cfun->machine->use_fast_prologue_epilogue = false;
      else
	cfun->machine->use_fast_prologue_epilogue
	  = !expensive_function_p (count);
    }
  if (TARGET_PROLOGUE_USING_MOVE
      && cfun->machine->use_fast_prologue_epilogue)
    frame->save_regs_using_mov = true;
  else
    frame->save_regs_using_mov = false;


  /* Skip return address and saved base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since the i386 port is the only one using those
     features that may break easily.  */

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (stack_alignment_needed
	      <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;

  /* Va-arg area */
  if (ix86_save_varrargs_registers)
    {
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    }
  else
    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
		     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when the function calls alloca.  The
     alloca expander assumes that the last
     current_function_outgoing_args_size bytes of the stack frame are
     unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!current_function_is_leaf || current_function_calls_alloca
	  || ix86_current_function_calls_tls_descriptor))
    {
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || current_function_calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    frame->padding2 = ((offset + preferred_alignment - 1)
		       & -preferred_alignment) - offset;
  else
    frame->padding2 = 0;

  offset += frame->padding2;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  if ((!frame->to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = frame->to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
  fprintf (stderr, "\n");
  fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
  fprintf (stderr, "size: %ld\n", (long)size);
  fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
  fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
  fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
  fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
  fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
  fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
	   (long)frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
  fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
  fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
  fprintf (stderr, "ix86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
#endif
}
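
/* For illustration, the frame laid out above looks like this (higher
   addresses first; the padding areas may be empty, and saved ebp is only
   present when frame_pointer_needed):

	return address
	saved ebp			<- hard_frame_pointer_offset
	saved registers			(frame->nregs words)
	va-arg register save area	(frame->va_arg_size bytes)
	padding1
	local variables			<- frame_pointer_offset
	outgoing argument area
	padding2			<- stack_pointer_offset

   On x86-64 leaf functions, up to red_zone_size bytes of this are placed
   in the red zone below the stack pointer instead of being allocated.  */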

/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
    if (ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}

/* Emit code to save registers using MOV insns.  The first register
   is saved at POINTER + OFFSET.  */
static void
ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
{
  unsigned int regno;
  rtx insn;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
					       Pmode, offset),
			       gen_rtx_REG (Pmode, regno));
	RTX_FRAME_RELATED_P (insn) = 1;
	offset += UNITS_PER_WORD;
      }
}

/* Expand a prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame
   related, zero if the %r11 register is live and cannot be freely used,
   and positive otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
{
  rtx insn;

  if (! TARGET_64BIT)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
  else if (x86_64_immediate_operand (offset, DImode))
    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
  else
    {
      rtx r11;
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  ATM indirect sibcall
	 shouldn't be used together with huge frame sizes in one
	 function because of the frame_size check in sibcall.c.  */
      gcc_assert (style);
      r11 = gen_rtx_REG (DImode, R11_REG);
      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
      if (style < 0)
	RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
							       offset));
    }
  if (style < 0)
    RTX_FRAME_RELATED_P (insn) = 1;
}

/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

static rtx
ix86_internal_arg_pointer (void)
{
  bool has_force_align_arg_pointer =
    (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
			    TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
  if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
       && DECL_NAME (current_function_decl)
       && MAIN_NAME_P (DECL_NAME (current_function_decl))
       && DECL_FILE_SCOPE_P (current_function_decl))
      || ix86_force_align_arg_pointer
      || has_force_align_arg_pointer)
    {
      /* Nested functions can't realign the stack due to a register
	 conflict.  */
      if (DECL_CONTEXT (current_function_decl)
	  && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
	{
	  if (ix86_force_align_arg_pointer)
	    warning (0, "-mstackrealign ignored for nested functions");
	  if (has_force_align_arg_pointer)
	    error ("%s not supported for nested functions",
		   ix86_force_align_arg_pointer_string);
	  return virtual_incoming_args_rtx;
	}
      cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
      return copy_to_reg (cfun->machine->force_align_arg_pointer);
    }
  else
    return virtual_incoming_args_rtx;
}

/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
   This is called from dwarf2out.c to emit call frame instructions
   for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs.  */
static void
ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
{
  rtx unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);

  switch (index)
    {
    case UNSPEC_REG_SAVE:
      dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
			      SET_DEST (pattern));
      break;
    case UNSPEC_DEF_CFA:
      dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
			 INTVAL (XVECEXP (unspec, 0, 0)));
      break;
    default:
      gcc_unreachable ();
    }
}

/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;

  ix86_compute_frame_layout (&frame);

  if (cfun->machine->force_align_arg_pointer)
    {
      rtx x, y;

      /* Grab the argument pointer.  */
      x = plus_constant (stack_pointer_rtx, 4);
      y = cfun->machine->force_align_arg_pointer;
      insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* The unwind info consists of two parts: install the fafp as the cfa,
	 and record the fafp as the "save register" of the stack pointer.
	 The latter is there in order that the unwinder can see where it
	 should restore the stack pointer across the `and' insn.  */
      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
      x = gen_rtx_SET (VOIDmode, y, x);
      RTX_FRAME_RELATED_P (x) = 1;
      y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
			  UNSPEC_REG_SAVE);
      y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
      RTX_FRAME_RELATED_P (y) = 1;
      x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
      REG_NOTES (insn) = x;

      /* Align the stack.  */
      emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
			     GEN_INT (-16)));

      /* And here we cheat like madmen with the unwind info.  We force the
	 cfa register back to sp+4, which is exactly what it was at the
	 start of the function.  Re-pushing the return address results in
	 the return at the same spot relative to the cfa, and thus is
	 correct wrt the unwind info.  */
      x = cfun->machine->force_align_arg_pointer;
      x = gen_frame_mem (Pmode, plus_constant (x, -4));
      insn = emit_insn (gen_push (x));
      RTX_FRAME_RELATED_P (insn) = 1;

      x = GEN_INT (4);
      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
      REG_NOTES (insn) = x;
    }

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;

  if (!frame.save_regs_using_mov)
    ix86_emit_save_regs ();
  else
    allocate += frame.nregs * UNITS_PER_WORD;

  /* When using the red zone we may start register saving before allocating
     the stack frame, saving one cycle of the prologue.  */
  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
				   : stack_pointer_rtx,
				   -frame.nregs * UNITS_PER_WORD);

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (-allocate), -1);
  else
    {
      /* Only valid for Win32.  */
      rtx eax = gen_rtx_REG (Pmode, 0);
      bool eax_live;
      rtx t;

      gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);

      if (TARGET_64BIT_MS_ABI)
	eax_live = false;
      else
	eax_live = ix86_eax_live_at_start_p ();

      if (eax_live)
	{
	  emit_insn (gen_push (eax));
	  allocate -= UNITS_PER_WORD;
	}

      emit_move_insn (eax, GEN_INT (allocate));

      if (TARGET_64BIT)
	insn = gen_allocate_stack_worker_64 (eax);
      else
	insn = gen_allocate_stack_worker_32 (eax);
      insn = emit_insn (insn);
      RTX_FRAME_RELATED_P (insn) = 1;
      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
					    t, REG_NOTES (insn));

      if (eax_live)
	{
	  if (frame_pointer_needed)
	    t = plus_constant (hard_frame_pointer_rtx,
			       allocate
			       - frame.to_allocate
			       - frame.nregs * UNITS_PER_WORD);
	  else
	    t = plus_constant (stack_pointer_rtx, allocate);
	  emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
	}
    }

  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);
    }

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	  || current_function_profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      if (TARGET_64BIT)
	{
	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      rtx tmp_reg = gen_rtx_REG (DImode,
					 FIRST_REX_INT_REG + 3 /* R11 */);
	      rtx label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
	      insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
	      insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
	      insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
					    pic_offset_table_rtx, tmp_reg));
	    }
	  else
	    insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
	}
      else
	insn = emit_insn (gen_set_got (pic_offset_table_rtx));
    }

  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    {
      if (pic_reg_used)
	emit_insn (gen_prologue_use (pic_offset_table_rtx));
      emit_insn (gen_blockage ());
    }
}
6200
da2d1d3a
JH
 6201/* Emit code to restore saved registers using MOV insns. The first
 6202 register is restored from POINTER + OFFSET. */
6203static void
72613dfa
JH
6204ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6205 int maybe_eh_return)
da2d1d3a
JH
6206{
6207 int regno;
72613dfa 6208 rtx base_address = gen_rtx_MEM (Pmode, pointer);
da2d1d3a 6209
4dd2ac2c 6210 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 6211 if (ix86_save_reg (regno, maybe_eh_return))
da2d1d3a 6212 {
72613dfa
JH
 6213 /* Ensure that adjust_address won't be forced to produce a pointer
 6214 outside the range allowed by the x86-64 instruction set (a sign-extended 32-bit displacement). */
6215 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6216 {
6217 rtx r11;
6218
3c4ace25 6219 r11 = gen_rtx_REG (DImode, R11_REG);
72613dfa
JH
6220 emit_move_insn (r11, GEN_INT (offset));
6221 emit_insn (gen_adddi3 (r11, r11, pointer));
6222 base_address = gen_rtx_MEM (Pmode, r11);
6223 offset = 0;
6224 }
4dd2ac2c 6225 emit_move_insn (gen_rtx_REG (Pmode, regno),
72613dfa 6226 adjust_address (base_address, Pmode, offset));
4dd2ac2c 6227 offset += UNITS_PER_WORD;
da2d1d3a
JH
6228 }
6229}
6230
0f290768 6231/* Restore function stack, frame, and registers. */
e9a25f70 6232
2a2ab3f9 6233void
b96a374d 6234ix86_expand_epilogue (int style)
2a2ab3f9 6235{
1c71e60e 6236 int regno;
fdb8a883 6237 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4dd2ac2c 6238 struct ix86_frame frame;
65954bd8 6239 HOST_WIDE_INT offset;
4dd2ac2c
JH
6240
6241 ix86_compute_frame_layout (&frame);
2a2ab3f9 6242
a4f31c00 6243 /* Calculate the start of the saved registers relative to ebp. Special care
84e306b4
RH
6244 must be taken for the normal return case of a function using
6245 eh_return: the eax and edx registers are marked as saved, but not
6246 restored along this path. */
6247 offset = frame.nregs;
6248 if (current_function_calls_eh_return && style != 2)
6249 offset -= 2;
6250 offset *= -UNITS_PER_WORD;
2a2ab3f9 6251
fdb8a883
JW
 6252 /* If we're only restoring one register and sp is not valid, then
 6253 use a move instruction to restore the register, since it's
 0f290768 6254 less work than reloading sp and popping the register.
da2d1d3a
JH
 6255
 6256 The default code results in a stack adjustment using an add/lea
 6257 instruction, while this code results in a LEAVE instruction (or its
 6258 discrete equivalent), so it is profitable in some other cases as well,
 6259 especially when there are no registers to restore. We also use this
 d1f87653 6260 code when TARGET_USE_LEAVE is set and there is exactly one register
 da2d1d3a 6261 to pop. This heuristic may need some tuning in the future. */
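     /* As a rough illustration (not from the original sources), with a
	frame pointer and one saved register the two styles compare as

	    movl -4(%ebp), %ebx              addl $LOCALS, %esp
	    leave                    vs.     popl %ebx
	    ret                              popl %ebp
	                                     ret

	so the mov/leave form trades the explicit stack adjustment for
	frame-pointer-relative restores.  */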
4dd2ac2c 6262 if ((!sp_valid && frame.nregs <= 1)
2ab0437e 6263 || (TARGET_EPILOGUE_USING_MOVE
d9b40e8d 6264 && cfun->machine->use_fast_prologue_epilogue
c6036a37 6265 && (frame.nregs > 1 || frame.to_allocate))
4dd2ac2c 6266 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2ab0437e 6267 || (frame_pointer_needed && TARGET_USE_LEAVE
d9b40e8d
JH
6268 && cfun->machine->use_fast_prologue_epilogue
6269 && frame.nregs == 1)
2ab0437e 6270 || current_function_calls_eh_return)
2a2ab3f9 6271 {
da2d1d3a
JH
 6272 /* Restore registers. We can use ebp or esp to address the memory
 6273 locations. If both are available, default to ebp, since offsets
 6274 are known to be small. The only exception is esp pointing directly
 6275 to the end of the block of saved registers, where we may simplify
 6276 the addressing mode. */
6277
4dd2ac2c 6278 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
1020a5ab
RH
6279 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6280 frame.to_allocate, style == 2);
da2d1d3a 6281 else
1020a5ab
RH
6282 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6283 offset, style == 2);
6284
6285 /* eh_return epilogues need %ecx added to the stack pointer. */
6286 if (style == 2)
6287 {
6288 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
2a2ab3f9 6289
1020a5ab
RH
6290 if (frame_pointer_needed)
6291 {
6292 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6293 tmp = plus_constant (tmp, UNITS_PER_WORD);
6294 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6295
6296 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6297 emit_move_insn (hard_frame_pointer_rtx, tmp);
6298
b19ee4bd
JJ
6299 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6300 const0_rtx, style);
1020a5ab
RH
6301 }
6302 else
6303 {
6304 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6305 tmp = plus_constant (tmp, (frame.to_allocate
6306 + frame.nregs * UNITS_PER_WORD));
6307 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6308 }
6309 }
6310 else if (!frame_pointer_needed)
b19ee4bd
JJ
6311 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6312 GEN_INT (frame.to_allocate
6313 + frame.nregs * UNITS_PER_WORD),
6314 style);
0f290768 6315 /* If not an i386, mov & pop is faster than "leave". */
d9b40e8d
JH
6316 else if (TARGET_USE_LEAVE || optimize_size
6317 || !cfun->machine->use_fast_prologue_epilogue)
8362f420 6318 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
c8c5cb99 6319 else
2a2ab3f9 6320 {
b19ee4bd
JJ
6321 pro_epilogue_adjust_stack (stack_pointer_rtx,
6322 hard_frame_pointer_rtx,
6323 const0_rtx, style);
8362f420
JH
6324 if (TARGET_64BIT)
6325 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6326 else
6327 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
e9a25f70
JL
6328 }
6329 }
1c71e60e 6330 else
68f654ec 6331 {
1c71e60e
JH
 6332 /* The first step is to deallocate the stack frame so that we can
 6333 pop the registers. */
6334 if (!sp_valid)
6335 {
d0396b79 6336 gcc_assert (frame_pointer_needed);
b19ee4bd
JJ
6337 pro_epilogue_adjust_stack (stack_pointer_rtx,
6338 hard_frame_pointer_rtx,
6339 GEN_INT (offset), style);
1c71e60e 6340 }
4dd2ac2c 6341 else if (frame.to_allocate)
b19ee4bd
JJ
6342 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6343 GEN_INT (frame.to_allocate), style);
1c71e60e 6344
4dd2ac2c 6345 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 6346 if (ix86_save_reg (regno, false))
8362f420
JH
6347 {
6348 if (TARGET_64BIT)
6349 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6350 else
6351 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6352 }
4dd2ac2c 6353 if (frame_pointer_needed)
8362f420 6354 {
f5143c46 6355 /* Leave results in shorter dependency chains on CPUs that are
2ab0437e
JH
6356 able to grok it fast. */
6357 if (TARGET_USE_LEAVE)
6358 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6359 else if (TARGET_64BIT)
8362f420
JH
6360 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6361 else
6362 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6363 }
68f654ec 6364 }
68f654ec 6365
150cdc9e
RH
6366 if (cfun->machine->force_align_arg_pointer)
6367 {
6368 emit_insn (gen_addsi3 (stack_pointer_rtx,
6369 cfun->machine->force_align_arg_pointer,
6370 GEN_INT (-4)));
6371 }
6372
cbbf65e0 6373 /* Sibcall epilogues don't want a return instruction. */
1020a5ab 6374 if (style == 0)
cbbf65e0
RH
6375 return;
6376
2a2ab3f9
JVA
6377 if (current_function_pops_args && current_function_args_size)
6378 {
e075ae69 6379 rtx popc = GEN_INT (current_function_pops_args);
2a2ab3f9 6380
b8c752c8
UD
 6381 /* The i386 can only pop 64K bytes. If asked to pop more, pop the
 6382 return address, do an explicit add, and jump indirectly to the
 0f290768 6383 caller. */
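	  /* The emitted sequence is, in effect (a sketch):

		popl %ecx		# return address
		addl $N, %esp		# pop the caller's arguments
		jmp  *%ecx		# return

	     since "ret $imm16" can encode at most a 16-bit pop count.  */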
2a2ab3f9 6384
b8c752c8 6385 if (current_function_pops_args >= 65536)
2a2ab3f9 6386 {
e075ae69 6387 rtx ecx = gen_rtx_REG (SImode, 2);
e9a25f70 6388
ccf8e764 6389 /* There is no "pascal" calling convention in any 64bit ABI. */
d0396b79 6390 gcc_assert (!TARGET_64BIT);
8362f420 6391
e075ae69
RH
6392 emit_insn (gen_popsi1 (ecx));
6393 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
11837777 6394 emit_jump_insn (gen_return_indirect_internal (ecx));
e9a25f70 6395 }
79325812 6396 else
e075ae69
RH
6397 emit_jump_insn (gen_return_pop_internal (popc));
6398 }
6399 else
6400 emit_jump_insn (gen_return_internal ());
6401}
bd09bdeb
RH
6402
 6403/* Reset state from the function's potential modifications. */
6404
6405static void
b96a374d
AJ
6406ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6407 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
bd09bdeb
RH
6408{
6409 if (pic_offset_table_rtx)
6fb5fa3c 6410 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
f8c2645c
AL
6411#if TARGET_MACHO
6412 /* Mach-O doesn't support labels at the end of objects, so if
6413 it looks like we might want one, insert a NOP. */
6414 {
6415 rtx insn = get_last_insn ();
6416 while (insn
6417 && NOTE_P (insn)
a38e7aa5 6418 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
f8c2645c
AL
6419 insn = PREV_INSN (insn);
6420 if (insn
6421 && (LABEL_P (insn)
6422 || (NOTE_P (insn)
a38e7aa5 6423 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
f8c2645c
AL
6424 fputs ("\tnop\n", file);
6425 }
6426#endif
6427
bd09bdeb 6428}
e075ae69
RH
6429\f
6430/* Extract the parts of an RTL expression that is a valid memory address
b446e5a2
JH
6431 for an instruction. Return 0 if the structure of the address is
 6432 grossly off. Return -1 if the address contains ASHIFT, so it is not
74dc3e94 6433 strictly valid but is still used for computing the length of an lea insn. */
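/* For example (illustrative), the address

     (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 12))

   decomposes into base B, index A, scale 4 and displacement 12, i.e. the
   operand 12(%B,%A,4) in AT&T syntax.  */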
e075ae69 6434
8fe75e43 6435int
8d531ab9 6436ix86_decompose_address (rtx addr, struct ix86_address *out)
e075ae69 6437{
7c93c2cc
PB
6438 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6439 rtx base_reg, index_reg;
e075ae69
RH
6440 HOST_WIDE_INT scale = 1;
6441 rtx scale_rtx = NULL_RTX;
b446e5a2 6442 int retval = 1;
74dc3e94 6443 enum ix86_address_seg seg = SEG_DEFAULT;
e075ae69 6444
7656aee4 6445 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
e075ae69
RH
6446 base = addr;
6447 else if (GET_CODE (addr) == PLUS)
6448 {
74dc3e94
RH
6449 rtx addends[4], op;
6450 int n = 0, i;
e075ae69 6451
74dc3e94
RH
6452 op = addr;
6453 do
e075ae69 6454 {
74dc3e94
RH
6455 if (n >= 4)
6456 return 0;
6457 addends[n++] = XEXP (op, 1);
6458 op = XEXP (op, 0);
2a2ab3f9 6459 }
74dc3e94
RH
6460 while (GET_CODE (op) == PLUS);
6461 if (n >= 4)
6462 return 0;
6463 addends[n] = op;
6464
6465 for (i = n; i >= 0; --i)
e075ae69 6466 {
74dc3e94
RH
6467 op = addends[i];
6468 switch (GET_CODE (op))
6469 {
6470 case MULT:
6471 if (index)
6472 return 0;
6473 index = XEXP (op, 0);
6474 scale_rtx = XEXP (op, 1);
6475 break;
6476
6477 case UNSPEC:
6478 if (XINT (op, 1) == UNSPEC_TP
6479 && TARGET_TLS_DIRECT_SEG_REFS
6480 && seg == SEG_DEFAULT)
6481 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6482 else
6483 return 0;
6484 break;
6485
6486 case REG:
6487 case SUBREG:
6488 if (!base)
6489 base = op;
6490 else if (!index)
6491 index = op;
6492 else
6493 return 0;
6494 break;
6495
6496 case CONST:
6497 case CONST_INT:
6498 case SYMBOL_REF:
6499 case LABEL_REF:
6500 if (disp)
6501 return 0;
6502 disp = op;
6503 break;
6504
6505 default:
6506 return 0;
6507 }
e075ae69 6508 }
e075ae69
RH
6509 }
6510 else if (GET_CODE (addr) == MULT)
6511 {
6512 index = XEXP (addr, 0); /* index*scale */
6513 scale_rtx = XEXP (addr, 1);
6514 }
6515 else if (GET_CODE (addr) == ASHIFT)
6516 {
6517 rtx tmp;
6518
6519 /* We're called for lea too, which implements ashift on occasion. */
6520 index = XEXP (addr, 0);
6521 tmp = XEXP (addr, 1);
7656aee4 6522 if (!CONST_INT_P (tmp))
b446e5a2 6523 return 0;
e075ae69
RH
6524 scale = INTVAL (tmp);
6525 if ((unsigned HOST_WIDE_INT) scale > 3)
b446e5a2 6526 return 0;
e075ae69 6527 scale = 1 << scale;
b446e5a2 6528 retval = -1;
2a2ab3f9 6529 }
2a2ab3f9 6530 else
e075ae69
RH
6531 disp = addr; /* displacement */
6532
6533 /* Extract the integral value of scale. */
6534 if (scale_rtx)
e9a25f70 6535 {
7656aee4 6536 if (!CONST_INT_P (scale_rtx))
b446e5a2 6537 return 0;
e075ae69 6538 scale = INTVAL (scale_rtx);
e9a25f70 6539 }
3b3c6a3f 6540
7c93c2cc
PB
6541 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6542 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6543
74dc3e94 6544 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
7c93c2cc
PB
6545 if (base_reg && index_reg && scale == 1
6546 && (index_reg == arg_pointer_rtx
6547 || index_reg == frame_pointer_rtx
6548 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
e075ae69 6549 {
7c93c2cc
PB
6550 rtx tmp;
6551 tmp = base, base = index, index = tmp;
6552 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
e075ae69
RH
6553 }
6554
6555 /* Special case: %ebp cannot be encoded as a base without a displacement. */
7c93c2cc
PB
6556 if ((base_reg == hard_frame_pointer_rtx
6557 || base_reg == frame_pointer_rtx
6558 || base_reg == arg_pointer_rtx) && !disp)
e075ae69
RH
6559 disp = const0_rtx;
6560
 6561 /* Special case: on K6, [%esi] forces the instruction to be vector
 6562 decoded. Avoid this by transforming it to [%esi+0]. */
9e555526 6563 if (ix86_tune == PROCESSOR_K6 && !optimize_size
7c93c2cc
PB
6564 && base_reg && !index_reg && !disp
6565 && REG_P (base_reg)
6566 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
e075ae69
RH
6567 disp = const0_rtx;
6568
6569 /* Special case: encode reg+reg instead of reg*2. */
6570 if (!base && index && scale && scale == 2)
7c93c2cc 6571 base = index, base_reg = index_reg, scale = 1;
0f290768 6572
e075ae69
RH
6573 /* Special case: scaling cannot be encoded without base or displacement. */
6574 if (!base && !disp && index && scale != 1)
6575 disp = const0_rtx;
6576
6577 out->base = base;
6578 out->index = index;
6579 out->disp = disp;
6580 out->scale = scale;
74dc3e94 6581 out->seg = seg;
3b3c6a3f 6582
b446e5a2 6583 return retval;
e075ae69 6584}
01329426
JH
6585\f
 6586/* Return the cost of the memory address x.
 6587 For i386, it is better to use a complex address than to let gcc copy
 6588 the address into a reg and make a new pseudo, but not if the address
 6589 requires two regs - that would mean more pseudos with longer
 6590 lifetimes. */
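/* A worked example (illustrative): an address built only from hard
   registers keeps the base cost of 1; an address using one pseudo as base
   or index costs 2; and one using two distinct pseudos costs 3, before
   the K6-specific penalty below is considered.  */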
dcefdf67 6591static int
b96a374d 6592ix86_address_cost (rtx x)
01329426
JH
6593{
6594 struct ix86_address parts;
6595 int cost = 1;
d0396b79 6596 int ok = ix86_decompose_address (x, &parts);
3b3c6a3f 6597
d0396b79 6598 gcc_assert (ok);
01329426 6599
7c93c2cc
PB
6600 if (parts.base && GET_CODE (parts.base) == SUBREG)
6601 parts.base = SUBREG_REG (parts.base);
6602 if (parts.index && GET_CODE (parts.index) == SUBREG)
6603 parts.index = SUBREG_REG (parts.index);
6604
01329426
JH
6605 /* Attempt to minimize number of registers in the address. */
6606 if ((parts.base
6607 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6608 || (parts.index
6609 && (!REG_P (parts.index)
6610 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6611 cost++;
6612
6613 if (parts.base
6614 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6615 && parts.index
6616 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6617 && parts.base != parts.index)
6618 cost++;
6619
 6620 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to
 6621 00_xxx_100b, since its predecode logic can't detect the length of
 6622 such instructions, and decoding degenerates to the vector decoder.
 6623 Increase the cost of such addresses here. The penalty is at least
 0f290768 6624 2 cycles. It may be worthwhile to split such addresses or even refuse them entirely.
01329426
JH
 6625
 6626 The following addressing modes are affected:
 6627 [base+scale*index]
 6628 [scale*index+disp]
 6629 [base+index]
 0f290768 6630
01329426
JH
 6631 The first and last cases may be avoided by explicitly coding a zero
 6632 displacement into the memory address, but I don't have an AMD-K6
 6633 machine handy to check this theory. */
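  /* Concretely (a sketch): (%eax,%ecx,2), 8(,%ecx,2) and (%eax,%ecx) all
     take the penalty above, while 0(%eax,%ecx,2) and 0(%eax,%ecx) would
     not, because the explicit displacement changes the ModR/M mod field.  */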
6634
6635 if (TARGET_K6
6636 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6637 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6638 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6639 cost += 10;
0f290768 6640
01329426
JH
6641 return cost;
6642}
6643\f
2ed941ec
RH
 6644/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
 6645 this is used to form addresses to local data when -fPIC is in
 6646 use. */
828a4fe4
MS
6647
6648static bool
6649darwin_local_data_pic (rtx disp)
6650{
6651 if (GET_CODE (disp) == MINUS)
6652 {
6653 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6654 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6655 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6656 {
6657 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6658 if (! strcmp (sym_name, "<pic base>"))
6659 return true;
6660 }
6661 }
6662
6663 return false;
6664}
2ed941ec 6665
f996902d
RH
6666/* Determine if a given RTX is a valid constant. We already know this
6667 satisfies CONSTANT_P. */
6668
6669bool
b96a374d 6670legitimate_constant_p (rtx x)
f996902d 6671{
f996902d
RH
6672 switch (GET_CODE (x))
6673 {
f996902d 6674 case CONST:
1e19ac74 6675 x = XEXP (x, 0);
f996902d 6676
1e19ac74 6677 if (GET_CODE (x) == PLUS)
828a4fe4 6678 {
7656aee4 6679 if (!CONST_INT_P (XEXP (x, 1)))
828a4fe4 6680 return false;
1e19ac74 6681 x = XEXP (x, 0);
828a4fe4
MS
6682 }
6683
1e19ac74 6684 if (TARGET_MACHO && darwin_local_data_pic (x))
828a4fe4
MS
6685 return true;
6686
f996902d 6687 /* Only some unspecs are valid as "constants". */
1e19ac74
RH
6688 if (GET_CODE (x) == UNSPEC)
6689 switch (XINT (x, 1))
f996902d 6690 {
dc4d7240 6691 case UNSPEC_GOT:
7dcbf659 6692 case UNSPEC_GOTOFF:
dc4d7240 6693 case UNSPEC_PLTOFF:
7dcbf659 6694 return TARGET_64BIT;
f996902d 6695 case UNSPEC_TPOFF:
cb0e3e3f 6696 case UNSPEC_NTPOFF:
fd4aca96
RH
6697 x = XVECEXP (x, 0, 0);
6698 return (GET_CODE (x) == SYMBOL_REF
6699 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
cb0e3e3f 6700 case UNSPEC_DTPOFF:
fd4aca96
RH
6701 x = XVECEXP (x, 0, 0);
6702 return (GET_CODE (x) == SYMBOL_REF
6703 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
f996902d
RH
6704 default:
6705 return false;
6706 }
1e19ac74
RH
6707
6708 /* We must have drilled down to a symbol. */
fd4aca96
RH
6709 if (GET_CODE (x) == LABEL_REF)
6710 return true;
6711 if (GET_CODE (x) != SYMBOL_REF)
1e19ac74
RH
6712 return false;
6713 /* FALLTHRU */
6714
6715 case SYMBOL_REF:
6716 /* TLS symbols are never valid. */
fd4aca96 6717 if (SYMBOL_REF_TLS_MODEL (x))
1e19ac74 6718 return false;
da489f73
RH
6719
6720 /* DLLIMPORT symbols are never valid. */
6721 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6722 && SYMBOL_REF_DLLIMPORT_P (x))
6723 return false;
f996902d
RH
6724 break;
6725
d0b89852
RS
6726 case CONST_DOUBLE:
6727 if (GET_MODE (x) == TImode
6728 && x != CONST0_RTX (TImode)
6729 && !TARGET_64BIT)
6730 return false;
6731 break;
6732
6733 case CONST_VECTOR:
6734 if (x == CONST0_RTX (GET_MODE (x)))
6735 return true;
6736 return false;
6737
f996902d
RH
6738 default:
6739 break;
6740 }
6741
6742 /* Otherwise we handle everything else in the move patterns. */
6743 return true;
6744}
6745
3a04ff64
RH
6746/* Determine if it's legal to put X into the constant pool. This
6747 is not possible for the address of thread-local symbols, which
6748 is checked above. */
6749
6750static bool
b96a374d 6751ix86_cannot_force_const_mem (rtx x)
3a04ff64 6752{
d0b89852
RS
6753 /* We can always put integral constants and vectors in memory. */
6754 switch (GET_CODE (x))
6755 {
6756 case CONST_INT:
6757 case CONST_DOUBLE:
6758 case CONST_VECTOR:
6759 return false;
6760
6761 default:
6762 break;
6763 }
3a04ff64
RH
6764 return !legitimate_constant_p (x);
6765}
6766
f996902d
RH
6767/* Determine if a given RTX is a valid constant address. */
6768
6769bool
b96a374d 6770constant_address_p (rtx x)
f996902d 6771{
a94f136b 6772 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
f996902d
RH
6773}
6774
6775/* Nonzero if the constant value X is a legitimate general operand
fce5a9f2 6776 when generating PIC code. It is given that flag_pic is on and
f996902d
RH
6777 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6778
6779bool
b96a374d 6780legitimate_pic_operand_p (rtx x)
f996902d
RH
6781{
6782 rtx inner;
6783
6784 switch (GET_CODE (x))
6785 {
6786 case CONST:
6787 inner = XEXP (x, 0);
7dcbf659 6788 if (GET_CODE (inner) == PLUS
7656aee4 6789 && CONST_INT_P (XEXP (inner, 1)))
7dcbf659 6790 inner = XEXP (inner, 0);
f996902d
RH
6791
6792 /* Only some unspecs are valid as "constants". */
6793 if (GET_CODE (inner) == UNSPEC)
6794 switch (XINT (inner, 1))
6795 {
dc4d7240 6796 case UNSPEC_GOT:
7dcbf659 6797 case UNSPEC_GOTOFF:
dc4d7240 6798 case UNSPEC_PLTOFF:
7dcbf659 6799 return TARGET_64BIT;
f996902d 6800 case UNSPEC_TPOFF:
fd4aca96
RH
6801 x = XVECEXP (inner, 0, 0);
6802 return (GET_CODE (x) == SYMBOL_REF
6803 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
f996902d
RH
6804 default:
6805 return false;
6806 }
5efb1046 6807 /* FALLTHRU */
f996902d
RH
6808
6809 case SYMBOL_REF:
6810 case LABEL_REF:
6811 return legitimate_pic_address_disp_p (x);
6812
6813 default:
6814 return true;
6815 }
6816}
6817
e075ae69
RH
6818/* Determine if a given CONST RTX is a valid memory displacement
6819 in PIC mode. */
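/* For instance (illustrative), in 32-bit PIC the displacement
   (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)) is accepted below,
   optionally wrapped in a (plus ... (const_int N)); in 64-bit mode only
   the @GOTPCREL, @GOTOFF and @PLTOFF unspecs pass the checks.  */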
0f290768 6820
59be65f6 6821int
8d531ab9 6822legitimate_pic_address_disp_p (rtx disp)
91bb873f 6823{
f996902d
RH
6824 bool saw_plus;
6825
6eb791fc
JH
6826 /* In 64bit mode we can allow direct addresses of symbols and labels
6827 when they are not dynamic symbols. */
c05dbe81
JH
6828 if (TARGET_64BIT)
6829 {
fd4aca96
RH
6830 rtx op0 = disp, op1;
6831
6832 switch (GET_CODE (disp))
a132b6a8 6833 {
fd4aca96
RH
6834 case LABEL_REF:
6835 return true;
6836
6837 case CONST:
6838 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6839 break;
6840 op0 = XEXP (XEXP (disp, 0), 0);
6841 op1 = XEXP (XEXP (disp, 0), 1);
7656aee4 6842 if (!CONST_INT_P (op1)
fd4aca96
RH
6843 || INTVAL (op1) >= 16*1024*1024
6844 || INTVAL (op1) < -16*1024*1024)
f7288899 6845 break;
fd4aca96
RH
6846 if (GET_CODE (op0) == LABEL_REF)
6847 return true;
6848 if (GET_CODE (op0) != SYMBOL_REF)
6849 break;
6850 /* FALLTHRU */
a132b6a8 6851
fd4aca96 6852 case SYMBOL_REF:
a132b6a8 6853 /* TLS references should always be enclosed in UNSPEC. */
fd4aca96
RH
6854 if (SYMBOL_REF_TLS_MODEL (op0))
6855 return false;
dc4d7240
JH
6856 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6857 && ix86_cmodel != CM_LARGE_PIC)
fd4aca96
RH
6858 return true;
6859 break;
6860
6861 default:
6862 break;
a132b6a8 6863 }
c05dbe81 6864 }
91bb873f
RH
6865 if (GET_CODE (disp) != CONST)
6866 return 0;
6867 disp = XEXP (disp, 0);
6868
6eb791fc
JH
6869 if (TARGET_64BIT)
6870 {
 6871 /* It is unsafe to allow PLUS expressions here; this limits the
 6872 allowed distance of GOT table references. We should not need these anyway. */
6873 if (GET_CODE (disp) != UNSPEC
7dcbf659 6874 || (XINT (disp, 1) != UNSPEC_GOTPCREL
dc4d7240
JH
6875 && XINT (disp, 1) != UNSPEC_GOTOFF
6876 && XINT (disp, 1) != UNSPEC_PLTOFF))
6eb791fc
JH
6877 return 0;
6878
6879 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6880 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6881 return 0;
6882 return 1;
6883 }
6884
f996902d 6885 saw_plus = false;
91bb873f
RH
6886 if (GET_CODE (disp) == PLUS)
6887 {
7656aee4 6888 if (!CONST_INT_P (XEXP (disp, 1)))
91bb873f
RH
6889 return 0;
6890 disp = XEXP (disp, 0);
f996902d 6891 saw_plus = true;
91bb873f
RH
6892 }
6893
828a4fe4
MS
6894 if (TARGET_MACHO && darwin_local_data_pic (disp))
6895 return 1;
b069de3b 6896
8ee41eaf 6897 if (GET_CODE (disp) != UNSPEC)
91bb873f
RH
6898 return 0;
6899
623fe810
RH
6900 switch (XINT (disp, 1))
6901 {
8ee41eaf 6902 case UNSPEC_GOT:
f996902d
RH
6903 if (saw_plus)
6904 return false;
170bdaba
RS
6905 /* We need to check for both symbols and labels because VxWorks loads
6906 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6907 details. */
6908 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6909 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
8ee41eaf 6910 case UNSPEC_GOTOFF:
47efdea4
JH
 6911 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
 6912 While the ABI also specifies a 32bit relocation, we don't produce
 6913 it in the small PIC model at all. */
6914 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6915 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6916 && !TARGET_64BIT)
170bdaba 6917 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
799b33a0 6918 return false;
f996902d 6919 case UNSPEC_GOTTPOFF:
dea73790
JJ
6920 case UNSPEC_GOTNTPOFF:
6921 case UNSPEC_INDNTPOFF:
f996902d
RH
6922 if (saw_plus)
6923 return false;
fd4aca96
RH
6924 disp = XVECEXP (disp, 0, 0);
6925 return (GET_CODE (disp) == SYMBOL_REF
6926 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
f996902d 6927 case UNSPEC_NTPOFF:
fd4aca96
RH
6928 disp = XVECEXP (disp, 0, 0);
6929 return (GET_CODE (disp) == SYMBOL_REF
6930 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
f996902d 6931 case UNSPEC_DTPOFF:
fd4aca96
RH
6932 disp = XVECEXP (disp, 0, 0);
6933 return (GET_CODE (disp) == SYMBOL_REF
6934 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
623fe810 6935 }
fce5a9f2 6936
623fe810 6937 return 0;
91bb873f
RH
6938}
6939
e075ae69
RH
6940/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6941 memory address for an instruction. The MODE argument is the machine mode
6942 for the MEM expression that wants to use this address.
6943
 6944 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6945 convert common non-canonical forms to canonical form so that they will
6946 be recognized. */
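/* In other words, the canonical form validated below is

     [seg:] base + index*{1,2,4,8} + disp

   where base and index are Pmode registers (each optional) and disp is a
   (possibly PIC-flavored) constant.  */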
6947
3b3c6a3f 6948int
ee2f65b4
RH
6949legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
6950 rtx addr, int strict)
3b3c6a3f 6951{
e075ae69
RH
6952 struct ix86_address parts;
6953 rtx base, index, disp;
6954 HOST_WIDE_INT scale;
6955 const char *reason = NULL;
6956 rtx reason_rtx = NULL_RTX;
3b3c6a3f 6957
b446e5a2 6958 if (ix86_decompose_address (addr, &parts) <= 0)
3b3c6a3f 6959 {
e075ae69 6960 reason = "decomposition failed";
50e60bc3 6961 goto report_error;
3b3c6a3f
MM
6962 }
6963
e075ae69
RH
6964 base = parts.base;
6965 index = parts.index;
6966 disp = parts.disp;
6967 scale = parts.scale;
91f0226f 6968
e075ae69 6969 /* Validate base register.
e9a25f70 6970
7c93c2cc
PB
 6971 Don't allow SUBREGs that span more than a word here. It can lead to spill
 6972 failures when the base is one word out of a two-word structure, which is
 6973 represented internally as a DImode int. */
e9a25f70 6974
3b3c6a3f
MM
6975 if (base)
6976 {
7c93c2cc 6977 rtx reg;
e075ae69 6978 reason_rtx = base;
5656a184 6979
7c93c2cc
PB
6980 if (REG_P (base))
6981 reg = base;
6982 else if (GET_CODE (base) == SUBREG
6983 && REG_P (SUBREG_REG (base))
6984 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6985 <= UNITS_PER_WORD)
6986 reg = SUBREG_REG (base);
6987 else
3b3c6a3f 6988 {
e075ae69 6989 reason = "base is not a register";
50e60bc3 6990 goto report_error;
3b3c6a3f
MM
6991 }
6992
c954bd01
RH
6993 if (GET_MODE (base) != Pmode)
6994 {
e075ae69 6995 reason = "base is not in Pmode";
50e60bc3 6996 goto report_error;
c954bd01
RH
6997 }
6998
7c93c2cc
PB
6999 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7000 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
3b3c6a3f 7001 {
e075ae69 7002 reason = "base is not valid";
50e60bc3 7003 goto report_error;
3b3c6a3f
MM
7004 }
7005 }
7006
e075ae69 7007 /* Validate index register.
e9a25f70 7008
7c93c2cc 7009 Don't allow SUBREGs that span more than a word here -- same as above. */
e075ae69
RH
7010
7011 if (index)
3b3c6a3f 7012 {
7c93c2cc 7013 rtx reg;
e075ae69
RH
7014 reason_rtx = index;
7015
7c93c2cc
PB
7016 if (REG_P (index))
7017 reg = index;
7018 else if (GET_CODE (index) == SUBREG
7019 && REG_P (SUBREG_REG (index))
7020 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7021 <= UNITS_PER_WORD)
7022 reg = SUBREG_REG (index);
7023 else
3b3c6a3f 7024 {
e075ae69 7025 reason = "index is not a register";
50e60bc3 7026 goto report_error;
3b3c6a3f
MM
7027 }
7028
e075ae69 7029 if (GET_MODE (index) != Pmode)
c954bd01 7030 {
e075ae69 7031 reason = "index is not in Pmode";
50e60bc3 7032 goto report_error;
c954bd01
RH
7033 }
7034
7c93c2cc
PB
7035 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7036 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
3b3c6a3f 7037 {
e075ae69 7038 reason = "index is not valid";
50e60bc3 7039 goto report_error;
3b3c6a3f
MM
7040 }
7041 }
3b3c6a3f 7042
e075ae69
RH
7043 /* Validate scale factor. */
7044 if (scale != 1)
3b3c6a3f 7045 {
e075ae69
RH
7046 reason_rtx = GEN_INT (scale);
7047 if (!index)
3b3c6a3f 7048 {
e075ae69 7049 reason = "scale without index";
50e60bc3 7050 goto report_error;
3b3c6a3f
MM
7051 }
7052
e075ae69 7053 if (scale != 2 && scale != 4 && scale != 8)
3b3c6a3f 7054 {
e075ae69 7055 reason = "scale is not a valid multiplier";
50e60bc3 7056 goto report_error;
3b3c6a3f
MM
7057 }
7058 }
7059
91bb873f 7060 /* Validate displacement. */
3b3c6a3f
MM
7061 if (disp)
7062 {
e075ae69
RH
7063 reason_rtx = disp;
7064
f996902d
RH
7065 if (GET_CODE (disp) == CONST
7066 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7067 switch (XINT (XEXP (disp, 0), 1))
7068 {
47efdea4
JH
 7069 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit
 7070 when used. While the ABI also specifies 32bit relocations, we don't
 7071 produce them at all and use IP-relative addressing instead. */
f996902d
RH
7072 case UNSPEC_GOT:
7073 case UNSPEC_GOTOFF:
47efdea4
JH
7074 gcc_assert (flag_pic);
7075 if (!TARGET_64BIT)
7076 goto is_legitimate_pic;
7077 reason = "64bit address unspec";
7078 goto report_error;
5656a184 7079
f996902d 7080 case UNSPEC_GOTPCREL:
d0396b79 7081 gcc_assert (flag_pic);
f996902d
RH
7082 goto is_legitimate_pic;
7083
7084 case UNSPEC_GOTTPOFF:
dea73790
JJ
7085 case UNSPEC_GOTNTPOFF:
7086 case UNSPEC_INDNTPOFF:
f996902d
RH
7087 case UNSPEC_NTPOFF:
7088 case UNSPEC_DTPOFF:
7089 break;
7090
7091 default:
7092 reason = "invalid address unspec";
7093 goto report_error;
7094 }
7095
f7288899
EC
7096 else if (SYMBOLIC_CONST (disp)
7097 && (flag_pic
7098 || (TARGET_MACHO
b069de3b 7099#if TARGET_MACHO
f7288899
EC
7100 && MACHOPIC_INDIRECT
7101 && !machopic_operand_p (disp)
b069de3b 7102#endif
f7288899 7103 )))
3b3c6a3f 7104 {
f7288899 7105
f996902d 7106 is_legitimate_pic:
0d7d98ee
JH
7107 if (TARGET_64BIT && (index || base))
7108 {
75d38379
JJ
7109 /* foo@dtpoff(%rX) is ok. */
7110 if (GET_CODE (disp) != CONST
7111 || GET_CODE (XEXP (disp, 0)) != PLUS
7112 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7656aee4 7113 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
75d38379
JJ
7114 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7115 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7116 {
7117 reason = "non-constant pic memory reference";
7118 goto report_error;
7119 }
0d7d98ee 7120 }
75d38379 7121 else if (! legitimate_pic_address_disp_p (disp))
91bb873f 7122 {
e075ae69 7123 reason = "displacement is an invalid pic construct";
50e60bc3 7124 goto report_error;
91bb873f
RH
7125 }
7126
4e9efe54 7127 /* This code used to verify that a symbolic pic displacement
0f290768
KH
7128 includes the pic_offset_table_rtx register.
7129
4e9efe54
JH
 7130 While this is a good idea, unfortunately these constructs may
 7131 be created by the "adds using lea" optimization for incorrect
7132 code like:
7133
7134 int a;
7135 int foo(int i)
7136 {
7137 return *(&a+i);
7138 }
7139
 50e60bc3 7140 This code is nonsensical, but results in addressing the
 4e9efe54 7141 GOT table with a pic_offset_table_rtx base. We can't
 f710504c 7142 just refuse it easily, since it gets matched by the
4e9efe54
JH
 7143 "addsi3" pattern, which later gets split to lea when the
 7144 output register differs from the input. While this
 7145 could be handled by a separate addsi pattern for this case
 7146 that never results in lea, disabling this test seems to be
 7147 the easier and correct fix for the crash. */
3b3c6a3f 7148 }
a94f136b 7149 else if (GET_CODE (disp) != LABEL_REF
7656aee4 7150 && !CONST_INT_P (disp)
a94f136b
JH
7151 && (GET_CODE (disp) != CONST
7152 || !legitimate_constant_p (disp))
7153 && (GET_CODE (disp) != SYMBOL_REF
7154 || !legitimate_constant_p (disp)))
f996902d
RH
7155 {
7156 reason = "displacement is not constant";
7157 goto report_error;
7158 }
8fe75e43
RH
7159 else if (TARGET_64BIT
7160 && !x86_64_immediate_operand (disp, VOIDmode))
c05dbe81
JH
7161 {
7162 reason = "displacement is out of range";
7163 goto report_error;
7164 }
3b3c6a3f
MM
7165 }
7166
e075ae69 7167 /* Everything looks valid. */
3b3c6a3f 7168 return TRUE;
e075ae69 7169
5bf0ebab 7170 report_error:
e075ae69 7171 return FALSE;
3b3c6a3f 7172}
3b3c6a3f 7173\f
569b7f6a 7174/* Return a unique alias set for the GOT. */
55efb413 7175
0f290768 7176static HOST_WIDE_INT
b96a374d 7177ix86_GOT_alias_set (void)
55efb413 7178{
5bf0ebab
RH
7179 static HOST_WIDE_INT set = -1;
7180 if (set == -1)
7181 set = new_alias_set ();
7182 return set;
0f290768 7183}
55efb413 7184
3b3c6a3f
MM
7185/* Return a legitimate reference for ORIG (an address) using the
7186 register REG. If REG is 0, a new pseudo is generated.
7187
91bb873f 7188 There are two types of references that must be handled:
3b3c6a3f
MM
7189
7190 1. Global data references must load the address from the GOT, via
7191 the PIC reg. An insn is emitted to do this load, and the reg is
7192 returned.
7193
91bb873f
RH
7194 2. Static data references, constant pool addresses, and code labels
7195 compute the address as an offset from the GOT, whose base is in
2ae5ae57 7196 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
91bb873f
RH
7197 differentiate them from global data objects. The returned
7198 address is the PIC reg + an unspec constant.
3b3c6a3f
MM
7199
7200 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 7201 reg also appears in the address. */
3b3c6a3f 7202
b39edae3 7203static rtx
b96a374d 7204legitimize_pic_address (rtx orig, rtx reg)
3b3c6a3f
MM
7205{
7206 rtx addr = orig;
9415ab7d 7207 rtx new_rtx = orig;
91bb873f 7208 rtx base;
3b3c6a3f 7209
b069de3b 7210#if TARGET_MACHO
f7288899
EC
7211 if (TARGET_MACHO && !TARGET_64BIT)
7212 {
7213 if (reg == 0)
7214 reg = gen_reg_rtx (Pmode);
7215 /* Use the generic Mach-O PIC machinery. */
7216 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7217 }
b069de3b
SS
7218#endif
7219
c05dbe81 7220 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9415ab7d 7221 new_rtx = addr;
7dcbf659
JH
7222 else if (TARGET_64BIT
7223 && ix86_cmodel != CM_SMALL_PIC
170bdaba 7224 && gotoff_operand (addr, Pmode))
7dcbf659
JH
7225 {
7226 rtx tmpreg;
7227 /* This symbol may be referenced via a displacement from the PIC
7228 base address (@GOTOFF). */
7229
7230 if (reload_in_progress)
6fb5fa3c 7231 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7dcbf659
JH
7232 if (GET_CODE (addr) == CONST)
7233 addr = XEXP (addr, 0);
7234 if (GET_CODE (addr) == PLUS)
7235 {
9415ab7d
TN
7236 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7237 UNSPEC_GOTOFF);
7238 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7dcbf659
JH
7239 }
7240 else
9415ab7d
TN
7241 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7242 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7dcbf659
JH
7243 if (!reg)
7244 tmpreg = gen_reg_rtx (Pmode);
7245 else
7246 tmpreg = reg;
9415ab7d 7247 emit_move_insn (tmpreg, new_rtx);
7dcbf659
JH
7248
7249 if (reg != 0)
7250 {
9415ab7d
TN
7251 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7252 tmpreg, 1, OPTAB_DIRECT);
7253 new_rtx = reg;
7dcbf659 7254 }
9415ab7d 7255 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7dcbf659 7256 }
170bdaba 7257 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
3b3c6a3f 7258 {
c05dbe81
JH
7259 /* This symbol may be referenced via a displacement from the PIC
7260 base address (@GOTOFF). */
3b3c6a3f 7261
c05dbe81 7262 if (reload_in_progress)
6fb5fa3c 7263 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
799b33a0
JH
7264 if (GET_CODE (addr) == CONST)
7265 addr = XEXP (addr, 0);
7266 if (GET_CODE (addr) == PLUS)
7267 {
9415ab7d
TN
7268 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7269 UNSPEC_GOTOFF);
7270 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
799b33a0
JH
7271 }
7272 else
9415ab7d
TN
7273 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7274 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7275 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
3b3c6a3f 7276
c05dbe81
JH
7277 if (reg != 0)
7278 {
9415ab7d
TN
7279 emit_move_insn (reg, new_rtx);
7280 new_rtx = reg;
c05dbe81 7281 }
3b3c6a3f 7282 }
170bdaba
RS
7283 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7284 /* We can't use @GOTOFF for text labels on VxWorks;
7285 see gotoff_operand. */
7286 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
3b3c6a3f 7287 {
ccf8e764
RH
7288 /* Given that we've already handled dllimport variables separately
7289 in legitimize_address, and all other variables should satisfy
7290 legitimate_pic_address_disp_p, we should never arrive here. */
7291 gcc_assert (!TARGET_64BIT_MS_ABI);
7292
dc4d7240 7293 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
14f73b5a 7294 {
9415ab7d
TN
7295 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7296 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7297 new_rtx = gen_const_mem (Pmode, new_rtx);
7298 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
14f73b5a
JH
7299
7300 if (reg == 0)
7301 reg = gen_reg_rtx (Pmode);
 7302 /* Use gen_movsi directly, otherwise the address is loaded
 7303 into a register for CSE. We don't want to CSE these addresses;
 7304 instead we CSE addresses from the GOT table, so skip this. */
9415ab7d
TN
7305 emit_insn (gen_movsi (reg, new_rtx));
7306 new_rtx = reg;
14f73b5a
JH
7307 }
7308 else
7309 {
7310 /* This symbol must be referenced via a load from the
7311 Global Offset Table (@GOT). */
3b3c6a3f 7312
66edd3b4 7313 if (reload_in_progress)
6fb5fa3c 7314 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9415ab7d
TN
7315 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7316 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
dc4d7240 7317 if (TARGET_64BIT)
9415ab7d
TN
7318 new_rtx = force_reg (Pmode, new_rtx);
7319 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7320 new_rtx = gen_const_mem (Pmode, new_rtx);
7321 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
3b3c6a3f 7322
14f73b5a
JH
7323 if (reg == 0)
7324 reg = gen_reg_rtx (Pmode);
9415ab7d
TN
7325 emit_move_insn (reg, new_rtx);
7326 new_rtx = reg;
14f73b5a 7327 }
0f290768 7328 }
91bb873f
RH
7329 else
7330 {
7656aee4 7331 if (CONST_INT_P (addr)
d8ff1871
JH
7332 && !x86_64_immediate_operand (addr, VOIDmode))
7333 {
7334 if (reg)
7335 {
7336 emit_move_insn (reg, addr);
9415ab7d 7337 new_rtx = reg;
d8ff1871
JH
7338 }
7339 else
9415ab7d 7340 new_rtx = force_reg (Pmode, addr);
d8ff1871
JH
7341 }
7342 else if (GET_CODE (addr) == CONST)
3b3c6a3f 7343 {
91bb873f 7344 addr = XEXP (addr, 0);
e3c8ea67
RH
7345
7346 /* We must match stuff we generate before. Assume the only
7347 unspecs that can get here are ours. Not that we could do
43f3a59d 7348 anything with them anyway.... */
e3c8ea67
RH
7349 if (GET_CODE (addr) == UNSPEC
7350 || (GET_CODE (addr) == PLUS
7351 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7352 return orig;
d0396b79 7353 gcc_assert (GET_CODE (addr) == PLUS);
3b3c6a3f 7354 }
91bb873f
RH
7355 if (GET_CODE (addr) == PLUS)
7356 {
7357 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
e9a25f70 7358
91bb873f
RH
7359 /* Check first to see if this is a constant offset from a @GOTOFF
7360 symbol reference. */
170bdaba 7361 if (gotoff_operand (op0, Pmode)
7656aee4 7362 && CONST_INT_P (op1))
91bb873f 7363 {
6eb791fc
JH
7364 if (!TARGET_64BIT)
7365 {
66edd3b4 7366 if (reload_in_progress)
6fb5fa3c 7367 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9415ab7d
TN
7368 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7369 UNSPEC_GOTOFF);
7370 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7371 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7372 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
91bb873f 7373
6eb791fc
JH
7374 if (reg != 0)
7375 {
9415ab7d
TN
7376 emit_move_insn (reg, new_rtx);
7377 new_rtx = reg;
6eb791fc
JH
7378 }
7379 }
7380 else
91bb873f 7381 {
75d38379
JJ
7382 if (INTVAL (op1) < -16*1024*1024
7383 || INTVAL (op1) >= 16*1024*1024)
a7297856
ILT
7384 {
7385 if (!x86_64_immediate_operand (op1, Pmode))
7386 op1 = force_reg (Pmode, op1);
9415ab7d 7387 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
a7297856 7388 }
91bb873f
RH
7389 }
7390 }
7391 else
7392 {
7393 base = legitimize_pic_address (XEXP (addr, 0), reg);
9415ab7d
TN
7394 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7395 base == reg ? NULL_RTX : reg);
91bb873f 7396
9415ab7d
TN
7397 if (CONST_INT_P (new_rtx))
7398 new_rtx = plus_constant (base, INTVAL (new_rtx));
91bb873f
RH
7399 else
7400 {
9415ab7d 7401 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
91bb873f 7402 {
9415ab7d
TN
7403 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7404 new_rtx = XEXP (new_rtx, 1);
91bb873f 7405 }
9415ab7d 7406 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
91bb873f
RH
7407 }
7408 }
7409 }
3b3c6a3f 7410 }
9415ab7d 7411 return new_rtx;
3b3c6a3f
MM
7412}
7413\f
74dc3e94 7414/* Load the thread pointer. If TO_REG is true, force it into a register. */
f996902d
RH
7415
7416static rtx
b96a374d 7417get_thread_pointer (int to_reg)
f996902d 7418{
74dc3e94 7419 rtx tp, reg, insn;
f996902d
RH
7420
7421 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
74dc3e94
RH
7422 if (!to_reg)
7423 return tp;
f996902d 7424
74dc3e94
RH
7425 reg = gen_reg_rtx (Pmode);
7426 insn = gen_rtx_SET (VOIDmode, reg, tp);
7427 insn = emit_insn (insn);
7428
7429 return reg;
7430}
7431
7432/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7433 false if we expect this to be used for a memory address and true if
7434 we expect to load the address into a register. */
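/* As a rough guide (illustrative only), the 32-bit sequences generated by
   the cases below look like

     global dynamic:  leal x@tlsgd(,%ebx,1), %eax; call ___tls_get_addr
     local exec:      movl %gs:0, %eax; ... x@ntpoff(%eax)

   with the local-dynamic and initial-exec models in between; the switch
   below builds the corresponding RTL for each model.  */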
7435
7436static rtx
b96a374d 7437legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
74dc3e94 7438{
5bf5a10b 7439 rtx dest, base, off, pic, tp;
74dc3e94
RH
7440 int type;
7441
7442 switch (model)
7443 {
7444 case TLS_MODEL_GLOBAL_DYNAMIC:
7445 dest = gen_reg_rtx (Pmode);
5bf5a10b
AO
7446 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7447
7448 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
74dc3e94
RH
7449 {
7450 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7451
7452 start_sequence ();
7453 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7454 insns = get_insns ();
7455 end_sequence ();
7456
2d79fde8 7457 CONST_OR_PURE_CALL_P (insns) = 1;
74dc3e94
RH
7458 emit_libcall_block (insns, dest, rax, x);
7459 }
5bf5a10b
AO
7460 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7461 emit_insn (gen_tls_global_dynamic_64 (dest, x));
74dc3e94
RH
7462 else
7463 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5bf5a10b
AO
7464
7465 if (TARGET_GNU2_TLS)
7466 {
7467 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7468
7469 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7470 }
74dc3e94
RH
7471 break;
7472
7473 case TLS_MODEL_LOCAL_DYNAMIC:
7474 base = gen_reg_rtx (Pmode);
5bf5a10b
AO
7475 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7476
7477 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
74dc3e94
RH
7478 {
7479 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7480
7481 start_sequence ();
7482 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7483 insns = get_insns ();
7484 end_sequence ();
7485
7486 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7487 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
2d79fde8 7488 CONST_OR_PURE_CALL_P (insns) = 1;
74dc3e94
RH
7489 emit_libcall_block (insns, base, rax, note);
7490 }
5bf5a10b
AO
7491 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7492 emit_insn (gen_tls_local_dynamic_base_64 (base));
74dc3e94
RH
7493 else
7494 emit_insn (gen_tls_local_dynamic_base_32 (base));
7495
5bf5a10b
AO
7496 if (TARGET_GNU2_TLS)
7497 {
7498 rtx x = ix86_tls_module_base ();
7499
31ebc801
AO
7500 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7501 gen_rtx_MINUS (Pmode, x, tp));
5bf5a10b
AO
7502 }
7503
74dc3e94
RH
7504 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7505 off = gen_rtx_CONST (Pmode, off);
7506
5bf5a10b 7507 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
31ebc801
AO
7508
7509 if (TARGET_GNU2_TLS)
7510 {
7511 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7512
7513 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7514 }
7515
5bf5a10b 7516 break;
74dc3e94
RH
7517
7518 case TLS_MODEL_INITIAL_EXEC:
7519 if (TARGET_64BIT)
7520 {
7521 pic = NULL;
7522 type = UNSPEC_GOTNTPOFF;
7523 }
7524 else if (flag_pic)
7525 {
7526 if (reload_in_progress)
6fb5fa3c 7527 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
74dc3e94 7528 pic = pic_offset_table_rtx;
5bf5a10b 7529 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
74dc3e94 7530 }
5bf5a10b 7531 else if (!TARGET_ANY_GNU_TLS)
74dc3e94
RH
7532 {
7533 pic = gen_reg_rtx (Pmode);
7534 emit_insn (gen_set_got (pic));
7535 type = UNSPEC_GOTTPOFF;
7536 }
7537 else
7538 {
7539 pic = NULL;
7540 type = UNSPEC_INDNTPOFF;
7541 }
7542
7543 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7544 off = gen_rtx_CONST (Pmode, off);
7545 if (pic)
7546 off = gen_rtx_PLUS (Pmode, pic, off);
542a8afa 7547 off = gen_const_mem (Pmode, off);
74dc3e94
RH
7548 set_mem_alias_set (off, ix86_GOT_alias_set ());
7549
5bf5a10b 7550 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
74dc3e94
RH
7551 {
7552 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7553 off = force_reg (Pmode, off);
7554 return gen_rtx_PLUS (Pmode, base, off);
7555 }
7556 else
7557 {
7558 base = get_thread_pointer (true);
7559 dest = gen_reg_rtx (Pmode);
7560 emit_insn (gen_subsi3 (dest, base, off));
7561 }
7562 break;
7563
7564 case TLS_MODEL_LOCAL_EXEC:
7565 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5bf5a10b 7566 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
74dc3e94
RH
7567 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7568 off = gen_rtx_CONST (Pmode, off);
7569
5bf5a10b 7570 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
74dc3e94
RH
7571 {
7572 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7573 return gen_rtx_PLUS (Pmode, base, off);
7574 }
7575 else
7576 {
7577 base = get_thread_pointer (true);
7578 dest = gen_reg_rtx (Pmode);
7579 emit_insn (gen_subsi3 (dest, base, off));
7580 }
7581 break;
7582
7583 default:
d0396b79 7584 gcc_unreachable ();
74dc3e94
RH
7585 }
7586
7587 return dest;
f996902d 7588}
fce5a9f2 7589
da489f73
RH
7590/* Create or return the unique __imp_DECL dllimport symbol corresponding
7591 to symbol DECL. */
7592
7593static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7594 htab_t dllimport_map;
7595
7596static tree
7597get_dllimport_decl (tree decl)
7598{
7599 struct tree_map *h, in;
7600 void **loc;
7601 const char *name;
7602 const char *prefix;
7603 size_t namelen, prefixlen;
7604 char *imp_name;
7605 tree to;
7606 rtx rtl;
7607
7608 if (!dllimport_map)
7609 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7610
7611 in.hash = htab_hash_pointer (decl);
7612 in.base.from = decl;
7613 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9415ab7d 7614 h = (struct tree_map *) *loc;
da489f73
RH
7615 if (h)
7616 return h->to;
7617
9415ab7d 7618 *loc = h = GGC_NEW (struct tree_map);
da489f73
RH
7619 h->hash = in.hash;
7620 h->base.from = decl;
7621 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7622 DECL_ARTIFICIAL (to) = 1;
7623 DECL_IGNORED_P (to) = 1;
7624 DECL_EXTERNAL (to) = 1;
7625 TREE_READONLY (to) = 1;
7626
7627 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7628 name = targetm.strip_name_encoding (name);
7629 if (name[0] == FASTCALL_PREFIX)
7630 {
7631 name++;
7632 prefix = "*__imp_";
7633 }
7634 else
7635 prefix = "*__imp__";
7636
7637 namelen = strlen (name);
7638 prefixlen = strlen (prefix);
9415ab7d 7639 imp_name = (char *) alloca (namelen + prefixlen + 1);
da489f73
RH
7640 memcpy (imp_name, prefix, prefixlen);
7641 memcpy (imp_name + prefixlen, name, namelen + 1);
7642
7643 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7644 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7645 SET_SYMBOL_REF_DECL (rtl, to);
7646 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7647
7648 rtl = gen_const_mem (Pmode, rtl);
7649 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7650
7651 SET_DECL_RTL (to, rtl);
7652
7653 return to;
7654}
7655
 7656/* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
 7657 true if we require the result to be a register. */
7658
7659static rtx
7660legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7661{
7662 tree imp_decl;
7663 rtx x;
7664
7665 gcc_assert (SYMBOL_REF_DECL (symbol));
7666 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7667
7668 x = DECL_RTL (imp_decl);
7669 if (want_reg)
7670 x = force_reg (Pmode, x);
7671 return x;
7672}
7673
3b3c6a3f
MM
7674/* Try machine-dependent ways of modifying an illegitimate address
7675 to be legitimate. If we find one, return the new, valid address.
7676 This macro is used in only one place: `memory_address' in explow.c.
7677
7678 OLDX is the address as it was before break_out_memory_refs was called.
7679 In some cases it is useful to look at this to decide what needs to be done.
7680
7681 MODE and WIN are passed so that this macro can use
7682 GO_IF_LEGITIMATE_ADDRESS.
7683
7684 It is always safe for this macro to do nothing. It exists to recognize
7685 opportunities to optimize the output.
7686
7687 For the 80386, we handle X+REG by loading X into a register R and
7688 using R+REG. R will go in a general reg and indexing will be used.
7689 However, if REG is a broken-out memory address or multiplication,
7690 nothing needs to be done because REG can certainly go in a general reg.
7691
7692 When -fpic is used, special handling is needed for symbolic references.
7693 See comments by legitimize_pic_address in i386.c for details. */
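/* A small example of what the code below does (illustrative): the
   illegitimate address

     (plus (ashift (reg A) (const_int 2)) (plus (reg B) (const_int 8)))

   first has its shift rewritten as (mult (reg A) (const_int 4)) and is
   then re-associated into (plus (plus (mult A 4) B) (const_int 8)),
   which GO_IF_LEGITIMATE_ADDRESS accepts as 8(%B,%A,4).  */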
7694
7695rtx
8d531ab9 7696legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
3b3c6a3f
MM
7697{
7698 int changed = 0;
7699 unsigned log;
7700
8fe75e43 7701 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
f996902d 7702 if (log)
9415ab7d 7703 return legitimize_tls_address (x, (enum tls_model) log, false);
b39edae3
RH
7704 if (GET_CODE (x) == CONST
7705 && GET_CODE (XEXP (x, 0)) == PLUS
8fe75e43
RH
7706 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7707 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
b39edae3 7708 {
9415ab7d
TN
7709 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
7710 (enum tls_model) log, false);
b39edae3
RH
7711 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7712 }
f996902d 7713
3b3c6a3f
MM
7714 if (flag_pic && SYMBOLIC_CONST (x))
7715 return legitimize_pic_address (x, 0);
7716
da489f73
RH
7717 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7718 {
7719 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7720 return legitimize_dllimport_symbol (x, true);
7721 if (GET_CODE (x) == CONST
7722 && GET_CODE (XEXP (x, 0)) == PLUS
7723 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7724 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7725 {
7726 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7727 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7728 }
7729 }
7730
3b3c6a3f
MM
 7731 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7732 if (GET_CODE (x) == ASHIFT
7656aee4 7733 && CONST_INT_P (XEXP (x, 1))
85b583d3 7734 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
3b3c6a3f
MM
7735 {
7736 changed = 1;
85b583d3 7737 log = INTVAL (XEXP (x, 1));
a269a03c
JC
7738 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7739 GEN_INT (1 << log));
3b3c6a3f
MM
7740 }
7741
7742 if (GET_CODE (x) == PLUS)
7743 {
0f290768 7744 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
e9a25f70 7745
3b3c6a3f 7746 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7656aee4 7747 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
85b583d3 7748 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
3b3c6a3f
MM
7749 {
7750 changed = 1;
85b583d3 7751 log = INTVAL (XEXP (XEXP (x, 0), 1));
c5c76735
JL
7752 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7753 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7754 GEN_INT (1 << log));
3b3c6a3f
MM
7755 }
7756
7757 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7656aee4 7758 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
85b583d3 7759 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
3b3c6a3f
MM
7760 {
7761 changed = 1;
85b583d3 7762 log = INTVAL (XEXP (XEXP (x, 1), 1));
c5c76735
JL
7763 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7764 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7765 GEN_INT (1 << log));
3b3c6a3f
MM
7766 }
7767
0f290768 7768 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
7769 if (GET_CODE (XEXP (x, 1)) == MULT)
7770 {
7771 rtx tmp = XEXP (x, 0);
7772 XEXP (x, 0) = XEXP (x, 1);
7773 XEXP (x, 1) = tmp;
7774 changed = 1;
7775 }
7776
7777 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7778 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7779 created by virtual register instantiation, register elimination, and
7780 similar optimizations. */
7781 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7782 {
7783 changed = 1;
c5c76735
JL
7784 x = gen_rtx_PLUS (Pmode,
7785 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7786 XEXP (XEXP (x, 1), 0)),
7787 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
7788 }
7789
e9a25f70
JL
7790 /* Canonicalize
7791 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
7792 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7793 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7794 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7795 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7796 && CONSTANT_P (XEXP (x, 1)))
7797 {
00c79232
ML
7798 rtx constant;
7799 rtx other = NULL_RTX;
3b3c6a3f 7800
7656aee4 7801 if (CONST_INT_P (XEXP (x, 1)))
3b3c6a3f
MM
7802 {
7803 constant = XEXP (x, 1);
7804 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7805 }
7656aee4 7806 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
3b3c6a3f
MM
7807 {
7808 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7809 other = XEXP (x, 1);
7810 }
7811 else
7812 constant = 0;
7813
7814 if (constant)
7815 {
7816 changed = 1;
c5c76735
JL
7817 x = gen_rtx_PLUS (Pmode,
7818 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7819 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7820 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
7821 }
7822 }
7823
7824 if (changed && legitimate_address_p (mode, x, FALSE))
7825 return x;
7826
7827 if (GET_CODE (XEXP (x, 0)) == MULT)
7828 {
7829 changed = 1;
7830 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7831 }
7832
7833 if (GET_CODE (XEXP (x, 1)) == MULT)
7834 {
7835 changed = 1;
7836 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7837 }
7838
7839 if (changed
7656aee4
UB
7840 && REG_P (XEXP (x, 1))
7841 && REG_P (XEXP (x, 0)))
3b3c6a3f
MM
7842 return x;
7843
7844 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7845 {
7846 changed = 1;
7847 x = legitimize_pic_address (x, 0);
7848 }
7849
7850 if (changed && legitimate_address_p (mode, x, FALSE))
7851 return x;
7852
7656aee4 7853 if (REG_P (XEXP (x, 0)))
3b3c6a3f 7854 {
8d531ab9
KH
7855 rtx temp = gen_reg_rtx (Pmode);
7856 rtx val = force_operand (XEXP (x, 1), temp);
3b3c6a3f
MM
7857 if (val != temp)
7858 emit_move_insn (temp, val);
7859
7860 XEXP (x, 1) = temp;
7861 return x;
7862 }
7863
7656aee4 7864 else if (REG_P (XEXP (x, 1)))
3b3c6a3f 7865 {
8d531ab9
KH
7866 rtx temp = gen_reg_rtx (Pmode);
7867 rtx val = force_operand (XEXP (x, 0), temp);
3b3c6a3f
MM
7868 if (val != temp)
7869 emit_move_insn (temp, val);
7870
7871 XEXP (x, 0) = temp;
7872 return x;
7873 }
7874 }
7875
7876 return x;
7877}
2a2ab3f9
JVA
7878\f
7879/* Print an integer constant expression in assembler syntax. Addition
7880 and subtraction are the only arithmetic that may appear in these
7881 expressions. FILE is the stdio stream to write to, X is the rtx, and
7882 CODE is the operand print code from the output string. */
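/* For example (illustrative), (const (plus (symbol_ref "x") (const_int 4)))
   is printed as "x+4", and (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF))
   as "x@GOTOFF"; the relocation suffix is chosen in the UNSPEC case
   below.  */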
7883
7884static void
b96a374d 7885output_pic_addr_const (FILE *file, rtx x, int code)
2a2ab3f9
JVA
7886{
7887 char buf[256];
7888
7889 switch (GET_CODE (x))
7890 {
7891 case PC:
d0396b79
NS
7892 gcc_assert (flag_pic);
7893 putc ('.', file);
2a2ab3f9
JVA
7894 break;
7895
7896 case SYMBOL_REF:
320ce1d3
MS
7897 if (! TARGET_MACHO || TARGET_64BIT)
7898 output_addr_const (file, x);
7899 else
7900 {
7901 const char *name = XSTR (x, 0);
7902
ccf8e764
RH
7903 /* Mark the decl as referenced so that cgraph will
7904 output the function. */
320ce1d3
MS
7905 if (SYMBOL_REF_DECL (x))
7906 mark_decl_referenced (SYMBOL_REF_DECL (x));
7907
320ce1d3 7908#if TARGET_MACHO
c88fc50c
MS
7909 if (MACHOPIC_INDIRECT
7910 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
320ce1d3 7911 name = machopic_indirection_name (x, /*stub_p=*/true);
c88fc50c 7912#endif
7913 assemble_name (file, name);
7914 }
7915 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
7916 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
91bb873f 7917 fputs ("@PLT", file);
7918 break;
7919
7920 case LABEL_REF:
7921 x = XEXP (x, 0);
5efb1046 7922 /* FALLTHRU */
7923 case CODE_LABEL:
7924 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7925 assemble_name (asm_out_file, buf);
7926 break;
7927
7928 case CONST_INT:
f64cecad 7929 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7930 break;
7931
7932 case CONST:
7933 /* This used to output parentheses around the expression,
7934 but that does not work on the 386 (either ATT or BSD assembler). */
7935 output_pic_addr_const (file, XEXP (x, 0), code);
7936 break;
7937
7938 case CONST_DOUBLE:
7939 if (GET_MODE (x) == VOIDmode)
7940 {
7941 /* We can use %d if the number is <32 bits and positive. */
7942 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7943 fprintf (file, "0x%lx%08lx",
7944 (unsigned long) CONST_DOUBLE_HIGH (x),
7945 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 7946 else
f64cecad 7947 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7948 }
7949 else
7950 /* We can't handle floating point constants;
7951 PRINT_OPERAND must handle them. */
7952 output_operand_lossage ("floating constant misused");
7953 break;
7954
7955 case PLUS:
e9a25f70 7956 /* Some assemblers need integer constants to appear first. */
7656aee4 7957 if (CONST_INT_P (XEXP (x, 0)))
2a2ab3f9 7958 {
2a2ab3f9 7959 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 7960 putc ('+', file);
e9a25f70 7961 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 7962 }
5656a184 7963 else
2a2ab3f9 7964 {
7656aee4 7965 gcc_assert (CONST_INT_P (XEXP (x, 1)));
2a2ab3f9 7966 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 7967 putc ('+', file);
e9a25f70 7968 output_pic_addr_const (file, XEXP (x, 0), code);
7969 }
7970 break;
7971
7972 case MINUS:
7973 if (!TARGET_MACHO)
7974 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
2a2ab3f9 7975 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 7976 putc ('-', file);
2a2ab3f9 7977 output_pic_addr_const (file, XEXP (x, 1), code);
7978 if (!TARGET_MACHO)
7979 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7980 break;
7981
91bb873f 7982 case UNSPEC:
d0396b79 7983 gcc_assert (XVECLEN (x, 0) == 1);
7984 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7985 switch (XINT (x, 1))
77ebd435 7986 {
8ee41eaf 7987 case UNSPEC_GOT:
7988 fputs ("@GOT", file);
7989 break;
8ee41eaf 7990 case UNSPEC_GOTOFF:
7991 fputs ("@GOTOFF", file);
7992 break;
7993 case UNSPEC_PLTOFF:
7994 fputs ("@PLTOFF", file);
7995 break;
8ee41eaf 7996 case UNSPEC_GOTPCREL:
edfe8595 7997 fputs ("@GOTPCREL(%rip)", file);
6eb791fc 7998 break;
f996902d 7999 case UNSPEC_GOTTPOFF:
dea73790 8000 /* FIXME: This might be @TPOFF in Sun ld too. */
8001 fputs ("@GOTTPOFF", file);
8002 break;
8003 case UNSPEC_TPOFF:
8004 fputs ("@TPOFF", file);
8005 break;
8006 case UNSPEC_NTPOFF:
8007 if (TARGET_64BIT)
8008 fputs ("@TPOFF", file);
8009 else
8010 fputs ("@NTPOFF", file);
8011 break;
8012 case UNSPEC_DTPOFF:
8013 fputs ("@DTPOFF", file);
8014 break;
dea73790 8015 case UNSPEC_GOTNTPOFF:
8016 if (TARGET_64BIT)
8017 fputs ("@GOTTPOFF(%rip)", file);
8018 else
8019 fputs ("@GOTNTPOFF", file);
8020 break;
8021 case UNSPEC_INDNTPOFF:
8022 fputs ("@INDNTPOFF", file);
8023 break;
8024 default:
8025 output_operand_lossage ("invalid UNSPEC as operand");
8026 break;
8027 }
8028 break;
8029
8030 default:
8031 output_operand_lossage ("invalid expression as operand");
8032 }
8033}
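/* A hypothetical example of the effect (illustration only, not from the
   original source): for (const (plus (symbol_ref "foo") (const_int 4)))
   this prints "foo+4", and a symbol wrapped in
   (unspec [...] UNSPEC_GOTOFF) prints as "foo@GOTOFF", per the UNSPEC
   suffix table above.  */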
1865dbb5 8034
fdbe66f2 8035/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8036 We need to emit DTP-relative relocations. */
8037
2ed941ec 8038static void ATTRIBUTE_UNUSED
b96a374d 8039i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
b9203463 8040{
8041 fputs (ASM_LONG, file);
8042 output_addr_const (file, x);
8043 fputs ("@DTPOFF", file);
8044 switch (size)
8045 {
8046 case 4:
8047 break;
8048 case 8:
75d38379 8049 fputs (", 0", file);
b9203463 8050 break;
b9203463 8051 default:
d0396b79 8052 gcc_unreachable ();
b9203463 8053 }
8054}
8055
1865dbb5 8056/* In the name of slightly smaller debug output, and to cater to
aabcd309 8057 general assembler lossage, recognize PIC+GOTOFF and turn it back
5656a184 8058 into a direct symbol reference.
8059
8060 On Darwin, this is necessary to avoid a crash, because Darwin
8061 has a different PIC label for each routine but the DWARF debugging
8062 information is not associated with any particular routine, so it's
8063 necessary to remove references to the PIC label from RTL stored by
8064 the DWARF output code. */
1865dbb5 8065
69bd9368 8066static rtx
b96a374d 8067ix86_delegitimize_address (rtx orig_x)
1865dbb5 8068{
8069 rtx x = orig_x;
8070 /* reg_addend is NULL or a multiple of some register. */
8071 rtx reg_addend = NULL_RTX;
8072 /* const_addend is NULL or a const_int. */
8073 rtx const_addend = NULL_RTX;
8074 /* This is the result, or NULL. */
8075 rtx result = NULL_RTX;
1865dbb5 8076
7656aee4 8077 if (MEM_P (x))
8078 x = XEXP (x, 0);
8079
8080 if (TARGET_64BIT)
8081 {
8082 if (GET_CODE (x) != CONST
8083 || GET_CODE (XEXP (x, 0)) != UNSPEC
8ee41eaf 8084 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7656aee4 8085 || !MEM_P (orig_x))
8086 return orig_x;
8087 return XVECEXP (XEXP (x, 0), 0, 0);
8088 }
8089
1865dbb5 8090 if (GET_CODE (x) != PLUS
8091 || GET_CODE (XEXP (x, 1)) != CONST)
8092 return orig_x;
8093
7656aee4 8094 if (REG_P (XEXP (x, 0))
8095 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8096 /* %ebx + GOT/GOTOFF */
dbde310d 8097 ;
8098 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8099 {
8100 /* %ebx + %reg * scale + GOT/GOTOFF */
dbde310d 8101 reg_addend = XEXP (x, 0);
7656aee4 8102 if (REG_P (XEXP (reg_addend, 0))
8103 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8104 reg_addend = XEXP (reg_addend, 1);
7656aee4 8105 else if (REG_P (XEXP (reg_addend, 1))
8106 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8107 reg_addend = XEXP (reg_addend, 0);
8108 else
8109 return orig_x;
7656aee4 8110 if (!REG_P (reg_addend)
8111 && GET_CODE (reg_addend) != MULT
8112 && GET_CODE (reg_addend) != ASHIFT)
8113 return orig_x;
8114 }
8115 else
8116 return orig_x;
8117
1865dbb5 8118 x = XEXP (XEXP (x, 1), 0);
1865dbb5 8119 if (GET_CODE (x) == PLUS
7656aee4 8120 && CONST_INT_P (XEXP (x, 1)))
ec65b2e3 8121 {
8122 const_addend = XEXP (x, 1);
8123 x = XEXP (x, 0);
ec65b2e3 8124 }
1865dbb5 8125
dbde310d 8126 if (GET_CODE (x) == UNSPEC
8127 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8128 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8129 result = XVECEXP (x, 0, 0);
8130
7931b1be 8131 if (TARGET_MACHO && darwin_local_data_pic (x)
7656aee4 8132 && !MEM_P (orig_x))
8133 result = XEXP (x, 0);
8134
8135 if (! result)
8136 return orig_x;
5656a184 8137
8138 if (const_addend)
8139 result = gen_rtx_PLUS (Pmode, result, const_addend);
8140 if (reg_addend)
8141 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8142 return result;
1865dbb5 8143}
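/* Sketch of the transformation, under the usual -fpic setup (example
   only, not from the original source):
     (mem (plus (reg %ebx)
                (const (unspec [(symbol_ref "x")] UNSPEC_GOT))))
   delegitimizes back to plain (symbol_ref "x"), so the DWARF output
   code sees the symbol instead of the PIC-register arithmetic.  */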
8144
8145/* If X is a machine specific address (i.e. a symbol or label being
8146 referenced as a displacement from the GOT implemented using an
8147 UNSPEC), then return the base term. Otherwise return X. */
8148
8149rtx
8150ix86_find_base_term (rtx x)
8151{
8152 rtx term;
8153
8154 if (TARGET_64BIT)
8155 {
8156 if (GET_CODE (x) != CONST)
8157 return x;
8158 term = XEXP (x, 0);
8159 if (GET_CODE (term) == PLUS
8160 && (CONST_INT_P (XEXP (term, 1))
8161 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8162 term = XEXP (term, 0);
8163 if (GET_CODE (term) != UNSPEC
8164 || XINT (term, 1) != UNSPEC_GOTPCREL)
8165 return x;
8166
8167 term = XVECEXP (term, 0, 0);
8168
8169 if (GET_CODE (term) != SYMBOL_REF
8170 && GET_CODE (term) != LABEL_REF)
8171 return x;
8172
8173 return term;
8174 }
8175
8176 term = ix86_delegitimize_address (x);
8177
8178 if (GET_CODE (term) != SYMBOL_REF
8179 && GET_CODE (term) != LABEL_REF)
8180 return x;
8181
8182 return term;
8183}
2a2ab3f9 8184\f
a269a03c 8185static void
8186put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8187 int fp, FILE *file)
a269a03c 8188{
8189 const char *suffix;
8190
8191 if (mode == CCFPmode || mode == CCFPUmode)
8192 {
8193 enum rtx_code second_code, bypass_code;
8194 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
d0396b79 8195 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8196 code = ix86_fp_compare_code_to_integer (code);
8197 mode = CCmode;
8198 }
8199 if (reverse)
8200 code = reverse_condition (code);
e075ae69 8201
8202 switch (code)
8203 {
8204 case EQ:
8205 switch (mode)
8206 {
8207 case CCAmode:
8208 suffix = "a";
8209 break;
8210
8211 case CCCmode:
8212 suffix = "c";
8213 break;
8214
8215 case CCOmode:
8216 suffix = "o";
8217 break;
8218
8219 case CCSmode:
8220 suffix = "s";
8221 break;
8222
8223 default:
8224 suffix = "e";
8225 }
a269a03c 8226 break;
a269a03c 8227 case NE:
8228 switch (mode)
8229 {
8230 case CCAmode:
8231 suffix = "na";
8232 break;
8233
8234 case CCCmode:
8235 suffix = "nc";
8236 break;
8237
8238 case CCOmode:
8239 suffix = "no";
8240 break;
8241
8242 case CCSmode:
8243 suffix = "ns";
8244 break;
8245
8246 default:
8247 suffix = "ne";
8248 }
a269a03c 8249 break;
a269a03c 8250 case GT:
d0396b79 8251 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
e075ae69 8252 suffix = "g";
a269a03c 8253 break;
a269a03c 8254 case GTU:
8255 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8256 Those same assemblers have the same but opposite lossage on cmov. */
d0396b79 8257 gcc_assert (mode == CCmode);
e075ae69 8258 suffix = fp ? "nbe" : "a";
a269a03c 8259 break;
a269a03c 8260 case LT:
8261 switch (mode)
8262 {
8263 case CCNOmode:
8264 case CCGOCmode:
8265 suffix = "s";
8266 break;
8267
8268 case CCmode:
8269 case CCGCmode:
8270 suffix = "l";
8271 break;
8272
8273 default:
8274 gcc_unreachable ();
8275 }
a269a03c 8276 break;
a269a03c 8277 case LTU:
d0396b79 8278 gcc_assert (mode == CCmode);
8279 suffix = "b";
8280 break;
a269a03c 8281 case GE:
8282 switch (mode)
8283 {
8284 case CCNOmode:
8285 case CCGOCmode:
8286 suffix = "ns";
8287 break;
8288
8289 case CCmode:
8290 case CCGCmode:
8291 suffix = "ge";
8292 break;
8293
8294 default:
8295 gcc_unreachable ();
8296 }
a269a03c 8297 break;
a269a03c 8298 case GEU:
e075ae69 8299 /* ??? As above. */
d0396b79 8300 gcc_assert (mode == CCmode);
7e08e190 8301 suffix = fp ? "nb" : "ae";
a269a03c 8302 break;
a269a03c 8303 case LE:
d0396b79 8304 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
e075ae69 8305 suffix = "le";
a269a03c 8306 break;
a269a03c 8307 case LEU:
d0396b79 8308 gcc_assert (mode == CCmode);
7e08e190 8309 suffix = "be";
a269a03c 8310 break;
3a3677ff 8311 case UNORDERED:
9e7adcb3 8312 suffix = fp ? "u" : "p";
8313 break;
8314 case ORDERED:
9e7adcb3 8315 suffix = fp ? "nu" : "np";
3a3677ff 8316 break;
a269a03c 8317 default:
d0396b79 8318 gcc_unreachable ();
8319 }
8320 fputs (suffix, file);
8321}
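/* Illustration (not in the original source): (GT, CCGCmode) yields the
   suffix "g", so a template such as "set%C1\t%0" would emit "setg %al";
   with REVERSE nonzero the same operands yield "le" instead.  */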
8322
8323/* Print the name of register X to FILE based on its machine mode and number.
8324 If CODE is 'w', pretend the mode is HImode.
8325 If CODE is 'b', pretend the mode is QImode.
8326 If CODE is 'k', pretend the mode is SImode.
8327 If CODE is 'q', pretend the mode is DImode.
d0396b79 8328 If CODE is 'h', pretend the reg is the 'high' byte register.
8329 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8330
e075ae69 8331void
b96a374d 8332print_reg (rtx x, int code, FILE *file)
e5cb57e8 8333{
8334 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8335 && REGNO (x) != FRAME_POINTER_REGNUM
8336 && REGNO (x) != FLAGS_REG
8337 && REGNO (x) != FPSR_REG
8338 && REGNO (x) != FPCR_REG);
480feac0 8339
5bf0ebab 8340 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8341 putc ('%', file);
8342
ef6257cd 8343 if (code == 'w' || MMX_REG_P (x))
8344 code = 2;
8345 else if (code == 'b')
8346 code = 1;
8347 else if (code == 'k')
8348 code = 4;
8349 else if (code == 'q')
8350 code = 8;
8351 else if (code == 'y')
8352 code = 3;
8353 else if (code == 'h')
8354 code = 0;
8355 else
8356 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 8357
8358 /* Irritatingly, AMD extended registers use a different naming convention
8359 from the normal registers. */
8360 if (REX_INT_REG_P (x))
8361 {
d0396b79 8362 gcc_assert (TARGET_64BIT);
8363 switch (code)
8364 {
ef6257cd 8365 case 0:
c725bd79 8366 error ("extended registers have no high halves");
8367 break;
8368 case 1:
8369 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8370 break;
8371 case 2:
8372 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8373 break;
8374 case 4:
8375 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8376 break;
8377 case 8:
8378 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8379 break;
8380 default:
c725bd79 8381 error ("unsupported operand size for extended register");
8382 break;
8383 }
8384 return;
8385 }
8386 switch (code)
8387 {
8388 case 3:
8389 if (STACK_TOP_P (x))
8390 {
8391 fputs ("st(0)", file);
8392 break;
8393 }
5efb1046 8394 /* FALLTHRU */
e075ae69 8395 case 8:
3f3f2124 8396 case 4:
e075ae69 8397 case 12:
446988df 8398 if (! ANY_FP_REG_P (x))
885a70fd 8399 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5efb1046 8400 /* FALLTHRU */
a7180f70 8401 case 16:
e075ae69 8402 case 2:
d4c32b6f 8403 normal:
8404 fputs (hi_reg_name[REGNO (x)], file);
8405 break;
8406 case 1:
8407 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8408 goto normal;
8409 fputs (qi_reg_name[REGNO (x)], file);
8410 break;
8411 case 0:
8412 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8413 goto normal;
8414 fputs (qi_high_reg_name[REGNO (x)], file);
8415 break;
8416 default:
d0396b79 8417 gcc_unreachable ();
fe25fea3 8418 }
8419}
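/* Examples (illustration only): for hard register ax, the 'h' code
   prints "%ah", 'b' prints "%al", 'w' prints "%ax", 'k' prints "%eax"
   and 'q' prints "%rax"; for extended register r10 with the 'k' code,
   the REX branch above prints "%r10d".  */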
8420
8421/* Locate some local-dynamic symbol still in use by this function
8422 so that we can print its name in some tls_local_dynamic_base
8423 pattern. */
8424
8425static int
8426get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8427{
8428 rtx x = *px;
8429
8430 if (GET_CODE (x) == SYMBOL_REF
8431 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8432 {
8433 cfun->machine->some_ld_name = XSTR (x, 0);
8434 return 1;
8435 }
8436
8437 return 0;
8438}
8439
f996902d 8440static const char *
b96a374d 8441get_some_local_dynamic_name (void)
8442{
8443 rtx insn;
8444
8445 if (cfun->machine->some_ld_name)
8446 return cfun->machine->some_ld_name;
8447
8448 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8449 if (INSN_P (insn)
8450 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8451 return cfun->machine->some_ld_name;
8452
d0396b79 8453 gcc_unreachable ();
8454}
8455
2a2ab3f9 8456/* Meaning of CODE:
fe25fea3 8457 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 8458 C -- print opcode suffix for set/cmov insn.
fe25fea3 8459 c -- like C, but print reversed condition
ef6257cd 8460 F,f -- likewise, but for floating-point.
8461 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8462 otherwise nothing
8463 R -- print the prefix for register names.
8464 z -- print the opcode suffix for the size of the current operand.
8465 * -- print a star (in certain assembler syntax)
fb204271 8466 A -- print an absolute memory reference.
2a2ab3f9 8467 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8468 s -- print a shift double count, followed by the assembler's argument
8469 delimiter.
8470 b -- print the QImode name of the register for the indicated operand.
8471 %b0 would print %al if operands[0] is reg 0.
8472 w -- likewise, print the HImode name of the register.
8473 k -- likewise, print the SImode name of the register.
3f3f2124 8474 q -- likewise, print the DImode name of the register.
8475 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8476 y -- print "st(0)" instead of "st" as a register.
a46d1d38 8477 D -- print condition for SSE cmp instruction.
8478 P -- if PIC, print an @PLT suffix.
8479 X -- don't print any sort of PIC '@' suffix for a symbol.
f996902d 8480 & -- print some in-use local-dynamic symbol name.
ef719a44 8481 H -- print a memory address offset by 8; used for sse high-parts
a46d1d38 8482 */
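/* A small usage illustration (not in the original source): if
   operands[0] is register eax, then "%b0" prints "%al", "%w0" prints
   "%ax" and "%k0" prints "%eax"; "%z1" appends the opcode suffix
   implied by the mode of operands[1], e.g. 'l' for SImode.  */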
8483
8484void
b96a374d 8485print_operand (FILE *file, rtx x, int code)
8486{
8487 if (code)
8488 {
8489 switch (code)
8490 {
8491 case '*':
80f33d06 8492 if (ASSEMBLER_DIALECT == ASM_ATT)
8493 putc ('*', file);
8494 return;
8495
8496 case '&':
8497 assemble_name (file, get_some_local_dynamic_name ());
8498 return;
8499
fb204271 8500 case 'A':
d0396b79 8501 switch (ASSEMBLER_DIALECT)
fb204271 8502 {
8503 case ASM_ATT:
8504 putc ('*', file);
8505 break;
8506
8507 case ASM_INTEL:
8508 /* Intel syntax. For absolute addresses, registers should not
8509 be surrounded by brackets. */
7656aee4 8510 if (!REG_P (x))
8511 {
8512 putc ('[', file);
8513 PRINT_OPERAND (file, x, 0);
8514 putc (']', file);
8515 return;
8516 }
8517 break;
8518
8519 default:
8520 gcc_unreachable ();
8521 }
8522
8523 PRINT_OPERAND (file, x, 0);
8524 return;
8525
8526
2a2ab3f9 8527 case 'L':
80f33d06 8528 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 8529 putc ('l', file);
8530 return;
8531
8532 case 'W':
80f33d06 8533 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 8534 putc ('w', file);
8535 return;
8536
8537 case 'B':
80f33d06 8538 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 8539 putc ('b', file);
8540 return;
8541
8542 case 'Q':
80f33d06 8543 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 8544 putc ('l', file);
8545 return;
8546
8547 case 'S':
80f33d06 8548 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 8549 putc ('s', file);
8550 return;
8551
5f1ec3e6 8552 case 'T':
80f33d06 8553 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 8554 putc ('t', file);
8555 return;
8556
8557 case 'z':
8558 /* 387 opcodes don't get size suffixes if the operands are
0f290768 8559 registers. */
2a2ab3f9
JVA
8560 if (STACK_REG_P (x))
8561 return;
8562
831c4e87
KC
8563 /* Likewise if using Intel opcodes. */
8564 if (ASSEMBLER_DIALECT == ASM_INTEL)
8565 return;
8566
8567 /* This is the size of op from size of operand. */
8568 switch (GET_MODE_SIZE (GET_MODE (x)))
8569 {
8570 case 1:
8571 putc ('b', file);
8572 return;
8573
2a2ab3f9 8574 case 2:
8575 if (MEM_P (x))
8576 {
155d8a47 8577#ifdef HAVE_GAS_FILDS_FISTS
f3ba4235 8578 putc ('s', file);
155d8a47 8579#endif
8580 return;
8581 }
8582 else
8583 putc ('w', file);
8584 return;
8585
8586 case 4:
8587 if (GET_MODE (x) == SFmode)
8588 {
e075ae69 8589 putc ('s', file);
8590 return;
8591 }
8592 else
e075ae69 8593 putc ('l', file);
8594 return;
8595
5f1ec3e6 8596 case 12:
2b589241 8597 case 16:
8598 putc ('t', file);
8599 return;
5f1ec3e6 8600
8601 case 8:
8602 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8603 {
8604#ifdef GAS_MNEMONICS
e075ae69 8605 putc ('q', file);
56c0e8fa 8606#else
8607 putc ('l', file);
8608 putc ('l', file);
8609#endif
8610 }
8611 else
8612 putc ('l', file);
2a2ab3f9 8613 return;
8614
8615 default:
d0396b79 8616 gcc_unreachable ();
2a2ab3f9 8617 }
8618
8619 case 'b':
8620 case 'w':
8621 case 'k':
3f3f2124 8622 case 'q':
8623 case 'h':
8624 case 'y':
5cb6195d 8625 case 'X':
e075ae69 8626 case 'P':
8627 break;
8628
2d49677f 8629 case 's':
7656aee4 8630 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8631 {
8632 PRINT_OPERAND (file, x, 0);
e075ae69 8633 putc (',', file);
2d49677f 8634 }
8635 return;
8636
8637 case 'D':
8638 /* Little bit of braindamage here. The SSE compare instructions
8639 use completely different names for the comparisons than the
8640 fp conditional moves do. */
8641 switch (GET_CODE (x))
8642 {
8643 case EQ:
8644 case UNEQ:
8645 fputs ("eq", file);
8646 break;
8647 case LT:
8648 case UNLT:
8649 fputs ("lt", file);
8650 break;
8651 case LE:
8652 case UNLE:
8653 fputs ("le", file);
8654 break;
8655 case UNORDERED:
8656 fputs ("unord", file);
8657 break;
8658 case NE:
8659 case LTGT:
8660 fputs ("neq", file);
8661 break;
8662 case UNGE:
8663 case GE:
8664 fputs ("nlt", file);
8665 break;
8666 case UNGT:
8667 case GT:
8668 fputs ("nle", file);
8669 break;
8670 case ORDERED:
8671 fputs ("ord", file);
8672 break;
8673 default:
d0396b79 8674 gcc_unreachable ();
8675 }
8676 return;
048b1c95 8677 case 'O':
f6f5dff2 8678#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8679 if (ASSEMBLER_DIALECT == ASM_ATT)
8680 {
8681 switch (GET_MODE (x))
8682 {
8683 case HImode: putc ('w', file); break;
8684 case SImode:
8685 case SFmode: putc ('l', file); break;
8686 case DImode:
8687 case DFmode: putc ('q', file); break;
d0396b79 8688 default: gcc_unreachable ();
8689 }
8690 putc ('.', file);
8691 }
8692#endif
8693 return;
1853aadd 8694 case 'C':
e075ae69 8695 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 8696 return;
fe25fea3 8697 case 'F':
f6f5dff2 8698#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8699 if (ASSEMBLER_DIALECT == ASM_ATT)
8700 putc ('.', file);
8701#endif
e075ae69 8702 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8703 return;
8704
e9a25f70 8705 /* Like above, but reverse condition */
e075ae69 8706 case 'c':
fce5a9f2 8707 /* Check to see if argument to %c is really a constant
c1d5afc4 8708 and not a condition code which needs to be reversed. */
ec8e098d 8709 if (!COMPARISON_P (x))
8710 {
8711 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8712 return;
8713 }
8714 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8715 return;
fe25fea3 8716 case 'f':
f6f5dff2 8717#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8718 if (ASSEMBLER_DIALECT == ASM_ATT)
8719 putc ('.', file);
8720#endif
e075ae69 8721 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 8722 return;
8723
8724 case 'H':
8725 /* It doesn't actually matter what mode we use here, as we're
8726 only going to use this for printing. */
8727 x = adjust_address_nv (x, DImode, 8);
8728 break;
8729
8730 case '+':
8731 {
8732 rtx x;
e5cb57e8 8733
8734 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8735 return;
a4f31c00 8736
8737 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8738 if (x)
8739 {
8740 int pred_val = INTVAL (XEXP (x, 0));
8741
8742 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8743 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8744 {
8745 int taken = pred_val > REG_BR_PROB_BASE / 2;
8746 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8747
8748 /* Emit hints only in the case the default branch prediction
d1f87653 8749 heuristics would fail. */
8750 if (taken != cputaken)
8751 {
8752 /* We use 3e (DS) prefix for taken branches and
8753 2e (CS) prefix for not taken branches. */
8754 if (taken)
8755 fputs ("ds ; ", file);
8756 else
8757 fputs ("cs ; ", file);
8758 }
8759 }
8760 }
8761 return;
8762 }
4af3895e 8763 default:
9e637a26 8764 output_operand_lossage ("invalid operand code '%c'", code);
8765 }
8766 }
e9a25f70 8767
7656aee4 8768 if (REG_P (x))
a55f4481 8769 print_reg (x, code, file);
e9a25f70 8770
7656aee4 8771 else if (MEM_P (x))
2a2ab3f9 8772 {
e075ae69 8773 /* No `byte ptr' prefix for call instructions. */
80f33d06 8774 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
2a2ab3f9 8775 {
69ddee61 8776 const char * size;
8777 switch (GET_MODE_SIZE (GET_MODE (x)))
8778 {
8779 case 1: size = "BYTE"; break;
8780 case 2: size = "WORD"; break;
8781 case 4: size = "DWORD"; break;
8782 case 8: size = "QWORD"; break;
8783 case 12: size = "XWORD"; break;
a7180f70 8784 case 16: size = "XMMWORD"; break;
e075ae69 8785 default:
d0396b79 8786 gcc_unreachable ();
e075ae69 8787 }
8788
8789 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8790 if (code == 'b')
8791 size = "BYTE";
8792 else if (code == 'w')
8793 size = "WORD";
8794 else if (code == 'k')
8795 size = "DWORD";
8796
8797 fputs (size, file);
8798 fputs (" PTR ", file);
2a2ab3f9 8799 }
8800
8801 x = XEXP (x, 0);
0d7d98ee 8802 /* Avoid (%rip) for call operands. */
d10f5ecf 8803 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7656aee4 8804 && !CONST_INT_P (x))
0d7d98ee 8805 output_addr_const (file, x);
8806 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8807 output_operand_lossage ("invalid constraints for operand");
2a2ab3f9 8808 else
e075ae69 8809 output_address (x);
2a2ab3f9 8810 }
e9a25f70 8811
8812 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8813 {
8814 REAL_VALUE_TYPE r;
8815 long l;
8816
8817 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8818 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69 8819
80f33d06 8820 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 8821 putc ('$', file);
781f4ec1 8822 fprintf (file, "0x%08lx", l);
5f1ec3e6 8823 }
e9a25f70 8824
8825 /* These float cases don't actually occur as immediate operands. */
8826 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5f1ec3e6 8827 {
8828 char dstr[30];
8829
da6eec72 8830 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 8831 fprintf (file, "%s", dstr);
2a2ab3f9 8832 }
e9a25f70 8833
2b589241 8834 else if (GET_CODE (x) == CONST_DOUBLE
f8a1ebc6 8835 && GET_MODE (x) == XFmode)
2a2ab3f9 8836 {
8837 char dstr[30];
8838
da6eec72 8839 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 8840 fprintf (file, "%s", dstr);
2a2ab3f9 8841 }
f996902d 8842
79325812 8843 else
2a2ab3f9 8844 {
8845 /* We have patterns that allow zero sets of memory, for instance.
8846 In 64-bit mode, we should probably support all 8-byte vectors,
8847 since we can in fact encode that into an immediate. */
8848 if (GET_CODE (x) == CONST_VECTOR)
8849 {
d0396b79
NS
8850 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8851 x = const0_rtx;
b4e82619
RH
8852 }
8853
4af3895e 8854 if (code != 'P')
2a2ab3f9 8855 {
7656aee4 8856 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
e075ae69 8857 {
80f33d06 8858 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
8859 putc ('$', file);
8860 }
2a2ab3f9
JVA
8861 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8862 || GET_CODE (x) == LABEL_REF)
e075ae69 8863 {
80f33d06 8864 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
8865 putc ('$', file);
8866 else
8867 fputs ("OFFSET FLAT:", file);
8868 }
2a2ab3f9 8869 }
7656aee4 8870 if (CONST_INT_P (x))
e075ae69
RH
8871 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8872 else if (flag_pic)
2a2ab3f9
JVA
8873 output_pic_addr_const (file, x, code);
8874 else
8875 output_addr_const (file, x);
8876 }
8877}
8878\f
8879/* Print a memory operand whose address is ADDR. */
8880
8881void
8d531ab9 8882print_operand_address (FILE *file, rtx addr)
2a2ab3f9 8883{
8884 struct ix86_address parts;
8885 rtx base, index, disp;
8886 int scale;
d0396b79 8887 int ok = ix86_decompose_address (addr, &parts);
e9a25f70 8888
d0396b79 8889 gcc_assert (ok);
e9a25f70 8890
8891 base = parts.base;
8892 index = parts.index;
8893 disp = parts.disp;
8894 scale = parts.scale;
e9a25f70 8895
8896 switch (parts.seg)
8897 {
8898 case SEG_DEFAULT:
8899 break;
8900 case SEG_FS:
8901 case SEG_GS:
8902 if (USER_LABEL_PREFIX[0] == 0)
8903 putc ('%', file);
8904 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8905 break;
8906 default:
d0396b79 8907 gcc_unreachable ();
8908 }
8909
8910 if (!base && !index)
8911 {
8912 /* Displacement only requires special attention. */
e9a25f70 8913
7656aee4 8914 if (CONST_INT_P (disp))
2a2ab3f9 8915 {
74dc3e94 8916 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8917 {
8918 if (USER_LABEL_PREFIX[0] == 0)
8919 putc ('%', file);
8920 fputs ("ds:", file);
8921 }
74dc3e94 8922 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
2a2ab3f9 8923 }
e075ae69 8924 else if (flag_pic)
74dc3e94 8925 output_pic_addr_const (file, disp, 0);
e075ae69 8926 else
74dc3e94 8927 output_addr_const (file, disp);
8928
8929 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
8930 if (TARGET_64BIT)
8931 {
8932 if (GET_CODE (disp) == CONST
8933 && GET_CODE (XEXP (disp, 0)) == PLUS
7656aee4 8934 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8935 disp = XEXP (XEXP (disp, 0), 0);
8936 if (GET_CODE (disp) == LABEL_REF
8937 || (GET_CODE (disp) == SYMBOL_REF
8938 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8939 fputs ("(%rip)", file);
8940 }
8941 }
8942 else
8943 {
80f33d06 8944 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9 8945 {
e075ae69 8946 if (disp)
2a2ab3f9 8947 {
c399861d 8948 if (flag_pic)
8949 output_pic_addr_const (file, disp, 0);
8950 else if (GET_CODE (disp) == LABEL_REF)
8951 output_asm_label (disp);
2a2ab3f9 8952 else
e075ae69 8953 output_addr_const (file, disp);
8954 }
8955
8956 putc ('(', file);
8957 if (base)
a55f4481 8958 print_reg (base, 0, file);
e075ae69 8959 if (index)
2a2ab3f9 8960 {
e075ae69 8961 putc (',', file);
a55f4481 8962 print_reg (index, 0, file);
8963 if (scale != 1)
8964 fprintf (file, ",%d", scale);
2a2ab3f9 8965 }
e075ae69 8966 putc (')', file);
2a2ab3f9 8967 }
8968 else
8969 {
e075ae69 8970 rtx offset = NULL_RTX;
e9a25f70 8971
8972 if (disp)
8973 {
8974 /* Pull out the offset of a symbol; print any symbol itself. */
8975 if (GET_CODE (disp) == CONST
8976 && GET_CODE (XEXP (disp, 0)) == PLUS
7656aee4 8977 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8978 {
8979 offset = XEXP (XEXP (disp, 0), 1);
8980 disp = gen_rtx_CONST (VOIDmode,
8981 XEXP (XEXP (disp, 0), 0));
8982 }
ce193852 8983
8984 if (flag_pic)
8985 output_pic_addr_const (file, disp, 0);
8986 else if (GET_CODE (disp) == LABEL_REF)
8987 output_asm_label (disp);
7656aee4 8988 else if (CONST_INT_P (disp))
8989 offset = disp;
8990 else
8991 output_addr_const (file, disp);
8992 }
e9a25f70 8993
8994 putc ('[', file);
8995 if (base)
a8620236 8996 {
a55f4481 8997 print_reg (base, 0, file);
8998 if (offset)
8999 {
9000 if (INTVAL (offset) >= 0)
9001 putc ('+', file);
9002 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9003 }
a8620236 9004 }
9005 else if (offset)
9006 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 9007 else
e075ae69 9008 putc ('0', file);
e9a25f70 9009
9010 if (index)
9011 {
9012 putc ('+', file);
a55f4481 9013 print_reg (index, 0, file);
9014 if (scale != 1)
9015 fprintf (file, "*%d", scale);
9016 }
9017 putc (']', file);
9018 }
9019 }
9020}
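/* Example (illustration only): the address ebp + eax*4 - 8 is printed
   as "-8(%ebp,%eax,4)" by the AT&T branch above and as
   "[ebp+eax*4-8]" by the Intel branch.  */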
9021
9022bool
b96a374d 9023output_addr_const_extra (FILE *file, rtx x)
9024{
9025 rtx op;
9026
9027 if (GET_CODE (x) != UNSPEC)
9028 return false;
9029
9030 op = XVECEXP (x, 0, 0);
9031 switch (XINT (x, 1))
9032 {
9033 case UNSPEC_GOTTPOFF:
9034 output_addr_const (file, op);
dea73790 9035 /* FIXME: This might be @TPOFF in Sun ld. */
9036 fputs ("@GOTTPOFF", file);
9037 break;
9038 case UNSPEC_TPOFF:
9039 output_addr_const (file, op);
9040 fputs ("@TPOFF", file);
9041 break;
9042 case UNSPEC_NTPOFF:
9043 output_addr_const (file, op);
9044 if (TARGET_64BIT)
9045 fputs ("@TPOFF", file);
9046 else
9047 fputs ("@NTPOFF", file);
9048 break;
9049 case UNSPEC_DTPOFF:
9050 output_addr_const (file, op);
9051 fputs ("@DTPOFF", file);
9052 break;
9053 case UNSPEC_GOTNTPOFF:
9054 output_addr_const (file, op);
9055 if (TARGET_64BIT)
9056 fputs ("@GOTTPOFF(%rip)", file);
9057 else
9058 fputs ("@GOTNTPOFF", file);
9059 break;
9060 case UNSPEC_INDNTPOFF:
9061 output_addr_const (file, op);
9062 fputs ("@INDNTPOFF", file);
9063 break;
9064
9065 default:
9066 return false;
9067 }
9068
9069 return true;
9070}
9071\f
9072/* Split one or more DImode RTL references into pairs of SImode
9073 references. The RTL can be REG, offsettable MEM, integer constant, or
9074 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9075 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 9076 that parallel "operands". */
9077
9078void
b96a374d 9079split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9080{
9081 while (num--)
9082 {
57dbca5e 9083 rtx op = operands[num];
9084
9085 /* simplify_subreg refuses to split volatile memory addresses,
9086 but we still have to handle them. */
7656aee4 9087 if (MEM_P (op))
2a2ab3f9 9088 {
f4ef873c 9089 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 9090 hi_half[num] = adjust_address (op, SImode, 4);
9091 }
9092 else
b932f770 9093 {
9094 lo_half[num] = simplify_gen_subreg (SImode, op,
9095 GET_MODE (op) == VOIDmode
9096 ? DImode : GET_MODE (op), 0);
9097 hi_half[num] = simplify_gen_subreg (SImode, op,
9098 GET_MODE (op) == VOIDmode
9099 ? DImode : GET_MODE (op), 4);
b932f770 9100 }
9101 }
9102}
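/* Minimal usage sketch (hypothetical caller, not in the original file):

     rtx lo[2], hi[2];
     split_di (operands, 2, lo, hi);
     emit_move_insn (lo[0], lo[1]);   -- move the low SImode halves
     emit_move_insn (hi[0], hi[1]);   -- move the high SImode halves

   This is the usual shape of a DImode move splitter in the machine
   description.  */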
28356f52 9103/* Split one or more TImode RTL references into pairs of DImode
9104 references. The RTL can be REG, offsettable MEM, integer constant, or
9105 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
9106 split and "num" is its length. lo_half and hi_half are output arrays
9107 that parallel "operands". */
9108
9109void
b96a374d 9110split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9111{
9112 while (num--)
9113 {
9114 rtx op = operands[num];
9115
9116 /* simplify_subreg refuses to split volatile memory addresses, but we
9117 still have to handle them. */
7656aee4 9118 if (MEM_P (op))
44cf5b6a
JH
9119 {
9120 lo_half[num] = adjust_address (op, DImode, 0);
9121 hi_half[num] = adjust_address (op, DImode, 8);
9122 }
9123 else
b932f770
JH
9124 {
9125 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9126 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9127 }
9128 }
9129}
2a2ab3f9 9130\f
9131/* Output code to perform a 387 binary operation in INSN, one of PLUS,
9132 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9133 is the expression of the binary operation. The output may either be
9134 emitted here, or returned to the caller, like all output_* functions.
9135
9136 There is no guarantee that the operands are the same mode, as they
0f290768 9137 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 9138
9139#ifndef SYSV386_COMPAT
9140/* Set to 1 for compatibility with brain-damaged assemblers. No-one
9141 wants to fix the assemblers because that causes incompatibility
9142 with gcc. No-one wants to fix gcc because that causes
9143 incompatibility with assemblers... You can use the option of
9144 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9145#define SYSV386_COMPAT 1
9146#endif
9147
69ddee61 9148const char *
b96a374d 9149output_387_binary_op (rtx insn, rtx *operands)
2a2ab3f9 9150{
e3c2afab 9151 static char buf[30];
69ddee61 9152 const char *p;
1deaa899 9153 const char *ssep;
89b17498 9154 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
2a2ab3f9 9155
9156#ifdef ENABLE_CHECKING
9157 /* Even if we do not want to check the inputs, this documents the input
9158 constraints, which helps in understanding the following code. */
9159 if (STACK_REG_P (operands[0])
9160 && ((REG_P (operands[1])
9161 && REGNO (operands[0]) == REGNO (operands[1])
7656aee4 9162 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9163 || (REG_P (operands[2])
9164 && REGNO (operands[0]) == REGNO (operands[2])
7656aee4 9165 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9166 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9167 ; /* ok */
9168 else
9169 gcc_assert (is_sse);
9170#endif
9171
9172 switch (GET_CODE (operands[3]))
9173 {
9174 case PLUS:
9175 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9176 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9177 p = "fiadd";
9178 else
9179 p = "fadd";
1deaa899 9180 ssep = "add";
9181 break;
9182
9183 case MINUS:
9184 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9185 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9186 p = "fisub";
9187 else
9188 p = "fsub";
1deaa899 9189 ssep = "sub";
9190 break;
9191
9192 case MULT:
9193 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9194 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9195 p = "fimul";
9196 else
9197 p = "fmul";
1deaa899 9198 ssep = "mul";
9199 break;
9200
9201 case DIV:
9202 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9203 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9204 p = "fidiv";
9205 else
9206 p = "fdiv";
1deaa899 9207 ssep = "div";
9208 break;
9209
9210 default:
d0396b79 9211 gcc_unreachable ();
9212 }
9213
9214 if (is_sse)
9215 {
9216 strcpy (buf, ssep);
9217 if (GET_MODE (operands[0]) == SFmode)
9218 strcat (buf, "ss\t{%2, %0|%0, %2}");
9219 else
9220 strcat (buf, "sd\t{%2, %0|%0, %2}");
9221 return buf;
9222 }
e075ae69 9223 strcpy (buf, p);
9224
9225 switch (GET_CODE (operands[3]))
9226 {
9227 case MULT:
9228 case PLUS:
9229 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9230 {
e3c2afab 9231 rtx temp = operands[2];
9232 operands[2] = operands[1];
9233 operands[1] = temp;
9234 }
9235
9236 /* We know operands[0] == operands[1] here. */
9237
7656aee4 9238 if (MEM_P (operands[2]))
9239 {
9240 p = "%z2\t%2";
9241 break;
9242 }
2a2ab3f9
JVA
9243
9244 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9245 {
9246 if (STACK_TOP_P (operands[0]))
9247 /* How is it that we are storing to a dead operand[2]?
9248 Well, presumably operands[1] is dead too. We can't
9249 store the result to st(0) as st(0) gets popped on this
9250 instruction. Instead store to operands[2] (which I
9251 think has to be st(1)). st(1) will be popped later.
9252 gcc <= 2.8.1 didn't have this check and generated
9253 assembly code that the Unixware assembler rejected. */
9254 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 9255 else
e3c2afab 9256 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 9257 break;
6b28fd63 9258 }
9259
9260 if (STACK_TOP_P (operands[0]))
e3c2afab 9261 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 9262 else
e3c2afab 9263 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 9264 break;
9265
9266 case MINUS:
9267 case DIV:
7656aee4 9268 if (MEM_P (operands[1]))
9269 {
9270 p = "r%z1\t%1";
9271 break;
9272 }
2a2ab3f9 9273
7656aee4 9274 if (MEM_P (operands[2]))
9275 {
9276 p = "%z2\t%2";
9277 break;
9278 }
2a2ab3f9 9279
2a2ab3f9 9280 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 9281 {
9282#if SYSV386_COMPAT
9283 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9284 derived assemblers, confusingly reverse the direction of
9285 the operation for fsub{r} and fdiv{r} when the
9286 destination register is not st(0). The Intel assembler
9287 doesn't have this brain damage. Read !SYSV386_COMPAT to
9288 figure out what the hardware really does. */
9289 if (STACK_TOP_P (operands[0]))
9290 p = "{p\t%0, %2|rp\t%2, %0}";
9291 else
9292 p = "{rp\t%2, %0|p\t%0, %2}";
9293#else
6b28fd63 9294 if (STACK_TOP_P (operands[0]))
9295 /* As above for fmul/fadd, we can't store to st(0). */
9296 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 9297 else
9298 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9299#endif
e075ae69 9300 break;
6b28fd63 9301 }
9302
9303 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 9304 {
e3c2afab 9305#if SYSV386_COMPAT
6b28fd63 9306 if (STACK_TOP_P (operands[0]))
e3c2afab 9307 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 9308 else
9309 p = "{p\t%1, %0|rp\t%0, %1}";
9310#else
9311 if (STACK_TOP_P (operands[0]))
9312 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9313 else
9314 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9315#endif
e075ae69 9316 break;
6b28fd63 9317 }
2a2ab3f9
JVA
9318
9319 if (STACK_TOP_P (operands[0]))
9320 {
9321 if (STACK_TOP_P (operands[1]))
e3c2afab 9322 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 9323 else
e3c2afab 9324 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 9325 break;
2a2ab3f9
JVA
9326 }
9327 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
9328 {
9329#if SYSV386_COMPAT
9330 p = "{\t%1, %0|r\t%0, %1}";
9331#else
9332 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9333#endif
9334 }
2a2ab3f9 9335 else
9336 {
9337#if SYSV386_COMPAT
9338 p = "{r\t%2, %0|\t%0, %2}";
9339#else
9340 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9341#endif
9342 }
e075ae69 9343 break;
9344
9345 default:
d0396b79 9346 gcc_unreachable ();
2a2ab3f9 9347 }
9348
9349 strcat (buf, p);
9350 return buf;
2a2ab3f9 9351}
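/* Example of the selected template (illustration only): for
   (set (reg st0) (plus (reg st0) (reg st1))) where st1 dies, the code
   above picks "fadd" plus "p\t{%0, %2|%2, %0}", so the AT&T output is
   "faddp %st, %st(1)", popping the dead register.  */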
e075ae69 9352
9353/* Return needed mode for entity in optimize_mode_switching pass. */
9354
9355int
9356ix86_mode_needed (int entity, rtx insn)
9357{
9358 enum attr_i387_cw mode;
9359
9360 /* The mode UNINITIALIZED is used to store the control word after a
9361 function call or ASM pattern. The mode ANY specifies that the function
9362 has no requirements on the control word and makes no changes to the
9363 bits we are interested in. */
9364
9365 if (CALL_P (insn)
9366 || (NONJUMP_INSN_P (insn)
9367 && (asm_noperands (PATTERN (insn)) >= 0
9368 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9369 return I387_CW_UNINITIALIZED;
9370
9371 if (recog_memoized (insn) < 0)
9372 return I387_CW_ANY;
9373
9374 mode = get_attr_i387_cw (insn);
9375
9376 switch (entity)
9377 {
9378 case I387_TRUNC:
9379 if (mode == I387_CW_TRUNC)
9380 return mode;
9381 break;
9382
9383 case I387_FLOOR:
9384 if (mode == I387_CW_FLOOR)
9385 return mode;
9386 break;
9387
9388 case I387_CEIL:
9389 if (mode == I387_CW_CEIL)
9390 return mode;
9391 break;
9392
9393 case I387_MASK_PM:
9394 if (mode == I387_CW_MASK_PM)
9395 return mode;
9396 break;
9397
9398 default:
9399 gcc_unreachable ();
9400 }
9401
9402 return I387_CW_ANY;
9403}
9404
edeacc14
UB
9405/* Output code to initialize control word copies used by trunc?f?i and
9406 rounding patterns. CURRENT_MODE is set to current control word,
9407 while NEW_MODE is set to new control word. */
9408
7a2e09f4 9409void
ff680eb1 9410emit_i387_cw_initialization (int mode)
7a2e09f4 9411{
9412 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9413 rtx new_mode;
9414
9415ab7d 9415 enum ix86_stack_slot slot;
ff680eb1 9416
7a2e09f4
JH
9417 rtx reg = gen_reg_rtx (HImode);
9418
ff680eb1 9419 emit_insn (gen_x86_fnstcw_1 (stored_mode));
3e916873 9420 emit_move_insn (reg, copy_rtx (stored_mode));
edeacc14 9421
ff680eb1 9422 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
edeacc14
UB
9423 {
9424 switch (mode)
9425 {
9426 case I387_CW_TRUNC:
9427 /* round toward zero (truncate) */
9428 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9429 slot = SLOT_CW_TRUNC;
9430 break;
9431
9432 case I387_CW_FLOOR:
9433 /* round down toward -oo */
9434 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9435 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9436 slot = SLOT_CW_FLOOR;
9437 break;
9438
9439 case I387_CW_CEIL:
9440 /* round up toward +oo */
9441 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9442 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9443 slot = SLOT_CW_CEIL;
9444 break;
9445
9446 case I387_CW_MASK_PM:
9447 /* mask precision exception for nearbyint() */
9448 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
ff680eb1 9449 slot = SLOT_CW_MASK_PM;
9450 break;
9451
9452 default:
d0396b79 9453 gcc_unreachable ();
9454 }
9455 }
7a2e09f4 9456 else
9457 {
9458 switch (mode)
9459 {
9460 case I387_CW_TRUNC:
9461 /* round toward zero (truncate) */
9462 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9463 slot = SLOT_CW_TRUNC;
9464 break;
9465
9466 case I387_CW_FLOOR:
9467 /* round down toward -oo */
9468 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9469 slot = SLOT_CW_FLOOR;
9470 break;
9471
9472 case I387_CW_CEIL:
9473 /* round up toward +oo */
9474 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9475 slot = SLOT_CW_CEIL;
edeacc14 9476 break;
5656a184 9477
9478 case I387_CW_MASK_PM:
9479 /* mask precision exception for nearbyint() */
9480 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
ff680eb1 9481 slot = SLOT_CW_MASK_PM;
9482 break;
9483
9484 default:
d0396b79 9485 gcc_unreachable ();
9486 }
9487 }
9488
9489 gcc_assert (slot < MAX_386_STACK_LOCALS);
9490
9491 new_mode = assign_386_stack_local (HImode, slot);
edeacc14 9492 emit_move_insn (new_mode, reg);
9493}
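/* Background for the magic constants above (standard x87 facts): bits
   10-11 of the control word form the rounding-control field -- 00
   round-to-nearest, 01 round down, 10 round up, 11 truncate -- and bit
   5 (0x0020) masks the precision exception.  Hence 0x0400 for FLOOR,
   0x0800 for CEIL and 0x0c00 for TRUNC.  */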
9494
2a2ab3f9 9495/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 9496 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 9497 operand may be [SDX]Fmode. */
2a2ab3f9 9498
69ddee61 9499const char *
9199f050 9500output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9501{
9502 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69 9503 int dimode_p = GET_MODE (operands[0]) == DImode;
6e858d45 9504 int round_mode = get_attr_i387_cw (insn);
2a2ab3f9 9505
9506 /* Jump through a hoop or two for DImode, since the hardware has no
9507 non-popping instruction. We used to do this a different way, but
9508 that was somewhat fragile and broke with post-reload splitters. */
9199f050 9509 if ((dimode_p || fisttp) && !stack_top_dies)
a05924f9 9510 output_asm_insn ("fld\t%y1", operands);
e075ae69 9511
d0396b79 9512 gcc_assert (STACK_TOP_P (operands[1]));
7656aee4 9513 gcc_assert (MEM_P (operands[0]));
54a88090 9514 gcc_assert (GET_MODE (operands[1]) != TFmode);
e9a25f70 9515
9516 if (fisttp)
9517 output_asm_insn ("fisttp%z0\t%0", operands);
10195bd8 9518 else
9199f050 9519 {
9520 if (round_mode != I387_CW_ANY)
9521 output_asm_insn ("fldcw\t%3", operands);
9522 if (stack_top_dies || dimode_p)
9523 output_asm_insn ("fistp%z0\t%0", operands);
9524 else
9525 output_asm_insn ("fist%z0\t%0", operands);
9526 if (round_mode != I387_CW_ANY)
9527 output_asm_insn ("fldcw\t%2", operands);
9199f050 9528 }
10195bd8 9529
e075ae69 9530 return "";
2a2ab3f9 9531}
cda749b1 9532
9533/* Output code for x87 ffreep insn. The OPNO argument, which may only
9534 have the values zero or one, indicates the ffreep insn's operand
9535 from the OPERANDS array. */
9536
9537static const char *
9538output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9539{
9540 if (TARGET_USE_FFREEP)
9541#if HAVE_AS_IX86_FFREEP
9542 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9543#else
9544 {
9545 static char retval[] = ".word\t0xc_df";
9546 int regno = REGNO (operands[opno]);
54a88090 9547
9548 gcc_assert (FP_REGNO_P (regno));
9549
9550 retval[9] = '0' + (regno - FIRST_STACK_REG);
9551 return retval;
9552 }
9553#endif
9554
9555 return opno ? "fstp\t%y1" : "fstp\t%y0";
9556}
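/* Note on the hand-assembled encoding above (illustration): ffreep
   %st(i) is the two-byte opcode DF C0+i, so for %st(3) the patched
   string becomes ".word 0xc3df", which assembles little-endian to the
   bytes DF C3.  This is only needed when the assembler lacks the
   ffreep mnemonic.  */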
9557
9558
e075ae69 9559/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7c82106f 9560 should be used. UNORDERED_P is true when fucom should be used. */
e075ae69 9561
69ddee61 9562const char *
b96a374d 9563output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
cda749b1 9564{
e075ae69 9565 int stack_top_dies;
869d095e 9566 rtx cmp_op0, cmp_op1;
7c82106f 9567 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
e075ae69 9568
7c82106f 9569 if (eflags_p)
e075ae69 9570 {
9571 cmp_op0 = operands[0];
9572 cmp_op1 = operands[1];
e075ae69 9573 }
9574 else
9575 {
9576 cmp_op0 = operands[1];
9577 cmp_op1 = operands[2];
9578 }
9579
9580 if (is_sse)
9581 {
9582 if (GET_MODE (operands[0]) == SFmode)
9583 if (unordered_p)
9584 return "ucomiss\t{%1, %0|%0, %1}";
9585 else
a5cf80f0 9586 return "comiss\t{%1, %0|%0, %1}";
9587 else
9588 if (unordered_p)
9589 return "ucomisd\t{%1, %0|%0, %1}";
9590 else
a5cf80f0 9591 return "comisd\t{%1, %0|%0, %1}";
0644b628 9592 }
cda749b1 9593
d0396b79 9594 gcc_assert (STACK_TOP_P (cmp_op0));
cda749b1 9595
e075ae69 9596 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 9597
9598 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9599 {
9600 if (stack_top_dies)
9601 {
9602 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
b6c03bcd 9603 return output_387_ffreep (operands, 1);
9604 }
9605 else
9606 return "ftst\n\tfnstsw\t%0";
9607 }
9608
9609 if (STACK_REG_P (cmp_op1)
9610 && stack_top_dies
9611 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9612 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 9613 {
9614 /* If the top of the 387 stack dies, and the other operand is also
9615 a stack register that dies, then this must be a `fcompp' float
9616 compare. */
9617
7c82106f 9618 if (eflags_p)
9619 {
9620 /* There is no double popping fcomi variant. Fortunately,
9621 eflags is immune from the fstp's cc clobbering. */
9622 if (unordered_p)
9623 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9624 else
9625 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
b6c03bcd 9626 return output_387_ffreep (operands, 0);
9627 }
9628 else
cda749b1 9629 {
9630 if (unordered_p)
9631 return "fucompp\n\tfnstsw\t%0";
cda749b1 9632 else
7c82106f 9633 return "fcompp\n\tfnstsw\t%0";
JW
9635 }
9636 else
9637 {
e075ae69 9638 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 9639
7c82106f 9640 static const char * const alt[16] =
e075ae69 9641 {
7c82106f
UB
9642 "fcom%z2\t%y2\n\tfnstsw\t%0",
9643 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9644 "fucom%z2\t%y2\n\tfnstsw\t%0",
9645 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 9646
9647 "ficom%z2\t%y2\n\tfnstsw\t%0",
9648 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9649 NULL,
9650 NULL,
9651
9652 "fcomi\t{%y1, %0|%0, %y1}",
9653 "fcomip\t{%y1, %0|%0, %y1}",
9654 "fucomi\t{%y1, %0|%0, %y1}",
9655 "fucomip\t{%y1, %0|%0, %y1}",
9656
9657 NULL,
9658 NULL,
9659 NULL,
9660 NULL
9661 };
9662
9663 int mask;
69ddee61 9664 const char *ret;
9665
9666 mask = eflags_p << 3;
7c82106f 9667 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9668 mask |= unordered_p << 1;
9669 mask |= stack_top_dies;
9670
d0396b79 9671 gcc_assert (mask < 16);
e075ae69 9672 ret = alt[mask];
d0396b79 9673 gcc_assert (ret);
cda749b1 9674
e075ae69 9675 return ret;
9676 }
9677}
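/* The mask above packs the answer as
   eflags_p << 3 | intmode << 2 | unordered_p << 1 | stack_top_dies;
   e.g. an ordered fcomi compare whose stack top dies gives mask 9 and
   selects "fcomip\t{%y1, %0|%0, %y1}" (illustration only).  */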
2a2ab3f9 9678
f88c65f7 9679void
b96a374d 9680ix86_output_addr_vec_elt (FILE *file, int value)
9681{
9682 const char *directive = ASM_LONG;
9683
f88c65f7 9684#ifdef ASM_QUAD
9685 if (TARGET_64BIT)
9686 directive = ASM_QUAD;
f88c65f7 9687#else
d0396b79 9688 gcc_assert (!TARGET_64BIT);
f88c65f7 9689#endif
9690
9691 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9692}
9693
9694void
b96a374d 9695ix86_output_addr_diff_elt (FILE *file, int value, int rel)
f88c65f7 9696{
9697 const char *directive = ASM_LONG;
9698
9699#ifdef ASM_QUAD
9700 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9701 directive = ASM_QUAD;
9702#else
9703 gcc_assert (!TARGET_64BIT);
9704#endif
9705 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9706 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
74411039 9707 fprintf (file, "%s%s%d-%s%d\n",
dc4d7240 9708 directive, LPREFIX, value, LPREFIX, rel);
9709 else if (HAVE_AS_GOTOFF_IN_DATA)
9710 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9711#if TARGET_MACHO
9712 else if (TARGET_MACHO)
9713 {
9714 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9715 machopic_output_function_base_name (file);
9716 fprintf(file, "\n");
9717 }
b069de3b 9718#endif
f88c65f7 9719 else
9720 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9721 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
f88c65f7 9722}
32b5b1aa 9723\f
9724/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9725 for the target. */
9726
9727void
b96a374d 9728ix86_expand_clear (rtx dest)
9729{
9730 rtx tmp;
9731
9732 /* We play register width games, which are only valid after reload. */
d0396b79 9733 gcc_assert (reload_completed);
9734
9735 /* Avoid HImode and its attendant prefix byte. */
9736 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9737 dest = gen_rtx_REG (SImode, REGNO (dest));
a8bac9ab
RH
9738 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9739
9740 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9741 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9742 {
9743 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9744 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9745 }
9746
9747 emit_insn (tmp);
9748}
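/* Rationale (illustration): "xor %eax, %eax" is shorter than
   "mov $0, %eax" and breaks dependence chains, but it clobbers the
   flags, hence the (clobber (reg:CC 17)) attached above unless the
   TARGET_USE_MOV0 tuning prefers the plain move.  */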
9749
9750/* X is an unchanging MEM. If it is a constant pool reference, return
9751 the constant pool rtx, else NULL. */
9752
8fe75e43 9753rtx
b96a374d 9754maybe_get_pool_constant (rtx x)
f996902d 9755{
69bd9368 9756 x = ix86_delegitimize_address (XEXP (x, 0));
9757
9758 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9759 return get_pool_constant (x);
9760
9761 return NULL_RTX;
9762}
9763
79325812 9764void
b96a374d 9765ix86_expand_move (enum machine_mode mode, rtx operands[])
32b5b1aa 9766{
e075ae69 9767 int strict = (reload_in_progress || reload_completed);
9768 rtx op0, op1;
9769 enum tls_model model;
9770
9771 op0 = operands[0];
9772 op1 = operands[1];
9773
d2ad2c8a 9774 if (GET_CODE (op1) == SYMBOL_REF)
f996902d 9775 {
9776 model = SYMBOL_REF_TLS_MODEL (op1);
9777 if (model)
9778 {
9779 op1 = legitimize_tls_address (op1, model, true);
9780 op1 = force_operand (op1, op0);
9781 if (op1 == op0)
9782 return;
9783 }
9784 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9785 && SYMBOL_REF_DLLIMPORT_P (op1))
9786 op1 = legitimize_dllimport_symbol (op1, false);
9787 }
9788 else if (GET_CODE (op1) == CONST
9789 && GET_CODE (XEXP (op1, 0)) == PLUS
9790 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9791 {
da489f73
RH
9792 rtx addend = XEXP (XEXP (op1, 0), 1);
9793 rtx symbol = XEXP (XEXP (op1, 0), 0);
9794 rtx tmp = NULL;
9795
9796 model = SYMBOL_REF_TLS_MODEL (symbol);
d2ad2c8a 9797 if (model)
da489f73
RH
9798 tmp = legitimize_tls_address (symbol, model, true);
9799 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9800 && SYMBOL_REF_DLLIMPORT_P (symbol))
9801 tmp = legitimize_dllimport_symbol (symbol, true);
9802
9803 if (tmp)
d2ad2c8a 9804 {
da489f73
RH
9805 tmp = force_operand (tmp, NULL);
9806 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
d2ad2c8a 9807 op0, 1, OPTAB_DIRECT);
da489f73 9808 if (tmp == op0)
d2ad2c8a
JH
9809 return;
9810 }
f996902d 9811 }
74dc3e94
RH
9812
9813 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
f996902d 9814 {
f7288899
EC
9815 if (TARGET_MACHO && !TARGET_64BIT)
9816 {
b069de3b 9817#if TARGET_MACHO
f7288899
EC
9818 if (MACHOPIC_PURE)
9819 {
9820 rtx temp = ((reload_in_progress
7656aee4 9821 || ((op0 && REG_P (op0))
f7288899
EC
9822 && mode == Pmode))
9823 ? op0 : gen_reg_rtx (Pmode));
9824 op1 = machopic_indirect_data_reference (op1, temp);
9825 op1 = machopic_legitimize_pic_address (op1, mode,
9826 temp == op1 ? 0 : temp);
9827 }
9828 else if (MACHOPIC_INDIRECT)
9829 op1 = machopic_indirect_data_reference (op1, 0);
9830 if (op0 == op1)
9831 return;
9832#endif
9833 }
5656a184 9834 else
f7288899 9835 {
7656aee4 9836 if (MEM_P (op0))
f7288899 9837 op1 = force_reg (Pmode, op1);
dc4d7240 9838 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
170bdaba 9839 {
b3a13419 9840 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
170bdaba
RS
9841 op1 = legitimize_pic_address (op1, reg);
9842 if (op0 == op1)
9843 return;
9844 }
f7288899 9845 }
e075ae69
RH
9846 }
9847 else
9848 {
7656aee4 9849 if (MEM_P (op0)
44cf5b6a 9850 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
f996902d 9851 || !push_operand (op0, mode))
7656aee4 9852 && MEM_P (op1))
f996902d 9853 op1 = force_reg (mode, op1);
e9a25f70 9854
f996902d
RH
9855 if (push_operand (op0, mode)
9856 && ! general_no_elim_operand (op1, mode))
9857 op1 = copy_to_mode_reg (mode, op1);
2c5a510c 9858
44cf5b6a
JH
9859 /* Force large constants in 64bit compilation into register
9860 to get them CSEed. */
9861 if (TARGET_64BIT && mode == DImode
f996902d 9862 && immediate_operand (op1, mode)
8fe75e43 9863 && !x86_64_zext_immediate_operand (op1, VOIDmode)
f996902d 9864 && !register_operand (op0, mode)
44cf5b6a 9865 && optimize && !reload_completed && !reload_in_progress)
f996902d 9866 op1 = copy_to_mode_reg (mode, op1);
44cf5b6a 9867
e075ae69 9868 if (FLOAT_MODE_P (mode))
32b5b1aa 9869 {
d7a29404
JH
9870 /* If we are loading a floating point constant to a register,
9871 force the value to memory now, since we'll get better code
9872 out the back end. */
e075ae69
RH
9873
9874 if (strict)
9875 ;
ddc67067
MM
9876 else if (GET_CODE (op1) == CONST_DOUBLE)
9877 {
9878 op1 = validize_mem (force_const_mem (mode, op1));
9879 if (!register_operand (op0, mode))
9880 {
9881 rtx temp = gen_reg_rtx (mode);
9882 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9883 emit_move_insn (op0, temp);
9884 return;
9885 }
9886 }
32b5b1aa 9887 }
32b5b1aa 9888 }
e9a25f70 9889
74dc3e94 9890 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
e075ae69 9891}

void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (op0, mode)
      && (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
      && standard_sse_constant_p (op1) <= 0)
    op1 = validize_mem (force_const_mem (mode, op1));

  /* TDmode values are passed as TImode on the stack.  TImode values
     are moved via xmm registers, and moving them to stack can result in
     unaligned memory access.  Use ix86_expand_vector_move_misalign()
     if memory operand is not aligned correctly.  */
  if (can_create_pseudo_p ()
      && (mode == TImode) && !TARGET_64BIT
      && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
	  || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
    {
      rtx tmp[2];

      /* ix86_expand_vector_move_misalign() does not like constants ... */
      if (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
	op1 = validize_mem (force_const_mem (mode, op1));

      /* ... nor both arguments in memory.  */
      if (!register_operand (op0, mode)
	  && !register_operand (op1, mode))
	op1 = force_reg (mode, op1);

      tmp[0] = op0; tmp[1] = op1;
      ix86_expand_vector_move_misalign (mode, tmp);
      return;
    }

  /* Make operand1 a register if it isn't already.  */
  if (can_create_pseudo_p ()
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}

/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg

     if (x86_sse_partial_reg_dependency == true)
       {
	 xorps  reg, reg
	 movlps mem, reg
	 movhps mem+8, reg
       }
     else
       {
	 movlps mem, reg
	 movhps mem+8, reg
       }

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg

     if (x86_sse_split_regs == true)
       {
	 movlpd mem, reg
	 movhpd mem+8, reg
       }
     else
       {
	 movsd  mem, reg
	 movhpd mem+8, reg
       }
 */

void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  if (MEM_P (op1))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? If we have typed data, then it would appear that using
	 movdqu is the only way to get unaligned data loaded with
	 integer type.  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  rtx zero;

	  if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
	    {
	      op0 = gen_lowpart (V2DFmode, op0);
	      op1 = gen_lowpart (V2DFmode, op1);
	      emit_insn (gen_sse2_movupd (op0, op1));
	      return;
	    }

	  /* When SSE registers are split into halves, we can avoid
	     writing to the top half twice.  */
	  if (TARGET_SSE_SPLIT_REGS)
	    {
	      emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
	      zero = op0;
	    }
	  else
	    {
	      /* ??? Not sure about the best option for the Intel chips.
		 The following would seem to satisfy; the register is
		 entirely cleared, breaking the dependency chain.  We
		 then store to the upper half, with a dependency depth
		 of one.  A rumor has it that Intel recommends two movsd
		 followed by an unpacklpd, but this is unconfirmed.  And
		 given that the dependency depth of the unpacklpd would
		 still be one, I'm not sure why this would be better.  */
	      zero = CONST0_RTX (V2DFmode);
	    }

	  m = adjust_address (op1, DFmode, 0);
	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
	  m = adjust_address (op1, DFmode, 8);
	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
	}
      else
	{
	  if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
	    {
	      op0 = gen_lowpart (V4SFmode, op0);
	      op1 = gen_lowpart (V4SFmode, op1);
	      emit_insn (gen_sse_movups (op0, op1));
	      return;
	    }

	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
	    emit_move_insn (op0, CONST0_RTX (mode));
	  else
	    emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));

	  if (mode != V4SFmode)
	    op0 = gen_lowpart (V4SFmode, op0);
	  m = adjust_address (op1, V2SFmode, 0);
	  emit_insn (gen_sse_loadlps (op0, op0, m));
	  m = adjust_address (op1, V2SFmode, 8);
	  emit_insn (gen_sse_loadhps (op0, op0, m));
	}
    }
  else if (MEM_P (op0))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? Similar to above, only less clear because of quote
	 typeless stores unquote.  */
      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  m = adjust_address (op0, DFmode, 0);
	  emit_insn (gen_sse2_storelpd (m, op1));
	  m = adjust_address (op0, DFmode, 8);
	  emit_insn (gen_sse2_storehpd (m, op1));
	}
      else
	{
	  if (mode != V4SFmode)
	    op1 = gen_lowpart (V4SFmode, op1);
	  m = adjust_address (op0, V2SFmode, 0);
	  emit_insn (gen_sse_storelps (m, op1));
	  m = adjust_address (op0, V2SFmode, 8);
	  emit_insn (gen_sse_storehps (m, op1));
	}
    }
  else
    gcc_unreachable ();
}

/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
			     GEN_INT (-GET_MODE_SIZE (mode)),
			     stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
  emit_move_insn (tmp, x);
}
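/* For illustration (not part of the original source): pushing a 16-byte
   value this way typically assembles to an explicit stack adjustment
   followed by an ordinary store, roughly
	sub	$16, %esp
	movups	%xmm0, (%esp)
   since the hardware has no 128-bit push instruction.  */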
\f
/* Helper function of ix86_fixup_binary_operands to canonicalize
   operand order.  Returns true if the operands should be swapped.  */

static bool
ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* If the operation is not commutative, we can't do anything.  */
  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return false;

  /* Highest priority is that src1 should match dst.  */
  if (rtx_equal_p (dst, src1))
    return false;
  if (rtx_equal_p (dst, src2))
    return true;

  /* Next highest priority is that immediate constants come second.  */
  if (immediate_operand (src2, mode))
    return false;
  if (immediate_operand (src1, mode))
    return true;

  /* Lowest priority is that memory references should come second.  */
  if (MEM_P (src2))
    return false;
  if (MEM_P (src1))
    return true;

  return false;
}
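/* Example (not part of the original source): for a commutative PLUS
   where operands[0] is a REG, operands[1] is a MEM and operands[2] is
   that same REG, rtx_equal_p (dst, src2) holds, so this returns true
   and the caller rewrites "reg = mem + reg" as "reg = reg + mem",
   matching the destination first and pushing the memory reference into
   the second slot where the insn patterns expect it.  */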

/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Canonicalize operand order.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    {
      /* Optimization: Only read from memory once.  */
      if (rtx_equal_p (src1, src2))
	{
	  src2 = force_reg (mode, src2);
	  src1 = src2;
	}
      else
	src2 = force_reg (mode, src2);
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    dst = gen_reg_rtx (mode);

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    src1 = force_reg (mode, src1);

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    src1 = force_reg (mode, src1);

  operands[1] = src1;
  operands[2] = src2;
  return dst;
}

/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
				    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}

/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

int
ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
			 rtx operands[3])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    return 0;

  /* Canonicalize operand order for commutative operators.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, we must have a matching source operand.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    return 0;

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    return 0;

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    return 0;

  return 1;
}

/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of operands is memory, source and destination must match.  */
  if ((MEM_P (operands[0])
       || MEM_P (operands[1]))
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}

/* Post-reload splitter for converting an SF or DFmode value in an
   SSE register into an unsigned SImode.  */

void
ix86_split_convert_uns_si_sse (rtx operands[])
{
  enum machine_mode vecmode;
  rtx value, large, zero_or_two31, input, two31, x;

  large = operands[1];
  zero_or_two31 = operands[2];
  input = operands[3];
  two31 = operands[4];
  vecmode = GET_MODE (large);
  value = gen_rtx_REG (vecmode, REGNO (operands[0]));

  /* Load up the value into the low element.  We must ensure that the other
     elements are valid floats -- zero is the easiest such value.  */
  if (MEM_P (input))
    {
      if (vecmode == V4SFmode)
	emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
      else
	emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
    }
  else
    {
      input = gen_rtx_REG (vecmode, REGNO (input));
      emit_move_insn (value, CONST0_RTX (vecmode));
      if (vecmode == V4SFmode)
	emit_insn (gen_sse_movss (value, value, input));
      else
	emit_insn (gen_sse2_movsd (value, value, input));
    }

  emit_move_insn (large, two31);
  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);

  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
  emit_insn (gen_rtx_SET (VOIDmode, large, x));

  x = gen_rtx_AND (vecmode, zero_or_two31, large);
  emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));

  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
  emit_insn (gen_rtx_SET (VOIDmode, value, x));

  large = gen_rtx_REG (V4SImode, REGNO (large));
  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));

  x = gen_rtx_REG (V4SImode, REGNO (value));
  if (vecmode == V4SFmode)
    emit_insn (gen_sse2_cvttps2dq (x, value));
  else
    emit_insn (gen_sse2_cvttpd2dq (x, value));
  value = x;

  emit_insn (gen_xorv4si3 (value, value, large));
}

/* Convert an unsigned DImode value into a DFmode, using only SSE.
   Expects the 64-bit DImode to be supplied in a pair of integral
   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
   -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
  rtx int_xmm, fp_xmm;
  rtx biases, exponents;
  rtx x;

  int_xmm = gen_reg_rtx (V4SImode);
  if (TARGET_INTER_UNIT_MOVES)
    emit_insn (gen_movdi_to_sse (int_xmm, input));
  else if (TARGET_SSE_SPLIT_REGS)
    {
      emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
    }
  else
    {
      x = gen_reg_rtx (V2DImode);
      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
    }

  x = gen_rtx_CONST_VECTOR (V4SImode,
			    gen_rtvec (4, GEN_INT (0x43300000UL),
				       GEN_INT (0x45300000UL),
				       const0_rtx, const0_rtx));
  exponents = validize_mem (force_const_mem (V4SImode, x));

  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
  emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));

  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
     (0x1.0p84 + double(fp_value_hi_xmm)).
     Note these exponents differ by 32.  */

  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));

  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
  real_ldexp (&bias_lo_rvt, &dconst1, 52);
  real_ldexp (&bias_hi_rvt, &dconst1, 84);
  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
  x = const_double_from_real_value (bias_hi_rvt, DFmode);
  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
  biases = validize_mem (force_const_mem (V2DFmode, biases));
  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));

  /* Add the upper and lower DFmode values together.  */
  if (TARGET_SSE3)
    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
  else
    {
      x = copy_to_mode_reg (V2DFmode, fp_xmm);
      emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
    }

  ix86_expand_vector_extract (false, target, fp_xmm, 0);
}

/* Convert an unsigned SImode value into a DFmode.  Currently only used
   for SSE, but applicable anywhere.  */

void
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO31r;
  rtx x, fp;

  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
			   NULL, 1, OPTAB_DIRECT);

  fp = gen_reg_rtx (DFmode);
  emit_insn (gen_floatsidf2 (fp, x));

  real_ldexp (&TWO31r, &dconst1, 31);
  x = const_double_from_real_value (TWO31r, DFmode);

  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
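/* Worked example (not part of the original source): for input
   0x80000001 (2147483649 as unsigned), the PLUS of -2^31 wraps to the
   signed value 1, floatsidf2 converts that to 1.0, and adding back
   2^31 (0x1.0p31) yields exactly 2147483649.0; DFmode's 53-bit
   significand makes every step exact for any 32-bit input.  */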

/* Convert a signed DImode value into a DFmode.  Only used for SSE in
   32-bit mode; otherwise we have a direct convert instruction.  */

void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO32r;
  rtx fp_lo, fp_hi, x;

  fp_lo = gen_reg_rtx (DFmode);
  fp_hi = gen_reg_rtx (DFmode);

  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);
  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);

  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));

  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
			   0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}

/* Convert an unsigned SImode value into a SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.  */
void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE ONE16r;
  rtx fp_hi, fp_lo, int_hi, int_lo, x;

  real_ldexp (&ONE16r, &dconst1, 16);
  x = const_double_from_real_value (ONE16r, SFmode);
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
				NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
				NULL, 0, OPTAB_DIRECT);
  fp_hi = gen_reg_rtx (SFmode);
  fp_lo = gen_reg_rtx (SFmode);
  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
			       0, OPTAB_DIRECT);
  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
			       0, OPTAB_DIRECT);
  if (!rtx_equal_p (target, fp_hi))
    emit_move_insn (target, fp_hi);
}
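/* Arithmetic sketch (not part of the original source): the input U is
   split as U = hi * 2^16 + lo with hi, lo < 2^16.  Each half fits in
   SFmode's 24-bit significand, and the multiply by 0x1.0p16 is exact,
   so only the final addition can round -- one rounding for the whole
   conversion.  */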

/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
{
  rtvec v;
  switch (mode)
    {
    case SImode:
      gcc_assert (vect);
      v = gen_rtvec (4, value, value, value, value);
      return gen_rtx_CONST_VECTOR (V4SImode, v);

    case DImode:
      gcc_assert (vect);
      v = gen_rtvec (2, value, value);
      return gen_rtx_CONST_VECTOR (V2DImode, v);

    case SFmode:
      if (vect)
	v = gen_rtvec (4, value, value, value, value);
      else
	v = gen_rtvec (4, value, CONST0_RTX (SFmode),
		       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
      return gen_rtx_CONST_VECTOR (V4SFmode, v);

    case DFmode:
      if (vect)
	v = gen_rtvec (2, value, value);
      else
	v = gen_rtvec (2, value, CONST0_RTX (DFmode));
      return gen_rtx_CONST_VECTOR (V2DFmode, v);

    default:
      gcc_unreachable ();
    }
}

/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode, imode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtx v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  switch (mode)
    {
    case SImode:
    case SFmode:
      imode = SImode;
      vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
      lo = 0x80000000, hi = lo < 0;
      break;

    case DImode:
    case DFmode:
      imode = DImode;
      vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
	lo = (HOST_WIDE_INT)1 << shift, hi = -1;
      else
	lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
      break;

    case TImode:
    case TFmode:
      imode = TImode;
      vec_mode = VOIDmode;
      gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
      lo = 0, hi = (HOST_WIDE_INT)1 << shift;
      break;

    default:
      gcc_unreachable ();
    }

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, imode);
  mask = gen_lowpart (mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (mode, mask);

  v = ix86_build_const_vector (mode, vect, mask);
  return force_reg (vec_mode, v);
}
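/* For illustration (not part of the original source): for DFmode with
   INVERT false, the mask's low element is 0x8000000000000000 (the sign
   bit alone); with INVERT true it is 0x7fffffffffffffff.  XORing with
   the former negates, ANDing with the latter computes fabs.  */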

/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
				rtx operands[])
{
  rtx mask, set, use, clob, dst, src;
  bool matching_memory;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode elt_mode = mode;

  if (vector_mode)
    {
      elt_mode = GET_MODE_INNER (mode);
      use_sse = true;
    }
  else if (mode == TFmode)
    use_sse = true;
  else if (TARGET_SSE_MATH)
    use_sse = SSE_FLOAT_MODE_P (mode);

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we don't have matching source
     operands or we're using the x87, do things in registers.  */
  matching_memory = false;
  if (MEM_P (dst))
    {
      if (use_sse && rtx_equal_p (dst, src))
	matching_memory = true;
      else
	dst = gen_reg_rtx (mode);
    }
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  if (vector_mode)
    {
      set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
      set = gen_rtx_SET (VOIDmode, dst, set);
      emit_insn (set);
    }
  else
    {
      set = gen_rtx_fmt_e (code, mode, src);
      set = gen_rtx_SET (VOIDmode, dst, set);
      if (mask)
	{
	  use = gen_rtx_USE (VOIDmode, mask);
	  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
	  emit_insn (gen_rtx_PARALLEL (VOIDmode,
				       gen_rtvec (3, set, use, clob)));
	}
      else
	emit_insn (set);
    }

  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);
  vmode = mode == SFmode ? V4SFmode : V2DFmode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx);

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
	op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (mode == SFmode || mode == DFmode)
	{
	  if (op0 == CONST0_RTX (mode))
	    op0 = CONST0_RTX (vmode);
	  else
	    {
	      rtvec v;

	      if (mode == SFmode)
		v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
			       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
	      else
		v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
	      op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
	    }
	}

      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_const;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_const;
      else
	copysign_insn = gen_copysigntf3_const;

      emit_insn (copysign_insn (dest, op0, op1, mask));
    }
  else
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);

      nmask = ix86_build_signbit_mask (mode, 0, 1);
      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_var;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_var;
      else
	copysign_insn = gen_copysigntf3_var;

      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
    }
}
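/* Bitwise identity behind both paths (not part of the original source):
   copysign (x, y) = (x & ~signmask) | (y & signmask), with signmask
   from ix86_build_signbit_mask.  The _const patterns can drop the
   first AND because |op0| is already known; the _var patterns need
   both nmask and mask, which is what the split functions below
   implement.  */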

/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, x;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}

/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
	 we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
	{
	  x = gen_rtx_AND (vmode, scratch, mask);
	}
      else						/* alternative 2,4 */
	{
	  gcc_assert (REGNO (mask) == REGNO (scratch));
	  op1 = simplify_gen_subreg (vmode, op1, mode, 0);
	  x = gen_rtx_AND (vmode, scratch, op1);
	}
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
	{
	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, nmask);
	}
      else						/* alternative 3,4 */
	{
	  gcc_assert (REGNO (nmask) == REGNO (dest));
	  dest = nmask;
	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, op0);
	}
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}

/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and whether the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}

/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}

/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}

enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return ix86_fp_compare_mode (code);
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with the sign flag when comparing against
	 zero, but for which we lack a jump instruction, so we need to
	 use relational tests against overflow, which thus needs to be
	 zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* The strcmp pattern does (use flags) and combine may ask us
	 for the proper mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}

/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
    case CCZmode:
      switch (m2)
	{
	default:
	  return VOIDmode;

	case CCmode:
	case CCGCmode:
	case CCGOCmode:
	case CCNOmode:
	case CCAmode:
	case CCCmode:
	case CCOmode:
	case CCSmode:
	case CCZmode:
	  return CCmode;
	}

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}

/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is the comparison code for the branch that
   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
   is not required, its code is set to UNKNOWN.  We never require more
   than two branches.  */

void
ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
			  enum rtx_code *first_code,
			  enum rtx_code *second_code)
{
  *first_code = code;
  *bypass_code = UNKNOWN;
  *second_code = UNKNOWN;

  /* The fcomi comparison sets flags as follows:

     cmp    ZF  PF  CF
     >      0   0   0
     <      0   0   1
     =      1   0   0
     un     1   1   1 */

  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      break;
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      gcc_unreachable ();
    }
  if (!TARGET_IEEE_FP)
    {
      *second_code = UNKNOWN;
      *bypass_code = UNKNOWN;
    }
}
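/* Example (not part of the original source): with IEEE math enabled, a
   plain EQ becomes first_code = UNEQ with bypass_code = UNORDERED, so
   the emitted sequence first branches away on the unordered flag (PF)
   and only then tests ZF, keeping NaN operands from comparing equal.  */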
11201
9e7adcb3 11202/* Return cost of comparison done fcom + arithmetics operations on AX.
5bdc5878 11203 All following functions do use number of instructions as a cost metrics.
9e7adcb3
JH
11204 In future this should be tweaked to compute bytes for optimize_size and
11205 take into account performance of various instructions on various CPUs. */
11206static int
b96a374d 11207ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9e7adcb3
JH
11208{
11209 if (!TARGET_IEEE_FP)
11210 return 4;
11211 /* The cost of code output by ix86_expand_fp_compare. */
11212 switch (code)
11213 {
11214 case UNLE:
11215 case UNLT:
11216 case LTGT:
11217 case GT:
11218 case GE:
11219 case UNORDERED:
11220 case ORDERED:
11221 case UNEQ:
11222 return 4;
11223 break;
11224 case LT:
11225 case NE:
11226 case EQ:
11227 case UNGE:
11228 return 5;
11229 break;
11230 case LE:
11231 case UNGT:
11232 return 6;
11233 break;
11234 default:
d0396b79 11235 gcc_unreachable ();
9e7adcb3
JH
11236 }
11237}
11238
11239/* Return cost of comparison done using fcomi operation.
11240 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11241static int
b96a374d 11242ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9e7adcb3
JH
11243{
11244 enum rtx_code bypass_code, first_code, second_code;
d1f87653 11245 /* Return arbitrarily high cost when instruction is not supported - this
9e7adcb3
JH
11246 prevents gcc from using it. */
11247 if (!TARGET_CMOVE)
11248 return 1024;
11249 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
f822d252 11250 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
9e7adcb3
JH
11251}
11252
11253/* Return cost of comparison done using sahf operation.
11254 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11255static int
b96a374d 11256ix86_fp_comparison_sahf_cost (enum rtx_code code)
9e7adcb3
JH
11257{
11258 enum rtx_code bypass_code, first_code, second_code;
d1f87653 11259 /* Return arbitrarily high cost when instruction is not preferred - this
9e7adcb3 11260 avoids gcc from using it. */
3c2d980c 11261 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
9e7adcb3
JH
11262 return 1024;
11263 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
f822d252 11264 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
9e7adcb3
JH
11265}
11266
11267/* Compute cost of the comparison done using any method.
11268 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11269static int
b96a374d 11270ix86_fp_comparison_cost (enum rtx_code code)
9e7adcb3
JH
11271{
11272 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11273 int min;
11274
11275 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11276 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11277
11278 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11279 if (min > sahf_cost)
11280 min = sahf_cost;
11281 if (min > fcomi_cost)
11282 min = fcomi_cost;
11283 return min;
11284}
c0c102a9 11285
2ed941ec
RH
11286/* Return true if we should use an FCOMI instruction for this
11287 fp comparison. */
11288
11289int
11290ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11291{
11292 enum rtx_code swapped_code = swap_condition (code);
11293
11294 return ((ix86_fp_comparison_cost (code)
11295 == ix86_fp_comparison_fcomi_cost (code))
11296 || (ix86_fp_comparison_cost (swapped_code)
11297 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11298}
11299
11300/* Swap, force into registers, or otherwise massage the two operands
11301 to a fp comparison. The operands are updated in place; the new
11302 comparison code is returned. */
11303
11304static enum rtx_code
11305ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11306{
11307 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11308 rtx op0 = *pop0, op1 = *pop1;
11309 enum machine_mode op_mode = GET_MODE (op0);
11310 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11311
11312 /* All of the unordered compare instructions only work on registers.
11313 The same is true of the fcomi compare instructions. The XFmode
11314 compare instructions require registers except when comparing
11315 against zero or when converting operand 1 from fixed point to
11316 floating point. */
11317
11318 if (!is_sse
11319 && (fpcmp_mode == CCFPUmode
11320 || (op_mode == XFmode
11321 && ! (standard_80387_constant_p (op0) == 1
11322 || standard_80387_constant_p (op1) == 1)
11323 && GET_CODE (op1) != FLOAT)
11324 || ix86_use_fcomi_compare (code)))
11325 {
11326 op0 = force_reg (op_mode, op0);
11327 op1 = force_reg (op_mode, op1);
11328 }
11329 else
11330 {
11331 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11332 things around if they appear profitable, otherwise force op0
11333 into a register. */
11334
11335 if (standard_80387_constant_p (op0) == 0
11336 || (MEM_P (op0)
11337 && ! (standard_80387_constant_p (op1) == 0
11338 || MEM_P (op1))))
11339 {
11340 rtx tmp;
11341 tmp = op0, op0 = op1, op1 = tmp;
11342 code = swap_condition (code);
11343 }
11344
11345 if (!REG_P (op0))
11346 op0 = force_reg (op_mode, op0);
11347
11348 if (CONSTANT_P (op1))
11349 {
11350 int tmp = standard_80387_constant_p (op1);
11351 if (tmp == 0)
11352 op1 = validize_mem (force_const_mem (op_mode, op1));
11353 else if (tmp == 1)
11354 {
11355 if (TARGET_CMOVE)
11356 op1 = force_reg (op_mode, op1);
11357 }
11358 else
11359 op1 = force_reg (op_mode, op1);
11360 }
11361 }
11362
11363 /* Try to rearrange the comparison to make it cheaper. */
11364 if (ix86_fp_comparison_cost (code)
11365 > ix86_fp_comparison_cost (swap_condition (code))
b3a13419 11366 && (REG_P (op1) || can_create_pseudo_p ()))
2ed941ec
RH
11367 {
11368 rtx tmp;
11369 tmp = op0, op0 = op1, op1 = tmp;
11370 code = swap_condition (code);
11371 if (!REG_P (op0))
11372 op0 = force_reg (op_mode, op0);
11373 }
11374
11375 *pop0 = op0;
11376 *pop1 = op1;
11377 return code;
11378}
11379
11380/* Convert comparison codes we use to represent FP comparison to integer
11381 code that will result in proper branch. Return UNKNOWN if no such code
11382 is available. */
11383
11384enum rtx_code
11385ix86_fp_compare_code_to_integer (enum rtx_code code)
11386{
11387 switch (code)
11388 {
11389 case GT:
11390 return GTU;
11391 case GE:
11392 return GEU;
11393 case ORDERED:
11394 case UNORDERED:
11395 return code;
11396 break;
11397 case UNEQ:
11398 return EQ;
11399 break;
11400 case UNLT:
11401 return LTU;
11402 break;
11403 case UNLE:
11404 return LEU;
11405 break;
11406 case LTGT:
11407 return NE;
11408 break;
11409 default:
11410 return UNKNOWN;
11411 }
11412}
11413
3a3677ff
RH
11414/* Generate insn patterns to do a floating point compare of OPERANDS. */
11415
9e7adcb3 11416static rtx
b96a374d
AJ
11417ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11418 rtx *second_test, rtx *bypass_test)
3a3677ff
RH
11419{
11420 enum machine_mode fpcmp_mode, intcmp_mode;
c0c102a9 11421 rtx tmp, tmp2;
9e7adcb3 11422 int cost = ix86_fp_comparison_cost (code);
c0c102a9 11423 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
11424
11425 fpcmp_mode = ix86_fp_compare_mode (code);
11426 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11427
9e7adcb3
JH
11428 if (second_test)
11429 *second_test = NULL_RTX;
11430 if (bypass_test)
11431 *bypass_test = NULL_RTX;
11432
c0c102a9
JH
11433 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11434
9e7adcb3 11435 /* Do fcomi/sahf based test when profitable. */
3c2d980c
UB
11436 if ((TARGET_CMOVE || TARGET_SAHF)
11437 && (bypass_code == UNKNOWN || bypass_test)
f822d252 11438 && (second_code == UNKNOWN || second_test)
9e7adcb3 11439 && ix86_fp_comparison_arithmetics_cost (code) > cost)
32b5b1aa 11440 {
c0c102a9
JH
11441 if (TARGET_CMOVE)
11442 {
11443 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11444 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11445 tmp);
11446 emit_insn (tmp);
11447 }
11448 else
11449 {
11450 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 11451 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
11452 if (!scratch)
11453 scratch = gen_reg_rtx (HImode);
c0c102a9
JH
11454 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11455 emit_insn (gen_x86_sahf_1 (scratch));
11456 }
e075ae69
RH
11457
11458 /* The FP codes work out to act like unsigned. */
9a915772 11459 intcmp_mode = fpcmp_mode;
9e7adcb3 11460 code = first_code;
f822d252 11461 if (bypass_code != UNKNOWN)
9e7adcb3
JH
11462 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11463 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11464 const0_rtx);
f822d252 11465 if (second_code != UNKNOWN)
9e7adcb3
JH
11466 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11467 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11468 const0_rtx);
e075ae69
RH
11469 }
11470 else
11471 {
11472 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e075ae69 11473 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 11474 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
11475 if (!scratch)
11476 scratch = gen_reg_rtx (HImode);
3a3677ff 11477 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 11478
9a915772
JH
11479 /* In the unordered case, we have to check C2 for NaN's, which
11480 doesn't happen to work out to anything nice combination-wise.
11481 So do some bit twiddling on the value we've got in AH to come
11482 up with an appropriate set of condition codes. */
e075ae69 11483
9a915772
JH
11484 intcmp_mode = CCNOmode;
11485 switch (code)
32b5b1aa 11486 {
9a915772
JH
11487 case GT:
11488 case UNGT:
11489 if (code == GT || !TARGET_IEEE_FP)
32b5b1aa 11490 {
3a3677ff 11491 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69 11492 code = EQ;
9a915772
JH
11493 }
11494 else
11495 {
11496 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11497 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11498 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11499 intcmp_mode = CCmode;
11500 code = GEU;
11501 }
11502 break;
11503 case LT:
11504 case UNLT:
11505 if (code == LT && TARGET_IEEE_FP)
11506 {
3a3677ff
RH
11507 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11508 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
e075ae69
RH
11509 intcmp_mode = CCmode;
11510 code = EQ;
9a915772
JH
11511 }
11512 else
11513 {
11514 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11515 code = NE;
11516 }
11517 break;
11518 case GE:
11519 case UNGE:
11520 if (code == GE || !TARGET_IEEE_FP)
11521 {
3a3677ff 11522 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69 11523 code = EQ;
9a915772
JH
11524 }
11525 else
11526 {
11527 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11528 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11529 GEN_INT (0x01)));
11530 code = NE;
11531 }
11532 break;
11533 case LE:
11534 case UNLE:
11535 if (code == LE && TARGET_IEEE_FP)
11536 {
3a3677ff
RH
11537 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11538 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11539 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
11540 intcmp_mode = CCmode;
11541 code = LTU;
9a915772
JH
11542 }
11543 else
11544 {
11545 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11546 code = NE;
11547 }
11548 break;
11549 case EQ:
11550 case UNEQ:
11551 if (code == EQ && TARGET_IEEE_FP)
11552 {
3a3677ff
RH
11553 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11554 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
11555 intcmp_mode = CCmode;
11556 code = EQ;
9a915772
JH
11557 }
11558 else
11559 {
3a3677ff
RH
11560 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11561 code = NE;
11562 break;
9a915772
JH
11563 }
11564 break;
11565 case NE:
11566 case LTGT:
11567 if (code == NE && TARGET_IEEE_FP)
11568 {
3a3677ff 11569 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9a915772
JH
11570 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11571 GEN_INT (0x40)));
3a3677ff 11572 code = NE;
9a915772
JH
11573 }
11574 else
11575 {
3a3677ff
RH
11576 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11577 code = EQ;
32b5b1aa 11578 }
9a915772
JH
11579 break;
11580
11581 case UNORDERED:
11582 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11583 code = NE;
11584 break;
11585 case ORDERED:
11586 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11587 code = EQ;
11588 break;
11589
11590 default:
d0396b79 11591 gcc_unreachable ();
32b5b1aa 11592 }
32b5b1aa 11593 }
e075ae69
RH
11594
11595 /* Return the test that should be put into the flags user, i.e.
11596 the bcc, scc, or cmov instruction. */
11597 return gen_rtx_fmt_ee (code, VOIDmode,
11598 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11599 const0_rtx);
11600}
11601
9e3e266c 11602rtx
b96a374d 11603ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
e075ae69
RH
11604{
11605 rtx op0, op1, ret;
11606 op0 = ix86_compare_op0;
11607 op1 = ix86_compare_op1;
11608
a1b8572c
JH
11609 if (second_test)
11610 *second_test = NULL_RTX;
11611 if (bypass_test)
11612 *bypass_test = NULL_RTX;
11613
1ef45b77
RH
11614 if (ix86_compare_emitted)
11615 {
11616 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11617 ix86_compare_emitted = NULL_RTX;
11618 }
ebb109ad 11619 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
27ac40e2
UB
11620 {
11621 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11622 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11623 second_test, bypass_test);
11624 }
32b5b1aa 11625 else
e075ae69
RH
11626 ret = ix86_expand_int_compare (code, op0, op1);
11627
11628 return ret;
11629}
11630
03598dea
JH
11631/* Return true if the CODE will result in a nontrivial jump sequence. */
11632bool
b96a374d 11633ix86_fp_jump_nontrivial_p (enum rtx_code code)
03598dea
JH
11634{
11635 enum rtx_code bypass_code, first_code, second_code;
11636 if (!TARGET_CMOVE)
11637 return true;
11638 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
f822d252 11639 return bypass_code != UNKNOWN || second_code != UNKNOWN;
03598dea
JH
11640}
11641
e075ae69 11642void
b96a374d 11643ix86_expand_branch (enum rtx_code code, rtx label)
e075ae69 11644{
3a3677ff 11645 rtx tmp;
e075ae69 11646
3d763bcf
KH
11647 /* If we have emitted a compare insn, go straight to simple.
11648 ix86_expand_compare won't emit anything if ix86_compare_emitted
11649 is non-NULL. */
11650 if (ix86_compare_emitted)
11651 goto simple;
11652
3a3677ff 11653 switch (GET_MODE (ix86_compare_op0))
32b5b1aa 11654 {
3a3677ff
RH
11655 case QImode:
11656 case HImode:
11657 case SImode:
0d7d98ee 11658 simple:
a1b8572c 11659 tmp = ix86_expand_compare (code, NULL, NULL);
e075ae69
RH
11660 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11661 gen_rtx_LABEL_REF (VOIDmode, label),
11662 pc_rtx);
11663 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa 11664 return;
e075ae69 11665
3a3677ff
RH
11666 case SFmode:
11667 case DFmode:
0f290768 11668 case XFmode:
3a3677ff
RH
11669 {
11670 rtvec vec;
11671 int use_fcomi;
03598dea 11672 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
11673
11674 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11675 &ix86_compare_op1);
fce5a9f2 11676
03598dea
JH
11677 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11678
11679 /* Check whether we will use the natural sequence with one jump. If
11680 so, we can expand the jump early. Otherwise delay expansion by
11681 creating a compound insn so as not to confuse optimizers. */
f822d252 11682 if (bypass_code == UNKNOWN && second_code == UNKNOWN
03598dea
JH
11683 && TARGET_CMOVE)
11684 {
11685 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11686 gen_rtx_LABEL_REF (VOIDmode, label),
7c82106f 11687 pc_rtx, NULL_RTX, NULL_RTX);
03598dea
JH
11688 }
11689 else
11690 {
11691 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11692 ix86_compare_op0, ix86_compare_op1);
11693 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11694 gen_rtx_LABEL_REF (VOIDmode, label),
11695 pc_rtx);
11696 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11697
11698 use_fcomi = ix86_use_fcomi_compare (code);
11699 vec = rtvec_alloc (3 + !use_fcomi);
11700 RTVEC_ELT (vec, 0) = tmp;
11701 RTVEC_ELT (vec, 1)
11702 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11703 RTVEC_ELT (vec, 2)
11704 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11705 if (! use_fcomi)
11706 RTVEC_ELT (vec, 3)
11707 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11708
11709 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11710 }
3a3677ff
RH
11711 return;
11712 }
32b5b1aa 11713
3a3677ff 11714 case DImode:
0d7d98ee
JH
11715 if (TARGET_64BIT)
11716 goto simple;
28356f52 11717 case TImode:
3a3677ff
RH
11718 /* Expand a DImode/TImode branch into multiple compare+branch pairs. */
11719 {
11720 rtx lo[2], hi[2], label2;
11721 enum rtx_code code1, code2, code3;
28356f52 11722 enum machine_mode submode;
32b5b1aa 11723
3a3677ff
RH
11724 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11725 {
11726 tmp = ix86_compare_op0;
11727 ix86_compare_op0 = ix86_compare_op1;
11728 ix86_compare_op1 = tmp;
11729 code = swap_condition (code);
11730 }
28356f52
JB
11731 if (GET_MODE (ix86_compare_op0) == DImode)
11732 {
11733 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11734 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11735 submode = SImode;
11736 }
11737 else
11738 {
11739 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11740 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11741 submode = DImode;
11742 }
32b5b1aa 11743
3a3677ff
RH
11744 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11745 avoid two branches. This costs one extra insn, so disable when
11746 optimizing for size. */
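	 /* That is, "a == b" becomes "((hi(a) ^ hi(b)) | (lo(a) ^ lo(b))) == 0",
	    a single comparison of the combined result against zero.  */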
32b5b1aa 11747
3a3677ff
RH
11748 if ((code == EQ || code == NE)
11749 && (!optimize_size
11750 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11751 {
11752 rtx xor0, xor1;
32b5b1aa 11753
3a3677ff
RH
11754 xor1 = hi[0];
11755 if (hi[1] != const0_rtx)
28356f52 11756 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
3a3677ff 11757 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 11758
3a3677ff
RH
11759 xor0 = lo[0];
11760 if (lo[1] != const0_rtx)
28356f52 11761 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
3a3677ff 11762 NULL_RTX, 0, OPTAB_WIDEN);
e075ae69 11763
28356f52 11764 tmp = expand_binop (submode, ior_optab, xor1, xor0,
3a3677ff 11765 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 11766
3a3677ff
RH
11767 ix86_compare_op0 = tmp;
11768 ix86_compare_op1 = const0_rtx;
11769 ix86_expand_branch (code, label);
11770 return;
11771 }
e075ae69 11772
1f9124e4
JJ
11773 /* Otherwise, if we are doing less-than or greater-than-or-equal,
11774 op1 is a constant and the low word is zero, then we can just
11775 examine the high word. */
32b5b1aa 11776
7656aee4 11777 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
1f9124e4
JJ
11778 switch (code)
11779 {
11780 case LT: case LTU: case GE: case GEU:
11781 ix86_compare_op0 = hi[0];
11782 ix86_compare_op1 = hi[1];
11783 ix86_expand_branch (code, label);
11784 return;
11785 default:
11786 break;
11787 }
e075ae69 11788
3a3677ff 11789 /* Otherwise, we need two or three jumps. */
e075ae69 11790
3a3677ff 11791 label2 = gen_label_rtx ();
e075ae69 11792
3a3677ff
RH
11793 code1 = code;
11794 code2 = swap_condition (code);
11795 code3 = unsigned_condition (code);
e075ae69 11796
3a3677ff
RH
11797 switch (code)
11798 {
11799 case LT: case GT: case LTU: case GTU:
11800 break;
e075ae69 11801
3a3677ff
RH
11802 case LE: code1 = LT; code2 = GT; break;
11803 case GE: code1 = GT; code2 = LT; break;
11804 case LEU: code1 = LTU; code2 = GTU; break;
11805 case GEU: code1 = GTU; code2 = LTU; break;
e075ae69 11806
f822d252
ZW
11807 case EQ: code1 = UNKNOWN; code2 = NE; break;
11808 case NE: code2 = UNKNOWN; break;
e075ae69 11809
3a3677ff 11810 default:
d0396b79 11811 gcc_unreachable ();
3a3677ff 11812 }
e075ae69 11813
3a3677ff
RH
11814 /*
11815 * a < b =>
11816 * if (hi(a) < hi(b)) goto true;
11817 * if (hi(a) > hi(b)) goto false;
11818 * if (lo(a) < lo(b)) goto true;
11819 * false:
11820 */
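	 /* For example, "a <= b" uses code1 = LT, code2 = GT and code3 = LEU:
	    branch to label if hi(a) < hi(b), to label2 if hi(a) > hi(b),
	    and otherwise compare the low words unsigned.  */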
11821
11822 ix86_compare_op0 = hi[0];
11823 ix86_compare_op1 = hi[1];
11824
f822d252 11825 if (code1 != UNKNOWN)
3a3677ff 11826 ix86_expand_branch (code1, label);
f822d252 11827 if (code2 != UNKNOWN)
3a3677ff
RH
11828 ix86_expand_branch (code2, label2);
11829
11830 ix86_compare_op0 = lo[0];
11831 ix86_compare_op1 = lo[1];
11832 ix86_expand_branch (code3, label);
11833
f822d252 11834 if (code2 != UNKNOWN)
3a3677ff
RH
11835 emit_label (label2);
11836 return;
11837 }
e075ae69 11838
3a3677ff 11839 default:
d0396b79 11840 gcc_unreachable ();
3a3677ff 11841 }
32b5b1aa 11842}
e075ae69 11843
9e7adcb3
JH
11844/* Split branch based on floating point condition. */
11845void
b96a374d 11846ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
7c82106f 11847 rtx target1, rtx target2, rtx tmp, rtx pushed)
9e7adcb3
JH
11848{
11849 rtx second, bypass;
11850 rtx label = NULL_RTX;
03598dea 11851 rtx condition;
6b24c259
JH
11852 int bypass_probability = -1, second_probability = -1, probability = -1;
11853 rtx i;
9e7adcb3
JH
11854
11855 if (target2 != pc_rtx)
11856 {
11857 rtx tmp = target2;
11858 code = reverse_condition_maybe_unordered (code);
11859 target2 = target1;
11860 target1 = tmp;
11861 }
11862
11863 condition = ix86_expand_fp_compare (code, op1, op2,
11864 tmp, &second, &bypass);
6b24c259 11865
7c82106f
UB
11866 /* Remove pushed operand from stack. */
11867 if (pushed)
11868 ix86_free_from_memory (GET_MODE (pushed));
11869
6b24c259
JH
11870 if (split_branch_probability >= 0)
11871 {
11872 /* Distribute the probabilities across the jumps.
11873 Assume that BYPASS and SECOND always test
11874 for UNORDERED. */
11875 probability = split_branch_probability;
11876
d6a7951f 11877 /* A value of 1 is low enough that the probability does not need
6b24c259
JH
11878 to be updated. Later we may run some experiments and see
11879 whether unordered values are more frequent in practice. */
11880 if (bypass)
11881 bypass_probability = 1;
11882 if (second)
11883 second_probability = 1;
11884 }
9e7adcb3
JH
11885 if (bypass != NULL_RTX)
11886 {
11887 label = gen_label_rtx ();
6b24c259
JH
11888 i = emit_jump_insn (gen_rtx_SET
11889 (VOIDmode, pc_rtx,
11890 gen_rtx_IF_THEN_ELSE (VOIDmode,
11891 bypass,
11892 gen_rtx_LABEL_REF (VOIDmode,
11893 label),
11894 pc_rtx)));
11895 if (bypass_probability >= 0)
11896 REG_NOTES (i)
11897 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11898 GEN_INT (bypass_probability),
11899 REG_NOTES (i));
11900 }
11901 i = emit_jump_insn (gen_rtx_SET
9e7adcb3
JH
11902 (VOIDmode, pc_rtx,
11903 gen_rtx_IF_THEN_ELSE (VOIDmode,
6b24c259
JH
11904 condition, target1, target2)));
11905 if (probability >= 0)
11906 REG_NOTES (i)
11907 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11908 GEN_INT (probability),
11909 REG_NOTES (i));
11910 if (second != NULL_RTX)
9e7adcb3 11911 {
6b24c259
JH
11912 i = emit_jump_insn (gen_rtx_SET
11913 (VOIDmode, pc_rtx,
11914 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11915 target2)));
11916 if (second_probability >= 0)
11917 REG_NOTES (i)
11918 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11919 GEN_INT (second_probability),
11920 REG_NOTES (i));
9e7adcb3 11921 }
9e7adcb3
JH
11922 if (label != NULL_RTX)
11923 emit_label (label);
11924}
11925
32b5b1aa 11926int
b96a374d 11927ix86_expand_setcc (enum rtx_code code, rtx dest)
32b5b1aa 11928{
3a627503 11929 rtx ret, tmp, tmpreg, equiv;
a1b8572c 11930 rtx second_test, bypass_test;
e075ae69 11931
28356f52 11932 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
e075ae69
RH
11933 return 0; /* FAIL */
11934
d0396b79 11935 gcc_assert (GET_MODE (dest) == QImode);
e075ae69 11936
a1b8572c 11937 ret = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
11938 PUT_MODE (ret, QImode);
11939
11940 tmp = dest;
a1b8572c 11941 tmpreg = dest;
32b5b1aa 11942
e075ae69 11943 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
a1b8572c
JH
11944 if (bypass_test || second_test)
11945 {
11946 rtx test = second_test;
11947 int bypass = 0;
11948 rtx tmp2 = gen_reg_rtx (QImode);
11949 if (bypass_test)
11950 {
d0396b79 11951 gcc_assert (!second_test);
a1b8572c
JH
11952 test = bypass_test;
11953 bypass = 1;
11954 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11955 }
11956 PUT_MODE (test, QImode);
11957 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11958
11959 if (bypass)
11960 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11961 else
11962 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11963 }
e075ae69 11964
3a627503 11965 /* Attach a REG_EQUAL note describing the comparison result. */
1ef45b77
RH
11966 if (ix86_compare_op0 && ix86_compare_op1)
11967 {
11968 equiv = simplify_gen_relational (code, QImode,
11969 GET_MODE (ix86_compare_op0),
11970 ix86_compare_op0, ix86_compare_op1);
11971 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11972 }
3a627503 11973
e075ae69 11974 return 1; /* DONE */
32b5b1aa 11975}
e075ae69 11976
c35d187f
RH
11977/* Expand a comparison setting or clearing the carry flag. Return true
11978 when successful and set *POP to the comparison operation. */
11979static bool
b96a374d 11980ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
4977bab6
ZW
11981{
11982 enum machine_mode mode =
11983 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11984
27ac40e2
UB
11985 /* Do not handle DImode compares that go through the special path.
11986 Also we can't deal with FP compares yet. It would be possible to add this. */
28356f52 11987 if (mode == (TARGET_64BIT ? TImode : DImode))
e6e81735 11988 return false;
27ac40e2
UB
11989
11990 if (SCALAR_FLOAT_MODE_P (mode))
e6e81735
JH
11991 {
11992 rtx second_test = NULL, bypass_test = NULL;
11993 rtx compare_op, compare_seq;
11994
27ac40e2
UB
11995 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11996
11997 /* Shortcut: the following common codes never translate
11998 into carry flag compares. */
e6e81735
JH
11999 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12000 || code == ORDERED || code == UNORDERED)
12001 return false;
12002
12003 /* These comparisons require zero flag; swap operands so they won't. */
12004 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12005 && !TARGET_IEEE_FP)
12006 {
12007 rtx tmp = op0;
12008 op0 = op1;
12009 op1 = tmp;
12010 code = swap_condition (code);
12011 }
12012
c51e6d85
KH
12013 /* Try to expand the comparison and verify that we end up with a carry
12014 flag based comparison. This fails to be true only when we decide to
12015 expand the comparison using arithmetic, which is not a common scenario. */
e6e81735
JH
12016 start_sequence ();
12017 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12018 &second_test, &bypass_test);
12019 compare_seq = get_insns ();
12020 end_sequence ();
12021
12022 if (second_test || bypass_test)
12023 return false;
12024 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12025 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12026 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12027 else
12028 code = GET_CODE (compare_op);
12029 if (code != LTU && code != GEU)
12030 return false;
12031 emit_insn (compare_seq);
12032 *pop = compare_op;
12033 return true;
12034 }
12035 if (!INTEGRAL_MODE_P (mode))
4977bab6
ZW
12036 return false;
12037 switch (code)
12038 {
12039 case LTU:
12040 case GEU:
12041 break;
12042
12043 /* Convert a==0 into (unsigned)a<1. */
12044 case EQ:
12045 case NE:
12046 if (op1 != const0_rtx)
12047 return false;
12048 op1 = const1_rtx;
12049 code = (code == EQ ? LTU : GEU);
12050 break;
12051
12052 /* Convert a>b into b<a or a>=b+1. */
12053 case GTU:
12054 case LEU:
7656aee4 12055 if (CONST_INT_P (op1))
4977bab6
ZW
12056 {
12057 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12058 /* Bail out on overflow. We still can swap operands but that
43f3a59d 12059 would force loading of the constant into register. */
4977bab6
ZW
12060 if (op1 == const0_rtx
12061 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12062 return false;
12063 code = (code == GTU ? GEU : LTU);
12064 }
12065 else
12066 {
12067 rtx tmp = op1;
12068 op1 = op0;
12069 op0 = tmp;
12070 code = (code == GTU ? LTU : GEU);
12071 }
12072 break;
12073
ccea753c 12074 /* Convert a>=0 into (unsigned)a<0x80000000. */
4977bab6
ZW
12075 case LT:
12076 case GE:
12077 if (mode == DImode || op1 != const0_rtx)
12078 return false;
ccea753c 12079 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
4977bab6
ZW
12080 code = (code == LT ? GEU : LTU);
12081 break;
12082 case LE:
12083 case GT:
12084 if (mode == DImode || op1 != constm1_rtx)
12085 return false;
ccea753c 12086 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
4977bab6
ZW
12087 code = (code == LE ? GEU : LTU);
12088 break;
12089
12090 default:
12091 return false;
12092 }
ebe75517
JH
12093 /* Swapping operands may cause a constant to appear as the first operand. */
12094 if (!nonimmediate_operand (op0, VOIDmode))
12095 {
b3a13419 12096 if (!can_create_pseudo_p ())
ebe75517
JH
12097 return false;
12098 op0 = force_reg (mode, op0);
12099 }
4977bab6
ZW
12100 ix86_compare_op0 = op0;
12101 ix86_compare_op1 = op1;
12102 *pop = ix86_expand_compare (code, NULL, NULL);
d0396b79 12103 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
4977bab6
ZW
12104 return true;
12105}
12106
32b5b1aa 12107int
b96a374d 12108ix86_expand_int_movcc (rtx operands[])
32b5b1aa 12109{
e075ae69
RH
12110 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12111 rtx compare_seq, compare_op;
a1b8572c 12112 rtx second_test, bypass_test;
635559ab 12113 enum machine_mode mode = GET_MODE (operands[0]);
4977bab6 12114 bool sign_bit_compare_p = false;
3a3677ff 12115
e075ae69 12116 start_sequence ();
a1b8572c 12117 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
2f937369 12118 compare_seq = get_insns ();
e075ae69
RH
12119 end_sequence ();
12120
12121 compare_code = GET_CODE (compare_op);
12122
4977bab6
ZW
12123 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12124 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12125 sign_bit_compare_p = true;
12126
e075ae69
RH
12127 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12128 HImode insns, we'd be swallowed in word prefix ops. */
12129
4977bab6 12130 if ((mode != HImode || TARGET_FAST_PREFIX)
28356f52 12131 && (mode != (TARGET_64BIT ? TImode : DImode))
7656aee4
UB
12132 && CONST_INT_P (operands[2])
12133 && CONST_INT_P (operands[3]))
e075ae69
RH
12134 {
12135 rtx out = operands[0];
12136 HOST_WIDE_INT ct = INTVAL (operands[2]);
12137 HOST_WIDE_INT cf = INTVAL (operands[3]);
12138 HOST_WIDE_INT diff;
12139
4977bab6
ZW
12140 diff = ct - cf;
12141 /* Sign bit compares are better done using shifts than by using
b96a374d 12142 sbb. */
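	  /* e.g. "x < 0 ? -1 : 0" is just "sarl $31, x".  */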
4977bab6
ZW
12143 if (sign_bit_compare_p
12144 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12145 ix86_compare_op1, &compare_op))
e075ae69 12146 {
e075ae69
RH
12147 /* Detect overlap between destination and compare sources. */
12148 rtx tmp = out;
12149
4977bab6 12150 if (!sign_bit_compare_p)
36583fea 12151 {
e6e81735
JH
12152 bool fpcmp = false;
12153
4977bab6
ZW
12154 compare_code = GET_CODE (compare_op);
12155
e6e81735
JH
12156 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12157 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12158 {
12159 fpcmp = true;
12160 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12161 }
12162
4977bab6
ZW
12163 /* To simplify rest of code, restrict to the GEU case. */
12164 if (compare_code == LTU)
12165 {
12166 HOST_WIDE_INT tmp = ct;
12167 ct = cf;
12168 cf = tmp;
12169 compare_code = reverse_condition (compare_code);
12170 code = reverse_condition (code);
12171 }
e6e81735
JH
12172 else
12173 {
12174 if (fpcmp)
12175 PUT_CODE (compare_op,
12176 reverse_condition_maybe_unordered
12177 (GET_CODE (compare_op)));
12178 else
12179 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12180 }
4977bab6 12181 diff = ct - cf;
36583fea 12182
4977bab6
ZW
12183 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12184 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12185 tmp = gen_reg_rtx (mode);
e075ae69 12186
4977bab6 12187 if (mode == DImode)
e6e81735 12188 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
4977bab6 12189 else
e6e81735 12190 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
4977bab6 12191 }
14f73b5a 12192 else
4977bab6
ZW
12193 {
12194 if (code == GT || code == GE)
12195 code = reverse_condition (code);
12196 else
12197 {
12198 HOST_WIDE_INT tmp = ct;
12199 ct = cf;
12200 cf = tmp;
5fb48685 12201 diff = ct - cf;
4977bab6
ZW
12202 }
12203 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12204 ix86_compare_op1, VOIDmode, 0, -1);
12205 }
e075ae69 12206
36583fea
JH
12207 if (diff == 1)
12208 {
12209 /*
12210 * cmpl op0,op1
12211 * sbbl dest,dest
12212 * [addl dest, ct]
12213 *
12214 * Size 5 - 8.
12215 */
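	      /* sbb dest,dest materializes the carry flag as 0 or -1, so
		 with diff == 1 the addition below yields ct (no carry) or
		 ct - 1 == cf (carry).  */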
12216 if (ct)
b96a374d 12217 tmp = expand_simple_binop (mode, PLUS,
635559ab 12218 tmp, GEN_INT (ct),
4977bab6 12219 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
12220 }
12221 else if (cf == -1)
12222 {
12223 /*
12224 * cmpl op0,op1
12225 * sbbl dest,dest
12226 * orl $ct, dest
12227 *
12228 * Size 8.
12229 */
635559ab
JH
12230 tmp = expand_simple_binop (mode, IOR,
12231 tmp, GEN_INT (ct),
4977bab6 12232 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
12233 }
12234 else if (diff == -1 && ct)
12235 {
12236 /*
12237 * cmpl op0,op1
12238 * sbbl dest,dest
06ec023f 12239 * notl dest
36583fea
JH
12240 * [addl dest, cf]
12241 *
12242 * Size 8 - 11.
12243 */
4977bab6 12244 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
635559ab 12245 if (cf)
b96a374d 12246 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
12247 copy_rtx (tmp), GEN_INT (cf),
12248 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
12249 }
12250 else
12251 {
12252 /*
12253 * cmpl op0,op1
12254 * sbbl dest,dest
06ec023f 12255 * [notl dest]
36583fea
JH
12256 * andl cf - ct, dest
12257 * [addl dest, ct]
12258 *
12259 * Size 8 - 11.
12260 */
06ec023f
RB
12261
12262 if (cf == 0)
12263 {
12264 cf = ct;
12265 ct = 0;
4977bab6 12266 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
06ec023f
RB
12267 }
12268
635559ab 12269 tmp = expand_simple_binop (mode, AND,
4977bab6 12270 copy_rtx (tmp),
d8bf17f9 12271 gen_int_mode (cf - ct, mode),
4977bab6 12272 copy_rtx (tmp), 1, OPTAB_DIRECT);
635559ab 12273 if (ct)
b96a374d 12274 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
12275 copy_rtx (tmp), GEN_INT (ct),
12276 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea 12277 }
e075ae69 12278
4977bab6
ZW
12279 if (!rtx_equal_p (tmp, out))
12280 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
e075ae69
RH
12281
12282 return 1; /* DONE */
12283 }
12284
e075ae69
RH
12285 if (diff < 0)
12286 {
27ac40e2
UB
12287 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12288
e075ae69
RH
12289 HOST_WIDE_INT tmp;
12290 tmp = ct, ct = cf, cf = tmp;
12291 diff = -diff;
27ac40e2
UB
12292
12293 if (SCALAR_FLOAT_MODE_P (cmp_mode))
734dba19 12294 {
27ac40e2
UB
12295 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12296
734dba19
JH
12297 /* We may be reversing unordered compare to normal compare, that
12298 is not valid in general (we may convert non-trapping condition
12299 to trapping one), however on i386 we currently emit all
12300 comparisons unordered. */
12301 compare_code = reverse_condition_maybe_unordered (compare_code);
12302 code = reverse_condition_maybe_unordered (code);
12303 }
12304 else
12305 {
12306 compare_code = reverse_condition (compare_code);
12307 code = reverse_condition (code);
12308 }
e075ae69 12309 }
0f2a3457 12310
f822d252 12311 compare_code = UNKNOWN;
0f2a3457 12312 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
7656aee4 12313 && CONST_INT_P (ix86_compare_op1))
0f2a3457
JJ
12314 {
12315 if (ix86_compare_op1 == const0_rtx
12316 && (code == LT || code == GE))
12317 compare_code = code;
12318 else if (ix86_compare_op1 == constm1_rtx)
12319 {
12320 if (code == LE)
12321 compare_code = LT;
12322 else if (code == GT)
12323 compare_code = GE;
12324 }
12325 }
12326
12327 /* Optimize dest = (op0 < 0) ? -1 : cf. */
f822d252 12328 if (compare_code != UNKNOWN
0f2a3457
JJ
12329 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12330 && (cf == -1 || ct == -1))
12331 {
12332 /* If the lea code below could be used, only optimize
12333 if it results in a 2-insn sequence. */
12334
12335 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12336 || diff == 3 || diff == 5 || diff == 9)
12337 || (compare_code == LT && ct == -1)
12338 || (compare_code == GE && cf == -1))
12339 {
12340 /*
12341 * notl op1 (if necessary)
12342 * sarl $31, op1
12343 * orl cf, op1
12344 */
12345 if (ct != -1)
12346 {
12347 cf = ct;
b96a374d 12348 ct = -1;
0f2a3457
JJ
12349 code = reverse_condition (code);
12350 }
12351
12352 out = emit_store_flag (out, code, ix86_compare_op0,
12353 ix86_compare_op1, VOIDmode, 0, -1);
12354
12355 out = expand_simple_binop (mode, IOR,
12356 out, GEN_INT (cf),
12357 out, 1, OPTAB_DIRECT);
12358 if (out != operands[0])
12359 emit_move_insn (operands[0], out);
12360
12361 return 1; /* DONE */
12362 }
12363 }
12364
4977bab6 12365
635559ab
JH
12366 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12367 || diff == 3 || diff == 5 || diff == 9)
4977bab6 12368 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
8fe75e43
RH
12369 && (mode != DImode
12370 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
e075ae69
RH
12371 {
12372 /*
12373 * xorl dest,dest
12374 * cmpl op1,op2
12375 * setcc dest
12376 * lea cf(dest*(ct-cf)),dest
12377 *
12378 * Size 14.
12379 *
12380 * This also catches the degenerate setcc-only case.
12381 */
12382
12383 rtx tmp;
12384 int nops;
12385
12386 out = emit_store_flag (out, code, ix86_compare_op0,
12387 ix86_compare_op1, VOIDmode, 0, 1);
12388
12389 nops = 0;
97f51ac4
RB
12390 /* On x86_64 the lea instruction operates on Pmode, so we need
12391 to get arithmetics done in proper mode to match. */
e075ae69 12392 if (diff == 1)
068f5dea 12393 tmp = copy_rtx (out);
e075ae69
RH
12394 else
12395 {
885a70fd 12396 rtx out1;
068f5dea 12397 out1 = copy_rtx (out);
635559ab 12398 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
e075ae69
RH
12399 nops++;
12400 if (diff & 1)
12401 {
635559ab 12402 tmp = gen_rtx_PLUS (mode, tmp, out1);
e075ae69
RH
12403 nops++;
12404 }
12405 }
12406 if (cf != 0)
12407 {
635559ab 12408 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
e075ae69
RH
12409 nops++;
12410 }
4977bab6 12411 if (!rtx_equal_p (tmp, out))
e075ae69 12412 {
14f73b5a 12413 if (nops == 1)
a5cf80f0 12414 out = force_operand (tmp, copy_rtx (out));
e075ae69 12415 else
4977bab6 12416 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
e075ae69 12417 }
4977bab6 12418 if (!rtx_equal_p (out, operands[0]))
1985ef90 12419 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
12420
12421 return 1; /* DONE */
12422 }
12423
12424 /*
12425 * General case: Jumpful:
12426 * xorl dest,dest cmpl op1, op2
12427 * cmpl op1, op2 movl ct, dest
12428 * setcc dest jcc 1f
12429 * decl dest movl cf, dest
12430 * andl (cf-ct),dest 1:
12431 * addl ct,dest
0f290768 12432 *
e075ae69
RH
12433 * Size 20. Size 14.
12434 *
12435 * This is reasonably steep, but branch mispredict costs are
12436 * high on modern CPUs, so consider failing only if optimizing
12437 * for space.
e075ae69
RH
12438 */
12439
4977bab6
ZW
12440 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12441 && BRANCH_COST >= 2)
e075ae69 12442 {
97f51ac4 12443 if (cf == 0)
e075ae69 12444 {
27ac40e2
UB
12445 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12446
97f51ac4
RB
12447 cf = ct;
12448 ct = 0;
27ac40e2
UB
12449
12450 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12451 {
12452 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12453
12454 /* We may be reversing unordered compare to normal compare,
12455 that is not valid in general (we may convert non-trapping
12456 condition to trapping one), however on i386 we currently
12457 emit all comparisons unordered. */
12458 code = reverse_condition_maybe_unordered (code);
12459 }
0f2a3457
JJ
12460 else
12461 {
12462 code = reverse_condition (code);
f822d252 12463 if (compare_code != UNKNOWN)
0f2a3457
JJ
12464 compare_code = reverse_condition (compare_code);
12465 }
12466 }
12467
f822d252 12468 if (compare_code != UNKNOWN)
0f2a3457
JJ
12469 {
12470 /* notl op1 (if needed)
12471 sarl $31, op1
12472 andl (cf-ct), op1
b96a374d 12473 addl ct, op1
0f2a3457
JJ
12474
12475 For x < 0 (resp. x <= -1) there will be no notl,
12476 so if possible swap the constants to get rid of the
12477 complement.
12478 True/false will be -1/0 while code below (store flag
12479 followed by decrement) is 0/-1, so the constants need
12480 to be exchanged once more. */
12481
12482 if (compare_code == GE || !cf)
734dba19 12483 {
b96a374d 12484 code = reverse_condition (code);
0f2a3457 12485 compare_code = LT;
734dba19
JH
12486 }
12487 else
12488 {
0f2a3457 12489 HOST_WIDE_INT tmp = cf;
b96a374d 12490 cf = ct;
0f2a3457 12491 ct = tmp;
734dba19 12492 }
0f2a3457
JJ
12493
12494 out = emit_store_flag (out, code, ix86_compare_op0,
12495 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 12496 }
0f2a3457
JJ
12497 else
12498 {
12499 out = emit_store_flag (out, code, ix86_compare_op0,
12500 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 12501
4977bab6
ZW
12502 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12503 copy_rtx (out), 1, OPTAB_DIRECT);
0f2a3457 12504 }
e075ae69 12505
4977bab6 12506 out = expand_simple_binop (mode, AND, copy_rtx (out),
d8bf17f9 12507 gen_int_mode (cf - ct, mode),
4977bab6 12508 copy_rtx (out), 1, OPTAB_DIRECT);
97f51ac4 12509 if (ct)
4977bab6
ZW
12510 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12511 copy_rtx (out), 1, OPTAB_DIRECT);
12512 if (!rtx_equal_p (out, operands[0]))
12513 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
12514
12515 return 1; /* DONE */
12516 }
12517 }
12518
4977bab6 12519 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
e075ae69
RH
12520 {
12521 /* Try a few more things with specific constants and a variable. */
12522
78a0d70c 12523 optab op;
e075ae69
RH
12524 rtx var, orig_out, out, tmp;
12525
4977bab6 12526 if (BRANCH_COST <= 2)
e075ae69
RH
12527 return 0; /* FAIL */
12528
0f290768 12529 /* If one of the two operands is an interesting constant, load a
e075ae69 12530 constant with the above and mask it in with a logical operation. */
0f290768 12531
7656aee4 12532 if (CONST_INT_P (operands[2]))
e075ae69
RH
12533 {
12534 var = operands[3];
4977bab6 12535 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
e075ae69 12536 operands[3] = constm1_rtx, op = and_optab;
4977bab6 12537 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
e075ae69 12538 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
12539 else
12540 return 0; /* FAIL */
e075ae69 12541 }
7656aee4 12542 else if (CONST_INT_P (operands[3]))
e075ae69
RH
12543 {
12544 var = operands[2];
4977bab6 12545 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
e075ae69 12546 operands[2] = constm1_rtx, op = and_optab;
4977bab6 12547 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
e075ae69 12548 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
12549 else
12550 return 0; /* FAIL */
e075ae69 12551 }
78a0d70c 12552 else
e075ae69
RH
12553 return 0; /* FAIL */
12554
12555 orig_out = operands[0];
635559ab 12556 tmp = gen_reg_rtx (mode);
e075ae69
RH
12557 operands[0] = tmp;
12558
12559 /* Recurse to get the constant loaded. */
12560 if (ix86_expand_int_movcc (operands) == 0)
12561 return 0; /* FAIL */
12562
12563 /* Mask in the interesting variable. */
635559ab 12564 out = expand_binop (mode, op, var, tmp, orig_out, 0,
e075ae69 12565 OPTAB_WIDEN);
4977bab6
ZW
12566 if (!rtx_equal_p (out, orig_out))
12567 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
e075ae69
RH
12568
12569 return 1; /* DONE */
12570 }
12571
12572 /*
12573 * For comparison with above,
12574 *
12575 * movl cf,dest
12576 * movl ct,tmp
12577 * cmpl op1,op2
12578 * cmovcc tmp,dest
12579 *
12580 * Size 15.
12581 */
12582
635559ab
JH
12583 if (! nonimmediate_operand (operands[2], mode))
12584 operands[2] = force_reg (mode, operands[2]);
12585 if (! nonimmediate_operand (operands[3], mode))
12586 operands[3] = force_reg (mode, operands[3]);
e075ae69 12587
a1b8572c
JH
12588 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12589 {
635559ab 12590 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
12591 emit_move_insn (tmp, operands[3]);
12592 operands[3] = tmp;
12593 }
12594 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12595 {
635559ab 12596 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
12597 emit_move_insn (tmp, operands[2]);
12598 operands[2] = tmp;
12599 }
4977bab6 12600
c9682caf 12601 if (! register_operand (operands[2], VOIDmode)
b96a374d 12602 && (mode == QImode
4977bab6 12603 || ! register_operand (operands[3], VOIDmode)))
635559ab 12604 operands[2] = force_reg (mode, operands[2]);
a1b8572c 12605
4977bab6
ZW
12606 if (mode == QImode
12607 && ! register_operand (operands[3], VOIDmode))
12608 operands[3] = force_reg (mode, operands[3]);
12609
e075ae69
RH
12610 emit_insn (compare_seq);
12611 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 12612 gen_rtx_IF_THEN_ELSE (mode,
e075ae69
RH
12613 compare_op, operands[2],
12614 operands[3])));
a1b8572c 12615 if (bypass_test)
4977bab6 12616 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 12617 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 12618 bypass_test,
4977bab6
ZW
12619 copy_rtx (operands[3]),
12620 copy_rtx (operands[0]))));
a1b8572c 12621 if (second_test)
4977bab6 12622 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 12623 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 12624 second_test,
4977bab6
ZW
12625 copy_rtx (operands[2]),
12626 copy_rtx (operands[0]))));
e075ae69
RH
12627
12628 return 1; /* DONE */
e9a25f70 12629}
e075ae69 12630
ab8efbd8
RH
12631/* Swap, force into registers, or otherwise massage the two operands
12632 to an SSE comparison with a mask result. Thus we differ a bit from
12633 ix86_prepare_fp_compare_args which expects to produce a flags result.
12634
12635 The DEST operand exists to help determine whether to commute commutative
12636 operators. The POP0/POP1 operands are updated in place. The new
12637 comparison code is returned, or UNKNOWN if not implementable. */
12638
12639static enum rtx_code
12640ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12641 rtx *pop0, rtx *pop1)
12642{
12643 rtx tmp;
12644
12645 switch (code)
12646 {
12647 case LTGT:
12648 case UNEQ:
12649 /* We have no LTGT as an operator. We could implement it with
12650 NE & ORDERED, but this requires an extra temporary. It's
12651 not clear that it's worth it. */
12652 return UNKNOWN;
12653
12654 case LT:
12655 case LE:
12656 case UNGT:
12657 case UNGE:
12658 /* These are supported directly. */
12659 break;
12660
12661 case EQ:
12662 case NE:
12663 case UNORDERED:
12664 case ORDERED:
12665 /* For commutative operators, try to canonicalize the destination
12666 operand to be first in the comparison - this helps reload to
12667 avoid extra moves. */
12668 if (!dest || !rtx_equal_p (dest, *pop1))
12669 break;
12670 /* FALLTHRU */
12671
12672 case GE:
12673 case GT:
12674 case UNLE:
12675 case UNLT:
12676 /* These are not supported directly. Swap the comparison operands
12677 to transform into something that is supported. */
12678 tmp = *pop0;
12679 *pop0 = *pop1;
12680 *pop1 = tmp;
12681 code = swap_condition (code);
12682 break;
12683
12684 default:
12685 gcc_unreachable ();
12686 }
12687
12688 return code;
12689}
12690
12691/* Detect conditional moves that exactly match min/max operational
12692 semantics. Note that this is IEEE safe, as long as we don't
12693 interchange the operands.
12694
12695 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12696 and TRUE if the operation is successful and instructions are emitted. */
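/* (The SSE min/max instructions return their second operand when either
   input is a NaN, which is why the operand order must be preserved.)  */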
12697
12698static bool
12699ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12700 rtx cmp_op1, rtx if_true, rtx if_false)
12701{
12702 enum machine_mode mode;
12703 bool is_min;
12704 rtx tmp;
12705
12706 if (code == LT)
12707 ;
12708 else if (code == UNGE)
12709 {
12710 tmp = if_true;
12711 if_true = if_false;
12712 if_false = tmp;
12713 }
12714 else
12715 return false;
12716
12717 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12718 is_min = true;
12719 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12720 is_min = false;
12721 else
12722 return false;
12723
12724 mode = GET_MODE (dest);
12725
12726 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12727 but MODE may be a vector mode and thus not appropriate. */
12728 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12729 {
12730 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12731 rtvec v;
12732
12733 if_true = force_reg (mode, if_true);
12734 v = gen_rtvec (2, if_true, if_false);
12735 tmp = gen_rtx_UNSPEC (mode, v, u);
12736 }
12737 else
12738 {
12739 code = is_min ? SMIN : SMAX;
12740 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12741 }
12742
12743 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12744 return true;
12745}
12746
ae46a07a
RH
12747/* Expand an SSE vector comparison. Return the register with the result. */
12748
12749static rtx
12750ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12751 rtx op_true, rtx op_false)
ab8efbd8
RH
12752{
12753 enum machine_mode mode = GET_MODE (dest);
ae46a07a 12754 rtx x;
ab8efbd8
RH
12755
12756 cmp_op0 = force_reg (mode, cmp_op0);
12757 if (!nonimmediate_operand (cmp_op1, mode))
12758 cmp_op1 = force_reg (mode, cmp_op1);
12759
12760 if (optimize
12761 || reg_overlap_mentioned_p (dest, op_true)
12762 || reg_overlap_mentioned_p (dest, op_false))
ae46a07a 12763 dest = gen_reg_rtx (mode);
ab8efbd8
RH
12764
12765 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
ae46a07a
RH
12766 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12767
12768 return dest;
12769}
12770
12771/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12772 operations. This is used for both scalar and vector conditional moves. */
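/* In the general case this computes
   dest = (cmp & op_true) | (~cmp & op_false);
   the special cases below drop whichever arm is known to be zero.  */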
12773
12774static void
12775ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12776{
12777 enum machine_mode mode = GET_MODE (dest);
12778 rtx t2, t3, x;
ab8efbd8
RH
12779
12780 if (op_false == CONST0_RTX (mode))
12781 {
12782 op_true = force_reg (mode, op_true);
ae46a07a 12783 x = gen_rtx_AND (mode, cmp, op_true);
ab8efbd8
RH
12784 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12785 }
12786 else if (op_true == CONST0_RTX (mode))
12787 {
12788 op_false = force_reg (mode, op_false);
ae46a07a 12789 x = gen_rtx_NOT (mode, cmp);
ab8efbd8
RH
12790 x = gen_rtx_AND (mode, x, op_false);
12791 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12792 }
12793 else
12794 {
12795 op_true = force_reg (mode, op_true);
12796 op_false = force_reg (mode, op_false);
12797
12798 t2 = gen_reg_rtx (mode);
12799 if (optimize)
12800 t3 = gen_reg_rtx (mode);
12801 else
12802 t3 = dest;
12803
ae46a07a 12804 x = gen_rtx_AND (mode, op_true, cmp);
ab8efbd8
RH
12805 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12806
ae46a07a 12807 x = gen_rtx_NOT (mode, cmp);
ab8efbd8
RH
12808 x = gen_rtx_AND (mode, x, op_false);
12809 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12810
12811 x = gen_rtx_IOR (mode, t3, t2);
12812 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12813 }
12814}
12815
ae46a07a
RH
12816/* Expand a floating-point conditional move. Return true if successful. */
12817
32b5b1aa 12818int
b96a374d 12819ix86_expand_fp_movcc (rtx operands[])
32b5b1aa 12820{
eaa49b49
RH
12821 enum machine_mode mode = GET_MODE (operands[0]);
12822 enum rtx_code code = GET_CODE (operands[1]);
12823 rtx tmp, compare_op, second_test, bypass_test;
12824
12825 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12826 {
ab8efbd8 12827 enum machine_mode cmode;
eaa49b49
RH
12828
12829 /* Since we have no cmove for SSE registers, don't force bad register
12830 allocation just to gain access to it. Deny movcc when the
12831 comparison mode doesn't match the move mode. */
ab8efbd8 12832 cmode = GET_MODE (ix86_compare_op0);
eaa49b49 12833 if (cmode == VOIDmode)
ab8efbd8 12834 cmode = GET_MODE (ix86_compare_op1);
eaa49b49
RH
12835 if (cmode != mode)
12836 return 0;
12837
ab8efbd8
RH
12838 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12839 &ix86_compare_op0,
12840 &ix86_compare_op1);
12841 if (code == UNKNOWN)
51d7bae6
RH
12842 return 0;
12843
ab8efbd8
RH
12844 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12845 ix86_compare_op1, operands[2],
12846 operands[3]))
12847 return 1;
eaa49b49 12848
ae46a07a
RH
12849 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12850 ix86_compare_op1, operands[2], operands[3]);
12851 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
0073023d
JH
12852 return 1;
12853 }
12854
e075ae69 12855 /* The floating point conditional move instructions don't directly
0f290768 12856 support conditions resulting from a signed integer comparison. */
32b5b1aa 12857
a1b8572c 12858 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9e7adcb3
JH
12859
a1b8572c 12863 if (!fcmov_comparison_operator (compare_op, VOIDmode))
e075ae69 12864 {
d0396b79 12865 gcc_assert (!second_test && !bypass_test);
e075ae69 12866 tmp = gen_reg_rtx (QImode);
3a3677ff 12867 ix86_expand_setcc (code, tmp);
e075ae69
RH
12868 code = NE;
12869 ix86_compare_op0 = tmp;
12870 ix86_compare_op1 = const0_rtx;
a1b8572c
JH
12871 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12872 }
12873 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12874 {
eaa49b49 12875 tmp = gen_reg_rtx (mode);
a1b8572c
JH
12876 emit_move_insn (tmp, operands[3]);
12877 operands[3] = tmp;
12878 }
12879 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12880 {
eaa49b49 12881 tmp = gen_reg_rtx (mode);
a1b8572c
JH
12882 emit_move_insn (tmp, operands[2]);
12883 operands[2] = tmp;
e075ae69 12884 }
e9a25f70 12885
e075ae69 12886 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
eaa49b49
RH
12887 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12888 operands[2], operands[3])));
a1b8572c
JH
12889 if (bypass_test)
12890 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
eaa49b49
RH
12891 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12892 operands[3], operands[0])));
a1b8572c
JH
12893 if (second_test)
12894 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
eaa49b49
RH
12895 gen_rtx_IF_THEN_ELSE (mode, second_test,
12896 operands[2], operands[0])));
32b5b1aa 12897
e075ae69 12898 return 1;
32b5b1aa
SC
12899}
12900
ae46a07a
RH
12901/* Expand a floating-point vector conditional move; a vcond operation
12902 rather than a movcc operation. */
12903
12904bool
12905ix86_expand_fp_vcond (rtx operands[])
12906{
12907 enum rtx_code code = GET_CODE (operands[3]);
12908 rtx cmp;
12909
12910 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12911 &operands[4], &operands[5]);
12912 if (code == UNKNOWN)
12913 return false;
12914
12915 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12916 operands[5], operands[1], operands[2]))
12917 return true;
12918
12919 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12920 operands[1], operands[2]);
12921 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12922 return true;
12923}
12924
3b8dd071 12925/* Expand a signed/unsigned integral vector conditional move. */
ae46a07a
RH
12926
12927bool
9fb93f89 12928ix86_expand_int_vcond (rtx operands[])
ae46a07a
RH
12929{
12930 enum machine_mode mode = GET_MODE (operands[0]);
12931 enum rtx_code code = GET_CODE (operands[3]);
9fb93f89
RH
12932 bool negate = false;
12933 rtx x, cop0, cop1;
ae46a07a 12934
9fb93f89
RH
12935 cop0 = operands[4];
12936 cop1 = operands[5];
12937
12938 /* Canonicalize the comparison to EQ, GT, GTU. */
12939 switch (code)
ae46a07a 12940 {
9fb93f89
RH
12941 case EQ:
12942 case GT:
12943 case GTU:
12944 break;
12945
12946 case NE:
12947 case LE:
12948 case LEU:
ae46a07a 12949 code = reverse_condition (code);
9fb93f89
RH
12950 negate = true;
12951 break;
12952
12953 case GE:
12954 case GEU:
12955 code = reverse_condition (code);
12956 negate = true;
12957 /* FALLTHRU */
12958
12959 case LT:
12960 case LTU:
ae46a07a 12961 code = swap_condition (code);
9fb93f89
RH
12962 x = cop0, cop0 = cop1, cop1 = x;
12963 break;
ae46a07a 12964
9fb93f89
RH
12965 default:
12966 gcc_unreachable ();
12967 }
ae46a07a 12968
3b8dd071
L
12969 /* Only SSE4.1/SSE4.2 support V2DImode. */
12970 if (mode == V2DImode)
12971 {
12972 switch (code)
12973 {
12974 case EQ:
12975 /* SSE4.1 supports EQ. */
12976 if (!TARGET_SSE4_1)
12977 return false;
12978 break;
12979
12980 case GT:
12981 case GTU:
12982 /* SSE4.2 supports GT/GTU. */
12983 if (!TARGET_SSE4_2)
12984 return false;
12985 break;
12986
12987 default:
12988 gcc_unreachable ();
12989 }
12990 }
12991
9fb93f89
RH
12992 /* Unsigned parallel compare is not supported by the hardware. Play some
12993 tricks to turn this into a signed comparison against 0. */
12994 if (code == GTU)
ae46a07a 12995 {
55b2de75
UB
12996 cop0 = force_reg (mode, cop0);
12997
ae46a07a
RH
12998 switch (mode)
12999 {
9fb93f89 13000 case V4SImode:
3b8dd071 13001 case V2DImode:
9fb93f89
RH
13002 {
13003 rtx t1, t2, mask;
13004
13005 /* Perform a parallel modulo subtraction. */
13006 t1 = gen_reg_rtx (mode);
3b8dd071
L
13007 emit_insn ((mode == V4SImode
13008 ? gen_subv4si3
13009 : gen_subv2di3) (t1, cop0, cop1));
9fb93f89
RH
13010
13011 /* Extract the original sign bit of op0. */
3b8dd071
L
13012 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13013 true, false);
9fb93f89 13014 t2 = gen_reg_rtx (mode);
3b8dd071
L
13015 emit_insn ((mode == V4SImode
13016 ? gen_andv4si3
13017 : gen_andv2di3) (t2, cop0, mask));
9fb93f89
RH
13018
13019 /* XOR it back into the result of the subtraction. This leaves
13020 the sign bit set iff we saw unsigned underflow. */
13021 x = gen_reg_rtx (mode);
3b8dd071
L
13022 emit_insn ((mode == V4SImode
13023 ? gen_xorv4si3
13024 : gen_xorv2di3) (x, t1, t2));
9fb93f89
RH
13025
13026 code = GT;
13027 }
ae46a07a 13028 break;
9fb93f89
RH
13029
13030 case V16QImode:
ae46a07a 13031 case V8HImode:
9fb93f89
RH
13032 /* Perform a parallel unsigned saturating subtraction. */
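	  /* (a -us b) is nonzero exactly when a >u b, so the result can
	     be tested for equality against zero, with the arms inverted.  */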
13033 x = gen_reg_rtx (mode);
13034 emit_insn (gen_rtx_SET (VOIDmode, x,
13035 gen_rtx_US_MINUS (mode, cop0, cop1)));
13036
13037 code = EQ;
13038 negate = !negate;
ae46a07a 13039 break;
9fb93f89 13040
ae46a07a
RH
13041 default:
13042 gcc_unreachable ();
13043 }
13044
9fb93f89
RH
13045 cop0 = x;
13046 cop1 = CONST0_RTX (mode);
ae46a07a 13047 }
ae46a07a 13048
9fb93f89
RH
13049 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13050 operands[1+negate], operands[2-negate]);
13051
13052 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13053 operands[2-negate]);
ae46a07a
RH
13054 return true;
13055}
13056
89d67cca
DN
13057/* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13058 true if we should do zero extension, else sign extension. HIGH_P is
13059 true if we want the N/2 high elements, else the low elements. */
13060
13061void
13062ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13063{
13064 enum machine_mode imode = GET_MODE (operands[1]);
13065 rtx (*unpack)(rtx, rtx, rtx);
13066 rtx se, dest;
13067
13068 switch (imode)
13069 {
13070 case V16QImode:
13071 if (high_p)
13072 unpack = gen_vec_interleave_highv16qi;
13073 else
13074 unpack = gen_vec_interleave_lowv16qi;
13075 break;
13076 case V8HImode:
13077 if (high_p)
13078 unpack = gen_vec_interleave_highv8hi;
13079 else
13080 unpack = gen_vec_interleave_lowv8hi;
13081 break;
13082 case V4SImode:
13083 if (high_p)
13084 unpack = gen_vec_interleave_highv4si;
54a88090 13085 else
89d67cca
DN
13086 unpack = gen_vec_interleave_lowv4si;
13087 break;
13088 default:
54a88090 13089 gcc_unreachable ();
89d67cca
DN
13090 }
13091
13092 dest = gen_lowpart (imode, operands[0]);
13093
13094 if (unsigned_p)
13095 se = force_reg (imode, CONST0_RTX (imode));
13096 else
13097 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13098 operands[1], pc_rtx, pc_rtx);
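    /* The comparison "0 > x" yields an all-ones element exactly where x
       is negative, which is the sign extension needed for the high half.  */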
13099
13100 emit_insn (unpack (dest, operands[1], se));
13101}
13102
e5ac0b9b
L
13103/* This function performs the same task as ix86_expand_sse_unpack,
13104 but with SSE4.1 instructions. */
13105
13106void
13107ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13108{
13109 enum machine_mode imode = GET_MODE (operands[1]);
13110 rtx (*unpack)(rtx, rtx);
13111 rtx src, dest;
13112
13113 switch (imode)
13114 {
13115 case V16QImode:
13116 if (unsigned_p)
13117 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13118 else
13119 unpack = gen_sse4_1_extendv8qiv8hi2;
13120 break;
13121 case V8HImode:
13122 if (unsigned_p)
13123 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13124 else
13125 unpack = gen_sse4_1_extendv4hiv4si2;
13126 break;
13127 case V4SImode:
13128 if (unsigned_p)
13129 unpack = gen_sse4_1_zero_extendv2siv2di2;
13130 else
13131 unpack = gen_sse4_1_extendv2siv2di2;
13132 break;
13133 default:
13134 gcc_unreachable ();
13135 }
13136
13137 dest = operands[0];
13138 if (high_p)
13139 {
13140 /* Shift the higher 8 bytes into the lower 8 bytes. */
13141 src = gen_reg_rtx (imode);
13142 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13143 gen_lowpart (TImode, operands[1]),
13144 GEN_INT (64)));
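      /* A TImode logical shift right by 64 is effectively a psrldq by
	 8 bytes, moving the high elements into the low half that the
	 extension patterns read.  */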
13145 }
13146 else
13147 src = operands[1];
13148
13149 emit_insn (unpack (dest, src));
13150}
13151
7b52eede
JH
13152/* Expand conditional increment or decrement using adc/sbb instructions.
13153 The default case using setcc followed by a conditional move can be
13154 done by generic code. */
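/* For example, "dest = src + (a <u b)" can be emitted as a compare
   followed by an adc of zero, with no setcc or conditional move.  */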
13155int
b96a374d 13156ix86_expand_int_addcc (rtx operands[])
7b52eede
JH
13157{
13158 enum rtx_code code = GET_CODE (operands[1]);
13159 rtx compare_op;
13160 rtx val = const0_rtx;
e6e81735 13161 bool fpcmp = false;
e6e81735 13162 enum machine_mode mode = GET_MODE (operands[0]);
7b52eede
JH
13163
13164 if (operands[3] != const1_rtx
13165 && operands[3] != constm1_rtx)
13166 return 0;
13167 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13168 ix86_compare_op1, &compare_op))
13169 return 0;
e6e81735
JH
13170 code = GET_CODE (compare_op);
13171
13172 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13173 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13174 {
13175 fpcmp = true;
13176 code = ix86_fp_compare_code_to_integer (code);
13177 }
13178
13179 if (code != LTU)
13180 {
13181 val = constm1_rtx;
13182 if (fpcmp)
13183 PUT_CODE (compare_op,
13184 reverse_condition_maybe_unordered
13185 (GET_CODE (compare_op)));
13186 else
13187 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13188 }
13189 PUT_MODE (compare_op, mode);
13190
13191 /* Construct either adc or sbb insn. */
13192 if ((code == LTU) == (operands[3] == constm1_rtx))
7b52eede
JH
13193 {
13194 switch (GET_MODE (operands[0]))
13195 {
13196 case QImode:
e6e81735 13197 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
13198 break;
13199 case HImode:
e6e81735 13200 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
13201 break;
13202 case SImode:
e6e81735 13203 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
13204 break;
13205 case DImode:
e6e81735 13206 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
7b52eede
JH
13207 break;
13208 default:
d0396b79 13209 gcc_unreachable ();
7b52eede
JH
13210 }
13211 }
13212 else
13213 {
13214 switch (GET_MODE (operands[0]))
13215 {
13216 case QImode:
e6e81735 13217 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
13218 break;
13219 case HImode:
e6e81735 13220 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
13221 break;
13222 case SImode:
e6e81735 13223 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
13224 break;
13225 case DImode:
e6e81735 13226 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
7b52eede
JH
13227 break;
13228 default:
d0396b79 13229 gcc_unreachable ();
7b52eede
JH
13230 }
13231 }
13232 return 1; /* DONE */
13233}
13234
13235
2450a057
JH
13236/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13237 works for floating point parameters and non-offsettable memories.
13238 For pushes, it returns just stack offsets; the values will be saved
13239 in the right order. At most three parts are generated. */
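/* For example, on a 32-bit target an XFmode value yields three SImode
   parts and a DImode value two; on a 64-bit target XFmode yields two.  */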
13240
2b589241 13241static int
b96a374d 13242ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
32b5b1aa 13243{
26e5b205
JH
13244 int size;
13245
13246 if (!TARGET_64BIT)
f8a1ebc6 13247 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
26e5b205
JH
13248 else
13249 size = (GET_MODE_SIZE (mode) + 4) / 8;
2450a057 13250
7656aee4 13251 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
d0396b79 13252 gcc_assert (size >= 2 && size <= 3);
2450a057 13253
f996902d
RH
13254 /* Optimize constant pool references to immediates. This is used by fp
13255 moves that force all constants to memory to allow combining. */
7656aee4 13256 if (MEM_P (operand) && MEM_READONLY_P (operand))
f996902d
RH
13257 {
13258 rtx tmp = maybe_get_pool_constant (operand);
13259 if (tmp)
13260 operand = tmp;
13261 }
d7a29404 13262
7656aee4 13263 if (MEM_P (operand) && !offsettable_memref_p (operand))
e075ae69 13264 {
2450a057 13265 /* The only non-offsettable memories we handle are pushes. */
d0396b79 13266 int ok = push_operand (operand, VOIDmode);
5656a184 13267
d0396b79 13268 gcc_assert (ok);
5656a184 13269
26e5b205
JH
13270 operand = copy_rtx (operand);
13271 PUT_MODE (operand, Pmode);
2450a057 13272 parts[0] = parts[1] = parts[2] = operand;
b4e82619 13273 return size;
2450a057 13274 }
b4e82619
RH
13275
13276 if (GET_CODE (operand) == CONST_VECTOR)
13277 {
13278 enum machine_mode imode = int_mode_for_mode (mode);
bd08db74
RH
13279 /* Caution: if we looked through a constant pool memory above,
13280 the operand may actually have a different mode now. That's
13281 ok, since we want to pun this all the way back to an integer. */
13282 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
b4e82619
RH
13283 gcc_assert (operand != NULL);
13284 mode = imode;
13285 }
13286
13287 if (!TARGET_64BIT)
2450a057
JH
13288 {
13289 if (mode == DImode)
13290 split_di (&operand, 1, &parts[0], &parts[1]);
13291 else
e075ae69 13292 {
2450a057
JH
13293 if (REG_P (operand))
13294 {
d0396b79 13295 gcc_assert (reload_completed);
2450a057
JH
13296 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13297 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13298 if (size == 3)
13299 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13300 }
13301 else if (offsettable_memref_p (operand))
13302 {
f4ef873c 13303 operand = adjust_address (operand, SImode, 0);
2450a057 13304 parts[0] = operand;
b72f00af 13305 parts[1] = adjust_address (operand, SImode, 4);
2450a057 13306 if (size == 3)
b72f00af 13307 parts[2] = adjust_address (operand, SImode, 8);
2450a057
JH
13308 }
13309 else if (GET_CODE (operand) == CONST_DOUBLE)
13310 {
13311 REAL_VALUE_TYPE r;
2b589241 13312 long l[4];
2450a057
JH
13313
13314 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13315 switch (mode)
13316 {
13317 case XFmode:
13318 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
d8bf17f9 13319 parts[2] = gen_int_mode (l[2], SImode);
2450a057
JH
13320 break;
13321 case DFmode:
13322 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13323 break;
13324 default:
d0396b79 13325 gcc_unreachable ();
2450a057 13326 }
d8bf17f9
LB
13327 parts[1] = gen_int_mode (l[1], SImode);
13328 parts[0] = gen_int_mode (l[0], SImode);
2450a057
JH
13329 }
13330 else
d0396b79 13331 gcc_unreachable ();
e075ae69 13332 }
2450a057 13333 }
26e5b205
JH
13334 else
13335 {
44cf5b6a
JH
13336 if (mode == TImode)
13337 split_ti (&operand, 1, &parts[0], &parts[1]);
26e5b205
JH
13338 if (mode == XFmode || mode == TFmode)
13339 {
f8a1ebc6 13340 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
26e5b205
JH
13341 if (REG_P (operand))
13342 {
d0396b79 13343 gcc_assert (reload_completed);
26e5b205 13344 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
f8a1ebc6 13345 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
26e5b205
JH
13346 }
13347 else if (offsettable_memref_p (operand))
13348 {
b72f00af 13349 operand = adjust_address (operand, DImode, 0);
26e5b205 13350 parts[0] = operand;
f8a1ebc6 13351 parts[1] = adjust_address (operand, upper_mode, 8);
26e5b205
JH
13352 }
13353 else if (GET_CODE (operand) == CONST_DOUBLE)
13354 {
13355 REAL_VALUE_TYPE r;
38606553 13356 long l[4];
26e5b205
JH
13357
13358 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9953b5e1 13359 real_to_target (l, &r, mode);
38606553 13360
26e5b205
JH
13361 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13362 if (HOST_BITS_PER_WIDE_INT >= 64)
523fbd9d 13363 parts[0]
d8bf17f9 13364 = gen_int_mode
44cf5b6a 13365 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
b531087a 13366 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
d8bf17f9 13367 DImode);
26e5b205
JH
13368 else
13369 parts[0] = immed_double_const (l[0], l[1], DImode);
38606553 13370
f8a1ebc6
JH
13371 if (upper_mode == SImode)
13372 parts[1] = gen_int_mode (l[2], SImode);
13373 else if (HOST_BITS_PER_WIDE_INT >= 64)
13374 parts[1]
13375 = gen_int_mode
13376 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13377 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13378 DImode);
13379 else
13380 parts[1] = immed_double_const (l[2], l[3], DImode);
26e5b205
JH
13381 }
13382 else
d0396b79 13383 gcc_unreachable ();
26e5b205
JH
13384 }
13385 }
2450a057 13386
2b589241 13387 return size;
2450a057
JH
13388}
13389
13390/* Emit insns to perform a move or push of DI, DF, and XF values.
13391 Return false when normal moves are needed; true when all required
13392 insns have been emitted. Operands 2-4 contain the input values
13393 int the correct order; operands 5-7 contain the output values. */
13394
26e5b205 13395void
b96a374d 13396ix86_split_long_move (rtx operands[])
2450a057
JH
13397{
13398 rtx part[2][3];
26e5b205 13399 int nparts;
2450a057
JH
13400 int push = 0;
13401 int collisions = 0;
26e5b205
JH
13402 enum machine_mode mode = GET_MODE (operands[0]);
13403
13404 /* The DFmode expanders may ask us to move double.
13405 For 64bit target this is single move. By hiding the fact
13406 here we simplify i386.md splitters. */
13407 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13408 {
8cdfa312
RH
13409 /* Optimize constant pool reference to immediates. This is used by
13410 fp moves, that force all constants to memory to allow combining. */
26e5b205 13411
7656aee4 13412 if (MEM_P (operands[1])
26e5b205
JH
13413 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13414 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13415 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13416 if (push_operand (operands[0], VOIDmode))
b47b4f21
JH
13417 {
13418 operands[0] = copy_rtx (operands[0]);
13419 PUT_MODE (operands[0], Pmode);
13420 }
26e5b205
JH
13421 else
13422 operands[0] = gen_lowpart (DImode, operands[0]);
13423 operands[1] = gen_lowpart (DImode, operands[1]);
13424 emit_move_insn (operands[0], operands[1]);
13425 return;
13426 }
2450a057 13427
2450a057
JH
13428 /* The only non-offsettable memory we handle is push. */
13429 if (push_operand (operands[0], VOIDmode))
13430 push = 1;
d0396b79 13431 else
7656aee4 13432 gcc_assert (!MEM_P (operands[0])
d0396b79 13433 || offsettable_memref_p (operands[0]));
2450a057 13434
26e5b205
JH
13435 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13436 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
2450a057
JH
13437
13438 /* When emitting push, take care for source operands on the stack. */
7656aee4 13439 if (push && MEM_P (operands[1])
2450a057
JH
13440 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13441 {
26e5b205 13442 if (nparts == 3)
886cbb88
JH
13443 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13444 XEXP (part[1][2], 0));
13445 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13446 XEXP (part[1][1], 0));
2450a057
JH
13447 }
13448
0f290768 13449 /* We need to do copy in the right order in case an address register
2450a057 13450 of the source overlaps the destination. */
7656aee4 13451 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
2450a057
JH
13452 {
13453 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13454 collisions++;
13455 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13456 collisions++;
26e5b205 13457 if (nparts == 3
2450a057
JH
13458 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13459 collisions++;
13460
13461 /* Collision in the middle part can be handled by reordering. */
26e5b205 13462 if (collisions == 1 && nparts == 3
2450a057 13463 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
e075ae69 13464 {
2450a057
JH
13465 rtx tmp;
13466 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13467 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13468 }
e075ae69 13469
2450a057
JH
13470 /* If there are more collisions, we can't handle it by reordering.
13471 Do an lea to the last part and use only one colliding move. */
13472 else if (collisions > 1)
13473 {
8231b3f9
RH
13474 rtx base;
13475
2450a057 13476 collisions = 1;
8231b3f9
RH
13477
13478 base = part[0][nparts - 1];
13479
13480 /* Handle the case when the last part isn't valid for lea.
13481 Happens in 64-bit mode storing the 12-byte XFmode. */
13482 if (GET_MODE (base) != Pmode)
13483 base = gen_rtx_REG (Pmode, REGNO (base));
13484
13485 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13486 part[1][0] = replace_equiv_address (part[1][0], base);
13487 part[1][1] = replace_equiv_address (part[1][1],
13488 plus_constant (base, UNITS_PER_WORD));
26e5b205 13489 if (nparts == 3)
8231b3f9
RH
13490 part[1][2] = replace_equiv_address (part[1][2],
13491 plus_constant (base, 8));
2450a057
JH
13492 }
13493 }
13494
13495 if (push)
13496 {
26e5b205 13497 if (!TARGET_64BIT)
2b589241 13498 {
26e5b205
JH
13499 if (nparts == 3)
13500 {
f8a1ebc6
JH
13501 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13502 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
26e5b205
JH
13503 emit_move_insn (part[0][2], part[1][2]);
13504 }
2b589241 13505 }
26e5b205
JH
13506 else
13507 {
13508 /* In 64bit mode we don't have 32bit push available. In case this is
13509 register, it is OK - we will just use larger counterpart. We also
13510 retype memory - these comes from attempt to avoid REX prefix on
13511 moving of second half of TFmode value. */
13512 if (GET_MODE (part[1][1]) == SImode)
13513 {
d0396b79
NS
13514 switch (GET_CODE (part[1][1]))
13515 {
13516 case MEM:
13517 part[1][1] = adjust_address (part[1][1], DImode, 0);
13518 break;
13519
13520 case REG:
13521 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13522 break;
13523
13524 default:
13525 gcc_unreachable ();
13526 }
5656a184 13527
886cbb88
JH
13528 if (GET_MODE (part[1][0]) == SImode)
13529 part[1][0] = part[1][1];
26e5b205
JH
13530 }
13531 }
13532 emit_move_insn (part[0][1], part[1][1]);
13533 emit_move_insn (part[0][0], part[1][0]);
13534 return;
2450a057
JH
13535 }
13536
13537 /* Choose correct order to not overwrite the source before it is copied. */
13538 if ((REG_P (part[0][0])
13539 && REG_P (part[1][1])
13540 && (REGNO (part[0][0]) == REGNO (part[1][1])
26e5b205 13541 || (nparts == 3
2450a057
JH
13542 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13543 || (collisions > 0
13544 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
13545 {
26e5b205 13546 if (nparts == 3)
2450a057 13547 {
26e5b205
JH
13548 operands[2] = part[0][2];
13549 operands[3] = part[0][1];
13550 operands[4] = part[0][0];
13551 operands[5] = part[1][2];
13552 operands[6] = part[1][1];
13553 operands[7] = part[1][0];
2450a057
JH
13554 }
13555 else
13556 {
26e5b205
JH
13557 operands[2] = part[0][1];
13558 operands[3] = part[0][0];
13559 operands[5] = part[1][1];
13560 operands[6] = part[1][0];
2450a057
JH
13561 }
13562 }
13563 else
13564 {
26e5b205 13565 if (nparts == 3)
2450a057 13566 {
26e5b205
JH
13567 operands[2] = part[0][0];
13568 operands[3] = part[0][1];
13569 operands[4] = part[0][2];
13570 operands[5] = part[1][0];
13571 operands[6] = part[1][1];
13572 operands[7] = part[1][2];
2450a057
JH
13573 }
13574 else
13575 {
26e5b205
JH
13576 operands[2] = part[0][0];
13577 operands[3] = part[0][1];
13578 operands[5] = part[1][0];
13579 operands[6] = part[1][1];
e075ae69
RH
13580 }
13581 }
903a5059 13582
0e40b5f2 13583 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
903a5059
RS
13584 if (optimize_size)
13585 {
7656aee4 13586 if (CONST_INT_P (operands[5])
903a5059
RS
13587 && operands[5] != const0_rtx
13588 && REG_P (operands[2]))
13589 {
7656aee4 13590 if (CONST_INT_P (operands[6])
903a5059
RS
13591 && INTVAL (operands[6]) == INTVAL (operands[5]))
13592 operands[6] = operands[2];
13593
13594 if (nparts == 3
7656aee4 13595 && CONST_INT_P (operands[7])
903a5059
RS
13596 && INTVAL (operands[7]) == INTVAL (operands[5]))
13597 operands[7] = operands[2];
13598 }
13599
13600 if (nparts == 3
7656aee4 13601 && CONST_INT_P (operands[6])
903a5059
RS
13602 && operands[6] != const0_rtx
13603 && REG_P (operands[3])
7656aee4 13604 && CONST_INT_P (operands[7])
903a5059
RS
13605 && INTVAL (operands[7]) == INTVAL (operands[6]))
13606 operands[7] = operands[3];
13607 }
13608
26e5b205
JH
13609 emit_move_insn (operands[2], operands[5]);
13610 emit_move_insn (operands[3], operands[6]);
13611 if (nparts == 3)
13612 emit_move_insn (operands[4], operands[7]);
32b5b1aa 13613
26e5b205 13614 return;
32b5b1aa 13615}
32b5b1aa 13616
28356f52 13617/* Helper function of ix86_split_ashl used to generate an SImode/DImode
1b83d209
RS
13618 left shift by a constant, either using a single shift or
13619 a sequence of add instructions. */
13620
13621static void
28356f52 13622ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
1b83d209
RS
13623{
13624 if (count == 1)
28356f52
JB
13625 {
13626 emit_insn ((mode == DImode
13627 ? gen_addsi3
13628 : gen_adddi3) (operand, operand, operand));
13629 }
1b83d209
RS
13630 else if (!optimize_size
13631 && count * ix86_cost->add <= ix86_cost->shift_const)
13632 {
13633 int i;
13634 for (i=0; i<count; i++)
28356f52
JB
13635 {
13636 emit_insn ((mode == DImode
13637 ? gen_addsi3
13638 : gen_adddi3) (operand, operand, operand));
13639 }
1b83d209
RS
13640 }
13641 else
28356f52
JB
13642 emit_insn ((mode == DImode
13643 ? gen_ashlsi3
13644 : gen_ashldi3) (operand, operand, GEN_INT (count)));
1b83d209
RS
13645}
13646
e075ae69 13647void
28356f52 13648ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
32b5b1aa 13649{
e075ae69
RH
13650 rtx low[2], high[2];
13651 int count;
28356f52 13652 const int single_width = mode == DImode ? 32 : 64;
b985a30f 13653
7656aee4 13654 if (CONST_INT_P (operands[2]))
e075ae69 13655 {
28356f52
JB
13656 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13657 count = INTVAL (operands[2]) & (single_width * 2 - 1);
32b5b1aa 13658
28356f52 13659 if (count >= single_width)
e075ae69
RH
13660 {
13661 emit_move_insn (high[0], low[1]);
13662 emit_move_insn (low[0], const0_rtx);
b985a30f 13663
28356f52
JB
13664 if (count > single_width)
13665 ix86_expand_ashl_const (high[0], count - single_width, mode);
e075ae69
RH
13666 }
13667 else
13668 {
13669 if (!rtx_equal_p (operands[0], operands[1]))
13670 emit_move_insn (operands[0], operands[1]);
28356f52
JB
13671 emit_insn ((mode == DImode
13672 ? gen_x86_shld_1
13673 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13674 ix86_expand_ashl_const (low[0], count, mode);
e075ae69 13675 }
93330ea1 13676 return;
e075ae69 13677 }
93330ea1 13678
28356f52 13679 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
93330ea1
RH
13680
13681 if (operands[1] == const1_rtx)
e075ae69 13682 {
28356f52
JB
13683 /* Assuming we've chosen a QImode capable registers, then 1 << N
13684 can be done with two 32/64-bit shifts, no branches, no cmoves. */
93330ea1
RH
13685 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13686 {
13687 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
b985a30f 13688
93330ea1
RH
13689 ix86_expand_clear (low[0]);
13690 ix86_expand_clear (high[0]);
28356f52 13691 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
5656a184 13692
93330ea1
RH
13693 d = gen_lowpart (QImode, low[0]);
13694 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13695 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13696 emit_insn (gen_rtx_SET (VOIDmode, d, s));
b985a30f 13697
93330ea1
RH
13698 d = gen_lowpart (QImode, high[0]);
13699 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13700 s = gen_rtx_NE (QImode, flags, const0_rtx);
13701 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13702 }
32b5b1aa 13703
93330ea1 13704 /* Otherwise, we can get the same results by manually performing
28356f52 13705 a bit extract operation on bit 5/6, and then performing the two
93330ea1
RH
13706 shifts. The two methods of getting 0/1 into low/high are exactly
13707 the same size. Avoiding the shift in the bit extract case helps
13708 pentium4 a bit; no one else seems to care much either way. */
13709 else
e075ae69 13710 {
93330ea1
RH
13711 rtx x;
13712
13713 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
28356f52 13714 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
e075ae69 13715 else
28356f52 13716 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
93330ea1 13717 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
e075ae69 13718
28356f52
JB
13719 emit_insn ((mode == DImode
13720 ? gen_lshrsi3
13721 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13722 emit_insn ((mode == DImode
13723 ? gen_andsi3
13724 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
93330ea1 13725 emit_move_insn (low[0], high[0]);
28356f52
JB
13726 emit_insn ((mode == DImode
13727 ? gen_xorsi3
13728 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
e075ae69 13729 }
93330ea1 13730
28356f52
JB
13731 emit_insn ((mode == DImode
13732 ? gen_ashlsi3
13733 : gen_ashldi3) (low[0], low[0], operands[2]));
13734 emit_insn ((mode == DImode
13735 ? gen_ashlsi3
13736 : gen_ashldi3) (high[0], high[0], operands[2]));
93330ea1
RH
13737 return;
13738 }
13739
13740 if (operands[1] == constm1_rtx)
13741 {
28356f52
JB
13742 /* For -1 << N, we can avoid the shld instruction, because we
13743 know that we're shifting 0...31/63 ones into a -1. */
93330ea1
RH
13744 emit_move_insn (low[0], constm1_rtx);
13745 if (optimize_size)
28356f52 13746 emit_move_insn (high[0], low[0]);
e075ae69 13747 else
93330ea1 13748 emit_move_insn (high[0], constm1_rtx);
e075ae69 13749 }
93330ea1
RH
13750 else
13751 {
13752 if (!rtx_equal_p (operands[0], operands[1]))
13753 emit_move_insn (operands[0], operands[1]);
13754
28356f52
JB
13755 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13756 emit_insn ((mode == DImode
13757 ? gen_x86_shld_1
13758 : gen_x86_64_shld) (high[0], low[0], operands[2]));
93330ea1
RH
13759 }
13760
28356f52 13761 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
93330ea1
RH
13762
13763 if (TARGET_CMOVE && scratch)
13764 {
13765 ix86_expand_clear (scratch);
28356f52
JB
13766 emit_insn ((mode == DImode
13767 ? gen_x86_shift_adj_1
13768 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
93330ea1
RH
13769 }
13770 else
13771 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
e9a25f70 13772}
32b5b1aa 13773
e075ae69 13774void
28356f52 13775ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
32b5b1aa 13776{
e075ae69
RH
13777 rtx low[2], high[2];
13778 int count;
28356f52 13779 const int single_width = mode == DImode ? 32 : 64;
32b5b1aa 13780
7656aee4 13781 if (CONST_INT_P (operands[2]))
e075ae69 13782 {
28356f52
JB
13783 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13784 count = INTVAL (operands[2]) & (single_width * 2 - 1);
32b5b1aa 13785
28356f52 13786 if (count == single_width * 2 - 1)
8937b6a2
RS
13787 {
13788 emit_move_insn (high[0], high[1]);
28356f52
JB
13789 emit_insn ((mode == DImode
13790 ? gen_ashrsi3
13791 : gen_ashrdi3) (high[0], high[0],
13792 GEN_INT (single_width - 1)));
8937b6a2
RS
13793 emit_move_insn (low[0], high[0]);
13794
13795 }
28356f52 13796 else if (count >= single_width)
e075ae69
RH
13797 {
13798 emit_move_insn (low[0], high[1]);
93330ea1 13799 emit_move_insn (high[0], low[0]);
28356f52
JB
13800 emit_insn ((mode == DImode
13801 ? gen_ashrsi3
13802 : gen_ashrdi3) (high[0], high[0],
13803 GEN_INT (single_width - 1)));
13804 if (count > single_width)
13805 emit_insn ((mode == DImode
13806 ? gen_ashrsi3
13807 : gen_ashrdi3) (low[0], low[0],
13808 GEN_INT (count - single_width)));
e075ae69
RH
13809 }
13810 else
13811 {
13812 if (!rtx_equal_p (operands[0], operands[1]))
13813 emit_move_insn (operands[0], operands[1]);
28356f52
JB
13814 emit_insn ((mode == DImode
13815 ? gen_x86_shrd_1
13816 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13817 emit_insn ((mode == DImode
13818 ? gen_ashrsi3
13819 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
e075ae69
RH
13820 }
13821 }
13822 else
32b5b1aa 13823 {
e075ae69
RH
13824 if (!rtx_equal_p (operands[0], operands[1]))
13825 emit_move_insn (operands[0], operands[1]);
13826
28356f52 13827 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
e075ae69 13828
28356f52
JB
13829 emit_insn ((mode == DImode
13830 ? gen_x86_shrd_1
13831 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13832 emit_insn ((mode == DImode
13833 ? gen_ashrsi3
13834 : gen_ashrdi3) (high[0], high[0], operands[2]));
e075ae69 13835
93330ea1 13836 if (TARGET_CMOVE && scratch)
e075ae69 13837 {
e075ae69 13838 emit_move_insn (scratch, high[0]);
28356f52
JB
13839 emit_insn ((mode == DImode
13840 ? gen_ashrsi3
13841 : gen_ashrdi3) (scratch, scratch,
13842 GEN_INT (single_width - 1)));
13843 emit_insn ((mode == DImode
13844 ? gen_x86_shift_adj_1
13845 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13846 scratch));
e075ae69
RH
13847 }
13848 else
13849 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 13850 }
e075ae69 13851}
32b5b1aa 13852
e075ae69 13853void
28356f52 13854ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
e075ae69
RH
13855{
13856 rtx low[2], high[2];
13857 int count;
28356f52 13858 const int single_width = mode == DImode ? 32 : 64;
32b5b1aa 13859
7656aee4 13860 if (CONST_INT_P (operands[2]))
32b5b1aa 13861 {
28356f52
JB
13862 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13863 count = INTVAL (operands[2]) & (single_width * 2 - 1);
e075ae69 13864
28356f52 13865 if (count >= single_width)
c7271385 13866 {
e075ae69 13867 emit_move_insn (low[0], high[1]);
93330ea1 13868 ix86_expand_clear (high[0]);
32b5b1aa 13869
28356f52
JB
13870 if (count > single_width)
13871 emit_insn ((mode == DImode
13872 ? gen_lshrsi3
13873 : gen_lshrdi3) (low[0], low[0],
13874 GEN_INT (count - single_width)));
e075ae69
RH
13875 }
13876 else
13877 {
13878 if (!rtx_equal_p (operands[0], operands[1]))
13879 emit_move_insn (operands[0], operands[1]);
28356f52
JB
13880 emit_insn ((mode == DImode
13881 ? gen_x86_shrd_1
13882 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13883 emit_insn ((mode == DImode
13884 ? gen_lshrsi3
13885 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
e075ae69 13886 }
32b5b1aa 13887 }
e075ae69
RH
13888 else
13889 {
13890 if (!rtx_equal_p (operands[0], operands[1]))
13891 emit_move_insn (operands[0], operands[1]);
32b5b1aa 13892
28356f52 13893 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
e075ae69 13894
28356f52
JB
13895 emit_insn ((mode == DImode
13896 ? gen_x86_shrd_1
13897 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13898 emit_insn ((mode == DImode
13899 ? gen_lshrsi3
13900 : gen_lshrdi3) (high[0], high[0], operands[2]));
e075ae69
RH
13901
13902 /* Heh. By reversing the arguments, we can reuse this pattern. */
93330ea1 13903 if (TARGET_CMOVE && scratch)
e075ae69 13904 {
93330ea1 13905 ix86_expand_clear (scratch);
28356f52
JB
13906 emit_insn ((mode == DImode
13907 ? gen_x86_shift_adj_1
13908 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13909 scratch));
e075ae69
RH
13910 }
13911 else
13912 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13913 }
32b5b1aa 13914}
3f803cd9 13915
8c996513
JH
13916/* Predict just emitted jump instruction to be taken with probability PROB. */
13917static void
13918predict_jump (int prob)
13919{
13920 rtx insn = get_last_insn ();
7656aee4 13921 gcc_assert (JUMP_P (insn));
8c996513
JH
13922 REG_NOTES (insn)
13923 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13924 GEN_INT (prob),
13925 REG_NOTES (insn));
13926}
13927
0407c02b 13928/* Helper function for the string operations below. Dest VARIABLE whether
0945b39d
JH
13929 it is aligned to VALUE bytes. If true, jump to the label. */
13930static rtx
8c996513 13931ix86_expand_aligntest (rtx variable, int value, bool epilogue)
0945b39d
JH
13932{
13933 rtx label = gen_label_rtx ();
13934 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13935 if (GET_MODE (variable) == DImode)
13936 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13937 else
13938 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13939 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
d43e0b7d 13940 1, label);
8c996513
JH
13941 if (epilogue)
13942 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13943 else
13944 predict_jump (REG_BR_PROB_BASE * 90 / 100);
0945b39d
JH
13945 return label;
13946}
13947
13948/* Adjust COUNTER by the VALUE. */
13949static void
b96a374d 13950ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
0945b39d
JH
13951{
13952 if (GET_MODE (countreg) == DImode)
13953 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13954 else
13955 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13956}
13957
13958/* Zero extend possibly SImode EXP to Pmode register. */
d24b3457 13959rtx
b96a374d 13960ix86_zero_extend_to_Pmode (rtx exp)
0945b39d
JH
13961{
13962 rtx r;
13963 if (GET_MODE (exp) == VOIDmode)
13964 return force_reg (Pmode, exp);
13965 if (GET_MODE (exp) == Pmode)
13966 return copy_to_mode_reg (Pmode, exp);
13967 r = gen_reg_rtx (Pmode);
13968 emit_insn (gen_zero_extendsidi2 (r, exp));
13969 return r;
13970}
13971
8c996513
JH
13972/* Divide COUNTREG by SCALE. */
13973static rtx
13974scale_counter (rtx countreg, int scale)
0945b39d 13975{
8c996513
JH
13976 rtx sc;
13977 rtx piece_size_mask;
0945b39d 13978
8c996513
JH
13979 if (scale == 1)
13980 return countreg;
7656aee4 13981 if (CONST_INT_P (countreg))
8c996513
JH
13982 return GEN_INT (INTVAL (countreg) / scale);
13983 gcc_assert (REG_P (countreg));
0945b39d 13984
8c996513
JH
13985 piece_size_mask = GEN_INT (scale - 1);
13986 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13987 GEN_INT (exact_log2 (scale)),
13988 NULL, 1, OPTAB_DIRECT);
13989 return sc;
13990}
d0a5295a 13991
7fa7289d
KH
13992/* Return mode for the memcpy/memset loop counter. Prefer SImode over
13993 DImode for constant loop counts. */
bd8d4d19
JH
13994
13995static enum machine_mode
13996counter_mode (rtx count_exp)
13997{
13998 if (GET_MODE (count_exp) != VOIDmode)
13999 return GET_MODE (count_exp);
14000 if (GET_CODE (count_exp) != CONST_INT)
14001 return Pmode;
14002 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14003 return DImode;
14004 return SImode;
14005}
14006
8c996513
JH
14007/* When SRCPTR is non-NULL, output simple loop to move memory
14008 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
14009 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
14010 equivalent loop to set memory by VALUE (supposed to be in MODE).
0945b39d 14011
8c996513
JH
14012 The size is rounded down to whole number of chunk size moved at once.
14013 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
54a88090 14014
8c996513
JH
14015
14016static void
14017expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14018 rtx destptr, rtx srcptr, rtx value,
14019 rtx count, enum machine_mode mode, int unroll,
14020 int expected_size)
14021{
14022 rtx out_label, top_label, iter, tmp;
bd8d4d19 14023 enum machine_mode iter_mode = counter_mode (count);
8c996513
JH
14024 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14025 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14026 rtx size;
14027 rtx x_addr;
14028 rtx y_addr;
14029 int i;
14030
8c996513
JH
14031 top_label = gen_label_rtx ();
14032 out_label = gen_label_rtx ();
14033 iter = gen_reg_rtx (iter_mode);
14034
14035 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14036 NULL, 1, OPTAB_DIRECT);
14037 /* Those two should combine. */
14038 if (piece_size == const1_rtx)
26771da7 14039 {
8c996513
JH
14040 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14041 true, out_label);
14042 predict_jump (REG_BR_PROB_BASE * 10 / 100);
26771da7 14043 }
8c996513 14044 emit_move_insn (iter, const0_rtx);
0945b39d 14045
8c996513 14046 emit_label (top_label);
0945b39d 14047
8c996513
JH
14048 tmp = convert_modes (Pmode, iter_mode, iter, true);
14049 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14050 destmem = change_address (destmem, mode, x_addr);
0945b39d 14051
8c996513
JH
14052 if (srcmem)
14053 {
14054 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14055 srcmem = change_address (srcmem, mode, y_addr);
4e44c1ef 14056
8c996513 14057 /* When unrolling for chips that reorder memory reads and writes,
54a88090 14058 we can save registers by using single temporary.
8c996513
JH
14059 Also using 4 temporaries is overkill in 32bit mode. */
14060 if (!TARGET_64BIT && 0)
14061 {
14062 for (i = 0; i < unroll; i++)
14063 {
14064 if (i)
14065 {
14066 destmem =
14067 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14068 srcmem =
14069 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14070 }
14071 emit_move_insn (destmem, srcmem);
14072 }
14073 }
14074 else
14075 {
14076 rtx tmpreg[4];
14077 gcc_assert (unroll <= 4);
14078 for (i = 0; i < unroll; i++)
14079 {
14080 tmpreg[i] = gen_reg_rtx (mode);
14081 if (i)
95935e2d 14082 {
8c996513
JH
14083 srcmem =
14084 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
95935e2d 14085 }
8c996513
JH
14086 emit_move_insn (tmpreg[i], srcmem);
14087 }
14088 for (i = 0; i < unroll; i++)
14089 {
14090 if (i)
14091 {
14092 destmem =
14093 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14094 }
14095 emit_move_insn (destmem, tmpreg[i]);
14096 }
14097 }
14098 }
14099 else
14100 for (i = 0; i < unroll; i++)
14101 {
14102 if (i)
14103 destmem =
14104 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14105 emit_move_insn (destmem, value);
14106 }
14107
14108 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14109 true, OPTAB_LIB_WIDEN);
14110 if (tmp != iter)
14111 emit_move_insn (iter, tmp);
14112
14113 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14114 true, top_label);
14115 if (expected_size != -1)
14116 {
14117 expected_size /= GET_MODE_SIZE (mode) * unroll;
14118 if (expected_size == 0)
14119 predict_jump (0);
14120 else if (expected_size > REG_BR_PROB_BASE)
14121 predict_jump (REG_BR_PROB_BASE - 1);
14122 else
14123 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14124 }
14125 else
14126 predict_jump (REG_BR_PROB_BASE * 80 / 100);
14127 iter = ix86_zero_extend_to_Pmode (iter);
14128 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14129 true, OPTAB_LIB_WIDEN);
14130 if (tmp != destptr)
14131 emit_move_insn (destptr, tmp);
14132 if (srcptr)
14133 {
14134 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14135 true, OPTAB_LIB_WIDEN);
14136 if (tmp != srcptr)
14137 emit_move_insn (srcptr, tmp);
14138 }
14139 emit_label (out_label);
14140}
14141
54a88090 14142/* Output "rep; mov" instruction.
8c996513
JH
14143 Arguments have same meaning as for previous function */
14144static void
14145expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14146 rtx destptr, rtx srcptr,
14147 rtx count,
14148 enum machine_mode mode)
14149{
14150 rtx destexp;
14151 rtx srcexp;
14152 rtx countreg;
14153
14154 /* If the size is known, it is shorter to use rep movs. */
7656aee4 14155 if (mode == QImode && CONST_INT_P (count)
8c996513
JH
14156 && !(INTVAL (count) & 3))
14157 mode = SImode;
14158
14159 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14160 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14161 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14162 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
14163 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14164 if (mode != QImode)
14165 {
14166 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14167 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14168 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14169 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14170 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14171 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14172 }
14173 else
14174 {
14175 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14176 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14177 }
14178 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14179 destexp, srcexp));
14180}
14181
54a88090 14182/* Output "rep; stos" instruction.
8c996513
JH
14183 Arguments have same meaning as for previous function */
14184static void
14185expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14186 rtx count,
14187 enum machine_mode mode)
14188{
14189 rtx destexp;
14190 rtx countreg;
14191
14192 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14193 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14194 value = force_reg (mode, gen_lowpart (mode, value));
14195 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14196 if (mode != QImode)
14197 {
14198 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14199 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14200 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14201 }
14202 else
14203 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14204 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
14205}
14206
14207static void
14208emit_strmov (rtx destmem, rtx srcmem,
14209 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14210{
14211 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14212 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14213 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14214}
14215
14216/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
14217static void
14218expand_movmem_epilogue (rtx destmem, rtx srcmem,
14219 rtx destptr, rtx srcptr, rtx count, int max_size)
14220{
14221 rtx src, dest;
7656aee4 14222 if (CONST_INT_P (count))
8c996513
JH
14223 {
14224 HOST_WIDE_INT countval = INTVAL (count);
14225 int offset = 0;
14226
73013054 14227 if ((countval & 0x10) && max_size > 16)
8c996513
JH
14228 {
14229 if (TARGET_64BIT)
14230 {
14231 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14232 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
95935e2d 14233 }
8c996513
JH
14234 else
14235 gcc_unreachable ();
14236 offset += 16;
14237 }
14238 if ((countval & 0x08) && max_size > 8)
14239 {
14240 if (TARGET_64BIT)
14241 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
95935e2d
DV
14242 else
14243 {
bd8d4d19
JH
14244 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14245 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
95935e2d 14246 }
8c996513 14247 offset += 8;
0945b39d 14248 }
8c996513 14249 if ((countval & 0x04) && max_size > 4)
4e44c1ef 14250 {
8c996513 14251 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
4e44c1ef
JJ
14252 offset += 4;
14253 }
8c996513 14254 if ((countval & 0x02) && max_size > 2)
4e44c1ef 14255 {
8c996513 14256 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
4e44c1ef
JJ
14257 offset += 2;
14258 }
8c996513 14259 if ((countval & 0x01) && max_size > 1)
4e44c1ef 14260 {
8c996513
JH
14261 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
14262 offset += 1;
4e44c1ef 14263 }
8c996513 14264 return;
0945b39d 14265 }
8c996513 14266 if (max_size > 8)
0945b39d 14267 {
8c996513
JH
14268 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14269 count, 1, OPTAB_DIRECT);
14270 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14271 count, QImode, 1, 4);
14272 return;
14273 }
0945b39d 14274
8c996513
JH
14275 /* When there are stringops, we can cheaply increase dest and src pointers.
14276 Otherwise we save code size by maintaining offset (zero is readily
2f8e468b 14277 available from preceding rep operation) and using x86 addressing modes.
8c996513
JH
14278 */
14279 if (TARGET_SINGLE_STRINGOP)
14280 {
14281 if (max_size > 4)
0945b39d 14282 {
8c996513
JH
14283 rtx label = ix86_expand_aligntest (count, 4, true);
14284 src = change_address (srcmem, SImode, srcptr);
14285 dest = change_address (destmem, SImode, destptr);
14286 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14287 emit_label (label);
14288 LABEL_NUSES (label) = 1;
0945b39d 14289 }
8c996513 14290 if (max_size > 2)
0945b39d 14291 {
8c996513
JH
14292 rtx label = ix86_expand_aligntest (count, 2, true);
14293 src = change_address (srcmem, HImode, srcptr);
14294 dest = change_address (destmem, HImode, destptr);
14295 emit_insn (gen_strmov (destptr, dest, srcptr, src));
0945b39d
JH
14296 emit_label (label);
14297 LABEL_NUSES (label) = 1;
14298 }
8c996513 14299 if (max_size > 1)
0945b39d 14300 {
8c996513
JH
14301 rtx label = ix86_expand_aligntest (count, 1, true);
14302 src = change_address (srcmem, QImode, srcptr);
14303 dest = change_address (destmem, QImode, destptr);
14304 emit_insn (gen_strmov (destptr, dest, srcptr, src));
0945b39d
JH
14305 emit_label (label);
14306 LABEL_NUSES (label) = 1;
14307 }
8c996513
JH
14308 }
14309 else
14310 {
14311 rtx offset = force_reg (Pmode, const0_rtx);
14312 rtx tmp;
14313
14314 if (max_size > 4)
0945b39d 14315 {
8c996513
JH
14316 rtx label = ix86_expand_aligntest (count, 4, true);
14317 src = change_address (srcmem, SImode, srcptr);
14318 dest = change_address (destmem, SImode, destptr);
14319 emit_move_insn (dest, src);
14320 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
14321 true, OPTAB_LIB_WIDEN);
14322 if (tmp != offset)
14323 emit_move_insn (offset, tmp);
0945b39d
JH
14324 emit_label (label);
14325 LABEL_NUSES (label) = 1;
14326 }
8c996513
JH
14327 if (max_size > 2)
14328 {
14329 rtx label = ix86_expand_aligntest (count, 2, true);
14330 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14331 src = change_address (srcmem, HImode, tmp);
14332 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14333 dest = change_address (destmem, HImode, tmp);
14334 emit_move_insn (dest, src);
14335 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14336 true, OPTAB_LIB_WIDEN);
14337 if (tmp != offset)
14338 emit_move_insn (offset, tmp);
14339 emit_label (label);
14340 LABEL_NUSES (label) = 1;
14341 }
14342 if (max_size > 1)
37ad04a5 14343 {
8c996513
JH
14344 rtx label = ix86_expand_aligntest (count, 1, true);
14345 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14346 src = change_address (srcmem, QImode, tmp);
14347 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14348 dest = change_address (destmem, QImode, tmp);
14349 emit_move_insn (dest, src);
37ad04a5
JH
14350 emit_label (label);
14351 LABEL_NUSES (label) = 1;
37ad04a5 14352 }
8c996513
JH
14353 }
14354}
14355
14356/* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14357static void
14358expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14359 rtx count, int max_size)
14360{
14361 count =
bd8d4d19
JH
14362 expand_simple_binop (counter_mode (count), AND, count,
14363 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
8c996513
JH
14364 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14365 gen_lowpart (QImode, value), count, QImode,
14366 1, max_size / 2);
14367}
14368
14369/* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14370static void
14371expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
14372{
14373 rtx dest;
2a4f771a 14374
7656aee4 14375 if (CONST_INT_P (count))
8c996513
JH
14376 {
14377 HOST_WIDE_INT countval = INTVAL (count);
14378 int offset = 0;
14379
73013054 14380 if ((countval & 0x10) && max_size > 16)
0945b39d 14381 {
8c996513
JH
14382 if (TARGET_64BIT)
14383 {
14384 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14385 emit_insn (gen_strset (destptr, dest, value));
14386 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14387 emit_insn (gen_strset (destptr, dest, value));
14388 }
14389 else
14390 gcc_unreachable ();
14391 offset += 16;
0945b39d 14392 }
8c996513 14393 if ((countval & 0x08) && max_size > 8)
0945b39d 14394 {
8c996513
JH
14395 if (TARGET_64BIT)
14396 {
14397 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14398 emit_insn (gen_strset (destptr, dest, value));
14399 }
14400 else
14401 {
14402 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14403 emit_insn (gen_strset (destptr, dest, value));
14404 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14405 emit_insn (gen_strset (destptr, dest, value));
14406 }
14407 offset += 8;
0945b39d 14408 }
8c996513 14409 if ((countval & 0x04) && max_size > 4)
0945b39d 14410 {
8c996513
JH
14411 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14412 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14413 offset += 4;
0945b39d 14414 }
8c996513 14415 if ((countval & 0x02) && max_size > 2)
4e44c1ef 14416 {
8c996513
JH
14417 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14418 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14419 offset += 2;
4e44c1ef 14420 }
8c996513 14421 if ((countval & 0x01) && max_size > 1)
0945b39d 14422 {
8c996513
JH
14423 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14424 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14425 offset += 1;
0945b39d 14426 }
8c996513
JH
14427 return;
14428 }
14429 if (max_size > 32)
14430 {
14431 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14432 return;
14433 }
14434 if (max_size > 16)
14435 {
14436 rtx label = ix86_expand_aligntest (count, 16, true);
14437 if (TARGET_64BIT)
4e44c1ef 14438 {
8c996513
JH
14439 dest = change_address (destmem, DImode, destptr);
14440 emit_insn (gen_strset (destptr, dest, value));
14441 emit_insn (gen_strset (destptr, dest, value));
4e44c1ef 14442 }
8c996513 14443 else
0945b39d 14444 {
8c996513
JH
14445 dest = change_address (destmem, SImode, destptr);
14446 emit_insn (gen_strset (destptr, dest, value));
14447 emit_insn (gen_strset (destptr, dest, value));
14448 emit_insn (gen_strset (destptr, dest, value));
14449 emit_insn (gen_strset (destptr, dest, value));
0945b39d 14450 }
8c996513
JH
14451 emit_label (label);
14452 LABEL_NUSES (label) = 1;
14453 }
14454 if (max_size > 8)
14455 {
14456 rtx label = ix86_expand_aligntest (count, 8, true);
14457 if (TARGET_64BIT)
4e44c1ef 14458 {
8c996513
JH
14459 dest = change_address (destmem, DImode, destptr);
14460 emit_insn (gen_strset (destptr, dest, value));
4e44c1ef 14461 }
8c996513 14462 else
0945b39d 14463 {
8c996513
JH
14464 dest = change_address (destmem, SImode, destptr);
14465 emit_insn (gen_strset (destptr, dest, value));
14466 emit_insn (gen_strset (destptr, dest, value));
0945b39d 14467 }
8c996513
JH
14468 emit_label (label);
14469 LABEL_NUSES (label) = 1;
14470 }
14471 if (max_size > 4)
14472 {
14473 rtx label = ix86_expand_aligntest (count, 4, true);
14474 dest = change_address (destmem, SImode, destptr);
14475 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14476 emit_label (label);
14477 LABEL_NUSES (label) = 1;
0945b39d 14478 }
8c996513
JH
14479 if (max_size > 2)
14480 {
14481 rtx label = ix86_expand_aligntest (count, 2, true);
14482 dest = change_address (destmem, HImode, destptr);
14483 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14484 emit_label (label);
14485 LABEL_NUSES (label) = 1;
14486 }
14487 if (max_size > 1)
14488 {
14489 rtx label = ix86_expand_aligntest (count, 1, true);
14490 dest = change_address (destmem, QImode, destptr);
14491 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14492 emit_label (label);
14493 LABEL_NUSES (label) = 1;
14494 }
14495}
0945b39d 14496
8c996513
JH
14497/* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
14498 DESIRED_ALIGNMENT. */
14499static void
14500expand_movmem_prologue (rtx destmem, rtx srcmem,
14501 rtx destptr, rtx srcptr, rtx count,
14502 int align, int desired_alignment)
14503{
14504 if (align <= 1 && desired_alignment > 1)
14505 {
14506 rtx label = ix86_expand_aligntest (destptr, 1, false);
14507 srcmem = change_address (srcmem, QImode, srcptr);
14508 destmem = change_address (destmem, QImode, destptr);
14509 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14510 ix86_adjust_counter (count, 1);
14511 emit_label (label);
14512 LABEL_NUSES (label) = 1;
14513 }
14514 if (align <= 2 && desired_alignment > 2)
14515 {
14516 rtx label = ix86_expand_aligntest (destptr, 2, false);
14517 srcmem = change_address (srcmem, HImode, srcptr);
14518 destmem = change_address (destmem, HImode, destptr);
14519 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14520 ix86_adjust_counter (count, 2);
14521 emit_label (label);
14522 LABEL_NUSES (label) = 1;
14523 }
14524 if (align <= 4 && desired_alignment > 4)
14525 {
14526 rtx label = ix86_expand_aligntest (destptr, 4, false);
14527 srcmem = change_address (srcmem, SImode, srcptr);
14528 destmem = change_address (destmem, SImode, destptr);
14529 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14530 ix86_adjust_counter (count, 4);
14531 emit_label (label);
14532 LABEL_NUSES (label) = 1;
14533 }
14534 gcc_assert (desired_alignment <= 8);
0945b39d
JH
14535}
14536
8c996513
JH
14537/* Set enough from DEST to align DEST known to by aligned by ALIGN to
14538 DESIRED_ALIGNMENT. */
14539static void
14540expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14541 int align, int desired_alignment)
14542{
14543 if (align <= 1 && desired_alignment > 1)
14544 {
14545 rtx label = ix86_expand_aligntest (destptr, 1, false);
14546 destmem = change_address (destmem, QImode, destptr);
14547 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14548 ix86_adjust_counter (count, 1);
14549 emit_label (label);
14550 LABEL_NUSES (label) = 1;
14551 }
14552 if (align <= 2 && desired_alignment > 2)
14553 {
14554 rtx label = ix86_expand_aligntest (destptr, 2, false);
14555 destmem = change_address (destmem, HImode, destptr);
14556 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14557 ix86_adjust_counter (count, 2);
14558 emit_label (label);
14559 LABEL_NUSES (label) = 1;
14560 }
14561 if (align <= 4 && desired_alignment > 4)
14562 {
14563 rtx label = ix86_expand_aligntest (destptr, 4, false);
14564 destmem = change_address (destmem, SImode, destptr);
14565 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14566 ix86_adjust_counter (count, 4);
14567 emit_label (label);
14568 LABEL_NUSES (label) = 1;
14569 }
14570 gcc_assert (desired_alignment <= 8);
14571}
14572
14573/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
14574static enum stringop_alg
14575decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14576 int *dynamic_check)
14577{
14578 const struct stringop_algs * algs;
14579
14580 *dynamic_check = -1;
14581 if (memset)
14582 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14583 else
14584 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14585 if (stringop_alg != no_stringop)
14586 return stringop_alg;
14587 /* rep; movq or rep; movl is the smallest variant. */
14588 else if (optimize_size)
14589 {
14590 if (!count || (count & 3))
14591 return rep_prefix_1_byte;
14592 else
14593 return rep_prefix_4_byte;
14594 }
14595 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
14596 */
14597 else if (expected_size != -1 && expected_size < 4)
14598 return loop_1_byte;
14599 else if (expected_size != -1)
14600 {
14601 unsigned int i;
14602 enum stringop_alg alg = libcall;
14603 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14604 {
14605 gcc_assert (algs->size[i].max);
14606 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14607 {
14608 if (algs->size[i].alg != libcall)
14609 alg = algs->size[i].alg;
14610 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14611 last non-libcall inline algorithm. */
14612 if (TARGET_INLINE_ALL_STRINGOPS)
14613 {
cc0faf9d
JH
14614 /* When the current size is best to be copied by a libcall,
14615 but we are still forced to inline, run the heuristic bellow
14616 that will pick code for medium sized blocks. */
14617 if (alg != libcall)
14618 return alg;
14619 break;
8c996513
JH
14620 }
14621 else
14622 return algs->size[i].alg;
14623 }
14624 }
cc0faf9d 14625 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
8c996513
JH
14626 }
14627 /* When asked to inline the call anyway, try to pick meaningful choice.
14628 We look for maximal size of block that is faster to copy by hand and
14629 take blocks of at most of that size guessing that average size will
54a88090 14630 be roughly half of the block.
8c996513
JH
14631
14632 If this turns out to be bad, we might simply specify the preferred
14633 choice in ix86_costs. */
14634 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14635 && algs->unknown_size == libcall)
14636 {
14637 int max = -1;
14638 enum stringop_alg alg;
14639 int i;
14640
14641 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14642 if (algs->size[i].alg != libcall && algs->size[i].alg)
14643 max = algs->size[i].max;
14644 if (max == -1)
14645 max = 4096;
14646 alg = decide_alg (count, max / 2, memset, dynamic_check);
14647 gcc_assert (*dynamic_check == -1);
14648 gcc_assert (alg != libcall);
14649 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14650 *dynamic_check = max;
14651 return alg;
14652 }
14653 return algs->unknown_size;
14654}
14655
14656/* Decide on alignment. We know that the operand is already aligned to ALIGN
14657 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14658static int
14659decide_alignment (int align,
14660 enum stringop_alg alg,
14661 int expected_size)
14662{
14663 int desired_align = 0;
14664 switch (alg)
14665 {
14666 case no_stringop:
14667 gcc_unreachable ();
14668 case loop:
14669 case unrolled_loop:
14670 desired_align = GET_MODE_SIZE (Pmode);
14671 break;
14672 case rep_prefix_8_byte:
14673 desired_align = 8;
14674 break;
14675 case rep_prefix_4_byte:
14676 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14677 copying whole cacheline at once. */
14678 if (TARGET_PENTIUMPRO)
14679 desired_align = 8;
14680 else
14681 desired_align = 4;
14682 break;
14683 case rep_prefix_1_byte:
14684 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14685 copying whole cacheline at once. */
14686 if (TARGET_PENTIUMPRO)
14687 desired_align = 8;
14688 else
14689 desired_align = 1;
14690 break;
14691 case loop_1_byte:
14692 desired_align = 1;
14693 break;
14694 case libcall:
14695 return 0;
14696 }
14697
14698 if (optimize_size)
14699 desired_align = 1;
14700 if (desired_align < align)
14701 desired_align = align;
14702 if (expected_size != -1 && expected_size < 4)
14703 desired_align = align;
14704 return desired_align;
14705}
14706
2e226e66 14707/* Return the smallest power of 2 greater than VAL. */
2a4f771a
JH
14708static int
14709smallest_pow2_greater_than (int val)
14710{
14711 int ret = 1;
14712 while (ret <= val)
14713 ret <<= 1;
14714 return ret;
14715}
14716
8c996513 14717/* Expand string move (memcpy) operation. Use i386 string operations when
2a4f771a
JH
14718 profitable. expand_clrmem contains similar code. The code depends upon
14719 architecture, block size and alignment, but always has the same
14720 overall structure:
14721
14722 1) Prologue guard: Conditional that jumps up to epilogues for small
14723 blocks that can be handled by epilogue alone. This is faster but
14724 also needed for correctness, since prologue assume the block is larger
2e226e66 14725 than the desired alignment.
2a4f771a
JH
14726
14727 Optional dynamic check for size and libcall for large
14728 blocks is emitted here too, with -minline-stringops-dynamically.
14729
14730 2) Prologue: copy first few bytes in order to get destination aligned
14731 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14732 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14733 We emit either a jump tree on power of two sized blocks, or a byte loop.
14734
14735 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14736 with specified algorithm.
14737
14738 4) Epilogue: code copying tail of the block that is too small to be
14739 handled by main body (or up to size guarded by prologue guard). */
54a88090 14740
0945b39d 14741int
8c996513
JH
14742ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14743 rtx expected_align_exp, rtx expected_size_exp)
0945b39d 14744{
8c996513
JH
14745 rtx destreg;
14746 rtx srcreg;
14747 rtx label = NULL;
14748 rtx tmp;
14749 rtx jump_around_label = NULL;
14750 HOST_WIDE_INT align = 1;
0945b39d 14751 unsigned HOST_WIDE_INT count = 0;
8c996513 14752 HOST_WIDE_INT expected_size = -1;
2a4f771a 14753 int size_needed = 0, epilogue_size_needed;
8c996513
JH
14754 int desired_align = 0;
14755 enum stringop_alg alg;
14756 int dynamic_check;
0945b39d 14757
7656aee4 14758 if (CONST_INT_P (align_exp))
0945b39d 14759 align = INTVAL (align_exp);
2f8e468b 14760 /* i386 can do misaligned access on reasonably increased cost. */
7656aee4 14761 if (CONST_INT_P (expected_align_exp)
8c996513
JH
14762 && INTVAL (expected_align_exp) > align)
14763 align = INTVAL (expected_align_exp);
7656aee4 14764 if (CONST_INT_P (count_exp))
8c996513 14765 count = expected_size = INTVAL (count_exp);
7656aee4 14766 if (CONST_INT_P (expected_size_exp) && count == 0)
2a4f771a
JH
14767 expected_size = INTVAL (expected_size_exp);
14768
14769 /* Step 0: Decide on preferred algorithm, desired alignment and
14770 size of chunks to be copied by main loop. */
0945b39d 14771
8c996513
JH
14772 alg = decide_alg (count, expected_size, false, &dynamic_check);
14773 desired_align = decide_alignment (align, alg, expected_size);
d0a5295a 14774
0945b39d 14775 if (!TARGET_ALIGN_STRINGOPS)
8c996513 14776 align = desired_align;
0945b39d 14777
8c996513
JH
14778 if (alg == libcall)
14779 return 0;
14780 gcc_assert (alg != no_stringop);
14781 if (!count)
14782 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14783 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14784 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14785 switch (alg)
26771da7 14786 {
8c996513
JH
14787 case libcall:
14788 case no_stringop:
14789 gcc_unreachable ();
14790 case loop:
14791 size_needed = GET_MODE_SIZE (Pmode);
14792 break;
14793 case unrolled_loop:
14794 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14795 break;
14796 case rep_prefix_8_byte:
14797 size_needed = 8;
14798 break;
14799 case rep_prefix_4_byte:
14800 size_needed = 4;
14801 break;
14802 case rep_prefix_1_byte:
14803 case loop_1_byte:
14804 size_needed = 1;
14805 break;
26771da7 14806 }
0945b39d 14807
2a4f771a
JH
14808 epilogue_size_needed = size_needed;
14809
14810 /* Step 1: Prologue guard. */
14811
8c996513 14812 /* Alignment code needs count to be in register. */
7656aee4 14813 if (CONST_INT_P (count_exp) && desired_align > align)
8c996513
JH
14814 {
14815 enum machine_mode mode = SImode;
14816 if (TARGET_64BIT && (count & ~0xffffffff))
14817 mode = DImode;
14818 count_exp = force_reg (mode, count_exp);
14819 }
14820 gcc_assert (desired_align >= 1 && align >= 1);
2a4f771a 14821
8c996513 14822 /* Ensure that alignment prologue won't copy past end of block. */
bd8d4d19 14823 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
8c996513 14824 {
2a4f771a 14825 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
2a4f771a
JH
14826 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14827 Make sure it is power of 2. */
14828 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
90c56b45 14829
8c996513
JH
14830 label = gen_label_rtx ();
14831 emit_cmp_and_jump_insns (count_exp,
2a4f771a 14832 GEN_INT (epilogue_size_needed),
bd8d4d19
JH
14833 LTU, 0, counter_mode (count_exp), 1, label);
14834 if (GET_CODE (count_exp) == CONST_INT)
14835 ;
14836 else if (expected_size == -1 || expected_size < epilogue_size_needed)
8c996513
JH
14837 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14838 else
14839 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14840 }
14841 /* Emit code to decide on runtime whether library call or inline should be
14842 used. */
14843 if (dynamic_check != -1)
14844 {
14845 rtx hot_label = gen_label_rtx ();
14846 jump_around_label = gen_label_rtx ();
14847 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14848 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14849 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14850 emit_block_move_via_libcall (dst, src, count_exp, false);
14851 emit_jump (jump_around_label);
14852 emit_label (hot_label);
14853 }
0945b39d 14854
2a4f771a 14855 /* Step 2: Alignment prologue. */
0945b39d 14856
8c996513 14857 if (desired_align > align)
0945b39d 14858 {
8c996513
JH
14859 /* Except for the first move in epilogue, we no longer know
14860 constant offset in aliasing info. It don't seems to worth
14861 the pain to maintain it for the first move, so throw away
14862 the info early. */
14863 src = change_address (src, BLKmode, srcreg);
14864 dst = change_address (dst, BLKmode, destreg);
8c996513
JH
14865 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14866 desired_align);
0945b39d 14867 }
8c996513 14868 if (label && size_needed == 1)
0945b39d 14869 {
8c996513
JH
14870 emit_label (label);
14871 LABEL_NUSES (label) = 1;
14872 label = NULL;
14873 }
4e44c1ef 14874
2a4f771a
JH
14875 /* Step 3: Main loop. */
14876
8c996513
JH
14877 switch (alg)
14878 {
14879 case libcall:
14880 case no_stringop:
14881 gcc_unreachable ();
14882 case loop_1_byte:
14883 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14884 count_exp, QImode, 1, expected_size);
14885 break;
14886 case loop:
14887 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14888 count_exp, Pmode, 1, expected_size);
14889 break;
14890 case unrolled_loop:
14891 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14892 registers for 4 temporaries anyway. */
14893 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14894 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14895 expected_size);
14896 break;
14897 case rep_prefix_8_byte:
8c996513
JH
14898 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14899 DImode);
14900 break;
14901 case rep_prefix_4_byte:
8c996513
JH
14902 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14903 SImode);
14904 break;
14905 case rep_prefix_1_byte:
8c996513
JH
14906 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14907 QImode);
14908 break;
14909 }
14910 /* Properly adjust the offsets of src and dest memory for aliasing. */
7656aee4 14911 if (CONST_INT_P (count_exp))
8c996513
JH
14912 {
14913 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14914 (count / size_needed) * size_needed);
14915 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14916 (count / size_needed) * size_needed);
14917 }
14918 else
14919 {
14920 src = change_address (src, BLKmode, srcreg);
14921 dst = change_address (dst, BLKmode, destreg);
14922 }
6b32b628 14923
2a4f771a
JH
14924 /* Step 4: Epilogue to copy the remaining bytes. */
14925
8c996513
JH
14926 if (label)
14927 {
2a4f771a
JH
14928 /* When the main loop is done, COUNT_EXP might hold the original count,
14929 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14930 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14931 bytes. Compensate if needed. */
54a88090 14932
2a4f771a 14933 if (size_needed < epilogue_size_needed)
0945b39d 14934 {
8c996513 14935 tmp =
bd8d4d19 14936 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
8c996513
JH
14937 GEN_INT (size_needed - 1), count_exp, 1,
14938 OPTAB_DIRECT);
8c996513
JH
14939 if (tmp != count_exp)
14940 emit_move_insn (count_exp, tmp);
14941 }
14942 emit_label (label);
14943 LABEL_NUSES (label) = 1;
14944 }
2a4f771a
JH
14945
14946 if (count_exp != const0_rtx && epilogue_size_needed > 1)
90c56b45 14947 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
2a4f771a 14948 epilogue_size_needed);
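/* Illustrative example (not from the original source): with
   COUNT == 69 and SIZE_NEEDED == 8, the main loop copies 64 bytes,
   the AND above reduces COUNT_EXP to 69 & 7 == 5 where needed, and
   expand_movmem_epilogue then copies the remaining 5 bytes.  */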
8c996513
JH
14949 if (jump_around_label)
14950 emit_label (jump_around_label);
14951 return 1;
14952}
6b32b628 14953
8c996513
JH
14954/* Helper function for memset. For QImode value 0xXY produce
14955 0xXYXYXYXY of the width specified by MODE. This is essentially
14956 a * 0x01010101, but we can do slightly better than
14957 synth_mult by unwinding the sequence by hand on CPUs with
14958 slow multiply. */
14959static rtx
14960promote_duplicated_reg (enum machine_mode mode, rtx val)
14961{
14962 enum machine_mode valmode = GET_MODE (val);
14963 rtx tmp;
14964 int nops = mode == DImode ? 3 : 2;
6b32b628 14965
8c996513
JH
14966 gcc_assert (mode == SImode || mode == DImode);
14967 if (val == const0_rtx)
14968 return copy_to_mode_reg (mode, const0_rtx);
7656aee4 14969 if (CONST_INT_P (val))
8c996513
JH
14970 {
14971 HOST_WIDE_INT v = INTVAL (val) & 255;
6b32b628 14972
8c996513
JH
14973 v |= v << 8;
14974 v |= v << 16;
14975 if (mode == DImode)
14976 v |= (v << 16) << 16;
14977 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14978 }
14979
14980 if (valmode == VOIDmode)
14981 valmode = QImode;
14982 if (valmode != QImode)
14983 val = gen_lowpart (QImode, val);
14984 if (mode == QImode)
14985 return val;
14986 if (!TARGET_PARTIAL_REG_STALL)
14987 nops--;
14988 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14989 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14990 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14991 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14992 {
14993 rtx reg = convert_modes (mode, QImode, val, true);
14994 tmp = promote_duplicated_reg (mode, const1_rtx);
14995 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14996 OPTAB_DIRECT);
0945b39d
JH
14997 }
14998 else
14999 {
8c996513 15000 rtx reg = convert_modes (mode, QImode, val, true);
0945b39d 15001
8c996513
JH
15002 if (!TARGET_PARTIAL_REG_STALL)
15003 if (mode == SImode)
15004 emit_insn (gen_movsi_insv_1 (reg, reg));
15005 else
15006 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15007 else
0945b39d 15008 {
8c996513
JH
15009 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15010 NULL, 1, OPTAB_DIRECT);
15011 reg =
15012 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
0945b39d 15013 }
8c996513
JH
15014 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15015 NULL, 1, OPTAB_DIRECT);
15016 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15017 if (mode == SImode)
15018 return reg;
15019 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15020 NULL, 1, OPTAB_DIRECT);
15021 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15022 return reg;
15023 }
15024}
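/* Worked example (added for illustration, not part of the original
   code): promoting the QImode constant 0xAB to SImode on the constant
   path above computes
       v = 0xAB;  v |= v << 8;   now v == 0xABAB
                  v |= v << 16;  now v == 0xABABABAB
   which equals 0xAB * 0x01010101; for DImode one more step,
   v |= v << 32, yields 0xABABABABABABABAB.  */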
0945b39d 15025
2a4f771a
JH
15026/* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15027 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15028 alignment from ALIGN to DESIRED_ALIGN. */
15029static rtx
15030promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15031{
15032 rtx promoted_val;
15033
15034 if (TARGET_64BIT
15035 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15036 promoted_val = promote_duplicated_reg (DImode, val);
15037 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15038 promoted_val = promote_duplicated_reg (SImode, val);
15039 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15040 promoted_val = promote_duplicated_reg (HImode, val);
15041 else
15042 promoted_val = val;
15043
15044 return promoted_val;
15045}
15046
8c996513 15047/* Expand string store operation (memset). Use i386 string operations
2a4f771a 15048 when profitable. See expand_movmem comment for an explanation of the
2e226e66 15049 individual steps performed. */
8c996513
JH
15050int
15051ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15052 rtx expected_align_exp, rtx expected_size_exp)
15053{
15054 rtx destreg;
15055 rtx label = NULL;
15056 rtx tmp;
15057 rtx jump_around_label = NULL;
15058 HOST_WIDE_INT align = 1;
15059 unsigned HOST_WIDE_INT count = 0;
15060 HOST_WIDE_INT expected_size = -1;
2a4f771a 15061 int size_needed = 0, epilogue_size_needed;
8c996513
JH
15062 int desired_align = 0;
15063 enum stringop_alg alg;
2a4f771a 15064 rtx promoted_val = NULL;
8c996513
JH
15065 bool force_loopy_epilogue = false;
15066 int dynamic_check;
37ad04a5 15067
7656aee4 15068 if (CONST_INT_P (align_exp))
8c996513 15069 align = INTVAL (align_exp);
2f8e468b 15070 /* i386 can do misaligned access at a reasonably increased cost. */
7656aee4 15071 if (CONST_INT_P (expected_align_exp)
8c996513
JH
15072 && INTVAL (expected_align_exp) > align)
15073 align = INTVAL (expected_align_exp);
7656aee4 15074 if (CONST_INT_P (count_exp))
8c996513 15075 count = expected_size = INTVAL (count_exp);
7656aee4 15076 if (CONST_INT_P (expected_size_exp) && count == 0)
8c996513 15077 expected_size = INTVAL (expected_size_exp);
4e44c1ef 15078
2a4f771a
JH
15079 /* Step 0: Decide on preferred algorithm, desired alignment and
15080 size of chunks to be copied by main loop. */
15081
8c996513
JH
15082 alg = decide_alg (count, expected_size, true, &dynamic_check);
15083 desired_align = decide_alignment (align, alg, expected_size);
37ad04a5 15084
8c996513
JH
15085 if (!TARGET_ALIGN_STRINGOPS)
15086 align = desired_align;
15087
15088 if (alg == libcall)
15089 return 0;
15090 gcc_assert (alg != no_stringop);
15091 if (!count)
bd8d4d19 15092 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
8c996513
JH
15093 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15094 switch (alg)
15095 {
15096 case libcall:
15097 case no_stringop:
15098 gcc_unreachable ();
15099 case loop:
15100 size_needed = GET_MODE_SIZE (Pmode);
15101 break;
15102 case unrolled_loop:
15103 size_needed = GET_MODE_SIZE (Pmode) * 4;
15104 break;
15105 case rep_prefix_8_byte:
15106 size_needed = 8;
15107 break;
15108 case rep_prefix_4_byte:
15109 size_needed = 4;
15110 break;
15111 case rep_prefix_1_byte:
15112 case loop_1_byte:
15113 size_needed = 1;
15114 break;
15115 }
2a4f771a
JH
15116 epilogue_size_needed = size_needed;
15117
15118 /* Step 1: Prologue guard. */
15119
8c996513 15120 /* Alignment code needs count to be in a register. */
7656aee4 15121 if (CONST_INT_P (count_exp) && desired_align > align)
8c996513
JH
15122 {
15123 enum machine_mode mode = SImode;
15124 if (TARGET_64BIT && (count & ~0xffffffff))
15125 mode = DImode;
15126 count_exp = force_reg (mode, count_exp);
15127 }
54a88090 15128 /* Do the cheap promotion to allow better CSE across the
2a4f771a
JH
15129 main loop and epilogue (i.e., one load of the big constant in
15130 front of all the code). */
7656aee4 15131 if (CONST_INT_P (val_exp))
2a4f771a
JH
15132 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15133 desired_align, align);
8c996513 15134 /* Ensure that alignment prologue won't copy past end of block. */
bd8d4d19 15135 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
8c996513 15136 {
2a4f771a 15137 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
2a4f771a
JH
15138 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15139 Make sure it is power of 2. */
15140 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15141
15142 /* To improve performance of small blocks, we jump around the VAL
15143 promoting code. This means that if the promoted VAL is not constant,
15144 we might not use it in the epilogue and have to use the byte
15145 loop variant. */
15146 if (epilogue_size_needed > 2 && !promoted_val)
15147 force_loopy_epilogue = true;
8c996513
JH
15148 label = gen_label_rtx ();
15149 emit_cmp_and_jump_insns (count_exp,
2a4f771a 15150 GEN_INT (epilogue_size_needed),
bd8d4d19
JH
15151 LTU, 0, counter_mode (count_exp), 1, label);
15152 if (CONST_INT_P (count_exp))
15153 ;
15154 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
8c996513
JH
15155 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15156 else
15157 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15158 }
15159 if (dynamic_check != -1)
15160 {
15161 rtx hot_label = gen_label_rtx ();
15162 jump_around_label = gen_label_rtx ();
15163 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
bd8d4d19 15164 LEU, 0, counter_mode (count_exp), 1, hot_label);
8c996513
JH
15165 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15166 set_storage_via_libcall (dst, count_exp, val_exp, false);
15167 emit_jump (jump_around_label);
15168 emit_label (hot_label);
15169 }
2a4f771a
JH
15170
15171 /* Step 2: Alignment prologue. */
15172
15173 /* Do the expensive promotion once we have branched off the small blocks. */
15174 if (!promoted_val)
15175 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15176 desired_align, align);
8c996513 15177 gcc_assert (desired_align >= 1 && align >= 1);
90c56b45 15178
8c996513
JH
15179 if (desired_align > align)
15180 {
15181 /* Except for the first move in the epilogue, we no longer know
15182 the constant offset in aliasing info. It doesn't seem worth
15183 the pain to maintain it for the first move, so throw away
15184 the info early. */
15185 dst = change_address (dst, BLKmode, destreg);
8c996513
JH
15186 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15187 desired_align);
15188 }
15189 if (label && size_needed == 1)
15190 {
15191 emit_label (label);
15192 LABEL_NUSES (label) = 1;
15193 label = NULL;
15194 }
2a4f771a
JH
15195
15196 /* Step 3: Main loop. */
15197
8c996513
JH
15198 switch (alg)
15199 {
15200 case libcall:
15201 case no_stringop:
15202 gcc_unreachable ();
15203 case loop_1_byte:
15204 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15205 count_exp, QImode, 1, expected_size);
15206 break;
15207 case loop:
15208 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15209 count_exp, Pmode, 1, expected_size);
15210 break;
15211 case unrolled_loop:
15212 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15213 count_exp, Pmode, 4, expected_size);
15214 break;
15215 case rep_prefix_8_byte:
8c996513
JH
15216 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15217 DImode);
15218 break;
15219 case rep_prefix_4_byte:
8c996513
JH
15220 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15221 SImode);
15222 break;
15223 case rep_prefix_1_byte:
8c996513
JH
15224 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15225 QImode);
15226 break;
15227 }
15228 /* Properly adjust the offset of the destination memory for aliasing. */
7656aee4 15229 if (CONST_INT_P (count_exp))
8c996513
JH
15230 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15231 (count / size_needed) * size_needed);
15232 else
15233 dst = change_address (dst, BLKmode, destreg);
15234
2a4f771a
JH
15235 /* Step 4: Epilogue to copy the remaining bytes. */
15236
8c996513
JH
15237 if (label)
15238 {
2a4f771a
JH
15239 /* When the main loop is done, COUNT_EXP might hold the original count,
15240 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
15241 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
15242 bytes. Compensate if needed. */
15243
8c996513 15244 if (size_needed < desired_align - align)
0945b39d 15245 {
8c996513 15246 tmp =
bd8d4d19 15247 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
8c996513
JH
15248 GEN_INT (size_needed - 1), count_exp, 1,
15249 OPTAB_DIRECT);
15250 size_needed = desired_align - align + 1;
15251 if (tmp != count_exp)
15252 emit_move_insn (count_exp, tmp);
0945b39d 15253 }
8c996513
JH
15254 emit_label (label);
15255 LABEL_NUSES (label) = 1;
15256 }
2a4f771a 15257 if (count_exp != const0_rtx && epilogue_size_needed > 1)
8c996513
JH
15258 {
15259 if (force_loopy_epilogue)
15260 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
15261 size_needed);
15262 else
90c56b45
UB
15263 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
15264 size_needed);
0945b39d 15265 }
8c996513
JH
15266 if (jump_around_label)
15267 emit_label (jump_around_label);
0945b39d
JH
15268 return 1;
15269}
4e44c1ef 15270
e075ae69
RH
15271/* Expand the appropriate insns for doing strlen if not just doing
15272 repnz; scasb
15273
15274 out = result, initialized with the start address
15275 align_rtx = alignment of the address.
15276 scratch = scratch register, initialized with the start address when
77ebd435 15277 not aligned, otherwise undefined
3f803cd9 15278
39e3f58c 15279 This is just the body. It needs the initializations mentioned above and
3f803cd9
SC
15280 some address computing at the end. These things are done in i386.md. */
15281
0945b39d 15282static void
4e44c1ef 15283ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
3f803cd9 15284{
e075ae69
RH
15285 int align;
15286 rtx tmp;
15287 rtx align_2_label = NULL_RTX;
15288 rtx align_3_label = NULL_RTX;
15289 rtx align_4_label = gen_label_rtx ();
15290 rtx end_0_label = gen_label_rtx ();
e075ae69 15291 rtx mem;
e2e52e1b 15292 rtx tmpreg = gen_reg_rtx (SImode);
0945b39d 15293 rtx scratch = gen_reg_rtx (SImode);
e6e81735 15294 rtx cmp;
e075ae69
RH
15295
15296 align = 0;
7656aee4 15297 if (CONST_INT_P (align_rtx))
e075ae69 15298 align = INTVAL (align_rtx);
3f803cd9 15299
e9a25f70 15300 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
3f803cd9 15301
e9a25f70 15302 /* Is there a known alignment and is it less than 4? */
e075ae69 15303 if (align < 4)
3f803cd9 15304 {
0945b39d
JH
15305 rtx scratch1 = gen_reg_rtx (Pmode);
15306 emit_move_insn (scratch1, out);
e9a25f70 15307 /* Is there a known alignment and is it not 2? */
e075ae69 15308 if (align != 2)
3f803cd9 15309 {
e075ae69
RH
15310 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
15311 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
15312
15313 /* Leave just the 3 lower bits. */
0945b39d 15314 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
e075ae69
RH
15315 NULL_RTX, 0, OPTAB_WIDEN);
15316
9076b9c1 15317 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 15318 Pmode, 1, align_4_label);
60c81c89 15319 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
d43e0b7d 15320 Pmode, 1, align_2_label);
60c81c89 15321 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
d43e0b7d 15322 Pmode, 1, align_3_label);
3f803cd9
SC
15323 }
15324 else
15325 {
e9a25f70
JL
15326 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15327 check whether it is aligned to a 4-byte boundary. */
e9a25f70 15328
60c81c89 15329 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
e075ae69
RH
15330 NULL_RTX, 0, OPTAB_WIDEN);
15331
9076b9c1 15332 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 15333 Pmode, 1, align_4_label);
3f803cd9
SC
15334 }
15335
4e44c1ef 15336 mem = change_address (src, QImode, out);
e9a25f70 15337
e075ae69 15338 /* Now compare the bytes. */
e9a25f70 15339
0f290768 15340 /* Compare the first n unaligned byte on a byte per byte basis. */
9076b9c1 15341 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
d43e0b7d 15342 QImode, 1, end_0_label);
3f803cd9 15343
0f290768 15344 /* Increment the address. */
0945b39d
JH
15345 if (TARGET_64BIT)
15346 emit_insn (gen_adddi3 (out, out, const1_rtx));
15347 else
15348 emit_insn (gen_addsi3 (out, out, const1_rtx));
e9a25f70 15349
e075ae69
RH
15350 /* Not needed with an alignment of 2 */
15351 if (align != 2)
15352 {
15353 emit_label (align_2_label);
3f803cd9 15354
d43e0b7d
RK
15355 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15356 end_0_label);
e075ae69 15357
0945b39d
JH
15358 if (TARGET_64BIT)
15359 emit_insn (gen_adddi3 (out, out, const1_rtx));
15360 else
15361 emit_insn (gen_addsi3 (out, out, const1_rtx));
e075ae69
RH
15362
15363 emit_label (align_3_label);
15364 }
15365
d43e0b7d
RK
15366 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15367 end_0_label);
e075ae69 15368
0945b39d
JH
15369 if (TARGET_64BIT)
15370 emit_insn (gen_adddi3 (out, out, const1_rtx));
15371 else
15372 emit_insn (gen_addsi3 (out, out, const1_rtx));
3f803cd9
SC
15373 }
15374
e075ae69
RH
15375 /* Generate loop to check 4 bytes at a time. It is not a good idea to
15376 align this loop; doing so only makes programs bigger and does not
15377 help speed. */
15378 emit_label (align_4_label);
3f803cd9 15379
4e44c1ef 15380 mem = change_address (src, SImode, out);
e075ae69 15381 emit_move_insn (scratch, mem);
0945b39d
JH
15382 if (TARGET_64BIT)
15383 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15384 else
15385 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
e075ae69 15386
e2e52e1b
JH
15387 /* This formula yields a nonzero result iff one of the bytes is zero.
15388 This saves three branches inside the loop and many cycles. */
15389
15390 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15391 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15392 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
7471a1f0 15393 emit_insn (gen_andsi3 (tmpreg, tmpreg,
d8bf17f9 15394 gen_int_mode (0x80808080, SImode)));
d43e0b7d
RK
15395 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
15396 align_4_label);
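/* Worked example (added for illustration, not part of the original
   code): the sequence above computes (x - 0x01010101) & ~x & 0x80808080.
   For x == 0x41420043, whose second-lowest byte is zero, this is
   0x4040ff42 & 0xbebdffbc & 0x80808080 == 0x00008000, so the loop
   falls through; for x == 0x41424344, which has no zero byte, the
   result is 0 and the loop repeats.  */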
e2e52e1b
JH
15397
15398 if (TARGET_CMOVE)
15399 {
15400 rtx reg = gen_reg_rtx (SImode);
0945b39d 15401 rtx reg2 = gen_reg_rtx (Pmode);
e2e52e1b
JH
15402 emit_move_insn (reg, tmpreg);
15403 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
15404
0f290768 15405 /* If zero is not in the first two bytes, move two bytes forward. */
16189740 15406 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
15407 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15408 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15409 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
15410 gen_rtx_IF_THEN_ELSE (SImode, tmp,
77ebd435
AJ
15411 reg,
15412 tmpreg)));
e2e52e1b 15413 /* Emit lea manually to avoid clobbering of flags. */
0945b39d 15414 emit_insn (gen_rtx_SET (SImode, reg2,
60c81c89 15415 gen_rtx_PLUS (Pmode, out, const2_rtx)));
e2e52e1b
JH
15416
15417 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15418 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15419 emit_insn (gen_rtx_SET (VOIDmode, out,
0945b39d 15420 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
a4f31c00
AJ
15421 reg2,
15422 out)));
e2e52e1b
JH
15423
15424 }
15425 else
15426 {
15427 rtx end_2_label = gen_label_rtx ();
15428 /* Is zero in the first two bytes? */
15429
16189740 15430 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
15431 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15432 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15433 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15434 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15435 pc_rtx);
15436 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15437 JUMP_LABEL (tmp) = end_2_label;
15438
0f290768 15439 /* Not in the first two. Move two bytes forward. */
e2e52e1b 15440 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
0945b39d 15441 if (TARGET_64BIT)
60c81c89 15442 emit_insn (gen_adddi3 (out, out, const2_rtx));
0945b39d 15443 else
60c81c89 15444 emit_insn (gen_addsi3 (out, out, const2_rtx));
e2e52e1b
JH
15445
15446 emit_label (end_2_label);
15447
15448 }
15449
0f290768 15450 /* Avoid branch in fixing the byte. */
e2e52e1b 15451 tmpreg = gen_lowpart (QImode, tmpreg);
7e08e190 15452 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
e6e81735 15453 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
0945b39d 15454 if (TARGET_64BIT)
e6e81735 15455 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
0945b39d 15456 else
e6e81735 15457 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
e075ae69
RH
15458
15459 emit_label (end_0_label);
15460}
0e07aff3 15461
2ed941ec
RH
15462/* Expand strlen. */
15463
15464int
15465ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
15466{
15467 rtx addr, scratch1, scratch2, scratch3, scratch4;
15468
15469 /* The generic case of the strlen expander is long. Avoid expanding
15470 it unless TARGET_INLINE_ALL_STRINGOPS. */
15471
15472 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15473 && !TARGET_INLINE_ALL_STRINGOPS
15474 && !optimize_size
15475 && (!CONST_INT_P (align) || INTVAL (align) < 4))
15476 return 0;
15477
15478 addr = force_reg (Pmode, XEXP (src, 0));
15479 scratch1 = gen_reg_rtx (Pmode);
15480
15481 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15482 && !optimize_size)
15483 {
15484 /* Well, it seems that some optimizer does not combine a call like
15485 foo(strlen(bar), strlen(bar));
15486 when the move and the subtraction are done here. It calculates
15487 the length just once when these instructions are done inside
15488 output_strlen_unroll(). But since &bar[strlen(bar)] is often
15489 used and this uses one fewer register for the lifetime of
15490 output_strlen_unroll(), I think this is better. */
15491
15492 emit_move_insn (out, addr);
15493
15494 ix86_expand_strlensi_unroll_1 (out, src, align);
15495
15496 /* strlensi_unroll_1 returns the address of the zero at the end of
15497 the string, like memchr(), so compute the length by subtracting
15498 the start address. */
15499 if (TARGET_64BIT)
15500 emit_insn (gen_subdi3 (out, out, addr));
15501 else
15502 emit_insn (gen_subsi3 (out, out, addr));
15503 }
15504 else
15505 {
15506 rtx unspec;
15507 scratch2 = gen_reg_rtx (Pmode);
15508 scratch3 = gen_reg_rtx (Pmode);
15509 scratch4 = force_reg (Pmode, constm1_rtx);
15510
15511 emit_move_insn (scratch3, addr);
15512 eoschar = force_reg (QImode, eoschar);
15513
15514 src = replace_equiv_address_nv (src, scratch3);
15515
15516 /* If .md starts supporting :P, this can be done in .md. */
15517 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
15518 scratch4), UNSPEC_SCAS);
15519 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
15520 if (TARGET_64BIT)
15521 {
15522 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
15523 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
15524 }
15525 else
15526 {
15527 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
15528 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
15529 }
15530 }
15531 return 1;
15532}
15533
dc4d7240
JH
15534/* For a given symbol (function), construct code to compute the address
15535 of its PLT entry in the large x86-64 PIC model. */
15536rtx
15537construct_plt_address (rtx symbol)
15538{
15539 rtx tmp = gen_reg_rtx (Pmode);
15540 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15541
15542 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15543 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15544
15545 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15546 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
15547 return tmp;
15548}
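/* Illustrative expansion (a sketch only; actual register allocation
   differs, %r11 is an example temporary, and %r15 is assumed as the
   conventional large-model PIC register holding the GOT base):
     movabsq $foo@PLTOFF, %r11
     addq    %r15, %r11
   leaving the address of foo's PLT entry in the temporary.  */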
15549
0e07aff3 15550void
0f901c4c
SH
15551ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15552 rtx callarg2 ATTRIBUTE_UNUSED,
b96a374d 15553 rtx pop, int sibcall)
0e07aff3
RH
15554{
15555 rtx use = NULL, call;
15556
15557 if (pop == const0_rtx)
15558 pop = NULL;
d0396b79 15559 gcc_assert (!TARGET_64BIT || !pop);
0e07aff3 15560
f7288899
EC
15561 if (TARGET_MACHO && !TARGET_64BIT)
15562 {
b069de3b 15563#if TARGET_MACHO
f7288899
EC
15564 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15565 fnaddr = machopic_indirect_call_target (fnaddr);
15566#endif
15567 }
15568 else
15569 {
15570 /* Static functions and indirect calls don't need the pic register. */
dc4d7240 15571 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
f7288899
EC
15572 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15573 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15574 use_reg (&use, pic_offset_table_rtx);
15575 }
0e07aff3
RH
15576
15577 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15578 {
15579 rtx al = gen_rtx_REG (QImode, 0);
15580 emit_move_insn (al, callarg2);
15581 use_reg (&use, al);
15582 }
15583
dc4d7240
JH
15584 if (ix86_cmodel == CM_LARGE_PIC
15585 && GET_CODE (fnaddr) == MEM
15586 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15587 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15588 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15589 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
0e07aff3
RH
15590 {
15591 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15592 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15593 }
4977bab6
ZW
15594 if (sibcall && TARGET_64BIT
15595 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15596 {
15597 rtx addr;
15598 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
3c4ace25 15599 fnaddr = gen_rtx_REG (Pmode, R11_REG);
4977bab6
ZW
15600 emit_move_insn (fnaddr, addr);
15601 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15602 }
0e07aff3
RH
15603
15604 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15605 if (retval)
15606 call = gen_rtx_SET (VOIDmode, retval, call);
15607 if (pop)
15608 {
15609 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15610 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15611 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15612 }
15613
15614 call = emit_call_insn (call);
15615 if (use)
15616 CALL_INSN_FUNCTION_USAGE (call) = use;
15617}
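/* Example (illustrative): a call to a stdcall function returning int
   and popping 8 bytes of arguments is emitted roughly as a PARALLEL of
     (set (reg:SI ax) (call ...))
     (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 8)))
   so the callee's stack adjustment is part of the call insn itself.  */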
fce5a9f2 15618
e075ae69 15619\f
e075ae69
RH
15620/* Clear stack slot assignments remembered from previous functions.
15621 This is called from INIT_EXPANDERS once before RTL is emitted for each
15622 function. */
15623
e2500fed 15624static struct machine_function *
b96a374d 15625ix86_init_machine_status (void)
37b15744 15626{
d7394366
JH
15627 struct machine_function *f;
15628
9415ab7d 15629 f = GGC_CNEW (struct machine_function);
d7394366 15630 f->use_fast_prologue_epilogue_nregs = -1;
5bf5a10b 15631 f->tls_descriptor_call_expanded_p = 0;
8330e2c6
AJ
15632
15633 return f;
1526a060
BS
15634}
15635
e075ae69
RH
15636/* Return a MEM corresponding to a stack slot with mode MODE.
15637 Allocate a new slot if necessary.
15638
15639 The RTL for a function can have several slots available: N is
15640 which slot to use. */
15641
15642rtx
ff680eb1 15643assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
e075ae69 15644{
ddb0ae00
ZW
15645 struct stack_local_entry *s;
15646
ff680eb1 15647 gcc_assert (n < MAX_386_STACK_LOCALS);
e075ae69 15648
80dcd3aa
UB
15649 /* Virtual slot is valid only before vregs are instantiated. */
15650 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
15651
ddb0ae00
ZW
15652 for (s = ix86_stack_locals; s; s = s->next)
15653 if (s->mode == mode && s->n == n)
3e916873 15654 return copy_rtx (s->rtl);
ddb0ae00
ZW
15655
15656 s = (struct stack_local_entry *)
15657 ggc_alloc (sizeof (struct stack_local_entry));
15658 s->n = n;
15659 s->mode = mode;
15660 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
e075ae69 15661
ddb0ae00
ZW
15662 s->next = ix86_stack_locals;
15663 ix86_stack_locals = s;
15664 return s->rtl;
e075ae69 15665}
f996902d
RH
15666
15667/* Construct the SYMBOL_REF for the tls_get_addr function. */
15668
e2500fed 15669static GTY(()) rtx ix86_tls_symbol;
f996902d 15670rtx
b96a374d 15671ix86_tls_get_addr (void)
f996902d 15672{
f996902d 15673
e2500fed 15674 if (!ix86_tls_symbol)
f996902d 15675 {
75d38379 15676 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
5bf5a10b
AO
15677 (TARGET_ANY_GNU_TLS
15678 && !TARGET_64BIT)
75d38379
JJ
15679 ? "___tls_get_addr"
15680 : "__tls_get_addr");
f996902d
RH
15681 }
15682
e2500fed 15683 return ix86_tls_symbol;
f996902d 15684}
5bf5a10b
AO
15685
15686/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15687
15688static GTY(()) rtx ix86_tls_module_base_symbol;
15689rtx
15690ix86_tls_module_base (void)
15691{
15692
15693 if (!ix86_tls_module_base_symbol)
15694 {
15695 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15696 "_TLS_MODULE_BASE_");
15697 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15698 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15699 }
15700
15701 return ix86_tls_module_base_symbol;
15702}
e075ae69
RH
15703\f
15704/* Calculate the length of the memory address in the instruction
15705 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15706
8fe75e43 15707int
b96a374d 15708memory_address_length (rtx addr)
e075ae69
RH
15709{
15710 struct ix86_address parts;
15711 rtx base, index, disp;
15712 int len;
d0396b79 15713 int ok;
e075ae69
RH
15714
15715 if (GET_CODE (addr) == PRE_DEC
0d7d98ee
JH
15716 || GET_CODE (addr) == POST_INC
15717 || GET_CODE (addr) == PRE_MODIFY
15718 || GET_CODE (addr) == POST_MODIFY)
e075ae69 15719 return 0;
3f803cd9 15720
d0396b79
NS
15721 ok = ix86_decompose_address (addr, &parts);
15722 gcc_assert (ok);
3f803cd9 15723
7c93c2cc
PB
15724 if (parts.base && GET_CODE (parts.base) == SUBREG)
15725 parts.base = SUBREG_REG (parts.base);
15726 if (parts.index && GET_CODE (parts.index) == SUBREG)
15727 parts.index = SUBREG_REG (parts.index);
15728
e075ae69
RH
15729 base = parts.base;
15730 index = parts.index;
15731 disp = parts.disp;
15732 len = 0;
3f803cd9 15733
7b65ed54
EB
15734 /* Rule of thumb:
15735 - esp as the base always wants an index,
15736 - ebp as the base always wants a displacement. */
15737
e075ae69
RH
15738 /* Register Indirect. */
15739 if (base && !index && !disp)
15740 {
7b65ed54
EB
15741 /* esp (for its index) and ebp (for its displacement) need
15742 the two-byte modrm form. */
e075ae69
RH
15743 if (addr == stack_pointer_rtx
15744 || addr == arg_pointer_rtx
564d80f4
JH
15745 || addr == frame_pointer_rtx
15746 || addr == hard_frame_pointer_rtx)
e075ae69 15747 len = 1;
3f803cd9 15748 }
e9a25f70 15749
e075ae69
RH
15750 /* Direct Addressing. */
15751 else if (disp && !base && !index)
15752 len = 4;
15753
3f803cd9
SC
15754 else
15755 {
e075ae69
RH
15756 /* Find the length of the displacement constant. */
15757 if (disp)
15758 {
f38840db 15759 if (base && satisfies_constraint_K (disp))
e075ae69
RH
15760 len = 1;
15761 else
15762 len = 4;
15763 }
7b65ed54
EB
15764 /* ebp always wants a displacement. */
15765 else if (base == hard_frame_pointer_rtx)
15766 len = 1;
3f803cd9 15767
43f3a59d 15768 /* An index requires the two-byte modrm form... */
7b65ed54
EB
15769 if (index
15770 /* ...like esp, which always wants an index. */
15771 || base == stack_pointer_rtx
15772 || base == arg_pointer_rtx
15773 || base == frame_pointer_rtx)
e075ae69 15774 len += 1;
3f803cd9
SC
15775 }
15776
e075ae69
RH
15777 return len;
15778}
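/* Examples (illustrative, assuming 32-bit addressing): a plain (%eax)
   operand adds no bytes (length 0); (%esp) needs a SIB byte (length 1);
   8(%ebp) needs a one-byte displacement (length 1); 8(%ebp,%ecx) needs
   disp8 plus a SIB byte (length 2); a bare absolute address needs a
   four-byte displacement (length 4).  */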
79325812 15779
5bf0ebab
RH
15780/* Compute default value for "length_immediate" attribute. When SHORTFORM
15781 is set, expect that insn have 8bit immediate alternative. */
e075ae69 15782int
b96a374d 15783ix86_attr_length_immediate_default (rtx insn, int shortform)
e075ae69 15784{
6ef67412
JH
15785 int len = 0;
15786 int i;
6c698a6d 15787 extract_insn_cached (insn);
6ef67412
JH
15788 for (i = recog_data.n_operands - 1; i >= 0; --i)
15789 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 15790 {
d0396b79 15791 gcc_assert (!len);
f38840db 15792 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
6ef67412
JH
15793 len = 1;
15794 else
15795 {
15796 switch (get_attr_mode (insn))
15797 {
15798 case MODE_QI:
15799 len+=1;
15800 break;
15801 case MODE_HI:
15802 len+=2;
15803 break;
15804 case MODE_SI:
15805 len+=4;
15806 break;
14f73b5a
JH
15807 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15808 case MODE_DI:
15809 len+=4;
15810 break;
6ef67412 15811 default:
c725bd79 15812 fatal_insn ("unknown insn mode", insn);
6ef67412
JH
15813 }
15814 }
3071fab5 15815 }
6ef67412
JH
15816 return len;
15817}
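/* Example (illustrative): for "addl $3, %eax", which has an 8-bit
   immediate alternative, the attribute value is 1; for
   "addl $1000, %eax" in SImode it is 4.  */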
15818/* Compute default value for "length_address" attribute. */
15819int
b96a374d 15820ix86_attr_length_address_default (rtx insn)
6ef67412
JH
15821{
15822 int i;
9b73c90a
EB
15823
15824 if (get_attr_type (insn) == TYPE_LEA)
15825 {
15826 rtx set = PATTERN (insn);
d0396b79
NS
15827
15828 if (GET_CODE (set) == PARALLEL)
9b73c90a 15829 set = XVECEXP (set, 0, 0);
d0396b79
NS
15830
15831 gcc_assert (GET_CODE (set) == SET);
9b73c90a
EB
15832
15833 return memory_address_length (SET_SRC (set));
15834 }
15835
6c698a6d 15836 extract_insn_cached (insn);
1ccbefce 15837 for (i = recog_data.n_operands - 1; i >= 0; --i)
7656aee4 15838 if (MEM_P (recog_data.operand[i]))
e075ae69 15839 {
6ef67412 15840 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
15841 break;
15842 }
6ef67412 15843 return 0;
3f803cd9 15844}
e075ae69
RH
15845\f
15846/* Return the maximum number of instructions a cpu can issue. */
b657fc39 15847
c237e94a 15848static int
b96a374d 15849ix86_issue_rate (void)
b657fc39 15850{
9e555526 15851 switch (ix86_tune)
b657fc39 15852 {
e075ae69
RH
15853 case PROCESSOR_PENTIUM:
15854 case PROCESSOR_K6:
15855 return 2;
79325812 15856
e075ae69 15857 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
15858 case PROCESSOR_PENTIUM4:
15859 case PROCESSOR_ATHLON:
4977bab6 15860 case PROCESSOR_K8:
21efb4d4 15861 case PROCESSOR_AMDFAM10:
89c43c0a 15862 case PROCESSOR_NOCONA:
d326eaf0
JH
15863 case PROCESSOR_GENERIC32:
15864 case PROCESSOR_GENERIC64:
e075ae69 15865 return 3;
b657fc39 15866
05f85dbb
VM
15867 case PROCESSOR_CORE2:
15868 return 4;
15869
b657fc39 15870 default:
e075ae69 15871 return 1;
b657fc39 15872 }
b657fc39
L
15873}
15874
e075ae69
RH
15875/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
15876 set by DEP_INSN and nothing else that DEP_INSN sets. */
b657fc39 15877
e075ae69 15878static int
d1c78882 15879ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
e075ae69
RH
15880{
15881 rtx set, set2;
b657fc39 15882
e075ae69
RH
15883 /* Simplify the test for uninteresting insns. */
15884 if (insn_type != TYPE_SETCC
15885 && insn_type != TYPE_ICMOV
15886 && insn_type != TYPE_FCMOV
15887 && insn_type != TYPE_IBR)
15888 return 0;
b657fc39 15889
e075ae69
RH
15890 if ((set = single_set (dep_insn)) != 0)
15891 {
15892 set = SET_DEST (set);
15893 set2 = NULL_RTX;
15894 }
15895 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15896 && XVECLEN (PATTERN (dep_insn), 0) == 2
15897 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15898 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15899 {
15900 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15901 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15902 }
78a0d70c
ZW
15903 else
15904 return 0;
b657fc39 15905
7656aee4 15906 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
78a0d70c 15907 return 0;
b657fc39 15908
f5143c46 15909 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
15910 not any other potentially set register. */
15911 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15912 return 0;
15913
15914 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15915 return 0;
15916
15917 return 1;
e075ae69 15918}
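/* Example (illustrative): in the pair
     cmpl %eax, %ebx
     je   .L1
   the jump reads only the flags set by the compare, so this function
   returns 1 and ix86_adjust_cost lets the pair issue with cost 0 on
   Pentium.  */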
b657fc39 15919
e075ae69
RH
15920/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15921 address with operands set by DEP_INSN. */
15922
15923static int
d1c78882 15924ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
e075ae69
RH
15925{
15926 rtx addr;
15927
6ad48e84
JH
15928 if (insn_type == TYPE_LEA
15929 && TARGET_PENTIUM)
5fbdde42
RH
15930 {
15931 addr = PATTERN (insn);
d0396b79
NS
15932
15933 if (GET_CODE (addr) == PARALLEL)
5fbdde42 15934 addr = XVECEXP (addr, 0, 0);
5656a184 15935
d0396b79 15936 gcc_assert (GET_CODE (addr) == SET);
5656a184 15937
5fbdde42
RH
15938 addr = SET_SRC (addr);
15939 }
e075ae69
RH
15940 else
15941 {
15942 int i;
6c698a6d 15943 extract_insn_cached (insn);
1ccbefce 15944 for (i = recog_data.n_operands - 1; i >= 0; --i)
7656aee4 15945 if (MEM_P (recog_data.operand[i]))
e075ae69 15946 {
1ccbefce 15947 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
15948 goto found;
15949 }
15950 return 0;
15951 found:;
b657fc39
L
15952 }
15953
e075ae69 15954 return modified_in_p (addr, dep_insn);
b657fc39 15955}
a269a03c 15956
c237e94a 15957static int
b96a374d 15958ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
a269a03c 15959{
e075ae69 15960 enum attr_type insn_type, dep_insn_type;
8695f61e 15961 enum attr_memory memory;
e075ae69 15962 rtx set, set2;
9b00189f 15963 int dep_insn_code_number;
a269a03c 15964
d1f87653 15965 /* Anti and output dependencies have zero cost on all CPUs. */
e075ae69 15966 if (REG_NOTE_KIND (link) != 0)
309ada50 15967 return 0;
a269a03c 15968
9b00189f
JH
15969 dep_insn_code_number = recog_memoized (dep_insn);
15970
e075ae69 15971 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 15972 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 15973 return cost;
a269a03c 15974
1c71e60e
JH
15975 insn_type = get_attr_type (insn);
15976 dep_insn_type = get_attr_type (dep_insn);
9b00189f 15977
9e555526 15978 switch (ix86_tune)
a269a03c
JC
15979 {
15980 case PROCESSOR_PENTIUM:
e075ae69 15981 /* Address Generation Interlock adds a cycle of latency. */
d1c78882 15982 if (ix86_agi_dependent (insn, dep_insn, insn_type))
e075ae69
RH
15983 cost += 1;
15984
15985 /* ??? Compares pair with jump/setcc. */
d1c78882 15986 if (ix86_flags_dependent (insn, dep_insn, insn_type))
e075ae69
RH
15987 cost = 0;
15988
d1f87653 15989 /* Floating point stores require value to be ready one cycle earlier. */
0f290768 15990 if (insn_type == TYPE_FMOV
e075ae69 15991 && get_attr_memory (insn) == MEMORY_STORE
d1c78882 15992 && !ix86_agi_dependent (insn, dep_insn, insn_type))
e075ae69
RH
15993 cost += 1;
15994 break;
a269a03c 15995
e075ae69 15996 case PROCESSOR_PENTIUMPRO:
6ad48e84 15997 memory = get_attr_memory (insn);
e075ae69
RH
15998
15999 /* INT->FP conversion is expensive. */
16000 if (get_attr_fp_int_src (dep_insn))
16001 cost += 5;
16002
16003 /* There is one cycle extra latency between an FP op and a store. */
16004 if (insn_type == TYPE_FMOV
16005 && (set = single_set (dep_insn)) != NULL_RTX
16006 && (set2 = single_set (insn)) != NULL_RTX
16007 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
7656aee4 16008 && MEM_P (SET_DEST (set2)))
e075ae69 16009 cost += 1;
6ad48e84
JH
16010
16011 /* Show the ability of the reorder buffer to hide the latency of a load
16012 by executing it in parallel with the previous instruction when the
16013 previous instruction is not needed to compute the address. */
16014 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
d1c78882 16015 && !ix86_agi_dependent (insn, dep_insn, insn_type))
b96a374d 16016 {
6ad48e84
JH
16017 /* Claim moves to take one cycle, as the core can issue one load
16018 at a time and the next load can start a cycle later. */
16019 if (dep_insn_type == TYPE_IMOV
16020 || dep_insn_type == TYPE_FMOV)
16021 cost = 1;
16022 else if (cost > 1)
16023 cost--;
16024 }
e075ae69 16025 break;
a269a03c 16026
e075ae69 16027 case PROCESSOR_K6:
6ad48e84 16028 memory = get_attr_memory (insn);
8695f61e 16029
e075ae69
RH
16030 /* The esp dependency is resolved before the instruction is really
16031 finished. */
16032 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16033 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16034 return 1;
a269a03c 16035
e075ae69
RH
16036 /* INT->FP conversion is expensive. */
16037 if (get_attr_fp_int_src (dep_insn))
16038 cost += 5;
6ad48e84
JH
16039
16040 /* Show the ability of the reorder buffer to hide the latency of a load
16041 by executing it in parallel with the previous instruction when the
16042 previous instruction is not needed to compute the address. */
16043 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
d1c78882 16044 && !ix86_agi_dependent (insn, dep_insn, insn_type))
b96a374d 16045 {
6ad48e84
JH
16046 /* Claim moves to take one cycle, as the core can issue one load
16047 at a time and the next load can start a cycle later. */
16048 if (dep_insn_type == TYPE_IMOV
16049 || dep_insn_type == TYPE_FMOV)
16050 cost = 1;
16051 else if (cost > 2)
16052 cost -= 2;
16053 else
16054 cost = 1;
16055 }
a14003ee 16056 break;
e075ae69 16057
309ada50 16058 case PROCESSOR_ATHLON:
4977bab6 16059 case PROCESSOR_K8:
21efb4d4 16060 case PROCESSOR_AMDFAM10:
d326eaf0
JH
16061 case PROCESSOR_GENERIC32:
16062 case PROCESSOR_GENERIC64:
6ad48e84 16063 memory = get_attr_memory (insn);
6ad48e84 16064
6ad48e84
JH
16065 /* Show the ability of the reorder buffer to hide the latency of a load
16066 by executing it in parallel with the previous instruction when the
16067 previous instruction is not needed to compute the address. */
16068 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
d1c78882 16069 && !ix86_agi_dependent (insn, dep_insn, insn_type))
b96a374d 16070 {
26f74aa3
JH
16071 enum attr_unit unit = get_attr_unit (insn);
16072 int loadcost = 3;
16073
16074 /* Because of the difference between the length of integer and
16075 floating unit pipeline preparation stages, the memory operands
b96a374d 16076 for floating point are cheaper.
26f74aa3 16077
c51e6d85 16078 ??? For Athlon the difference is most probably 2. */
26f74aa3
JH
16079 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16080 loadcost = 3;
16081 else
16082 loadcost = TARGET_ATHLON ? 2 : 0;
16083
16084 if (cost >= loadcost)
16085 cost -= loadcost;
6ad48e84
JH
16086 else
16087 cost = 0;
16088 }
309ada50 16089
a269a03c 16090 default:
a269a03c
JC
16091 break;
16092 }
16093
16094 return cost;
16095}
0a726ef1 16096
9b690711
RH
16097/* How many alternative schedules to try. This should be as wide as the
16098 scheduling freedom in the DFA, but no wider. Making this value too
16099 large results in extra work for the scheduler. */
16100
16101static int
b96a374d 16102ia32_multipass_dfa_lookahead (void)
9b690711 16103{
9e555526 16104 if (ix86_tune == PROCESSOR_PENTIUM)
9b690711 16105 return 2;
56bab446 16106
8695f61e
SB
16107 if (ix86_tune == PROCESSOR_PENTIUMPRO
16108 || ix86_tune == PROCESSOR_K6)
56bab446
SB
16109 return 1;
16110
9b690711 16111 else
56bab446 16112 return 0;
9b690711
RH
16113}
16114
0e4970d7 16115\f
a7180f70
BS
16116/* Compute the alignment given to a constant that is being placed in memory.
16117 EXP is the constant and ALIGN is the alignment that the object would
16118 ordinarily have.
16119 The value of this function is used instead of that alignment to align
16120 the object. */
16121
16122int
b96a374d 16123ix86_constant_alignment (tree exp, int align)
a7180f70
BS
16124{
16125 if (TREE_CODE (exp) == REAL_CST)
16126 {
16127 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16128 return 64;
16129 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16130 return 128;
16131 }
4137ba7a
JJ
16132 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16133 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16134 return BITS_PER_WORD;
a7180f70
BS
16135
16136 return align;
16137}
16138
16139/* Compute the alignment for a static variable.
16140 TYPE is the data type, and ALIGN is the alignment that
16141 the object would ordinarily have. The value of this function is used
16142 instead of that alignment to align the object. */
16143
16144int
b96a374d 16145ix86_data_alignment (tree type, int align)
a7180f70 16146{
bf69f9d2 16147 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
6c23a1f2 16148
a7180f70 16149 if (AGGREGATE_TYPE_P (type)
6c23a1f2
JB
16150 && TYPE_SIZE (type)
16151 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16152 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16153 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16154 && align < max_align)
16155 align = max_align;
a7180f70 16156
0d7d98ee
JH
16157 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16158 to a 16-byte boundary. */
16159 if (TARGET_64BIT)
16160 {
16161 if (AGGREGATE_TYPE_P (type)
16162 && TYPE_SIZE (type)
16163 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16164 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16165 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16166 return 128;
16167 }
16168
a7180f70
BS
16169 if (TREE_CODE (type) == ARRAY_TYPE)
16170 {
16171 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16172 return 64;
16173 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16174 return 128;
16175 }
16176 else if (TREE_CODE (type) == COMPLEX_TYPE)
16177 {
0f290768 16178
a7180f70
BS
16179 if (TYPE_MODE (type) == DCmode && align < 64)
16180 return 64;
16181 if (TYPE_MODE (type) == XCmode && align < 128)
16182 return 128;
16183 }
16184 else if ((TREE_CODE (type) == RECORD_TYPE
16185 || TREE_CODE (type) == UNION_TYPE
16186 || TREE_CODE (type) == QUAL_UNION_TYPE)
16187 && TYPE_FIELDS (type))
16188 {
16189 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16190 return 64;
16191 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16192 return 128;
16193 }
16194 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16195 || TREE_CODE (type) == INTEGER_TYPE)
16196 {
16197 if (TYPE_MODE (type) == DFmode && align < 64)
16198 return 64;
16199 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16200 return 128;
16201 }
16202
16203 return align;
16204}
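/* Example (illustrative): a global "double d[4]" requested with, say,
   a 32-bit default alignment gets 64-bit alignment from the ARRAY_TYPE
   case above; on x86-64 the aggregate is 32 bytes, so the 16-byte rule
   raises it to 128-bit alignment instead.  */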
16205
16206/* Compute the alignment for a local variable.
16207 TYPE is the data type, and ALIGN is the alignment that
16208 the object would ordinarily have. The value of this macro is used
16209 instead of that alignment to align the object. */
16210
16211int
b96a374d 16212ix86_local_alignment (tree type, int align)
a7180f70 16213{
0d7d98ee
JH
16214 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16215 to a 16-byte boundary. */
16216 if (TARGET_64BIT)
16217 {
16218 if (AGGREGATE_TYPE_P (type)
16219 && TYPE_SIZE (type)
16220 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16221 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
16222 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16223 return 128;
16224 }
a7180f70
BS
16225 if (TREE_CODE (type) == ARRAY_TYPE)
16226 {
16227 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16228 return 64;
16229 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16230 return 128;
16231 }
16232 else if (TREE_CODE (type) == COMPLEX_TYPE)
16233 {
16234 if (TYPE_MODE (type) == DCmode && align < 64)
16235 return 64;
16236 if (TYPE_MODE (type) == XCmode && align < 128)
16237 return 128;
16238 }
16239 else if ((TREE_CODE (type) == RECORD_TYPE
16240 || TREE_CODE (type) == UNION_TYPE
16241 || TREE_CODE (type) == QUAL_UNION_TYPE)
16242 && TYPE_FIELDS (type))
16243 {
16244 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16245 return 64;
16246 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16247 return 128;
16248 }
16249 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16250 || TREE_CODE (type) == INTEGER_TYPE)
16251 {
0f290768 16252
a7180f70
BS
16253 if (TYPE_MODE (type) == DFmode && align < 64)
16254 return 64;
16255 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16256 return 128;
16257 }
16258 return align;
16259}
0ed08620
JH
16260\f
16261/* Emit RTL insns to initialize the variable parts of a trampoline.
16262 FNADDR is an RTX for the address of the function's pure code.
16263 CXT is an RTX for the static chain value for the function. */
16264void
b96a374d 16265x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
0ed08620
JH
16266{
16267 if (!TARGET_64BIT)
16268 {
16269 /* Compute offset from the end of the jmp to the target function. */
16270 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
16271 plus_constant (tramp, 10),
16272 NULL_RTX, 1, OPTAB_DIRECT);
16273 emit_move_insn (gen_rtx_MEM (QImode, tramp),
d8bf17f9 16274 gen_int_mode (0xb9, QImode));
0ed08620
JH
16275 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
16276 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
d8bf17f9 16277 gen_int_mode (0xe9, QImode));
0ed08620
JH
16278 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
16279 }
16280 else
16281 {
16282 int offset = 0;
16283 /* Try to load the address using the shorter movl instead of movabs.
16284 We may want to support movq for kernel mode, but the kernel does not
16285 use trampolines at the moment. */
8fe75e43 16286 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
0ed08620
JH
16287 {
16288 fnaddr = copy_to_mode_reg (DImode, fnaddr);
16289 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 16290 gen_int_mode (0xbb41, HImode));
0ed08620
JH
16291 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
16292 gen_lowpart (SImode, fnaddr));
16293 offset += 6;
16294 }
16295 else
16296 {
16297 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 16298 gen_int_mode (0xbb49, HImode));
0ed08620
JH
16299 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16300 fnaddr);
16301 offset += 10;
16302 }
16303 /* Load static chain using movabs to r10. */
16304 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 16305 gen_int_mode (0xba49, HImode));
0ed08620
JH
16306 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16307 cxt);
16308 offset += 10;
16309 /* Jump to r11. */
16310 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 16311 gen_int_mode (0xff49, HImode));
0ed08620 16312 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
d8bf17f9 16313 gen_int_mode (0xe3, QImode));
0ed08620 16314 offset += 3;
d0396b79 16315 gcc_assert (offset <= TRAMPOLINE_SIZE);
0ed08620 16316 }
5791cc29 16317
e7a742ec 16318#ifdef ENABLE_EXECUTE_STACK
f84d109f 16319 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
5791cc29
JT
16320 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
16321#endif
0ed08620 16322}
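/* Illustrative layout (added comment; byte values taken from the
   constants emitted above) of the 10-byte 32-bit trampoline:
     offset 0: b9 <cxt, 4 bytes>    mov $cxt, %ecx
     offset 5: e9 <disp, 4 bytes>   jmp fnaddr, disp == fnaddr - (tramp + 10)
   The 64-bit variant loads FNADDR into r11 (41 bb imm32 when it
   zero-extends, else 49 bb imm64), loads the static chain into r10
   with 49 ba imm64, and ends with the 3-byte jmp *%r11 (49 ff e3).  */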
eeb06b1b 16323\f
eb701deb
RH
16324/* Codes for all the SSE/MMX builtins. */
16325enum ix86_builtins
16326{
16327 IX86_BUILTIN_ADDPS,
16328 IX86_BUILTIN_ADDSS,
16329 IX86_BUILTIN_DIVPS,
16330 IX86_BUILTIN_DIVSS,
16331 IX86_BUILTIN_MULPS,
16332 IX86_BUILTIN_MULSS,
16333 IX86_BUILTIN_SUBPS,
16334 IX86_BUILTIN_SUBSS,
16335
16336 IX86_BUILTIN_CMPEQPS,
16337 IX86_BUILTIN_CMPLTPS,
16338 IX86_BUILTIN_CMPLEPS,
16339 IX86_BUILTIN_CMPGTPS,
16340 IX86_BUILTIN_CMPGEPS,
16341 IX86_BUILTIN_CMPNEQPS,
16342 IX86_BUILTIN_CMPNLTPS,
16343 IX86_BUILTIN_CMPNLEPS,
16344 IX86_BUILTIN_CMPNGTPS,
16345 IX86_BUILTIN_CMPNGEPS,
16346 IX86_BUILTIN_CMPORDPS,
16347 IX86_BUILTIN_CMPUNORDPS,
eb701deb
RH
16348 IX86_BUILTIN_CMPEQSS,
16349 IX86_BUILTIN_CMPLTSS,
16350 IX86_BUILTIN_CMPLESS,
16351 IX86_BUILTIN_CMPNEQSS,
16352 IX86_BUILTIN_CMPNLTSS,
16353 IX86_BUILTIN_CMPNLESS,
16354 IX86_BUILTIN_CMPNGTSS,
16355 IX86_BUILTIN_CMPNGESS,
16356 IX86_BUILTIN_CMPORDSS,
16357 IX86_BUILTIN_CMPUNORDSS,
eb701deb
RH
16358
16359 IX86_BUILTIN_COMIEQSS,
16360 IX86_BUILTIN_COMILTSS,
16361 IX86_BUILTIN_COMILESS,
16362 IX86_BUILTIN_COMIGTSS,
16363 IX86_BUILTIN_COMIGESS,
16364 IX86_BUILTIN_COMINEQSS,
16365 IX86_BUILTIN_UCOMIEQSS,
16366 IX86_BUILTIN_UCOMILTSS,
16367 IX86_BUILTIN_UCOMILESS,
16368 IX86_BUILTIN_UCOMIGTSS,
16369 IX86_BUILTIN_UCOMIGESS,
16370 IX86_BUILTIN_UCOMINEQSS,
16371
16372 IX86_BUILTIN_CVTPI2PS,
16373 IX86_BUILTIN_CVTPS2PI,
16374 IX86_BUILTIN_CVTSI2SS,
16375 IX86_BUILTIN_CVTSI642SS,
16376 IX86_BUILTIN_CVTSS2SI,
16377 IX86_BUILTIN_CVTSS2SI64,
16378 IX86_BUILTIN_CVTTPS2PI,
16379 IX86_BUILTIN_CVTTSS2SI,
16380 IX86_BUILTIN_CVTTSS2SI64,
16381
16382 IX86_BUILTIN_MAXPS,
16383 IX86_BUILTIN_MAXSS,
16384 IX86_BUILTIN_MINPS,
16385 IX86_BUILTIN_MINSS,
16386
16387 IX86_BUILTIN_LOADUPS,
16388 IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND,
  IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_RSQRTF,

  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_EMMS,
  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  /* SSE2 MMX */
  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PSLLDQ128,
  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,

  /* Prescott New Instructions.  */
  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  /* SSSE3.  */
  IX86_BUILTIN_PHADDW,
  IX86_BUILTIN_PHADDD,
  IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW,
  IX86_BUILTIN_PHSUBD,
  IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW,
  IX86_BUILTIN_PMULHRSW,
  IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB,
  IX86_BUILTIN_PSIGNW,
  IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR,
  IX86_BUILTIN_PABSB,
  IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128,
  IX86_BUILTIN_PHADDD128,
  IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128,
  IX86_BUILTIN_PHSUBD128,
  IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128,
  IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128,
  IX86_BUILTIN_PSIGNB128,
  IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128,
  IX86_BUILTIN_PALIGNR128,
  IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128,
  IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD,
  IX86_BUILTIN_MOVNTSS,
  IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ,
  IX86_BUILTIN_INSERTQI,
  IX86_BUILTIN_INSERTQ,

  /* SSE4.1.  */
  IX86_BUILTIN_BLENDPD,
  IX86_BUILTIN_BLENDPS,
  IX86_BUILTIN_BLENDVPD,
  IX86_BUILTIN_BLENDVPS,
  IX86_BUILTIN_PBLENDVB128,
  IX86_BUILTIN_PBLENDW128,

  IX86_BUILTIN_DPPD,
  IX86_BUILTIN_DPPS,

  IX86_BUILTIN_INSERTPS128,

  IX86_BUILTIN_MOVNTDQA,
  IX86_BUILTIN_MPSADBW128,
  IX86_BUILTIN_PACKUSDW128,
  IX86_BUILTIN_PCMPEQQ,
  IX86_BUILTIN_PHMINPOSUW128,

  IX86_BUILTIN_PMAXSB128,
  IX86_BUILTIN_PMAXSD128,
  IX86_BUILTIN_PMAXUD128,
  IX86_BUILTIN_PMAXUW128,

  IX86_BUILTIN_PMINSB128,
  IX86_BUILTIN_PMINSD128,
  IX86_BUILTIN_PMINUD128,
  IX86_BUILTIN_PMINUW128,

  IX86_BUILTIN_PMOVSXBW128,
  IX86_BUILTIN_PMOVSXBD128,
  IX86_BUILTIN_PMOVSXBQ128,
  IX86_BUILTIN_PMOVSXWD128,
  IX86_BUILTIN_PMOVSXWQ128,
  IX86_BUILTIN_PMOVSXDQ128,

  IX86_BUILTIN_PMOVZXBW128,
  IX86_BUILTIN_PMOVZXBD128,
  IX86_BUILTIN_PMOVZXBQ128,
  IX86_BUILTIN_PMOVZXWD128,
  IX86_BUILTIN_PMOVZXWQ128,
  IX86_BUILTIN_PMOVZXDQ128,

  IX86_BUILTIN_PMULDQ128,
  IX86_BUILTIN_PMULLD128,

  IX86_BUILTIN_ROUNDPD,
  IX86_BUILTIN_ROUNDPS,
  IX86_BUILTIN_ROUNDSD,
  IX86_BUILTIN_ROUNDSS,

  IX86_BUILTIN_PTESTZ,
  IX86_BUILTIN_PTESTC,
  IX86_BUILTIN_PTESTNZC,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_SET_V2DI,
  IX86_BUILTIN_VEC_SET_V4SF,
  IX86_BUILTIN_VEC_SET_V4SI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,
  IX86_BUILTIN_VEC_SET_V16QI,

  IX86_BUILTIN_VEC_PACK_SFIX,

  /* SSE4.2.  */
  IX86_BUILTIN_CRC32QI,
  IX86_BUILTIN_CRC32HI,
  IX86_BUILTIN_CRC32SI,
  IX86_BUILTIN_CRC32DI,

  IX86_BUILTIN_PCMPESTRI128,
  IX86_BUILTIN_PCMPESTRM128,
  IX86_BUILTIN_PCMPESTRA128,
  IX86_BUILTIN_PCMPESTRC128,
  IX86_BUILTIN_PCMPESTRO128,
  IX86_BUILTIN_PCMPESTRS128,
  IX86_BUILTIN_PCMPESTRZ128,
  IX86_BUILTIN_PCMPISTRI128,
  IX86_BUILTIN_PCMPISTRM128,
  IX86_BUILTIN_PCMPISTRA128,
  IX86_BUILTIN_PCMPISTRC128,
  IX86_BUILTIN_PCMPISTRO128,
  IX86_BUILTIN_PCMPISTRS128,
  IX86_BUILTIN_PCMPISTRZ128,

  IX86_BUILTIN_PCMPGTQ,

  /* TFmode support builtins.  */
  IX86_BUILTIN_INFQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  IX86_BUILTIN_MAX
};

/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Do so
 * only if MASK matches one of the ISA bits enabled in ix86_isa_flags.
 * Stores the function decl in the ix86_builtins array.
 * Returns the function decl, or NULL_TREE if the builtin was not added.  */

static inline tree
def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (mask & ix86_isa_flags
      && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
    {
      decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				   NULL, NULL_TREE);
      ix86_builtins[(int) code] = decl;
    }

  return decl;
}
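
/* Illustrative only (a sketch, not one of this file's registrations):
   an SSE-gated builtin would be added with something like

     def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
		  void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);

   which returns NULL_TREE when SSE is not enabled, so callers must be
   prepared for a missing decl.  The name/type pairing above is an
   assumed example; the authoritative registrations are made in
   ix86_init_mmx_sse_builtins below.  */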

/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (int mask, const char *name, tree type,
		   enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, type, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  return decl;
}
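
/* Setting TREE_READONLY on a FUNCTION_DECL is equivalent to the "const"
   attribute: it tells the middle-end the builtin has no side effects and
   depends only on its arguments, so duplicate calls may be CSEd.  This
   is appropriate for purely arithmetic builtins.  */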

/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1
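
/* For example, SSE has no native greater-than compare: the
   __builtin_ia32_cmpgtps entry below pairs rtx code LT with
   BUILTIN_DESC_SWAP_OPERANDS, so a > b is emitted as b < a.  */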

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};

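/* Reading an entry: MASK gates registration on the enabled ISA, ICODE
   names the insn pattern to expand, NAME is the user-visible builtin
   (0 when the name is supplied separately), and COMPARISON/FLAG
   parameterize the expander, e.g. the rtx comparison code and the
   operand-swap bit used by the compare tables below.  */
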
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};

static const struct builtin_description bdesc_ptest[] =
{
  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
};

static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};

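/* In the two string-compare tables above, the flag field doubles as a
   flags-register mode: entries carrying (int) CCAmode, CCCmode, etc.
   extract a single condition bit (above, carry, overflow, sign, zero)
   from the EFLAGS result, while the flag-0 entries return the index or
   mask result of the instruction itself.  */
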
static const struct builtin_description bdesc_crc32[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
};

/* SSE builtins with 3 arguments; the last argument must be an immediate
   or xmm0.  */
static const struct builtin_description bdesc_sse_3arg[] =
{
  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
};

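/* The "immediate or xmm0" split above mirrors the SSE4.1 encodings:
   blendpd/blendps, pblendw, dppd/dpps, insertps, mpsadbw and the round
   builtins take an 8-bit immediate as their third operand, while the
   variable blends (blendvpd, blendvps, pblendvb) take their selector in
   the implicit xmm0 register.  */
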
static const struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },

  /* Special.  */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },

  /* SSE3 MMX */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },

  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
};

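/* Entries above with a null name (e.g. the shift and convert patterns)
   are skipped by the generic two-operand registration loop; they are
   expected to be registered by hand with explicit prototypes later in
   this file, since their operand types cannot be derived mechanically
   from the insn pattern alone.  */
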
static const struct builtin_description bdesc_1arg[] =
{
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },

  /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg.  */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
};

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  size_t i;

  tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
  tree V2DI_type_node
    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node = build_pointer_type (
			    build_type_variant (char_type_node, 1, 0));
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pcfloat_type_node = build_pointer_type (
			     build_type_variant (float_type_node, 1, 0));
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

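  /* Naming convention for the locals that follow: <ret>_ftype_<args> is
     the tree for a function type, e.g. v4sf_ftype_v4sf_int denotes
     "V4SF f (V4SF, int)".  Each is later passed to def_builtin as the
     TYPE argument.  */
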
  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node,
				V4SF_type_node, NULL_TREE);
  tree int64_ftype_v4sf
    = build_function_type_list (long_long_integer_type_node,
				V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int64
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V2SI_type_node, NULL_TREE);

  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node,
				integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_unsigned_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node,
				unsigned_type_node, NULL_TREE);
  tree void_ftype_pcvoid_unsigned_unsigned
    = build_function_type_list (void_type_node, const_ptr_type_node,
				unsigned_type_node, unsigned_type_node,
				NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
				V8QI_type_node, V8QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, pv2si_type_node, NULL_TREE);
1255c85c 17519 tree void_ftype_pv2si_v4sf
b4de2f7d 17520 = build_function_type_list (void_type_node,
f8ca7923 17521 pv2si_type_node, V4SF_type_node, NULL_TREE);
bd793c65 17522 tree void_ftype_pfloat_v4sf
b4de2f7d
AH
17523 = build_function_type_list (void_type_node,
17524 pfloat_type_node, V4SF_type_node, NULL_TREE);
bd793c65 17525 tree void_ftype_pdi_di
b4de2f7d
AH
17526 = build_function_type_list (void_type_node,
17527 pdi_type_node, long_long_unsigned_type_node,
17528 NULL_TREE);
916b60b7 17529 tree void_ftype_pv2di_v2di
b4de2f7d
AH
17530 = build_function_type_list (void_type_node,
17531 pv2di_type_node, V2DI_type_node, NULL_TREE);
bd793c65
BS
17532 /* Normal vector unops. */
17533 tree v4sf_ftype_v4sf
b4de2f7d 17534 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
b1875f52
L
17535 tree v16qi_ftype_v16qi
17536 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17537 tree v8hi_ftype_v8hi
17538 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17539 tree v4si_ftype_v4si
17540 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17541 tree v8qi_ftype_v8qi
17542 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
17543 tree v4hi_ftype_v4hi
17544 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
0f290768 17545
bd793c65
BS
17546 /* Normal vector binops. */
17547 tree v4sf_ftype_v4sf_v4sf
b4de2f7d
AH
17548 = build_function_type_list (V4SF_type_node,
17549 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 17550 tree v8qi_ftype_v8qi_v8qi
b4de2f7d
AH
17551 = build_function_type_list (V8QI_type_node,
17552 V8QI_type_node, V8QI_type_node, NULL_TREE);
bd793c65 17553 tree v4hi_ftype_v4hi_v4hi
b4de2f7d
AH
17554 = build_function_type_list (V4HI_type_node,
17555 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 17556 tree v2si_ftype_v2si_v2si
b4de2f7d
AH
17557 = build_function_type_list (V2SI_type_node,
17558 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 17559 tree di_ftype_di_di
b4de2f7d
AH
17560 = build_function_type_list (long_long_unsigned_type_node,
17561 long_long_unsigned_type_node,
17562 long_long_unsigned_type_node, NULL_TREE);
bd793c65 17563
b1875f52
L
17564 tree di_ftype_di_di_int
17565 = build_function_type_list (long_long_unsigned_type_node,
17566 long_long_unsigned_type_node,
17567 long_long_unsigned_type_node,
17568 integer_type_node, NULL_TREE);
17569
47f339cf 17570 tree v2si_ftype_v2sf
ae3aa00d 17571 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
47f339cf 17572 tree v2sf_ftype_v2si
b4de2f7d 17573 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
47f339cf 17574 tree v2si_ftype_v2si
b4de2f7d 17575 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
47f339cf 17576 tree v2sf_ftype_v2sf
b4de2f7d 17577 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 17578 tree v2sf_ftype_v2sf_v2sf
b4de2f7d
AH
17579 = build_function_type_list (V2SF_type_node,
17580 V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 17581 tree v2si_ftype_v2sf_v2sf
b4de2f7d
AH
17582 = build_function_type_list (V2SI_type_node,
17583 V2SF_type_node, V2SF_type_node, NULL_TREE);
fbe5eb6d
BS
17584 tree pint_type_node = build_pointer_type (integer_type_node);
17585 tree pdouble_type_node = build_pointer_type (double_type_node);
068f5dea
JH
17586 tree pcdouble_type_node = build_pointer_type (
17587 build_type_variant (double_type_node, 1, 0));
fbe5eb6d 17588 tree int_ftype_v2df_v2df
b4de2f7d
AH
17589 = build_function_type_list (integer_type_node,
17590 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 17591
068f5dea
JH
17592 tree void_ftype_pcvoid
17593 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
fbe5eb6d 17594 tree v4sf_ftype_v4si
b4de2f7d 17595 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 17596 tree v4si_ftype_v4sf
b4de2f7d 17597 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 17598 tree v2df_ftype_v4si
b4de2f7d 17599 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 17600 tree v4si_ftype_v2df
b4de2f7d 17601 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
b40c4f68
UB
17602 tree v4si_ftype_v2df_v2df
17603 = build_function_type_list (V4SI_type_node,
17604 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 17605 tree v2si_ftype_v2df
b4de2f7d 17606 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 17607 tree v4sf_ftype_v2df
b4de2f7d 17608 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 17609 tree v2df_ftype_v2si
b4de2f7d 17610 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
fbe5eb6d 17611 tree v2df_ftype_v4sf
b4de2f7d 17612 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 17613 tree int_ftype_v2df
b4de2f7d 17614 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
453ee231
JH
17615 tree int64_ftype_v2df
17616 = build_function_type_list (long_long_integer_type_node,
b96a374d 17617 V2DF_type_node, NULL_TREE);
fbe5eb6d 17618 tree v2df_ftype_v2df_int
b4de2f7d
AH
17619 = build_function_type_list (V2DF_type_node,
17620 V2DF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
17621 tree v2df_ftype_v2df_int64
17622 = build_function_type_list (V2DF_type_node,
17623 V2DF_type_node, long_long_integer_type_node,
17624 NULL_TREE);
fbe5eb6d 17625 tree v4sf_ftype_v4sf_v2df
b4de2f7d
AH
17626 = build_function_type_list (V4SF_type_node,
17627 V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 17628 tree v2df_ftype_v2df_v4sf
b4de2f7d
AH
17629 = build_function_type_list (V2DF_type_node,
17630 V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 17631 tree v2df_ftype_v2df_v2df_int
b4de2f7d
AH
17632 = build_function_type_list (V2DF_type_node,
17633 V2DF_type_node, V2DF_type_node,
17634 integer_type_node,
17635 NULL_TREE);
1c47af84 17636 tree v2df_ftype_v2df_pcdouble
b4de2f7d 17637 = build_function_type_list (V2DF_type_node,
1c47af84 17638 V2DF_type_node, pcdouble_type_node, NULL_TREE);
fbe5eb6d 17639 tree void_ftype_pdouble_v2df
b4de2f7d
AH
17640 = build_function_type_list (void_type_node,
17641 pdouble_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 17642 tree void_ftype_pint_int
b4de2f7d
AH
17643 = build_function_type_list (void_type_node,
17644 pint_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 17645 tree void_ftype_v16qi_v16qi_pchar
b4de2f7d
AH
17646 = build_function_type_list (void_type_node,
17647 V16QI_type_node, V16QI_type_node,
17648 pchar_type_node, NULL_TREE);
068f5dea
JH
17649 tree v2df_ftype_pcdouble
17650 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
fbe5eb6d 17651 tree v2df_ftype_v2df_v2df
b4de2f7d
AH
17652 = build_function_type_list (V2DF_type_node,
17653 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 17654 tree v16qi_ftype_v16qi_v16qi
b4de2f7d
AH
17655 = build_function_type_list (V16QI_type_node,
17656 V16QI_type_node, V16QI_type_node, NULL_TREE);
fbe5eb6d 17657 tree v8hi_ftype_v8hi_v8hi
b4de2f7d
AH
17658 = build_function_type_list (V8HI_type_node,
17659 V8HI_type_node, V8HI_type_node, NULL_TREE);
fbe5eb6d 17660 tree v4si_ftype_v4si_v4si
b4de2f7d
AH
17661 = build_function_type_list (V4SI_type_node,
17662 V4SI_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 17663 tree v2di_ftype_v2di_v2di
b4de2f7d
AH
17664 = build_function_type_list (V2DI_type_node,
17665 V2DI_type_node, V2DI_type_node, NULL_TREE);
fbe5eb6d 17666 tree v2di_ftype_v2df_v2df
b4de2f7d
AH
17667 = build_function_type_list (V2DI_type_node,
17668 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 17669 tree v2df_ftype_v2df
b4de2f7d 17670 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
916b60b7 17671 tree v2di_ftype_v2di_int
b4de2f7d
AH
17672 = build_function_type_list (V2DI_type_node,
17673 V2DI_type_node, integer_type_node, NULL_TREE);
b1875f52
L
17674 tree v2di_ftype_v2di_v2di_int
17675 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17676 V2DI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 17677 tree v4si_ftype_v4si_int
b4de2f7d
AH
17678 = build_function_type_list (V4SI_type_node,
17679 V4SI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 17680 tree v8hi_ftype_v8hi_int
b4de2f7d
AH
17681 = build_function_type_list (V8HI_type_node,
17682 V8HI_type_node, integer_type_node, NULL_TREE);
916b60b7 17683 tree v4si_ftype_v8hi_v8hi
b4de2f7d
AH
17684 = build_function_type_list (V4SI_type_node,
17685 V8HI_type_node, V8HI_type_node, NULL_TREE);
916b60b7 17686 tree di_ftype_v8qi_v8qi
b4de2f7d
AH
17687 = build_function_type_list (long_long_unsigned_type_node,
17688 V8QI_type_node, V8QI_type_node, NULL_TREE);
9e9fb0ce
JB
17689 tree di_ftype_v2si_v2si
17690 = build_function_type_list (long_long_unsigned_type_node,
17691 V2SI_type_node, V2SI_type_node, NULL_TREE);
916b60b7 17692 tree v2di_ftype_v16qi_v16qi
b4de2f7d
AH
17693 = build_function_type_list (V2DI_type_node,
17694 V16QI_type_node, V16QI_type_node, NULL_TREE);
9e9fb0ce
JB
17695 tree v2di_ftype_v4si_v4si
17696 = build_function_type_list (V2DI_type_node,
17697 V4SI_type_node, V4SI_type_node, NULL_TREE);
916b60b7 17698 tree int_ftype_v16qi
b4de2f7d 17699 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
17700 tree v16qi_ftype_pcchar
17701 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
f02e1358
JH
17702 tree void_ftype_pchar_v16qi
17703 = build_function_type_list (void_type_node,
17704 pchar_type_node, V16QI_type_node, NULL_TREE);
47f339cf 17705
21efb4d4
HJ
17706 tree v2di_ftype_v2di_unsigned_unsigned
17707 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17708 unsigned_type_node, unsigned_type_node,
17709 NULL_TREE);
17710 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17711 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17712 unsigned_type_node, unsigned_type_node,
17713 NULL_TREE);
17714 tree v2di_ftype_v2di_v16qi
17715 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17716 NULL_TREE);
9a5cee02
L
17717 tree v2df_ftype_v2df_v2df_v2df
17718 = build_function_type_list (V2DF_type_node,
17719 V2DF_type_node, V2DF_type_node,
17720 V2DF_type_node, NULL_TREE);
17721 tree v4sf_ftype_v4sf_v4sf_v4sf
17722 = build_function_type_list (V4SF_type_node,
17723 V4SF_type_node, V4SF_type_node,
17724 V4SF_type_node, NULL_TREE);
17725 tree v8hi_ftype_v16qi
17726 = build_function_type_list (V8HI_type_node, V16QI_type_node,
17727 NULL_TREE);
17728 tree v4si_ftype_v16qi
17729 = build_function_type_list (V4SI_type_node, V16QI_type_node,
17730 NULL_TREE);
17731 tree v2di_ftype_v16qi
17732 = build_function_type_list (V2DI_type_node, V16QI_type_node,
17733 NULL_TREE);
17734 tree v4si_ftype_v8hi
17735 = build_function_type_list (V4SI_type_node, V8HI_type_node,
17736 NULL_TREE);
17737 tree v2di_ftype_v8hi
17738 = build_function_type_list (V2DI_type_node, V8HI_type_node,
17739 NULL_TREE);
17740 tree v2di_ftype_v4si
17741 = build_function_type_list (V2DI_type_node, V4SI_type_node,
17742 NULL_TREE);
17743 tree v2di_ftype_pv2di
17744 = build_function_type_list (V2DI_type_node, pv2di_type_node,
17745 NULL_TREE);
17746 tree v16qi_ftype_v16qi_v16qi_int
17747 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17748 V16QI_type_node, integer_type_node,
17749 NULL_TREE);
17750 tree v16qi_ftype_v16qi_v16qi_v16qi
17751 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17752 V16QI_type_node, V16QI_type_node,
17753 NULL_TREE);
17754 tree v8hi_ftype_v8hi_v8hi_int
17755 = build_function_type_list (V8HI_type_node, V8HI_type_node,
17756 V8HI_type_node, integer_type_node,
17757 NULL_TREE);
17758 tree v4si_ftype_v4si_v4si_int
17759 = build_function_type_list (V4SI_type_node, V4SI_type_node,
17760 V4SI_type_node, integer_type_node,
17761 NULL_TREE);
17762 tree int_ftype_v2di_v2di
17763 = build_function_type_list (integer_type_node,
17764 V2DI_type_node, V2DI_type_node,
17765 NULL_TREE);
06f4e35d
L
17766 tree int_ftype_v16qi_int_v16qi_int_int
17767 = build_function_type_list (integer_type_node,
17768 V16QI_type_node,
17769 integer_type_node,
17770 V16QI_type_node,
17771 integer_type_node,
17772 integer_type_node,
17773 NULL_TREE);
17774 tree v16qi_ftype_v16qi_int_v16qi_int_int
17775 = build_function_type_list (V16QI_type_node,
17776 V16QI_type_node,
17777 integer_type_node,
17778 V16QI_type_node,
17779 integer_type_node,
17780 integer_type_node,
17781 NULL_TREE);
17782 tree int_ftype_v16qi_v16qi_int
17783 = build_function_type_list (integer_type_node,
17784 V16QI_type_node,
17785 V16QI_type_node,
17786 integer_type_node,
17787 NULL_TREE);
eb701deb 17788 tree ftype;
f8a1ebc6
JH
17789
17790 /* The __float80 type. */
17791 if (TYPE_MODE (long_double_type_node) == XFmode)
17792 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
17793 "__float80");
17794 else
17795 {
17796 /* The __float80 type. */
edc5bbcd
UB
17797 tree float80_type_node = make_node (REAL_TYPE);
17798
17799 TYPE_PRECISION (float80_type_node) = 80;
17800 layout_type (float80_type_node);
17801 (*lang_hooks.types.register_builtin_type) (float80_type_node,
17802 "__float80");
f8a1ebc6
JH
17803 }
17804
f749178d
JH
17805 if (TARGET_64BIT)
17806 {
edc5bbcd
UB
17807 tree float128_type_node = make_node (REAL_TYPE);
17808
17809 TYPE_PRECISION (float128_type_node) = 128;
17810 layout_type (float128_type_node);
17811 (*lang_hooks.types.register_builtin_type) (float128_type_node,
17812 "__float128");
17813
17814 /* TFmode support builtins. */
17815 ftype = build_function_type (float128_type_node,
17816 void_list_node);
9c32f507 17817 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
edc5bbcd
UB
17818
17819 ftype = build_function_type_list (float128_type_node,
17820 float128_type_node,
17821 NULL_TREE);
e41ef486 17822 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
edc5bbcd
UB
17823
17824 ftype = build_function_type_list (float128_type_node,
17825 float128_type_node,
17826 float128_type_node,
17827 NULL_TREE);
e41ef486 17828 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
f749178d 17829 }
f8a1ebc6 17830
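  /* Illustrative only (not part of the original source): with the
     registrations above in effect on a 64-bit target, code like

       __float128 x = __builtin_infq ();
       __float128 y = __builtin_fabsq (x);

     is accepted without any header; exact availability depends on the
     selected target options.  */
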
  /* Add all SSE builtins that are more or less simple operations on
     three operands.  */
  for (i = 0, d = bdesc_sse_3arg;
       i < ARRAY_SIZE (bdesc_sse_3arg);
       i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode:
          type = v16qi_ftype_v16qi_v16qi_int;
          break;
        case V8HImode:
          type = v8hi_ftype_v8hi_v8hi_int;
          break;
        case V4SImode:
          type = v4si_ftype_v4si_v4si_int;
          break;
        case V2DImode:
          type = v2di_ftype_v2di_v2di_int;
          break;
        case V2DFmode:
          type = v2df_ftype_v2df_v2df_int;
          break;
        case V4SFmode:
          type = v4sf_ftype_v4sf_v4sf_int;
          break;
        default:
          gcc_unreachable ();
        }

      /* Override for variable blends.  */
      switch (d->icode)
        {
        case CODE_FOR_sse4_1_blendvpd:
          type = v2df_ftype_v2df_v2df_v2df;
          break;
        case CODE_FOR_sse4_1_blendvps:
          type = v4sf_ftype_v4sf_v4sf_v4sf;
          break;
        case CODE_FOR_sse4_1_pblendvb:
          type = v16qi_ftype_v16qi_v16qi_v16qi;
          break;
        default:
          break;
        }

      def_builtin_const (d->mask, d->name, type, d->code);
    }

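  /* Why the override above exists: for the variable-blend insns the
     third operand is a vector mask applied per element rather than an
     8-bit immediate, so the type carries three vectors.  A hedged usage
     sketch, assuming the corresponding rows are present in
     bdesc_sse_3arg (the variable names are hypothetical):

       r = __builtin_ia32_blendvpd (a, b, mask);   vector per-element mask
       r = __builtin_ia32_blendpd (a, b, 0x1);     small immediate mask
  */
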
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode:
          type = v16qi_ftype_v16qi_v16qi;
          break;
        case V8HImode:
          type = v8hi_ftype_v8hi_v8hi;
          break;
        case V4SImode:
          type = v4si_ftype_v4si_v4si;
          break;
        case V2DImode:
          type = v2di_ftype_v2di_v2di;
          break;
        case V2DFmode:
          type = v2df_ftype_v2df_v2df;
          break;
        case V4SFmode:
          type = v4sf_ftype_v4sf_v4sf;
          break;
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          gcc_unreachable ();
        }

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
          || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
          || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
        type = v2di_ftype_v2df_v2df;

      if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
        type = v4si_ftype_v2df_v2df;

      def_builtin_const (d->mask, d->name, type, d->code);
    }

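  /* The comparison overrides reflect how SSE mask-generating compares
     work: each lane of the result is set to all-ones or all-zeros, so
     the natural result type is an integer vector of the same width.
     E.g. a cmpeqps-style builtin routed through
     CODE_FOR_sse_maskcmpv4sf3 takes two V4SF operands but returns a
     V4SI mask, hence v4si_ftype_v4sf_v4sf above.  */
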
  /* Add all builtins that are more or less simple operations on 1 operand.  */
  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    {
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode:
          type = v16qi_ftype_v16qi;
          break;
        case V8HImode:
          type = v8hi_ftype_v8hi;
          break;
        case V4SImode:
          type = v4si_ftype_v4si;
          break;
        case V2DFmode:
          type = v2df_ftype_v2df;
          break;
        case V4SFmode:
          type = v4sf_ftype_v4sf;
          break;
        case V8QImode:
          type = v8qi_ftype_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si;
          break;

        default:
          gcc_unreachable ();
        }

      def_builtin_const (d->mask, d->name, type, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
        ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
      else
        ftype = int_ftype_v16qi_int_v16qi_int_int;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
        ftype = v16qi_ftype_v16qi_v16qi_int;
      else
        ftype = int_ftype_v16qi_v16qi_int;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

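  /* pcmpestr* take explicit string lengths, which is why their type
     carries the two extra int operands; pcmpistr* stop at a NUL byte.
     A hedged usage sketch (locals hypothetical; the mode argument must
     be an immediate):

       int idx = __builtin_ia32_pcmpistri128 (a, b, 0x0c);
  */
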
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == OPTION_MASK_ISA_SSE2)
      def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  /* ptest insns.  */
  for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
    def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);

  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);

  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  ftype = build_function_type_list (float_type_node,
                                    float_type_node,
                                    NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
  def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);

  /* Prescott New Instructions.  */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);

  /* SSSE3.  */
  def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
  def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);

  /* SSE4.1.  */
  def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);

  /* SSE4.2.  */
  ftype = build_function_type_list (unsigned_type_node,
                                    unsigned_type_node,
                                    unsigned_char_type_node,
                                    NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
  ftype = build_function_type_list (unsigned_type_node,
                                    unsigned_type_node,
                                    short_unsigned_type_node,
                                    NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
  ftype = build_function_type_list (unsigned_type_node,
                                    unsigned_type_node,
                                    unsigned_type_node,
                                    NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
  ftype = build_function_type_list (long_long_unsigned_type_node,
                                    long_long_unsigned_type_node,
                                    long_long_unsigned_type_node,
                                    NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);

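  /* Hedged usage sketch for the crc32 builtins above, accumulating a
     CRC32C value over a buffer (the locals are hypothetical; requires a
     target with SSE4.2 enabled):

       unsigned crc = 0xffffffff;
       for (i = 0; i < len; i++)
         crc = __builtin_ia32_crc32qi (crc, buf[i]);
  */
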
  /* AMDFAM10 SSE4A new built-ins.  */
  def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
  def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
  def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
  def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
  def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
  def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);

  /* Access to the vec_init patterns.  */
  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);

  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);

  ftype = build_function_type_list (V8QI_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);

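  /* Hedged usage sketch for the vec_init builtins above (the __v2si
     typedef and locals are hypothetical, standing for a V2SImode
     vector type):

       __v2si v = __builtin_ia32_vec_init_v2si (1, 2);
  */
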
  /* Access to the vec_extract patterns.  */
  ftype = build_function_type_list (double_type_node, V2DF_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);

  ftype = build_function_type_list (long_long_integer_type_node,
                                    V2DI_type_node, integer_type_node,
                                    NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);

  ftype = build_function_type_list (float_type_node, V4SF_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);

  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);

  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);

  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);

  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);

  ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
                                    intDI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);

  ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
                                    float_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);

  ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
                                    intSI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);

  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
                                    intHI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);

  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
                                    intHI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);

  ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
                                    intQI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
}

static void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}

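/* Concretely: if the user passed an erroneous expression where a vector
   was expected, expand_normal may have produced const0_rtx (a scalar
   zero).  Rewriting it to CONST0_RTX (mode) yields a vector-mode zero
   that the insn predicates in the expanders below will accept, so
   expansion degrades gracefully after an error instead of crashing.  */
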
/* Subroutine of ix86_expand_builtin to take care of SSE insns with
   4 operands.  The third argument must be a constant smaller than 8
   bits or xmm0.  */

static rtx
ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
                                    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;
  enum machine_mode mode2 = insn_data[icode].operand[2].mode;
  enum machine_mode mode3 = insn_data[icode].operand[3].mode;

  if (VECTOR_MODE_P (mode1))
    op0 = safe_vector_operand (op0, mode1);
  if (VECTOR_MODE_P (mode2))
    op1 = safe_vector_operand (op1, mode2);
  if (VECTOR_MODE_P (mode3))
    op2 = safe_vector_operand (op2, mode3);

  if (optimize
      || target == 0
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
    op0 = copy_to_mode_reg (mode1, op0);
  if ((optimize && !register_operand (op1, mode2))
      || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
    op1 = copy_to_mode_reg (mode2, op1);

  if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
    switch (icode)
      {
      case CODE_FOR_sse4_1_blendvpd:
      case CODE_FOR_sse4_1_blendvps:
      case CODE_FOR_sse4_1_pblendvb:
        op2 = copy_to_mode_reg (mode3, op2);
        break;

      case CODE_FOR_sse4_1_roundsd:
      case CODE_FOR_sse4_1_roundss:
        error ("the third argument must be a 4-bit immediate");
        return const0_rtx;

      default:
        error ("the third argument must be an 8-bit immediate");
        return const0_rtx;
      }

  pat = GEN_FCN (icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

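/* Hedged example of the immediate checks above (locals hypothetical):
   the rounding-mode operand of roundsd/roundss must be a compile-time
   constant that fits in 4 bits, so

     r = __builtin_ia32_roundsd (a, b, 1);

   expands to a single insn, while a non-constant third argument is
   rejected with the error emitted above.  The blendv cases instead
   force the mask into a register, since there the third operand is
   data, not an immediate.  */
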
/* Subroutine of ix86_expand_builtin to take care of crc32 insns.  */

static rtx
ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (optimize
      || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    {
      op1 = copy_to_reg (op1);
      op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

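/* The copy_to_reg/simplify_gen_subreg pair above handles the case where
   the data operand arrives wider than the insn wants: e.g. for
   __builtin_ia32_crc32qi the unsigned char argument is promoted to int
   by the front end, and the subreg takes it back as the low QImode
   part.  */
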
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

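/* Illustrative example: a plain two-operand intrinsic such as _mm_add_ps,
   i.e.

     __v4sf z = __builtin_ia32_addps (x, y);

   arrives here through the bdesc_2arg table below with tmode == mode0 ==
   mode1 == V4SFmode, so only the ix86_fixup_binary_operands path runs.
   The mixed-mode paths accommodate patterns whose operand modes differ
   from the result, e.g. a TImode count operand handed an SImode value,
   which the gen_sse2_loadd widening above repairs.  */
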
/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree exp)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

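/* Illustrative example: _mm_storeu_ps becomes __builtin_ia32_storeups,
   dispatched from ix86_expand_builtin below as

     ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);

   arg0 is the destination address (wrapped in a MEM above) and arg1 the
   value; stores produce no useful result, hence the constant 0 return.  */
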
/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree exp,
			  rtx target, int do_load)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if ((optimize && !register_operand (op0, mode0))
	  || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  switch (icode)
    {
    case CODE_FOR_sse4_1_roundpd:
    case CODE_FOR_sse4_1_roundps:
      {
	tree arg1 = CALL_EXPR_ARG (exp, 1);
	rtx op1 = expand_normal (arg1);
	enum machine_mode mode1 = insn_data[icode].operand[2].mode;

	if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	  {
	    error ("the second argument must be a 4-bit immediate");
	    return const0_rtx;
	  }
	pat = GEN_FCN (icode) (target, op0, op1);
      }
      break;
    default:
      pat = GEN_FCN (icode) (target, op0);
      break;
    }

  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

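/* DO_LOAD distinguishes address arguments from value arguments: the
   IX86_BUILTIN_LOADUPS case below passes do_load == 1 so that arg0 is
   dereferenced, while e.g. the SSE4.1 roundps builtin passes its vector
   by value and additionally carries the 4-bit rounding-control immediate
   validated in the switch above.  */
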
/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

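/* The vm* patterns behind sqrtss, rsqrtss and rcpss take two operands:
   one supplies the scalar input of the operation, the other the upper
   vector elements that pass through to the result unchanged.  The single
   builtin argument plays both roles, which is why op1 is simply a copy
   of op0 above.  */
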
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
			 rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

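/* Illustrative example: there is no greater-than form of cmpps, so the
   builtin description for __builtin_ia32_cmpgtps records comparison LT
   together with BUILTIN_DESC_SWAP_OPERANDS; the swap above rewrites
   a > b as b < a before the maskcmp pattern is emitted.  */
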
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}

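/* comiss/comisd only set EFLAGS, so the boolean result is materialized
   much as a setcc would be: TARGET is a zeroed SImode pseudo, its low
   QImode subreg is written through STRICT_LOW_PART with the comparison
   of the flags register (SET_DEST (pat)) against zero, and the full
   SImode pseudo is returned.  */
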
/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}

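/* Same flag-materialization idiom as ix86_expand_sse_comi above; here it
   serves the SSE4.1 ptest builtins (e.g. the one behind _mm_testz_si128),
   whose only output is likewise a condition in EFLAGS.  */
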
/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}

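/* For the flag-returning variants, d->flag holds the machine mode of the
   flags-register fragment to test (hence the cast when building the REG),
   and the EQ-against-zero materialization mirrors ix86_expand_sse_comi.
   Illustrative example: _mm_cmpestri maps to IX86_BUILTIN_PCMPESTRI128
   and returns the index result directly in TARGET, with the mask output
   going to a scratch register.  */
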
/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}

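/* Identical structure to the pcmpestr expander above, minus the two
   explicit length operands: the implicit-length string forms take only
   the two vectors and the 8-bit mode immediate (e.g. _mm_cmpistri).  */
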
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static int
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}

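/* Illustrative example: _mm_set_pi32 (w1, w0) in mmintrin.h expands to
   __builtin_ia32_vec_init_v2si (w0, w1), which lands here with tmode ==
   V2SImode and n_elt == 2; each scalar argument becomes one element of
   the PARALLEL handed to ix86_expand_vector_init.  */
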
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}

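/* Illustrative example: _mm_extract_pi16 uses
   __builtin_ia32_vec_ext_v4hi, which arrives here with mode0 == V4HImode
   and tmode == HImode; a non-constant selector is rejected by
   get_element_number above.  */
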
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}

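/* Illustrative example: _mm_insert_pi16 goes through
   __builtin_ia32_vec_set_v4hi.  Because the copy above is what gets
   modified and returned, OP0 itself is left intact and the builtin keeps
   the pure value semantics of the intrinsic.  */
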
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2, arg3;
  rtx op0, op1, op2, op3, pat;
  enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_mmx_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sse_sfence ());
      return 0;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? CODE_FOR_mmx_maskmovq
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = CALL_EXPR_ARG (exp, 0);
      arg2 = CALL_EXPR_ARG (exp, 1);
      arg0 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      op0 = force_reg (Pmode, op0);
      op0 = gen_rtx_MEM (mode1, op0);

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_RSQRTF:
      return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2, exp, target);

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
	       : CODE_FOR_sse2_loadlpd);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = force_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || !register_operand (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
	       : CODE_FOR_sse_storelps);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      op1 = force_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return const0_rtx;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
	       ? CODE_FOR_sse_shufps
	       : CODE_FOR_sse2_shufpd);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if ((optimize && !register_operand (op1, mode1))
	  || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
	       : CODE_FOR_mmx_pshufw);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLWI128:
      icode = CODE_FOR_ashlv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSLLDI128:
      icode = CODE_FOR_ashlv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSLLQI128:
      icode = CODE_FOR_ashlv2di3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRAWI128:
      icode = CODE_FOR_ashrv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRADI128:
      icode = CODE_FOR_ashrv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLWI128:
      icode = CODE_FOR_lshrv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLDI128:
      icode = CODE_FOR_lshrv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLQI128:
      icode = CODE_FOR_lshrv2di3;
      goto do_pshifti;
    do_pshifti:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);

      if (!CONST_INT_P (op1))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
	op1 = GEN_INT (255);

      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_reg (op0);

      target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLW128:
      icode = CODE_FOR_ashlv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSLLD128:
      icode = CODE_FOR_ashlv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSLLQ128:
      icode = CODE_FOR_ashlv2di3;
      goto do_pshift;
    case IX86_BUILTIN_PSRAW128:
      icode = CODE_FOR_ashrv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSRAD128:
      icode = CODE_FOR_ashrv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLW128:
      icode = CODE_FOR_lshrv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLD128:
      icode = CODE_FOR_lshrv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLQ128:
      icode = CODE_FOR_lshrv2di3;
      goto do_pshift;
    do_pshift:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);

      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_reg (op0);

      op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
	op1 = copy_to_reg (op1);

      target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
	       : CODE_FOR_sse2_lshrti3);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
			     op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_mmx_femms ());
      return NULL_RTX;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
	op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);

    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);

    case IX86_BUILTIN_MONITOR:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (Pmode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      if (!TARGET_64BIT)
	emit_insn (gen_sse3_monitor (op0, op1, op2));
      else
	emit_insn (gen_sse3_monitor64 (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;

    case IX86_BUILTIN_LDDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
				       target, 1);

    case IX86_BUILTIN_PALIGNR:
    case IX86_BUILTIN_PALIGNR128:
      if (fcode == IX86_BUILTIN_PALIGNR)
	{
	  icode = CODE_FOR_ssse3_palignrdi;
	  mode = DImode;
	}
      else
	{
	  icode = CODE_FOR_ssse3_palignrti;
	  mode = V2DImode;
	}
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      mode3 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  op1 = copy_to_reg (op1);
	  op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
	}
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      target = gen_reg_rtx (mode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
			     op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MOVNTDQA:
      return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
				       target, 1);

    case IX86_BUILTIN_MOVNTSD:
      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);

    case IX86_BUILTIN_MOVNTSS:
      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);

    case IX86_BUILTIN_INSERTQ:
    case IX86_BUILTIN_EXTRQ:
      icode = (fcode == IX86_BUILTIN_EXTRQ
	       ? CODE_FOR_sse4a_extrq
	       : CODE_FOR_sse4a_insertq);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	op1 = copy_to_mode_reg (mode2, op1);
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return NULL_RTX;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_EXTRQI:
      icode = CODE_FOR_sse4a_extrqi;
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      mode3 = insn_data[icode].operand[3].mode;
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("index mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
	{
	  error ("length mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return NULL_RTX;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_INSERTQI:
      icode = CODE_FOR_sse4a_insertqi;
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      arg3 = CALL_EXPR_ARG (exp, 3);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op3 = expand_normal (arg3);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      mode3 = insn_data[icode].operand[3].mode;
      mode4 = insn_data[icode].operand[4].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);

      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	op1 = copy_to_mode_reg (mode2, op1);

      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
	{
	  error ("index mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
	{
	  error ("length mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
      if (! pat)
	return NULL_RTX;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
    case IX86_BUILTIN_VEC_EXT_V16QI:
      return ix86_expand_vec_ext_builtin (exp, target);

    case IX86_BUILTIN_VEC_SET_V2DI:
    case IX86_BUILTIN_VEC_SET_V4SF:
    case IX86_BUILTIN_VEC_SET_V4SI:
    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
    case IX86_BUILTIN_VEC_SET_V16QI:
      return ix86_expand_vec_set_builtin (exp);

    case IX86_BUILTIN_INFQ:
      {
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

	tmp = validize_mem (force_const_mem (mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (mode);

	emit_move_insn (target, tmp);
	return target;
      }

    case IX86_BUILTIN_FABSQ:
      return ix86_expand_unop_builtin (CODE_FOR_abstf2, exp, target, 0);

    case IX86_BUILTIN_COPYSIGNQ:
      return ix86_expand_binop_builtin (CODE_FOR_copysigntf3, exp, target);

    default:
      break;
    }

  for (i = 0, d = bdesc_sse_3arg;
       i < ARRAY_SIZE (bdesc_sse_3arg);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_4_operands_builtin (d->icode, exp,
						 target);

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_sse_maskcmpv4sf3
	    || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_sse2_maskcmpv2df3
	    || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
	  return ix86_expand_sse_compare (d, exp, target);

	return ix86_expand_binop_builtin (d->icode, exp, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, exp, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, exp, target);

  for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_ptest (d, exp, target);

  for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
    if (d->code == fcode)
      return ix86_expand_crc32 (d->icode, exp, target);

  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpestr (d, exp, target);

  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpistr (d, exp, target);

  gcc_unreachable ();
}

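/* Everything not special-cased in the switch above is table-driven: each
   bdesc_* array pairs an IX86_BUILTIN_* code with an insn_code and, for
   comparisons, an rtx_code plus flags.  Adding a typical new intrinsic
   therefore usually requires only a table entry and a named pattern in
   sse.md, with no new expander code here.  */
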
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN, result vector type TYPE_OUT and argument
   vector type TYPE_IN, or NULL_TREE if it is not available.  */

static tree
ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
				  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    case BUILT_IN_SQRT:
      if (out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return ix86_builtins[IX86_BUILTIN_SQRTPD];
      return NULL_TREE;

    case BUILT_IN_SQRTF:
      if (out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return ix86_builtins[IX86_BUILTIN_SQRTPS];
      return NULL_TREE;

    case BUILT_IN_LRINT:
      if (out_mode == SImode && out_n == 4
	  && in_mode == DFmode && in_n == 2)
	return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
      return NULL_TREE;

    case BUILT_IN_LRINTF:
      if (out_mode == SImode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
      return NULL_TREE;

    default:
      ;
    }

  return NULL_TREE;
}

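/* Illustrative example: with vectorization enabled (and errno-setting
   math disabled, e.g. -fno-math-errno), a loop computing a[i] = sqrt
   (b[i]) over doubles can be vectorized because this hook maps
   BUILT_IN_SQRT on a V2DF result to IX86_BUILTIN_SQRTPD, i.e. the
   sqrtpd instruction.  */
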
/* Returns a decl of a function that implements the conversion CODE of
   the input vector of type TYPE, or NULL_TREE if it is not available.  */

static tree
ix86_vectorize_builtin_conversion (unsigned int code, tree type)
{
  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL_TREE;

  switch (code)
    {
    case FLOAT_EXPR:
      switch (TYPE_MODE (type))
	{
	case V4SImode:
	  return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
	default:
	  return NULL_TREE;
	}

    case FIX_TRUNC_EXPR:
      switch (TYPE_MODE (type))
	{
	case V4SFmode:
	  return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
	default:
	  return NULL_TREE;
	}

    default:
      return NULL_TREE;
    }
}

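/* Illustrative example: int-to-float conversion of a V4SI vector
   (FLOAT_EXPR) is vectorized through the cvtdq2ps builtin, and the
   truncating float-to-int direction through cvttps2dq; only these
   4-wide SImode/SFmode pairs are handled here.  */
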
/* Returns a decl of a target-specific builtin that implements the
   reciprocal of the function, or NULL_TREE if not available.  */

static tree
ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
			 bool sqrt ATTRIBUTE_UNUSED)
{
  if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
	 && flag_finite_math_only && !flag_trapping_math
	 && flag_unsafe_math_optimizations))
    return NULL_TREE;

  if (md_fn)
    /* Machine dependent builtins.  */
    switch (fn)
      {
	/* Vectorized version of sqrt to rsqrt conversion.  */
      case IX86_BUILTIN_SQRTPS:
	return ix86_builtins[IX86_BUILTIN_RSQRTPS];

      default:
	return NULL_TREE;
      }
  else
    /* Normal builtins.  */
    switch (fn)
      {
	/* Sqrt to rsqrt conversion.  */
      case BUILT_IN_SQRTF:
	return ix86_builtins[IX86_BUILTIN_RSQRTF];

      default:
	return NULL_TREE;
      }
}

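/* Illustrative example: with -mrecip (TARGET_RECIP) and the unsafe-math
   flags tested above, an expression like 1.0f / sqrtf (x) can be
   rewritten to use the rsqrtss approximation (IX86_BUILTIN_RSQRTF),
   refined by a Newton-Raphson step in the caller, instead of a
   full-precision divide and square root.  */
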
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
		     gen_rtx_SET (VOIDmode,
				  gen_rtx_MEM (DImode,
					       gen_rtx_PRE_DEC (DImode,
								stack_pointer_rtx)),
				  operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
		       gen_rtx_SET (VOIDmode,
				    gen_rtx_MEM (SImode,
						 gen_rtx_PRE_DEC (Pmode,
								  stack_pointer_rtx)),
				    operands[1]));
	    emit_insn (
		       gen_rtx_SET (VOIDmode,
				    gen_rtx_MEM (SImode,
						 gen_rtx_PRE_DEC (Pmode,
								  stack_pointer_rtx)),
				    operands[0]));
	  }
	  break;
	case HImode:
	  /* Store HImodes as SImodes.  */
	  operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
		     gen_rtx_SET (VOIDmode,
				  gen_rtx_MEM (GET_MODE (operand),
					       gen_rtx_PRE_DEC (SImode,
								stack_pointer_rtx)),
				  operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}

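/* When the red zone is available (the x86-64 ABI guarantees 128 bytes
   below the stack pointer), OPERAND can be spilled without adjusting
   the stack pointer at all; otherwise the value is pushed with a
   PRE_DEC store and ix86_free_from_memory below releases the slot.  */
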
/* Free operand from the memory.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}

f84aa48a
JH
20091/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
20092 QImode must go into class Q_REGS.
20093 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
892a2d68 20094 movdf to do mem-to-mem moves through integer regs. */
f84aa48a 20095enum reg_class
9415ab7d 20096ix86_preferred_reload_class (rtx x, enum reg_class regclass)
f84aa48a 20097{
b5c82fa1
PB
20098 enum machine_mode mode = GET_MODE (x);
20099
5656a184 20100 /* We're only allowed to return a subclass of CLASS. Many of the
51df7179 20101 following checks fail for NO_REGS, so eliminate that early. */
9415ab7d 20102 if (regclass == NO_REGS)
f75959a6 20103 return NO_REGS;
51df7179
RH
20104
20105 /* All classes can load zeros. */
b5c82fa1 20106 if (x == CONST0_RTX (mode))
9415ab7d 20107 return regclass;
51df7179 20108
917f1b7e 20109 /* Force constants into memory if we are loading a (nonzero) constant into
b5c82fa1
PB
20110 an MMX or SSE register. This is because there are no MMX/SSE instructions
20111 to load from a constant. */
20112 if (CONSTANT_P (x)
9415ab7d 20113 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
b5c82fa1
PB
20114 return NO_REGS;
20115
20116 /* Prefer SSE regs only, if we can use them for math. */
20117 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
9415ab7d 20118 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
b5c82fa1 20119
51df7179 20120 /* Floating-point constants need more complex checks. */
f84aa48a
JH
20121 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
20122 {
f84aa48a 20123 /* General regs can load everything. */
9415ab7d
TN
20124 if (reg_class_subset_p (regclass, GENERAL_REGS))
20125 return regclass;
51df7179
RH
20126
20127 /* Floats can load 0 and 1 plus some others. Note that we eliminated
20128 zero above. We only want to wind up preferring 80387 registers if
20129 we plan on doing computation with them. */
20130 if (TARGET_80387
51df7179
RH
20131 && standard_80387_constant_p (x))
20132 {
20133 /* Limit class to non-sse. */
9415ab7d 20134 if (regclass == FLOAT_SSE_REGS)
51df7179 20135 return FLOAT_REGS;
9415ab7d 20136 if (regclass == FP_TOP_SSE_REGS)
51df7179 20137 return FP_TOP_REG;
9415ab7d 20138 if (regclass == FP_SECOND_SSE_REGS)
51df7179 20139 return FP_SECOND_REG;
9415ab7d
TN
20140 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
20141 return regclass;
51df7179
RH
20142 }
20143
20144 return NO_REGS;
f84aa48a 20145 }
51df7179
RH
20146
20147 /* Generally when we see PLUS here, it's the function invariant
20148 (plus soft-fp const_int), which can only be computed into general
20149 regs. */
20150 if (GET_CODE (x) == PLUS)
9415ab7d 20151 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
51df7179
RH
20152
20153 /* QImode constants are easy to load, but non-constant QImode data
20154 must go into Q_REGS. */
20155 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
20156 {
9415ab7d
TN
20157 if (reg_class_subset_p (regclass, Q_REGS))
20158 return regclass;
20159 if (reg_class_subset_p (Q_REGS, regclass))
51df7179
RH
20160 return Q_REGS;
20161 return NO_REGS;
20162 }
20163
9415ab7d 20164 return regclass;
f84aa48a
JH
20165}
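
/* Worked example, illustrative only: reloading a non-constant QImode
   value with REGCLASS == GENERAL_REGS reaches the QImode case above.
   GENERAL_REGS is not a subset of Q_REGS, but Q_REGS is a subset of
   GENERAL_REGS, so Q_REGS is returned and reload is steered towards
   %eax/%ebx/%ecx/%edx, the only ia32 registers with addressable low
   bytes.  */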
20166
b5c82fa1
PB
20167/* Discourage putting floating-point values in SSE registers unless
20168 SSE math is being used, and likewise for the 387 registers. */
20169enum reg_class
9415ab7d 20170ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
b5c82fa1
PB
20171{
20172 enum machine_mode mode = GET_MODE (x);
20173
20174 /* Restrict the output reload class to the register bank that we are doing
20175 math on. If we would like not to return a subset of CLASS, reject this
20176 alternative: if reload cannot do this, it will still use its choice. */
20178 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
9415ab7d 20179 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
b5c82fa1 20180
27ac40e2 20181 if (X87_FLOAT_MODE_P (mode))
b5c82fa1 20182 {
9415ab7d 20183 if (regclass == FP_TOP_SSE_REGS)
b5c82fa1 20184 return FP_TOP_REG;
9415ab7d 20185 else if (regclass == FP_SECOND_SSE_REGS)
b5c82fa1
PB
20186 return FP_SECOND_REG;
20187 else
9415ab7d 20188 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
b5c82fa1
PB
20189 }
20190
9415ab7d 20191 return regclass;
b5c82fa1
PB
20192}
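
/* Worked example, illustrative only: with -mfpmath=sse and an SFmode
   output, FLOAT_SSE_REGS above narrows to SSE_REGS while FLOAT_REGS
   yields NO_REGS, discouraging the x87 alternative; as the comment
   above notes, reload may still use its own choice if it must.  */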
20193
f84aa48a
JH
20194/* If we are copying between general and FP registers, we need a memory
20195 location. The same is true for SSE and MMX registers.
20196
6232eadc
JH
20197 To optimize register_move_cost performance, an inline variant is provided.
20198
f84aa48a
JH
20199 The macro can't work reliably when one of the CLASSES is a class containing
20200 registers from multiple units (SSE, MMX, integer). We avoid this by never
20201 combining those units in a single alternative in the machine description.
20202 Ensure that this constraint holds to avoid unexpected surprises.
20203
20204 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
20205 enforce these sanity checks. */
f75959a6 20206
6232eadc
JH
20207static inline int
20208inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
b96a374d 20209 enum machine_mode mode, int strict)
f84aa48a
JH
20210{
20211 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
20212 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
20213 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
20214 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
20215 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
20216 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
20217 {
d0396b79 20218 gcc_assert (!strict);
f75959a6 20219 return true;
f84aa48a 20220 }
f75959a6
RH
20221
20222 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
20223 return true;
20224
20225 /* ??? This is a lie. We do have moves between mmx/general and between
20226 mmx/sse2. But by saying we need secondary memory we discourage the
20227 register allocator from using the mmx registers unless needed. */
20228 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
20229 return true;
20230
20231 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20232 {
20233 /* SSE1 doesn't have any direct moves from other classes. */
20234 if (!TARGET_SSE2)
20235 return true;
20236
5656a184 20237 /* If the target says that inter-unit moves are more expensive
f75959a6 20238 than moving through memory, then don't generate them. */
ed69105c 20239 if (!TARGET_INTER_UNIT_MOVES)
f75959a6
RH
20240 return true;
20241
20242 /* Between SSE and general, we have moves no larger than word size. */
20243 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20244 return true;
f75959a6
RH
20245 }
20246
20247 return false;
f84aa48a 20248}
f75959a6 20249
6232eadc
JH
20250int
20251ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
20252 enum machine_mode mode, int strict)
20253{
20254 return inline_secondary_memory_needed (class1, class2, mode, strict);
20255}
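
/* Worked example, illustrative only: on ia32, moving DImode between
   GENERAL_REGS and SSE_REGS answers true even with SSE2 and inter-unit
   moves enabled, because GET_MODE_SIZE (DImode) == 8 exceeds
   UNITS_PER_WORD == 4, so the value must travel through a stack
   slot.  */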
20256
1272914c
RH
20257/* Return true if the registers in CLASS cannot represent the change from
20258 modes FROM to TO. */
20259
20260bool
20261ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9415ab7d 20262 enum reg_class regclass)
1272914c
RH
20263{
20264 if (from == to)
20265 return false;
20266
0fa2e4df 20267 /* x87 registers can't do subreg at all, as all values are reformatted
1272914c 20268 to extended precision. */
9415ab7d 20269 if (MAYBE_FLOAT_CLASS_P (regclass))
1272914c
RH
20270 return true;
20271
9415ab7d 20272 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
1272914c
RH
20273 {
20274 /* Vector registers do not support QImode or HImode loads. If we don't
20275 disallow a change to these modes, reload will assume it's ok to
20276 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
20277 the vec_dupv4hi pattern. */
20278 if (GET_MODE_SIZE (from) < 4)
20279 return true;
20280
20281 /* Vector registers do not support subreg with nonzero offsets, which
5656a184 20282 are otherwise valid for integer registers. Since we can't see
1272914c
RH
20283 whether we have a nonzero offset from here, prohibit all
20284 nonparadoxical subregs changing size. */
20285 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
20286 return true;
20287 }
20288
20289 return false;
20290}
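
/* Worked example, illustrative only: a change from HImode to SImode in
   an SSE class is rejected because GET_MODE_SIZE (HImode) < 4; this is
   exactly what keeps reload from dropping the subreg in
   (subreg:SI (reg:HI N) 0) as described above.  */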
20291
6232eadc
JH
20292/* Return the cost of moving data of mode M between a
20293 register and memory. A value of 2 is the default; this cost is
20294 relative to those in `REGISTER_MOVE_COST'.
20295
20296 This function is used extensively by register_move_cost, which is
20297 used to build tables at startup. Make it inline in this case.
20298 When IN is 2, return the maximum of the in and out move costs.
20299
20300 If moving between registers and memory is more expensive than
20301 between two registers, you should define this macro to express the
20302 relative cost.
20303
20304 Also model the increased cost of moving QImode registers in
20305 non-Q_REGS classes.
20306 */
20307static inline int
20308inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
20309 int in)
20310{
20311 int cost;
20312 if (FLOAT_CLASS_P (regclass))
20313 {
20314 int index;
20315 switch (mode)
20316 {
20317 case SFmode:
20318 index = 0;
20319 break;
20320 case DFmode:
20321 index = 1;
20322 break;
20323 case XFmode:
20324 index = 2;
20325 break;
20326 default:
20327 return 100;
20328 }
20329 if (in == 2)
20330 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
20331 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
20332 }
20333 if (SSE_CLASS_P (regclass))
20334 {
20335 int index;
20336 switch (GET_MODE_SIZE (mode))
20337 {
20338 case 4:
20339 index = 0;
20340 break;
20341 case 8:
20342 index = 1;
20343 break;
20344 case 16:
20345 index = 2;
20346 break;
20347 default:
20348 return 100;
20349 }
20350 if (in == 2)
20351 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
20352 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
20353 }
20354 if (MMX_CLASS_P (regclass))
20355 {
20356 int index;
20357 switch (GET_MODE_SIZE (mode))
20358 {
20359 case 4:
20360 index = 0;
20361 break;
20362 case 8:
20363 index = 1;
20364 break;
20365 default:
20366 return 100;
20367 }
20368 if (in == 2)
20369 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
20370 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
20371 }
20372 switch (GET_MODE_SIZE (mode))
20373 {
20374 case 1:
20375 if (Q_CLASS_P (regclass) || TARGET_64BIT)
20376 {
20377 if (!in)
20378 return ix86_cost->int_store[0];
20379 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
20380 cost = ix86_cost->movzbl_load;
20381 else
20382 cost = ix86_cost->int_load[0];
20383 if (in == 2)
20384 return MAX (cost, ix86_cost->int_store[0]);
20385 return cost;
20386 }
20387 else
20388 {
20389 if (in == 2)
20390 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
20391 if (in)
20392 return ix86_cost->movzbl_load;
20393 else
20394 return ix86_cost->int_store[0] + 4;
20395 }
20396 break;
20397 case 2:
20398 if (in == 2)
20399 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
20400 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
20401 default:
20402 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
20403 if (mode == TFmode)
20404 mode = XFmode;
20405 if (in == 2)
20406 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
20407 else if (in)
20408 cost = ix86_cost->int_load[2];
20409 else
20410 cost = ix86_cost->int_store[2];
20411 return (cost * (((int) GET_MODE_SIZE (mode)
20412 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
20413 }
20414}
20415
20416int
20417ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
20418{
20419 return inline_memory_move_cost (mode, regclass, in);
20420}
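
/* Worked example, illustrative only: a DImode load into GENERAL_REGS on
   ia32 takes the default branch above with IN == 1, giving
   ix86_cost->int_load[2] * ((8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD),
   i.e. the cost of two 32-bit loads.  */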
20421
20422
f84aa48a 20423/* Return the cost of moving data from a register in class CLASS1 to
a4f31c00 20424 one in class CLASS2.
f84aa48a
JH
20425
20426 It is not required that the cost always equal 2 when FROM is the same as TO;
20427 on some machines it is expensive to move between registers if they are not
20428 general registers. */
f75959a6 20429
f84aa48a 20430int
b96a374d
AJ
20431ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
20432 enum reg_class class2)
f84aa48a
JH
20433{
20434 /* In case we require secondary memory, compute cost of the store followed
b96a374d 20435 by load. In order to avoid bad register allocation choices, we need
d631b80a
RH
20436 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
20437
6232eadc 20438 if (inline_secondary_memory_needed (class1, class2, mode, 0))
f84aa48a 20439 {
d631b80a
RH
20440 int cost = 1;
20441
6232eadc
JH
20442 cost += inline_memory_move_cost (mode, class1, 2);
20443 cost += inline_memory_move_cost (mode, class2, 2);
b96a374d 20444
d631b80a
RH
20445 /* In case of copying from a general purpose register we may emit multiple
20446 stores followed by a single load, causing a memory size mismatch stall.
d1f87653 20447 Count this as an arbitrarily high cost of 20. */
62415523 20448 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
d631b80a
RH
20449 cost += 20;
20450
20451 /* In the case of FP/MMX moves, the registers actually overlap, and we
20452 have to switch modes in order to treat them differently. */
20453 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
20454 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
20455 cost += 20;
20456
20457 return cost;
f84aa48a 20458 }
d631b80a 20459
92d0fb09 20460 /* Moves between SSE/MMX and integer unit are expensive. */
62415523
JH
20461 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
20462 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
147bbdd0
UB
20463
20464 /* ??? By keeping returned value relatively high, we limit the number
20465 of moves between integer and MMX/SSE registers for all targets.
20466 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
20467 where integer modes in MMX/SSE registers are not tieable
20468 because of missing QImode and HImode moves to, from or between
20469 MMX/SSE registers. */
20470 return MAX (ix86_cost->mmxsse_to_integer, 8);
20471
fa79946e
JH
20472 if (MAYBE_FLOAT_CLASS_P (class1))
20473 return ix86_cost->fp_move;
20474 if (MAYBE_SSE_CLASS_P (class1))
20475 return ix86_cost->sse_move;
20476 if (MAYBE_MMX_CLASS_P (class1))
20477 return ix86_cost->mmx_move;
f84aa48a
JH
20478 return 2;
20479}
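
/* Worked example, illustrative only: copying DImode from GENERAL_REGS
   to SSE_REGS on ia32 needs secondary memory, so the result is
   1 + inline_memory_move_cost for each class (with IN == 2), plus the
   extra 20 because CLASS_MAX_NREGS gives two integer registers against
   one SSE register, the size mismatch stall described above.  */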
20480
a946dd00 20481/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
c1c5b5e3
RH
20482
20483bool
b96a374d 20484ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
a946dd00
JH
20485{
20486 /* Flags and only flags can only hold CCmode values. */
20487 if (CC_REGNO_P (regno))
20488 return GET_MODE_CLASS (mode) == MODE_CC;
20489 if (GET_MODE_CLASS (mode) == MODE_CC
20490 || GET_MODE_CLASS (mode) == MODE_RANDOM
20491 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
20492 return 0;
20493 if (FP_REGNO_P (regno))
20494 return VALID_FP_MODE_P (mode);
20495 if (SSE_REGNO_P (regno))
dcbca208 20496 {
6c4ccfd8
RH
20497 /* We implement the move patterns for all vector modes into and
20498 out of SSE registers, even when no operation instructions
20499 are available. */
20500 return (VALID_SSE_REG_MODE (mode)
20501 || VALID_SSE2_REG_MODE (mode)
20502 || VALID_MMX_REG_MODE (mode)
20503 || VALID_MMX_REG_MODE_3DNOW (mode));
dcbca208 20504 }
a946dd00 20505 if (MMX_REGNO_P (regno))
dcbca208 20506 {
6c4ccfd8
RH
20507 /* We implement the move patterns for 3DNOW modes even in MMX mode,
20508 so if the register is available at all, then we can move data of
20509 the given mode into or out of it. */
20510 return (VALID_MMX_REG_MODE (mode)
20511 || VALID_MMX_REG_MODE_3DNOW (mode));
dcbca208 20512 }
b4e82619
RH
20513
20514 if (mode == QImode)
20515 {
20516 /* Take care for QImode values - they can be in non-QI regs,
20517 but then they do cause partial register stalls. */
20518 if (regno < 4 || TARGET_64BIT)
20519 return 1;
20520 if (!TARGET_PARTIAL_REG_STALL)
20521 return 1;
20522 return reload_in_progress || reload_completed;
20523 }
20524 /* We handle both integer and floats in the general purpose registers. */
20525 else if (VALID_INT_MODE_P (mode))
20526 return 1;
20527 else if (VALID_FP_MODE_P (mode))
20528 return 1;
20529 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
5656a184 20530 on to use that value in smaller contexts, this can easily force a
b4e82619
RH
20531 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
20532 supporting DImode, allow it. */
20533 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
a946dd00 20534 return 1;
b4e82619
RH
20535
20536 return 0;
a946dd00 20537}
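
/* Worked example, illustrative only: QImode in %esi (regno 4) on ia32
   is rejected before reload on TARGET_PARTIAL_REG_STALL tunings, since
   %esi has no directly addressable low byte; once reload has started,
   any QImode value already assigned there must be accepted.  */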
fa79946e 20538
5656a184 20539/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
c1c5b5e3
RH
20540 tieable integer mode. */
20541
20542static bool
20543ix86_tieable_integer_mode_p (enum machine_mode mode)
20544{
20545 switch (mode)
20546 {
20547 case HImode:
20548 case SImode:
20549 return true;
20550
20551 case QImode:
20552 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
20553
20554 case DImode:
20555 return TARGET_64BIT;
20556
20557 default:
20558 return false;
20559 }
20560}
20561
20562/* Return true if MODE1 is accessible in a register that can hold MODE2
20563 without copying. That is, all register classes that can hold MODE2
20564 can also hold MODE1. */
20565
20566bool
20567ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
20568{
20569 if (mode1 == mode2)
20570 return true;
20571
20572 if (ix86_tieable_integer_mode_p (mode1)
20573 && ix86_tieable_integer_mode_p (mode2))
20574 return true;
20575
20576 /* MODE2 being XFmode implies fp stack or general regs, which means we
20577 can tie any smaller floating point modes to it. Note that we do not
20578 tie this with TFmode. */
20579 if (mode2 == XFmode)
20580 return mode1 == SFmode || mode1 == DFmode;
20581
20582 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
20583 that we can tie it with SFmode. */
20584 if (mode2 == DFmode)
20585 return mode1 == SFmode;
20586
5656a184 20587 /* If MODE2 is only appropriate for an SSE register, then tie with
c1c5b5e3 20588 any other mode acceptable to SSE registers. */
8ab93332 20589 if (GET_MODE_SIZE (mode2) == 16
b4e82619 20590 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
8ab93332
UB
20591 return (GET_MODE_SIZE (mode1) == 16
20592 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
c1c5b5e3 20593
8ab93332 20594 /* If MODE2 is appropriate for an MMX register, then tie
c1c5b5e3 20595 with any other mode acceptable to MMX registers. */
b4e82619
RH
20596 if (GET_MODE_SIZE (mode2) == 8
20597 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
93de7743 20598 return (GET_MODE_SIZE (mode1) == 8
8ab93332 20599 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
c1c5b5e3
RH
20600
20601 return false;
20602}
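
/* Worked examples, illustrative only: SFmode ties with DFmode and both
   tie with XFmode; HImode and SImode always tie; QImode joins them only
   when no partial register stall is expected, mirroring the QImode
   handling in ix86_hard_regno_mode_ok.  */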
20603
3c50106f
RH
20604/* Compute a (partial) cost for rtx X. Return true if the complete
20605 cost has been computed, and false if subexpressions should be
20606 scanned. In either case, *TOTAL contains the cost result. */
20607
20608static bool
9415ab7d 20609ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
3c50106f 20610{
9415ab7d 20611 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
3c50106f
RH
20612 enum machine_mode mode = GET_MODE (x);
20613
20614 switch (code)
20615 {
20616 case CONST_INT:
20617 case CONST:
20618 case LABEL_REF:
20619 case SYMBOL_REF:
8fe75e43 20620 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
3c50106f 20621 *total = 3;
8fe75e43 20622 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
3c50106f 20623 *total = 2;
3504dad3
JH
20624 else if (flag_pic && SYMBOLIC_CONST (x)
20625 && (!TARGET_64BIT
20626 || (GET_CODE (x) != LABEL_REF
20627 && (GET_CODE (x) != SYMBOL_REF
12969f45 20628 || !SYMBOL_REF_LOCAL_P (x)))))
3c50106f
RH
20629 *total = 1;
20630 else
20631 *total = 0;
20632 return true;
20633
20634 case CONST_DOUBLE:
20635 if (mode == VOIDmode)
20636 *total = 0;
20637 else
20638 switch (standard_80387_constant_p (x))
20639 {
20640 case 1: /* 0.0 */
20641 *total = 1;
20642 break;
881b2a96 20643 default: /* Other constants */
3c50106f
RH
20644 *total = 2;
20645 break;
881b2a96
RS
20646 case 0:
20647 case -1:
3c50106f
RH
20648 /* Start with (MEM (SYMBOL_REF)), since that's where
20649 it'll probably end up. Add a penalty for size. */
20650 *total = (COSTS_N_INSNS (1)
3504dad3 20651 + (flag_pic != 0 && !TARGET_64BIT)
3c50106f
RH
20652 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
20653 break;
20654 }
20655 return true;
20656
20657 case ZERO_EXTEND:
20658 /* The zero extensions is often completely free on x86_64, so make
20659 it as cheap as possible. */
20660 if (TARGET_64BIT && mode == DImode
20661 && GET_MODE (XEXP (x, 0)) == SImode)
20662 *total = 1;
20663 else if (TARGET_ZERO_EXTEND_WITH_AND)
a9cc9cc6 20664 *total = ix86_cost->add;
3c50106f 20665 else
a9cc9cc6 20666 *total = ix86_cost->movzx;
3c50106f
RH
20667 return false;
20668
20669 case SIGN_EXTEND:
a9cc9cc6 20670 *total = ix86_cost->movsx;
3c50106f
RH
20671 return false;
20672
20673 case ASHIFT:
7656aee4 20674 if (CONST_INT_P (XEXP (x, 1))
3c50106f
RH
20675 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
20676 {
20677 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20678 if (value == 1)
20679 {
a9cc9cc6 20680 *total = ix86_cost->add;
3c50106f
RH
20681 return false;
20682 }
20683 if ((value == 2 || value == 3)
3c50106f
RH
20684 && ix86_cost->lea <= ix86_cost->shift_const)
20685 {
a9cc9cc6 20686 *total = ix86_cost->lea;
3c50106f
RH
20687 return false;
20688 }
20689 }
5efb1046 20690 /* FALLTHRU */
3c50106f
RH
20691
20692 case ROTATE:
20693 case ASHIFTRT:
20694 case LSHIFTRT:
20695 case ROTATERT:
20696 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
20697 {
7656aee4 20698 if (CONST_INT_P (XEXP (x, 1)))
3c50106f
RH
20699 {
20700 if (INTVAL (XEXP (x, 1)) > 32)
a9cc9cc6 20701 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
3c50106f 20702 else
a9cc9cc6 20703 *total = ix86_cost->shift_const * 2;
3c50106f
RH
20704 }
20705 else
20706 {
20707 if (GET_CODE (XEXP (x, 1)) == AND)
a9cc9cc6 20708 *total = ix86_cost->shift_var * 2;
3c50106f 20709 else
a9cc9cc6 20710 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
3c50106f
RH
20711 }
20712 }
20713 else
20714 {
7656aee4 20715 if (CONST_INT_P (XEXP (x, 1)))
a9cc9cc6 20716 *total = ix86_cost->shift_const;
3c50106f 20717 else
a9cc9cc6 20718 *total = ix86_cost->shift_var;
3c50106f
RH
20719 }
20720 return false;
20721
20722 case MULT:
27ac40e2
UB
20723 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20724 {
20725 /* ??? SSE scalar cost should be used here. */
20726 *total = ix86_cost->fmul;
20727 return false;
20728 }
20729 else if (X87_FLOAT_MODE_P (mode))
3c50106f 20730 {
a9cc9cc6 20731 *total = ix86_cost->fmul;
4a5eab38 20732 return false;
3c50106f 20733 }
27ac40e2
UB
20734 else if (FLOAT_MODE_P (mode))
20735 {
20736 /* ??? SSE vector cost should be used here. */
20737 *total = ix86_cost->fmul;
20738 return false;
20739 }
3c50106f
RH
20740 else
20741 {
4a5eab38
PB
20742 rtx op0 = XEXP (x, 0);
20743 rtx op1 = XEXP (x, 1);
20744 int nbits;
7656aee4 20745 if (CONST_INT_P (XEXP (x, 1)))
4a5eab38
PB
20746 {
20747 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20748 for (nbits = 0; value != 0; value &= value - 1)
20749 nbits++;
20750 }
20751 else
20752 /* This is arbitrary. */
20753 nbits = 7;
20754
20755 /* Compute costs correctly for widening multiplication. */
20756 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
20757 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
20758 == GET_MODE_SIZE (mode))
20759 {
20760 int is_mulwiden = 0;
20761 enum machine_mode inner_mode = GET_MODE (op0);
20762
20763 if (GET_CODE (op0) == GET_CODE (op1))
20764 is_mulwiden = 1, op1 = XEXP (op1, 0);
7656aee4 20765 else if (CONST_INT_P (op1))
4a5eab38
PB
20766 {
20767 if (GET_CODE (op0) == SIGN_EXTEND)
20768 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
20769 == INTVAL (op1);
20770 else
20771 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
20772 }
20773
20774 if (is_mulwiden)
20775 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
20776 }
f676971a 20777
a9cc9cc6
JH
20778 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
20779 + nbits * ix86_cost->mult_bit
20780 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
4a5eab38
PB
20781
20782 return true;
3c50106f 20783 }
3c50106f
RH
20784
20785 case DIV:
20786 case UDIV:
20787 case MOD:
20788 case UMOD:
27ac40e2
UB
20789 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20790 /* ??? SSE cost should be used here. */
20791 *total = ix86_cost->fdiv;
20792 else if (X87_FLOAT_MODE_P (mode))
20793 *total = ix86_cost->fdiv;
20794 else if (FLOAT_MODE_P (mode))
20795 /* ??? SSE vector cost should be used here. */
a9cc9cc6 20796 *total = ix86_cost->fdiv;
3c50106f 20797 else
a9cc9cc6 20798 *total = ix86_cost->divide[MODE_INDEX (mode)];
3c50106f
RH
20799 return false;
20800
20801 case PLUS:
27ac40e2 20802 if (GET_MODE_CLASS (mode) == MODE_INT
3c50106f
RH
20803 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
20804 {
20805 if (GET_CODE (XEXP (x, 0)) == PLUS
20806 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7656aee4 20807 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
3c50106f
RH
20808 && CONSTANT_P (XEXP (x, 1)))
20809 {
20810 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
20811 if (val == 2 || val == 4 || val == 8)
20812 {
a9cc9cc6 20813 *total = ix86_cost->lea;
3c50106f
RH
20814 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20815 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
20816 outer_code);
20817 *total += rtx_cost (XEXP (x, 1), outer_code);
20818 return true;
20819 }
20820 }
20821 else if (GET_CODE (XEXP (x, 0)) == MULT
7656aee4 20822 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3c50106f
RH
20823 {
20824 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
20825 if (val == 2 || val == 4 || val == 8)
20826 {
a9cc9cc6 20827 *total = ix86_cost->lea;
3c50106f
RH
20828 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20829 *total += rtx_cost (XEXP (x, 1), outer_code);
20830 return true;
20831 }
20832 }
20833 else if (GET_CODE (XEXP (x, 0)) == PLUS)
20834 {
a9cc9cc6 20835 *total = ix86_cost->lea;
3c50106f
RH
20836 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20837 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20838 *total += rtx_cost (XEXP (x, 1), outer_code);
20839 return true;
20840 }
20841 }
5efb1046 20842 /* FALLTHRU */
3c50106f
RH
20843
20844 case MINUS:
27ac40e2
UB
20845 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20846 {
20847 /* ??? SSE cost should be used here. */
20848 *total = ix86_cost->fadd;
20849 return false;
20850 }
20851 else if (X87_FLOAT_MODE_P (mode))
20852 {
20853 *total = ix86_cost->fadd;
20854 return false;
20855 }
20856 else if (FLOAT_MODE_P (mode))
3c50106f 20857 {
27ac40e2 20858 /* ??? SSE vector cost should be used here. */
a9cc9cc6 20859 *total = ix86_cost->fadd;
3c50106f
RH
20860 return false;
20861 }
5efb1046 20862 /* FALLTHRU */
3c50106f
RH
20863
20864 case AND:
20865 case IOR:
20866 case XOR:
20867 if (!TARGET_64BIT && mode == DImode)
20868 {
a9cc9cc6 20869 *total = (ix86_cost->add * 2
3c50106f
RH
20870 + (rtx_cost (XEXP (x, 0), outer_code)
20871 << (GET_MODE (XEXP (x, 0)) != DImode))
20872 + (rtx_cost (XEXP (x, 1), outer_code)
b96a374d 20873 << (GET_MODE (XEXP (x, 1)) != DImode)));
3c50106f
RH
20874 return true;
20875 }
5efb1046 20876 /* FALLTHRU */
3c50106f
RH
20877
20878 case NEG:
27ac40e2
UB
20879 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20880 {
20881 /* ??? SSE cost should be used here. */
20882 *total = ix86_cost->fchs;
20883 return false;
20884 }
20885 else if (X87_FLOAT_MODE_P (mode))
20886 {
20887 *total = ix86_cost->fchs;
20888 return false;
20889 }
20890 else if (FLOAT_MODE_P (mode))
3c50106f 20891 {
27ac40e2 20892 /* ??? SSE vector cost should be used here. */
a9cc9cc6 20893 *total = ix86_cost->fchs;
3c50106f
RH
20894 return false;
20895 }
5efb1046 20896 /* FALLTHRU */
3c50106f
RH
20897
20898 case NOT:
20899 if (!TARGET_64BIT && mode == DImode)
a9cc9cc6 20900 *total = ix86_cost->add * 2;
3c50106f 20901 else
a9cc9cc6 20902 *total = ix86_cost->add;
3c50106f
RH
20903 return false;
20904
c271ba77
KH
20905 case COMPARE:
20906 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
20907 && XEXP (XEXP (x, 0), 1) == const1_rtx
7656aee4 20908 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
c271ba77
KH
20909 && XEXP (x, 1) == const0_rtx)
20910 {
20911 /* This kind of construct is implemented using test[bwl].
20912 Treat it as if we had an AND. */
a9cc9cc6 20913 *total = (ix86_cost->add
c271ba77
KH
20914 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
20915 + rtx_cost (const1_rtx, outer_code));
20916 return true;
20917 }
20918 return false;
20919
3c50106f 20920 case FLOAT_EXTEND:
27ac40e2 20921 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
ce7d4645 20922 *total = 0;
3c50106f
RH
20923 return false;
20924
20925 case ABS:
27ac40e2
UB
20926 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20927 /* ??? SSE cost should be used here. */
20928 *total = ix86_cost->fabs;
20929 else if (X87_FLOAT_MODE_P (mode))
20930 *total = ix86_cost->fabs;
20931 else if (FLOAT_MODE_P (mode))
20932 /* ??? SSE vector cost should be used here. */
a9cc9cc6 20933 *total = ix86_cost->fabs;
3c50106f
RH
20934 return false;
20935
20936 case SQRT:
27ac40e2
UB
20937 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20938 /* ??? SSE cost should be used here. */
20939 *total = ix86_cost->fsqrt;
20940 else if (X87_FLOAT_MODE_P (mode))
20941 *total = ix86_cost->fsqrt;
20942 else if (FLOAT_MODE_P (mode))
20943 /* ??? SSE vector cost should be used here. */
a9cc9cc6 20944 *total = ix86_cost->fsqrt;
3c50106f
RH
20945 return false;
20946
74dc3e94
RH
20947 case UNSPEC:
20948 if (XINT (x, 1) == UNSPEC_TP)
20949 *total = 0;
20950 return false;
20951
3c50106f
RH
20952 default:
20953 return false;
20954 }
20955}
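
/* Worked example, illustrative only: for (mult:SI (reg) (const_int 10))
   the constant has two bits set, so the MULT case above computes
   nbits == 2 and the total is mult_init[MODE_INDEX (SImode)]
   + 2 * mult_bit plus the operand costs; a multiply by 5 is instead
   caught by the LEA patterns when written as
   (plus (mult (reg) 4) (reg)).  */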
20956
b069de3b
SS
20957#if TARGET_MACHO
20958
20959static int current_machopic_label_num;
20960
20961/* Given a symbol name and its associated stub, write out the
20962 definition of the stub. */
20963
20964void
b96a374d 20965machopic_output_stub (FILE *file, const char *symb, const char *stub)
b069de3b
SS
20966{
20967 unsigned int length;
20968 char *binder_name, *symbol_name, lazy_ptr_name[32];
20969 int label = ++current_machopic_label_num;
20970
f7288899
EC
20971 /* For 64-bit we shouldn't get here. */
20972 gcc_assert (!TARGET_64BIT);
20973
b069de3b
SS
20974 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20975 symb = (*targetm.strip_name_encoding) (symb);
20976
20977 length = strlen (stub);
20978 binder_name = alloca (length + 32);
20979 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
20980
20981 length = strlen (symb);
20982 symbol_name = alloca (length + 32);
20983 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20984
20985 sprintf (lazy_ptr_name, "L%d$lz", label);
20986
20987 if (MACHOPIC_PURE)
56c779bc 20988 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
b069de3b 20989 else
56c779bc 20990 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
b069de3b
SS
20991
20992 fprintf (file, "%s:\n", stub);
20993 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20994
20995 if (MACHOPIC_PURE)
20996 {
4b1c1f6f
EC
20997 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
20998 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
20999 fprintf (file, "\tjmp\t*%%edx\n");
b069de3b
SS
21000 }
21001 else
4b1c1f6f 21002 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
b96a374d 21003
b069de3b 21004 fprintf (file, "%s:\n", binder_name);
b96a374d 21005
b069de3b
SS
21006 if (MACHOPIC_PURE)
21007 {
4b1c1f6f
EC
21008 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
21009 fprintf (file, "\tpushl\t%%eax\n");
b069de3b
SS
21010 }
21011 else
4b1c1f6f 21012 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
b069de3b 21013
4b1c1f6f 21014 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
b069de3b 21015
56c779bc 21016 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
b069de3b
SS
21017 fprintf (file, "%s:\n", lazy_ptr_name);
21018 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21019 fprintf (file, "\t.long %s\n", binder_name);
21020}
7d072037
SH
21021
21022void
21023darwin_x86_file_end (void)
21024{
21025 darwin_file_end ();
21026 ix86_file_end ();
21027}
b069de3b
SS
21028#endif /* TARGET_MACHO */
21029
162f023b
JH
21030/* Order the registers for register allocator. */
21031
21032void
b96a374d 21033x86_order_regs_for_local_alloc (void)
162f023b
JH
21034{
21035 int pos = 0;
21036 int i;
21037
21038 /* First allocate the local general purpose registers. */
21039 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
21040 if (GENERAL_REGNO_P (i) && call_used_regs[i])
21041 reg_alloc_order [pos++] = i;
21042
21043 /* Global general purpose registers. */
21044 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
21045 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
21046 reg_alloc_order [pos++] = i;
21047
21048 /* x87 registers come first in case we are doing FP math
21049 using them. */
21050 if (!TARGET_SSE_MATH)
21051 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
21052 reg_alloc_order [pos++] = i;
fce5a9f2 21053
162f023b
JH
21054 /* SSE registers. */
21055 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
21056 reg_alloc_order [pos++] = i;
21057 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
21058 reg_alloc_order [pos++] = i;
21059
d1f87653 21060 /* x87 registers. */
162f023b
JH
21061 if (TARGET_SSE_MATH)
21062 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
21063 reg_alloc_order [pos++] = i;
21064
21065 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
21066 reg_alloc_order [pos++] = i;
21067
21068 /* Initialize the rest of the array, as some registers are never
21069 allocated at all. */
21070 while (pos < FIRST_PSEUDO_REGISTER)
21071 reg_alloc_order [pos++] = 0;
21072}
194734e9 21073
fe77449a
DR
21074/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
21075 struct attribute_spec.handler. */
21076static tree
b96a374d
AJ
21077ix86_handle_struct_attribute (tree *node, tree name,
21078 tree args ATTRIBUTE_UNUSED,
21079 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
fe77449a
DR
21080{
21081 tree *type = NULL;
21082 if (DECL_P (*node))
21083 {
21084 if (TREE_CODE (*node) == TYPE_DECL)
21085 type = &TREE_TYPE (*node);
21086 }
21087 else
21088 type = node;
21089
21090 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
21091 || TREE_CODE (*type) == UNION_TYPE)))
21092 {
5c498b10
DD
21093 warning (OPT_Wattributes, "%qs attribute ignored",
21094 IDENTIFIER_POINTER (name));
fe77449a
DR
21095 *no_add_attrs = true;
21096 }
21097
21098 else if ((is_attribute_p ("ms_struct", name)
21099 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
21100 || ((is_attribute_p ("gcc_struct", name)
21101 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
21102 {
5c498b10 21103 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
fe77449a
DR
21104 IDENTIFIER_POINTER (name));
21105 *no_add_attrs = true;
21106 }
21107
21108 return NULL_TREE;
21109}
21110
4977bab6 21111static bool
b96a374d 21112ix86_ms_bitfield_layout_p (tree record_type)
4977bab6 21113{
6ac49599 21114 return (TARGET_MS_BITFIELD_LAYOUT &&
021bad8e 21115 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
fe77449a 21116 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
4977bab6
ZW
21117}
21118
483ab821
MM
21119/* Returns an expression indicating where the this parameter is
21120 located on entry to the FUNCTION. */
21121
21122static rtx
b96a374d 21123x86_this_parameter (tree function)
483ab821
MM
21124{
21125 tree type = TREE_TYPE (function);
ccf8e764 21126 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
483ab821 21127
3961e8fe
RH
21128 if (TARGET_64BIT)
21129 {
ccf8e764
RH
21130 const int *parm_regs;
21131
21132 if (TARGET_64BIT_MS_ABI)
21133 parm_regs = x86_64_ms_abi_int_parameter_registers;
21134 else
21135 parm_regs = x86_64_int_parameter_registers;
21136 return gen_rtx_REG (DImode, parm_regs[aggr]);
3961e8fe
RH
21137 }
21138
ccf8e764
RH
21139 if (ix86_function_regparm (type, function) > 0
21140 && !type_has_variadic_args_p (type))
483ab821 21141 {
ccf8e764
RH
21142 int regno = 0;
21143 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
21144 regno = 2;
21145 return gen_rtx_REG (SImode, regno);
483ab821
MM
21146 }
21147
ccf8e764 21148 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
483ab821
MM
21149}
21150
3961e8fe
RH
21151/* Determine whether x86_output_mi_thunk can succeed. */
21152
21153static bool
b96a374d
AJ
21154x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
21155 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
21156 HOST_WIDE_INT vcall_offset, tree function)
3961e8fe
RH
21157{
21158 /* 64-bit can handle anything. */
21159 if (TARGET_64BIT)
21160 return true;
21161
21162 /* For 32-bit, everything's fine if we have one free register. */
e767b5be 21163 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
3961e8fe
RH
21164 return true;
21165
21166 /* Need a free register for vcall_offset. */
21167 if (vcall_offset)
21168 return false;
21169
21170 /* Need a free register for GOT references. */
21171 if (flag_pic && !(*targetm.binds_local_p) (function))
21172 return false;
21173
21174 /* Otherwise ok. */
21175 return true;
21176}
21177
21178/* Output the assembler code for a thunk function. THUNK_DECL is the
21179 declaration for the thunk function itself, FUNCTION is the decl for
21180 the target function. DELTA is an immediate constant offset to be
272d0bee 21181 added to THIS. If VCALL_OFFSET is nonzero, the word at
3961e8fe 21182 *(*this + vcall_offset) should be added to THIS. */
483ab821 21183
c590b625 21184static void
b96a374d
AJ
21185x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
21186 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
21187 HOST_WIDE_INT vcall_offset, tree function)
194734e9 21188{
194734e9 21189 rtx xops[3];
9415ab7d 21190 rtx this_param = x86_this_parameter (function);
3961e8fe 21191 rtx this_reg, tmp;
194734e9 21192
3961e8fe
RH
21193 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
21194 pull it in now and let DELTA benefit. */
9415ab7d
TN
21195 if (REG_P (this_param))
21196 this_reg = this_param;
3961e8fe
RH
21197 else if (vcall_offset)
21198 {
21199 /* Put the this parameter into %eax. */
9415ab7d 21200 xops[0] = this_param;
3961e8fe
RH
21201 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
21202 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21203 }
21204 else
21205 this_reg = NULL_RTX;
21206
21207 /* Adjust the this parameter by a fixed constant. */
21208 if (delta)
194734e9 21209 {
483ab821 21210 xops[0] = GEN_INT (delta);
9415ab7d 21211 xops[1] = this_reg ? this_reg : this_param;
3961e8fe 21212 if (TARGET_64BIT)
194734e9 21213 {
3961e8fe
RH
21214 if (!x86_64_general_operand (xops[0], DImode))
21215 {
3c4ace25 21216 tmp = gen_rtx_REG (DImode, R10_REG);
3961e8fe
RH
21217 xops[1] = tmp;
21218 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
21219 xops[0] = tmp;
9415ab7d 21220 xops[1] = this_param;
3961e8fe
RH
21221 }
21222 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
194734e9
JH
21223 }
21224 else
3961e8fe 21225 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
194734e9 21226 }
3961e8fe
RH
21227
21228 /* Adjust the this parameter by a value stored in the vtable. */
21229 if (vcall_offset)
194734e9 21230 {
3961e8fe 21231 if (TARGET_64BIT)
3c4ace25 21232 tmp = gen_rtx_REG (DImode, R10_REG);
3961e8fe 21233 else
e767b5be
JH
21234 {
21235 int tmp_regno = 2 /* ECX */;
21236 if (lookup_attribute ("fastcall",
ccf8e764 21237 TYPE_ATTRIBUTES (TREE_TYPE (function))))
e767b5be
JH
21238 tmp_regno = 0 /* EAX */;
21239 tmp = gen_rtx_REG (SImode, tmp_regno);
21240 }
483ab821 21241
3961e8fe
RH
21242 xops[0] = gen_rtx_MEM (Pmode, this_reg);
21243 xops[1] = tmp;
21244 if (TARGET_64BIT)
21245 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
21246 else
21247 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
483ab821 21248
3961e8fe
RH
21249 /* Adjust the this parameter. */
21250 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
21251 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
21252 {
3c4ace25 21253 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
3961e8fe
RH
21254 xops[0] = GEN_INT (vcall_offset);
21255 xops[1] = tmp2;
21256 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
21257 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
483ab821 21258 }
3961e8fe
RH
21259 xops[1] = this_reg;
21260 if (TARGET_64BIT)
21261 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
21262 else
21263 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
21264 }
194734e9 21265
3961e8fe 21266 /* If necessary, drop THIS back to its stack slot. */
9415ab7d 21267 if (this_reg && this_reg != this_param)
3961e8fe
RH
21268 {
21269 xops[0] = this_reg;
9415ab7d 21270 xops[1] = this_param;
3961e8fe
RH
21271 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21272 }
194734e9 21273
89ce1c8f 21274 xops[0] = XEXP (DECL_RTL (function), 0);
3961e8fe
RH
21275 if (TARGET_64BIT)
21276 {
21277 if (!flag_pic || (*targetm.binds_local_p) (function))
21278 output_asm_insn ("jmp\t%P0", xops);
ccf8e764
RH
21279 /* All thunks should be in the same object as their target,
21280 and thus binds_local_p should be true. */
21281 else if (TARGET_64BIT_MS_ABI)
21282 gcc_unreachable ();
3961e8fe 21283 else
fcbe3b89 21284 {
89ce1c8f 21285 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
fcbe3b89
RH
21286 tmp = gen_rtx_CONST (Pmode, tmp);
21287 tmp = gen_rtx_MEM (QImode, tmp);
21288 xops[0] = tmp;
21289 output_asm_insn ("jmp\t%A0", xops);
21290 }
3961e8fe
RH
21291 }
21292 else
21293 {
21294 if (!flag_pic || (*targetm.binds_local_p) (function))
21295 output_asm_insn ("jmp\t%P0", xops);
194734e9 21296 else
21ff35fb 21297#if TARGET_MACHO
095fa594
SH
21298 if (TARGET_MACHO)
21299 {
11abc112 21300 rtx sym_ref = XEXP (DECL_RTL (function), 0);
f676971a
EC
21301 tmp = (gen_rtx_SYMBOL_REF
21302 (Pmode,
11abc112 21303 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
095fa594
SH
21304 tmp = gen_rtx_MEM (QImode, tmp);
21305 xops[0] = tmp;
21306 output_asm_insn ("jmp\t%0", xops);
21307 }
21308 else
21309#endif /* TARGET_MACHO */
194734e9 21310 {
3961e8fe 21311 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
7d072037 21312 output_set_got (tmp, NULL_RTX);
3961e8fe
RH
21313
21314 xops[1] = tmp;
21315 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
21316 output_asm_insn ("jmp\t{*}%1", xops);
194734e9
JH
21317 }
21318 }
21319}
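
/* Illustrative sketch, not part of the original source: on ia32 a thunk
   with DELTA == 4, no vcall offset, and a non-PIC target reduces to

	addl	$4, 4(%esp)
	jmp	function

   i.e. the this pointer is adjusted in place in its stack slot and
   control transfers directly to the real method.  */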
e2500fed 21320
1bc7c5b6 21321static void
b96a374d 21322x86_file_start (void)
1bc7c5b6
ZW
21323{
21324 default_file_start ();
192d0f89
GK
21325#if TARGET_MACHO
21326 darwin_file_start ();
21327#endif
1bc7c5b6
ZW
21328 if (X86_FILE_START_VERSION_DIRECTIVE)
21329 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
21330 if (X86_FILE_START_FLTUSED)
21331 fputs ("\t.global\t__fltused\n", asm_out_file);
21332 if (ix86_asm_dialect == ASM_INTEL)
21333 fputs ("\t.intel_syntax\n", asm_out_file);
21334}
21335
e932b21b 21336int
b96a374d 21337x86_field_alignment (tree field, int computed)
e932b21b
JH
21338{
21339 enum machine_mode mode;
ad9335eb
JJ
21340 tree type = TREE_TYPE (field);
21341
21342 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
e932b21b 21343 return computed;
ad9335eb
JJ
21344 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
21345 ? get_inner_array_type (type) : type);
39e3a681
JJ
21346 if (mode == DFmode || mode == DCmode
21347 || GET_MODE_CLASS (mode) == MODE_INT
21348 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
e932b21b
JH
21349 return MIN (32, computed);
21350 return computed;
21351}
21352
a5fa1ecd
JH
21353/* Output assembler code to FILE to increment profiler label # LABELNO
21354 for profiling a function entry. */
21355void
b96a374d 21356x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
a5fa1ecd
JH
21357{
21358 if (TARGET_64BIT)
ccf8e764 21359 {
a5fa1ecd 21360#ifndef NO_PROFILE_COUNTERS
ccf8e764 21361 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
a5fa1ecd 21362#endif
ccf8e764
RH
21363
21364 if (!TARGET_64BIT_MS_ABI && flag_pic)
a5fa1ecd 21365 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
ccf8e764 21366 else
a5fa1ecd 21367 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
ccf8e764 21368 }
a5fa1ecd
JH
21369 else if (flag_pic)
21370 {
21371#ifndef NO_PROFILE_COUNTERS
21372 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
21373 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
21374#endif
21375 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
21376 }
21377 else
21378 {
21379#ifndef NO_PROFILE_COUNTERS
ff6e2d3e 21380 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
a5fa1ecd
JH
21381 PROFILE_COUNT_REGISTER);
21382#endif
21383 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
21384 }
21385}
21386
d2c49530
JH
21387/* We don't have exact information about the insn sizes, but we may assume
21388 quite safely that we are informed about all 1-byte insns and memory
c51e6d85 21389 address sizes. This is enough to eliminate unnecessary padding in
d2c49530
JH
21390 99% of cases. */
21391
21392static int
b96a374d 21393min_insn_size (rtx insn)
d2c49530
JH
21394{
21395 int l = 0;
21396
21397 if (!INSN_P (insn) || !active_insn_p (insn))
21398 return 0;
21399
21400 /* Discard alignment insns we've emitted and jump tables. */
21401 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
21402 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
21403 return 0;
7656aee4 21404 if (JUMP_P (insn)
d2c49530
JH
21405 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
21406 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
21407 return 0;
21408
21409 /* Important case - calls are always 5 bytes.
21410 It is common to have many calls in the row. */
7656aee4 21411 if (CALL_P (insn)
d2c49530
JH
21412 && symbolic_reference_mentioned_p (PATTERN (insn))
21413 && !SIBLING_CALL_P (insn))
21414 return 5;
21415 if (get_attr_length (insn) <= 1)
21416 return 1;
21417
21418 /* For normal instructions we may rely on the sizes of addresses
21419 and the presence of symbol to require 4 bytes of encoding.
21420 This is not the case for jumps where references are PC relative. */
7656aee4 21421 if (!JUMP_P (insn))
d2c49530
JH
21422 {
21423 l = get_attr_length_address (insn);
21424 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
21425 l = 4;
21426 }
21427 if (l)
21428 return 1+l;
21429 else
21430 return 2;
21431}
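
/* Worked examples, illustrative only: a non-sibling call mentioning a
   symbol is counted as exactly 5 bytes (opcode plus rel32); a move with
   a symbolic memory operand gets l == 4 and therefore 1 + 4 == 5 bytes;
   an insn with no address bytes at all is estimated at 2.  */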
21432
c51e6d85 21433/* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte
d2c49530
JH
21434 window. */
21435
21436static void
be04394b 21437ix86_avoid_jump_misspredicts (void)
d2c49530
JH
21438{
21439 rtx insn, start = get_insns ();
21440 int nbytes = 0, njumps = 0;
21441 int isjump = 0;
21442
21443 /* Look for all minimal intervals of instructions containing 4 jumps.
21444 The intervals are bounded by START and INSN. NBYTES is the total
21445 size of the instructions in the interval, including INSN but not
21446 including START. When NBYTES is smaller than 16, it is possible
21447 that START and INSN end up in the same 16-byte page.
21448
21449 The smallest page offset at which INSN can start is the case where START
21450 ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
21451 We add a p2align to the 16-byte page with maxskip 15 - NBYTES + sizeof (INSN).
21452 */
21453 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21454 {
21455
21456 nbytes += min_insn_size (insn);
c263766c
RH
21457 if (dump_file)
21458 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
d2c49530 21459 INSN_UID (insn), min_insn_size (insn));
7656aee4 21460 if ((JUMP_P (insn)
d2c49530
JH
21461 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21462 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
7656aee4 21463 || CALL_P (insn))
d2c49530
JH
21464 njumps++;
21465 else
21466 continue;
21467
21468 while (njumps > 3)
21469 {
21470 start = NEXT_INSN (start);
7656aee4 21471 if ((JUMP_P (start)
d2c49530
JH
21472 && GET_CODE (PATTERN (start)) != ADDR_VEC
21473 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
7656aee4 21474 || CALL_P (start))
d2c49530
JH
21475 njumps--, isjump = 1;
21476 else
21477 isjump = 0;
21478 nbytes -= min_insn_size (start);
21479 }
d0396b79 21480 gcc_assert (njumps >= 0);
c263766c
RH
21481 if (dump_file)
21482 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
d2c49530
JH
21483 INSN_UID (start), INSN_UID (insn), nbytes);
21484
21485 if (njumps == 3 && isjump && nbytes < 16)
21486 {
21487 int padsize = 15 - nbytes + min_insn_size (insn);
21488
c263766c
RH
21489 if (dump_file)
21490 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
21491 INSN_UID (insn), padsize);
d2c49530
JH
21492 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
21493 }
21494 }
21495}
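
/* Worked example, illustrative only: when a fourth jump would land in
   the same 16-byte page as the previous three and the interval has
   nbytes == 12 with a 2-byte final jump, padsize == 15 - 12 + 2 == 5,
   so a p2align skipping at most 5 bytes is emitted to push the jump
   into the next page.  */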
21496
be04394b 21497/* AMD Athlon works faster
d1f87653 21498 when RET is not the destination of a conditional jump and is not directly preceded
2a500b9e
JH
21499 by another jump instruction. We avoid the penalty by inserting a NOP just
21500 before such RET instructions. */
18dbd950 21501static void
be04394b 21502ix86_pad_returns (void)
2a500b9e
JH
21503{
21504 edge e;
628f6a4e 21505 edge_iterator ei;
2a500b9e 21506
628f6a4e
BE
21507 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
21508 {
21509 basic_block bb = e->src;
21510 rtx ret = BB_END (bb);
21511 rtx prev;
21512 bool replace = false;
21513
7656aee4 21514 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
628f6a4e
BE
21515 || !maybe_hot_bb_p (bb))
21516 continue;
21517 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
7656aee4 21518 if (active_insn_p (prev) || LABEL_P (prev))
628f6a4e 21519 break;
7656aee4 21520 if (prev && LABEL_P (prev))
628f6a4e
BE
21521 {
21522 edge e;
21523 edge_iterator ei;
21524
21525 FOR_EACH_EDGE (e, ei, bb->preds)
21526 if (EDGE_FREQUENCY (e) && e->src->index >= 0
21527 && !(e->flags & EDGE_FALLTHRU))
21528 replace = true;
21529 }
21530 if (!replace)
21531 {
21532 prev = prev_active_insn (ret);
21533 if (prev
7656aee4
UB
21534 && ((JUMP_P (prev) && any_condjump_p (prev))
21535 || CALL_P (prev)))
253c7a00 21536 replace = true;
628f6a4e
BE
21537 /* Empty functions get a branch mispredict even when the jump destination
21538 is not visible to us. */
21539 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
21540 replace = true;
21541 }
21542 if (replace)
21543 {
21544 emit_insn_before (gen_return_internal_long (), ret);
21545 delete_insn (ret);
21546 }
21547 }
be04394b
JH
21548}
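
/* Illustrative note, hedged: gen_return_internal_long is expected to
   expand to the two-byte "rep ret" form, which on K8 avoids the
   mispredict penalty for a RET that is a jump target or immediately
   follows another jump.  */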
21549
21550/* Implement machine specific optimizations. We implement padding of returns
21551 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
21552static void
21553ix86_reorg (void)
21554{
d326eaf0 21555 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
be04394b
JH
21556 ix86_pad_returns ();
21557 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
21558 ix86_avoid_jump_misspredicts ();
2a500b9e
JH
21559}
21560
4977bab6
ZW
21561/* Return nonzero when a QImode register that must be represented via a REX prefix
21562 is used. */
21563bool
b96a374d 21564x86_extended_QIreg_mentioned_p (rtx insn)
4977bab6
ZW
21565{
21566 int i;
21567 extract_insn_cached (insn);
21568 for (i = 0; i < recog_data.n_operands; i++)
21569 if (REG_P (recog_data.operand[i])
21570 && REGNO (recog_data.operand[i]) >= 4)
21571 return true;
21572 return false;
21573}
21574
21575/* Return nonzero when P points to a register encoded via a REX prefix.
21576 Called via for_each_rtx. */
21577static int
b96a374d 21578extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
4977bab6
ZW
21579{
21580 unsigned int regno;
21581 if (!REG_P (*p))
21582 return 0;
21583 regno = REGNO (*p);
21584 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
21585}
21586
21587/* Return true when INSN mentions register that must be encoded using REX
21588 prefix. */
21589bool
b96a374d 21590x86_extended_reg_mentioned_p (rtx insn)
4977bab6
ZW
21591{
21592 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
21593}
21594
1d6ba901 21595/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
8d705469
JH
21596 optabs would emit if we didn't have TFmode patterns. */
21597
21598void
b96a374d 21599x86_emit_floatuns (rtx operands[2])
8d705469
JH
21600{
21601 rtx neglab, donelab, i0, i1, f0, in, out;
1d6ba901
ZD
21602 enum machine_mode mode, inmode;
21603
21604 inmode = GET_MODE (operands[1]);
d0396b79 21605 gcc_assert (inmode == SImode || inmode == DImode);
8d705469
JH
21606
21607 out = operands[0];
1d6ba901 21608 in = force_reg (inmode, operands[1]);
8d705469
JH
21609 mode = GET_MODE (out);
21610 neglab = gen_label_rtx ();
21611 donelab = gen_label_rtx ();
8d705469
JH
21612 f0 = gen_reg_rtx (mode);
21613
ebff937c
SH
21614 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
21615
21616 expand_float (out, in, 0);
8d705469 21617
8d705469
JH
21618 emit_jump_insn (gen_jump (donelab));
21619 emit_barrier ();
21620
21621 emit_label (neglab);
21622
ebff937c
SH
21623 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
21624 1, OPTAB_DIRECT);
21625 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
21626 1, OPTAB_DIRECT);
21627 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
21628
8d705469 21629 expand_float (f0, i0, 0);
ebff937c 21630
8d705469
JH
21631 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
21632
21633 emit_label (donelab);
21634}
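
/* Illustrative C model of the expansion above, not part of the source,
   for a DImode input converted to double:

	if ((int64_t) in >= 0)
	  out = (double) (int64_t) in;          // signed convert suffices
	else
	  {
	    uint64_t i0 = (in >> 1) | (in & 1); // halve, keep the sticky bit
	    out = (double) (int64_t) i0;
	    out = out + out;                    // scale back up
	  }

   ORing the shifted-out low bit back in preserves correct rounding of
   the halved value.  */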
eb701deb
RH
21635\f
21636/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21637 with all elements equal to VAR. Return true if successful. */
21638
21639static bool
21640ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
21641 rtx target, rtx val)
21642{
21643 enum machine_mode smode, wsmode, wvmode;
21644 rtx x;
21645
21646 switch (mode)
21647 {
21648 case V2SImode:
21649 case V2SFmode:
12b3553f 21650 if (!mmx_ok)
eb701deb
RH
21651 return false;
21652 /* FALLTHRU */
21653
21654 case V2DFmode:
21655 case V2DImode:
21656 case V4SFmode:
21657 case V4SImode:
      val = force_reg (GET_MODE_INNER (mode), val);
      x = gen_rtx_VEC_DUPLICATE (mode, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4HImode:
      if (!mmx_ok)
	return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
	{
	  val = gen_lowpart (SImode, val);
	  x = gen_rtx_TRUNCATE (HImode, val);
	  x = gen_rtx_VEC_DUPLICATE (mode, x);
	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
	  return true;
	}
      else
	{
	  smode = HImode;
	  wsmode = SImode;
	  wvmode = V2SImode;
	  goto widen;
	}

    case V8QImode:
      if (!mmx_ok)
	return false;
      smode = QImode;
      wsmode = HImode;
      wvmode = V4HImode;
      goto widen;
    case V8HImode:
      if (TARGET_SSE2)
	{
	  rtx tmp1, tmp2;
	  /* Extend HImode to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
	  /* Insert the SImode value as low element of V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
				    CONST0_RTX (V4SImode),
				    const1_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
	  /* Cast the V4SImode vector back to a V8HImode vector.  */
	  tmp1 = gen_reg_rtx (V8HImode);
	  emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
	  /* Duplicate the low short through the whole low SImode word.  */
	  emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
	  /* Cast the V8HImode vector back to a V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
	  /* Replicate the low element of the V4SImode vector.  */
	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
	  /* Cast the V4SImode vector back to V8HImode, and store in target.  */
	  emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
	  return true;
	}
      smode = HImode;
      wsmode = SImode;
      wvmode = V4SImode;
      goto widen;
    case V16QImode:
      if (TARGET_SSE2)
	{
	  rtx tmp1, tmp2;
	  /* Extend QImode to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
	  /* Insert the SImode value as low element of V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
				    CONST0_RTX (V4SImode),
				    const1_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
	  /* Cast the V4SImode vector back to a V16QImode vector.  */
	  tmp1 = gen_reg_rtx (V16QImode);
	  emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
	  /* Duplicate the low byte through the whole low SImode word.  */
	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
	  /* Cast the V16QImode vector back to a V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
	  /* Replicate the low element of the V4SImode vector.  */
	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
	  /* Cast the V4SImode vector back to V16QImode, and store in target.  */
	  emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
	  return true;
	}
      smode = QImode;
      wsmode = HImode;
      wvmode = V8HImode;
      goto widen;
    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      val = convert_modes (wsmode, smode, val, true);
      x = expand_simple_binop (wsmode, ASHIFT, val,
			       GEN_INT (GET_MODE_BITSIZE (smode)),
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wvmode);
      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
	gcc_unreachable ();
      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}

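/* Illustrative sketch, not part of GCC: the scalar identity behind the
   "widen" path above.  Broadcasting a narrow value V across a wider
   integer is V | (V << bits); each recursion step doubles the element
   width until a supported vector mode is reached.  The helper name is
   hypothetical, for exposition only.  */
#if 0
static unsigned int
replicate_byte_sketch (unsigned char v)
{
  unsigned int x = v;
  x |= x << 8;	/* QImode -> HImode: both bytes now equal V.  */
  x |= x << 16;	/* HImode -> SImode: all four bytes now equal V.  */
  return x;
}
#endif
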
21773/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
acef130f 21774 whose ONE_VAR element is VAR, and other elements are zero. Return true
eb701deb
RH
21775 if successful. */
21776
21777static bool
acef130f
RS
21778ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
21779 rtx target, rtx var, int one_var)
eb701deb
RH
21780{
21781 enum machine_mode vsimode;
acef130f
RS
21782 rtx new_target;
21783 rtx x, tmp;
eb701deb
RH
21784
21785 switch (mode)
21786 {
21787 case V2SFmode:
21788 case V2SImode:
12b3553f 21789 if (!mmx_ok)
eb701deb
RH
21790 return false;
21791 /* FALLTHRU */
21792
21793 case V2DFmode:
21794 case V2DImode:
acef130f
RS
21795 if (one_var != 0)
21796 return false;
eb701deb
RH
21797 var = force_reg (GET_MODE_INNER (mode), var);
21798 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
21799 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21800 return true;
21801
21802 case V4SFmode:
21803 case V4SImode:
acef130f
RS
21804 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
21805 new_target = gen_reg_rtx (mode);
21806 else
21807 new_target = target;
eb701deb
RH
21808 var = force_reg (GET_MODE_INNER (mode), var);
21809 x = gen_rtx_VEC_DUPLICATE (mode, var);
21810 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
acef130f
RS
21811 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
21812 if (one_var != 0)
21813 {
21814 /* We need to shuffle the value to the correct position, so
21815 create a new pseudo to store the intermediate result. */
21816
21817 /* With SSE2, we can use the integer shuffle insns. */
21818 if (mode != V4SFmode && TARGET_SSE2)
21819 {
21820 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
21821 GEN_INT (1),
21822 GEN_INT (one_var == 1 ? 0 : 1),
21823 GEN_INT (one_var == 2 ? 0 : 1),
21824 GEN_INT (one_var == 3 ? 0 : 1)));
21825 if (target != new_target)
21826 emit_move_insn (target, new_target);
21827 return true;
21828 }
21829
21830 /* Otherwise convert the intermediate result to V4SFmode and
21831 use the SSE1 shuffle instructions. */
21832 if (mode != V4SFmode)
21833 {
21834 tmp = gen_reg_rtx (V4SFmode);
21835 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
21836 }
21837 else
21838 tmp = new_target;
21839
21840 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
21841 GEN_INT (1),
21842 GEN_INT (one_var == 1 ? 0 : 1),
21843 GEN_INT (one_var == 2 ? 0+4 : 1+4),
21844 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
21845
21846 if (mode != V4SFmode)
21847 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
21848 else if (tmp != target)
21849 emit_move_insn (target, tmp);
21850 }
21851 else if (target != new_target)
21852 emit_move_insn (target, new_target);
eb701deb
RH
21853 return true;
21854
21855 case V8HImode:
21856 case V16QImode:
21857 vsimode = V4SImode;
21858 goto widen;
21859 case V4HImode:
21860 case V8QImode:
21861 if (!mmx_ok)
21862 return false;
21863 vsimode = V2SImode;
21864 goto widen;
21865 widen:
acef130f
RS
21866 if (one_var != 0)
21867 return false;
21868
eb701deb
RH
21869 /* Zero extend the variable element to SImode and recurse. */
21870 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
21871
21872 x = gen_reg_rtx (vsimode);
acef130f
RS
21873 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
21874 var, one_var))
eb701deb
RH
21875 gcc_unreachable ();
21876
21877 emit_move_insn (target, gen_lowpart (mode, x));
21878 return true;
21879
21880 default:
21881 return false;
21882 }
21883}
21884
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
	 the general case.  */
      return false;

    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
	 the variable value with its adjacent constant value, and
	 promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
	{
	  var = convert_modes (HImode, QImode, var, true);
	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
	  x = GEN_INT (INTVAL (x) & 0xff);
	}
      else
	{
	  var = convert_modes (HImode, QImode, var, true);
	  x = gen_int_mode (INTVAL (x) << 8, HImode);
	}
      if (x != const0_rtx)
	var = expand_simple_binop (HImode, IOR, var, x, var,
				   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}

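/* Illustrative sketch, not part of GCC: how the "widen" path above
   packs a variable byte with its constant neighbor into one HImode
   value on a little-endian target.  Names are hypothetical.  */
#if 0
static unsigned short
pack_byte_pair_sketch (unsigned char var, unsigned char neighbor,
		       int var_is_odd)
{
  /* The odd-numbered byte lands in the high half of the 16-bit word.  */
  if (var_is_odd)
    return (unsigned short) ((var << 8) | neighbor);
  else
    return (unsigned short) ((neighbor << 8) | var);
}
#endif
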
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals)
{
  enum machine_mode half_mode = GET_MODE_INNER (mode);
  rtx op0 = NULL, op1 = NULL;
  bool use_vec_concat = false;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      /* For the two element vectors, we always implement VEC_CONCAT.  */
      op0 = XVECEXP (vals, 0, 0);
      op1 = XVECEXP (vals, 0, 1);
      use_vec_concat = true;
      break;

    case V4SFmode:
      half_mode = V2SFmode;
      goto half;
    case V4SImode:
      half_mode = V2SImode;
      goto half;
    half:
      {
	rtvec v;

	/* For V4SF and V4SI, we implement a concat of two V2 vectors.
	   Recurse to load the two halves.  */

	op0 = gen_reg_rtx (half_mode);
	v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
	ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));

	op1 = gen_reg_rtx (half_mode);
	v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
	ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));

	use_vec_concat = true;
      }
      break;

    case V8HImode:
    case V16QImode:
    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

  if (use_vec_concat)
    {
      if (!register_operand (op0, half_mode))
	op0 = force_reg (half_mode, op0);
      if (!register_operand (op1, half_mode))
	op1 = force_reg (half_mode, op1);

      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
    }
  else
    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
	{
	  rtx word = NULL_RTX;

	  for (j = 0; j < n_elt_per_word; ++j)
	    {
	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
	      elt = convert_modes (word_mode, inner_mode, elt, true);

	      if (j == 0)
		word = elt;
	      else
		{
		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
					      word, 1, OPTAB_LIB_WIDEN);
		  word = expand_simple_binop (word_mode, IOR, word, elt,
					      word, 1, OPTAB_LIB_WIDEN);
		}
	    }

	  words[i] = word;
	}

      if (n_words == 1)
	emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
	{
	  rtx tmp = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
	  emit_move_insn (target, tmp);
	}
      else if (n_words == 4)
	{
	  rtx tmp = gen_reg_rtx (V4SImode);
	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
	  emit_move_insn (target, gen_lowpart (mode, tmp));
	}
      else
	gcc_unreachable ();
    }
}

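/* Illustrative sketch, not part of GCC: the element-packing loop above,
   as plain C for two HImode elements packed into one 32-bit word on a
   little-endian target.  Elements are visited from the highest index
   down, so each shift makes room for the next lower element.  Names
   are hypothetical.  */
#if 0
static unsigned int
pack_elements_sketch (const unsigned short elt[2])
{
  unsigned int word = elt[1];	/* j == 0: highest-indexed element.  */
  word <<= 16;			/* Make room...  */
  word |= elt[0];		/* ...and merge the lower element.  */
  return word;
}
#endif
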
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
					    XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero
	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
						  XVECEXP (vals, 0, one_var),
						  one_var))
	return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
	return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}

void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_merge = false;
  rtx tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
	{
	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
	  if (elt == 0)
	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
	  else
	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
	  return;
	}
      break;

    case V2DImode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

    case V2DFmode:
      {
	rtx op0, op1;

	/* For the two element vectors, we implement a VEC_CONCAT with
	   the extraction of the other element.  */

	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

	if (elt == 0)
	  op0 = val, op1 = tmp;
	else
	  op0 = tmp, op1 = val;

	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      switch (elt)
	{
	case 0:
	  use_vec_merge = true;
	  break;

	case 1:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* target = A A B B */
	  emit_insn (gen_sse_unpcklps (target, target, target));
	  /* target = X A B B */
	  ix86_expand_vector_set (false, target, val, 0);
	  /* target = A X C D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (1), GEN_INT (0),
				       GEN_INT (2+4), GEN_INT (3+4)));
	  return;

	case 2:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (0+4), GEN_INT (3+4)));
	  return;

	case 3:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B C X */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (2+4), GEN_INT (0+4)));
	  return;

	default:
	  gcc_unreachable ();
	}
      break;

    case V4SImode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
	{
	  use_vec_merge = true;
	  break;
	}

      if (TARGET_SSE2)
	{
	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
	     store into element 0, then shuffle them back.  */

	  rtx order[4];

	  order[0] = GEN_INT (elt);
	  order[1] = const1_rtx;
	  order[2] = const2_rtx;
	  order[3] = GEN_INT (3);
	  order[elt] = const0_rtx;

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  ix86_expand_vector_set (false, target, val, 0);

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
				  gen_lowpart (SFmode, val), elt);
	}
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_merge = TARGET_SSE4_1;
      break;

    case V8QImode:
    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}

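/* Illustrative sketch, not part of GCC: the memory fallback at the end
   of ix86_expand_vector_set, as plain C.  The vector is spilled to a
   stack slot, one element is overwritten, and the whole vector is
   reloaded.  Types and names are hypothetical.  */
#if 0
#include <string.h>

typedef struct { float e[4]; } v4sf_sketch;

static v4sf_sketch
set_element_via_stack_sketch (v4sf_sketch target, float val, int elt)
{
  float mem[4];
  memcpy (mem, target.e, sizeof mem);	/* Spill the vector.  */
  mem[elt] = val;			/* Overwrite one element.  */
  memcpy (target.e, mem, sizeof mem);	/* Reload it.  */
  return target;
}
#endif
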
void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
	break;

      switch (elt)
	{
	case 0:
	  tmp = vec;
	  break;

	case 1:
	case 3:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
				       GEN_INT (elt), GEN_INT (elt),
				       GEN_INT (elt+4), GEN_INT (elt+4)));
	  break;

	case 2:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_unpckhps (tmp, vec, vec));
	  break;

	default:
	  gcc_unreachable ();
	}
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
	break;

      if (TARGET_SSE2)
	{
	  switch (elt)
	    {
	    case 0:
	      tmp = vec;
	      break;

	    case 1:
	    case 3:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					    GEN_INT (elt), GEN_INT (elt),
					    GEN_INT (elt), GEN_INT (elt)));
	      break;

	    case 2:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  vec = tmp;
	  use_vec_extr = true;
	  elt = 0;
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				      gen_lowpart (V4SFmode, vec), elt);
	  return;
	}
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_extr = TARGET_SSE4_1;
      break;

    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == QImode || inner_mode == HImode)
	{
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);
	}

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}

/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
   pattern to reduce; DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx tmp1, tmp2, tmp3;

  tmp1 = gen_reg_rtx (V4SFmode);
  tmp2 = gen_reg_rtx (V4SFmode);
  tmp3 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_sse_movhlps (tmp1, in, in));
  emit_insn (fn (tmp2, tmp1, in));

  emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
			       GEN_INT (1), GEN_INT (1),
			       GEN_INT (1+4), GEN_INT (1+4)));
  emit_insn (fn (dest, tmp2, tmp3));
}
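
/* Illustrative sketch, not part of GCC: the log-step reduction above in
   plain C, for FN == addition.  movhlps folds the high half onto the
   low half; the shufps then pairs element 1 with element 0 so one more
   application of FN combines everything into element 0.  Names are
   hypothetical.  */
#if 0
static float
reduc_plus_sketch (const float in[4])
{
  float tmp2_0 = in[2] + in[0];	/* High half folded onto low half...  */
  float tmp2_1 = in[3] + in[1];
  return tmp2_0 + tmp2_1;	/* ...then the two partial sums combined.  */
}
#endif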
\f
/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return true;
  else if (mode == TFmode)
    return TARGET_64BIT;
  else
    return default_scalar_mode_supported_p (mode);
}

/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}

/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
ix86_c_mode_for_suffix (char suffix)
{
  if (TARGET_64BIT && suffix == 'q')
    return TFmode;
  if (TARGET_MMX && suffix == 'w')
    return XFmode;

  return VOIDmode;
}

/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
		      tree inputs ATTRIBUTE_UNUSED,
		      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
			clobbers);
  return clobbers;
}

/* Implements the target hook targetm.asm.encode_section_info.  This
   is not used by netware.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}

/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
	  ? reverse_condition (code)
	  : reverse_condition_maybe_unordered (code));
}

/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      if (REG_P (operands[1])
	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
	  if (REGNO (operands[0]) == FIRST_STACK_REG)
	    return output_387_ffreep (operands, 0);
	  return "fstp\t%y0";
	}
      if (STACK_TOP_P (operands[0]))
	return "fld%z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%z0\t%y0";
      else
	{
	  /* There is no non-popping store to memory for XFmode.
	     So if we need one, follow the store with a load.  */
	  if (GET_MODE (operands[0]) == XFmode)
	    return "fstp%z0\t%y0\n\tfld%z0\t%y0";
	  else
	    return "fst%z0\t%y0";
	}
    }
  else
    gcc_unreachable ();
}

/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);

  emit_jump_insn (temp);
  predict_jump (REG_BR_PROB_BASE * 10 / 100);
}

/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
	       CONST_DOUBLE_FROM_REAL_VALUE (
		 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
		 XFmode)));
  emit_jump_insn (gen_bge (label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
}

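/* Illustrative sketch, not part of GCC: the branch above in plain C.
   fyl2xp1 is only specified for |x| < 1 - sqrt(2)/2 ~= 0.2928932...;
   above that threshold the code falls back to computing log(1 + x)
   directly.  A hypothetical helper using <math.h>, for exposition
   only.  */
#if 0
#include <math.h>

static long double
log1p_sketch (long double x)
{
  const long double ln2 = 0.6931471805599453094172321214581766L;
  if (fabsl (x) < 0.29289321881345247561810596348408353L)
    /* fyl2xp1 path: it computes y * log2 (x + 1) without forming
       1 + x, which preserves precision for tiny x.  */
    return ln2 * log2l (1.0L + x);
  else
    /* fyl2x path on the explicitly formed 1 + x.  */
    return logl (1.0L + x);
}
#endif
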
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */

void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
{
  rtx x0, x1, e0, e1, two;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  x1 = gen_reg_rtx (mode);

  two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);

  if (VECTOR_MODE_P (mode))
    two = ix86_build_const_vector (SFmode, true, two);

  two = force_reg (mode, two);

  /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */

  /* x0 = 1./b estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
					  UNSPEC_RCP)));
  /* e0 = x0 * b */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, b)));
  /* e1 = 2. - e0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_MINUS (mode, two, e0)));
  /* x1 = x0 * e1 */
  emit_insn (gen_rtx_SET (VOIDmode, x1,
			  gen_rtx_MULT (mode, x0, e1)));
  /* res = a * x1 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, a, x1)));
}

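/* Illustrative sketch, not part of GCC: one Newton-Raphson refinement
   step for the reciprocal, in plain C.  rcpss gives roughly 12 bits of
   precision; x1 = x0 * (2 - b * x0) roughly doubles that.  The
   estimate argument stands in for the hardware instruction and is
   hypothetical.  */
#if 0
static float
swdiv_sketch (float a, float b, float rcp_estimate_of_b)
{
  float x0 = rcp_estimate_of_b;	/* ~12-bit approximation of 1/b.  */
  float e0 = x0 * b;		/* Close to 1 when x0 is accurate.  */
  float e1 = 2.0f - e0;
  float x1 = x0 * e1;		/* Refined approximation of 1/b.  */
  return a * x1;
}
#endif
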
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */

void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
			 bool recip)
{
  rtx x0, e0, e1, e2, e3, three, half, zero, mask;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  e2 = gen_reg_rtx (mode);
  e3 = gen_reg_rtx (mode);

  three = CONST_DOUBLE_FROM_REAL_VALUE (dconst3, SFmode);
  half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode);

  mask = gen_reg_rtx (mode);

  if (VECTOR_MODE_P (mode))
    {
      three = ix86_build_const_vector (SFmode, true, three);
      half = ix86_build_const_vector (SFmode, true, half);
    }

  three = force_reg (mode, three);
  half = force_reg (mode, half);

  zero = force_reg (mode, CONST0_RTX (mode));

  /* sqrt(a)       = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
     1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */

  /* Compare a to zero.  */
  emit_insn (gen_rtx_SET (VOIDmode, mask,
			  gen_rtx_NE (mode, a, zero)));

  /* x0 = 1./sqrt(a) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
					  UNSPEC_RSQRT)));
  /* Filter out infinity.  */
  if (VECTOR_MODE_P (mode))
    emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (V4SFmode, x0),
			    gen_rtx_AND (mode,
					 gen_lowpart (V4SFmode, x0),
					 gen_lowpart (V4SFmode, mask))));
  else
    emit_insn (gen_rtx_SET (VOIDmode, x0,
			    gen_rtx_AND (mode, x0, mask)));

  /* e0 = x0 * a */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, a)));
  /* e1 = e0 * x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_MULT (mode, e0, x0)));
  /* e2 = 3. - e1 */
  emit_insn (gen_rtx_SET (VOIDmode, e2,
			  gen_rtx_MINUS (mode, three, e1)));
  if (recip)
    /* e3 = .5 * x0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, half, x0)));
  else
    /* e3 = .5 * e0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, half, e0)));
  /* ret = e2 * e3 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, e2, e3)));
}

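/* Illustrative sketch, not part of GCC: the rsqrt refinement above in
   plain C.  The zero mask zeroes the estimate when a == 0, so the
   non-reciprocal path yields sqrt(0) = 0 rather than 0 * inf = NaN.
   The estimate argument stands in for rsqrtss and is hypothetical.  */
#if 0
static float
swsqrt_sketch (float a, float rsqrt_estimate_of_a, int recip)
{
  float x0 = (a != 0.0f) ? rsqrt_estimate_of_a : 0.0f;
  float e0 = x0 * a;
  float e1 = e0 * x0;
  float e2 = 3.0f - e1;
  float e3 = 0.5f * (recip ? x0 : e0);
  return e2 * e3;	/* recip ? ~1/sqrt(a) : ~sqrt(a).  */
}
#endif
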
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void ATTRIBUTE_UNUSED
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  default_elf_asm_named_section (name, flags, decl);
}

/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_type (tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}

/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
	 ? default_external_stack_protect_fail ()
	 : default_hidden_stack_protect_fail ();
}

/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
\f
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign-bit.  */
static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
	{
	  /* We need to generate a scalar mode mask in this case.  */
	  rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
	  tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
	  mask = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
	}
    }
  else
    mask = gen_rtx_NOT (mode, mask);
  emit_insn (gen_rtx_SET (VOIDmode, sgn,
			  gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
			  gen_rtx_IOR (mode, abs_value, sgn)));
}

/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */
static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    }
  emit_insn (gen_rtx_SET (VOIDmode, xa,
			  gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}

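/* Illustrative sketch, not part of GCC: the bit tricks behind the two
   helpers above, in plain C for 32-bit floats.  fabs clears the sign
   bit with an AND; copysign ORs the saved sign bit back in.  Names are
   hypothetical.  */
#if 0
#include <stdint.h>
#include <string.h>

static float
copysign_sketch (float abs_value, float sign)
{
  uint32_t a, s;
  memcpy (&a, &abs_value, 4);
  memcpy (&s, &sign, 4);
  a &= 0x7fffffffu;		/* fabs: mask out the sign bit.  */
  a |= s & 0x80000000u;		/* copysign: merge the sign of SIGN.  */
  memcpy (&abs_value, &a, 4);
  return abs_value;
}
#endif
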
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */
static rtx
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
				  bool swap_operands)
{
  rtx label, tmp;

  if (swap_operands)
    {
      tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  label = gen_label_rtx ();
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}

/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
			      bool swap_operands)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (swap_operands)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  if (mode == DFmode)
    emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
				    gen_rtx_fmt_ee (code, mode, op0, op1)));
  else
    emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
				   gen_rtx_fmt_ee (code, mode, op0, op1)));

  return mask;
}

/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
static rtx
ix86_gen_TWO52 (enum machine_mode mode)
{
  REAL_VALUE_TYPE TWO52r;
  rtx TWO52;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);

  return TWO52;
}

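/* Illustrative sketch, not part of GCC: the 2**52 trick the rounding
   expanders below build on.  For 0 <= x < 2**52, x + 2**52 forces the
   fraction out of the double's 52-bit mantissa, so subtracting 2**52
   again leaves x rounded to an integer in the current rounding mode.
   A hypothetical demonstration, assuming round-to-nearest.  */
#if 0
static double
round_via_two52_sketch (double xa)	/* Requires 0 <= xa < 2**52.  */
{
  const double two52 = 4503599627370496.0;	/* 2**52 */
  double t = xa + two52;	/* Fraction bits are rounded away.  */
  return t - two52;		/* Integral result, nearest-even ties.  */
}
#endif
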
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
	tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
	return (long)tmp;
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
}

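/* Illustrative sketch, not part of GCC: why lround above adds
   nextafter (0.5, 0.0) instead of a plain 0.5.  For the largest double
   below 0.5, adding a full 0.5 would round up to 1.0 under
   round-to-nearest, giving lround (0.49999999999999994) == 1; adding
   the predecessor of 0.5 avoids that.  Hypothetical demonstration
   using <math.h>.  */
#if 0
#include <math.h>

static long
lround_sketch (double x)
{
  double adj = nextafter (0.5, 0.0);	/* 0.49999999999999994...  */
  adj = copysign (adj, x);
  return (long) (x + adj);		/* Truncating conversion.  */
}
#endif
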
/* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
   into OP0.  */
void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
	xi = (long)op1;
	xi -= (double)xi > op1 ? 1 : 0;
	return xi;
   */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label, tmp;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
					    freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
			     ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}

/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */
void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	xa = fabs (operand1);
	if (!isless (xa, 2**52))
	  return operand1;
	xa = xa + 2**52 - 2**52;
	return copysign (xa, operand1);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa = xa + TWO52 - TWO52;
	x2 = copysign (xa, x);
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 -= -1;
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
		   const_double_from_real_value (do_floor
						 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), xa2, x2;
	if (!isless (xa, TWO52))
	  return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
	xa2 = xa + TWO52 - TWO52;
     Compensate.
	dxa = xa2 - xa;
	if (dxa <= -0.5)
	  xa2 += 1;
	else if (dxa > 0.5)
	  xa2 -= 1;
	x2 = copysign (xa2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; */
  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; */
  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
			       0, OPTAB_DIRECT);

  /* Compensate.  */
  tmp = gen_reg_rtx (mode);
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa2 = xa + TWO52 - TWO52;
     Compensate:
	if (xa2 > xa)
	  xa2 -= 1.0;
	x2 = copysign (xa2, x);
	return x2;
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
			  gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
			     res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	double xa = fabs (x);
	if (!isless (xa, TWO52))
	  return x;
	xa = (double)(long)(xa + nextafter (0.5, 0.0));
	return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

\f
/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true,  ix86_handle_cconv_attribute },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL }
};

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS	\
  (TARGET_DEFAULT			\
   | TARGET_SUBTARGET_DEFAULT		\
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)

#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
23591
23592#undef TARGET_MD_ASM_CLOBBERS
23593#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
23594
23595#undef TARGET_PROMOTE_PROTOTYPES
23596#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
23597#undef TARGET_STRUCT_VALUE_RTX
23598#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
23599#undef TARGET_SETUP_INCOMING_VARARGS
23600#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
23601#undef TARGET_MUST_PASS_IN_STACK
23602#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
23603#undef TARGET_PASS_BY_REFERENCE
23604#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
23605#undef TARGET_INTERNAL_ARG_POINTER
23606#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
23607#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
23608#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
ccf8e764
RH
23609#undef TARGET_STRICT_ARGUMENT_NAMING
23610#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
2ed941ec
RH
23611
23612#undef TARGET_GIMPLIFY_VA_ARG_EXPR
23613#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
23614
23615#undef TARGET_SCALAR_MODE_SUPPORTED_P
23616#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
23617
23618#undef TARGET_VECTOR_MODE_SUPPORTED_P
23619#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
23620
c77cd3d1
UB
23621#undef TARGET_C_MODE_FOR_SUFFIX
23622#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
23623
2ed941ec
RH
23624#ifdef HAVE_AS_TLS
23625#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
23626#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
23627#endif
23628
23629#ifdef SUBTARGET_INSERT_ATTRIBUTES
23630#undef TARGET_INSERT_ATTRIBUTES
23631#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
23632#endif
23633
608063c3
JB
23634#undef TARGET_MANGLE_TYPE
23635#define TARGET_MANGLE_TYPE ix86_mangle_type
2ed941ec
RH
23636
23637#undef TARGET_STACK_PROTECT_FAIL
23638#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
23639
23640#undef TARGET_FUNCTION_VALUE
23641#define TARGET_FUNCTION_VALUE ix86_function_value
23642
23643struct gcc_target targetm = TARGET_INITIALIZER;
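
/* For illustration (schematic, not part of this file): target-independent
   code reaches the routines above only through this hook vector, e.g.

     if (targetm.have_tls && targetm.binds_local_p (decl))
       ...

   The hooks shown are examples; the full set of fields and their
   signatures is declared in target.h.  */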
\f
#include "gt-i386.h"