]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/i386.c
i386-protos.h (x86_function_profiler): New function
[thirdparty/gcc.git] / gcc / config / i386 / i386.c
CommitLineData
e075ae69 1/* Subroutines used for code generation on IA-32.
07933f72
GS
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
2a2ab3f9
JVA
4
5This file is part of GNU CC.
6
7GNU CC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GNU CC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU CC; see the file COPYING. If not, write to
97aadbb9 19the Free Software Foundation, 59 Temple Place - Suite 330,
0f290768 20Boston, MA 02111-1307, USA. */
2a2ab3f9 21
2a2ab3f9 22#include "config.h"
bb5177ac 23#include "system.h"
2a2ab3f9 24#include "rtl.h"
6baf1cc8
BS
25#include "tree.h"
26#include "tm_p.h"
2a2ab3f9
JVA
27#include "regs.h"
28#include "hard-reg-set.h"
29#include "real.h"
30#include "insn-config.h"
31#include "conditions.h"
2a2ab3f9
JVA
32#include "output.h"
33#include "insn-attr.h"
2a2ab3f9 34#include "flags.h"
a8ffcc81 35#include "except.h"
ecbc4695 36#include "function.h"
00c79232 37#include "recog.h"
ced8dd8c 38#include "expr.h"
e78d8e51 39#include "optabs.h"
f103890b 40#include "toplev.h"
e075ae69 41#include "basic-block.h"
1526a060 42#include "ggc.h"
672a6f42
NB
43#include "target.h"
44#include "target-def.h"
f1e639b1 45#include "langhooks.h"
2a2ab3f9 46
8dfe5673 47#ifndef CHECK_STACK_LIMIT
07933f72 48#define CHECK_STACK_LIMIT (-1)
8dfe5673
RK
49#endif
50
2ab0437e 51/* Processor costs (relative to an add) */
fce5a9f2 52static const
2ab0437e
JH
53struct processor_costs size_cost = { /* costs for tunning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
44cf5b6a
JH
61 3, /* cost of movsx */
62 3, /* cost of movzx */
2ab0437e
JH
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of loading integer registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
f4365627
JH
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
229b303a
RS
87 2, /* cost of FADD and FSUB insns. */
88 2, /* cost of FMUL instruction. */
89 2, /* cost of FDIV instruction. */
90 2, /* cost of FABS instruction. */
91 2, /* cost of FCHS instruction. */
92 2, /* cost of FSQRT instruction. */
2ab0437e 93};
229b303a 94
32b5b1aa 95/* Processor costs (relative to an add) */
fce5a9f2 96static const
32b5b1aa 97struct processor_costs i386_cost = { /* 386 specific costs */
e9a25f70 98 1, /* cost of an add instruction */
32b5b1aa
SC
99 1, /* cost of a lea instruction */
100 3, /* variable shift costs */
101 2, /* constant shift costs */
102 6, /* cost of starting a multiply */
103 1, /* cost of multiply per each bit set */
e075ae69 104 23, /* cost of a divide/mod */
44cf5b6a
JH
105 3, /* cost of movsx */
106 2, /* cost of movzx */
96e7ae40 107 15, /* "large" insn */
e2e52e1b 108 3, /* MOVE_RATIO */
7c6b971d 109 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
110 {2, 4, 2}, /* cost of loading integer registers
111 in QImode, HImode and SImode.
0f290768 112 Relative to reg-reg move (2). */
96e7ae40
JH
113 {2, 4, 2}, /* cost of storing integer registers */
114 2, /* cost of reg,reg fld/fst */
115 {8, 8, 8}, /* cost of loading fp registers
116 in SFmode, DFmode and XFmode */
fa79946e
JH
117 {8, 8, 8}, /* cost of loading integer registers */
118 2, /* cost of moving MMX register */
119 {4, 8}, /* cost of loading MMX registers
120 in SImode and DImode */
121 {4, 8}, /* cost of storing MMX registers
122 in SImode and DImode */
123 2, /* cost of moving SSE register */
124 {4, 8, 16}, /* cost of loading SSE registers
125 in SImode, DImode and TImode */
126 {4, 8, 16}, /* cost of storing SSE registers
127 in SImode, DImode and TImode */
128 3, /* MMX or SSE register to integer */
f4365627
JH
129 0, /* size of prefetch block */
130 0, /* number of parallel prefetches */
229b303a
RS
131 23, /* cost of FADD and FSUB insns. */
132 27, /* cost of FMUL instruction. */
133 88, /* cost of FDIV instruction. */
134 22, /* cost of FABS instruction. */
135 24, /* cost of FCHS instruction. */
136 122, /* cost of FSQRT instruction. */
32b5b1aa
SC
137};
138
fce5a9f2 139static const
32b5b1aa
SC
140struct processor_costs i486_cost = { /* 486 specific costs */
141 1, /* cost of an add instruction */
142 1, /* cost of a lea instruction */
143 3, /* variable shift costs */
144 2, /* constant shift costs */
145 12, /* cost of starting a multiply */
146 1, /* cost of multiply per each bit set */
e075ae69 147 40, /* cost of a divide/mod */
44cf5b6a
JH
148 3, /* cost of movsx */
149 2, /* cost of movzx */
96e7ae40 150 15, /* "large" insn */
e2e52e1b 151 3, /* MOVE_RATIO */
7c6b971d 152 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
0f290768 155 Relative to reg-reg move (2). */
96e7ae40
JH
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
fa79946e
JH
160 {8, 8, 8}, /* cost of loading integer registers */
161 2, /* cost of moving MMX register */
162 {4, 8}, /* cost of loading MMX registers
163 in SImode and DImode */
164 {4, 8}, /* cost of storing MMX registers
165 in SImode and DImode */
166 2, /* cost of moving SSE register */
167 {4, 8, 16}, /* cost of loading SSE registers
168 in SImode, DImode and TImode */
169 {4, 8, 16}, /* cost of storing SSE registers
170 in SImode, DImode and TImode */
f4365627
JH
171 3, /* MMX or SSE register to integer */
172 0, /* size of prefetch block */
173 0, /* number of parallel prefetches */
229b303a
RS
174 8, /* cost of FADD and FSUB insns. */
175 16, /* cost of FMUL instruction. */
176 73, /* cost of FDIV instruction. */
177 3, /* cost of FABS instruction. */
178 3, /* cost of FCHS instruction. */
179 83, /* cost of FSQRT instruction. */
32b5b1aa
SC
180};
181
fce5a9f2 182static const
e5cb57e8 183struct processor_costs pentium_cost = {
32b5b1aa
SC
184 1, /* cost of an add instruction */
185 1, /* cost of a lea instruction */
856b07a1 186 4, /* variable shift costs */
e5cb57e8 187 1, /* constant shift costs */
856b07a1
SC
188 11, /* cost of starting a multiply */
189 0, /* cost of multiply per each bit set */
e075ae69 190 25, /* cost of a divide/mod */
44cf5b6a
JH
191 3, /* cost of movsx */
192 2, /* cost of movzx */
96e7ae40 193 8, /* "large" insn */
e2e52e1b 194 6, /* MOVE_RATIO */
7c6b971d 195 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
196 {2, 4, 2}, /* cost of loading integer registers
197 in QImode, HImode and SImode.
0f290768 198 Relative to reg-reg move (2). */
96e7ae40
JH
199 {2, 4, 2}, /* cost of storing integer registers */
200 2, /* cost of reg,reg fld/fst */
201 {2, 2, 6}, /* cost of loading fp registers
202 in SFmode, DFmode and XFmode */
fa79946e
JH
203 {4, 4, 6}, /* cost of loading integer registers */
204 8, /* cost of moving MMX register */
205 {8, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {8, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
f4365627
JH
214 3, /* MMX or SSE register to integer */
215 0, /* size of prefetch block */
216 0, /* number of parallel prefetches */
229b303a
RS
217 3, /* cost of FADD and FSUB insns. */
218 3, /* cost of FMUL instruction. */
219 39, /* cost of FDIV instruction. */
220 1, /* cost of FABS instruction. */
221 1, /* cost of FCHS instruction. */
222 70, /* cost of FSQRT instruction. */
32b5b1aa
SC
223};
224
fce5a9f2 225static const
856b07a1
SC
226struct processor_costs pentiumpro_cost = {
227 1, /* cost of an add instruction */
228 1, /* cost of a lea instruction */
e075ae69 229 1, /* variable shift costs */
856b07a1 230 1, /* constant shift costs */
369e59b1 231 4, /* cost of starting a multiply */
856b07a1 232 0, /* cost of multiply per each bit set */
e075ae69 233 17, /* cost of a divide/mod */
44cf5b6a
JH
234 1, /* cost of movsx */
235 1, /* cost of movzx */
96e7ae40 236 8, /* "large" insn */
e2e52e1b 237 6, /* MOVE_RATIO */
7c6b971d 238 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
239 {4, 4, 4}, /* cost of loading integer registers
240 in QImode, HImode and SImode.
0f290768 241 Relative to reg-reg move (2). */
96e7ae40
JH
242 {2, 2, 2}, /* cost of storing integer registers */
243 2, /* cost of reg,reg fld/fst */
244 {2, 2, 6}, /* cost of loading fp registers
245 in SFmode, DFmode and XFmode */
fa79946e
JH
246 {4, 4, 6}, /* cost of loading integer registers */
247 2, /* cost of moving MMX register */
248 {2, 2}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {2, 2}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {2, 2, 8}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {2, 2, 8}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
f4365627
JH
257 3, /* MMX or SSE register to integer */
258 32, /* size of prefetch block */
259 6, /* number of parallel prefetches */
229b303a
RS
260 3, /* cost of FADD and FSUB insns. */
261 5, /* cost of FMUL instruction. */
262 56, /* cost of FDIV instruction. */
263 2, /* cost of FABS instruction. */
264 2, /* cost of FCHS instruction. */
265 56, /* cost of FSQRT instruction. */
856b07a1
SC
266};
267
fce5a9f2 268static const
a269a03c
JC
269struct processor_costs k6_cost = {
270 1, /* cost of an add instruction */
e075ae69 271 2, /* cost of a lea instruction */
a269a03c
JC
272 1, /* variable shift costs */
273 1, /* constant shift costs */
73fe76e4 274 3, /* cost of starting a multiply */
a269a03c 275 0, /* cost of multiply per each bit set */
e075ae69 276 18, /* cost of a divide/mod */
44cf5b6a
JH
277 2, /* cost of movsx */
278 2, /* cost of movzx */
96e7ae40 279 8, /* "large" insn */
e2e52e1b 280 4, /* MOVE_RATIO */
7c6b971d 281 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
282 {4, 5, 4}, /* cost of loading integer registers
283 in QImode, HImode and SImode.
0f290768 284 Relative to reg-reg move (2). */
96e7ae40
JH
285 {2, 3, 2}, /* cost of storing integer registers */
286 4, /* cost of reg,reg fld/fst */
287 {6, 6, 6}, /* cost of loading fp registers
288 in SFmode, DFmode and XFmode */
fa79946e
JH
289 {4, 4, 4}, /* cost of loading integer registers */
290 2, /* cost of moving MMX register */
291 {2, 2}, /* cost of loading MMX registers
292 in SImode and DImode */
293 {2, 2}, /* cost of storing MMX registers
294 in SImode and DImode */
295 2, /* cost of moving SSE register */
296 {2, 2, 8}, /* cost of loading SSE registers
297 in SImode, DImode and TImode */
298 {2, 2, 8}, /* cost of storing SSE registers
299 in SImode, DImode and TImode */
f4365627
JH
300 6, /* MMX or SSE register to integer */
301 32, /* size of prefetch block */
302 1, /* number of parallel prefetches */
229b303a
RS
303 2, /* cost of FADD and FSUB insns. */
304 2, /* cost of FMUL instruction. */
4f770e7b
RS
305 56, /* cost of FDIV instruction. */
306 2, /* cost of FABS instruction. */
229b303a
RS
307 2, /* cost of FCHS instruction. */
308 56, /* cost of FSQRT instruction. */
a269a03c
JC
309};
310
fce5a9f2 311static const
309ada50
JH
312struct processor_costs athlon_cost = {
313 1, /* cost of an add instruction */
0b5107cf 314 2, /* cost of a lea instruction */
309ada50
JH
315 1, /* variable shift costs */
316 1, /* constant shift costs */
317 5, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
0b5107cf 319 42, /* cost of a divide/mod */
44cf5b6a
JH
320 1, /* cost of movsx */
321 1, /* cost of movzx */
309ada50 322 8, /* "large" insn */
e2e52e1b 323 9, /* MOVE_RATIO */
309ada50 324 4, /* cost for loading QImode using movzbl */
b72b1c29 325 {3, 4, 3}, /* cost of loading integer registers
309ada50 326 in QImode, HImode and SImode.
0f290768 327 Relative to reg-reg move (2). */
b72b1c29 328 {3, 4, 3}, /* cost of storing integer registers */
309ada50 329 4, /* cost of reg,reg fld/fst */
b72b1c29 330 {4, 4, 12}, /* cost of loading fp registers
309ada50 331 in SFmode, DFmode and XFmode */
b72b1c29 332 {6, 6, 8}, /* cost of loading integer registers */
fa79946e 333 2, /* cost of moving MMX register */
b72b1c29 334 {4, 4}, /* cost of loading MMX registers
fa79946e 335 in SImode and DImode */
b72b1c29 336 {4, 4}, /* cost of storing MMX registers
fa79946e
JH
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
b72b1c29 339 {4, 4, 6}, /* cost of loading SSE registers
fa79946e 340 in SImode, DImode and TImode */
b72b1c29 341 {4, 4, 5}, /* cost of storing SSE registers
fa79946e 342 in SImode, DImode and TImode */
b72b1c29 343 5, /* MMX or SSE register to integer */
f4365627
JH
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
229b303a
RS
346 4, /* cost of FADD and FSUB insns. */
347 4, /* cost of FMUL instruction. */
348 24, /* cost of FDIV instruction. */
349 2, /* cost of FABS instruction. */
350 2, /* cost of FCHS instruction. */
351 35, /* cost of FSQRT instruction. */
309ada50
JH
352};
353
fce5a9f2 354static const
b4e89e2d
JH
355struct processor_costs pentium4_cost = {
356 1, /* cost of an add instruction */
357 1, /* cost of a lea instruction */
358 8, /* variable shift costs */
359 8, /* constant shift costs */
360 30, /* cost of starting a multiply */
361 0, /* cost of multiply per each bit set */
362 112, /* cost of a divide/mod */
44cf5b6a
JH
363 1, /* cost of movsx */
364 1, /* cost of movzx */
b4e89e2d
JH
365 16, /* "large" insn */
366 6, /* MOVE_RATIO */
367 2, /* cost for loading QImode using movzbl */
368 {4, 5, 4}, /* cost of loading integer registers
369 in QImode, HImode and SImode.
370 Relative to reg-reg move (2). */
371 {2, 3, 2}, /* cost of storing integer registers */
372 2, /* cost of reg,reg fld/fst */
373 {2, 2, 6}, /* cost of loading fp registers
374 in SFmode, DFmode and XFmode */
375 {4, 4, 6}, /* cost of loading integer registers */
376 2, /* cost of moving MMX register */
377 {2, 2}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {2, 2}, /* cost of storing MMX registers
380 in SImode and DImode */
381 12, /* cost of moving SSE register */
382 {12, 12, 12}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {2, 2, 8}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 10, /* MMX or SSE register to integer */
f4365627
JH
387 64, /* size of prefetch block */
388 6, /* number of parallel prefetches */
229b303a
RS
389 5, /* cost of FADD and FSUB insns. */
390 7, /* cost of FMUL instruction. */
391 43, /* cost of FDIV instruction. */
392 2, /* cost of FABS instruction. */
393 2, /* cost of FCHS instruction. */
394 43, /* cost of FSQRT instruction. */
b4e89e2d
JH
395};
396
8b60264b 397const struct processor_costs *ix86_cost = &pentium_cost;
32b5b1aa 398
a269a03c
JC
399/* Processor feature/optimization bitmasks. */
400#define m_386 (1<<PROCESSOR_I386)
401#define m_486 (1<<PROCESSOR_I486)
402#define m_PENT (1<<PROCESSOR_PENTIUM)
403#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
404#define m_K6 (1<<PROCESSOR_K6)
309ada50 405#define m_ATHLON (1<<PROCESSOR_ATHLON)
b4e89e2d 406#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
a269a03c 407
309ada50 408const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
b4e89e2d 409const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
a269a03c 410const int x86_zero_extend_with_and = m_486 | m_PENT;
b4e89e2d 411const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
e075ae69 412const int x86_double_with_add = ~m_386;
a269a03c 413const int x86_use_bit_test = m_386;
e2e52e1b 414const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
b4e89e2d 415const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
47f339cf 416const int x86_3dnow_a = m_ATHLON;
b4e89e2d 417const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
ef6257cd 418const int x86_branch_hints = m_PENT4;
b4e89e2d 419const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
e075ae69
RH
420const int x86_partial_reg_stall = m_PPRO;
421const int x86_use_loop = m_K6;
309ada50 422const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
e075ae69
RH
423const int x86_use_mov0 = m_K6;
424const int x86_use_cltd = ~(m_PENT | m_K6);
425const int x86_read_modify_write = ~m_PENT;
426const int x86_read_modify = ~(m_PENT | m_PPRO);
427const int x86_split_long_moves = m_PPRO;
285464d0
JH
428const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
429const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
b4e89e2d 430const int x86_single_stringop = m_386 | m_PENT4;
d9f32422
JH
431const int x86_qimode_math = ~(0);
432const int x86_promote_qi_regs = 0;
433const int x86_himode_math = ~(m_PPRO);
434const int x86_promote_hi_regs = m_PPRO;
b4e89e2d
JH
435const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
436const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
437const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
438const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
77966be3 439const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
b4e89e2d
JH
440const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
441const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
c6036a37
JH
442const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
443const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
444const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
b972dd02 445const int x86_decompose_lea = m_PENT4;
495333a6 446const int x86_shift1 = ~m_486;
285464d0 447const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
a269a03c 448
6ab16dd9
JH
449/* In case the avreage insn count for single function invocation is
450 lower than this constant, emit fast (but longer) prologue and
451 epilogue code. */
452#define FAST_PROLOGUE_INSN_COUNT 30
5bf0ebab 453
6ab16dd9
JH
454/* Set by prologue expander and used by epilogue expander to determine
455 the style used. */
456static int use_fast_prologue_epilogue;
457
5bf0ebab
RH
458/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
459static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
460static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
461static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
4c0d89b5
RS
462
463/* Array of the smallest class containing reg number REGNO, indexed by
0f290768 464 REGNO. Used by REGNO_REG_CLASS in i386.h. */
4c0d89b5 465
e075ae69 466enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
467{
468 /* ax, dx, cx, bx */
ab408a86 469 AREG, DREG, CREG, BREG,
4c0d89b5 470 /* si, di, bp, sp */
e075ae69 471 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
472 /* FP registers */
473 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 474 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 475 /* arg pointer */
83774849 476 NON_Q_REGS,
564d80f4 477 /* flags, fpsr, dirflag, frame */
a7180f70
BS
478 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
479 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
480 SSE_REGS, SSE_REGS,
481 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
3d117b30
JH
482 MMX_REGS, MMX_REGS,
483 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
484 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
485 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
486 SSE_REGS, SSE_REGS,
4c0d89b5 487};
c572e5ba 488
3d117b30 489/* The "default" register map used in 32bit mode. */
83774849 490
0f290768 491int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
492{
493 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
494 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
3d117b30 495 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
496 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
497 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
3d117b30
JH
498 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
499 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
500};
501
5bf0ebab
RH
502static int const x86_64_int_parameter_registers[6] =
503{
504 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
505 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
506};
507
508static int const x86_64_int_return_registers[4] =
509{
510 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
511};
53c17031 512
0f7fa3d0
JH
513/* The "default" register map used in 64bit mode. */
514int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
515{
516 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
3de72741 517 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
0f7fa3d0
JH
518 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
519 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
520 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
521 8,9,10,11,12,13,14,15, /* extended integer registers */
522 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
523};
524
83774849
RH
525/* Define the register numbers to be used in Dwarf debugging information.
526 The SVR4 reference port C compiler uses the following register numbers
527 in its Dwarf output code:
528 0 for %eax (gcc regno = 0)
529 1 for %ecx (gcc regno = 2)
530 2 for %edx (gcc regno = 1)
531 3 for %ebx (gcc regno = 3)
532 4 for %esp (gcc regno = 7)
533 5 for %ebp (gcc regno = 6)
534 6 for %esi (gcc regno = 4)
535 7 for %edi (gcc regno = 5)
536 The following three DWARF register numbers are never generated by
537 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
538 believes these numbers have these meanings.
539 8 for %eip (no gcc equivalent)
540 9 for %eflags (gcc regno = 17)
541 10 for %trapno (no gcc equivalent)
542 It is not at all clear how we should number the FP stack registers
543 for the x86 architecture. If the version of SDB on x86/svr4 were
544 a bit less brain dead with respect to floating-point then we would
545 have a precedent to follow with respect to DWARF register numbers
546 for x86 FP registers, but the SDB on x86/svr4 is so completely
547 broken with respect to FP registers that it is hardly worth thinking
548 of it as something to strive for compatibility with.
549 The version of x86/svr4 SDB I have at the moment does (partially)
550 seem to believe that DWARF register number 11 is associated with
551 the x86 register %st(0), but that's about all. Higher DWARF
552 register numbers don't seem to be associated with anything in
553 particular, and even for DWARF regno 11, SDB only seems to under-
554 stand that it should say that a variable lives in %st(0) (when
555 asked via an `=' command) if we said it was in DWARF regno 11,
556 but SDB still prints garbage when asked for the value of the
557 variable in question (via a `/' command).
558 (Also note that the labels SDB prints for various FP stack regs
559 when doing an `x' command are all wrong.)
560 Note that these problems generally don't affect the native SVR4
561 C compiler because it doesn't allow the use of -O with -g and
562 because when it is *not* optimizing, it allocates a memory
563 location for each floating-point variable, and the memory
564 location is what gets described in the DWARF AT_location
565 attribute for the variable in question.
566 Regardless of the severe mental illness of the x86/svr4 SDB, we
567 do something sensible here and we use the following DWARF
568 register numbers. Note that these are all stack-top-relative
569 numbers.
570 11 for %st(0) (gcc regno = 8)
571 12 for %st(1) (gcc regno = 9)
572 13 for %st(2) (gcc regno = 10)
573 14 for %st(3) (gcc regno = 11)
574 15 for %st(4) (gcc regno = 12)
575 16 for %st(5) (gcc regno = 13)
576 17 for %st(6) (gcc regno = 14)
577 18 for %st(7) (gcc regno = 15)
578*/
0f290768 579int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
580{
581 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
582 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
3f3f2124 583 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
584 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
585 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
3f3f2124
JH
586 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */
587 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */
83774849
RH
588};
589
c572e5ba
JVA
590/* Test and compare insns in i386.md store the information needed to
591 generate branch and scc insns here. */
592
07933f72
GS
593rtx ix86_compare_op0 = NULL_RTX;
594rtx ix86_compare_op1 = NULL_RTX;
f5316dfe 595
f996902d
RH
596/* The encoding characters for the four TLS models present in ELF. */
597
755ac5d4 598static char const tls_model_chars[] = " GLil";
f996902d 599
7a2e09f4 600#define MAX_386_STACK_LOCALS 3
8362f420
JH
601/* Size of the register save area. */
602#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
36edd3cc
BS
603
604/* Define the structure for the machine field in struct function. */
e2500fed 605struct machine_function GTY(())
36edd3cc
BS
606{
607 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
f996902d 608 const char *some_ld_name;
8362f420 609 int save_varrargs_registers;
6fca22eb 610 int accesses_prev_frame;
36edd3cc
BS
611};
612
01d939e8 613#define ix86_stack_locals (cfun->machine->stack_locals)
8362f420 614#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
36edd3cc 615
4dd2ac2c
JH
616/* Structure describing stack frame layout.
617 Stack grows downward:
618
619 [arguments]
620 <- ARG_POINTER
621 saved pc
622
623 saved frame pointer if frame_pointer_needed
624 <- HARD_FRAME_POINTER
625 [saved regs]
626
627 [padding1] \
628 )
629 [va_arg registers] (
630 > to_allocate <- FRAME_POINTER
631 [frame] (
632 )
633 [padding2] /
634 */
635struct ix86_frame
636{
637 int nregs;
638 int padding1;
8362f420 639 int va_arg_size;
4dd2ac2c
JH
640 HOST_WIDE_INT frame;
641 int padding2;
642 int outgoing_arguments_size;
8362f420 643 int red_zone_size;
4dd2ac2c
JH
644
645 HOST_WIDE_INT to_allocate;
646 /* The offsets relative to ARG_POINTER. */
647 HOST_WIDE_INT frame_pointer_offset;
648 HOST_WIDE_INT hard_frame_pointer_offset;
649 HOST_WIDE_INT stack_pointer_offset;
650};
651
c93e80a5
JH
652/* Used to enable/disable debugging features. */
653const char *ix86_debug_arg_string, *ix86_debug_addr_string;
6189a572
JH
654/* Code model option as passed by user. */
655const char *ix86_cmodel_string;
656/* Parsed value. */
657enum cmodel ix86_cmodel;
80f33d06
GS
658/* Asm dialect. */
659const char *ix86_asm_string;
660enum asm_dialect ix86_asm_dialect = ASM_ATT;
f996902d
RH
661/* TLS dialext. */
662const char *ix86_tls_dialect_string;
663enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
6189a572 664
5bf0ebab 665/* Which unit we are generating floating point math for. */
965f5423
JH
666enum fpmath_unit ix86_fpmath;
667
5bf0ebab
RH
668/* Which cpu are we scheduling for. */
669enum processor_type ix86_cpu;
670/* Which instruction set architecture to use. */
671enum processor_type ix86_arch;
c8c5cb99
SC
672
673/* Strings to hold which cpu and instruction set architecture to use. */
9c23aa47
ZW
674const char *ix86_cpu_string; /* for -mcpu=<xxx> */
675const char *ix86_arch_string; /* for -march=<xxx> */
965f5423 676const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
c8c5cb99 677
0f290768 678/* # of registers to use to pass arguments. */
e075ae69 679const char *ix86_regparm_string;
e9a25f70 680
f4365627
JH
681/* true if sse prefetch instruction is not NOOP. */
682int x86_prefetch_sse;
683
e075ae69
RH
684/* ix86_regparm_string as a number */
685int ix86_regparm;
e9a25f70
JL
686
687/* Alignment to use for loops and jumps: */
688
0f290768 689/* Power of two alignment for loops. */
e075ae69 690const char *ix86_align_loops_string;
e9a25f70 691
0f290768 692/* Power of two alignment for non-loop jumps. */
e075ae69 693const char *ix86_align_jumps_string;
e9a25f70 694
3af4bd89 695/* Power of two alignment for stack boundary in bytes. */
e075ae69 696const char *ix86_preferred_stack_boundary_string;
3af4bd89
JH
697
698/* Preferred alignment for stack boundary in bits. */
e075ae69 699int ix86_preferred_stack_boundary;
3af4bd89 700
e9a25f70 701/* Values 1-5: see jump.c */
e075ae69
RH
702int ix86_branch_cost;
703const char *ix86_branch_cost_string;
e9a25f70 704
0f290768 705/* Power of two alignment for functions. */
e075ae69 706const char *ix86_align_funcs_string;
623fe810
RH
707
708/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
709static char internal_label_prefix[16];
710static int internal_label_prefix_len;
e075ae69 711\f
623fe810 712static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
f996902d 713static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
f6da8bc3
KG
714static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
715static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
e075ae69 716 int, int, FILE *));
f996902d
RH
717static const char *get_some_local_dynamic_name PARAMS ((void));
718static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
719static rtx maybe_get_pool_constant PARAMS ((rtx));
f6da8bc3 720static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
3a3677ff
RH
721static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
722 rtx *, rtx *));
f996902d 723static rtx get_thread_pointer PARAMS ((void));
145aacc2 724static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
f6da8bc3
KG
725static rtx gen_push PARAMS ((rtx));
726static int memory_address_length PARAMS ((rtx addr));
727static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
728static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
f6da8bc3
KG
729static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
730static void ix86_dump_ppro_packet PARAMS ((FILE *));
731static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
e2500fed 732static struct machine_function * ix86_init_machine_status PARAMS ((void));
2b589241 733static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
b531087a
KH
734static int ix86_nsaved_regs PARAMS ((void));
735static void ix86_emit_save_regs PARAMS ((void));
c6036a37 736static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
37a58036 737static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
bd09bdeb 738static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
0e4970d7 739static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
b531087a 740static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
55efb413 741static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
0945b39d 742static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
0945b39d
JH
743static rtx ix86_expand_aligntest PARAMS ((rtx, int));
744static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
c237e94a
ZW
745static int ix86_issue_rate PARAMS ((void));
746static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
747static void ix86_sched_init PARAMS ((FILE *, int, int));
748static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
749static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
9b690711
RH
750static int ia32_use_dfa_pipeline_interface PARAMS ((void));
751static int ia32_multipass_dfa_lookahead PARAMS ((void));
e37af218 752static void ix86_init_mmx_sse_builtins PARAMS ((void));
3961e8fe
RH
753static rtx x86_this_parameter PARAMS ((tree));
754static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
755 HOST_WIDE_INT, tree));
756static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
757 HOST_WIDE_INT, tree));
e075ae69
RH
758
759struct ix86_address
760{
761 rtx base, index, disp;
762 HOST_WIDE_INT scale;
763};
b08de47e 764
e075ae69 765static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
bd793c65 766
f996902d
RH
767static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
768static const char *ix86_strip_name_encoding PARAMS ((const char *))
769 ATTRIBUTE_UNUSED;
fb49053f 770
bd793c65 771struct builtin_description;
8b60264b
KG
772static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
773 tree, rtx));
774static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
775 tree, rtx));
bd793c65
BS
776static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
777static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
778static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
e37af218 779static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
bd793c65 780static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
c0c102a9
JH
781static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
782static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
783 enum rtx_code *,
784 enum rtx_code *,
785 enum rtx_code *));
9e7adcb3
JH
786static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
787 rtx *, rtx *));
788static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
789static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
790static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
791static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
bd09bdeb 792static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
9b690711 793static int ix86_save_reg PARAMS ((unsigned int, int));
4dd2ac2c 794static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
8d8e52be 795static int ix86_comp_type_attributes PARAMS ((tree, tree));
483ab821 796static int ix86_fntype_regparm PARAMS ((tree));
91d231cb
JM
797const struct attribute_spec ix86_attribute_table[];
798static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
799static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
b069de3b 800static int ix86_value_regno PARAMS ((enum machine_mode));
7c262518 801
21c318ba 802#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
2cc07db4
RH
803static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
804#endif
e56feed6 805
53c17031
JH
/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class,
   except that gcc will use an SF or DFmode move instead of DImode to
   avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };

/* Debug names for the classes above, indexed by enum x86_64_reg_class.  */
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
829
830#define MAX_CLASSES 4
831static int classify_argument PARAMS ((enum machine_mode, tree,
832 enum x86_64_reg_class [MAX_CLASSES],
833 int));
834static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
835 int *));
836static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
07933f72 837 const int *, int));
53c17031
JH
838static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
839 enum x86_64_reg_class));
672a6f42
NB
840\f
841/* Initialize the GCC target structure. */
91d231cb
JM
842#undef TARGET_ATTRIBUTE_TABLE
843#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
672a6f42 844#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
f5f4be42
NB
845# undef TARGET_MERGE_DECL_ATTRIBUTES
846# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
672a6f42
NB
847#endif
848
8d8e52be
JM
849#undef TARGET_COMP_TYPE_ATTRIBUTES
850#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
851
f6155fda
SS
852#undef TARGET_INIT_BUILTINS
853#define TARGET_INIT_BUILTINS ix86_init_builtins
854
855#undef TARGET_EXPAND_BUILTIN
856#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
857
bd09bdeb
RH
858#undef TARGET_ASM_FUNCTION_EPILOGUE
859#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
08c148a8 860
17b53c33
NB
861#undef TARGET_ASM_OPEN_PAREN
862#define TARGET_ASM_OPEN_PAREN ""
863#undef TARGET_ASM_CLOSE_PAREN
864#define TARGET_ASM_CLOSE_PAREN ""
865
301d03af
RS
866#undef TARGET_ASM_ALIGNED_HI_OP
867#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
868#undef TARGET_ASM_ALIGNED_SI_OP
869#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
870#ifdef ASM_QUAD
871#undef TARGET_ASM_ALIGNED_DI_OP
872#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
873#endif
874
875#undef TARGET_ASM_UNALIGNED_HI_OP
876#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
877#undef TARGET_ASM_UNALIGNED_SI_OP
878#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
879#undef TARGET_ASM_UNALIGNED_DI_OP
880#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
881
c237e94a
ZW
882#undef TARGET_SCHED_ADJUST_COST
883#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
884#undef TARGET_SCHED_ISSUE_RATE
885#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
886#undef TARGET_SCHED_VARIABLE_ISSUE
887#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
888#undef TARGET_SCHED_INIT
889#define TARGET_SCHED_INIT ix86_sched_init
890#undef TARGET_SCHED_REORDER
891#define TARGET_SCHED_REORDER ix86_sched_reorder
fce5a9f2 892#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
9b690711
RH
893#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
894 ia32_use_dfa_pipeline_interface
895#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
896#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
897 ia32_multipass_dfa_lookahead
c237e94a 898
f996902d
RH
899#ifdef HAVE_AS_TLS
900#undef TARGET_HAVE_TLS
901#define TARGET_HAVE_TLS true
902#endif
903
c590b625
RH
904#undef TARGET_ASM_OUTPUT_MI_THUNK
905#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
3961e8fe
RH
906#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
907#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
c590b625 908
f6897b10 909struct gcc_target targetm = TARGET_INITIALIZER;
e075ae69 910\f
f5316dfe
MM
911/* Sometimes certain combinations of command options do not make
912 sense on a particular target machine. You can define a macro
913 `OVERRIDE_OPTIONS' to take account of this. This macro, if
914 defined, is executed once just after all the command options have
915 been parsed.
916
917 Don't use this macro to turn on various extra optimizations for
918 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
919
920void
921override_options ()
922{
400500c4 923 int i;
e075ae69
RH
924 /* Comes from final.c -- no real reason to change it. */
925#define MAX_CODE_ALIGN 16
f5316dfe 926
c8c5cb99
SC
927 static struct ptt
928 {
8b60264b
KG
929 const struct processor_costs *cost; /* Processor costs */
930 const int target_enable; /* Target flags to enable. */
931 const int target_disable; /* Target flags to disable. */
932 const int align_loop; /* Default alignments. */
2cca7283 933 const int align_loop_max_skip;
8b60264b 934 const int align_jump;
2cca7283 935 const int align_jump_max_skip;
8b60264b
KG
936 const int align_func;
937 const int branch_cost;
e075ae69 938 }
0f290768 939 const processor_target_table[PROCESSOR_max] =
e075ae69 940 {
2cca7283
JH
941 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
942 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
943 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
944 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
945 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
946 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
947 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
e075ae69
RH
948 };
949
f4365627 950 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
e075ae69
RH
951 static struct pta
952 {
8b60264b
KG
953 const char *const name; /* processor name or nickname. */
954 const enum processor_type processor;
0dd0e980
JH
955 const enum pta_flags
956 {
957 PTA_SSE = 1,
958 PTA_SSE2 = 2,
959 PTA_MMX = 4,
f4365627 960 PTA_PREFETCH_SSE = 8,
0dd0e980
JH
961 PTA_3DNOW = 16,
962 PTA_3DNOW_A = 64
963 } flags;
e075ae69 964 }
0f290768 965 const processor_alias_table[] =
e075ae69 966 {
0dd0e980
JH
967 {"i386", PROCESSOR_I386, 0},
968 {"i486", PROCESSOR_I486, 0},
969 {"i586", PROCESSOR_PENTIUM, 0},
970 {"pentium", PROCESSOR_PENTIUM, 0},
971 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
3780101d
JG
972 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
973 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
974 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
0dd0e980
JH
975 {"i686", PROCESSOR_PENTIUMPRO, 0},
976 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
977 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
f4365627 978 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
0dd0e980 979 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
f4365627 980 PTA_MMX | PTA_PREFETCH_SSE},
0dd0e980
JH
981 {"k6", PROCESSOR_K6, PTA_MMX},
982 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
983 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
f4365627 984 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 985 | PTA_3DNOW_A},
f4365627 986 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
0dd0e980 987 | PTA_3DNOW | PTA_3DNOW_A},
f4365627 988 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 989 | PTA_3DNOW_A | PTA_SSE},
f4365627 990 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 991 | PTA_3DNOW_A | PTA_SSE},
f4365627 992 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 993 | PTA_3DNOW_A | PTA_SSE},
3af4bd89 994 };
c8c5cb99 995
ca7558fc 996 int const pta_size = ARRAY_SIZE (processor_alias_table);
c8c5cb99 997
3dc85dfb
RH
998 /* By default our XFmode is the 80-bit extended format. If we have
999 use TFmode instead, it's also the 80-bit format, but with padding. */
1000 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1001 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1002
41ed2237
JH
1003 /* Set the default values for switches whose default depends on TARGET_64BIT
1004 in case they weren't overwriten by command line options. */
55ba61f3
JH
1005 if (TARGET_64BIT)
1006 {
1007 if (flag_omit_frame_pointer == 2)
1008 flag_omit_frame_pointer = 1;
1009 if (flag_asynchronous_unwind_tables == 2)
1010 flag_asynchronous_unwind_tables = 1;
1011 if (flag_pcc_struct_return == 2)
1012 flag_pcc_struct_return = 0;
1013 }
1014 else
1015 {
1016 if (flag_omit_frame_pointer == 2)
1017 flag_omit_frame_pointer = 0;
1018 if (flag_asynchronous_unwind_tables == 2)
1019 flag_asynchronous_unwind_tables = 0;
1020 if (flag_pcc_struct_return == 2)
1021 flag_pcc_struct_return = 1;
1022 }
1023
f5316dfe
MM
1024#ifdef SUBTARGET_OVERRIDE_OPTIONS
1025 SUBTARGET_OVERRIDE_OPTIONS;
1026#endif
1027
f4365627
JH
1028 if (!ix86_cpu_string && ix86_arch_string)
1029 ix86_cpu_string = ix86_arch_string;
1030 if (!ix86_cpu_string)
1031 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1032 if (!ix86_arch_string)
1033 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
e075ae69 1034
6189a572
JH
1035 if (ix86_cmodel_string != 0)
1036 {
1037 if (!strcmp (ix86_cmodel_string, "small"))
1038 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1039 else if (flag_pic)
c725bd79 1040 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
6189a572
JH
1041 else if (!strcmp (ix86_cmodel_string, "32"))
1042 ix86_cmodel = CM_32;
1043 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1044 ix86_cmodel = CM_KERNEL;
1045 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1046 ix86_cmodel = CM_MEDIUM;
1047 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1048 ix86_cmodel = CM_LARGE;
1049 else
1050 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1051 }
1052 else
1053 {
1054 ix86_cmodel = CM_32;
1055 if (TARGET_64BIT)
1056 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1057 }
c93e80a5
JH
1058 if (ix86_asm_string != 0)
1059 {
1060 if (!strcmp (ix86_asm_string, "intel"))
1061 ix86_asm_dialect = ASM_INTEL;
1062 else if (!strcmp (ix86_asm_string, "att"))
1063 ix86_asm_dialect = ASM_ATT;
1064 else
1065 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1066 }
6189a572 1067 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
c725bd79 1068 error ("code model `%s' not supported in the %s bit mode",
6189a572
JH
1069 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1070 if (ix86_cmodel == CM_LARGE)
c725bd79 1071 sorry ("code model `large' not supported yet");
0c2dc519 1072 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
c725bd79 1073 sorry ("%i-bit mode not compiled in",
0c2dc519 1074 (target_flags & MASK_64BIT) ? 64 : 32);
6189a572 1075
f4365627
JH
1076 for (i = 0; i < pta_size; i++)
1077 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1078 {
1079 ix86_arch = processor_alias_table[i].processor;
1080 /* Default cpu tuning to the architecture. */
1081 ix86_cpu = ix86_arch;
1082 if (processor_alias_table[i].flags & PTA_MMX
9ef1b13a 1083 && !(target_flags_explicit & MASK_MMX))
f4365627
JH
1084 target_flags |= MASK_MMX;
1085 if (processor_alias_table[i].flags & PTA_3DNOW
9ef1b13a 1086 && !(target_flags_explicit & MASK_3DNOW))
f4365627
JH
1087 target_flags |= MASK_3DNOW;
1088 if (processor_alias_table[i].flags & PTA_3DNOW_A
9ef1b13a 1089 && !(target_flags_explicit & MASK_3DNOW_A))
f4365627
JH
1090 target_flags |= MASK_3DNOW_A;
1091 if (processor_alias_table[i].flags & PTA_SSE
9ef1b13a 1092 && !(target_flags_explicit & MASK_SSE))
f4365627
JH
1093 target_flags |= MASK_SSE;
1094 if (processor_alias_table[i].flags & PTA_SSE2
9ef1b13a 1095 && !(target_flags_explicit & MASK_SSE2))
f4365627
JH
1096 target_flags |= MASK_SSE2;
1097 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1098 x86_prefetch_sse = true;
1099 break;
1100 }
400500c4 1101
f4365627
JH
1102 if (i == pta_size)
1103 error ("bad value (%s) for -march= switch", ix86_arch_string);
e075ae69 1104
f4365627
JH
1105 for (i = 0; i < pta_size; i++)
1106 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1107 {
1108 ix86_cpu = processor_alias_table[i].processor;
1109 break;
1110 }
1111 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1112 x86_prefetch_sse = true;
1113 if (i == pta_size)
1114 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
e075ae69 1115
2ab0437e
JH
1116 if (optimize_size)
1117 ix86_cost = &size_cost;
1118 else
1119 ix86_cost = processor_target_table[ix86_cpu].cost;
e075ae69
RH
1120 target_flags |= processor_target_table[ix86_cpu].target_enable;
1121 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1122
36edd3cc
BS
1123 /* Arrange to set up i386_stack_locals for all functions. */
1124 init_machine_status = ix86_init_machine_status;
fce5a9f2 1125
0f290768 1126 /* Validate -mregparm= value. */
e075ae69 1127 if (ix86_regparm_string)
b08de47e 1128 {
400500c4
RK
1129 i = atoi (ix86_regparm_string);
1130 if (i < 0 || i > REGPARM_MAX)
1131 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1132 else
1133 ix86_regparm = i;
b08de47e 1134 }
0d7d98ee
JH
1135 else
1136 if (TARGET_64BIT)
1137 ix86_regparm = REGPARM_MAX;
b08de47e 1138
3e18fdf6 1139 /* If the user has provided any of the -malign-* options,
a4f31c00 1140 warn and use that value only if -falign-* is not set.
3e18fdf6 1141 Remove this code in GCC 3.2 or later. */
e075ae69 1142 if (ix86_align_loops_string)
b08de47e 1143 {
3e18fdf6
GK
1144 warning ("-malign-loops is obsolete, use -falign-loops");
1145 if (align_loops == 0)
1146 {
1147 i = atoi (ix86_align_loops_string);
1148 if (i < 0 || i > MAX_CODE_ALIGN)
1149 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1150 else
1151 align_loops = 1 << i;
1152 }
b08de47e 1153 }
3af4bd89 1154
e075ae69 1155 if (ix86_align_jumps_string)
b08de47e 1156 {
3e18fdf6
GK
1157 warning ("-malign-jumps is obsolete, use -falign-jumps");
1158 if (align_jumps == 0)
1159 {
1160 i = atoi (ix86_align_jumps_string);
1161 if (i < 0 || i > MAX_CODE_ALIGN)
1162 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1163 else
1164 align_jumps = 1 << i;
1165 }
b08de47e 1166 }
b08de47e 1167
e075ae69 1168 if (ix86_align_funcs_string)
b08de47e 1169 {
3e18fdf6
GK
1170 warning ("-malign-functions is obsolete, use -falign-functions");
1171 if (align_functions == 0)
1172 {
1173 i = atoi (ix86_align_funcs_string);
1174 if (i < 0 || i > MAX_CODE_ALIGN)
1175 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1176 else
1177 align_functions = 1 << i;
1178 }
b08de47e 1179 }
3af4bd89 1180
3e18fdf6 1181 /* Default align_* from the processor table. */
3e18fdf6 1182 if (align_loops == 0)
2cca7283
JH
1183 {
1184 align_loops = processor_target_table[ix86_cpu].align_loop;
1185 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1186 }
3e18fdf6 1187 if (align_jumps == 0)
2cca7283
JH
1188 {
1189 align_jumps = processor_target_table[ix86_cpu].align_jump;
1190 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1191 }
3e18fdf6 1192 if (align_functions == 0)
2cca7283
JH
1193 {
1194 align_functions = processor_target_table[ix86_cpu].align_func;
1195 }
3e18fdf6 1196
e4c0478d 1197 /* Validate -mpreferred-stack-boundary= value, or provide default.
fbb83b43
AO
1198 The default of 128 bits is for Pentium III's SSE __m128, but we
1199 don't want additional code to keep the stack aligned when
1200 optimizing for code size. */
1201 ix86_preferred_stack_boundary = (optimize_size
ef49d42e 1202 ? TARGET_64BIT ? 128 : 32
fbb83b43 1203 : 128);
e075ae69 1204 if (ix86_preferred_stack_boundary_string)
3af4bd89 1205 {
400500c4 1206 i = atoi (ix86_preferred_stack_boundary_string);
ef49d42e 1207 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
c6257c5d 1208 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
ef49d42e 1209 TARGET_64BIT ? 4 : 2);
400500c4
RK
1210 else
1211 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 1212 }
77a989d1 1213
0f290768 1214 /* Validate -mbranch-cost= value, or provide default. */
e075ae69
RH
1215 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1216 if (ix86_branch_cost_string)
804a8ee0 1217 {
400500c4
RK
1218 i = atoi (ix86_branch_cost_string);
1219 if (i < 0 || i > 5)
1220 error ("-mbranch-cost=%d is not between 0 and 5", i);
1221 else
1222 ix86_branch_cost = i;
804a8ee0 1223 }
804a8ee0 1224
f996902d
RH
1225 if (ix86_tls_dialect_string)
1226 {
1227 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1228 ix86_tls_dialect = TLS_DIALECT_GNU;
1229 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1230 ix86_tls_dialect = TLS_DIALECT_SUN;
1231 else
1232 error ("bad value (%s) for -mtls-dialect= switch",
1233 ix86_tls_dialect_string);
1234 }
1235
e9a25f70
JL
1236 /* Keep nonleaf frame pointers. */
1237 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 1238 flag_omit_frame_pointer = 1;
e075ae69
RH
1239
1240 /* If we're doing fast math, we don't care about comparison order
1241 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 1242 if (flag_unsafe_math_optimizations)
e075ae69
RH
1243 target_flags &= ~MASK_IEEE_FP;
1244
30c99a84
RH
1245 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1246 since the insns won't need emulation. */
1247 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1248 target_flags &= ~MASK_NO_FANCY_MATH_387;
1249
14f73b5a
JH
1250 if (TARGET_64BIT)
1251 {
1252 if (TARGET_ALIGN_DOUBLE)
c725bd79 1253 error ("-malign-double makes no sense in the 64bit mode");
14f73b5a 1254 if (TARGET_RTD)
c725bd79 1255 error ("-mrtd calling convention not supported in the 64bit mode");
14f73b5a 1256 /* Enable by default the SSE and MMX builtins. */
965f5423
JH
1257 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1258 ix86_fpmath = FPMATH_SSE;
14f73b5a 1259 }
965f5423
JH
1260 else
1261 ix86_fpmath = FPMATH_387;
1262
1263 if (ix86_fpmath_string != 0)
1264 {
1265 if (! strcmp (ix86_fpmath_string, "387"))
1266 ix86_fpmath = FPMATH_387;
1267 else if (! strcmp (ix86_fpmath_string, "sse"))
1268 {
1269 if (!TARGET_SSE)
1270 {
1271 warning ("SSE instruction set disabled, using 387 arithmetics");
1272 ix86_fpmath = FPMATH_387;
1273 }
1274 else
1275 ix86_fpmath = FPMATH_SSE;
1276 }
1277 else if (! strcmp (ix86_fpmath_string, "387,sse")
1278 || ! strcmp (ix86_fpmath_string, "sse,387"))
1279 {
1280 if (!TARGET_SSE)
1281 {
1282 warning ("SSE instruction set disabled, using 387 arithmetics");
1283 ix86_fpmath = FPMATH_387;
1284 }
1285 else if (!TARGET_80387)
1286 {
1287 warning ("387 instruction set disabled, using SSE arithmetics");
1288 ix86_fpmath = FPMATH_SSE;
1289 }
1290 else
1291 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1292 }
fce5a9f2 1293 else
965f5423
JH
1294 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1295 }
14f73b5a 1296
a7180f70
BS
1297 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1298 on by -msse. */
1299 if (TARGET_SSE)
e37af218
RH
1300 {
1301 target_flags |= MASK_MMX;
1302 x86_prefetch_sse = true;
1303 }
c6036a37 1304
47f339cf
BS
1305 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1306 if (TARGET_3DNOW)
1307 {
1308 target_flags |= MASK_MMX;
1309 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX
1310 extensions it adds. */
1311 if (x86_3dnow_a & (1 << ix86_arch))
1312 target_flags |= MASK_3DNOW_A;
1313 }
c6036a37 1314 if ((x86_accumulate_outgoing_args & CPUMASK)
9ef1b13a 1315 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
c6036a37
JH
1316 && !optimize_size)
1317 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
623fe810
RH
1318
1319 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1320 {
1321 char *p;
1322 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1323 p = strchr (internal_label_prefix, 'X');
1324 internal_label_prefix_len = p - internal_label_prefix;
1325 *p = '\0';
1326 }
f5316dfe
MM
1327}
1328\f
32b5b1aa 1329void
c6aded7c 1330optimization_options (level, size)
32b5b1aa 1331 int level;
bb5177ac 1332 int size ATTRIBUTE_UNUSED;
32b5b1aa 1333{
e9a25f70
JL
1334 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1335 make the problem with not enough registers even worse. */
32b5b1aa
SC
1336#ifdef INSN_SCHEDULING
1337 if (level > 1)
1338 flag_schedule_insns = 0;
1339#endif
55ba61f3
JH
1340
1341 /* The default values of these switches depend on the TARGET_64BIT
1342 that is not known at this moment. Mark these values with 2 and
1343 let user the to override these. In case there is no command line option
1344 specifying them, we will set the defaults in override_options. */
1345 if (optimize >= 1)
1346 flag_omit_frame_pointer = 2;
1347 flag_pcc_struct_return = 2;
1348 flag_asynchronous_unwind_tables = 2;
32b5b1aa 1349}
b08de47e 1350\f
91d231cb
JM
1351/* Table of valid machine attributes. */
1352const struct attribute_spec ix86_attribute_table[] =
b08de47e 1353{
91d231cb 1354 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
b08de47e
MM
1355 /* Stdcall attribute says callee is responsible for popping arguments
1356 if they are not variable. */
91d231cb
JM
1357 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1358 /* Cdecl attribute says the callee is a normal C declaration */
1359 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
b08de47e 1360 /* Regparm attribute specifies how many integer arguments are to be
0f290768 1361 passed in registers. */
91d231cb
JM
1362 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1363#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
3da1eb0b
DS
1364 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1365 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1366 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
91d231cb
JM
1367#endif
1368 { NULL, 0, 0, false, false, false, NULL }
1369};
1370
1371/* Handle a "cdecl" or "stdcall" attribute;
1372 arguments as in struct attribute_spec.handler. */
1373static tree
1374ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1375 tree *node;
1376 tree name;
1377 tree args ATTRIBUTE_UNUSED;
1378 int flags ATTRIBUTE_UNUSED;
1379 bool *no_add_attrs;
1380{
1381 if (TREE_CODE (*node) != FUNCTION_TYPE
1382 && TREE_CODE (*node) != METHOD_TYPE
1383 && TREE_CODE (*node) != FIELD_DECL
1384 && TREE_CODE (*node) != TYPE_DECL)
b08de47e 1385 {
91d231cb
JM
1386 warning ("`%s' attribute only applies to functions",
1387 IDENTIFIER_POINTER (name));
1388 *no_add_attrs = true;
1389 }
b08de47e 1390
91d231cb
JM
1391 if (TARGET_64BIT)
1392 {
1393 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1394 *no_add_attrs = true;
1395 }
b08de47e 1396
91d231cb
JM
1397 return NULL_TREE;
1398}
b08de47e 1399
91d231cb
JM
1400/* Handle a "regparm" attribute;
1401 arguments as in struct attribute_spec.handler. */
1402static tree
1403ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1404 tree *node;
1405 tree name;
1406 tree args;
1407 int flags ATTRIBUTE_UNUSED;
1408 bool *no_add_attrs;
1409{
1410 if (TREE_CODE (*node) != FUNCTION_TYPE
1411 && TREE_CODE (*node) != METHOD_TYPE
1412 && TREE_CODE (*node) != FIELD_DECL
1413 && TREE_CODE (*node) != TYPE_DECL)
1414 {
1415 warning ("`%s' attribute only applies to functions",
1416 IDENTIFIER_POINTER (name));
1417 *no_add_attrs = true;
1418 }
1419 else
1420 {
1421 tree cst;
b08de47e 1422
91d231cb
JM
1423 cst = TREE_VALUE (args);
1424 if (TREE_CODE (cst) != INTEGER_CST)
1425 {
1426 warning ("`%s' attribute requires an integer constant argument",
1427 IDENTIFIER_POINTER (name));
1428 *no_add_attrs = true;
1429 }
1430 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1431 {
1432 warning ("argument to `%s' attribute larger than %d",
1433 IDENTIFIER_POINTER (name), REGPARM_MAX);
1434 *no_add_attrs = true;
1435 }
b08de47e
MM
1436 }
1437
91d231cb 1438 return NULL_TREE;
b08de47e
MM
1439}
1440
1441/* Return 0 if the attributes for two types are incompatible, 1 if they
1442 are compatible, and 2 if they are nearly compatible (which causes a
1443 warning to be generated). */
1444
8d8e52be 1445static int
e075ae69 1446ix86_comp_type_attributes (type1, type2)
afcfe58c
MM
1447 tree type1;
1448 tree type2;
b08de47e 1449{
0f290768 1450 /* Check for mismatch of non-default calling convention. */
27c38fbe 1451 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1452
1453 if (TREE_CODE (type1) != FUNCTION_TYPE)
1454 return 1;
1455
1456 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
1457 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1458 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 1459 return 0;
b08de47e
MM
1460 return 1;
1461}
b08de47e 1462\f
483ab821
MM
1463/* Return the regparm value for a fuctio with the indicated TYPE. */
1464
1465static int
1466ix86_fntype_regparm (type)
1467 tree type;
1468{
1469 tree attr;
1470
1471 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1472 if (attr)
1473 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1474 else
1475 return ix86_regparm;
1476}
1477
b08de47e
MM
1478/* Value is the number of bytes of arguments automatically
1479 popped when returning from a subroutine call.
1480 FUNDECL is the declaration node of the function (as a tree),
1481 FUNTYPE is the data type of the function (as a tree),
1482 or for a library call it is an identifier node for the subroutine name.
1483 SIZE is the number of bytes of arguments passed on the stack.
1484
1485 On the 80386, the RTD insn may be used to pop them if the number
1486 of args is fixed, but if the number is variable then the caller
1487 must pop them all. RTD can't be used for library calls now
1488 because the library is compiled with the Unix compiler.
1489 Use of RTD is a selectable option, since it is incompatible with
1490 standard Unix calling sequences. If the option is not selected,
1491 the caller must always pop the args.
1492
1493 The attribute stdcall is equivalent to RTD on a per module basis. */
1494
1495int
e075ae69 1496ix86_return_pops_args (fundecl, funtype, size)
b08de47e
MM
1497 tree fundecl;
1498 tree funtype;
1499 int size;
79325812 1500{
3345ee7d 1501 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 1502
0f290768 1503 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 1504 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 1505
0f290768 1506 /* Stdcall functions will pop the stack if not variable args. */
698cdd84
SC
1507 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1508 rtd = 1;
79325812 1509
698cdd84
SC
1510 if (rtd
1511 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
1512 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1513 == void_type_node)))
698cdd84
SC
1514 return size;
1515 }
79325812 1516
232b8f52 1517 /* Lose any fake structure return argument if it is passed on the stack. */
0d7d98ee
JH
1518 if (aggregate_value_p (TREE_TYPE (funtype))
1519 && !TARGET_64BIT)
232b8f52 1520 {
483ab821 1521 int nregs = ix86_fntype_regparm (funtype);
232b8f52
JJ
1522
1523 if (!nregs)
1524 return GET_MODE_SIZE (Pmode);
1525 }
1526
1527 return 0;
b08de47e 1528}
b08de47e
MM
1529\f
1530/* Argument support functions. */
1531
53c17031
JH
1532/* Return true when register may be used to pass function parameters. */
1533bool
1534ix86_function_arg_regno_p (regno)
1535 int regno;
1536{
1537 int i;
1538 if (!TARGET_64BIT)
0333394e
JJ
1539 return (regno < REGPARM_MAX
1540 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
53c17031
JH
1541 if (SSE_REGNO_P (regno) && TARGET_SSE)
1542 return true;
1543 /* RAX is used as hidden argument to va_arg functions. */
1544 if (!regno)
1545 return true;
1546 for (i = 0; i < REGPARM_MAX; i++)
1547 if (regno == x86_64_int_parameter_registers[i])
1548 return true;
1549 return false;
1550}
1551
b08de47e
MM
1552/* Initialize a variable CUM of type CUMULATIVE_ARGS
1553 for a call to a function whose data type is FNTYPE.
1554 For a library call, FNTYPE is 0. */
1555
1556void
1557init_cumulative_args (cum, fntype, libname)
e9a25f70 1558 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
b08de47e
MM
1559 tree fntype; /* tree ptr for function decl */
1560 rtx libname; /* SYMBOL_REF of library name or 0 */
1561{
1562 static CUMULATIVE_ARGS zero_cum;
1563 tree param, next_param;
1564
1565 if (TARGET_DEBUG_ARG)
1566 {
1567 fprintf (stderr, "\ninit_cumulative_args (");
1568 if (fntype)
e9a25f70
JL
1569 fprintf (stderr, "fntype code = %s, ret code = %s",
1570 tree_code_name[(int) TREE_CODE (fntype)],
1571 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
1572 else
1573 fprintf (stderr, "no fntype");
1574
1575 if (libname)
1576 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1577 }
1578
1579 *cum = zero_cum;
1580
1581 /* Set up the number of registers to use for passing arguments. */
e075ae69 1582 cum->nregs = ix86_regparm;
53c17031
JH
1583 cum->sse_nregs = SSE_REGPARM_MAX;
1584 if (fntype && !TARGET_64BIT)
b08de47e
MM
1585 {
1586 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 1587
b08de47e
MM
1588 if (attr)
1589 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1590 }
53c17031 1591 cum->maybe_vaarg = false;
b08de47e
MM
1592
1593 /* Determine if this function has variable arguments. This is
1594 indicated by the last argument being 'void_type_mode' if there
1595 are no variable arguments. If there are variable arguments, then
1596 we won't pass anything in registers */
1597
1598 if (cum->nregs)
1599 {
1600 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 1601 param != 0; param = next_param)
b08de47e
MM
1602 {
1603 next_param = TREE_CHAIN (param);
e9a25f70 1604 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
53c17031
JH
1605 {
1606 if (!TARGET_64BIT)
1607 cum->nregs = 0;
1608 cum->maybe_vaarg = true;
1609 }
b08de47e
MM
1610 }
1611 }
53c17031
JH
1612 if ((!fntype && !libname)
1613 || (fntype && !TYPE_ARG_TYPES (fntype)))
1614 cum->maybe_vaarg = 1;
b08de47e
MM
1615
1616 if (TARGET_DEBUG_ARG)
1617 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1618
1619 return;
1620}
1621
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8 bytes of incoming argument by the register
   class and assign registers accordingly.  */
1625
1626/* Return the union class of CLASS1 and CLASS2.
1627 See the x86-64 PS ABI for details. */
1628
1629static enum x86_64_reg_class
1630merge_classes (class1, class2)
1631 enum x86_64_reg_class class1, class2;
1632{
1633 /* Rule #1: If both classes are equal, this is the resulting class. */
1634 if (class1 == class2)
1635 return class1;
1636
1637 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1638 the other class. */
1639 if (class1 == X86_64_NO_CLASS)
1640 return class2;
1641 if (class2 == X86_64_NO_CLASS)
1642 return class1;
1643
1644 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1645 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1646 return X86_64_MEMORY_CLASS;
1647
1648 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1649 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1650 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1651 return X86_64_INTEGERSI_CLASS;
1652 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1653 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1654 return X86_64_INTEGER_CLASS;
1655
1656 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1657 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1658 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1659 return X86_64_MEMORY_CLASS;
1660
1661 /* Rule #6: Otherwise class SSE is used. */
1662 return X86_64_SSE_CLASS;
1663}
1664
1665/* Classify the argument of type TYPE and mode MODE.
1666 CLASSES will be filled by the register class used to pass each word
1667 of the operand. The number of words is returned. In case the parameter
1668 should be passed in memory, 0 is returned. As a special case for zero
1669 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1670
1671 BIT_OFFSET is used internally for handling records and specifies offset
1672 of the offset in bits modulo 256 to avoid overflow cases.
1673
1674 See the x86-64 PS ABI for details.
1675*/
1676
1677static int
1678classify_argument (mode, type, classes, bit_offset)
1679 enum machine_mode mode;
1680 tree type;
1681 enum x86_64_reg_class classes[MAX_CLASSES];
1682 int bit_offset;
1683{
1684 int bytes =
1685 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
23327dae 1686 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
53c17031 1687
c60ee6f5
JH
1688 /* Variable sized entities are always passed/returned in memory. */
1689 if (bytes < 0)
1690 return 0;
1691
53c17031
JH
1692 if (type && AGGREGATE_TYPE_P (type))
1693 {
1694 int i;
1695 tree field;
1696 enum x86_64_reg_class subclasses[MAX_CLASSES];
1697
1698 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1699 if (bytes > 16)
1700 return 0;
1701
1702 for (i = 0; i < words; i++)
1703 classes[i] = X86_64_NO_CLASS;
1704
1705 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1706 signalize memory class, so handle it as special case. */
1707 if (!words)
1708 {
1709 classes[0] = X86_64_NO_CLASS;
1710 return 1;
1711 }
1712
1713 /* Classify each field of record and merge classes. */
1714 if (TREE_CODE (type) == RECORD_TYPE)
1715 {
91ea38f9
JH
1716 /* For classes first merge in the field of the subclasses. */
1717 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1718 {
1719 tree bases = TYPE_BINFO_BASETYPES (type);
1720 int n_bases = TREE_VEC_LENGTH (bases);
1721 int i;
1722
1723 for (i = 0; i < n_bases; ++i)
1724 {
1725 tree binfo = TREE_VEC_ELT (bases, i);
1726 int num;
1727 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1728 tree type = BINFO_TYPE (binfo);
1729
1730 num = classify_argument (TYPE_MODE (type),
1731 type, subclasses,
1732 (offset + bit_offset) % 256);
1733 if (!num)
1734 return 0;
1735 for (i = 0; i < num; i++)
1736 {
db01f480 1737 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
1738 classes[i + pos] =
1739 merge_classes (subclasses[i], classes[i + pos]);
1740 }
1741 }
1742 }
1743 /* And now merge the fields of structure. */
53c17031
JH
1744 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1745 {
1746 if (TREE_CODE (field) == FIELD_DECL)
1747 {
1748 int num;
1749
1750 /* Bitfields are always classified as integer. Handle them
1751 early, since later code would consider them to be
1752 misaligned integers. */
1753 if (DECL_BIT_FIELD (field))
1754 {
1755 for (i = int_bit_position (field) / 8 / 8;
1756 i < (int_bit_position (field)
1757 + tree_low_cst (DECL_SIZE (field), 0)
1758 + 63) / 8 / 8; i++)
1759 classes[i] =
1760 merge_classes (X86_64_INTEGER_CLASS,
1761 classes[i]);
1762 }
1763 else
1764 {
1765 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1766 TREE_TYPE (field), subclasses,
1767 (int_bit_position (field)
1768 + bit_offset) % 256);
1769 if (!num)
1770 return 0;
1771 for (i = 0; i < num; i++)
1772 {
1773 int pos =
db01f480 1774 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
53c17031
JH
1775 classes[i + pos] =
1776 merge_classes (subclasses[i], classes[i + pos]);
1777 }
1778 }
1779 }
1780 }
1781 }
1782 /* Arrays are handled as small records. */
1783 else if (TREE_CODE (type) == ARRAY_TYPE)
1784 {
1785 int num;
1786 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1787 TREE_TYPE (type), subclasses, bit_offset);
1788 if (!num)
1789 return 0;
1790
1791 /* The partial classes are now full classes. */
1792 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1793 subclasses[0] = X86_64_SSE_CLASS;
1794 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1795 subclasses[0] = X86_64_INTEGER_CLASS;
1796
1797 for (i = 0; i < words; i++)
1798 classes[i] = subclasses[i % num];
1799 }
1800 /* Unions are similar to RECORD_TYPE but offset is always 0. */
e4dbaed5
AS
1801 else if (TREE_CODE (type) == UNION_TYPE
1802 || TREE_CODE (type) == QUAL_UNION_TYPE)
53c17031 1803 {
91ea38f9
JH
1804 /* For classes first merge in the field of the subclasses. */
1805 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1806 {
1807 tree bases = TYPE_BINFO_BASETYPES (type);
1808 int n_bases = TREE_VEC_LENGTH (bases);
1809 int i;
1810
1811 for (i = 0; i < n_bases; ++i)
1812 {
1813 tree binfo = TREE_VEC_ELT (bases, i);
1814 int num;
1815 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1816 tree type = BINFO_TYPE (binfo);
1817
1818 num = classify_argument (TYPE_MODE (type),
1819 type, subclasses,
db01f480 1820 (offset + (bit_offset % 64)) % 256);
91ea38f9
JH
1821 if (!num)
1822 return 0;
1823 for (i = 0; i < num; i++)
1824 {
c16576e6 1825 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
1826 classes[i + pos] =
1827 merge_classes (subclasses[i], classes[i + pos]);
1828 }
1829 }
1830 }
53c17031
JH
1831 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1832 {
1833 if (TREE_CODE (field) == FIELD_DECL)
1834 {
1835 int num;
1836 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1837 TREE_TYPE (field), subclasses,
1838 bit_offset);
1839 if (!num)
1840 return 0;
1841 for (i = 0; i < num; i++)
1842 classes[i] = merge_classes (subclasses[i], classes[i]);
1843 }
1844 }
1845 }
1846 else
1847 abort ();
1848
1849 /* Final merger cleanup. */
1850 for (i = 0; i < words; i++)
1851 {
1852 /* If one class is MEMORY, everything should be passed in
1853 memory. */
1854 if (classes[i] == X86_64_MEMORY_CLASS)
1855 return 0;
1856
d6a7951f 1857 /* The X86_64_SSEUP_CLASS should be always preceded by
53c17031
JH
1858 X86_64_SSE_CLASS. */
1859 if (classes[i] == X86_64_SSEUP_CLASS
1860 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1861 classes[i] = X86_64_SSE_CLASS;
1862
d6a7951f 1863 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
1864 if (classes[i] == X86_64_X87UP_CLASS
1865 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1866 classes[i] = X86_64_SSE_CLASS;
1867 }
1868 return words;
1869 }
1870
1871 /* Compute alignment needed. We align all types to natural boundaries with
1872 exception of XFmode that is aligned to 64bits. */
1873 if (mode != VOIDmode && mode != BLKmode)
1874 {
1875 int mode_alignment = GET_MODE_BITSIZE (mode);
1876
1877 if (mode == XFmode)
1878 mode_alignment = 128;
1879 else if (mode == XCmode)
1880 mode_alignment = 256;
f5143c46 1881 /* Misaligned fields are always returned in memory. */
53c17031
JH
1882 if (bit_offset % mode_alignment)
1883 return 0;
1884 }
1885
1886 /* Classification of atomic types. */
1887 switch (mode)
1888 {
1889 case DImode:
1890 case SImode:
1891 case HImode:
1892 case QImode:
1893 case CSImode:
1894 case CHImode:
1895 case CQImode:
1896 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1897 classes[0] = X86_64_INTEGERSI_CLASS;
1898 else
1899 classes[0] = X86_64_INTEGER_CLASS;
1900 return 1;
1901 case CDImode:
1902 case TImode:
1903 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1904 return 2;
1905 case CTImode:
1906 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1907 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1908 return 4;
1909 case SFmode:
1910 if (!(bit_offset % 64))
1911 classes[0] = X86_64_SSESF_CLASS;
1912 else
1913 classes[0] = X86_64_SSE_CLASS;
1914 return 1;
1915 case DFmode:
1916 classes[0] = X86_64_SSEDF_CLASS;
1917 return 1;
1918 case TFmode:
1919 classes[0] = X86_64_X87_CLASS;
1920 classes[1] = X86_64_X87UP_CLASS;
1921 return 2;
1922 case TCmode:
1923 classes[0] = X86_64_X87_CLASS;
1924 classes[1] = X86_64_X87UP_CLASS;
1925 classes[2] = X86_64_X87_CLASS;
1926 classes[3] = X86_64_X87UP_CLASS;
1927 return 4;
1928 case DCmode:
1929 classes[0] = X86_64_SSEDF_CLASS;
1930 classes[1] = X86_64_SSEDF_CLASS;
1931 return 2;
1932 case SCmode:
1933 classes[0] = X86_64_SSE_CLASS;
1934 return 1;
e95d6b23
JH
1935 case V4SFmode:
1936 case V4SImode:
495333a6
JH
1937 case V16QImode:
1938 case V8HImode:
1939 case V2DFmode:
1940 case V2DImode:
e95d6b23
JH
1941 classes[0] = X86_64_SSE_CLASS;
1942 classes[1] = X86_64_SSEUP_CLASS;
1943 return 2;
1944 case V2SFmode:
1945 case V2SImode:
1946 case V4HImode:
1947 case V8QImode:
1194ca05 1948 return 0;
53c17031 1949 case BLKmode:
e95d6b23 1950 case VOIDmode:
53c17031
JH
1951 return 0;
1952 default:
1953 abort ();
1954 }
1955}
1956
1957/* Examine the argument and return set number of register required in each
f5143c46 1958 class. Return 0 iff parameter should be passed in memory. */
53c17031
JH
1959static int
1960examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1961 enum machine_mode mode;
1962 tree type;
1963 int *int_nregs, *sse_nregs;
1964 int in_return;
1965{
1966 enum x86_64_reg_class class[MAX_CLASSES];
1967 int n = classify_argument (mode, type, class, 0);
1968
1969 *int_nregs = 0;
1970 *sse_nregs = 0;
1971 if (!n)
1972 return 0;
1973 for (n--; n >= 0; n--)
1974 switch (class[n])
1975 {
1976 case X86_64_INTEGER_CLASS:
1977 case X86_64_INTEGERSI_CLASS:
1978 (*int_nregs)++;
1979 break;
1980 case X86_64_SSE_CLASS:
1981 case X86_64_SSESF_CLASS:
1982 case X86_64_SSEDF_CLASS:
1983 (*sse_nregs)++;
1984 break;
1985 case X86_64_NO_CLASS:
1986 case X86_64_SSEUP_CLASS:
1987 break;
1988 case X86_64_X87_CLASS:
1989 case X86_64_X87UP_CLASS:
1990 if (!in_return)
1991 return 0;
1992 break;
1993 case X86_64_MEMORY_CLASS:
1994 abort ();
1995 }
1996 return 1;
1997}
1998/* Construct container for the argument used by GCC interface. See
1999 FUNCTION_ARG for the detailed description. */
2000static rtx
2001construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2002 enum machine_mode mode;
2003 tree type;
2004 int in_return;
2005 int nintregs, nsseregs;
07933f72
GS
2006 const int * intreg;
2007 int sse_regno;
53c17031
JH
2008{
2009 enum machine_mode tmpmode;
2010 int bytes =
2011 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2012 enum x86_64_reg_class class[MAX_CLASSES];
2013 int n;
2014 int i;
2015 int nexps = 0;
2016 int needed_sseregs, needed_intregs;
2017 rtx exp[MAX_CLASSES];
2018 rtx ret;
2019
2020 n = classify_argument (mode, type, class, 0);
2021 if (TARGET_DEBUG_ARG)
2022 {
2023 if (!n)
2024 fprintf (stderr, "Memory class\n");
2025 else
2026 {
2027 fprintf (stderr, "Classes:");
2028 for (i = 0; i < n; i++)
2029 {
2030 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2031 }
2032 fprintf (stderr, "\n");
2033 }
2034 }
2035 if (!n)
2036 return NULL;
2037 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2038 return NULL;
2039 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2040 return NULL;
2041
2042 /* First construct simple cases. Avoid SCmode, since we want to use
2043 single register to pass this type. */
2044 if (n == 1 && mode != SCmode)
2045 switch (class[0])
2046 {
2047 case X86_64_INTEGER_CLASS:
2048 case X86_64_INTEGERSI_CLASS:
2049 return gen_rtx_REG (mode, intreg[0]);
2050 case X86_64_SSE_CLASS:
2051 case X86_64_SSESF_CLASS:
2052 case X86_64_SSEDF_CLASS:
2053 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2054 case X86_64_X87_CLASS:
2055 return gen_rtx_REG (mode, FIRST_STACK_REG);
2056 case X86_64_NO_CLASS:
2057 /* Zero sized array, struct or class. */
2058 return NULL;
2059 default:
2060 abort ();
2061 }
2062 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
e95d6b23 2063 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
53c17031
JH
2064 if (n == 2
2065 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2066 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2067 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2068 && class[1] == X86_64_INTEGER_CLASS
2069 && (mode == CDImode || mode == TImode)
2070 && intreg[0] + 1 == intreg[1])
2071 return gen_rtx_REG (mode, intreg[0]);
2072 if (n == 4
2073 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2074 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2075 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2076
2077 /* Otherwise figure out the entries of the PARALLEL. */
2078 for (i = 0; i < n; i++)
2079 {
2080 switch (class[i])
2081 {
2082 case X86_64_NO_CLASS:
2083 break;
2084 case X86_64_INTEGER_CLASS:
2085 case X86_64_INTEGERSI_CLASS:
2086 /* Merge TImodes on aligned occassions here too. */
2087 if (i * 8 + 8 > bytes)
2088 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2089 else if (class[i] == X86_64_INTEGERSI_CLASS)
2090 tmpmode = SImode;
2091 else
2092 tmpmode = DImode;
2093 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2094 if (tmpmode == BLKmode)
2095 tmpmode = DImode;
2096 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2097 gen_rtx_REG (tmpmode, *intreg),
2098 GEN_INT (i*8));
2099 intreg++;
2100 break;
2101 case X86_64_SSESF_CLASS:
2102 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2103 gen_rtx_REG (SFmode,
2104 SSE_REGNO (sse_regno)),
2105 GEN_INT (i*8));
2106 sse_regno++;
2107 break;
2108 case X86_64_SSEDF_CLASS:
2109 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2110 gen_rtx_REG (DFmode,
2111 SSE_REGNO (sse_regno)),
2112 GEN_INT (i*8));
2113 sse_regno++;
2114 break;
2115 case X86_64_SSE_CLASS:
12f5c45e
JH
2116 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2117 tmpmode = TImode;
53c17031
JH
2118 else
2119 tmpmode = DImode;
2120 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2121 gen_rtx_REG (tmpmode,
2122 SSE_REGNO (sse_regno)),
2123 GEN_INT (i*8));
12f5c45e
JH
2124 if (tmpmode == TImode)
2125 i++;
53c17031
JH
2126 sse_regno++;
2127 break;
2128 default:
2129 abort ();
2130 }
2131 }
2132 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2133 for (i = 0; i < nexps; i++)
2134 XVECEXP (ret, 0, i) = exp [i];
2135 return ret;
2136}
2137
b08de47e
MM
2138/* Update the data in CUM to advance over an argument
2139 of mode MODE and data type TYPE.
2140 (TYPE is null for libcalls where that information may not be available.) */
2141
2142void
2143function_arg_advance (cum, mode, type, named)
2144 CUMULATIVE_ARGS *cum; /* current arg information */
2145 enum machine_mode mode; /* current arg mode */
2146 tree type; /* type of the argument or 0 if lib support */
2147 int named; /* whether or not the argument was named */
2148{
5ac9118e
KG
2149 int bytes =
2150 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2151 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2152
2153 if (TARGET_DEBUG_ARG)
2154 fprintf (stderr,
e9a25f70 2155 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e 2156 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
53c17031 2157 if (TARGET_64BIT)
b08de47e 2158 {
53c17031
JH
2159 int int_nregs, sse_nregs;
2160 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2161 cum->words += words;
2162 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 2163 {
53c17031
JH
2164 cum->nregs -= int_nregs;
2165 cum->sse_nregs -= sse_nregs;
2166 cum->regno += int_nregs;
2167 cum->sse_regno += sse_nregs;
82a127a9 2168 }
53c17031
JH
2169 else
2170 cum->words += words;
b08de47e 2171 }
a4f31c00 2172 else
82a127a9 2173 {
53c17031
JH
2174 if (TARGET_SSE && mode == TImode)
2175 {
2176 cum->sse_words += words;
2177 cum->sse_nregs -= 1;
2178 cum->sse_regno += 1;
2179 if (cum->sse_nregs <= 0)
2180 {
2181 cum->sse_nregs = 0;
2182 cum->sse_regno = 0;
2183 }
2184 }
2185 else
82a127a9 2186 {
53c17031
JH
2187 cum->words += words;
2188 cum->nregs -= words;
2189 cum->regno += words;
2190
2191 if (cum->nregs <= 0)
2192 {
2193 cum->nregs = 0;
2194 cum->regno = 0;
2195 }
82a127a9
CM
2196 }
2197 }
b08de47e
MM
2198 return;
2199}
2200
2201/* Define where to put the arguments to a function.
2202 Value is zero to push the argument on the stack,
2203 or a hard register in which to store the argument.
2204
2205 MODE is the argument's machine mode.
2206 TYPE is the data type of the argument (as a tree).
2207 This is null for libcalls where that information may
2208 not be available.
2209 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2210 the preceding args and about the function being called.
2211 NAMED is nonzero if this argument is a named parameter
2212 (otherwise it is an extra parameter matching an ellipsis). */
2213
07933f72 2214rtx
b08de47e
MM
2215function_arg (cum, mode, type, named)
2216 CUMULATIVE_ARGS *cum; /* current arg information */
2217 enum machine_mode mode; /* current arg mode */
2218 tree type; /* type of the argument or 0 if lib support */
2219 int named; /* != 0 for normal args, == 0 for ... args */
2220{
2221 rtx ret = NULL_RTX;
5ac9118e
KG
2222 int bytes =
2223 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2224 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2225
53c17031
JH
2226 /* Handle an hidden AL argument containing number of registers for varargs
2227 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2228 any AL settings. */
32ee7d1d 2229 if (mode == VOIDmode)
b08de47e 2230 {
53c17031
JH
2231 if (TARGET_64BIT)
2232 return GEN_INT (cum->maybe_vaarg
2233 ? (cum->sse_nregs < 0
2234 ? SSE_REGPARM_MAX
2235 : cum->sse_regno)
2236 : -1);
2237 else
2238 return constm1_rtx;
b08de47e 2239 }
53c17031
JH
2240 if (TARGET_64BIT)
2241 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2242 &x86_64_int_parameter_registers [cum->regno],
2243 cum->sse_regno);
2244 else
2245 switch (mode)
2246 {
2247 /* For now, pass fp/complex values on the stack. */
2248 default:
2249 break;
2250
2251 case BLKmode:
2252 case DImode:
2253 case SImode:
2254 case HImode:
2255 case QImode:
2256 if (words <= cum->nregs)
2257 ret = gen_rtx_REG (mode, cum->regno);
2258 break;
2259 case TImode:
2260 if (cum->sse_nregs)
2261 ret = gen_rtx_REG (mode, cum->sse_regno);
2262 break;
2263 }
b08de47e
MM
2264
2265 if (TARGET_DEBUG_ARG)
2266 {
2267 fprintf (stderr,
91ea38f9 2268 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
b08de47e
MM
2269 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2270
2271 if (ret)
91ea38f9 2272 print_simple_rtl (stderr, ret);
b08de47e
MM
2273 else
2274 fprintf (stderr, ", stack");
2275
2276 fprintf (stderr, " )\n");
2277 }
2278
2279 return ret;
2280}
53c17031
JH
2281
2282/* Gives the alignment boundary, in bits, of an argument with the specified mode
2283 and type. */
2284
2285int
2286ix86_function_arg_boundary (mode, type)
2287 enum machine_mode mode;
2288 tree type;
2289{
2290 int align;
2291 if (!TARGET_64BIT)
2292 return PARM_BOUNDARY;
2293 if (type)
2294 align = TYPE_ALIGN (type);
2295 else
2296 align = GET_MODE_ALIGNMENT (mode);
2297 if (align < PARM_BOUNDARY)
2298 align = PARM_BOUNDARY;
2299 if (align > 128)
2300 align = 128;
2301 return align;
2302}
2303
2304/* Return true if N is a possible register number of function value. */
2305bool
2306ix86_function_value_regno_p (regno)
2307 int regno;
2308{
2309 if (!TARGET_64BIT)
2310 {
2311 return ((regno) == 0
2312 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2313 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2314 }
2315 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2316 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2317 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2318}
2319
2320/* Define how to find the value returned by a function.
2321 VALTYPE is the data type of the value (as a tree).
2322 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2323 otherwise, FUNC is 0. */
2324rtx
2325ix86_function_value (valtype)
2326 tree valtype;
2327{
2328 if (TARGET_64BIT)
2329 {
2330 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2331 REGPARM_MAX, SSE_REGPARM_MAX,
2332 x86_64_int_return_registers, 0);
2333 /* For zero sized structures, construct_continer return NULL, but we need
2334 to keep rest of compiler happy by returning meaningfull value. */
2335 if (!ret)
2336 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2337 return ret;
2338 }
2339 else
b069de3b
SS
2340 return gen_rtx_REG (TYPE_MODE (valtype),
2341 ix86_value_regno (TYPE_MODE (valtype)));
53c17031
JH
2342}
2343
f5143c46 2344/* Return false iff type is returned in memory. */
53c17031
JH
2345int
2346ix86_return_in_memory (type)
2347 tree type;
2348{
2349 int needed_intregs, needed_sseregs;
2350 if (TARGET_64BIT)
2351 {
2352 return !examine_argument (TYPE_MODE (type), type, 1,
2353 &needed_intregs, &needed_sseregs);
2354 }
2355 else
2356 {
2357 if (TYPE_MODE (type) == BLKmode
2358 || (VECTOR_MODE_P (TYPE_MODE (type))
2359 && int_size_in_bytes (type) == 8)
2360 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2361 && TYPE_MODE (type) != TFmode
2362 && !VECTOR_MODE_P (TYPE_MODE (type))))
2363 return 1;
2364 return 0;
2365 }
2366}
2367
2368/* Define how to find the value returned by a library function
2369 assuming the value has mode MODE. */
2370rtx
2371ix86_libcall_value (mode)
2372 enum machine_mode mode;
2373{
2374 if (TARGET_64BIT)
2375 {
2376 switch (mode)
2377 {
2378 case SFmode:
2379 case SCmode:
2380 case DFmode:
2381 case DCmode:
2382 return gen_rtx_REG (mode, FIRST_SSE_REG);
2383 case TFmode:
2384 case TCmode:
2385 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2386 default:
2387 return gen_rtx_REG (mode, 0);
2388 }
2389 }
2390 else
b069de3b
SS
2391 return gen_rtx_REG (mode, ix86_value_regno (mode));
2392}
2393
2394/* Given a mode, return the register to use for a return value. */
2395
2396static int
2397ix86_value_regno (mode)
2398 enum machine_mode mode;
2399{
2400 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2401 return FIRST_FLOAT_REG;
2402 if (mode == TImode || VECTOR_MODE_P (mode))
2403 return FIRST_SSE_REG;
2404 return 0;
53c17031 2405}
ad919812
JH
2406\f
2407/* Create the va_list data type. */
53c17031 2408
ad919812
JH
2409tree
2410ix86_build_va_list ()
2411{
2412 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 2413
ad919812
JH
2414 /* For i386 we use plain pointer to argument area. */
2415 if (!TARGET_64BIT)
2416 return build_pointer_type (char_type_node);
2417
f1e639b1 2418 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
ad919812
JH
2419 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2420
fce5a9f2 2421 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
ad919812 2422 unsigned_type_node);
fce5a9f2 2423 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
ad919812
JH
2424 unsigned_type_node);
2425 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2426 ptr_type_node);
2427 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2428 ptr_type_node);
2429
2430 DECL_FIELD_CONTEXT (f_gpr) = record;
2431 DECL_FIELD_CONTEXT (f_fpr) = record;
2432 DECL_FIELD_CONTEXT (f_ovf) = record;
2433 DECL_FIELD_CONTEXT (f_sav) = record;
2434
2435 TREE_CHAIN (record) = type_decl;
2436 TYPE_NAME (record) = type_decl;
2437 TYPE_FIELDS (record) = f_gpr;
2438 TREE_CHAIN (f_gpr) = f_fpr;
2439 TREE_CHAIN (f_fpr) = f_ovf;
2440 TREE_CHAIN (f_ovf) = f_sav;
2441
2442 layout_type (record);
2443
2444 /* The correct type is an array type of one element. */
2445 return build_array_type (record, build_index_type (size_zero_node));
2446}
2447
2448/* Perform any needed actions needed for a function that is receiving a
fce5a9f2 2449 variable number of arguments.
ad919812
JH
2450
2451 CUM is as above.
2452
2453 MODE and TYPE are the mode and type of the current parameter.
2454
2455 PRETEND_SIZE is a variable that should be set to the amount of stack
2456 that must be pushed by the prolog to pretend that our caller pushed
2457 it.
2458
2459 Normally, this macro will push all remaining incoming registers on the
2460 stack and set PRETEND_SIZE to the length of the registers pushed. */
2461
2462void
2463ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2464 CUMULATIVE_ARGS *cum;
2465 enum machine_mode mode;
2466 tree type;
2467 int *pretend_size ATTRIBUTE_UNUSED;
2468 int no_rtl;
2469
2470{
2471 CUMULATIVE_ARGS next_cum;
2472 rtx save_area = NULL_RTX, mem;
2473 rtx label;
2474 rtx label_ref;
2475 rtx tmp_reg;
2476 rtx nsse_reg;
2477 int set;
2478 tree fntype;
2479 int stdarg_p;
2480 int i;
2481
2482 if (!TARGET_64BIT)
2483 return;
2484
2485 /* Indicate to allocate space on the stack for varargs save area. */
2486 ix86_save_varrargs_registers = 1;
2487
2488 fntype = TREE_TYPE (current_function_decl);
2489 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2490 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2491 != void_type_node));
2492
2493 /* For varargs, we do not want to skip the dummy va_dcl argument.
2494 For stdargs, we do want to skip the last named argument. */
2495 next_cum = *cum;
2496 if (stdarg_p)
2497 function_arg_advance (&next_cum, mode, type, 1);
2498
2499 if (!no_rtl)
2500 save_area = frame_pointer_rtx;
2501
2502 set = get_varargs_alias_set ();
2503
2504 for (i = next_cum.regno; i < ix86_regparm; i++)
2505 {
2506 mem = gen_rtx_MEM (Pmode,
2507 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 2508 set_mem_alias_set (mem, set);
ad919812
JH
2509 emit_move_insn (mem, gen_rtx_REG (Pmode,
2510 x86_64_int_parameter_registers[i]));
2511 }
2512
2513 if (next_cum.sse_nregs)
2514 {
2515 /* Now emit code to save SSE registers. The AX parameter contains number
2516 of SSE parameter regsiters used to call this function. We use
2517 sse_prologue_save insn template that produces computed jump across
2518 SSE saves. We need some preparation work to get this working. */
2519
2520 label = gen_label_rtx ();
2521 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2522
2523 /* Compute address to jump to :
2524 label - 5*eax + nnamed_sse_arguments*5 */
2525 tmp_reg = gen_reg_rtx (Pmode);
2526 nsse_reg = gen_reg_rtx (Pmode);
2527 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2528 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 2529 gen_rtx_MULT (Pmode, nsse_reg,
ad919812
JH
2530 GEN_INT (4))));
2531 if (next_cum.sse_regno)
2532 emit_move_insn
2533 (nsse_reg,
2534 gen_rtx_CONST (DImode,
2535 gen_rtx_PLUS (DImode,
2536 label_ref,
2537 GEN_INT (next_cum.sse_regno * 4))));
2538 else
2539 emit_move_insn (nsse_reg, label_ref);
2540 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2541
2542 /* Compute address of memory block we save into. We always use pointer
2543 pointing 127 bytes after first byte to store - this is needed to keep
2544 instruction size limited by 4 bytes. */
2545 tmp_reg = gen_reg_rtx (Pmode);
8ac61af7
RK
2546 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2547 plus_constant (save_area,
2548 8 * REGPARM_MAX + 127)));
ad919812 2549 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 2550 set_mem_alias_set (mem, set);
8ac61af7 2551 set_mem_align (mem, BITS_PER_WORD);
ad919812
JH
2552
2553 /* And finally do the dirty job! */
8ac61af7
RK
2554 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2555 GEN_INT (next_cum.sse_regno), label));
ad919812
JH
2556 }
2557
2558}
2559
/* Implement va_start.

   VALIST is the tree for the va_list variable being initialized;
   NEXTARG is the rtx for the first anonymous stack argument.  On
   32-bit targets the generic expander suffices; on x86-64 the
   four-field va_list (gp_offset, fp_offset, overflow_arg_area,
   reg_save_area) must be filled in explicitly.  */

void
ix86_va_start (valist, nextarg)
     tree valist;
     rtx nextarg;
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  /* Walk the four FIELD_DECLs of the x86-64 va_list record in
     declaration order.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);

  /* Count number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
	     (int) words, (int) n_gpr, (int) n_fpr);

  /* gp_offset: byte offset of the next unused integer register slot
     within the register save area (8 bytes per GP register).  */
  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
	     build_int_2 (n_gpr * 8, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* fp_offset: SSE slots (16 bytes each) follow the 8*REGPARM_MAX bytes
     of GP slots in the save area.  */
  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
	     build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the overflow area.  */
  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
  if (words != 0)
    t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
	       build_int_2 (words * UNITS_PER_WORD, 0));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the register save area.
     Prologue of the function save it right above stack frame.  */
  t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
  t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
2624
/* Implement va_arg.

   VALIST is the va_list expression, TYPE the type being fetched.
   Returns an rtx holding the address the value should be loaded from.
   On x86-64, arguments that (partially) live in registers are fetched
   from the register save area when enough register slots remain,
   otherwise from the stack overflow area.  */
rtx
ix86_va_arg (valist, type)
     tree valist, type;
{
  /* Identity map of GP argument register indices, consumed by
     construct_container.  */
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  rtx lab_false, lab_over = NULL_RTX;
  rtx addr_rtx, r;
  rtx container;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      return std_expand_builtin_va_arg (valist, type);
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);

  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* CONTAINER describes how TYPE would be passed in registers, or is
     NULL when it is passed entirely in memory.  */
  container = construct_container (TYPE_MODE (type), type, 0,
				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
  /*
   * Pull the value out of the saved registers ...
   */

  addr_rtx = gen_reg_rtx (Pmode);

  if (container)
    {
      rtx int_addr_rtx, sse_addr_rtx;
      int needed_intregs, needed_sseregs;
      int need_temp;

      lab_over = gen_label_rtx ();
      lab_false = gen_label_rtx ();

      examine_argument (TYPE_MODE (type), type, 0,
			&needed_intregs, &needed_sseregs);

      /* Over-aligned types cannot be read straight out of the save
	 area; they must first be copied to a properly aligned slot.  */
      need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
		   || TYPE_ALIGN (type) > 128);

      /* In case we are passing structure, verify that it is consecutive
	 block on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive.  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = 1;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = 1;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr_rtx = addr_rtx;
	  sse_addr_rtx = addr_rtx;
	}
      else
	{
	  int_addr_rtx = gen_reg_rtx (Pmode);
	  sse_addr_rtx = gen_reg_rtx (Pmode);
	}
      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  emit_cmp_and_jump_insns (expand_expr
				   (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
				   GEN_INT ((REGPARM_MAX - needed_intregs +
					     1) * 8), GE, const1_rtx, SImode,
				   1, lab_false);
	}
      if (needed_sseregs)
	{
	  emit_cmp_and_jump_insns (expand_expr
				   (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
				   GEN_INT ((SSE_REGPARM_MAX -
					     needed_sseregs + 1) * 16 +
					    REGPARM_MAX * 8), GE, const1_rtx,
				   SImode, 1, lab_false);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
	  r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
	  if (r != int_addr_rtx)
	    emit_move_insn (int_addr_rtx, r);
	}
      if (needed_sseregs)
	{
	  t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
	  r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
	  if (r != sse_addr_rtx)
	    emit_move_insn (sse_addr_rtx, r);
	}
      if (need_temp)
	{
	  int i;
	  rtx mem;

	  /* Never use the memory itself, as it has the alias set.  */
	  addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
	  mem = gen_rtx_MEM (BLKmode, addr_rtx);
	  set_mem_alias_set (mem, get_varargs_alias_set ());
	  set_mem_align (mem, BITS_PER_UNIT);

	  /* Copy each register piece from the save area into the
	     temporary slot at its container-specified offset.  */
	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      rtx src_addr;
	      rtx src_mem;
	      int src_offset;
	      rtx dest_mem;

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr_rtx;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr_rtx;
		  src_offset = REGNO (reg) * 8;
		}
	      src_mem = gen_rtx_MEM (mode, src_addr);
	      set_mem_alias_set (src_mem, get_varargs_alias_set ());
	      src_mem = adjust_address (src_mem, mode, src_offset);
	      dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
	      emit_move_insn (dest_mem, src_mem);
	    }
	}

      /* Consume the register slots: bump gp_offset/fp_offset past the
	 registers just used.  */
      if (needed_intregs)
	{
	  t =
	    build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		   build_int_2 (needed_intregs * 8, 0));
	  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
	}
      if (needed_sseregs)
	{
	  t =
	    build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		   build_int_2 (needed_sseregs * 16, 0));
	  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
	}

      emit_jump_insn (gen_jump (lab_over));
      emit_barrier ();
      emit_label (lab_false);
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
      t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
      t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
    }
  t = save_expr (t);

  r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
  if (r != addr_rtx)
    emit_move_insn (addr_rtx, r);

  /* Advance overflow_arg_area past the argument, rounded to a word
     multiple.  */
  t =
    build (PLUS_EXPR, TREE_TYPE (t), t,
	   build_int_2 (rsize * UNITS_PER_WORD, 0));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (container)
    emit_label (lab_over);

  return addr_rtx;
}
2848\f
c3c637e3
GS
2849/* Return nonzero if OP is either a i387 or SSE fp register. */
2850int
2851any_fp_register_operand (op, mode)
2852 rtx op;
2853 enum machine_mode mode ATTRIBUTE_UNUSED;
2854{
2855 return ANY_FP_REG_P (op);
2856}
2857
2858/* Return nonzero if OP is an i387 fp register. */
2859int
2860fp_register_operand (op, mode)
2861 rtx op;
2862 enum machine_mode mode ATTRIBUTE_UNUSED;
2863{
2864 return FP_REG_P (op);
2865}
2866
2867/* Return nonzero if OP is a non-fp register_operand. */
2868int
2869register_and_not_any_fp_reg_operand (op, mode)
2870 rtx op;
2871 enum machine_mode mode;
2872{
2873 return register_operand (op, mode) && !ANY_FP_REG_P (op);
2874}
2875
2876/* Return nonzero of OP is a register operand other than an
2877 i387 fp register. */
2878int
2879register_and_not_fp_reg_operand (op, mode)
2880 rtx op;
2881 enum machine_mode mode;
2882{
2883 return register_operand (op, mode) && !FP_REG_P (op);
2884}
2885
7dd4b4a3
JH
2886/* Return nonzero if OP is general operand representable on x86_64. */
2887
2888int
2889x86_64_general_operand (op, mode)
2890 rtx op;
2891 enum machine_mode mode;
2892{
2893 if (!TARGET_64BIT)
2894 return general_operand (op, mode);
2895 if (nonimmediate_operand (op, mode))
2896 return 1;
75d38379 2897 return x86_64_sign_extended_value (op, 1);
7dd4b4a3
JH
2898}
2899
2900/* Return nonzero if OP is general operand representable on x86_64
d6a7951f 2901 as either sign extended or zero extended constant. */
7dd4b4a3
JH
2902
2903int
2904x86_64_szext_general_operand (op, mode)
2905 rtx op;
2906 enum machine_mode mode;
2907{
2908 if (!TARGET_64BIT)
2909 return general_operand (op, mode);
2910 if (nonimmediate_operand (op, mode))
2911 return 1;
75d38379 2912 return x86_64_sign_extended_value (op, 1) || x86_64_zero_extended_value (op);
7dd4b4a3
JH
2913}
2914
2915/* Return nonzero if OP is nonmemory operand representable on x86_64. */
2916
2917int
2918x86_64_nonmemory_operand (op, mode)
2919 rtx op;
2920 enum machine_mode mode;
2921{
2922 if (!TARGET_64BIT)
2923 return nonmemory_operand (op, mode);
2924 if (register_operand (op, mode))
2925 return 1;
75d38379 2926 return x86_64_sign_extended_value (op, 1);
7dd4b4a3
JH
2927}
2928
2929/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2930
2931int
2932x86_64_movabs_operand (op, mode)
2933 rtx op;
2934 enum machine_mode mode;
2935{
2936 if (!TARGET_64BIT || !flag_pic)
2937 return nonmemory_operand (op, mode);
75d38379 2938 if (register_operand (op, mode) || x86_64_sign_extended_value (op, 0))
7dd4b4a3
JH
2939 return 1;
2940 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2941 return 1;
2942 return 0;
2943}
2944
2945/* Return nonzero if OP is nonmemory operand representable on x86_64. */
2946
2947int
2948x86_64_szext_nonmemory_operand (op, mode)
2949 rtx op;
2950 enum machine_mode mode;
2951{
2952 if (!TARGET_64BIT)
2953 return nonmemory_operand (op, mode);
2954 if (register_operand (op, mode))
2955 return 1;
75d38379 2956 return x86_64_sign_extended_value (op, 0) || x86_64_zero_extended_value (op);
7dd4b4a3
JH
2957}
2958
2959/* Return nonzero if OP is immediate operand representable on x86_64. */
2960
2961int
2962x86_64_immediate_operand (op, mode)
2963 rtx op;
2964 enum machine_mode mode;
2965{
2966 if (!TARGET_64BIT)
2967 return immediate_operand (op, mode);
75d38379 2968 return x86_64_sign_extended_value (op, 0);
7dd4b4a3
JH
2969}
2970
2971/* Return nonzero if OP is immediate operand representable on x86_64. */
2972
2973int
2974x86_64_zext_immediate_operand (op, mode)
2975 rtx op;
2976 enum machine_mode mode ATTRIBUTE_UNUSED;
2977{
2978 return x86_64_zero_extended_value (op);
2979}
2980
8bad7136
JL
2981/* Return nonzero if OP is (const_int 1), else return zero. */
2982
2983int
2984const_int_1_operand (op, mode)
2985 rtx op;
2986 enum machine_mode mode ATTRIBUTE_UNUSED;
2987{
2988 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2989}
2990
794a292d
JJ
2991/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2992 for shift & compare patterns, as shifting by 0 does not change flags),
2993 else return zero. */
2994
2995int
2996const_int_1_31_operand (op, mode)
2997 rtx op;
2998 enum machine_mode mode ATTRIBUTE_UNUSED;
2999{
3000 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3001}
3002
e075ae69
RH
/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  Also accepts the GOT/GOTOFF/GOTPCREL
   UNSPEC wrappers produced for PIC references, with an offset
   permitted only for @GOTOFF.  */

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      /* Strip the CONST wrapper and look at what is inside.  */
      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && (XINT (op, 1) == UNSPEC_GOT
		  || XINT (op, 1) == UNSPEC_GOTOFF
		  || XINT (op, 1) == UNSPEC_GOTPCREL)))
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      /* (const (plus X (const_int))): X must be symbolic.  */
      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != UNSPEC_GOTOFF)
	return 0;

      /* Look inside the UNSPEC at the wrapped symbol.  */
      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}
2a2ab3f9 3049
e075ae69 3050/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 3051
e075ae69
RH
3052int
3053pic_symbolic_operand (op, mode)
3054 register rtx op;
3055 enum machine_mode mode ATTRIBUTE_UNUSED;
3056{
6eb791fc
JH
3057 if (GET_CODE (op) != CONST)
3058 return 0;
3059 op = XEXP (op, 0);
3060 if (TARGET_64BIT)
3061 {
3062 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3063 return 1;
3064 }
fce5a9f2 3065 else
2a2ab3f9 3066 {
e075ae69
RH
3067 if (GET_CODE (op) == UNSPEC)
3068 return 1;
3069 if (GET_CODE (op) != PLUS
3070 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3071 return 0;
3072 op = XEXP (op, 0);
3073 if (GET_CODE (op) == UNSPEC)
3074 return 1;
2a2ab3f9 3075 }
e075ae69 3076 return 0;
2a2ab3f9 3077}
2a2ab3f9 3078
623fe810
RH
/* Return true if OP is a symbolic operand that resolves locally,
   i.e. may be addressed without going through the GOT/PLT.  */

static int
local_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* Peel a constant offset off (const (plus SYM (const_int N))),
     but in the small PIC model only when N stays within +/-16MB so
     the resulting address is still reachable.  */
  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && (ix86_cmodel != CM_SMALL_PIC
	  || (INTVAL (XEXP (XEXP (op, 0), 1)) >= -16*1024*1024
	      && INTVAL (XEXP (XEXP (op, 0), 1)) < 16*1024*1024)))
    op = XEXP (XEXP (op, 0), 0);

  /* Labels are always local to the translation unit.  */
  if (GET_CODE (op) == LABEL_REF)
    return 1;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  /* These we've been told are local by varasm and encode_section_info
     respectively.  */
  if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
    return 1;

  /* There is, however, a not insubstantial body of code in the rest of
     the compiler that assumes it can just stick the results of
     ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done.  */
  /* ??? This is a hack.  Should update the body of the compiler to
     always create a DECL an invoke targetm.encode_section_info.  */
  if (strncmp (XSTR (op, 0), internal_label_prefix,
	       internal_label_prefix_len) == 0)
    return 1;

  return 0;
}
3116
f996902d
RH
/* Test for various thread-local symbols.  See ix86_encode_section_info.

   Returns the TLS model index encoded in the symbol name (the '%'
   prefix plus a model character is added by encode_section_info),
   or 0 if OP is not a TLS symbol.  NOTE(review): if symbol_str[1]
   is not one of tls_model_chars, strchr returns NULL and the
   subtraction yields a meaningless value — presumably encoding
   guarantees a valid model character; confirm against
   ix86_encode_section_info.  */

int
tls_symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *symbol_str;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  symbol_str = XSTR (op, 0);

  if (symbol_str[0] != '%')
    return 0;
  return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
}
3134
3135static int
3136tls_symbolic_operand_1 (op, kind)
3137 rtx op;
3138 enum tls_model kind;
3139{
3140 const char *symbol_str;
3141
3142 if (GET_CODE (op) != SYMBOL_REF)
3143 return 0;
3144 symbol_str = XSTR (op, 0);
3145
3146 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3147}
3148
3149int
3150global_dynamic_symbolic_operand (op, mode)
3151 register rtx op;
3152 enum machine_mode mode ATTRIBUTE_UNUSED;
3153{
3154 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3155}
3156
3157int
3158local_dynamic_symbolic_operand (op, mode)
3159 register rtx op;
3160 enum machine_mode mode ATTRIBUTE_UNUSED;
3161{
3162 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3163}
3164
3165int
3166initial_exec_symbolic_operand (op, mode)
3167 register rtx op;
3168 enum machine_mode mode ATTRIBUTE_UNUSED;
3169{
3170 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3171}
3172
3173int
3174local_exec_symbolic_operand (op, mode)
3175 register rtx op;
3176 enum machine_mode mode ATTRIBUTE_UNUSED;
3177{
3178 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3179}
3180
28d52ffb
RH
3181/* Test for a valid operand for a call instruction. Don't allow the
3182 arg pointer register or virtual regs since they may decay into
3183 reg + const, which the patterns can't handle. */
2a2ab3f9 3184
e075ae69
RH
3185int
3186call_insn_operand (op, mode)
3187 rtx op;
3188 enum machine_mode mode ATTRIBUTE_UNUSED;
3189{
e075ae69
RH
3190 /* Disallow indirect through a virtual register. This leads to
3191 compiler aborts when trying to eliminate them. */
3192 if (GET_CODE (op) == REG
3193 && (op == arg_pointer_rtx
564d80f4 3194 || op == frame_pointer_rtx
e075ae69
RH
3195 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3196 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3197 return 0;
2a2ab3f9 3198
28d52ffb
RH
3199 /* Disallow `call 1234'. Due to varying assembler lameness this
3200 gets either rejected or translated to `call .+1234'. */
3201 if (GET_CODE (op) == CONST_INT)
3202 return 0;
3203
cbbf65e0
RH
3204 /* Explicitly allow SYMBOL_REF even if pic. */
3205 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 3206 return 1;
2a2ab3f9 3207
cbbf65e0
RH
3208 /* Otherwise we can allow any general_operand in the address. */
3209 return general_operand (op, Pmode);
e075ae69 3210}
79325812 3211
e075ae69
RH
3212int
3213constant_call_address_operand (op, mode)
3214 rtx op;
3215 enum machine_mode mode ATTRIBUTE_UNUSED;
3216{
eaf19aba
JJ
3217 if (GET_CODE (op) == CONST
3218 && GET_CODE (XEXP (op, 0)) == PLUS
3219 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3220 op = XEXP (XEXP (op, 0), 0);
e1ff012c 3221 return GET_CODE (op) == SYMBOL_REF;
e075ae69 3222}
2a2ab3f9 3223
e075ae69 3224/* Match exactly zero and one. */
e9a25f70 3225
0f290768 3226int
e075ae69
RH
3227const0_operand (op, mode)
3228 register rtx op;
3229 enum machine_mode mode;
3230{
3231 return op == CONST0_RTX (mode);
3232}
e9a25f70 3233
0f290768 3234int
e075ae69
RH
3235const1_operand (op, mode)
3236 register rtx op;
3237 enum machine_mode mode ATTRIBUTE_UNUSED;
3238{
3239 return op == const1_rtx;
3240}
2a2ab3f9 3241
e075ae69 3242/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 3243
e075ae69
RH
3244int
3245const248_operand (op, mode)
3246 register rtx op;
3247 enum machine_mode mode ATTRIBUTE_UNUSED;
3248{
3249 return (GET_CODE (op) == CONST_INT
3250 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3251}
e9a25f70 3252
e075ae69 3253/* True if this is a constant appropriate for an increment or decremenmt. */
81fd0956 3254
e075ae69
RH
3255int
3256incdec_operand (op, mode)
3257 register rtx op;
0631e0bf 3258 enum machine_mode mode ATTRIBUTE_UNUSED;
e075ae69 3259{
f5143c46 3260 /* On Pentium4, the inc and dec operations causes extra dependency on flag
b4e89e2d
JH
3261 registers, since carry flag is not set. */
3262 if (TARGET_PENTIUM4 && !optimize_size)
3263 return 0;
2b1c08f5 3264 return op == const1_rtx || op == constm1_rtx;
e075ae69 3265}
2a2ab3f9 3266
371bc54b
JH
3267/* Return nonzero if OP is acceptable as operand of DImode shift
3268 expander. */
3269
3270int
3271shiftdi_operand (op, mode)
3272 rtx op;
3273 enum machine_mode mode ATTRIBUTE_UNUSED;
3274{
3275 if (TARGET_64BIT)
3276 return nonimmediate_operand (op, mode);
3277 else
3278 return register_operand (op, mode);
3279}
3280
0f290768 3281/* Return false if this is the stack pointer, or any other fake
e075ae69
RH
3282 register eliminable to the stack pointer. Otherwise, this is
3283 a register operand.
2a2ab3f9 3284
e075ae69
RH
3285 This is used to prevent esp from being used as an index reg.
3286 Which would only happen in pathological cases. */
5f1ec3e6 3287
e075ae69
RH
3288int
3289reg_no_sp_operand (op, mode)
3290 register rtx op;
3291 enum machine_mode mode;
3292{
3293 rtx t = op;
3294 if (GET_CODE (t) == SUBREG)
3295 t = SUBREG_REG (t);
564d80f4 3296 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 3297 return 0;
2a2ab3f9 3298
e075ae69 3299 return register_operand (op, mode);
2a2ab3f9 3300}
b840bfb0 3301
915119a5
BS
3302int
3303mmx_reg_operand (op, mode)
3304 register rtx op;
bd793c65 3305 enum machine_mode mode ATTRIBUTE_UNUSED;
915119a5
BS
3306{
3307 return MMX_REG_P (op);
3308}
3309
2c5a510c
RH
3310/* Return false if this is any eliminable register. Otherwise
3311 general_operand. */
3312
3313int
3314general_no_elim_operand (op, mode)
3315 register rtx op;
3316 enum machine_mode mode;
3317{
3318 rtx t = op;
3319 if (GET_CODE (t) == SUBREG)
3320 t = SUBREG_REG (t);
3321 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3322 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3323 || t == virtual_stack_dynamic_rtx)
3324 return 0;
1020a5ab
RH
3325 if (REG_P (t)
3326 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3327 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3328 return 0;
2c5a510c
RH
3329
3330 return general_operand (op, mode);
3331}
3332
3333/* Return false if this is any eliminable register. Otherwise
3334 register_operand or const_int. */
3335
3336int
3337nonmemory_no_elim_operand (op, mode)
3338 register rtx op;
3339 enum machine_mode mode;
3340{
3341 rtx t = op;
3342 if (GET_CODE (t) == SUBREG)
3343 t = SUBREG_REG (t);
3344 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3345 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3346 || t == virtual_stack_dynamic_rtx)
3347 return 0;
3348
3349 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3350}
3351
7ec70495
JH
3352/* Return false if this is any eliminable register or stack register,
3353 otherwise work like register_operand. */
3354
3355int
3356index_register_operand (op, mode)
3357 register rtx op;
3358 enum machine_mode mode;
3359{
3360 rtx t = op;
3361 if (GET_CODE (t) == SUBREG)
3362 t = SUBREG_REG (t);
3363 if (!REG_P (t))
3364 return 0;
3365 if (t == arg_pointer_rtx
3366 || t == frame_pointer_rtx
3367 || t == virtual_incoming_args_rtx
3368 || t == virtual_stack_vars_rtx
3369 || t == virtual_stack_dynamic_rtx
3370 || REGNO (t) == STACK_POINTER_REGNUM)
3371 return 0;
3372
3373 return general_operand (op, mode);
3374}
3375
e075ae69 3376/* Return true if op is a Q_REGS class register. */
b840bfb0 3377
e075ae69
RH
3378int
3379q_regs_operand (op, mode)
3380 register rtx op;
3381 enum machine_mode mode;
b840bfb0 3382{
e075ae69
RH
3383 if (mode != VOIDmode && GET_MODE (op) != mode)
3384 return 0;
3385 if (GET_CODE (op) == SUBREG)
3386 op = SUBREG_REG (op);
7799175f 3387 return ANY_QI_REG_P (op);
0f290768 3388}
b840bfb0 3389
e075ae69 3390/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 3391
e075ae69
RH
3392int
3393non_q_regs_operand (op, mode)
3394 register rtx op;
3395 enum machine_mode mode;
3396{
3397 if (mode != VOIDmode && GET_MODE (op) != mode)
3398 return 0;
3399 if (GET_CODE (op) == SUBREG)
3400 op = SUBREG_REG (op);
3401 return NON_QI_REG_P (op);
0f290768 3402}
b840bfb0 3403
915119a5
BS
3404/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3405 insns. */
3406int
3407sse_comparison_operator (op, mode)
3408 rtx op;
3409 enum machine_mode mode ATTRIBUTE_UNUSED;
3410{
3411 enum rtx_code code = GET_CODE (op);
a46d1d38
JH
3412 switch (code)
3413 {
3414 /* Operations supported directly. */
3415 case EQ:
3416 case LT:
3417 case LE:
3418 case UNORDERED:
3419 case NE:
3420 case UNGE:
3421 case UNGT:
3422 case ORDERED:
3423 return 1;
3424 /* These are equivalent to ones above in non-IEEE comparisons. */
3425 case UNEQ:
3426 case UNLT:
3427 case UNLE:
3428 case LTGT:
3429 case GE:
3430 case GT:
3431 return !TARGET_IEEE_FP;
3432 default:
3433 return 0;
3434 }
915119a5 3435}
/* Return 1 if OP is a valid comparison operator in valid mode.
   For FP compares, accept only codes that need no bypass/second jump;
   for integer compares, the acceptable codes depend on which flags
   the producing CC mode guarantees to be valid.  */
int
ix86_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));

  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      /* FP compares are usable only when they map to a single
	 integer condition (no extra bypass/second jump needed).  */
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      return (bypass_code == NIL && second_code == NIL);
    }
  switch (code)
    {
    case EQ: case NE:
      return 1;
    case LT: case GE:
      /* Need the sign flag; valid in all flag-setting modes.  */
      if (inmode == CCmode || inmode == CCGCmode
	  || inmode == CCGOCmode || inmode == CCNOmode)
	return 1;
      return 0;
    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
      /* Need the carry flag; only full CCmode guarantees it.  */
      if (inmode == CCmode)
	return 1;
      return 0;
    case GT: case LE:
      /* Need both sign and overflow; CCGOCmode does not provide OF.  */
      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
	return 1;
      return 0;
    default:
      return 0;
    }
}
3477
/* Return 1 if OP is a comparison operator that can be issued by fcmov.
   FP comparisons are first reduced to their integer equivalent; fcmov
   then supports only the unsigned and equality condition codes.  */

int
fcmov_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));
  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      /* Reject FP compares needing a bypass/second jump, then map the
	 FP code to the integer code fcmov would actually test.  */
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	return 0;
      code = ix86_fp_compare_code_to_integer (code);
    }
  /* i387 supports just limited amount of conditional codes.  */
  switch (code)
    {
    case LTU: case GTU: case LEU: case GEU:
      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
	return 1;
      return 0;
    case ORDERED: case UNORDERED:
    case EQ: case NE:
      return 1;
    default:
      return 0;
    }
}
b840bfb0 3514
e9e80858
JH
3515/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3516
3517int
3518promotable_binary_operator (op, mode)
3519 register rtx op;
3520 enum machine_mode mode ATTRIBUTE_UNUSED;
3521{
3522 switch (GET_CODE (op))
3523 {
3524 case MULT:
3525 /* Modern CPUs have same latency for HImode and SImode multiply,
3526 but 386 and 486 do HImode multiply faster. */
3527 return ix86_cpu > PROCESSOR_I486;
3528 case PLUS:
3529 case AND:
3530 case IOR:
3531 case XOR:
3532 case ASHIFT:
3533 return 1;
3534 default:
3535 return 0;
3536 }
3537}
3538
e075ae69
RH
3539/* Nearly general operand, but accept any const_double, since we wish
3540 to be able to drop them into memory rather than have them get pulled
3541 into registers. */
b840bfb0 3542
2a2ab3f9 3543int
e075ae69
RH
3544cmp_fp_expander_operand (op, mode)
3545 register rtx op;
3546 enum machine_mode mode;
2a2ab3f9 3547{
e075ae69 3548 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 3549 return 0;
e075ae69 3550 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 3551 return 1;
e075ae69 3552 return general_operand (op, mode);
2a2ab3f9
JVA
3553}
3554
e075ae69 3555/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
3556
3557int
e075ae69 3558ext_register_operand (op, mode)
2a2ab3f9 3559 register rtx op;
bb5177ac 3560 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 3561{
3522082b 3562 int regno;
0d7d98ee
JH
3563 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3564 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
e075ae69 3565 return 0;
3522082b
JH
3566
3567 if (!register_operand (op, VOIDmode))
3568 return 0;
3569
3570 /* Be curefull to accept only registers having upper parts. */
3571 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3572 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
e075ae69
RH
3573}
3574
3575/* Return 1 if this is a valid binary floating-point operation.
0f290768 3576 OP is the expression matched, and MODE is its mode. */
e075ae69
RH
3577
3578int
3579binary_fp_operator (op, mode)
3580 register rtx op;
3581 enum machine_mode mode;
3582{
3583 if (mode != VOIDmode && mode != GET_MODE (op))
3584 return 0;
3585
2a2ab3f9
JVA
3586 switch (GET_CODE (op))
3587 {
e075ae69
RH
3588 case PLUS:
3589 case MINUS:
3590 case MULT:
3591 case DIV:
3592 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 3593
2a2ab3f9
JVA
3594 default:
3595 return 0;
3596 }
3597}
fee2770d 3598
e075ae69 3599int
b531087a 3600mult_operator (op, mode)
e075ae69
RH
3601 register rtx op;
3602 enum machine_mode mode ATTRIBUTE_UNUSED;
3603{
3604 return GET_CODE (op) == MULT;
3605}
3606
3607int
b531087a 3608div_operator (op, mode)
e075ae69
RH
3609 register rtx op;
3610 enum machine_mode mode ATTRIBUTE_UNUSED;
3611{
3612 return GET_CODE (op) == DIV;
3613}
0a726ef1
JL
3614
3615int
e075ae69
RH
3616arith_or_logical_operator (op, mode)
3617 rtx op;
3618 enum machine_mode mode;
0a726ef1 3619{
e075ae69
RH
3620 return ((mode == VOIDmode || GET_MODE (op) == mode)
3621 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3622 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
3623}
3624
e075ae69 3625/* Returns 1 if OP is memory operand with a displacement. */
fee2770d
RS
3626
3627int
e075ae69
RH
3628memory_displacement_operand (op, mode)
3629 register rtx op;
3630 enum machine_mode mode;
4f2c8ebb 3631{
e075ae69 3632 struct ix86_address parts;
e9a25f70 3633
e075ae69
RH
3634 if (! memory_operand (op, mode))
3635 return 0;
3636
3637 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3638 abort ();
3639
3640 return parts.disp != NULL_RTX;
4f2c8ebb
RS
3641}
3642
/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (nonimmediate_operand (op, mode))
    return 1;

  /* Also accept (and (zero_extract:SI X 8 8) (const_int)) — the form
     testqi_ext_ccno_0 uses to test the high byte of ax/bx/cx/dx.  */
  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}
d784886d 3669
e075ae69
RH
/* Returns 1 if OP is memory operand that can not be represented by the
   modRM array.  */

int
long_memory_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (! memory_operand (op, mode))
    return 0;

  /* A nonzero encoded address length means the address needs extra
     displacement/SIB bytes beyond the bare modRM byte.  */
  return memory_address_length (op) != 0;
}
2247f6ed
JH
3683
/* Return nonzero if the rtx is known aligned (to a 32-bit boundary).  */

int
aligned_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  /* Strip SUBREGs so REGNO below sees the underlying hard/pseudo reg.  */
  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      /* A scale of 4 or more keeps the index contribution 4-aligned
         regardless of the index register's own alignment.  */
      if (parts.scale < 4
          && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
        return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
        return 0;
    }
  if (parts.disp)
    {
      /* The displacement must be a constant multiple of 4.  */
      if (GET_CODE (parts.disp) != CONST_INT
          || (INTVAL (parts.disp) & 3) != 0)
        return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
e075ae69
RH
3742\f
/* Return true if the constant is something that can be loaded with
   a special instruction.  Only handle 0.0 and 1.0; others are less
   worthwhile.

   Returns -1 if X is not an FP CONST_DOUBLE at all, 1 for 0.0 (fldz),
   2 for 1.0 (fld1), and 0 for any other FP constant.  */

int
standard_80387_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;
  /* Note that on the 80387, other constants, such as pi, that we should
     support too.  On some machines, these are much slower to load as
     standard constant, than to load from doubles in memory.  */
  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;
  return 0;
}
3762
2b04e52b
JH
/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
   Only all-zero constants qualify (materialized with a self-xor).  */
int
standard_sse_constant_p (x)
     rtx x;
{
  /* Integer zero.  */
  if (x == const0_rtx)
    return 1;
  /* Floating-point zero in X's own mode.  */
  return (x == CONST0_RTX (GET_MODE (x)));
}
3773
2a2ab3f9
JVA
/* Returns 1 if OP contains a symbol reference.

   Recursively walks the whole rtx looking for any SYMBOL_REF or
   LABEL_REF, descending into both expression ('e') and vector ('E')
   operands.  */

int
symbolic_reference_mentioned_p (op)
     rtx op;
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          register int j;

          for (j = XVECLEN (op, i) - 1; j >= 0; j--)
            if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
              return 1;
        }

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
        return 1;
    }

  return 0;
}
e075ae69
RH
3804
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  struct ix86_frame frame;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32 pop, since that's all we can do
     with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  /* Simple only if nothing was allocated and no register was saved.  */
  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}
6189a572
JH
3839\f
/* Return 1 if VALUE can be stored in the sign extended immediate field.
   ALLOW_RIP nonzero additionally permits %rip-relative addressable
   symbols/labels under the small PIC code model.  */
int
x86_64_sign_extended_value (value, allow_rip)
     rtx value;
     int allow_rip;
{
  switch (GET_CODE (value))
    {
      /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
         to be at least 32 and this all acceptable constants are
         represented as CONST_INT.  */
      case CONST_INT:
        if (HOST_BITS_PER_WIDE_INT == 32)
          return 1;
        else
          {
            /* Accept only values that survive truncation to 32 bits
               and sign extension back unchanged.  */
            HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
            return trunc_int_for_mode (val, SImode) == val;
          }
        break;

      /* For certain code models, the symbolic references are known to fit.
         in CM_SMALL_PIC model we know it fits if it is local to the shared
         library.  Don't count TLS SYMBOL_REFs here, since they should fit
         only if inside of UNSPEC handled below.  */
      case SYMBOL_REF:
        return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL
                || (allow_rip
                    && ix86_cmodel == CM_SMALL_PIC
                    && (CONSTANT_POOL_ADDRESS_P (value)
                        || SYMBOL_REF_FLAG (value))
                    && ! tls_symbolic_operand (value, GET_MODE (value))));

      /* For certain code models, the code is near as well.  */
      case LABEL_REF:
        return ix86_cmodel != CM_LARGE
               && (allow_rip || ix86_cmodel != CM_SMALL_PIC);

      /* We also may accept the offsetted memory references in certain special
         cases.  */
      case CONST:
        /* TLS/GOT-relative unspecs always fit in 32 bits.  */
        if (GET_CODE (XEXP (value, 0)) == UNSPEC)
          switch (XINT (XEXP (value, 0), 1))
            {
            case UNSPEC_GOTPCREL:
            case UNSPEC_DTPOFF:
            case UNSPEC_GOTNTPOFF:
            case UNSPEC_NTPOFF:
              return 1;
            default:
              break;
            }
        if (GET_CODE (XEXP (value, 0)) == PLUS)
          {
            rtx op1 = XEXP (XEXP (value, 0), 0);
            rtx op2 = XEXP (XEXP (value, 0), 1);
            HOST_WIDE_INT offset;

            if (ix86_cmodel == CM_LARGE)
              return 0;
            if (GET_CODE (op2) != CONST_INT)
              return 0;
            offset = trunc_int_for_mode (INTVAL (op2), DImode);
            switch (GET_CODE (op1))
              {
              case SYMBOL_REF:
                /* For CM_SMALL assume that latest object is 16MB before
                   end of 31bits boundary.  We may also accept pretty
                   large negative constants knowing that all objects are
                   in the positive half of address space.  */
                if (ix86_cmodel == CM_SMALL
                    && offset < 16*1024*1024
                    && trunc_int_for_mode (offset, SImode) == offset)
                  return 1;
                /* For CM_KERNEL we know that all object reside in the
                   negative half of 32bits address space.  We may not
                   accept negative offsets, since they may be just off
                   and we may accept pretty large positive ones.  */
                if (ix86_cmodel == CM_KERNEL
                    && offset > 0
                    && trunc_int_for_mode (offset, SImode) == offset)
                  return 1;
                /* For CM_SMALL_PIC, we can make similar assumptions
                   as for CM_SMALL model, if we know the symbol is local
                   to the shared library.  Disallow any TLS symbols,
                   since they should always be enclosed in an UNSPEC.  */
                if (ix86_cmodel == CM_SMALL_PIC
                    && allow_rip
                    && (CONSTANT_POOL_ADDRESS_P (op1)
                        || SYMBOL_REF_FLAG (op1))
                    && ! tls_symbolic_operand (op1, GET_MODE (op1))
                    && offset < 16*1024*1024
                    && offset >= -16*1024*1024
                    && trunc_int_for_mode (offset, SImode) == offset)
                  return 1;
                break;
              case LABEL_REF:
                /* These conditions are similar to SYMBOL_REF ones, just the
                   constraints for code models differ.  */
                if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
                     || (ix86_cmodel == CM_SMALL_PIC && allow_rip
                         && offset >= -16*1024*1024))
                    && offset < 16*1024*1024
                    && trunc_int_for_mode (offset, SImode) == offset)
                  return 1;
                if (ix86_cmodel == CM_KERNEL
                    && offset > 0
                    && trunc_int_for_mode (offset, SImode) == offset)
                  return 1;
                break;
              case UNSPEC:
                /* TLS offsets: only positive 32-bit offsets are valid.  */
                switch (XINT (op1, 1))
                  {
                  case UNSPEC_DTPOFF:
                  case UNSPEC_NTPOFF:
                    if (offset > 0
                        && trunc_int_for_mode (offset, SImode) == offset)
                      return 1;
                  }
                break;
              default:
                return 0;
              }
          }
        return 0;
      default:
        return 0;
    }
}
3969
/* Return 1 if VALUE can be stored in the zero extended immediate field.  */
int
x86_64_zero_extended_value (value)
     rtx value;
{
  switch (GET_CODE (value))
    {
      case CONST_DOUBLE:
        /* With 32-bit HOST_WIDE_INT, a VOIDmode CONST_DOUBLE carries a
           64-bit integer; it fits iff the high word is zero.  */
        if (HOST_BITS_PER_WIDE_INT == 32)
          return (GET_MODE (value) == VOIDmode
                  && !CONST_DOUBLE_HIGH (value));
        else
          return 0;
      case CONST_INT:
        if (HOST_BITS_PER_WIDE_INT == 32)
          return INTVAL (value) >= 0;
        else
          /* Fits iff no bits above the low 32 are set.  */
          return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
        break;

      /* For certain code models, the symbolic references are known to fit.  */
      case SYMBOL_REF:
        return ix86_cmodel == CM_SMALL;

      /* For certain code models, the code is near as well.  */
      case LABEL_REF:
        return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;

      /* We also may accept the offsetted memory references in certain special
         cases.  */
      case CONST:
        if (GET_CODE (XEXP (value, 0)) == PLUS)
          {
            rtx op1 = XEXP (XEXP (value, 0), 0);
            rtx op2 = XEXP (XEXP (value, 0), 1);

            if (ix86_cmodel == CM_LARGE)
              return 0;
            switch (GET_CODE (op1))
              {
              case SYMBOL_REF:
                  return 0;
                /* NOTE(review): the unconditional return 0 above makes the
                   rest of this case unreachable -- looks like a deliberate
                   disable of symbol+offset acceptance, but confirm against
                   upstream history before relying on the dead code below.  */
                /* For small code model we may accept pretty large positive
                   offsets, since one bit is available for free.  Negative
                   offsets are limited by the size of NULL pointer area
                   specified by the ABI.  */
                if (ix86_cmodel == CM_SMALL
                    && GET_CODE (op2) == CONST_INT
                    && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
                    && (trunc_int_for_mode (INTVAL (op2), SImode)
                        == INTVAL (op2)))
                  return 1;
                /* ??? For the kernel, we may accept adjustment of
                   -0x10000000, since we know that it will just convert
                   negative address space to positive, but perhaps this
                   is not worthwhile.  */
                break;
              case LABEL_REF:
                /* These conditions are similar to SYMBOL_REF ones, just the
                   constraints for code models differ.  */
                if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
                    && GET_CODE (op2) == CONST_INT
                    && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
                    && (trunc_int_for_mode (INTVAL (op2), SImode)
                        == INTVAL (op2)))
                  return 1;
                break;
              default:
                return 0;
              }
          }
        return 0;
      default:
        return 0;
    }
}
6fca22eb
RH
4046
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

int
ix86_frame_pointer_required ()
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return 1;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return 1;

  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
     the frame pointer by default.  Turn it back on now if we've not
     got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf))
    return 1;

  /* Profiling (mcount) needs a standard frame.  */
  if (current_function_profile)
    return 1;

  return 0;
}
4076
/* Record that the current function accesses previous call frames.
   Forces ix86_frame_pointer_required to answer yes.  */

void
ix86_setup_frame_addresses ()
{
  cfun->machine->accesses_prev_frame = 1;
}
e075ae69 4084\f
145aacc2
RH
/* Emit the PIC pc thunks as hidden, one-only (COMDAT) functions when the
   assembler supports .hidden and the target supports one-only sections;
   otherwise each translation unit emits its own local copy.  */
#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Bitmask: bit N set means a pc thunk returning in register N has been
   requested (by output_set_got) and must be emitted at file end.  */
static int pic_labels_used;
e9a25f70 4092
145aacc2
RH
/* Fills in the label name that should be used for a pc thunk for
   the given register.  NAME must have room for 32 characters.  */

static void
get_pc_thunk_name (name, regno)
     char name[32];
     unsigned int regno;
{
  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
4106
4107
e075ae69
RH
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.

   Called at end of assembly output; emits one pc thunk for every
   register flagged in pic_labels_used.  */

void
ix86_asm_file_end (file)
     FILE *file;
{
  rtx xops[2];
  int regno;

  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];

      /* Skip registers for which no thunk was requested.  */
      if (! ((pic_labels_used >> regno) & 1))
        continue;

      get_pc_thunk_name (name, regno);

      if (USE_HIDDEN_LINKONCE)
        {
          tree decl;

          /* Build a public, one-only FUNCTION_DECL so the thunk is
             shared across objects and hidden from dynamic linking.  */
          decl = build_decl (FUNCTION_DECL, get_identifier (name),
                             error_mark_node);
          TREE_PUBLIC (decl) = 1;
          TREE_STATIC (decl) = 1;
          DECL_ONE_ONLY (decl) = 1;

          (*targetm.asm_out.unique_section) (decl, 0);
          named_section (decl, NULL, 0);

          (*targetm.asm_out.globalize_label) (file, name);
          fputs ("\t.hidden\t", file);
          assemble_name (file, name);
          fputc ('\n', file);
          ASM_DECLARE_FUNCTION_NAME (file, name, decl);
        }
      else
        {
          text_section ();
          ASM_OUTPUT_LABEL (file, name);
        }

      /* Thunk body: copy the return address (at the top of the stack)
         into the destination register, then return.  */
      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }
}
32b5b1aa 4158
/* Emit code for the SET_GOT patterns.  Loads the GOT base address into
   DEST.  Returns the (empty) template string expected of an output
   function.  */

const char *
output_set_got (dest)
     rtx dest;
{
  rtx xops[3];

  xops[0] = dest;
  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
    {
      /* Inline call/pop sequence rather than a thunk call.  */
      xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());

      if (!flag_pic)
        output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      else
        output_asm_insn ("call\t%a2", xops);

#if TARGET_MACHO
      /* Output the "canonical" label name ("Lxx$pb") here too.  This
         is what will be referred to by the Mach-O PIC subsystem.  */
      ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
#endif
      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
                                 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));

      /* The call pushed the pc; pop it into DEST.  */
      if (flag_pic)
        output_asm_insn ("pop{l}\t%0", xops);
    }
  else
    {
      /* Call the per-register get_pc thunk; mark it for emission at
         file end.  */
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
    }

  /* Adjust the loaded pc by _GLOBAL_OFFSET_TABLE_ to get the GOT base.  */
  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
  else if (!TARGET_MACHO)
    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);

  return "";
}
8dfe5673 4208
/* Generate an "push" pattern for input ARG:
   (set (mem:Pmode (pre_dec:Pmode sp)) arg).  */

static rtx
gen_push (arg)
     rtx arg;
{
  return gen_rtx_SET (VOIDmode,
                      gen_rtx_MEM (Pmode,
                                   gen_rtx_PRE_DEC (Pmode,
                                                    stack_pointer_rtx)),
                      arg);
}
4221
bd09bdeb
RH
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  Returns INVALID_REGNUM otherwise.  */

static unsigned int
ix86_select_alt_pic_regnum ()
{
  /* Only leaf, non-profiled functions can safely repurpose one of
     eax/ecx/edx (regnos 0..2) as the PIC register.  */
  if (current_function_is_leaf && !current_function_profile)
    {
      int i;
      for (i = 2; i >= 0; --i)
        if (!regs_ever_live[i])
          return i;
    }

  return INVALID_REGNUM;
}
fce5a9f2 4238
4dd2ac2c
JH
/* Return 1 if we need to save REGNO.  MAYBE_EH_RETURN nonzero means
   also count the EH data registers on eh_return paths.  */
static int
ix86_save_reg (regno, maybe_eh_return)
     unsigned int regno;
     int maybe_eh_return;
{
  /* The PIC register needs saving when live -- unless an unused
     call-clobbered register was picked to hold it instead.  */
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
          || current_function_profile
          || current_function_calls_eh_return))
    {
      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
        return 0;
      return 1;
    }

  /* Registers used to pass EH data must be preserved for eh_return.  */
  if (current_function_calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
        {
          unsigned test = EH_RETURN_DATA_REGNO (i);
          if (test == INVALID_REGNUM)
            break;
          if (test == regno)
            return 1;
        }
    }

  /* Otherwise: live, call-saved, not fixed, and not the frame pointer
     when the frame pointer is already being set up.  */
  return (regs_ever_live[regno]
          && !call_used_regs[regno]
          && !fixed_regs[regno]
          && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
4274
0903fcab
JH
/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs ()
{
  int nregs = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      nregs++;
  return nregs;
}
4288
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.

   FROM is ARG_POINTER_REGNUM or FRAME_POINTER_REGNUM; TO is
   HARD_FRAME_POINTER_REGNUM or STACK_POINTER_REGNUM.  Any other
   combination aborts.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (from, to)
     int from;
     int to;
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
           && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      if (to != STACK_POINTER_REGNUM)
        abort ();
      else if (from == ARG_POINTER_REGNUM)
        return frame.stack_pointer_offset;
      else if (from != FRAME_POINTER_REGNUM)
        abort ();
      else
        return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
4317
/* Fill structure ix86_frame about frame of currently computed function.

   Layout, from high addresses down: return address, saved %ebp (if a
   frame pointer is used), saved registers, va-arg save area, padding1
   (to the frame's own alignment), local variables, outgoing argument
   area, padding2 (to the preferred boundary).  On x86-64 a leaf
   function's allocation may be folded into the red zone.  */

static void
ix86_compute_frame_layout (frame)
     struct ix86_frame *frame;
{
  HOST_WIDE_INT total_size;
  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  int offset;
  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();
  /* NOTE(review): total_size is assigned but apparently never used
     below -- confirm whether it can be removed.  */
  total_size = size;

  /* Skip return address and saved base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since i386 port is the only using those features
     that may break easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;

  /* Va-arg area */
  if (ix86_save_varrargs_registers)
    {
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    }
  else
    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
                     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.  */
  if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
    {
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || current_function_calls_alloca)
    frame->padding2 = ((offset + preferred_alignment - 1)
                       & -preferred_alignment) - offset;
  else
    frame->padding2 = 0;

  offset += frame->padding2;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  /* x86-64 leaf functions with an unchanging stack pointer can use the
     red zone below %rsp instead of allocating, up to the ABI limit
     minus the amount reserved for temporaries.  */
  if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf)
    {
      frame->red_zone_size = frame->to_allocate;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
        frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
  /* Debug dump of the computed layout.  */
  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
           frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
#endif
}
4431
0903fcab
JH
/* Emit code to save registers in the prologue, using push insns.
   Each push is marked frame-related for DWARF CFI generation.  */

static void
ix86_emit_save_regs ()
{
  register int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      {
        insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
        RTX_FRAME_RELATED_P (insn) = 1;
      }
}
4447
c6036a37
JH
/* Emit code to save registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */
static void
ix86_emit_save_regs_using_mov (pointer, offset)
     rtx pointer;
     HOST_WIDE_INT offset;
{
  int regno;
  rtx insn;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
        /* Store each saved register at successive word offsets.  */
        insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
                                               Pmode, offset),
                               gen_rtx_REG (Pmode, regno));
        RTX_FRAME_RELATED_P (insn) = 1;
        offset += UNITS_PER_WORD;
      }
}
4468
/* Expand the prologue into a bunch of separate insns.

   Sets up the frame pointer (if needed), saves registers either with
   pushes or with moves into a pre-allocated area, allocates the frame
   (via sub, or _alloca for large Win32 probes), and loads the PIC
   register when required.  */

void
ix86_expand_prologue ()
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  int use_mov = 0;
  HOST_WIDE_INT allocate;

  if (!optimize_size)
    {
      /* Fast prologue (mov-based saves) only pays off for functions
         cheap enough that prologue cost matters.  */
      use_fast_prologue_epilogue
        = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
      if (TARGET_PROLOGUE_USING_MOVE)
        use_mov = use_fast_prologue_epilogue;
    }
  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;
  /* In case we are dealing only with single register and empty frame,
     push is equivalent of the mov+add sequence.  */
  if (allocate == 0 && frame.nregs <= 1)
    use_mov = 0;

  if (!use_mov)
    ix86_emit_save_regs ();
  else
    /* Mov-based saves need the save area included in the allocation.  */
    allocate += frame.nregs * UNITS_PER_WORD;

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    {
      insn = emit_insn (gen_pro_epilogue_adjust_stack
                        (stack_pointer_rtx, stack_pointer_rtx,
                         GEN_INT (-allocate)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* ??? Is this only valid for Win32?  */

      rtx arg0, sym;

      if (TARGET_64BIT)
        abort ();

      /* Large allocations must probe each page: call _alloca with the
         size in %eax.  */
      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (allocate));

      sym = gen_rtx_MEM (FUNCTION_MODE,
                         gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));

      CALL_INSN_FUNCTION_USAGE (insn)
        = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
                             CALL_INSN_FUNCTION_USAGE (insn));
    }
  if (use_mov)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
                                       -frame.nregs * UNITS_PER_WORD);
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
          || current_function_profile))
    {
      /* Prefer an unused call-clobbered register over %ebx when one
         is available for the whole function.  */
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
        REGNO (pic_offset_table_rtx) = alt_pic_reg_used;

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
         deleting all references to the pic register after reload.
         Consider if cross-jumping unifies two sides of a branch
         controlled by a comparison vs the only read from a global.
         In which case, allow the set_got to be deleted, though we're
         too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }

  /* Prevent function calls from be scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}
4585
da2d1d3a
JH
/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  MAYBE_EH_RETURN selects whether
   EH data registers are included (mirrors ix86_save_reg).  */
static void
ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
     rtx pointer;
     int offset;
     int maybe_eh_return;
{
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
        /* Loads must mirror the save order/offsets exactly.  */
        emit_move_insn (gen_rtx_REG (Pmode, regno),
                        adjust_address (gen_rtx_MEM (Pmode, pointer),
                                        Pmode, offset));
        offset += UNITS_PER_WORD;
      }
}
4605
0f290768 4606/* Restore function stack, frame, and registers. */
e9a25f70 4607
2a2ab3f9 4608void
1020a5ab
RH
4609ix86_expand_epilogue (style)
4610 int style;
2a2ab3f9 4611{
1c71e60e 4612 int regno;
fdb8a883 4613 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4dd2ac2c 4614 struct ix86_frame frame;
65954bd8 4615 HOST_WIDE_INT offset;
4dd2ac2c
JH
4616
4617 ix86_compute_frame_layout (&frame);
2a2ab3f9 4618
a4f31c00 4619 /* Calculate start of saved registers relative to ebp. Special care
84e306b4
RH
4620 must be taken for the normal return case of a function using
4621 eh_return: the eax and edx registers are marked as saved, but not
4622 restored along this path. */
4623 offset = frame.nregs;
4624 if (current_function_calls_eh_return && style != 2)
4625 offset -= 2;
4626 offset *= -UNITS_PER_WORD;
2a2ab3f9 4627
fdb8a883
JW
4628 /* If we're only restoring one register and sp is not valid then
4629 using a move instruction to restore the register since it's
0f290768 4630 less work than reloading sp and popping the register.
da2d1d3a
JH
4631
4632 The default code result in stack adjustment using add/lea instruction,
4633 while this code results in LEAVE instruction (or discrete equivalent),
4634 so it is profitable in some other cases as well. Especially when there
4635 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4636 and there is exactly one register to pop. This heruistic may need some
4637 tuning in future. */
4dd2ac2c 4638 if ((!sp_valid && frame.nregs <= 1)
2ab0437e 4639 || (TARGET_EPILOGUE_USING_MOVE
6ab16dd9 4640 && use_fast_prologue_epilogue
c6036a37 4641 && (frame.nregs > 1 || frame.to_allocate))
4dd2ac2c 4642 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2ab0437e 4643 || (frame_pointer_needed && TARGET_USE_LEAVE
6ab16dd9 4644 && use_fast_prologue_epilogue && frame.nregs == 1)
2ab0437e 4645 || current_function_calls_eh_return)
2a2ab3f9 4646 {
da2d1d3a
JH
4647 /* Restore registers. We can use ebp or esp to address the memory
4648 locations. If both are available, default to ebp, since offsets
4649 are known to be small. Only exception is esp pointing directly to the
4650 end of block of saved registers, where we may simplify addressing
4651 mode. */
4652
4dd2ac2c 4653 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
1020a5ab
RH
4654 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4655 frame.to_allocate, style == 2);
da2d1d3a 4656 else
1020a5ab
RH
4657 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4658 offset, style == 2);
4659
4660 /* eh_return epilogues need %ecx added to the stack pointer. */
4661 if (style == 2)
4662 {
4663 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
2a2ab3f9 4664
1020a5ab
RH
4665 if (frame_pointer_needed)
4666 {
4667 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4668 tmp = plus_constant (tmp, UNITS_PER_WORD);
4669 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4670
4671 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4672 emit_move_insn (hard_frame_pointer_rtx, tmp);
4673
4674 emit_insn (gen_pro_epilogue_adjust_stack
f2042df3 4675 (stack_pointer_rtx, sa, const0_rtx));
1020a5ab
RH
4676 }
4677 else
4678 {
4679 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4680 tmp = plus_constant (tmp, (frame.to_allocate
4681 + frame.nregs * UNITS_PER_WORD));
4682 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4683 }
4684 }
4685 else if (!frame_pointer_needed)
f2042df3
RH
4686 emit_insn (gen_pro_epilogue_adjust_stack
4687 (stack_pointer_rtx, stack_pointer_rtx,
4688 GEN_INT (frame.to_allocate
4689 + frame.nregs * UNITS_PER_WORD)));
0f290768 4690 /* If not an i386, mov & pop is faster than "leave". */
6ab16dd9 4691 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
8362f420 4692 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
c8c5cb99 4693 else
2a2ab3f9 4694 {
1c71e60e
JH
4695 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4696 hard_frame_pointer_rtx,
f2042df3 4697 const0_rtx));
8362f420
JH
4698 if (TARGET_64BIT)
4699 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4700 else
4701 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
e9a25f70
JL
4702 }
4703 }
1c71e60e 4704 else
68f654ec 4705 {
1c71e60e
JH
4706 /* First step is to deallocate the stack frame so that we can
4707 pop the registers. */
4708 if (!sp_valid)
4709 {
4710 if (!frame_pointer_needed)
4711 abort ();
4712 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4713 hard_frame_pointer_rtx,
f2042df3 4714 GEN_INT (offset)));
1c71e60e 4715 }
4dd2ac2c 4716 else if (frame.to_allocate)
f2042df3
RH
4717 emit_insn (gen_pro_epilogue_adjust_stack
4718 (stack_pointer_rtx, stack_pointer_rtx,
4719 GEN_INT (frame.to_allocate)));
1c71e60e 4720
4dd2ac2c 4721 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4722 if (ix86_save_reg (regno, false))
8362f420
JH
4723 {
4724 if (TARGET_64BIT)
4725 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4726 else
4727 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4728 }
4dd2ac2c 4729 if (frame_pointer_needed)
8362f420 4730 {
f5143c46 4731 /* Leave results in shorter dependency chains on CPUs that are
2ab0437e
JH
4732 able to grok it fast. */
4733 if (TARGET_USE_LEAVE)
4734 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4735 else if (TARGET_64BIT)
8362f420
JH
4736 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4737 else
4738 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4739 }
68f654ec 4740 }
68f654ec 4741
cbbf65e0 4742 /* Sibcall epilogues don't want a return instruction. */
1020a5ab 4743 if (style == 0)
cbbf65e0
RH
4744 return;
4745
2a2ab3f9
JVA
4746 if (current_function_pops_args && current_function_args_size)
4747 {
e075ae69 4748 rtx popc = GEN_INT (current_function_pops_args);
2a2ab3f9 4749
b8c752c8
UD
4750 /* i386 can only pop 64K bytes. If asked to pop more, pop
4751 return address, do explicit add, and jump indirectly to the
0f290768 4752 caller. */
2a2ab3f9 4753
b8c752c8 4754 if (current_function_pops_args >= 65536)
2a2ab3f9 4755 {
e075ae69 4756 rtx ecx = gen_rtx_REG (SImode, 2);
e9a25f70 4757
8362f420
JH
4758 /* There are is no "pascal" calling convention in 64bit ABI. */
4759 if (TARGET_64BIT)
b531087a 4760 abort ();
8362f420 4761
e075ae69
RH
4762 emit_insn (gen_popsi1 (ecx));
4763 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
11837777 4764 emit_jump_insn (gen_return_indirect_internal (ecx));
e9a25f70 4765 }
79325812 4766 else
e075ae69
RH
4767 emit_jump_insn (gen_return_pop_internal (popc));
4768 }
4769 else
4770 emit_jump_insn (gen_return_internal ());
4771}
bd09bdeb
RH
4772
4773/* Reset from the function's potential modifications. */
4774
4775static void
4776ix86_output_function_epilogue (file, size)
4777 FILE *file ATTRIBUTE_UNUSED;
4778 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4779{
4780 if (pic_offset_table_rtx)
4781 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4782}
e075ae69
RH
4783\f
4784/* Extract the parts of an RTL expression that is a valid memory address
b446e5a2
JH
4785 for an instruction. Return 0 if the structure of the address is
4786 grossly off. Return -1 if the address contains ASHIFT, so it is not
4787 strictly valid, but still used for computing length of lea instruction.
4788 */
e075ae69
RH
4789
4790static int
4791ix86_decompose_address (addr, out)
4792 register rtx addr;
4793 struct ix86_address *out;
4794{
4795 rtx base = NULL_RTX;
4796 rtx index = NULL_RTX;
4797 rtx disp = NULL_RTX;
4798 HOST_WIDE_INT scale = 1;
4799 rtx scale_rtx = NULL_RTX;
b446e5a2 4800 int retval = 1;
e075ae69 4801
1540f9eb 4802 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
e075ae69
RH
4803 base = addr;
4804 else if (GET_CODE (addr) == PLUS)
4805 {
4806 rtx op0 = XEXP (addr, 0);
4807 rtx op1 = XEXP (addr, 1);
4808 enum rtx_code code0 = GET_CODE (op0);
4809 enum rtx_code code1 = GET_CODE (op1);
4810
4811 if (code0 == REG || code0 == SUBREG)
4812 {
4813 if (code1 == REG || code1 == SUBREG)
4814 index = op0, base = op1; /* index + base */
4815 else
4816 base = op0, disp = op1; /* base + displacement */
4817 }
4818 else if (code0 == MULT)
e9a25f70 4819 {
e075ae69
RH
4820 index = XEXP (op0, 0);
4821 scale_rtx = XEXP (op0, 1);
4822 if (code1 == REG || code1 == SUBREG)
4823 base = op1; /* index*scale + base */
e9a25f70 4824 else
e075ae69
RH
4825 disp = op1; /* index*scale + disp */
4826 }
4827 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4828 {
4829 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4830 scale_rtx = XEXP (XEXP (op0, 0), 1);
4831 base = XEXP (op0, 1);
4832 disp = op1;
2a2ab3f9 4833 }
e075ae69
RH
4834 else if (code0 == PLUS)
4835 {
4836 index = XEXP (op0, 0); /* index + base + disp */
4837 base = XEXP (op0, 1);
4838 disp = op1;
4839 }
4840 else
b446e5a2 4841 return 0;
e075ae69
RH
4842 }
4843 else if (GET_CODE (addr) == MULT)
4844 {
4845 index = XEXP (addr, 0); /* index*scale */
4846 scale_rtx = XEXP (addr, 1);
4847 }
4848 else if (GET_CODE (addr) == ASHIFT)
4849 {
4850 rtx tmp;
4851
4852 /* We're called for lea too, which implements ashift on occasion. */
4853 index = XEXP (addr, 0);
4854 tmp = XEXP (addr, 1);
4855 if (GET_CODE (tmp) != CONST_INT)
b446e5a2 4856 return 0;
e075ae69
RH
4857 scale = INTVAL (tmp);
4858 if ((unsigned HOST_WIDE_INT) scale > 3)
b446e5a2 4859 return 0;
e075ae69 4860 scale = 1 << scale;
b446e5a2 4861 retval = -1;
2a2ab3f9 4862 }
2a2ab3f9 4863 else
e075ae69
RH
4864 disp = addr; /* displacement */
4865
4866 /* Extract the integral value of scale. */
4867 if (scale_rtx)
e9a25f70 4868 {
e075ae69 4869 if (GET_CODE (scale_rtx) != CONST_INT)
b446e5a2 4870 return 0;
e075ae69 4871 scale = INTVAL (scale_rtx);
e9a25f70 4872 }
3b3c6a3f 4873
e075ae69
RH
4874 /* Allow arg pointer and stack pointer as index if there is not scaling */
4875 if (base && index && scale == 1
564d80f4
JH
4876 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4877 || index == stack_pointer_rtx))
e075ae69
RH
4878 {
4879 rtx tmp = base;
4880 base = index;
4881 index = tmp;
4882 }
4883
4884 /* Special case: %ebp cannot be encoded as a base without a displacement. */
564d80f4
JH
4885 if ((base == hard_frame_pointer_rtx
4886 || base == frame_pointer_rtx
4887 || base == arg_pointer_rtx) && !disp)
e075ae69
RH
4888 disp = const0_rtx;
4889
4890 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4891 Avoid this by transforming to [%esi+0]. */
4892 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4893 && base && !index && !disp
329e1d01 4894 && REG_P (base)
e075ae69
RH
4895 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4896 disp = const0_rtx;
4897
4898 /* Special case: encode reg+reg instead of reg*2. */
4899 if (!base && index && scale && scale == 2)
4900 base = index, scale = 1;
0f290768 4901
e075ae69
RH
4902 /* Special case: scaling cannot be encoded without base or displacement. */
4903 if (!base && !disp && index && scale != 1)
4904 disp = const0_rtx;
4905
4906 out->base = base;
4907 out->index = index;
4908 out->disp = disp;
4909 out->scale = scale;
3b3c6a3f 4910
b446e5a2 4911 return retval;
e075ae69 4912}
01329426
JH
4913\f
4914/* Return cost of the memory address x.
4915 For i386, it is better to use a complex address than let gcc copy
4916 the address into a reg and make a new pseudo. But not if the address
4917 requires to two regs - that would mean more pseudos with longer
4918 lifetimes. */
4919int
4920ix86_address_cost (x)
4921 rtx x;
4922{
4923 struct ix86_address parts;
4924 int cost = 1;
3b3c6a3f 4925
01329426
JH
4926 if (!ix86_decompose_address (x, &parts))
4927 abort ();
4928
1540f9eb
JH
4929 if (parts.base && GET_CODE (parts.base) == SUBREG)
4930 parts.base = SUBREG_REG (parts.base);
4931 if (parts.index && GET_CODE (parts.index) == SUBREG)
4932 parts.index = SUBREG_REG (parts.index);
4933
01329426
JH
4934 /* More complex memory references are better. */
4935 if (parts.disp && parts.disp != const0_rtx)
4936 cost--;
4937
4938 /* Attempt to minimize number of registers in the address. */
4939 if ((parts.base
4940 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4941 || (parts.index
4942 && (!REG_P (parts.index)
4943 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4944 cost++;
4945
4946 if (parts.base
4947 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4948 && parts.index
4949 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4950 && parts.base != parts.index)
4951 cost++;
4952
4953 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4954 since it's predecode logic can't detect the length of instructions
4955 and it degenerates to vector decoded. Increase cost of such
4956 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
0f290768 4957 to split such addresses or even refuse such addresses at all.
01329426
JH
4958
4959 Following addressing modes are affected:
4960 [base+scale*index]
4961 [scale*index+disp]
4962 [base+index]
0f290768 4963
01329426
JH
4964 The first and last case may be avoidable by explicitly coding the zero in
4965 memory address, but I don't have AMD-K6 machine handy to check this
4966 theory. */
4967
4968 if (TARGET_K6
4969 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4970 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4971 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4972 cost += 10;
0f290768 4973
01329426
JH
4974 return cost;
4975}
4976\f
b949ea8b
JW
4977/* If X is a machine specific address (i.e. a symbol or label being
4978 referenced as a displacement from the GOT implemented using an
4979 UNSPEC), then return the base term. Otherwise return X. */
4980
4981rtx
4982ix86_find_base_term (x)
4983 rtx x;
4984{
4985 rtx term;
4986
6eb791fc
JH
4987 if (TARGET_64BIT)
4988 {
4989 if (GET_CODE (x) != CONST)
4990 return x;
4991 term = XEXP (x, 0);
4992 if (GET_CODE (term) == PLUS
4993 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4994 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4995 term = XEXP (term, 0);
4996 if (GET_CODE (term) != UNSPEC
8ee41eaf 4997 || XINT (term, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
4998 return x;
4999
5000 term = XVECEXP (term, 0, 0);
5001
5002 if (GET_CODE (term) != SYMBOL_REF
5003 && GET_CODE (term) != LABEL_REF)
5004 return x;
5005
5006 return term;
5007 }
5008
b949ea8b
JW
5009 if (GET_CODE (x) != PLUS
5010 || XEXP (x, 0) != pic_offset_table_rtx
5011 || GET_CODE (XEXP (x, 1)) != CONST)
5012 return x;
5013
5014 term = XEXP (XEXP (x, 1), 0);
5015
5016 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
5017 term = XEXP (term, 0);
5018
5019 if (GET_CODE (term) != UNSPEC
8ee41eaf 5020 || XINT (term, 1) != UNSPEC_GOTOFF)
b949ea8b
JW
5021 return x;
5022
5023 term = XVECEXP (term, 0, 0);
5024
5025 if (GET_CODE (term) != SYMBOL_REF
5026 && GET_CODE (term) != LABEL_REF)
5027 return x;
5028
5029 return term;
5030}
5031\f
f996902d
RH
5032/* Determine if a given RTX is a valid constant. We already know this
5033 satisfies CONSTANT_P. */
5034
5035bool
5036legitimate_constant_p (x)
5037 rtx x;
5038{
5039 rtx inner;
5040
5041 switch (GET_CODE (x))
5042 {
5043 case SYMBOL_REF:
5044 /* TLS symbols are not constant. */
5045 if (tls_symbolic_operand (x, Pmode))
5046 return false;
5047 break;
5048
5049 case CONST:
5050 inner = XEXP (x, 0);
5051
5052 /* Offsets of TLS symbols are never valid.
5053 Discourage CSE from creating them. */
5054 if (GET_CODE (inner) == PLUS
5055 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5056 return false;
5057
5058 /* Only some unspecs are valid as "constants". */
5059 if (GET_CODE (inner) == UNSPEC)
5060 switch (XINT (inner, 1))
5061 {
5062 case UNSPEC_TPOFF:
5063 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
f996902d
RH
5064 default:
5065 return false;
5066 }
5067 break;
5068
5069 default:
5070 break;
5071 }
5072
5073 /* Otherwise we handle everything else in the move patterns. */
5074 return true;
5075}
5076
5077/* Determine if a given RTX is a valid constant address. */
5078
5079bool
5080constant_address_p (x)
5081 rtx x;
5082{
5083 switch (GET_CODE (x))
5084 {
5085 case LABEL_REF:
5086 case CONST_INT:
5087 return true;
5088
5089 case CONST_DOUBLE:
5090 return TARGET_64BIT;
5091
5092 case CONST:
b069de3b
SS
5093 /* For Mach-O, really believe the CONST. */
5094 if (TARGET_MACHO)
5095 return true;
5096 /* Otherwise fall through. */
f996902d
RH
5097 case SYMBOL_REF:
5098 return !flag_pic && legitimate_constant_p (x);
5099
5100 default:
5101 return false;
5102 }
5103}
5104
5105/* Nonzero if the constant value X is a legitimate general operand
fce5a9f2 5106 when generating PIC code. It is given that flag_pic is on and
f996902d
RH
5107 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5108
5109bool
5110legitimate_pic_operand_p (x)
5111 rtx x;
5112{
5113 rtx inner;
5114
5115 switch (GET_CODE (x))
5116 {
5117 case CONST:
5118 inner = XEXP (x, 0);
5119
5120 /* Only some unspecs are valid as "constants". */
5121 if (GET_CODE (inner) == UNSPEC)
5122 switch (XINT (inner, 1))
5123 {
5124 case UNSPEC_TPOFF:
5125 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
f996902d
RH
5126 default:
5127 return false;
5128 }
5129 /* FALLTHRU */
5130
5131 case SYMBOL_REF:
5132 case LABEL_REF:
5133 return legitimate_pic_address_disp_p (x);
5134
5135 default:
5136 return true;
5137 }
5138}
5139
e075ae69
RH
5140/* Determine if a given CONST RTX is a valid memory displacement
5141 in PIC mode. */
0f290768 5142
59be65f6 5143int
91bb873f
RH
5144legitimate_pic_address_disp_p (disp)
5145 register rtx disp;
5146{
f996902d
RH
5147 bool saw_plus;
5148
6eb791fc
JH
5149 /* In 64bit mode we can allow direct addresses of symbols and labels
5150 when they are not dynamic symbols. */
75d38379
JJ
5151 if (TARGET_64BIT && local_symbolic_operand (disp, Pmode))
5152 return 1;
91bb873f
RH
5153 if (GET_CODE (disp) != CONST)
5154 return 0;
5155 disp = XEXP (disp, 0);
5156
6eb791fc
JH
5157 if (TARGET_64BIT)
5158 {
5159 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5160 of GOT tables. We should not need these anyway. */
5161 if (GET_CODE (disp) != UNSPEC
8ee41eaf 5162 || XINT (disp, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
5163 return 0;
5164
5165 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5166 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5167 return 0;
5168 return 1;
5169 }
5170
f996902d 5171 saw_plus = false;
91bb873f
RH
5172 if (GET_CODE (disp) == PLUS)
5173 {
5174 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5175 return 0;
5176 disp = XEXP (disp, 0);
f996902d 5177 saw_plus = true;
91bb873f
RH
5178 }
5179
b069de3b
SS
5180 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5181 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5182 {
5183 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5184 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5185 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5186 {
5187 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5188 if (strstr (sym_name, "$pb") != 0)
5189 return 1;
5190 }
5191 }
5192
8ee41eaf 5193 if (GET_CODE (disp) != UNSPEC)
91bb873f
RH
5194 return 0;
5195
623fe810
RH
5196 switch (XINT (disp, 1))
5197 {
8ee41eaf 5198 case UNSPEC_GOT:
f996902d
RH
5199 if (saw_plus)
5200 return false;
623fe810 5201 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
8ee41eaf 5202 case UNSPEC_GOTOFF:
623fe810 5203 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
f996902d 5204 case UNSPEC_GOTTPOFF:
dea73790
JJ
5205 case UNSPEC_GOTNTPOFF:
5206 case UNSPEC_INDNTPOFF:
f996902d
RH
5207 if (saw_plus)
5208 return false;
5209 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5210 case UNSPEC_NTPOFF:
f996902d
RH
5211 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5212 case UNSPEC_DTPOFF:
f996902d 5213 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
623fe810 5214 }
fce5a9f2 5215
623fe810 5216 return 0;
91bb873f
RH
5217}
5218
e075ae69
RH
5219/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5220 memory address for an instruction. The MODE argument is the machine mode
5221 for the MEM expression that wants to use this address.
5222
5223 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5224 convert common non-canonical forms to canonical form so that they will
5225 be recognized. */
5226
3b3c6a3f
MM
5227int
5228legitimate_address_p (mode, addr, strict)
5229 enum machine_mode mode;
5230 register rtx addr;
5231 int strict;
5232{
e075ae69
RH
5233 struct ix86_address parts;
5234 rtx base, index, disp;
5235 HOST_WIDE_INT scale;
5236 const char *reason = NULL;
5237 rtx reason_rtx = NULL_RTX;
3b3c6a3f
MM
5238
5239 if (TARGET_DEBUG_ADDR)
5240 {
5241 fprintf (stderr,
e9a25f70 5242 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
3b3c6a3f 5243 GET_MODE_NAME (mode), strict);
3b3c6a3f
MM
5244 debug_rtx (addr);
5245 }
5246
9e20be0c
JJ
5247 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5248 {
5249 if (TARGET_DEBUG_ADDR)
5250 fprintf (stderr, "Success.\n");
5251 return TRUE;
5252 }
5253
b446e5a2 5254 if (ix86_decompose_address (addr, &parts) <= 0)
3b3c6a3f 5255 {
e075ae69 5256 reason = "decomposition failed";
50e60bc3 5257 goto report_error;
3b3c6a3f
MM
5258 }
5259
e075ae69
RH
5260 base = parts.base;
5261 index = parts.index;
5262 disp = parts.disp;
5263 scale = parts.scale;
91f0226f 5264
e075ae69 5265 /* Validate base register.
e9a25f70
JL
5266
5267 Don't allow SUBREG's here, it can lead to spill failures when the base
3d771dfd
MM
5268 is one word out of a two word structure, which is represented internally
5269 as a DImode int. */
e9a25f70 5270
3b3c6a3f
MM
5271 if (base)
5272 {
1540f9eb 5273 rtx reg;
e075ae69
RH
5274 reason_rtx = base;
5275
1540f9eb
JH
5276 if (GET_CODE (base) == SUBREG)
5277 reg = SUBREG_REG (base);
5278 else
5279 reg = base;
5280
5281 if (GET_CODE (reg) != REG)
3b3c6a3f 5282 {
e075ae69 5283 reason = "base is not a register";
50e60bc3 5284 goto report_error;
3b3c6a3f
MM
5285 }
5286
c954bd01
RH
5287 if (GET_MODE (base) != Pmode)
5288 {
e075ae69 5289 reason = "base is not in Pmode";
50e60bc3 5290 goto report_error;
c954bd01
RH
5291 }
5292
1540f9eb
JH
5293 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5294 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
3b3c6a3f 5295 {
e075ae69 5296 reason = "base is not valid";
50e60bc3 5297 goto report_error;
3b3c6a3f
MM
5298 }
5299 }
5300
e075ae69 5301 /* Validate index register.
e9a25f70
JL
5302
5303 Don't allow SUBREG's here, it can lead to spill failures when the index
3d771dfd
MM
5304 is one word out of a two word structure, which is represented internally
5305 as a DImode int. */
e075ae69
RH
5306
5307 if (index)
3b3c6a3f 5308 {
1540f9eb 5309 rtx reg;
e075ae69
RH
5310 reason_rtx = index;
5311
1540f9eb
JH
5312 if (GET_CODE (index) == SUBREG)
5313 reg = SUBREG_REG (index);
5314 else
5315 reg = index;
5316
5317 if (GET_CODE (reg) != REG)
3b3c6a3f 5318 {
e075ae69 5319 reason = "index is not a register";
50e60bc3 5320 goto report_error;
3b3c6a3f
MM
5321 }
5322
e075ae69 5323 if (GET_MODE (index) != Pmode)
c954bd01 5324 {
e075ae69 5325 reason = "index is not in Pmode";
50e60bc3 5326 goto report_error;
c954bd01
RH
5327 }
5328
1540f9eb
JH
5329 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5330 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
3b3c6a3f 5331 {
e075ae69 5332 reason = "index is not valid";
50e60bc3 5333 goto report_error;
3b3c6a3f
MM
5334 }
5335 }
3b3c6a3f 5336
e075ae69
RH
5337 /* Validate scale factor. */
5338 if (scale != 1)
3b3c6a3f 5339 {
e075ae69
RH
5340 reason_rtx = GEN_INT (scale);
5341 if (!index)
3b3c6a3f 5342 {
e075ae69 5343 reason = "scale without index";
50e60bc3 5344 goto report_error;
3b3c6a3f
MM
5345 }
5346
e075ae69 5347 if (scale != 2 && scale != 4 && scale != 8)
3b3c6a3f 5348 {
e075ae69 5349 reason = "scale is not a valid multiplier";
50e60bc3 5350 goto report_error;
3b3c6a3f
MM
5351 }
5352 }
5353
91bb873f 5354 /* Validate displacement. */
3b3c6a3f
MM
5355 if (disp)
5356 {
e075ae69
RH
5357 reason_rtx = disp;
5358
0d7d98ee 5359 if (TARGET_64BIT)
3b3c6a3f 5360 {
75d38379 5361 if (!x86_64_sign_extended_value (disp, !(index || base)))
0d7d98ee
JH
5362 {
5363 reason = "displacement is out of range";
5364 goto report_error;
5365 }
5366 }
5367 else
5368 {
5369 if (GET_CODE (disp) == CONST_DOUBLE)
5370 {
5371 reason = "displacement is a const_double";
5372 goto report_error;
5373 }
3b3c6a3f
MM
5374 }
5375
f996902d
RH
5376 if (GET_CODE (disp) == CONST
5377 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5378 switch (XINT (XEXP (disp, 0), 1))
5379 {
5380 case UNSPEC_GOT:
5381 case UNSPEC_GOTOFF:
5382 case UNSPEC_GOTPCREL:
5383 if (!flag_pic)
5384 abort ();
5385 goto is_legitimate_pic;
5386
5387 case UNSPEC_GOTTPOFF:
dea73790
JJ
5388 case UNSPEC_GOTNTPOFF:
5389 case UNSPEC_INDNTPOFF:
f996902d
RH
5390 case UNSPEC_NTPOFF:
5391 case UNSPEC_DTPOFF:
5392 break;
5393
5394 default:
5395 reason = "invalid address unspec";
5396 goto report_error;
5397 }
5398
b069de3b
SS
5399 else if (flag_pic && (SYMBOLIC_CONST (disp)
5400#if TARGET_MACHO
5401 && !machopic_operand_p (disp)
5402#endif
5403 ))
3b3c6a3f 5404 {
f996902d 5405 is_legitimate_pic:
0d7d98ee
JH
5406 if (TARGET_64BIT && (index || base))
5407 {
75d38379
JJ
5408 /* foo@dtpoff(%rX) is ok. */
5409 if (GET_CODE (disp) != CONST
5410 || GET_CODE (XEXP (disp, 0)) != PLUS
5411 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5412 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5413 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5414 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5415 {
5416 reason = "non-constant pic memory reference";
5417 goto report_error;
5418 }
0d7d98ee 5419 }
75d38379 5420 else if (! legitimate_pic_address_disp_p (disp))
91bb873f 5421 {
e075ae69 5422 reason = "displacement is an invalid pic construct";
50e60bc3 5423 goto report_error;
91bb873f
RH
5424 }
5425
4e9efe54 5426 /* This code used to verify that a symbolic pic displacement
0f290768
KH
5427 includes the pic_offset_table_rtx register.
5428
4e9efe54
JH
5429 While this is good idea, unfortunately these constructs may
5430 be created by "adds using lea" optimization for incorrect
5431 code like:
5432
5433 int a;
5434 int foo(int i)
5435 {
5436 return *(&a+i);
5437 }
5438
50e60bc3 5439 This code is nonsensical, but results in addressing
4e9efe54 5440 GOT table with pic_offset_table_rtx base. We can't
f710504c 5441 just refuse it easily, since it gets matched by
4e9efe54
JH
5442 "addsi3" pattern, that later gets split to lea in the
5443 case output register differs from input. While this
5444 can be handled by separate addsi pattern for this case
5445 that never results in lea, this seems to be easier and
5446 correct fix for crash to disable this test. */
3b3c6a3f 5447 }
f996902d
RH
5448 else if (!CONSTANT_ADDRESS_P (disp))
5449 {
5450 reason = "displacement is not constant";
5451 goto report_error;
5452 }
3b3c6a3f
MM
5453 }
5454
e075ae69 5455 /* Everything looks valid. */
3b3c6a3f 5456 if (TARGET_DEBUG_ADDR)
e075ae69 5457 fprintf (stderr, "Success.\n");
3b3c6a3f 5458 return TRUE;
e075ae69 5459
5bf0ebab 5460 report_error:
e075ae69
RH
5461 if (TARGET_DEBUG_ADDR)
5462 {
5463 fprintf (stderr, "Error: %s\n", reason);
5464 debug_rtx (reason_rtx);
5465 }
5466 return FALSE;
3b3c6a3f 5467}
3b3c6a3f 5468\f
55efb413
JW
5469/* Return an unique alias set for the GOT. */
5470
0f290768 5471static HOST_WIDE_INT
55efb413
JW
5472ix86_GOT_alias_set ()
5473{
5bf0ebab
RH
5474 static HOST_WIDE_INT set = -1;
5475 if (set == -1)
5476 set = new_alias_set ();
5477 return set;
0f290768 5478}
55efb413 5479
3b3c6a3f
MM
5480/* Return a legitimate reference for ORIG (an address) using the
5481 register REG. If REG is 0, a new pseudo is generated.
5482
91bb873f 5483 There are two types of references that must be handled:
3b3c6a3f
MM
5484
5485 1. Global data references must load the address from the GOT, via
5486 the PIC reg. An insn is emitted to do this load, and the reg is
5487 returned.
5488
91bb873f
RH
5489 2. Static data references, constant pool addresses, and code labels
5490 compute the address as an offset from the GOT, whose base is in
5491 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5492 differentiate them from global data objects. The returned
5493 address is the PIC reg + an unspec constant.
3b3c6a3f
MM
5494
5495 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 5496 reg also appears in the address. */
3b3c6a3f
MM
5497
5498rtx
5499legitimize_pic_address (orig, reg)
5500 rtx orig;
5501 rtx reg;
5502{
5503 rtx addr = orig;
5504 rtx new = orig;
91bb873f 5505 rtx base;
3b3c6a3f 5506
b069de3b
SS
5507#if TARGET_MACHO
5508 if (reg == 0)
5509 reg = gen_reg_rtx (Pmode);
5510 /* Use the generic Mach-O PIC machinery. */
5511 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5512#endif
5513
623fe810 5514 if (local_symbolic_operand (addr, Pmode))
3b3c6a3f 5515 {
14f73b5a
JH
5516 /* In 64bit mode we can address such objects directly. */
5517 if (TARGET_64BIT)
5518 new = addr;
5519 else
5520 {
5521 /* This symbol may be referenced via a displacement from the PIC
5522 base address (@GOTOFF). */
3b3c6a3f 5523
66edd3b4
RH
5524 if (reload_in_progress)
5525 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf 5526 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
14f73b5a
JH
5527 new = gen_rtx_CONST (Pmode, new);
5528 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3b3c6a3f 5529
14f73b5a
JH
5530 if (reg != 0)
5531 {
5532 emit_move_insn (reg, new);
5533 new = reg;
5534 }
5535 }
3b3c6a3f 5536 }
91bb873f 5537 else if (GET_CODE (addr) == SYMBOL_REF)
3b3c6a3f 5538 {
14f73b5a
JH
5539 if (TARGET_64BIT)
5540 {
8ee41eaf 5541 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
14f73b5a
JH
5542 new = gen_rtx_CONST (Pmode, new);
5543 new = gen_rtx_MEM (Pmode, new);
5544 RTX_UNCHANGING_P (new) = 1;
5545 set_mem_alias_set (new, ix86_GOT_alias_set ());
5546
5547 if (reg == 0)
5548 reg = gen_reg_rtx (Pmode);
5549 /* Use directly gen_movsi, otherwise the address is loaded
5550 into register for CSE. We don't want to CSE this addresses,
5551 instead we CSE addresses from the GOT table, so skip this. */
5552 emit_insn (gen_movsi (reg, new));
5553 new = reg;
5554 }
5555 else
5556 {
5557 /* This symbol must be referenced via a load from the
5558 Global Offset Table (@GOT). */
3b3c6a3f 5559
66edd3b4
RH
5560 if (reload_in_progress)
5561 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf 5562 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
14f73b5a
JH
5563 new = gen_rtx_CONST (Pmode, new);
5564 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5565 new = gen_rtx_MEM (Pmode, new);
5566 RTX_UNCHANGING_P (new) = 1;
5567 set_mem_alias_set (new, ix86_GOT_alias_set ());
3b3c6a3f 5568
14f73b5a
JH
5569 if (reg == 0)
5570 reg = gen_reg_rtx (Pmode);
5571 emit_move_insn (reg, new);
5572 new = reg;
5573 }
0f290768 5574 }
91bb873f
RH
5575 else
5576 {
5577 if (GET_CODE (addr) == CONST)
3b3c6a3f 5578 {
91bb873f 5579 addr = XEXP (addr, 0);
e3c8ea67
RH
5580
5581 /* We must match stuff we generate before. Assume the only
5582 unspecs that can get here are ours. Not that we could do
5583 anything with them anyway... */
5584 if (GET_CODE (addr) == UNSPEC
5585 || (GET_CODE (addr) == PLUS
5586 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5587 return orig;
5588 if (GET_CODE (addr) != PLUS)
564d80f4 5589 abort ();
3b3c6a3f 5590 }
91bb873f
RH
5591 if (GET_CODE (addr) == PLUS)
5592 {
5593 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
e9a25f70 5594
91bb873f
RH
5595 /* Check first to see if this is a constant offset from a @GOTOFF
5596 symbol reference. */
623fe810 5597 if (local_symbolic_operand (op0, Pmode)
91bb873f
RH
5598 && GET_CODE (op1) == CONST_INT)
5599 {
6eb791fc
JH
5600 if (!TARGET_64BIT)
5601 {
66edd3b4
RH
5602 if (reload_in_progress)
5603 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf
RH
5604 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5605 UNSPEC_GOTOFF);
6eb791fc
JH
5606 new = gen_rtx_PLUS (Pmode, new, op1);
5607 new = gen_rtx_CONST (Pmode, new);
5608 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
91bb873f 5609
6eb791fc
JH
5610 if (reg != 0)
5611 {
5612 emit_move_insn (reg, new);
5613 new = reg;
5614 }
5615 }
5616 else
91bb873f 5617 {
75d38379
JJ
5618 if (INTVAL (op1) < -16*1024*1024
5619 || INTVAL (op1) >= 16*1024*1024)
5620 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
91bb873f
RH
5621 }
5622 }
5623 else
5624 {
5625 base = legitimize_pic_address (XEXP (addr, 0), reg);
5626 new = legitimize_pic_address (XEXP (addr, 1),
5627 base == reg ? NULL_RTX : reg);
5628
5629 if (GET_CODE (new) == CONST_INT)
5630 new = plus_constant (base, INTVAL (new));
5631 else
5632 {
5633 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5634 {
5635 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5636 new = XEXP (new, 1);
5637 }
5638 new = gen_rtx_PLUS (Pmode, base, new);
5639 }
5640 }
5641 }
3b3c6a3f
MM
5642 }
5643 return new;
5644}
fb49053f 5645
fb49053f 5646static void
f996902d 5647ix86_encode_section_info (decl, first)
fb49053f
RH
5648 tree decl;
5649 int first ATTRIBUTE_UNUSED;
5650{
f996902d
RH
5651 bool local_p = (*targetm.binds_local_p) (decl);
5652 rtx rtl, symbol;
5653
5654 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5655 if (GET_CODE (rtl) != MEM)
5656 return;
5657 symbol = XEXP (rtl, 0);
5658 if (GET_CODE (symbol) != SYMBOL_REF)
5659 return;
5660
5661 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5662 symbol so that we may access it directly in the GOT. */
5663
fb49053f 5664 if (flag_pic)
f996902d
RH
5665 SYMBOL_REF_FLAG (symbol) = local_p;
5666
5667 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5668 "local dynamic", "initial exec" or "local exec" TLS models
5669 respectively. */
5670
5671 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
fb49053f 5672 {
f996902d
RH
5673 const char *symbol_str;
5674 char *newstr;
5675 size_t len;
dce81a1a 5676 enum tls_model kind = decl_tls_model (decl);
f996902d 5677
75d38379
JJ
5678 if (TARGET_64BIT && ! flag_pic)
5679 {
5680 /* x86-64 doesn't allow non-pic code for shared libraries,
5681 so don't generate GD/LD TLS models for non-pic code. */
5682 switch (kind)
5683 {
5684 case TLS_MODEL_GLOBAL_DYNAMIC:
5685 kind = TLS_MODEL_INITIAL_EXEC; break;
5686 case TLS_MODEL_LOCAL_DYNAMIC:
5687 kind = TLS_MODEL_LOCAL_EXEC; break;
5688 default:
5689 break;
5690 }
5691 }
5692
f996902d 5693 symbol_str = XSTR (symbol, 0);
fb49053f 5694
f996902d
RH
5695 if (symbol_str[0] == '%')
5696 {
5697 if (symbol_str[1] == tls_model_chars[kind])
5698 return;
5699 symbol_str += 2;
5700 }
5701 len = strlen (symbol_str) + 1;
5702 newstr = alloca (len + 2);
5703
5704 newstr[0] = '%';
5705 newstr[1] = tls_model_chars[kind];
5706 memcpy (newstr + 2, symbol_str, len);
5707
5708 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
fb49053f
RH
5709 }
5710}
f996902d
RH
5711
5712/* Undo the above when printing symbol names. */
5713
static const char *
ix86_strip_name_encoding (str)
     const char *str;
{
  const char *name = str;

  /* A '%' introduces the two-character TLS model prefix added by
     ix86_encode_section_info; skip both characters.  */
  if (name[0] == '%')
    name += 2;

  /* Also skip a leading '*' marker, if present.  */
  if (name[0] == '*')
    name++;

  return name;
}
3b3c6a3f 5724\f
f996902d
RH
5725/* Load the thread pointer into a register. */
5726
static rtx
get_thread_pointer ()
{
  rtx tp;

  /* Build (mem:P (unspec [const0] UNSPEC_TP)) -- a load recognized by the
     backend as reading the thread pointer -- and force it into a fresh
     pseudo register, which is returned.  */
  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
  tp = gen_rtx_MEM (Pmode, tp);
  /* The thread pointer never changes within a function, and the load
     aliases nothing the program can see; mark the MEM accordingly.  */
  RTX_UNCHANGING_P (tp) = 1;
  set_mem_alias_set (tp, ix86_GOT_alias_set ());
  tp = force_reg (Pmode, tp);

  return tp;
}
fce5a9f2 5740
3b3c6a3f
MM
5741/* Try machine-dependent ways of modifying an illegitimate address
5742 to be legitimate. If we find one, return the new, valid address.
5743 This macro is used in only one place: `memory_address' in explow.c.
5744
5745 OLDX is the address as it was before break_out_memory_refs was called.
5746 In some cases it is useful to look at this to decide what needs to be done.
5747
5748 MODE and WIN are passed so that this macro can use
5749 GO_IF_LEGITIMATE_ADDRESS.
5750
5751 It is always safe for this macro to do nothing. It exists to recognize
5752 opportunities to optimize the output.
5753
5754 For the 80386, we handle X+REG by loading X into a register R and
5755 using R+REG. R will go in a general reg and indexing will be used.
5756 However, if REG is a broken-out memory address or multiplication,
5757 nothing needs to be done because REG can certainly go in a general reg.
5758
5759 When -fpic is used, special handling is needed for symbolic references.
5760 See comments by legitimize_pic_address in i386.c for details. */
5761
rtx
legitimize_address (x, oldx, mode)
     register rtx x;
     register rtx oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  /* A nonzero result is the TLS access model for symbol X.  (LOG is
     reused further below as a shift-count scratch variable.)  */
  log = tls_symbolic_operand (x, mode);
  if (log)
    {
      rtx dest, base, off, pic;
      int type;

      switch (log)
	{
	case TLS_MODEL_GLOBAL_DYNAMIC:
	  /* Call __tls_get_addr (via a target pattern) to obtain the
	     address; on 64-bit the result arrives in %rax.  */
	  dest = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, 0), insns;

	      start_sequence ();
	      emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
	      insns = get_insns ();
	      end_sequence ();

	      emit_libcall_block (insns, dest, rax, x);
	    }
	  else
	    emit_insn (gen_tls_global_dynamic_32 (dest, x));
	  break;

	case TLS_MODEL_LOCAL_DYNAMIC:
	  /* Obtain the base of the module's TLS block once, then add the
	     symbol's DTP-relative offset as a constant.  */
	  base = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, 0), insns, note;

	      start_sequence ();
	      emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
	      insns = get_insns ();
	      end_sequence ();

	      note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
	      note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
	      emit_libcall_block (insns, base, rax, note);
	    }
	  else
	    emit_insn (gen_tls_local_dynamic_base_32 (base));

	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
	  off = gen_rtx_CONST (Pmode, off);

	  return gen_rtx_PLUS (Pmode, base, off);

	case TLS_MODEL_INITIAL_EXEC:
	  /* Load the symbol's TP-relative offset from the GOT, then
	     combine it with the thread pointer.  PIC (when non-null) is
	     the register used to address the GOT entry.  */
	  if (TARGET_64BIT)
	    {
	      pic = NULL;
	      type = UNSPEC_GOTNTPOFF;
	    }
	  else if (flag_pic)
	    {
	      if (reload_in_progress)
		regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	      pic = pic_offset_table_rtx;
	      type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	    }
	  else if (!TARGET_GNU_TLS)
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	      type = UNSPEC_GOTTPOFF;
	    }
	  else
	    {
	      pic = NULL;
	      type = UNSPEC_INDNTPOFF;
	    }

	  base = get_thread_pointer ();

	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
	  off = gen_rtx_CONST (Pmode, off);
	  if (pic)
	    off = gen_rtx_PLUS (Pmode, pic, off);
	  off = gen_rtx_MEM (Pmode, off);
	  RTX_UNCHANGING_P (off) = 1;
	  set_mem_alias_set (off, ix86_GOT_alias_set ());
	  dest = gen_reg_rtx (Pmode);

	  /* GNU TLS stores a negative offset (add to TP); the older
	     scheme stores a positive one (subtract from TP).  */
	  if (TARGET_64BIT || TARGET_GNU_TLS)
	    {
	      emit_move_insn (dest, off);
	      return gen_rtx_PLUS (Pmode, base, dest);
	    }
	  else
	    emit_insn (gen_subsi3 (dest, base, off));
	  break;

	case TLS_MODEL_LOCAL_EXEC:
	  /* The offset is a link-time constant; no GOT access needed.  */
	  base = get_thread_pointer ();

	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
				(TARGET_64BIT || TARGET_GNU_TLS)
				? UNSPEC_NTPOFF : UNSPEC_TPOFF);
	  off = gen_rtx_CONST (Pmode, off);

	  if (TARGET_64BIT || TARGET_GNU_TLS)
	    return gen_rtx_PLUS (Pmode, base, off);
	  else
	    {
	      dest = gen_reg_rtx (Pmode);
	      emit_insn (gen_subsi3 (dest, base, off));
	    }
	  break;

	default:
	  abort ();
	}

      return dest;
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      /* NOTE(review): the GET_CODE (x) == PLUS test below is redundant --
	 we are already inside that case.  Harmless, left as-is.  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Force any MULT term into a register; a bare MULT is not a valid
	 address component on its own.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Last resort: move the non-register term into a register so the
	 address becomes reg+reg.  */
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
2a2ab3f9
JVA
6042\f
6043/* Print an integer constant expression in assembler syntax. Addition
6044 and subtraction are the only arithmetic that may appear in these
6045 expressions. FILE is the stdio stream to write to, X is the rtx, and
6046 CODE is the operand print code from the output string. */
6047
static void
output_pic_addr_const (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      /* "." is the current location counter; only meaningful under PIC.  */
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      /* 'P' requests a PLT reference for symbols not known to bind
	 locally (SYMBOL_REF_FLAG set by ix86_encode_section_info).  */
      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      /* Bracket the difference so the assembler parses it as one term;
	 Intel dialect uses parens, AT&T uses square brackets.  */
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      /* Print the wrapped operand followed by the relocation operator
	 this UNSPEC represents.  */
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs ("@GOTPCREL(%rip)", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@GOTTPOFF", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@TPOFF", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@TPOFF", file);
	  else
	    fputs ("@NTPOFF", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@DTPOFF", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@GOTTPOFF(%rip)", file);
	  else
	    fputs ("@GOTNTPOFF", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@INDNTPOFF", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
1865dbb5 6184
0f290768 6185/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
6186 We need to handle our special PIC relocations. */
6187
0f290768 6188void
1865dbb5
JM
6189i386_dwarf_output_addr_const (file, x)
6190 FILE *file;
6191 rtx x;
6192{
14f73b5a 6193#ifdef ASM_QUAD
18b5b8d6 6194 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
14f73b5a
JH
6195#else
6196 if (TARGET_64BIT)
6197 abort ();
18b5b8d6 6198 fprintf (file, "%s", ASM_LONG);
14f73b5a 6199#endif
1865dbb5
JM
6200 if (flag_pic)
6201 output_pic_addr_const (file, x, '\0');
6202 else
6203 output_addr_const (file, x);
6204 fputc ('\n', file);
6205}
6206
b9203463
RH
6207/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6208 We need to emit DTP-relative relocations. */
6209
void
i386_output_dwarf_dtprel (file, size, x)
     FILE *file;
     int size;
     rtx x;
{
  /* Emit "<ASM_LONG> x@DTPOFF" and, for an 8-byte request, a trailing
     ", 0" (presumably zeroing the upper half -- note the value itself
     is still emitted with ASM_LONG).  */
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@DTPOFF", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      abort ();
    }
}
6230
1865dbb5
JM
6231/* In the name of slightly smaller debug output, and to cater to
6232 general assembler losage, recognize PIC+GOTOFF and turn it back
6233 into a direct symbol reference. */
6234
rtx
i386_simplify_dwarf_addr (orig_x)
     rtx orig_x;
{
  rtx x = orig_x, y;

  /* Look through a MEM wrapper to the address itself.  */
  if (GET_CODE (x) == MEM)
    x = XEXP (x, 0);

  /* On 64-bit the only pattern we undo is a GOTPCREL load.  */
  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	  || GET_CODE (orig_x) != MEM)
	return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  /* Identify the PIC register term; Y collects any additional index
     term (reg, mult or ashift) that must be kept in the result.  */
  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
    y = NULL;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      y = XEXP (x, 0);
      if (GET_CODE (XEXP (y, 0)) == REG
	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 1);
      else if (GET_CODE (XEXP (y, 1)) == REG
	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 0);
      else
	return orig_x;
      if (GET_CODE (y) != REG
	  && GET_CODE (y) != MULT
	  && GET_CODE (y) != ASHIFT)
	return orig_x;
    }
  else
    return orig_x;

  /* A @GOT reference must have been loaded through memory; a @GOTOFF
     reference must not.  Anything else is left untouched.  */
  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
    {
      if (y)
	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
      return XVECEXP (x, 0, 0);
    }

  /* Same, with an additional constant offset inside the CONST.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
	      && GET_CODE (orig_x) != MEM)))
    {
      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
      if (y)
	return gen_rtx_PLUS (Pmode, y, x);
      return x;
    }

  return orig_x;
}
2a2ab3f9 6307\f
a269a03c 6308static void
e075ae69 6309put_condition_code (code, mode, reverse, fp, file)
a269a03c 6310 enum rtx_code code;
e075ae69
RH
6311 enum machine_mode mode;
6312 int reverse, fp;
a269a03c
JC
6313 FILE *file;
6314{
a269a03c
JC
6315 const char *suffix;
6316
9a915772
JH
6317 if (mode == CCFPmode || mode == CCFPUmode)
6318 {
6319 enum rtx_code second_code, bypass_code;
6320 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6321 if (bypass_code != NIL || second_code != NIL)
b531087a 6322 abort ();
9a915772
JH
6323 code = ix86_fp_compare_code_to_integer (code);
6324 mode = CCmode;
6325 }
a269a03c
JC
6326 if (reverse)
6327 code = reverse_condition (code);
e075ae69 6328
a269a03c
JC
6329 switch (code)
6330 {
6331 case EQ:
6332 suffix = "e";
6333 break;
a269a03c
JC
6334 case NE:
6335 suffix = "ne";
6336 break;
a269a03c 6337 case GT:
7e08e190 6338 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
e075ae69
RH
6339 abort ();
6340 suffix = "g";
a269a03c 6341 break;
a269a03c 6342 case GTU:
e075ae69
RH
6343 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6344 Those same assemblers have the same but opposite losage on cmov. */
7e08e190 6345 if (mode != CCmode)
0f290768 6346 abort ();
e075ae69 6347 suffix = fp ? "nbe" : "a";
a269a03c 6348 break;
a269a03c 6349 case LT:
9076b9c1 6350 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 6351 suffix = "s";
7e08e190 6352 else if (mode == CCmode || mode == CCGCmode)
e075ae69 6353 suffix = "l";
9076b9c1 6354 else
0f290768 6355 abort ();
a269a03c 6356 break;
a269a03c 6357 case LTU:
9076b9c1 6358 if (mode != CCmode)
0f290768 6359 abort ();
a269a03c
JC
6360 suffix = "b";
6361 break;
a269a03c 6362 case GE:
9076b9c1 6363 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 6364 suffix = "ns";
7e08e190 6365 else if (mode == CCmode || mode == CCGCmode)
e075ae69 6366 suffix = "ge";
9076b9c1 6367 else
0f290768 6368 abort ();
a269a03c 6369 break;
a269a03c 6370 case GEU:
e075ae69 6371 /* ??? As above. */
7e08e190 6372 if (mode != CCmode)
0f290768 6373 abort ();
7e08e190 6374 suffix = fp ? "nb" : "ae";
a269a03c 6375 break;
a269a03c 6376 case LE:
7e08e190 6377 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
e075ae69
RH
6378 abort ();
6379 suffix = "le";
a269a03c 6380 break;
a269a03c 6381 case LEU:
9076b9c1
JH
6382 if (mode != CCmode)
6383 abort ();
7e08e190 6384 suffix = "be";
a269a03c 6385 break;
3a3677ff 6386 case UNORDERED:
9e7adcb3 6387 suffix = fp ? "u" : "p";
3a3677ff
RH
6388 break;
6389 case ORDERED:
9e7adcb3 6390 suffix = fp ? "nu" : "np";
3a3677ff 6391 break;
a269a03c
JC
6392 default:
6393 abort ();
6394 }
6395 fputs (suffix, file);
6396}
6397
e075ae69
RH
/* Print to FILE the assembler name of register X, sized according to
   operand-code CODE ('b' byte, 'w' word, 'k' dword, 'q' qword, 'h' high
   byte, 'y' "st(0)" form; otherwise the size is taken from X's mode).  */

void
print_reg (x, code, file)
     rtx x;
     int code;
     FILE *file;
{
  /* These registers have no printable name.  */
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  /* Normalize CODE to a byte size (0 encodes "high byte",
     3 encodes the st(0) form).  */
  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      if (!TARGET_64BIT)
	abort ();
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      /* Integer registers of 4+ bytes get an 'e' (or 'r' for 8-byte
	 on 64-bit) prefix via the 2-byte name table below.  */
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}
6486
f996902d
RH
6487/* Locate some local-dynamic symbol still in use by this function
6488 so that we can print its name in some tls_local_dynamic_base
6489 pattern. */
6490
static const char *
get_some_local_dynamic_name ()
{
  rtx insn;

  /* Return the cached name if a previous call already found one.  */
  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  /* Scan the function's insns; the walker caches the first
     local-dynamic symbol it finds into cfun->machine->some_ld_name.  */
  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  /* The caller only asks when such a symbol must exist.  */
  abort ();
}
6506
/* for_each_rtx callback for get_some_local_dynamic_name: on finding a
   local-dynamic SYMBOL_REF, cache its name and return 1 to stop the
   walk; otherwise return 0 to continue.  */

static int
get_some_local_dynamic_name_1 (px, data)
     rtx *px;
     void *data ATTRIBUTE_UNUSED;
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && local_dynamic_symbolic_operand (x, Pmode))
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}
6523
2a2ab3f9 6524/* Meaning of CODE:
fe25fea3 6525 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 6526 C -- print opcode suffix for set/cmov insn.
fe25fea3 6527 c -- like C, but print reversed condition
ef6257cd 6528 F,f -- likewise, but for floating-point.
048b1c95
JJ
6529 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6530 nothing
2a2ab3f9
JVA
6531 R -- print the prefix for register names.
6532 z -- print the opcode suffix for the size of the current operand.
6533 * -- print a star (in certain assembler syntax)
fb204271 6534 A -- print an absolute memory reference.
2a2ab3f9 6535 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
6536 s -- print a shift double count, followed by the assemblers argument
6537 delimiter.
fe25fea3
SC
6538 b -- print the QImode name of the register for the indicated operand.
6539 %b0 would print %al if operands[0] is reg 0.
6540 w -- likewise, print the HImode name of the register.
6541 k -- likewise, print the SImode name of the register.
3f3f2124 6542 q -- likewise, print the DImode name of the register.
ef6257cd
JH
6543 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6544 y -- print "st(0)" instead of "st" as a register.
a46d1d38 6545 D -- print condition for SSE cmp instruction.
ef6257cd
JH
6546 P -- if PIC, print an @PLT suffix.
6547 X -- don't print any sort of PIC '@' suffix for a symbol.
f996902d 6548 & -- print some in-use local-dynamic symbol name.
a46d1d38 6549 */
2a2ab3f9
JVA
6550
6551void
6552print_operand (file, x, code)
6553 FILE *file;
6554 rtx x;
6555 int code;
6556{
6557 if (code)
6558 {
6559 switch (code)
6560 {
6561 case '*':
80f33d06 6562 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9
JVA
6563 putc ('*', file);
6564 return;
6565
f996902d
RH
6566 case '&':
6567 assemble_name (file, get_some_local_dynamic_name ());
6568 return;
6569
fb204271 6570 case 'A':
80f33d06 6571 if (ASSEMBLER_DIALECT == ASM_ATT)
fb204271 6572 putc ('*', file);
80f33d06 6573 else if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
6574 {
6575 /* Intel syntax. For absolute addresses, registers should not
6576 be surrounded by braces. */
6577 if (GET_CODE (x) != REG)
6578 {
6579 putc ('[', file);
6580 PRINT_OPERAND (file, x, 0);
6581 putc (']', file);
6582 return;
6583 }
6584 }
80f33d06
GS
6585 else
6586 abort ();
fb204271
DN
6587
6588 PRINT_OPERAND (file, x, 0);
6589 return;
6590
6591
2a2ab3f9 6592 case 'L':
80f33d06 6593 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6594 putc ('l', file);
2a2ab3f9
JVA
6595 return;
6596
6597 case 'W':
80f33d06 6598 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6599 putc ('w', file);
2a2ab3f9
JVA
6600 return;
6601
6602 case 'B':
80f33d06 6603 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6604 putc ('b', file);
2a2ab3f9
JVA
6605 return;
6606
6607 case 'Q':
80f33d06 6608 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6609 putc ('l', file);
2a2ab3f9
JVA
6610 return;
6611
6612 case 'S':
80f33d06 6613 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6614 putc ('s', file);
2a2ab3f9
JVA
6615 return;
6616
5f1ec3e6 6617 case 'T':
80f33d06 6618 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6619 putc ('t', file);
5f1ec3e6
JVA
6620 return;
6621
2a2ab3f9
JVA
6622 case 'z':
6623 /* 387 opcodes don't get size suffixes if the operands are
0f290768 6624 registers. */
2a2ab3f9
JVA
6625 if (STACK_REG_P (x))
6626 return;
6627
831c4e87
KC
6628 /* Likewise if using Intel opcodes. */
6629 if (ASSEMBLER_DIALECT == ASM_INTEL)
6630 return;
6631
6632 /* This is the size of op from size of operand. */
2a2ab3f9
JVA
6633 switch (GET_MODE_SIZE (GET_MODE (x)))
6634 {
2a2ab3f9 6635 case 2:
155d8a47
JW
6636#ifdef HAVE_GAS_FILDS_FISTS
6637 putc ('s', file);
6638#endif
2a2ab3f9
JVA
6639 return;
6640
6641 case 4:
6642 if (GET_MODE (x) == SFmode)
6643 {
e075ae69 6644 putc ('s', file);
2a2ab3f9
JVA
6645 return;
6646 }
6647 else
e075ae69 6648 putc ('l', file);
2a2ab3f9
JVA
6649 return;
6650
5f1ec3e6 6651 case 12:
2b589241 6652 case 16:
e075ae69
RH
6653 putc ('t', file);
6654 return;
5f1ec3e6 6655
2a2ab3f9
JVA
6656 case 8:
6657 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
6658 {
6659#ifdef GAS_MNEMONICS
e075ae69 6660 putc ('q', file);
56c0e8fa 6661#else
e075ae69
RH
6662 putc ('l', file);
6663 putc ('l', file);
56c0e8fa
JVA
6664#endif
6665 }
e075ae69
RH
6666 else
6667 putc ('l', file);
2a2ab3f9 6668 return;
155d8a47
JW
6669
6670 default:
6671 abort ();
2a2ab3f9 6672 }
4af3895e
JVA
6673
6674 case 'b':
6675 case 'w':
6676 case 'k':
3f3f2124 6677 case 'q':
4af3895e
JVA
6678 case 'h':
6679 case 'y':
5cb6195d 6680 case 'X':
e075ae69 6681 case 'P':
4af3895e
JVA
6682 break;
6683
2d49677f
SC
6684 case 's':
6685 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6686 {
6687 PRINT_OPERAND (file, x, 0);
e075ae69 6688 putc (',', file);
2d49677f 6689 }
a269a03c
JC
6690 return;
6691
a46d1d38
JH
6692 case 'D':
6693 /* Little bit of braindamage here. The SSE compare instructions
6694 does use completely different names for the comparisons that the
6695 fp conditional moves. */
6696 switch (GET_CODE (x))
6697 {
6698 case EQ:
6699 case UNEQ:
6700 fputs ("eq", file);
6701 break;
6702 case LT:
6703 case UNLT:
6704 fputs ("lt", file);
6705 break;
6706 case LE:
6707 case UNLE:
6708 fputs ("le", file);
6709 break;
6710 case UNORDERED:
6711 fputs ("unord", file);
6712 break;
6713 case NE:
6714 case LTGT:
6715 fputs ("neq", file);
6716 break;
6717 case UNGE:
6718 case GE:
6719 fputs ("nlt", file);
6720 break;
6721 case UNGT:
6722 case GT:
6723 fputs ("nle", file);
6724 break;
6725 case ORDERED:
6726 fputs ("ord", file);
6727 break;
6728 default:
6729 abort ();
6730 break;
6731 }
6732 return;
048b1c95
JJ
6733 case 'O':
6734#ifdef CMOV_SUN_AS_SYNTAX
6735 if (ASSEMBLER_DIALECT == ASM_ATT)
6736 {
6737 switch (GET_MODE (x))
6738 {
6739 case HImode: putc ('w', file); break;
6740 case SImode:
6741 case SFmode: putc ('l', file); break;
6742 case DImode:
6743 case DFmode: putc ('q', file); break;
6744 default: abort ();
6745 }
6746 putc ('.', file);
6747 }
6748#endif
6749 return;
1853aadd 6750 case 'C':
e075ae69 6751 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 6752 return;
fe25fea3 6753 case 'F':
048b1c95
JJ
6754#ifdef CMOV_SUN_AS_SYNTAX
6755 if (ASSEMBLER_DIALECT == ASM_ATT)
6756 putc ('.', file);
6757#endif
e075ae69 6758 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
6759 return;
6760
e9a25f70 6761 /* Like above, but reverse condition */
e075ae69 6762 case 'c':
fce5a9f2 6763 /* Check to see if argument to %c is really a constant
c1d5afc4
CR
6764 and not a condition code which needs to be reversed. */
6765 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6766 {
6767 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6768 return;
6769 }
e075ae69
RH
6770 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6771 return;
fe25fea3 6772 case 'f':
048b1c95
JJ
6773#ifdef CMOV_SUN_AS_SYNTAX
6774 if (ASSEMBLER_DIALECT == ASM_ATT)
6775 putc ('.', file);
6776#endif
e075ae69 6777 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 6778 return;
ef6257cd
JH
6779 case '+':
6780 {
6781 rtx x;
e5cb57e8 6782
ef6257cd
JH
6783 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6784 return;
a4f31c00 6785
ef6257cd
JH
6786 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6787 if (x)
6788 {
6789 int pred_val = INTVAL (XEXP (x, 0));
6790
6791 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6792 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6793 {
6794 int taken = pred_val > REG_BR_PROB_BASE / 2;
6795 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6796
6797 /* Emit hints only in the case default branch prediction
6798 heruistics would fail. */
6799 if (taken != cputaken)
6800 {
6801 /* We use 3e (DS) prefix for taken branches and
6802 2e (CS) prefix for not taken branches. */
6803 if (taken)
6804 fputs ("ds ; ", file);
6805 else
6806 fputs ("cs ; ", file);
6807 }
6808 }
6809 }
6810 return;
6811 }
4af3895e 6812 default:
a52453cc 6813 output_operand_lossage ("invalid operand code `%c'", code);
2a2ab3f9
JVA
6814 }
6815 }
e9a25f70 6816
2a2ab3f9
JVA
6817 if (GET_CODE (x) == REG)
6818 {
6819 PRINT_REG (x, code, file);
6820 }
e9a25f70 6821
2a2ab3f9
JVA
6822 else if (GET_CODE (x) == MEM)
6823 {
e075ae69 6824 /* No `byte ptr' prefix for call instructions. */
80f33d06 6825 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
2a2ab3f9 6826 {
69ddee61 6827 const char * size;
e075ae69
RH
6828 switch (GET_MODE_SIZE (GET_MODE (x)))
6829 {
6830 case 1: size = "BYTE"; break;
6831 case 2: size = "WORD"; break;
6832 case 4: size = "DWORD"; break;
6833 case 8: size = "QWORD"; break;
6834 case 12: size = "XWORD"; break;
a7180f70 6835 case 16: size = "XMMWORD"; break;
e075ae69 6836 default:
564d80f4 6837 abort ();
e075ae69 6838 }
fb204271
DN
6839
6840 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6841 if (code == 'b')
6842 size = "BYTE";
6843 else if (code == 'w')
6844 size = "WORD";
6845 else if (code == 'k')
6846 size = "DWORD";
6847
e075ae69
RH
6848 fputs (size, file);
6849 fputs (" PTR ", file);
2a2ab3f9 6850 }
e075ae69
RH
6851
6852 x = XEXP (x, 0);
6853 if (flag_pic && CONSTANT_ADDRESS_P (x))
6854 output_pic_addr_const (file, x, code);
0d7d98ee 6855 /* Avoid (%rip) for call operands. */
5bf0ebab 6856 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
0d7d98ee
JH
6857 && GET_CODE (x) != CONST_INT)
6858 output_addr_const (file, x);
c8b94768
RH
6859 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6860 output_operand_lossage ("invalid constraints for operand");
2a2ab3f9 6861 else
e075ae69 6862 output_address (x);
2a2ab3f9 6863 }
e9a25f70 6864
2a2ab3f9
JVA
6865 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6866 {
e9a25f70
JL
6867 REAL_VALUE_TYPE r;
6868 long l;
6869
5f1ec3e6
JVA
6870 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6871 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69 6872
80f33d06 6873 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6874 putc ('$', file);
52267fcb 6875 fprintf (file, "0x%lx", l);
5f1ec3e6 6876 }
e9a25f70 6877
0f290768 6878 /* These float cases don't actually occur as immediate operands. */
5f1ec3e6
JVA
6879 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6880 {
e9a25f70
JL
6881 char dstr[30];
6882
da6eec72 6883 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 6884 fprintf (file, "%s", dstr);
2a2ab3f9 6885 }
e9a25f70 6886
2b589241
JH
6887 else if (GET_CODE (x) == CONST_DOUBLE
6888 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
2a2ab3f9 6889 {
e9a25f70
JL
6890 char dstr[30];
6891
da6eec72 6892 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 6893 fprintf (file, "%s", dstr);
2a2ab3f9 6894 }
f996902d 6895
79325812 6896 else
2a2ab3f9 6897 {
4af3895e 6898 if (code != 'P')
2a2ab3f9 6899 {
695dac07 6900 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69 6901 {
80f33d06 6902 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
6903 putc ('$', file);
6904 }
2a2ab3f9
JVA
6905 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6906 || GET_CODE (x) == LABEL_REF)
e075ae69 6907 {
80f33d06 6908 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
6909 putc ('$', file);
6910 else
6911 fputs ("OFFSET FLAT:", file);
6912 }
2a2ab3f9 6913 }
e075ae69
RH
6914 if (GET_CODE (x) == CONST_INT)
6915 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6916 else if (flag_pic)
2a2ab3f9
JVA
6917 output_pic_addr_const (file, x, code);
6918 else
6919 output_addr_const (file, x);
6920 }
6921}
6922\f
6923/* Print a memory operand whose address is ADDR. */
6924
6925void
6926print_operand_address (file, addr)
6927 FILE *file;
6928 register rtx addr;
6929{
e075ae69
RH
6930 struct ix86_address parts;
6931 rtx base, index, disp;
6932 int scale;
e9a25f70 6933
9e20be0c
JJ
6934 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
6935 {
6936 if (ASSEMBLER_DIALECT == ASM_INTEL)
6937 fputs ("DWORD PTR ", file);
6938 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6939 putc ('%', file);
75d38379
JJ
6940 if (TARGET_64BIT)
6941 fputs ("fs:0", file);
6942 else
6943 fputs ("gs:0", file);
9e20be0c
JJ
6944 return;
6945 }
6946
e075ae69
RH
6947 if (! ix86_decompose_address (addr, &parts))
6948 abort ();
e9a25f70 6949
e075ae69
RH
6950 base = parts.base;
6951 index = parts.index;
6952 disp = parts.disp;
6953 scale = parts.scale;
e9a25f70 6954
e075ae69
RH
6955 if (!base && !index)
6956 {
6957 /* Displacement only requires special attention. */
e9a25f70 6958
e075ae69 6959 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 6960 {
80f33d06 6961 if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
6962 {
6963 if (USER_LABEL_PREFIX[0] == 0)
6964 putc ('%', file);
6965 fputs ("ds:", file);
6966 }
e075ae69 6967 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
2a2ab3f9 6968 }
e075ae69
RH
6969 else if (flag_pic)
6970 output_pic_addr_const (file, addr, 0);
6971 else
6972 output_addr_const (file, addr);
0d7d98ee
JH
6973
6974 /* Use one byte shorter RIP relative addressing for 64bit mode. */
edfe8595 6975 if (TARGET_64BIT
75d38379
JJ
6976 && ((GET_CODE (addr) == SYMBOL_REF
6977 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
edfe8595
RH
6978 || GET_CODE (addr) == LABEL_REF
6979 || (GET_CODE (addr) == CONST
6980 && GET_CODE (XEXP (addr, 0)) == PLUS
200bcf7e
JH
6981 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6982 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
edfe8595 6983 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
0d7d98ee 6984 fputs ("(%rip)", file);
e075ae69
RH
6985 }
6986 else
6987 {
80f33d06 6988 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9 6989 {
e075ae69 6990 if (disp)
2a2ab3f9 6991 {
c399861d 6992 if (flag_pic)
e075ae69
RH
6993 output_pic_addr_const (file, disp, 0);
6994 else if (GET_CODE (disp) == LABEL_REF)
6995 output_asm_label (disp);
2a2ab3f9 6996 else
e075ae69 6997 output_addr_const (file, disp);
2a2ab3f9
JVA
6998 }
6999
e075ae69
RH
7000 putc ('(', file);
7001 if (base)
7002 PRINT_REG (base, 0, file);
7003 if (index)
2a2ab3f9 7004 {
e075ae69
RH
7005 putc (',', file);
7006 PRINT_REG (index, 0, file);
7007 if (scale != 1)
7008 fprintf (file, ",%d", scale);
2a2ab3f9 7009 }
e075ae69 7010 putc (')', file);
2a2ab3f9 7011 }
2a2ab3f9
JVA
7012 else
7013 {
e075ae69 7014 rtx offset = NULL_RTX;
e9a25f70 7015
e075ae69
RH
7016 if (disp)
7017 {
7018 /* Pull out the offset of a symbol; print any symbol itself. */
7019 if (GET_CODE (disp) == CONST
7020 && GET_CODE (XEXP (disp, 0)) == PLUS
7021 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7022 {
7023 offset = XEXP (XEXP (disp, 0), 1);
7024 disp = gen_rtx_CONST (VOIDmode,
7025 XEXP (XEXP (disp, 0), 0));
7026 }
ce193852 7027
e075ae69
RH
7028 if (flag_pic)
7029 output_pic_addr_const (file, disp, 0);
7030 else if (GET_CODE (disp) == LABEL_REF)
7031 output_asm_label (disp);
7032 else if (GET_CODE (disp) == CONST_INT)
7033 offset = disp;
7034 else
7035 output_addr_const (file, disp);
7036 }
e9a25f70 7037
e075ae69
RH
7038 putc ('[', file);
7039 if (base)
a8620236 7040 {
e075ae69
RH
7041 PRINT_REG (base, 0, file);
7042 if (offset)
7043 {
7044 if (INTVAL (offset) >= 0)
7045 putc ('+', file);
7046 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7047 }
a8620236 7048 }
e075ae69
RH
7049 else if (offset)
7050 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 7051 else
e075ae69 7052 putc ('0', file);
e9a25f70 7053
e075ae69
RH
7054 if (index)
7055 {
7056 putc ('+', file);
7057 PRINT_REG (index, 0, file);
7058 if (scale != 1)
7059 fprintf (file, "*%d", scale);
7060 }
7061 putc (']', file);
7062 }
2a2ab3f9
JVA
7063 }
7064}
f996902d
RH
7065
7066bool
7067output_addr_const_extra (file, x)
7068 FILE *file;
7069 rtx x;
7070{
7071 rtx op;
7072
7073 if (GET_CODE (x) != UNSPEC)
7074 return false;
7075
7076 op = XVECEXP (x, 0, 0);
7077 switch (XINT (x, 1))
7078 {
7079 case UNSPEC_GOTTPOFF:
7080 output_addr_const (file, op);
dea73790 7081 /* FIXME: This might be @TPOFF in Sun ld. */
f996902d
RH
7082 fputs ("@GOTTPOFF", file);
7083 break;
7084 case UNSPEC_TPOFF:
7085 output_addr_const (file, op);
7086 fputs ("@TPOFF", file);
7087 break;
7088 case UNSPEC_NTPOFF:
7089 output_addr_const (file, op);
75d38379
JJ
7090 if (TARGET_64BIT)
7091 fputs ("@TPOFF", file);
7092 else
7093 fputs ("@NTPOFF", file);
f996902d
RH
7094 break;
7095 case UNSPEC_DTPOFF:
7096 output_addr_const (file, op);
7097 fputs ("@DTPOFF", file);
7098 break;
dea73790
JJ
7099 case UNSPEC_GOTNTPOFF:
7100 output_addr_const (file, op);
75d38379
JJ
7101 if (TARGET_64BIT)
7102 fputs ("@GOTTPOFF(%rip)", file);
7103 else
7104 fputs ("@GOTNTPOFF", file);
dea73790
JJ
7105 break;
7106 case UNSPEC_INDNTPOFF:
7107 output_addr_const (file, op);
7108 fputs ("@INDNTPOFF", file);
7109 break;
f996902d
RH
7110
7111 default:
7112 return false;
7113 }
7114
7115 return true;
7116}
2a2ab3f9
JVA
7117\f
7118/* Split one or more DImode RTL references into pairs of SImode
7119 references. The RTL can be REG, offsettable MEM, integer constant, or
7120 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7121 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 7122 that parallel "operands". */
2a2ab3f9
JVA
7123
7124void
7125split_di (operands, num, lo_half, hi_half)
7126 rtx operands[];
7127 int num;
7128 rtx lo_half[], hi_half[];
7129{
7130 while (num--)
7131 {
57dbca5e 7132 rtx op = operands[num];
b932f770
JH
7133
7134 /* simplify_subreg refuse to split volatile memory addresses,
7135 but we still have to handle it. */
7136 if (GET_CODE (op) == MEM)
2a2ab3f9 7137 {
f4ef873c 7138 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 7139 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
7140 }
7141 else
b932f770 7142 {
38ca929b
JH
7143 lo_half[num] = simplify_gen_subreg (SImode, op,
7144 GET_MODE (op) == VOIDmode
7145 ? DImode : GET_MODE (op), 0);
7146 hi_half[num] = simplify_gen_subreg (SImode, op,
7147 GET_MODE (op) == VOIDmode
7148 ? DImode : GET_MODE (op), 4);
b932f770 7149 }
2a2ab3f9
JVA
7150 }
7151}
44cf5b6a
JH
7152/* Split one or more TImode RTL references into pairs of SImode
7153 references. The RTL can be REG, offsettable MEM, integer constant, or
7154 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7155 split and "num" is its length. lo_half and hi_half are output arrays
7156 that parallel "operands". */
7157
7158void
7159split_ti (operands, num, lo_half, hi_half)
7160 rtx operands[];
7161 int num;
7162 rtx lo_half[], hi_half[];
7163{
7164 while (num--)
7165 {
7166 rtx op = operands[num];
b932f770
JH
7167
7168 /* simplify_subreg refuse to split volatile memory addresses, but we
7169 still have to handle it. */
7170 if (GET_CODE (op) == MEM)
44cf5b6a
JH
7171 {
7172 lo_half[num] = adjust_address (op, DImode, 0);
7173 hi_half[num] = adjust_address (op, DImode, 8);
7174 }
7175 else
b932f770
JH
7176 {
7177 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7178 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7179 }
44cf5b6a
JH
7180 }
7181}
2a2ab3f9 7182\f
2a2ab3f9
JVA
7183/* Output code to perform a 387 binary operation in INSN, one of PLUS,
7184 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7185 is the expression of the binary operation. The output may either be
7186 emitted here, or returned to the caller, like all output_* functions.
7187
7188 There is no guarantee that the operands are the same mode, as they
0f290768 7189 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 7190
e3c2afab
AM
7191#ifndef SYSV386_COMPAT
7192/* Set to 1 for compatibility with brain-damaged assemblers. No-one
7193 wants to fix the assemblers because that causes incompatibility
7194 with gcc. No-one wants to fix gcc because that causes
7195 incompatibility with assemblers... You can use the option of
7196 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7197#define SYSV386_COMPAT 1
7198#endif
7199
69ddee61 7200const char *
2a2ab3f9
JVA
7201output_387_binary_op (insn, operands)
7202 rtx insn;
7203 rtx *operands;
7204{
e3c2afab 7205 static char buf[30];
69ddee61 7206 const char *p;
1deaa899
JH
7207 const char *ssep;
7208 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
2a2ab3f9 7209
e3c2afab
AM
7210#ifdef ENABLE_CHECKING
7211 /* Even if we do not want to check the inputs, this documents input
7212 constraints. Which helps in understanding the following code. */
7213 if (STACK_REG_P (operands[0])
7214 && ((REG_P (operands[1])
7215 && REGNO (operands[0]) == REGNO (operands[1])
7216 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7217 || (REG_P (operands[2])
7218 && REGNO (operands[0]) == REGNO (operands[2])
7219 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7220 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7221 ; /* ok */
1deaa899 7222 else if (!is_sse)
e3c2afab
AM
7223 abort ();
7224#endif
7225
2a2ab3f9
JVA
7226 switch (GET_CODE (operands[3]))
7227 {
7228 case PLUS:
e075ae69
RH
7229 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7230 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7231 p = "fiadd";
7232 else
7233 p = "fadd";
1deaa899 7234 ssep = "add";
2a2ab3f9
JVA
7235 break;
7236
7237 case MINUS:
e075ae69
RH
7238 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7239 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7240 p = "fisub";
7241 else
7242 p = "fsub";
1deaa899 7243 ssep = "sub";
2a2ab3f9
JVA
7244 break;
7245
7246 case MULT:
e075ae69
RH
7247 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7248 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7249 p = "fimul";
7250 else
7251 p = "fmul";
1deaa899 7252 ssep = "mul";
2a2ab3f9
JVA
7253 break;
7254
7255 case DIV:
e075ae69
RH
7256 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7257 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7258 p = "fidiv";
7259 else
7260 p = "fdiv";
1deaa899 7261 ssep = "div";
2a2ab3f9
JVA
7262 break;
7263
7264 default:
7265 abort ();
7266 }
7267
1deaa899
JH
7268 if (is_sse)
7269 {
7270 strcpy (buf, ssep);
7271 if (GET_MODE (operands[0]) == SFmode)
7272 strcat (buf, "ss\t{%2, %0|%0, %2}");
7273 else
7274 strcat (buf, "sd\t{%2, %0|%0, %2}");
7275 return buf;
7276 }
e075ae69 7277 strcpy (buf, p);
2a2ab3f9
JVA
7278
7279 switch (GET_CODE (operands[3]))
7280 {
7281 case MULT:
7282 case PLUS:
7283 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7284 {
e3c2afab 7285 rtx temp = operands[2];
2a2ab3f9
JVA
7286 operands[2] = operands[1];
7287 operands[1] = temp;
7288 }
7289
e3c2afab
AM
7290 /* know operands[0] == operands[1]. */
7291
2a2ab3f9 7292 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
7293 {
7294 p = "%z2\t%2";
7295 break;
7296 }
2a2ab3f9
JVA
7297
7298 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
7299 {
7300 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7301 /* How is it that we are storing to a dead operand[2]?
7302 Well, presumably operands[1] is dead too. We can't
7303 store the result to st(0) as st(0) gets popped on this
7304 instruction. Instead store to operands[2] (which I
7305 think has to be st(1)). st(1) will be popped later.
7306 gcc <= 2.8.1 didn't have this check and generated
7307 assembly code that the Unixware assembler rejected. */
7308 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7309 else
e3c2afab 7310 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 7311 break;
6b28fd63 7312 }
2a2ab3f9
JVA
7313
7314 if (STACK_TOP_P (operands[0]))
e3c2afab 7315 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7316 else
e3c2afab 7317 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 7318 break;
2a2ab3f9
JVA
7319
7320 case MINUS:
7321 case DIV:
7322 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
7323 {
7324 p = "r%z1\t%1";
7325 break;
7326 }
2a2ab3f9
JVA
7327
7328 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
7329 {
7330 p = "%z2\t%2";
7331 break;
7332 }
2a2ab3f9 7333
2a2ab3f9 7334 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 7335 {
e3c2afab
AM
7336#if SYSV386_COMPAT
7337 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7338 derived assemblers, confusingly reverse the direction of
7339 the operation for fsub{r} and fdiv{r} when the
7340 destination register is not st(0). The Intel assembler
7341 doesn't have this brain damage. Read !SYSV386_COMPAT to
7342 figure out what the hardware really does. */
7343 if (STACK_TOP_P (operands[0]))
7344 p = "{p\t%0, %2|rp\t%2, %0}";
7345 else
7346 p = "{rp\t%2, %0|p\t%0, %2}";
7347#else
6b28fd63 7348 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7349 /* As above for fmul/fadd, we can't store to st(0). */
7350 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7351 else
e3c2afab
AM
7352 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7353#endif
e075ae69 7354 break;
6b28fd63 7355 }
2a2ab3f9
JVA
7356
7357 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 7358 {
e3c2afab 7359#if SYSV386_COMPAT
6b28fd63 7360 if (STACK_TOP_P (operands[0]))
e3c2afab 7361 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 7362 else
e3c2afab
AM
7363 p = "{p\t%1, %0|rp\t%0, %1}";
7364#else
7365 if (STACK_TOP_P (operands[0]))
7366 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7367 else
7368 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7369#endif
e075ae69 7370 break;
6b28fd63 7371 }
2a2ab3f9
JVA
7372
7373 if (STACK_TOP_P (operands[0]))
7374 {
7375 if (STACK_TOP_P (operands[1]))
e3c2afab 7376 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7377 else
e3c2afab 7378 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 7379 break;
2a2ab3f9
JVA
7380 }
7381 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
7382 {
7383#if SYSV386_COMPAT
7384 p = "{\t%1, %0|r\t%0, %1}";
7385#else
7386 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7387#endif
7388 }
2a2ab3f9 7389 else
e3c2afab
AM
7390 {
7391#if SYSV386_COMPAT
7392 p = "{r\t%2, %0|\t%0, %2}";
7393#else
7394 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7395#endif
7396 }
e075ae69 7397 break;
2a2ab3f9
JVA
7398
7399 default:
7400 abort ();
7401 }
e075ae69
RH
7402
7403 strcat (buf, p);
7404 return buf;
2a2ab3f9 7405}
e075ae69 7406
a4f31c00 7407/* Output code to initialize control word copies used by
7a2e09f4
JH
7408 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7409 is set to control word rounding downwards. */
7410void
7411emit_i387_cw_initialization (normal, round_down)
7412 rtx normal, round_down;
7413{
7414 rtx reg = gen_reg_rtx (HImode);
7415
7416 emit_insn (gen_x86_fnstcw_1 (normal));
7417 emit_move_insn (reg, normal);
7418 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7419 && !TARGET_64BIT)
7420 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7421 else
7422 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7423 emit_move_insn (round_down, reg);
7424}
7425
2a2ab3f9 7426/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 7427 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 7428 operand may be [SDX]Fmode. */
2a2ab3f9 7429
69ddee61 7430const char *
2a2ab3f9
JVA
7431output_fix_trunc (insn, operands)
7432 rtx insn;
7433 rtx *operands;
7434{
7435 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69 7436 int dimode_p = GET_MODE (operands[0]) == DImode;
2a2ab3f9 7437
e075ae69
RH
7438 /* Jump through a hoop or two for DImode, since the hardware has no
7439 non-popping instruction. We used to do this a different way, but
7440 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
7441 if (dimode_p && !stack_top_dies)
7442 output_asm_insn ("fld\t%y1", operands);
e075ae69 7443
7a2e09f4 7444 if (!STACK_TOP_P (operands[1]))
10195bd8
JW
7445 abort ();
7446
e075ae69 7447 if (GET_CODE (operands[0]) != MEM)
7a2e09f4 7448 abort ();
e9a25f70 7449
7a2e09f4 7450 output_asm_insn ("fldcw\t%3", operands);
e075ae69 7451 if (stack_top_dies || dimode_p)
7a2e09f4 7452 output_asm_insn ("fistp%z0\t%0", operands);
10195bd8 7453 else
7a2e09f4 7454 output_asm_insn ("fist%z0\t%0", operands);
e075ae69 7455 output_asm_insn ("fldcw\t%2", operands);
10195bd8 7456
e075ae69 7457 return "";
2a2ab3f9 7458}
cda749b1 7459
e075ae69
RH
7460/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7461 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7462 when fucom should be used. */
7463
69ddee61 7464const char *
e075ae69 7465output_fp_compare (insn, operands, eflags_p, unordered_p)
cda749b1
JW
7466 rtx insn;
7467 rtx *operands;
e075ae69 7468 int eflags_p, unordered_p;
cda749b1 7469{
e075ae69
RH
7470 int stack_top_dies;
7471 rtx cmp_op0 = operands[0];
7472 rtx cmp_op1 = operands[1];
0644b628 7473 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
e075ae69
RH
7474
7475 if (eflags_p == 2)
7476 {
7477 cmp_op0 = cmp_op1;
7478 cmp_op1 = operands[2];
7479 }
0644b628
JH
7480 if (is_sse)
7481 {
7482 if (GET_MODE (operands[0]) == SFmode)
7483 if (unordered_p)
7484 return "ucomiss\t{%1, %0|%0, %1}";
7485 else
7486 return "comiss\t{%1, %0|%0, %y}";
7487 else
7488 if (unordered_p)
7489 return "ucomisd\t{%1, %0|%0, %1}";
7490 else
7491 return "comisd\t{%1, %0|%0, %y}";
7492 }
cda749b1 7493
e075ae69 7494 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
7495 abort ();
7496
e075ae69 7497 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 7498
e075ae69
RH
7499 if (STACK_REG_P (cmp_op1)
7500 && stack_top_dies
7501 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7502 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 7503 {
e075ae69
RH
7504 /* If both the top of the 387 stack dies, and the other operand
7505 is also a stack register that dies, then this must be a
7506 `fcompp' float compare */
7507
7508 if (eflags_p == 1)
7509 {
7510 /* There is no double popping fcomi variant. Fortunately,
7511 eflags is immune from the fstp's cc clobbering. */
7512 if (unordered_p)
7513 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7514 else
7515 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7516 return "fstp\t%y0";
7517 }
7518 else
cda749b1 7519 {
e075ae69
RH
7520 if (eflags_p == 2)
7521 {
7522 if (unordered_p)
7523 return "fucompp\n\tfnstsw\t%0";
7524 else
7525 return "fcompp\n\tfnstsw\t%0";
7526 }
cda749b1
JW
7527 else
7528 {
e075ae69
RH
7529 if (unordered_p)
7530 return "fucompp";
7531 else
7532 return "fcompp";
cda749b1
JW
7533 }
7534 }
cda749b1
JW
7535 }
7536 else
7537 {
e075ae69 7538 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 7539
0f290768 7540 static const char * const alt[24] =
e075ae69
RH
7541 {
7542 "fcom%z1\t%y1",
7543 "fcomp%z1\t%y1",
7544 "fucom%z1\t%y1",
7545 "fucomp%z1\t%y1",
0f290768 7546
e075ae69
RH
7547 "ficom%z1\t%y1",
7548 "ficomp%z1\t%y1",
7549 NULL,
7550 NULL,
7551
7552 "fcomi\t{%y1, %0|%0, %y1}",
7553 "fcomip\t{%y1, %0|%0, %y1}",
7554 "fucomi\t{%y1, %0|%0, %y1}",
7555 "fucomip\t{%y1, %0|%0, %y1}",
7556
7557 NULL,
7558 NULL,
7559 NULL,
7560 NULL,
7561
7562 "fcom%z2\t%y2\n\tfnstsw\t%0",
7563 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7564 "fucom%z2\t%y2\n\tfnstsw\t%0",
7565 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 7566
e075ae69
RH
7567 "ficom%z2\t%y2\n\tfnstsw\t%0",
7568 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7569 NULL,
7570 NULL
7571 };
7572
7573 int mask;
69ddee61 7574 const char *ret;
e075ae69
RH
7575
7576 mask = eflags_p << 3;
7577 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7578 mask |= unordered_p << 1;
7579 mask |= stack_top_dies;
7580
7581 if (mask >= 24)
7582 abort ();
7583 ret = alt[mask];
7584 if (ret == NULL)
7585 abort ();
cda749b1 7586
e075ae69 7587 return ret;
cda749b1
JW
7588 }
7589}
2a2ab3f9 7590
f88c65f7
RH
7591void
7592ix86_output_addr_vec_elt (file, value)
7593 FILE *file;
7594 int value;
7595{
7596 const char *directive = ASM_LONG;
7597
7598 if (TARGET_64BIT)
7599 {
7600#ifdef ASM_QUAD
7601 directive = ASM_QUAD;
7602#else
7603 abort ();
7604#endif
7605 }
7606
7607 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7608}
7609
7610void
7611ix86_output_addr_diff_elt (file, value, rel)
7612 FILE *file;
7613 int value, rel;
7614{
7615 if (TARGET_64BIT)
74411039 7616 fprintf (file, "%s%s%d-%s%d\n",
f88c65f7
RH
7617 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7618 else if (HAVE_AS_GOTOFF_IN_DATA)
7619 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
b069de3b
SS
7620#if TARGET_MACHO
7621 else if (TARGET_MACHO)
7622 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7623 machopic_function_base_name () + 1);
7624#endif
f88c65f7 7625 else
5fc0e5df
KW
7626 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7627 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
f88c65f7 7628}
32b5b1aa 7629\f
a8bac9ab
RH
7630/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7631 for the target. */
7632
7633void
7634ix86_expand_clear (dest)
7635 rtx dest;
7636{
7637 rtx tmp;
7638
7639 /* We play register width games, which are only valid after reload. */
7640 if (!reload_completed)
7641 abort ();
7642
7643 /* Avoid HImode and its attendant prefix byte. */
7644 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7645 dest = gen_rtx_REG (SImode, REGNO (dest));
7646
7647 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7648
7649 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7650 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7651 {
7652 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7653 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7654 }
7655
7656 emit_insn (tmp);
7657}
7658
f996902d
RH
7659/* X is an unchanging MEM. If it is a constant pool reference, return
7660 the constant pool rtx, else NULL. */
7661
7662static rtx
7663maybe_get_pool_constant (x)
7664 rtx x;
7665{
7666 x = XEXP (x, 0);
7667
75d38379 7668 if (flag_pic && ! TARGET_64BIT)
f996902d
RH
7669 {
7670 if (GET_CODE (x) != PLUS)
7671 return NULL_RTX;
7672 if (XEXP (x, 0) != pic_offset_table_rtx)
7673 return NULL_RTX;
7674 x = XEXP (x, 1);
7675 if (GET_CODE (x) != CONST)
7676 return NULL_RTX;
7677 x = XEXP (x, 0);
7678 if (GET_CODE (x) != UNSPEC)
7679 return NULL_RTX;
7680 if (XINT (x, 1) != UNSPEC_GOTOFF)
7681 return NULL_RTX;
7682 x = XVECEXP (x, 0, 0);
7683 }
7684
7685 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7686 return get_pool_constant (x);
7687
7688 return NULL_RTX;
7689}
7690
79325812 7691void
e075ae69
RH
7692ix86_expand_move (mode, operands)
7693 enum machine_mode mode;
7694 rtx operands[];
32b5b1aa 7695{
e075ae69 7696 int strict = (reload_in_progress || reload_completed);
f996902d
RH
7697 rtx insn, op0, op1, tmp;
7698
7699 op0 = operands[0];
7700 op1 = operands[1];
7701
7702 /* ??? We have a slight problem. We need to say that tls symbols are
7703 not legitimate constants so that reload does not helpfully reload
7704 these constants from a REG_EQUIV, which we cannot handle. (Recall
7705 that general- and local-dynamic address resolution requires a
7706 function call.)
e9a25f70 7707
f996902d
RH
7708 However, if we say that tls symbols are not legitimate constants,
7709 then emit_move_insn helpfully drop them into the constant pool.
7710
7711 It is far easier to work around emit_move_insn than reload. Recognize
7712 the MEM that we would have created and extract the symbol_ref. */
7713
7714 if (mode == Pmode
7715 && GET_CODE (op1) == MEM
7716 && RTX_UNCHANGING_P (op1))
32b5b1aa 7717 {
f996902d
RH
7718 tmp = maybe_get_pool_constant (op1);
7719 /* Note that we only care about symbolic constants here, which
7720 unlike CONST_INT will always have a proper mode. */
7721 if (tmp && GET_MODE (tmp) == Pmode)
7722 op1 = tmp;
7723 }
e9a25f70 7724
f996902d
RH
7725 if (tls_symbolic_operand (op1, Pmode))
7726 {
7727 op1 = legitimize_address (op1, op1, VOIDmode);
7728 if (GET_CODE (op0) == MEM)
7729 {
7730 tmp = gen_reg_rtx (mode);
7731 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7732 op1 = tmp;
7733 }
7734 }
7735 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7736 {
b069de3b
SS
7737#if TARGET_MACHO
7738 if (MACHOPIC_PURE)
7739 {
7740 rtx temp = ((reload_in_progress
7741 || ((op0 && GET_CODE (op0) == REG)
7742 && mode == Pmode))
7743 ? op0 : gen_reg_rtx (Pmode));
7744 op1 = machopic_indirect_data_reference (op1, temp);
7745 op1 = machopic_legitimize_pic_address (op1, mode,
7746 temp == op1 ? 0 : temp);
7747 }
7748 else
7749 {
7750 if (MACHOPIC_INDIRECT)
7751 op1 = machopic_indirect_data_reference (op1, 0);
7752 }
7753 if (op0 != op1)
7754 {
7755 insn = gen_rtx_SET (VOIDmode, op0, op1);
7756 emit_insn (insn);
7757 }
7758 return;
7759#endif /* TARGET_MACHO */
f996902d
RH
7760 if (GET_CODE (op0) == MEM)
7761 op1 = force_reg (Pmode, op1);
e075ae69 7762 else
32b5b1aa 7763 {
f996902d 7764 rtx temp = op0;
e075ae69
RH
7765 if (GET_CODE (temp) != REG)
7766 temp = gen_reg_rtx (Pmode);
f996902d
RH
7767 temp = legitimize_pic_address (op1, temp);
7768 if (temp == op0)
e075ae69 7769 return;
f996902d 7770 op1 = temp;
32b5b1aa 7771 }
e075ae69
RH
7772 }
7773 else
7774 {
f996902d 7775 if (GET_CODE (op0) == MEM
44cf5b6a 7776 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
f996902d
RH
7777 || !push_operand (op0, mode))
7778 && GET_CODE (op1) == MEM)
7779 op1 = force_reg (mode, op1);
e9a25f70 7780
f996902d
RH
7781 if (push_operand (op0, mode)
7782 && ! general_no_elim_operand (op1, mode))
7783 op1 = copy_to_mode_reg (mode, op1);
2c5a510c 7784
44cf5b6a
JH
7785 /* Force large constants in 64bit compilation into register
7786 to get them CSEed. */
7787 if (TARGET_64BIT && mode == DImode
f996902d
RH
7788 && immediate_operand (op1, mode)
7789 && !x86_64_zero_extended_value (op1)
7790 && !register_operand (op0, mode)
44cf5b6a 7791 && optimize && !reload_completed && !reload_in_progress)
f996902d 7792 op1 = copy_to_mode_reg (mode, op1);
44cf5b6a 7793
e075ae69 7794 if (FLOAT_MODE_P (mode))
32b5b1aa 7795 {
d7a29404
JH
7796 /* If we are loading a floating point constant to a register,
7797 force the value to memory now, since we'll get better code
7798 out the back end. */
e075ae69
RH
7799
7800 if (strict)
7801 ;
f996902d
RH
7802 else if (GET_CODE (op1) == CONST_DOUBLE
7803 && register_operand (op0, mode))
7804 op1 = validize_mem (force_const_mem (mode, op1));
32b5b1aa 7805 }
32b5b1aa 7806 }
e9a25f70 7807
f996902d 7808 insn = gen_rtx_SET (VOIDmode, op0, op1);
e9a25f70 7809
e075ae69
RH
7810 emit_insn (insn);
7811}
e9a25f70 7812
e37af218
RH
7813void
7814ix86_expand_vector_move (mode, operands)
7815 enum machine_mode mode;
7816 rtx operands[];
7817{
7818 /* Force constants other than zero into memory. We do not know how
7819 the instructions used to build constants modify the upper 64 bits
7820 of the register, once we have that information we may be able
7821 to handle some of them more efficiently. */
7822 if ((reload_in_progress | reload_completed) == 0
7823 && register_operand (operands[0], mode)
7824 && CONSTANT_P (operands[1]))
f8ca7923 7825 operands[1] = force_const_mem (mode, operands[1]);
e37af218
RH
7826
7827 /* Make operand1 a register if it isn't already. */
f8ca7923 7828 if (!no_new_pseudos
e37af218 7829 && !register_operand (operands[0], mode)
b105d6da 7830 && !register_operand (operands[1], mode))
e37af218 7831 {
59bef189 7832 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
e37af218
RH
7833 emit_move_insn (operands[0], temp);
7834 return;
7835 }
7836
7837 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
fce5a9f2 7838}
e37af218 7839
e075ae69
RH
7840/* Attempt to expand a binary operator. Make the expansion closer to the
7841 actual machine, then just general_operand, which will allow 3 separate
9d81fc27 7842 memory references (one output, two input) in a single insn. */
e9a25f70 7843
e075ae69
RH
7844void
7845ix86_expand_binary_operator (code, mode, operands)
7846 enum rtx_code code;
7847 enum machine_mode mode;
7848 rtx operands[];
7849{
7850 int matching_memory;
7851 rtx src1, src2, dst, op, clob;
7852
7853 dst = operands[0];
7854 src1 = operands[1];
7855 src2 = operands[2];
7856
7857 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7858 if (GET_RTX_CLASS (code) == 'c'
7859 && (rtx_equal_p (dst, src2)
7860 || immediate_operand (src1, mode)))
7861 {
7862 rtx temp = src1;
7863 src1 = src2;
7864 src2 = temp;
32b5b1aa 7865 }
e9a25f70 7866
e075ae69
RH
7867 /* If the destination is memory, and we do not have matching source
7868 operands, do things in registers. */
7869 matching_memory = 0;
7870 if (GET_CODE (dst) == MEM)
32b5b1aa 7871 {
e075ae69
RH
7872 if (rtx_equal_p (dst, src1))
7873 matching_memory = 1;
7874 else if (GET_RTX_CLASS (code) == 'c'
7875 && rtx_equal_p (dst, src2))
7876 matching_memory = 2;
7877 else
7878 dst = gen_reg_rtx (mode);
7879 }
0f290768 7880
e075ae69
RH
7881 /* Both source operands cannot be in memory. */
7882 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7883 {
7884 if (matching_memory != 2)
7885 src2 = force_reg (mode, src2);
7886 else
7887 src1 = force_reg (mode, src1);
32b5b1aa 7888 }
e9a25f70 7889
06a964de
JH
7890 /* If the operation is not commutable, source 1 cannot be a constant
7891 or non-matching memory. */
0f290768 7892 if ((CONSTANT_P (src1)
06a964de
JH
7893 || (!matching_memory && GET_CODE (src1) == MEM))
7894 && GET_RTX_CLASS (code) != 'c')
e075ae69 7895 src1 = force_reg (mode, src1);
0f290768 7896
e075ae69 7897 /* If optimizing, copy to regs to improve CSE */
fe577e58 7898 if (optimize && ! no_new_pseudos)
32b5b1aa 7899 {
e075ae69
RH
7900 if (GET_CODE (dst) == MEM)
7901 dst = gen_reg_rtx (mode);
7902 if (GET_CODE (src1) == MEM)
7903 src1 = force_reg (mode, src1);
7904 if (GET_CODE (src2) == MEM)
7905 src2 = force_reg (mode, src2);
32b5b1aa 7906 }
e9a25f70 7907
e075ae69
RH
7908 /* Emit the instruction. */
7909
7910 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7911 if (reload_in_progress)
7912 {
7913 /* Reload doesn't know about the flags register, and doesn't know that
7914 it doesn't want to clobber it. We can only do this with PLUS. */
7915 if (code != PLUS)
7916 abort ();
7917 emit_insn (op);
7918 }
7919 else
32b5b1aa 7920 {
e075ae69
RH
7921 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7922 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
32b5b1aa 7923 }
e9a25f70 7924
e075ae69
RH
7925 /* Fix up the destination if needed. */
7926 if (dst != operands[0])
7927 emit_move_insn (operands[0], dst);
7928}
7929
7930/* Return TRUE or FALSE depending on whether the binary operator meets the
7931 appropriate constraints. */
7932
7933int
7934ix86_binary_operator_ok (code, mode, operands)
7935 enum rtx_code code;
7936 enum machine_mode mode ATTRIBUTE_UNUSED;
7937 rtx operands[3];
7938{
7939 /* Both source operands cannot be in memory. */
7940 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7941 return 0;
7942 /* If the operation is not commutable, source 1 cannot be a constant. */
7943 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7944 return 0;
7945 /* If the destination is memory, we must have a matching source operand. */
7946 if (GET_CODE (operands[0]) == MEM
7947 && ! (rtx_equal_p (operands[0], operands[1])
7948 || (GET_RTX_CLASS (code) == 'c'
7949 && rtx_equal_p (operands[0], operands[2]))))
7950 return 0;
06a964de 7951 /* If the operation is not commutable and the source 1 is memory, we must
d6a7951f 7952 have a matching destination. */
06a964de
JH
7953 if (GET_CODE (operands[1]) == MEM
7954 && GET_RTX_CLASS (code) != 'c'
7955 && ! rtx_equal_p (operands[0], operands[1]))
7956 return 0;
e075ae69
RH
7957 return 1;
7958}
7959
7960/* Attempt to expand a unary operator. Make the expansion closer to the
7961 actual machine, then just general_operand, which will allow 2 separate
9d81fc27 7962 memory references (one output, one input) in a single insn. */
e075ae69 7963
9d81fc27 7964void
e075ae69
RH
7965ix86_expand_unary_operator (code, mode, operands)
7966 enum rtx_code code;
7967 enum machine_mode mode;
7968 rtx operands[];
7969{
06a964de
JH
7970 int matching_memory;
7971 rtx src, dst, op, clob;
7972
7973 dst = operands[0];
7974 src = operands[1];
e075ae69 7975
06a964de
JH
7976 /* If the destination is memory, and we do not have matching source
7977 operands, do things in registers. */
7978 matching_memory = 0;
7979 if (GET_CODE (dst) == MEM)
32b5b1aa 7980 {
06a964de
JH
7981 if (rtx_equal_p (dst, src))
7982 matching_memory = 1;
e075ae69 7983 else
06a964de 7984 dst = gen_reg_rtx (mode);
32b5b1aa 7985 }
e9a25f70 7986
06a964de
JH
7987 /* When source operand is memory, destination must match. */
7988 if (!matching_memory && GET_CODE (src) == MEM)
7989 src = force_reg (mode, src);
0f290768 7990
06a964de 7991 /* If optimizing, copy to regs to improve CSE */
fe577e58 7992 if (optimize && ! no_new_pseudos)
06a964de
JH
7993 {
7994 if (GET_CODE (dst) == MEM)
7995 dst = gen_reg_rtx (mode);
7996 if (GET_CODE (src) == MEM)
7997 src = force_reg (mode, src);
7998 }
7999
8000 /* Emit the instruction. */
8001
8002 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8003 if (reload_in_progress || code == NOT)
8004 {
8005 /* Reload doesn't know about the flags register, and doesn't know that
8006 it doesn't want to clobber it. */
8007 if (code != NOT)
8008 abort ();
8009 emit_insn (op);
8010 }
8011 else
8012 {
8013 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8014 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8015 }
8016
8017 /* Fix up the destination if needed. */
8018 if (dst != operands[0])
8019 emit_move_insn (operands[0], dst);
e075ae69
RH
8020}
8021
8022/* Return TRUE or FALSE depending on whether the unary operator meets the
8023 appropriate constraints. */
8024
8025int
8026ix86_unary_operator_ok (code, mode, operands)
8027 enum rtx_code code ATTRIBUTE_UNUSED;
8028 enum machine_mode mode ATTRIBUTE_UNUSED;
8029 rtx operands[2] ATTRIBUTE_UNUSED;
8030{
06a964de
JH
8031 /* If one of operands is memory, source and destination must match. */
8032 if ((GET_CODE (operands[0]) == MEM
8033 || GET_CODE (operands[1]) == MEM)
8034 && ! rtx_equal_p (operands[0], operands[1]))
8035 return FALSE;
e075ae69
RH
8036 return TRUE;
8037}
8038
16189740
RH
8039/* Return TRUE or FALSE depending on whether the first SET in INSN
8040 has source and destination with matching CC modes, and that the
8041 CC mode is at least as constrained as REQ_MODE. */
8042
8043int
8044ix86_match_ccmode (insn, req_mode)
8045 rtx insn;
8046 enum machine_mode req_mode;
8047{
8048 rtx set;
8049 enum machine_mode set_mode;
8050
8051 set = PATTERN (insn);
8052 if (GET_CODE (set) == PARALLEL)
8053 set = XVECEXP (set, 0, 0);
8054 if (GET_CODE (set) != SET)
8055 abort ();
9076b9c1
JH
8056 if (GET_CODE (SET_SRC (set)) != COMPARE)
8057 abort ();
16189740
RH
8058
8059 set_mode = GET_MODE (SET_DEST (set));
8060 switch (set_mode)
8061 {
9076b9c1
JH
8062 case CCNOmode:
8063 if (req_mode != CCNOmode
8064 && (req_mode != CCmode
8065 || XEXP (SET_SRC (set), 1) != const0_rtx))
8066 return 0;
8067 break;
16189740 8068 case CCmode:
9076b9c1 8069 if (req_mode == CCGCmode)
16189740
RH
8070 return 0;
8071 /* FALLTHRU */
9076b9c1
JH
8072 case CCGCmode:
8073 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8074 return 0;
8075 /* FALLTHRU */
8076 case CCGOCmode:
16189740
RH
8077 if (req_mode == CCZmode)
8078 return 0;
8079 /* FALLTHRU */
8080 case CCZmode:
8081 break;
8082
8083 default:
8084 abort ();
8085 }
8086
8087 return (GET_MODE (SET_SRC (set)) == set_mode);
8088}
8089
e075ae69
RH
8090/* Generate insn patterns to do an integer compare of OPERANDS. */
8091
8092static rtx
8093ix86_expand_int_compare (code, op0, op1)
8094 enum rtx_code code;
8095 rtx op0, op1;
8096{
8097 enum machine_mode cmpmode;
8098 rtx tmp, flags;
8099
8100 cmpmode = SELECT_CC_MODE (code, op0, op1);
8101 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8102
8103 /* This is very simple, but making the interface the same as in the
8104 FP case makes the rest of the code easier. */
8105 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8106 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8107
8108 /* Return the test that should be put into the flags user, i.e.
8109 the bcc, scc, or cmov instruction. */
8110 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8111}
8112
3a3677ff
RH
8113/* Figure out whether to use ordered or unordered fp comparisons.
8114 Return the appropriate mode to use. */
e075ae69 8115
b1cdafbb 8116enum machine_mode
3a3677ff 8117ix86_fp_compare_mode (code)
8752c357 8118 enum rtx_code code ATTRIBUTE_UNUSED;
e075ae69 8119{
9e7adcb3
JH
8120 /* ??? In order to make all comparisons reversible, we do all comparisons
8121 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8122 all forms trapping and nontrapping comparisons, we can make inequality
8123 comparisons trapping again, since it results in better code when using
8124 FCOM based compares. */
8125 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
8126}
8127
9076b9c1
JH
8128enum machine_mode
8129ix86_cc_mode (code, op0, op1)
8130 enum rtx_code code;
8131 rtx op0, op1;
8132{
8133 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8134 return ix86_fp_compare_mode (code);
8135 switch (code)
8136 {
8137 /* Only zero flag is needed. */
8138 case EQ: /* ZF=0 */
8139 case NE: /* ZF!=0 */
8140 return CCZmode;
8141 /* Codes needing carry flag. */
265dab10
JH
8142 case GEU: /* CF=0 */
8143 case GTU: /* CF=0 & ZF=0 */
7e08e190
JH
8144 case LTU: /* CF=1 */
8145 case LEU: /* CF=1 | ZF=1 */
265dab10 8146 return CCmode;
9076b9c1
JH
8147 /* Codes possibly doable only with sign flag when
8148 comparing against zero. */
8149 case GE: /* SF=OF or SF=0 */
7e08e190 8150 case LT: /* SF<>OF or SF=1 */
9076b9c1
JH
8151 if (op1 == const0_rtx)
8152 return CCGOCmode;
8153 else
8154 /* For other cases Carry flag is not required. */
8155 return CCGCmode;
8156 /* Codes doable only with sign flag when comparing
8157 against zero, but we miss jump instruction for it
8158 so we need to use relational tests agains overflow
8159 that thus needs to be zero. */
8160 case GT: /* ZF=0 & SF=OF */
8161 case LE: /* ZF=1 | SF<>OF */
8162 if (op1 == const0_rtx)
8163 return CCNOmode;
8164 else
8165 return CCGCmode;
7fcd7218
JH
8166 /* strcmp pattern do (use flags) and combine may ask us for proper
8167 mode. */
8168 case USE:
8169 return CCmode;
9076b9c1 8170 default:
0f290768 8171 abort ();
9076b9c1
JH
8172 }
8173}
8174
3a3677ff
RH
8175/* Return true if we should use an FCOMI instruction for this fp comparison. */
8176
a940d8bd 8177int
3a3677ff 8178ix86_use_fcomi_compare (code)
9e7adcb3 8179 enum rtx_code code ATTRIBUTE_UNUSED;
3a3677ff 8180{
9e7adcb3
JH
8181 enum rtx_code swapped_code = swap_condition (code);
8182 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8183 || (ix86_fp_comparison_cost (swapped_code)
8184 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
8185}
8186
0f290768 8187/* Swap, force into registers, or otherwise massage the two operands
3a3677ff
RH
8188 to a fp comparison. The operands are updated in place; the new
8189 comparsion code is returned. */
8190
8191static enum rtx_code
8192ix86_prepare_fp_compare_args (code, pop0, pop1)
8193 enum rtx_code code;
8194 rtx *pop0, *pop1;
8195{
8196 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8197 rtx op0 = *pop0, op1 = *pop1;
8198 enum machine_mode op_mode = GET_MODE (op0);
0644b628 8199 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
3a3677ff 8200
e075ae69 8201 /* All of the unordered compare instructions only work on registers.
3a3677ff
RH
8202 The same is true of the XFmode compare instructions. The same is
8203 true of the fcomi compare instructions. */
8204
0644b628
JH
8205 if (!is_sse
8206 && (fpcmp_mode == CCFPUmode
8207 || op_mode == XFmode
8208 || op_mode == TFmode
8209 || ix86_use_fcomi_compare (code)))
e075ae69 8210 {
3a3677ff
RH
8211 op0 = force_reg (op_mode, op0);
8212 op1 = force_reg (op_mode, op1);
e075ae69
RH
8213 }
8214 else
8215 {
8216 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8217 things around if they appear profitable, otherwise force op0
8218 into a register. */
8219
8220 if (standard_80387_constant_p (op0) == 0
8221 || (GET_CODE (op0) == MEM
8222 && ! (standard_80387_constant_p (op1) == 0
8223 || GET_CODE (op1) == MEM)))
32b5b1aa 8224 {
e075ae69
RH
8225 rtx tmp;
8226 tmp = op0, op0 = op1, op1 = tmp;
8227 code = swap_condition (code);
8228 }
8229
8230 if (GET_CODE (op0) != REG)
3a3677ff 8231 op0 = force_reg (op_mode, op0);
e075ae69
RH
8232
8233 if (CONSTANT_P (op1))
8234 {
8235 if (standard_80387_constant_p (op1))
3a3677ff 8236 op1 = force_reg (op_mode, op1);
e075ae69 8237 else
3a3677ff 8238 op1 = validize_mem (force_const_mem (op_mode, op1));
32b5b1aa
SC
8239 }
8240 }
e9a25f70 8241
9e7adcb3
JH
8242 /* Try to rearrange the comparison to make it cheaper. */
8243 if (ix86_fp_comparison_cost (code)
8244 > ix86_fp_comparison_cost (swap_condition (code))
558740bf 8245 && (GET_CODE (op1) == REG || !no_new_pseudos))
9e7adcb3
JH
8246 {
8247 rtx tmp;
8248 tmp = op0, op0 = op1, op1 = tmp;
8249 code = swap_condition (code);
8250 if (GET_CODE (op0) != REG)
8251 op0 = force_reg (op_mode, op0);
8252 }
8253
3a3677ff
RH
8254 *pop0 = op0;
8255 *pop1 = op1;
8256 return code;
8257}
8258
c0c102a9
JH
8259/* Convert comparison codes we use to represent FP comparison to integer
8260 code that will result in proper branch. Return UNKNOWN if no such code
8261 is available. */
8262static enum rtx_code
8263ix86_fp_compare_code_to_integer (code)
8264 enum rtx_code code;
8265{
8266 switch (code)
8267 {
8268 case GT:
8269 return GTU;
8270 case GE:
8271 return GEU;
8272 case ORDERED:
8273 case UNORDERED:
8274 return code;
8275 break;
8276 case UNEQ:
8277 return EQ;
8278 break;
8279 case UNLT:
8280 return LTU;
8281 break;
8282 case UNLE:
8283 return LEU;
8284 break;
8285 case LTGT:
8286 return NE;
8287 break;
8288 default:
8289 return UNKNOWN;
8290 }
8291}
8292
8293/* Split comparison code CODE into comparisons we can do using branch
8294 instructions. BYPASS_CODE is comparison code for branch that will
8295 branch around FIRST_CODE and SECOND_CODE. If some of branches
8296 is not required, set value to NIL.
8297 We never require more than two branches. */
8298static void
8299ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8300 enum rtx_code code, *bypass_code, *first_code, *second_code;
8301{
8302 *first_code = code;
8303 *bypass_code = NIL;
8304 *second_code = NIL;
8305
8306 /* The fcomi comparison sets flags as follows:
8307
8308 cmp ZF PF CF
8309 > 0 0 0
8310 < 0 0 1
8311 = 1 0 0
8312 un 1 1 1 */
8313
8314 switch (code)
8315 {
8316 case GT: /* GTU - CF=0 & ZF=0 */
8317 case GE: /* GEU - CF=0 */
8318 case ORDERED: /* PF=0 */
8319 case UNORDERED: /* PF=1 */
8320 case UNEQ: /* EQ - ZF=1 */
8321 case UNLT: /* LTU - CF=1 */
8322 case UNLE: /* LEU - CF=1 | ZF=1 */
8323 case LTGT: /* EQ - ZF=0 */
8324 break;
8325 case LT: /* LTU - CF=1 - fails on unordered */
8326 *first_code = UNLT;
8327 *bypass_code = UNORDERED;
8328 break;
8329 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8330 *first_code = UNLE;
8331 *bypass_code = UNORDERED;
8332 break;
8333 case EQ: /* EQ - ZF=1 - fails on unordered */
8334 *first_code = UNEQ;
8335 *bypass_code = UNORDERED;
8336 break;
8337 case NE: /* NE - ZF=0 - fails on unordered */
8338 *first_code = LTGT;
8339 *second_code = UNORDERED;
8340 break;
8341 case UNGE: /* GEU - CF=0 - fails on unordered */
8342 *first_code = GE;
8343 *second_code = UNORDERED;
8344 break;
8345 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8346 *first_code = GT;
8347 *second_code = UNORDERED;
8348 break;
8349 default:
8350 abort ();
8351 }
8352 if (!TARGET_IEEE_FP)
8353 {
8354 *second_code = NIL;
8355 *bypass_code = NIL;
8356 }
8357}
8358
9e7adcb3
JH
8359/* Return cost of comparison done fcom + arithmetics operations on AX.
8360 All following functions do use number of instructions as an cost metrics.
8361 In future this should be tweaked to compute bytes for optimize_size and
8362 take into account performance of various instructions on various CPUs. */
8363static int
8364ix86_fp_comparison_arithmetics_cost (code)
8365 enum rtx_code code;
8366{
8367 if (!TARGET_IEEE_FP)
8368 return 4;
8369 /* The cost of code output by ix86_expand_fp_compare. */
8370 switch (code)
8371 {
8372 case UNLE:
8373 case UNLT:
8374 case LTGT:
8375 case GT:
8376 case GE:
8377 case UNORDERED:
8378 case ORDERED:
8379 case UNEQ:
8380 return 4;
8381 break;
8382 case LT:
8383 case NE:
8384 case EQ:
8385 case UNGE:
8386 return 5;
8387 break;
8388 case LE:
8389 case UNGT:
8390 return 6;
8391 break;
8392 default:
8393 abort ();
8394 }
8395}
8396
8397/* Return cost of comparison done using fcomi operation.
8398 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8399static int
8400ix86_fp_comparison_fcomi_cost (code)
8401 enum rtx_code code;
8402{
8403 enum rtx_code bypass_code, first_code, second_code;
8404 /* Return arbitarily high cost when instruction is not supported - this
8405 prevents gcc from using it. */
8406 if (!TARGET_CMOVE)
8407 return 1024;
8408 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8409 return (bypass_code != NIL || second_code != NIL) + 2;
8410}
8411
8412/* Return cost of comparison done using sahf operation.
8413 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8414static int
8415ix86_fp_comparison_sahf_cost (code)
8416 enum rtx_code code;
8417{
8418 enum rtx_code bypass_code, first_code, second_code;
8419 /* Return arbitarily high cost when instruction is not preferred - this
8420 avoids gcc from using it. */
8421 if (!TARGET_USE_SAHF && !optimize_size)
8422 return 1024;
8423 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8424 return (bypass_code != NIL || second_code != NIL) + 3;
8425}
8426
8427/* Compute cost of the comparison done using any method.
8428 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8429static int
8430ix86_fp_comparison_cost (code)
8431 enum rtx_code code;
8432{
8433 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8434 int min;
8435
8436 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8437 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8438
8439 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8440 if (min > sahf_cost)
8441 min = sahf_cost;
8442 if (min > fcomi_cost)
8443 min = fcomi_cost;
8444 return min;
8445}
c0c102a9 8446
3a3677ff
RH
8447/* Generate insn patterns to do a floating point compare of OPERANDS. */
8448
9e7adcb3
JH
8449static rtx
8450ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
3a3677ff
RH
8451 enum rtx_code code;
8452 rtx op0, op1, scratch;
9e7adcb3
JH
8453 rtx *second_test;
8454 rtx *bypass_test;
3a3677ff
RH
8455{
8456 enum machine_mode fpcmp_mode, intcmp_mode;
c0c102a9 8457 rtx tmp, tmp2;
9e7adcb3 8458 int cost = ix86_fp_comparison_cost (code);
c0c102a9 8459 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
8460
8461 fpcmp_mode = ix86_fp_compare_mode (code);
8462 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8463
9e7adcb3
JH
8464 if (second_test)
8465 *second_test = NULL_RTX;
8466 if (bypass_test)
8467 *bypass_test = NULL_RTX;
8468
c0c102a9
JH
8469 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8470
9e7adcb3
JH
8471 /* Do fcomi/sahf based test when profitable. */
8472 if ((bypass_code == NIL || bypass_test)
8473 && (second_code == NIL || second_test)
8474 && ix86_fp_comparison_arithmetics_cost (code) > cost)
32b5b1aa 8475 {
c0c102a9
JH
8476 if (TARGET_CMOVE)
8477 {
8478 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8479 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8480 tmp);
8481 emit_insn (tmp);
8482 }
8483 else
8484 {
8485 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 8486 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
8487 if (!scratch)
8488 scratch = gen_reg_rtx (HImode);
c0c102a9
JH
8489 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8490 emit_insn (gen_x86_sahf_1 (scratch));
8491 }
e075ae69
RH
8492
8493 /* The FP codes work out to act like unsigned. */
9a915772 8494 intcmp_mode = fpcmp_mode;
9e7adcb3
JH
8495 code = first_code;
8496 if (bypass_code != NIL)
8497 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8498 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8499 const0_rtx);
8500 if (second_code != NIL)
8501 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8502 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8503 const0_rtx);
e075ae69
RH
8504 }
8505 else
8506 {
8507 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e075ae69 8508 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 8509 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
8510 if (!scratch)
8511 scratch = gen_reg_rtx (HImode);
3a3677ff 8512 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 8513
9a915772
JH
8514 /* In the unordered case, we have to check C2 for NaN's, which
8515 doesn't happen to work out to anything nice combination-wise.
8516 So do some bit twiddling on the value we've got in AH to come
8517 up with an appropriate set of condition codes. */
e075ae69 8518
9a915772
JH
8519 intcmp_mode = CCNOmode;
8520 switch (code)
32b5b1aa 8521 {
9a915772
JH
8522 case GT:
8523 case UNGT:
8524 if (code == GT || !TARGET_IEEE_FP)
32b5b1aa 8525 {
3a3677ff 8526 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69 8527 code = EQ;
9a915772
JH
8528 }
8529 else
8530 {
8531 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8532 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8533 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8534 intcmp_mode = CCmode;
8535 code = GEU;
8536 }
8537 break;
8538 case LT:
8539 case UNLT:
8540 if (code == LT && TARGET_IEEE_FP)
8541 {
3a3677ff
RH
8542 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8543 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
e075ae69
RH
8544 intcmp_mode = CCmode;
8545 code = EQ;
9a915772
JH
8546 }
8547 else
8548 {
8549 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8550 code = NE;
8551 }
8552 break;
8553 case GE:
8554 case UNGE:
8555 if (code == GE || !TARGET_IEEE_FP)
8556 {
3a3677ff 8557 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69 8558 code = EQ;
9a915772
JH
8559 }
8560 else
8561 {
8562 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8563 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8564 GEN_INT (0x01)));
8565 code = NE;
8566 }
8567 break;
8568 case LE:
8569 case UNLE:
8570 if (code == LE && TARGET_IEEE_FP)
8571 {
3a3677ff
RH
8572 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8573 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8574 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
8575 intcmp_mode = CCmode;
8576 code = LTU;
9a915772
JH
8577 }
8578 else
8579 {
8580 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8581 code = NE;
8582 }
8583 break;
8584 case EQ:
8585 case UNEQ:
8586 if (code == EQ && TARGET_IEEE_FP)
8587 {
3a3677ff
RH
8588 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8589 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
8590 intcmp_mode = CCmode;
8591 code = EQ;
9a915772
JH
8592 }
8593 else
8594 {
3a3677ff
RH
8595 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8596 code = NE;
8597 break;
9a915772
JH
8598 }
8599 break;
8600 case NE:
8601 case LTGT:
8602 if (code == NE && TARGET_IEEE_FP)
8603 {
3a3677ff 8604 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9a915772
JH
8605 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8606 GEN_INT (0x40)));
3a3677ff 8607 code = NE;
9a915772
JH
8608 }
8609 else
8610 {
3a3677ff
RH
8611 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8612 code = EQ;
32b5b1aa 8613 }
9a915772
JH
8614 break;
8615
8616 case UNORDERED:
8617 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8618 code = NE;
8619 break;
8620 case ORDERED:
8621 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8622 code = EQ;
8623 break;
8624
8625 default:
8626 abort ();
32b5b1aa 8627 }
32b5b1aa 8628 }
e075ae69
RH
8629
8630 /* Return the test that should be put into the flags user, i.e.
8631 the bcc, scc, or cmov instruction. */
8632 return gen_rtx_fmt_ee (code, VOIDmode,
8633 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8634 const0_rtx);
8635}
8636
9e3e266c 8637rtx
a1b8572c 8638ix86_expand_compare (code, second_test, bypass_test)
e075ae69 8639 enum rtx_code code;
a1b8572c 8640 rtx *second_test, *bypass_test;
e075ae69
RH
8641{
8642 rtx op0, op1, ret;
8643 op0 = ix86_compare_op0;
8644 op1 = ix86_compare_op1;
8645
a1b8572c
JH
8646 if (second_test)
8647 *second_test = NULL_RTX;
8648 if (bypass_test)
8649 *bypass_test = NULL_RTX;
8650
e075ae69 8651 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 8652 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 8653 second_test, bypass_test);
32b5b1aa 8654 else
e075ae69
RH
8655 ret = ix86_expand_int_compare (code, op0, op1);
8656
8657 return ret;
8658}
8659
03598dea
JH
8660/* Return true if the CODE will result in nontrivial jump sequence. */
8661bool
8662ix86_fp_jump_nontrivial_p (code)
8663 enum rtx_code code;
8664{
8665 enum rtx_code bypass_code, first_code, second_code;
8666 if (!TARGET_CMOVE)
8667 return true;
8668 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8669 return bypass_code != NIL || second_code != NIL;
8670}
8671
e075ae69 8672void
3a3677ff 8673ix86_expand_branch (code, label)
e075ae69 8674 enum rtx_code code;
e075ae69
RH
8675 rtx label;
8676{
3a3677ff 8677 rtx tmp;
e075ae69 8678
3a3677ff 8679 switch (GET_MODE (ix86_compare_op0))
32b5b1aa 8680 {
3a3677ff
RH
8681 case QImode:
8682 case HImode:
8683 case SImode:
0d7d98ee 8684 simple:
a1b8572c 8685 tmp = ix86_expand_compare (code, NULL, NULL);
e075ae69
RH
8686 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8687 gen_rtx_LABEL_REF (VOIDmode, label),
8688 pc_rtx);
8689 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa 8690 return;
e075ae69 8691
3a3677ff
RH
8692 case SFmode:
8693 case DFmode:
0f290768 8694 case XFmode:
2b589241 8695 case TFmode:
3a3677ff
RH
8696 {
8697 rtvec vec;
8698 int use_fcomi;
03598dea 8699 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
8700
8701 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8702 &ix86_compare_op1);
fce5a9f2 8703
03598dea
JH
8704 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8705
8706 /* Check whether we will use the natural sequence with one jump. If
8707 so, we can expand jump early. Otherwise delay expansion by
8708 creating compound insn to not confuse optimizers. */
8709 if (bypass_code == NIL && second_code == NIL
8710 && TARGET_CMOVE)
8711 {
8712 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8713 gen_rtx_LABEL_REF (VOIDmode, label),
8714 pc_rtx, NULL_RTX);
8715 }
8716 else
8717 {
8718 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8719 ix86_compare_op0, ix86_compare_op1);
8720 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8721 gen_rtx_LABEL_REF (VOIDmode, label),
8722 pc_rtx);
8723 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8724
8725 use_fcomi = ix86_use_fcomi_compare (code);
8726 vec = rtvec_alloc (3 + !use_fcomi);
8727 RTVEC_ELT (vec, 0) = tmp;
8728 RTVEC_ELT (vec, 1)
8729 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8730 RTVEC_ELT (vec, 2)
8731 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8732 if (! use_fcomi)
8733 RTVEC_ELT (vec, 3)
8734 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8735
8736 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8737 }
3a3677ff
RH
8738 return;
8739 }
32b5b1aa 8740
3a3677ff 8741 case DImode:
0d7d98ee
JH
8742 if (TARGET_64BIT)
8743 goto simple;
3a3677ff
RH
8744 /* Expand DImode branch into multiple compare+branch. */
8745 {
8746 rtx lo[2], hi[2], label2;
8747 enum rtx_code code1, code2, code3;
32b5b1aa 8748
3a3677ff
RH
8749 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8750 {
8751 tmp = ix86_compare_op0;
8752 ix86_compare_op0 = ix86_compare_op1;
8753 ix86_compare_op1 = tmp;
8754 code = swap_condition (code);
8755 }
8756 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8757 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
32b5b1aa 8758
3a3677ff
RH
8759 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8760 avoid two branches. This costs one extra insn, so disable when
8761 optimizing for size. */
32b5b1aa 8762
3a3677ff
RH
8763 if ((code == EQ || code == NE)
8764 && (!optimize_size
8765 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8766 {
8767 rtx xor0, xor1;
32b5b1aa 8768
3a3677ff
RH
8769 xor1 = hi[0];
8770 if (hi[1] != const0_rtx)
8771 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8772 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 8773
3a3677ff
RH
8774 xor0 = lo[0];
8775 if (lo[1] != const0_rtx)
8776 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8777 NULL_RTX, 0, OPTAB_WIDEN);
e075ae69 8778
3a3677ff
RH
8779 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8780 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 8781
3a3677ff
RH
8782 ix86_compare_op0 = tmp;
8783 ix86_compare_op1 = const0_rtx;
8784 ix86_expand_branch (code, label);
8785 return;
8786 }
e075ae69 8787
1f9124e4
JJ
8788 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8789 op1 is a constant and the low word is zero, then we can just
8790 examine the high word. */
32b5b1aa 8791
1f9124e4
JJ
8792 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8793 switch (code)
8794 {
8795 case LT: case LTU: case GE: case GEU:
8796 ix86_compare_op0 = hi[0];
8797 ix86_compare_op1 = hi[1];
8798 ix86_expand_branch (code, label);
8799 return;
8800 default:
8801 break;
8802 }
e075ae69 8803
3a3677ff 8804 /* Otherwise, we need two or three jumps. */
e075ae69 8805
3a3677ff 8806 label2 = gen_label_rtx ();
e075ae69 8807
3a3677ff
RH
8808 code1 = code;
8809 code2 = swap_condition (code);
8810 code3 = unsigned_condition (code);
e075ae69 8811
3a3677ff
RH
8812 switch (code)
8813 {
8814 case LT: case GT: case LTU: case GTU:
8815 break;
e075ae69 8816
3a3677ff
RH
8817 case LE: code1 = LT; code2 = GT; break;
8818 case GE: code1 = GT; code2 = LT; break;
8819 case LEU: code1 = LTU; code2 = GTU; break;
8820 case GEU: code1 = GTU; code2 = LTU; break;
e075ae69 8821
3a3677ff
RH
8822 case EQ: code1 = NIL; code2 = NE; break;
8823 case NE: code2 = NIL; break;
e075ae69 8824
3a3677ff
RH
8825 default:
8826 abort ();
8827 }
e075ae69 8828
3a3677ff
RH
8829 /*
8830 * a < b =>
8831 * if (hi(a) < hi(b)) goto true;
8832 * if (hi(a) > hi(b)) goto false;
8833 * if (lo(a) < lo(b)) goto true;
8834 * false:
8835 */
8836
8837 ix86_compare_op0 = hi[0];
8838 ix86_compare_op1 = hi[1];
8839
8840 if (code1 != NIL)
8841 ix86_expand_branch (code1, label);
8842 if (code2 != NIL)
8843 ix86_expand_branch (code2, label2);
8844
8845 ix86_compare_op0 = lo[0];
8846 ix86_compare_op1 = lo[1];
8847 ix86_expand_branch (code3, label);
8848
8849 if (code2 != NIL)
8850 emit_label (label2);
8851 return;
8852 }
e075ae69 8853
3a3677ff
RH
8854 default:
8855 abort ();
8856 }
32b5b1aa 8857}
e075ae69 8858
9e7adcb3
JH
8859/* Split branch based on floating point condition. */
8860void
03598dea
JH
8861ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8862 enum rtx_code code;
8863 rtx op1, op2, target1, target2, tmp;
9e7adcb3
JH
8864{
8865 rtx second, bypass;
8866 rtx label = NULL_RTX;
03598dea 8867 rtx condition;
6b24c259
JH
8868 int bypass_probability = -1, second_probability = -1, probability = -1;
8869 rtx i;
9e7adcb3
JH
8870
8871 if (target2 != pc_rtx)
8872 {
8873 rtx tmp = target2;
8874 code = reverse_condition_maybe_unordered (code);
8875 target2 = target1;
8876 target1 = tmp;
8877 }
8878
8879 condition = ix86_expand_fp_compare (code, op1, op2,
8880 tmp, &second, &bypass);
6b24c259
JH
8881
8882 if (split_branch_probability >= 0)
8883 {
8884 /* Distribute the probabilities across the jumps.
8885 Assume the BYPASS and SECOND to be always test
8886 for UNORDERED. */
8887 probability = split_branch_probability;
8888
d6a7951f 8889 /* Value of 1 is low enough to make no need for probability
6b24c259
JH
8890 to be updated. Later we may run some experiments and see
8891 if unordered values are more frequent in practice. */
8892 if (bypass)
8893 bypass_probability = 1;
8894 if (second)
8895 second_probability = 1;
8896 }
9e7adcb3
JH
8897 if (bypass != NULL_RTX)
8898 {
8899 label = gen_label_rtx ();
6b24c259
JH
8900 i = emit_jump_insn (gen_rtx_SET
8901 (VOIDmode, pc_rtx,
8902 gen_rtx_IF_THEN_ELSE (VOIDmode,
8903 bypass,
8904 gen_rtx_LABEL_REF (VOIDmode,
8905 label),
8906 pc_rtx)));
8907 if (bypass_probability >= 0)
8908 REG_NOTES (i)
8909 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8910 GEN_INT (bypass_probability),
8911 REG_NOTES (i));
8912 }
8913 i = emit_jump_insn (gen_rtx_SET
9e7adcb3
JH
8914 (VOIDmode, pc_rtx,
8915 gen_rtx_IF_THEN_ELSE (VOIDmode,
6b24c259
JH
8916 condition, target1, target2)));
8917 if (probability >= 0)
8918 REG_NOTES (i)
8919 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8920 GEN_INT (probability),
8921 REG_NOTES (i));
8922 if (second != NULL_RTX)
9e7adcb3 8923 {
6b24c259
JH
8924 i = emit_jump_insn (gen_rtx_SET
8925 (VOIDmode, pc_rtx,
8926 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8927 target2)));
8928 if (second_probability >= 0)
8929 REG_NOTES (i)
8930 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8931 GEN_INT (second_probability),
8932 REG_NOTES (i));
9e7adcb3 8933 }
9e7adcb3
JH
8934 if (label != NULL_RTX)
8935 emit_label (label);
8936}
8937
32b5b1aa 8938int
3a3677ff 8939ix86_expand_setcc (code, dest)
e075ae69 8940 enum rtx_code code;
e075ae69 8941 rtx dest;
32b5b1aa 8942{
a1b8572c
JH
8943 rtx ret, tmp, tmpreg;
8944 rtx second_test, bypass_test;
e075ae69 8945
885a70fd
JH
8946 if (GET_MODE (ix86_compare_op0) == DImode
8947 && !TARGET_64BIT)
e075ae69
RH
8948 return 0; /* FAIL */
8949
b932f770
JH
8950 if (GET_MODE (dest) != QImode)
8951 abort ();
e075ae69 8952
a1b8572c 8953 ret = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
8954 PUT_MODE (ret, QImode);
8955
8956 tmp = dest;
a1b8572c 8957 tmpreg = dest;
32b5b1aa 8958
e075ae69 8959 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
a1b8572c
JH
8960 if (bypass_test || second_test)
8961 {
8962 rtx test = second_test;
8963 int bypass = 0;
8964 rtx tmp2 = gen_reg_rtx (QImode);
8965 if (bypass_test)
8966 {
8967 if (second_test)
b531087a 8968 abort ();
a1b8572c
JH
8969 test = bypass_test;
8970 bypass = 1;
8971 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8972 }
8973 PUT_MODE (test, QImode);
8974 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8975
8976 if (bypass)
8977 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8978 else
8979 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8980 }
e075ae69 8981
e075ae69 8982 return 1; /* DONE */
32b5b1aa 8983}
e075ae69 8984
32b5b1aa 8985int
e075ae69
RH
8986ix86_expand_int_movcc (operands)
8987 rtx operands[];
32b5b1aa 8988{
e075ae69
RH
8989 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8990 rtx compare_seq, compare_op;
a1b8572c 8991 rtx second_test, bypass_test;
635559ab 8992 enum machine_mode mode = GET_MODE (operands[0]);
32b5b1aa 8993
36583fea
JH
8994 /* When the compare code is not LTU or GEU, we can not use sbbl case.
8995 In case comparsion is done with immediate, we can convert it to LTU or
8996 GEU by altering the integer. */
8997
8998 if ((code == LEU || code == GTU)
8999 && GET_CODE (ix86_compare_op1) == CONST_INT
635559ab 9000 && mode != HImode
261376e7
RH
9001 && INTVAL (ix86_compare_op1) != -1
9002 /* For x86-64, the immediate field in the instruction is 32-bit
9003 signed, so we can't increment a DImode value above 0x7fffffff. */
74411039
JH
9004 && (!TARGET_64BIT
9005 || GET_MODE (ix86_compare_op0) != DImode
261376e7 9006 || INTVAL (ix86_compare_op1) != 0x7fffffff)
0f290768 9007 && GET_CODE (operands[2]) == CONST_INT
36583fea
JH
9008 && GET_CODE (operands[3]) == CONST_INT)
9009 {
9010 if (code == LEU)
9011 code = LTU;
9012 else
9013 code = GEU;
261376e7
RH
9014 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
9015 GET_MODE (ix86_compare_op0));
36583fea 9016 }
3a3677ff 9017
e075ae69 9018 start_sequence ();
a1b8572c 9019 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
2f937369 9020 compare_seq = get_insns ();
e075ae69
RH
9021 end_sequence ();
9022
9023 compare_code = GET_CODE (compare_op);
9024
9025 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9026 HImode insns, we'd be swallowed in word prefix ops. */
9027
635559ab
JH
9028 if (mode != HImode
9029 && (mode != DImode || TARGET_64BIT)
0f290768 9030 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
9031 && GET_CODE (operands[3]) == CONST_INT)
9032 {
9033 rtx out = operands[0];
9034 HOST_WIDE_INT ct = INTVAL (operands[2]);
9035 HOST_WIDE_INT cf = INTVAL (operands[3]);
9036 HOST_WIDE_INT diff;
9037
a1b8572c
JH
9038 if ((compare_code == LTU || compare_code == GEU)
9039 && !second_test && !bypass_test)
e075ae69 9040 {
e075ae69
RH
9041 /* Detect overlap between destination and compare sources. */
9042 rtx tmp = out;
9043
0f290768 9044 /* To simplify rest of code, restrict to the GEU case. */
36583fea
JH
9045 if (compare_code == LTU)
9046 {
9047 int tmp = ct;
9048 ct = cf;
9049 cf = tmp;
9050 compare_code = reverse_condition (compare_code);
9051 code = reverse_condition (code);
9052 }
9053 diff = ct - cf;
9054
e075ae69 9055 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
a500c31b 9056 || reg_overlap_mentioned_p (out, ix86_compare_op1))
635559ab 9057 tmp = gen_reg_rtx (mode);
e075ae69
RH
9058
9059 emit_insn (compare_seq);
635559ab 9060 if (mode == DImode)
14f73b5a
JH
9061 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
9062 else
9063 emit_insn (gen_x86_movsicc_0_m1 (tmp));
e075ae69 9064
36583fea
JH
9065 if (diff == 1)
9066 {
9067 /*
9068 * cmpl op0,op1
9069 * sbbl dest,dest
9070 * [addl dest, ct]
9071 *
9072 * Size 5 - 8.
9073 */
9074 if (ct)
635559ab
JH
9075 tmp = expand_simple_binop (mode, PLUS,
9076 tmp, GEN_INT (ct),
9077 tmp, 1, OPTAB_DIRECT);
36583fea
JH
9078 }
9079 else if (cf == -1)
9080 {
9081 /*
9082 * cmpl op0,op1
9083 * sbbl dest,dest
9084 * orl $ct, dest
9085 *
9086 * Size 8.
9087 */
635559ab
JH
9088 tmp = expand_simple_binop (mode, IOR,
9089 tmp, GEN_INT (ct),
9090 tmp, 1, OPTAB_DIRECT);
36583fea
JH
9091 }
9092 else if (diff == -1 && ct)
9093 {
9094 /*
9095 * cmpl op0,op1
9096 * sbbl dest,dest
06ec023f 9097 * notl dest
36583fea
JH
9098 * [addl dest, cf]
9099 *
9100 * Size 8 - 11.
9101 */
635559ab
JH
9102 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9103 if (cf)
9104 tmp = expand_simple_binop (mode, PLUS,
9105 tmp, GEN_INT (cf),
9106 tmp, 1, OPTAB_DIRECT);
36583fea
JH
9107 }
9108 else
9109 {
9110 /*
9111 * cmpl op0,op1
9112 * sbbl dest,dest
06ec023f 9113 * [notl dest]
36583fea
JH
9114 * andl cf - ct, dest
9115 * [addl dest, ct]
9116 *
9117 * Size 8 - 11.
9118 */
06ec023f
RB
9119
9120 if (cf == 0)
9121 {
9122 cf = ct;
9123 ct = 0;
9124 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9125 }
9126
635559ab
JH
9127 tmp = expand_simple_binop (mode, AND,
9128 tmp,
d8bf17f9 9129 gen_int_mode (cf - ct, mode),
635559ab
JH
9130 tmp, 1, OPTAB_DIRECT);
9131 if (ct)
9132 tmp = expand_simple_binop (mode, PLUS,
9133 tmp, GEN_INT (ct),
9134 tmp, 1, OPTAB_DIRECT);
36583fea 9135 }
e075ae69
RH
9136
9137 if (tmp != out)
9138 emit_move_insn (out, tmp);
9139
9140 return 1; /* DONE */
9141 }
9142
9143 diff = ct - cf;
9144 if (diff < 0)
9145 {
9146 HOST_WIDE_INT tmp;
9147 tmp = ct, ct = cf, cf = tmp;
9148 diff = -diff;
734dba19
JH
9149 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9150 {
9151 /* We may be reversing unordered compare to normal compare, that
9152 is not valid in general (we may convert non-trapping condition
9153 to trapping one), however on i386 we currently emit all
9154 comparisons unordered. */
9155 compare_code = reverse_condition_maybe_unordered (compare_code);
9156 code = reverse_condition_maybe_unordered (code);
9157 }
9158 else
9159 {
9160 compare_code = reverse_condition (compare_code);
9161 code = reverse_condition (code);
9162 }
e075ae69 9163 }
0f2a3457
JJ
9164
9165 compare_code = NIL;
9166 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9167 && GET_CODE (ix86_compare_op1) == CONST_INT)
9168 {
9169 if (ix86_compare_op1 == const0_rtx
9170 && (code == LT || code == GE))
9171 compare_code = code;
9172 else if (ix86_compare_op1 == constm1_rtx)
9173 {
9174 if (code == LE)
9175 compare_code = LT;
9176 else if (code == GT)
9177 compare_code = GE;
9178 }
9179 }
9180
9181 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9182 if (compare_code != NIL
9183 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9184 && (cf == -1 || ct == -1))
9185 {
9186 /* If lea code below could be used, only optimize
9187 if it results in a 2 insn sequence. */
9188
9189 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9190 || diff == 3 || diff == 5 || diff == 9)
9191 || (compare_code == LT && ct == -1)
9192 || (compare_code == GE && cf == -1))
9193 {
9194 /*
9195 * notl op1 (if necessary)
9196 * sarl $31, op1
9197 * orl cf, op1
9198 */
9199 if (ct != -1)
9200 {
9201 cf = ct;
9202 ct = -1;
9203 code = reverse_condition (code);
9204 }
9205
9206 out = emit_store_flag (out, code, ix86_compare_op0,
9207 ix86_compare_op1, VOIDmode, 0, -1);
9208
9209 out = expand_simple_binop (mode, IOR,
9210 out, GEN_INT (cf),
9211 out, 1, OPTAB_DIRECT);
9212 if (out != operands[0])
9213 emit_move_insn (operands[0], out);
9214
9215 return 1; /* DONE */
9216 }
9217 }
9218
635559ab
JH
9219 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9220 || diff == 3 || diff == 5 || diff == 9)
75d38379 9221 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf), 0)))
e075ae69
RH
9222 {
9223 /*
9224 * xorl dest,dest
9225 * cmpl op1,op2
9226 * setcc dest
9227 * lea cf(dest*(ct-cf)),dest
9228 *
9229 * Size 14.
9230 *
9231 * This also catches the degenerate setcc-only case.
9232 */
9233
9234 rtx tmp;
9235 int nops;
9236
9237 out = emit_store_flag (out, code, ix86_compare_op0,
9238 ix86_compare_op1, VOIDmode, 0, 1);
9239
9240 nops = 0;
97f51ac4
RB
9241 /* On x86_64 the lea instruction operates on Pmode, so we need
9242 to get arithmetics done in proper mode to match. */
e075ae69 9243 if (diff == 1)
14f73b5a 9244 tmp = out;
e075ae69
RH
9245 else
9246 {
885a70fd 9247 rtx out1;
14f73b5a 9248 out1 = out;
635559ab 9249 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
e075ae69
RH
9250 nops++;
9251 if (diff & 1)
9252 {
635559ab 9253 tmp = gen_rtx_PLUS (mode, tmp, out1);
e075ae69
RH
9254 nops++;
9255 }
9256 }
9257 if (cf != 0)
9258 {
635559ab 9259 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
e075ae69
RH
9260 nops++;
9261 }
885a70fd
JH
9262 if (tmp != out
9263 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
e075ae69 9264 {
14f73b5a 9265 if (nops == 1)
e075ae69
RH
9266 {
9267 rtx clob;
9268
9269 clob = gen_rtx_REG (CCmode, FLAGS_REG);
9270 clob = gen_rtx_CLOBBER (VOIDmode, clob);
9271
9272 tmp = gen_rtx_SET (VOIDmode, out, tmp);
9273 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9274 emit_insn (tmp);
9275 }
9276 else
9277 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
9278 }
9279 if (out != operands[0])
1985ef90 9280 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
9281
9282 return 1; /* DONE */
9283 }
9284
9285 /*
9286 * General case: Jumpful:
9287 * xorl dest,dest cmpl op1, op2
9288 * cmpl op1, op2 movl ct, dest
9289 * setcc dest jcc 1f
9290 * decl dest movl cf, dest
9291 * andl (cf-ct),dest 1:
9292 * addl ct,dest
0f290768 9293 *
e075ae69
RH
9294 * Size 20. Size 14.
9295 *
9296 * This is reasonably steep, but branch mispredict costs are
9297 * high on modern cpus, so consider failing only if optimizing
9298 * for space.
9299 *
9300 * %%% Parameterize branch_cost on the tuning architecture, then
9301 * use that. The 80386 couldn't care less about mispredicts.
9302 */
9303
9304 if (!optimize_size && !TARGET_CMOVE)
9305 {
97f51ac4 9306 if (cf == 0)
e075ae69 9307 {
97f51ac4
RB
9308 cf = ct;
9309 ct = 0;
734dba19 9310 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
0f2a3457
JJ
9311 /* We may be reversing unordered compare to normal compare,
9312 that is not valid in general (we may convert non-trapping
9313 condition to trapping one), however on i386 we currently
9314 emit all comparisons unordered. */
9315 code = reverse_condition_maybe_unordered (code);
9316 else
9317 {
9318 code = reverse_condition (code);
9319 if (compare_code != NIL)
9320 compare_code = reverse_condition (compare_code);
9321 }
9322 }
9323
9324 if (compare_code != NIL)
9325 {
9326 /* notl op1 (if needed)
9327 sarl $31, op1
9328 andl (cf-ct), op1
9329 addl ct, op1
9330
9331 For x < 0 (resp. x <= -1) there will be no notl,
9332 so if possible swap the constants to get rid of the
9333 complement.
9334 True/false will be -1/0 while code below (store flag
9335 followed by decrement) is 0/-1, so the constants need
9336 to be exchanged once more. */
9337
9338 if (compare_code == GE || !cf)
734dba19 9339 {
0f2a3457
JJ
9340 code = reverse_condition (code);
9341 compare_code = LT;
734dba19
JH
9342 }
9343 else
9344 {
0f2a3457
JJ
9345 HOST_WIDE_INT tmp = cf;
9346 cf = ct;
9347 ct = tmp;
734dba19 9348 }
0f2a3457
JJ
9349
9350 out = emit_store_flag (out, code, ix86_compare_op0,
9351 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 9352 }
0f2a3457
JJ
9353 else
9354 {
9355 out = emit_store_flag (out, code, ix86_compare_op0,
9356 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 9357
97f51ac4 9358 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
0f2a3457
JJ
9359 out, 1, OPTAB_DIRECT);
9360 }
e075ae69 9361
97f51ac4 9362 out = expand_simple_binop (mode, AND, out,
d8bf17f9 9363 gen_int_mode (cf - ct, mode),
635559ab 9364 out, 1, OPTAB_DIRECT);
97f51ac4
RB
9365 if (ct)
9366 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9367 out, 1, OPTAB_DIRECT);
e075ae69
RH
9368 if (out != operands[0])
9369 emit_move_insn (operands[0], out);
9370
9371 return 1; /* DONE */
9372 }
9373 }
9374
9375 if (!TARGET_CMOVE)
9376 {
9377 /* Try a few things more with specific constants and a variable. */
9378
78a0d70c 9379 optab op;
e075ae69
RH
9380 rtx var, orig_out, out, tmp;
9381
9382 if (optimize_size)
9383 return 0; /* FAIL */
9384
0f290768 9385 /* If one of the two operands is an interesting constant, load a
e075ae69 9386 constant with the above and mask it in with a logical operation. */
0f290768 9387
e075ae69
RH
9388 if (GET_CODE (operands[2]) == CONST_INT)
9389 {
9390 var = operands[3];
9391 if (INTVAL (operands[2]) == 0)
9392 operands[3] = constm1_rtx, op = and_optab;
9393 else if (INTVAL (operands[2]) == -1)
9394 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
9395 else
9396 return 0; /* FAIL */
e075ae69
RH
9397 }
9398 else if (GET_CODE (operands[3]) == CONST_INT)
9399 {
9400 var = operands[2];
9401 if (INTVAL (operands[3]) == 0)
9402 operands[2] = constm1_rtx, op = and_optab;
9403 else if (INTVAL (operands[3]) == -1)
9404 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
9405 else
9406 return 0; /* FAIL */
e075ae69 9407 }
78a0d70c 9408 else
e075ae69
RH
9409 return 0; /* FAIL */
9410
9411 orig_out = operands[0];
635559ab 9412 tmp = gen_reg_rtx (mode);
e075ae69
RH
9413 operands[0] = tmp;
9414
9415 /* Recurse to get the constant loaded. */
9416 if (ix86_expand_int_movcc (operands) == 0)
9417 return 0; /* FAIL */
9418
9419 /* Mask in the interesting variable. */
635559ab 9420 out = expand_binop (mode, op, var, tmp, orig_out, 0,
e075ae69
RH
9421 OPTAB_WIDEN);
9422 if (out != orig_out)
9423 emit_move_insn (orig_out, out);
9424
9425 return 1; /* DONE */
9426 }
9427
9428 /*
9429 * For comparison with above,
9430 *
9431 * movl cf,dest
9432 * movl ct,tmp
9433 * cmpl op1,op2
9434 * cmovcc tmp,dest
9435 *
9436 * Size 15.
9437 */
9438
635559ab
JH
9439 if (! nonimmediate_operand (operands[2], mode))
9440 operands[2] = force_reg (mode, operands[2]);
9441 if (! nonimmediate_operand (operands[3], mode))
9442 operands[3] = force_reg (mode, operands[3]);
e075ae69 9443
a1b8572c
JH
9444 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9445 {
635559ab 9446 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
9447 emit_move_insn (tmp, operands[3]);
9448 operands[3] = tmp;
9449 }
9450 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9451 {
635559ab 9452 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
9453 emit_move_insn (tmp, operands[2]);
9454 operands[2] = tmp;
9455 }
c9682caf
JH
9456 if (! register_operand (operands[2], VOIDmode)
9457 && ! register_operand (operands[3], VOIDmode))
635559ab 9458 operands[2] = force_reg (mode, operands[2]);
a1b8572c 9459
e075ae69
RH
9460 emit_insn (compare_seq);
9461 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9462 gen_rtx_IF_THEN_ELSE (mode,
e075ae69
RH
9463 compare_op, operands[2],
9464 operands[3])));
a1b8572c
JH
9465 if (bypass_test)
9466 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9467 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c
JH
9468 bypass_test,
9469 operands[3],
9470 operands[0])));
9471 if (second_test)
9472 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9473 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c
JH
9474 second_test,
9475 operands[2],
9476 operands[0])));
e075ae69
RH
9477
9478 return 1; /* DONE */
e9a25f70 9479}
e075ae69 9480
32b5b1aa 9481int
e075ae69
RH
9482ix86_expand_fp_movcc (operands)
9483 rtx operands[];
32b5b1aa 9484{
e075ae69 9485 enum rtx_code code;
e075ae69 9486 rtx tmp;
a1b8572c 9487 rtx compare_op, second_test, bypass_test;
32b5b1aa 9488
0073023d
JH
9489 /* For SF/DFmode conditional moves based on comparisons
9490 in same mode, we may want to use SSE min/max instructions. */
965f5423
JH
9491 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9492 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
0073023d 9493 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
fa9f36a1
JH
9494 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9495 && (!TARGET_IEEE_FP
9496 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
0073023d
JH
9497 /* We may be called from the post-reload splitter. */
9498 && (!REG_P (operands[0])
9499 || SSE_REG_P (operands[0])
52a661a6 9500 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
0073023d
JH
9501 {
9502 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9503 code = GET_CODE (operands[1]);
9504
9505 /* See if we have (cross) match between comparison operands and
9506 conditional move operands. */
9507 if (rtx_equal_p (operands[2], op1))
9508 {
9509 rtx tmp = op0;
9510 op0 = op1;
9511 op1 = tmp;
9512 code = reverse_condition_maybe_unordered (code);
9513 }
9514 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9515 {
9516 /* Check for min operation. */
9517 if (code == LT)
9518 {
9519 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9520 if (memory_operand (op0, VOIDmode))
9521 op0 = force_reg (GET_MODE (operands[0]), op0);
9522 if (GET_MODE (operands[0]) == SFmode)
9523 emit_insn (gen_minsf3 (operands[0], op0, op1));
9524 else
9525 emit_insn (gen_mindf3 (operands[0], op0, op1));
9526 return 1;
9527 }
9528 /* Check for max operation. */
9529 if (code == GT)
9530 {
9531 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9532 if (memory_operand (op0, VOIDmode))
9533 op0 = force_reg (GET_MODE (operands[0]), op0);
9534 if (GET_MODE (operands[0]) == SFmode)
9535 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9536 else
9537 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9538 return 1;
9539 }
9540 }
9541 /* Manage condition to be sse_comparison_operator. In case we are
9542 in non-ieee mode, try to canonicalize the destination operand
9543 to be first in the comparison - this helps reload to avoid extra
9544 moves. */
9545 if (!sse_comparison_operator (operands[1], VOIDmode)
9546 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9547 {
9548 rtx tmp = ix86_compare_op0;
9549 ix86_compare_op0 = ix86_compare_op1;
9550 ix86_compare_op1 = tmp;
9551 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9552 VOIDmode, ix86_compare_op0,
9553 ix86_compare_op1);
9554 }
9555 /* Similary try to manage result to be first operand of conditional
fa9f36a1
JH
9556 move. We also don't support the NE comparison on SSE, so try to
9557 avoid it. */
037f20f1
JH
9558 if ((rtx_equal_p (operands[0], operands[3])
9559 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9560 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
0073023d
JH
9561 {
9562 rtx tmp = operands[2];
9563 operands[2] = operands[3];
92d0fb09 9564 operands[3] = tmp;
0073023d
JH
9565 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9566 (GET_CODE (operands[1])),
9567 VOIDmode, ix86_compare_op0,
9568 ix86_compare_op1);
9569 }
9570 if (GET_MODE (operands[0]) == SFmode)
9571 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9572 operands[2], operands[3],
9573 ix86_compare_op0, ix86_compare_op1));
9574 else
9575 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9576 operands[2], operands[3],
9577 ix86_compare_op0, ix86_compare_op1));
9578 return 1;
9579 }
9580
e075ae69 9581 /* The floating point conditional move instructions don't directly
0f290768 9582 support conditions resulting from a signed integer comparison. */
32b5b1aa 9583
e075ae69 9584 code = GET_CODE (operands[1]);
a1b8572c 9585 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9e7adcb3
JH
9586
9587 /* The floating point conditional move instructions don't directly
9588 support signed integer comparisons. */
9589
a1b8572c 9590 if (!fcmov_comparison_operator (compare_op, VOIDmode))
e075ae69 9591 {
a1b8572c 9592 if (second_test != NULL || bypass_test != NULL)
b531087a 9593 abort ();
e075ae69 9594 tmp = gen_reg_rtx (QImode);
3a3677ff 9595 ix86_expand_setcc (code, tmp);
e075ae69
RH
9596 code = NE;
9597 ix86_compare_op0 = tmp;
9598 ix86_compare_op1 = const0_rtx;
a1b8572c
JH
9599 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9600 }
9601 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9602 {
9603 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9604 emit_move_insn (tmp, operands[3]);
9605 operands[3] = tmp;
9606 }
9607 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9608 {
9609 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9610 emit_move_insn (tmp, operands[2]);
9611 operands[2] = tmp;
e075ae69 9612 }
e9a25f70 9613
e075ae69
RH
9614 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9615 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9e7adcb3 9616 compare_op,
e075ae69
RH
9617 operands[2],
9618 operands[3])));
a1b8572c
JH
9619 if (bypass_test)
9620 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9621 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9622 bypass_test,
9623 operands[3],
9624 operands[0])));
9625 if (second_test)
9626 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9627 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9628 second_test,
9629 operands[2],
9630 operands[0])));
32b5b1aa 9631
e075ae69 9632 return 1;
32b5b1aa
SC
9633}
9634
2450a057
JH
9635/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9636 works for floating pointer parameters and nonoffsetable memories.
9637 For pushes, it returns just stack offsets; the values will be saved
9638 in the right order. Maximally three parts are generated. */
9639
2b589241 9640static int
2450a057
JH
9641ix86_split_to_parts (operand, parts, mode)
9642 rtx operand;
9643 rtx *parts;
9644 enum machine_mode mode;
32b5b1aa 9645{
26e5b205
JH
9646 int size;
9647
9648 if (!TARGET_64BIT)
9649 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9650 else
9651 size = (GET_MODE_SIZE (mode) + 4) / 8;
2450a057 9652
a7180f70
BS
9653 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9654 abort ();
2450a057
JH
9655 if (size < 2 || size > 3)
9656 abort ();
9657
f996902d
RH
9658 /* Optimize constant pool reference to immediates. This is used by fp
9659 moves, that force all constants to memory to allow combining. */
9660 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9661 {
9662 rtx tmp = maybe_get_pool_constant (operand);
9663 if (tmp)
9664 operand = tmp;
9665 }
d7a29404 9666
2450a057 9667 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
e075ae69 9668 {
2450a057
JH
9669 /* The only non-offsetable memories we handle are pushes. */
9670 if (! push_operand (operand, VOIDmode))
9671 abort ();
9672
26e5b205
JH
9673 operand = copy_rtx (operand);
9674 PUT_MODE (operand, Pmode);
2450a057
JH
9675 parts[0] = parts[1] = parts[2] = operand;
9676 }
26e5b205 9677 else if (!TARGET_64BIT)
2450a057
JH
9678 {
9679 if (mode == DImode)
9680 split_di (&operand, 1, &parts[0], &parts[1]);
9681 else
e075ae69 9682 {
2450a057
JH
9683 if (REG_P (operand))
9684 {
9685 if (!reload_completed)
9686 abort ();
9687 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9688 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9689 if (size == 3)
9690 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9691 }
9692 else if (offsettable_memref_p (operand))
9693 {
f4ef873c 9694 operand = adjust_address (operand, SImode, 0);
2450a057 9695 parts[0] = operand;
b72f00af 9696 parts[1] = adjust_address (operand, SImode, 4);
2450a057 9697 if (size == 3)
b72f00af 9698 parts[2] = adjust_address (operand, SImode, 8);
2450a057
JH
9699 }
9700 else if (GET_CODE (operand) == CONST_DOUBLE)
9701 {
9702 REAL_VALUE_TYPE r;
2b589241 9703 long l[4];
2450a057
JH
9704
9705 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9706 switch (mode)
9707 {
9708 case XFmode:
2b589241 9709 case TFmode:
2450a057 9710 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
d8bf17f9 9711 parts[2] = gen_int_mode (l[2], SImode);
2450a057
JH
9712 break;
9713 case DFmode:
9714 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9715 break;
9716 default:
9717 abort ();
9718 }
d8bf17f9
LB
9719 parts[1] = gen_int_mode (l[1], SImode);
9720 parts[0] = gen_int_mode (l[0], SImode);
2450a057
JH
9721 }
9722 else
9723 abort ();
e075ae69 9724 }
2450a057 9725 }
26e5b205
JH
9726 else
9727 {
44cf5b6a
JH
9728 if (mode == TImode)
9729 split_ti (&operand, 1, &parts[0], &parts[1]);
26e5b205
JH
9730 if (mode == XFmode || mode == TFmode)
9731 {
9732 if (REG_P (operand))
9733 {
9734 if (!reload_completed)
9735 abort ();
9736 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9737 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9738 }
9739 else if (offsettable_memref_p (operand))
9740 {
b72f00af 9741 operand = adjust_address (operand, DImode, 0);
26e5b205 9742 parts[0] = operand;
b72f00af 9743 parts[1] = adjust_address (operand, SImode, 8);
26e5b205
JH
9744 }
9745 else if (GET_CODE (operand) == CONST_DOUBLE)
9746 {
9747 REAL_VALUE_TYPE r;
9748 long l[3];
9749
9750 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9751 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9752 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9753 if (HOST_BITS_PER_WIDE_INT >= 64)
523fbd9d 9754 parts[0]
d8bf17f9 9755 = gen_int_mode
44cf5b6a 9756 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
b531087a 9757 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
d8bf17f9 9758 DImode);
26e5b205
JH
9759 else
9760 parts[0] = immed_double_const (l[0], l[1], DImode);
d8bf17f9 9761 parts[1] = gen_int_mode (l[2], SImode);
26e5b205
JH
9762 }
9763 else
9764 abort ();
9765 }
9766 }
2450a057 9767
2b589241 9768 return size;
2450a057
JH
9769}
9770
9771/* Emit insns to perform a move or push of DI, DF, and XF values.
9772 Return false when normal moves are needed; true when all required
9773 insns have been emitted. Operands 2-4 contain the input values
9774 int the correct order; operands 5-7 contain the output values. */
9775
26e5b205
JH
9776void
9777ix86_split_long_move (operands)
9778 rtx operands[];
2450a057
JH
9779{
9780 rtx part[2][3];
26e5b205 9781 int nparts;
2450a057
JH
9782 int push = 0;
9783 int collisions = 0;
26e5b205
JH
9784 enum machine_mode mode = GET_MODE (operands[0]);
9785
9786 /* The DFmode expanders may ask us to move double.
9787 For 64bit target this is single move. By hiding the fact
9788 here we simplify i386.md splitters. */
9789 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9790 {
8cdfa312
RH
9791 /* Optimize constant pool reference to immediates. This is used by
9792 fp moves, that force all constants to memory to allow combining. */
26e5b205
JH
9793
9794 if (GET_CODE (operands[1]) == MEM
9795 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9796 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9797 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9798 if (push_operand (operands[0], VOIDmode))
b47b4f21
JH
9799 {
9800 operands[0] = copy_rtx (operands[0]);
9801 PUT_MODE (operands[0], Pmode);
9802 }
26e5b205
JH
9803 else
9804 operands[0] = gen_lowpart (DImode, operands[0]);
9805 operands[1] = gen_lowpart (DImode, operands[1]);
9806 emit_move_insn (operands[0], operands[1]);
9807 return;
9808 }
2450a057 9809
2450a057
JH
9810 /* The only non-offsettable memory we handle is push. */
9811 if (push_operand (operands[0], VOIDmode))
9812 push = 1;
9813 else if (GET_CODE (operands[0]) == MEM
9814 && ! offsettable_memref_p (operands[0]))
9815 abort ();
9816
26e5b205
JH
9817 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9818 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
2450a057
JH
9819
9820 /* When emitting push, take care for source operands on the stack. */
9821 if (push && GET_CODE (operands[1]) == MEM
9822 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9823 {
26e5b205 9824 if (nparts == 3)
886cbb88
JH
9825 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9826 XEXP (part[1][2], 0));
9827 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9828 XEXP (part[1][1], 0));
2450a057
JH
9829 }
9830
0f290768 9831 /* We need to do copy in the right order in case an address register
2450a057
JH
9832 of the source overlaps the destination. */
9833 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9834 {
9835 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9836 collisions++;
9837 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9838 collisions++;
26e5b205 9839 if (nparts == 3
2450a057
JH
9840 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9841 collisions++;
9842
9843 /* Collision in the middle part can be handled by reordering. */
26e5b205 9844 if (collisions == 1 && nparts == 3
2450a057 9845 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
e075ae69 9846 {
2450a057
JH
9847 rtx tmp;
9848 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9849 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9850 }
e075ae69 9851
2450a057
JH
9852 /* If there are more collisions, we can't handle it by reordering.
9853 Do an lea to the last part and use only one colliding move. */
9854 else if (collisions > 1)
9855 {
9856 collisions = 1;
26e5b205 9857 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
2450a057 9858 XEXP (part[1][0], 0)));
26e5b205
JH
9859 part[1][0] = change_address (part[1][0],
9860 TARGET_64BIT ? DImode : SImode,
9861 part[0][nparts - 1]);
b72f00af 9862 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
26e5b205 9863 if (nparts == 3)
b72f00af 9864 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
2450a057
JH
9865 }
9866 }
9867
9868 if (push)
9869 {
26e5b205 9870 if (!TARGET_64BIT)
2b589241 9871 {
26e5b205
JH
9872 if (nparts == 3)
9873 {
9874 /* We use only first 12 bytes of TFmode value, but for pushing we
9875 are required to adjust stack as if we were pushing real 16byte
9876 value. */
9877 if (mode == TFmode && !TARGET_64BIT)
9878 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9879 GEN_INT (-4)));
9880 emit_move_insn (part[0][2], part[1][2]);
9881 }
2b589241 9882 }
26e5b205
JH
9883 else
9884 {
9885 /* In 64bit mode we don't have 32bit push available. In case this is
9886 register, it is OK - we will just use larger counterpart. We also
9887 retype memory - these comes from attempt to avoid REX prefix on
9888 moving of second half of TFmode value. */
9889 if (GET_MODE (part[1][1]) == SImode)
9890 {
9891 if (GET_CODE (part[1][1]) == MEM)
f4ef873c 9892 part[1][1] = adjust_address (part[1][1], DImode, 0);
26e5b205
JH
9893 else if (REG_P (part[1][1]))
9894 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9895 else
b531087a 9896 abort ();
886cbb88
JH
9897 if (GET_MODE (part[1][0]) == SImode)
9898 part[1][0] = part[1][1];
26e5b205
JH
9899 }
9900 }
9901 emit_move_insn (part[0][1], part[1][1]);
9902 emit_move_insn (part[0][0], part[1][0]);
9903 return;
2450a057
JH
9904 }
9905
9906 /* Choose correct order to not overwrite the source before it is copied. */
9907 if ((REG_P (part[0][0])
9908 && REG_P (part[1][1])
9909 && (REGNO (part[0][0]) == REGNO (part[1][1])
26e5b205 9910 || (nparts == 3
2450a057
JH
9911 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9912 || (collisions > 0
9913 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9914 {
26e5b205 9915 if (nparts == 3)
2450a057 9916 {
26e5b205
JH
9917 operands[2] = part[0][2];
9918 operands[3] = part[0][1];
9919 operands[4] = part[0][0];
9920 operands[5] = part[1][2];
9921 operands[6] = part[1][1];
9922 operands[7] = part[1][0];
2450a057
JH
9923 }
9924 else
9925 {
26e5b205
JH
9926 operands[2] = part[0][1];
9927 operands[3] = part[0][0];
9928 operands[5] = part[1][1];
9929 operands[6] = part[1][0];
2450a057
JH
9930 }
9931 }
9932 else
9933 {
26e5b205 9934 if (nparts == 3)
2450a057 9935 {
26e5b205
JH
9936 operands[2] = part[0][0];
9937 operands[3] = part[0][1];
9938 operands[4] = part[0][2];
9939 operands[5] = part[1][0];
9940 operands[6] = part[1][1];
9941 operands[7] = part[1][2];
2450a057
JH
9942 }
9943 else
9944 {
26e5b205
JH
9945 operands[2] = part[0][0];
9946 operands[3] = part[0][1];
9947 operands[5] = part[1][0];
9948 operands[6] = part[1][1];
e075ae69
RH
9949 }
9950 }
26e5b205
JH
9951 emit_move_insn (operands[2], operands[5]);
9952 emit_move_insn (operands[3], operands[6]);
9953 if (nparts == 3)
9954 emit_move_insn (operands[4], operands[7]);
32b5b1aa 9955
26e5b205 9956 return;
32b5b1aa 9957}
32b5b1aa 9958
e075ae69
RH
9959void
9960ix86_split_ashldi (operands, scratch)
9961 rtx *operands, scratch;
32b5b1aa 9962{
e075ae69
RH
9963 rtx low[2], high[2];
9964 int count;
b985a30f 9965
e075ae69
RH
9966 if (GET_CODE (operands[2]) == CONST_INT)
9967 {
9968 split_di (operands, 2, low, high);
9969 count = INTVAL (operands[2]) & 63;
32b5b1aa 9970
e075ae69
RH
9971 if (count >= 32)
9972 {
9973 emit_move_insn (high[0], low[1]);
9974 emit_move_insn (low[0], const0_rtx);
b985a30f 9975
e075ae69
RH
9976 if (count > 32)
9977 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9978 }
9979 else
9980 {
9981 if (!rtx_equal_p (operands[0], operands[1]))
9982 emit_move_insn (operands[0], operands[1]);
9983 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9984 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9985 }
9986 }
9987 else
9988 {
9989 if (!rtx_equal_p (operands[0], operands[1]))
9990 emit_move_insn (operands[0], operands[1]);
b985a30f 9991
e075ae69 9992 split_di (operands, 1, low, high);
b985a30f 9993
e075ae69
RH
9994 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9995 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 9996
fe577e58 9997 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 9998 {
fe577e58 9999 if (! no_new_pseudos)
e075ae69
RH
10000 scratch = force_reg (SImode, const0_rtx);
10001 else
10002 emit_move_insn (scratch, const0_rtx);
10003
10004 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10005 scratch));
10006 }
10007 else
10008 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10009 }
e9a25f70 10010}
32b5b1aa 10011
e075ae69
RH
10012void
10013ix86_split_ashrdi (operands, scratch)
10014 rtx *operands, scratch;
32b5b1aa 10015{
e075ae69
RH
10016 rtx low[2], high[2];
10017 int count;
32b5b1aa 10018
e075ae69
RH
10019 if (GET_CODE (operands[2]) == CONST_INT)
10020 {
10021 split_di (operands, 2, low, high);
10022 count = INTVAL (operands[2]) & 63;
32b5b1aa 10023
e075ae69
RH
10024 if (count >= 32)
10025 {
10026 emit_move_insn (low[0], high[1]);
32b5b1aa 10027
e075ae69
RH
10028 if (! reload_completed)
10029 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10030 else
10031 {
10032 emit_move_insn (high[0], low[0]);
10033 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10034 }
10035
10036 if (count > 32)
10037 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10038 }
10039 else
10040 {
10041 if (!rtx_equal_p (operands[0], operands[1]))
10042 emit_move_insn (operands[0], operands[1]);
10043 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10044 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10045 }
10046 }
10047 else
32b5b1aa 10048 {
e075ae69
RH
10049 if (!rtx_equal_p (operands[0], operands[1]))
10050 emit_move_insn (operands[0], operands[1]);
10051
10052 split_di (operands, 1, low, high);
10053
10054 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10055 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10056
fe577e58 10057 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 10058 {
fe577e58 10059 if (! no_new_pseudos)
e075ae69
RH
10060 scratch = gen_reg_rtx (SImode);
10061 emit_move_insn (scratch, high[0]);
10062 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10063 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10064 scratch));
10065 }
10066 else
10067 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 10068 }
e075ae69 10069}
32b5b1aa 10070
e075ae69
RH
10071void
10072ix86_split_lshrdi (operands, scratch)
10073 rtx *operands, scratch;
10074{
10075 rtx low[2], high[2];
10076 int count;
32b5b1aa 10077
e075ae69 10078 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 10079 {
e075ae69
RH
10080 split_di (operands, 2, low, high);
10081 count = INTVAL (operands[2]) & 63;
10082
10083 if (count >= 32)
c7271385 10084 {
e075ae69
RH
10085 emit_move_insn (low[0], high[1]);
10086 emit_move_insn (high[0], const0_rtx);
32b5b1aa 10087
e075ae69
RH
10088 if (count > 32)
10089 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10090 }
10091 else
10092 {
10093 if (!rtx_equal_p (operands[0], operands[1]))
10094 emit_move_insn (operands[0], operands[1]);
10095 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10096 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10097 }
32b5b1aa 10098 }
e075ae69
RH
10099 else
10100 {
10101 if (!rtx_equal_p (operands[0], operands[1]))
10102 emit_move_insn (operands[0], operands[1]);
32b5b1aa 10103
e075ae69
RH
10104 split_di (operands, 1, low, high);
10105
10106 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10107 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10108
10109 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 10110 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 10111 {
fe577e58 10112 if (! no_new_pseudos)
e075ae69
RH
10113 scratch = force_reg (SImode, const0_rtx);
10114 else
10115 emit_move_insn (scratch, const0_rtx);
10116
10117 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10118 scratch));
10119 }
10120 else
10121 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10122 }
32b5b1aa 10123}
3f803cd9 10124
0407c02b 10125/* Helper function for the string operations below. Dest VARIABLE whether
0945b39d
JH
10126 it is aligned to VALUE bytes. If true, jump to the label. */
10127static rtx
10128ix86_expand_aligntest (variable, value)
10129 rtx variable;
10130 int value;
10131{
10132 rtx label = gen_label_rtx ();
10133 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10134 if (GET_MODE (variable) == DImode)
10135 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10136 else
10137 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10138 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
d43e0b7d 10139 1, label);
0945b39d
JH
10140 return label;
10141}
10142
10143/* Adjust COUNTER by the VALUE. */
10144static void
10145ix86_adjust_counter (countreg, value)
10146 rtx countreg;
10147 HOST_WIDE_INT value;
10148{
10149 if (GET_MODE (countreg) == DImode)
10150 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10151 else
10152 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10153}
10154
10155/* Zero extend possibly SImode EXP to Pmode register. */
d24b3457 10156rtx
0945b39d
JH
10157ix86_zero_extend_to_Pmode (exp)
10158 rtx exp;
10159{
10160 rtx r;
10161 if (GET_MODE (exp) == VOIDmode)
10162 return force_reg (Pmode, exp);
10163 if (GET_MODE (exp) == Pmode)
10164 return copy_to_mode_reg (Pmode, exp);
10165 r = gen_reg_rtx (Pmode);
10166 emit_insn (gen_zero_extendsidi2 (r, exp));
10167 return r;
10168}
10169
/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_clrstr contains similar code.

   DST and SRC are the MEM destination and source, COUNT_EXP the byte
   count and ALIGN_EXP the known alignment.  Returns 1 when an inline
   expansion was emitted, 0 to let the caller fall back to a library
   call.  All insns are built in a sequence and emitted at the end.  */
int
ix86_expand_movstr (dst, src, count_exp, align_exp)
     rtx dst, src, count_exp, align_exp;
{
  rtx srcreg, destreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;
  rtx insns;

  start_sequence ();

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    count = INTVAL (count_exp);

  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  if (counter_mode != SImode && counter_mode != DImode)
    abort ();

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      if (TARGET_64BIT)
	emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
					destreg, srcreg, countreg));
      else
	emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
				  destreg, srcreg, countreg));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      /* Word size of the rep move: 8 bytes on 64bit unless -Os.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      if (count & ~(size - 1))
	{
	  /* countreg = count / size, masked so the 32bit count stays
	     nonnegative.  */
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  if (size == 4)
	    {
	      if (TARGET_64BIT)
		emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
						destreg, srcreg, countreg));
	      else
		emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
					  destreg, srcreg, countreg));
	    }
	  else
	    emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
					    destreg, srcreg, countreg));
	}
      /* Copy the remaining 1..size-1 tail bytes with single moves.  */
      if (size == 8 && (count & 0x04))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if (count & 0x02)
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (count & 0x01)
	emit_insn (gen_strmovqi (destreg, srcreg));
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
     allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
	{
	  end_sequence ();
	  return 0;
	}

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
	 than 4 bytes, because gcc is able to optimize such code better (in
	 the case the destination or the count really is aligned, gcc is often
	 able to predict the branches) and also it is friendlier to the
	 hardware branch prediction.

	 Using loops is benefical for generic case, because we can
	 handle small counts using the loops.  Many CPUs (such as Athlon)
	 have large REP prefix setup costs.

	 This is quite costy.  Maybe we can revisit this decision later or
	 add some customizability to this code.  */

      /* If the count may be smaller than the alignment prologue needs,
	 branch around the prologue; LABEL is placed later.  */
      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      /* Alignment prologue: copy 1, 2 and possibly 4 bytes until the
	 destination reaches the desired alignment.  */
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}
      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      /* Main copy: rep movs of word-sized chunks (count shifted down by
	 the word-size log2).  */
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
					  destreg, srcreg, countreg2));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
	  emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
				    destreg, srcreg, countreg2));
	}

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      /* Epilogue: copy whatever tail bytes the rep move left, using the
	 compile-time count when known, run-time tests otherwise.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if ((align <= 4 || count == 0) && TARGET_64BIT)
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strmovqi (destreg, srcreg));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }

  insns = get_insns ();
  end_sequence ();

  ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
  emit_insn (insns);
  return 1;
}
10396
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  expand_movstr contains similar code.

   SRC is the MEM being cleared, COUNT_EXP the byte count and ALIGN_EXP
   the known alignment.  Returns 1 when an inline expansion was emitted,
   0 to let the caller fall back to a library call.  */
int
ix86_expand_clrstr (src, count_exp, align_exp)
     rtx src, count_exp, align_exp;
{
  rtx destreg, zeroreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    count = INTVAL (count_exp);
  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      if (TARGET_64BIT)
	emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
					 destreg, countreg));
      else
	emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
				   destreg, countreg));
    }
  /* For constant aligned (or small unaligned) clears use rep stos of
     word-sized chunks followed by code clearing the rest.  */
  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      /* Word size of the rep stos: 8 bytes on 64bit unless -Os.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
	{
	  /* countreg = count / size, masked so the 32bit count stays
	     nonnegative.  */
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  if (size == 4)
	    {
	      if (TARGET_64BIT)
		emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
						 destreg, countreg));
	      else
		emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
					   destreg, countreg));
	    }
	  else
	    emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
					     destreg, countreg));
	}
      /* Clear the remaining 1..size-1 tail bytes with single stores,
	 narrowing ZEROREG via subregs.  */
      if (size == 8 && (count & 0x04))
	emit_insn (gen_strsetsi (destreg,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (count & 0x02)
	emit_insn (gen_strsethi (destreg,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (count & 0x01)
	emit_insn (gen_strsetqi (destreg,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
    }
  /* Generic case: align the destination, clear the bulk with rep stos,
     then clear the tail.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);

      /* If the count may be smaller than the alignment prologue needs,
	 branch around the prologue; LABEL is placed later.  */
      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      /* Alignment prologue: clear 1, 2 and possibly 4 bytes until the
	 destination reaches the desired alignment.  */
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strsetqi (destreg,
				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strsethi (destreg,
				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
					     ? gen_rtx_SUBREG (SImode, zeroreg, 0)
					     : zeroreg)));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}

      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      /* Main clear: rep stos of word-sized chunks (count shifted down by
	 the word-size log2).  */
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
					   destreg, countreg2));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
	  emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
				     destreg, countreg2));
	}
      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      /* Epilogue: clear whatever tail bytes the rep stos left, using the
	 compile-time count when known, run-time tests otherwise.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strsetsi (destreg,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strsetsi (destreg,
				   gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strsethi (destreg,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strsethi (destreg,
				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strsetqi (destreg,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strsetqi (destreg,
				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  return 1;
}
10601/* Expand strlen. */
10602int
10603ix86_expand_strlen (out, src, eoschar, align)
10604 rtx out, src, eoschar, align;
10605{
10606 rtx addr, scratch1, scratch2, scratch3, scratch4;
10607
10608 /* The generic case of strlen expander is long. Avoid it's
10609 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
10610
10611 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10612 && !TARGET_INLINE_ALL_STRINGOPS
10613 && !optimize_size
10614 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10615 return 0;
10616
10617 addr = force_reg (Pmode, XEXP (src, 0));
10618 scratch1 = gen_reg_rtx (Pmode);
10619
10620 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10621 && !optimize_size)
10622 {
10623 /* Well it seems that some optimizer does not combine a call like
10624 foo(strlen(bar), strlen(bar));
10625 when the move and the subtraction is done here. It does calculate
10626 the length just once when these instructions are done inside of
10627 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10628 often used and I use one fewer register for the lifetime of
10629 output_strlen_unroll() this is better. */
10630
10631 emit_move_insn (out, addr);
10632
10633 ix86_expand_strlensi_unroll_1 (out, align);
10634
10635 /* strlensi_unroll_1 returns the address of the zero at the end of
10636 the string, like memchr(), so compute the length by subtracting
10637 the start address. */
10638 if (TARGET_64BIT)
10639 emit_insn (gen_subdi3 (out, out, addr));
10640 else
10641 emit_insn (gen_subsi3 (out, out, addr));
10642 }
10643 else
10644 {
10645 scratch2 = gen_reg_rtx (Pmode);
10646 scratch3 = gen_reg_rtx (Pmode);
10647 scratch4 = force_reg (Pmode, constm1_rtx);
10648
10649 emit_move_insn (scratch3, addr);
10650 eoschar = force_reg (QImode, eoschar);
10651
10652 emit_insn (gen_cld ());
10653 if (TARGET_64BIT)
10654 {
10655 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10656 align, scratch4, scratch3));
10657 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10658 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10659 }
10660 else
10661 {
10662 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10663 align, scratch4, scratch3));
10664 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10665 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10666 }
10667 }
10668 return 1;
10669}
10670
e075ae69
RH
10671/* Expand the appropriate insns for doing strlen if not just doing
10672 repnz; scasb
10673
10674 out = result, initialized with the start address
10675 align_rtx = alignment of the address.
10676 scratch = scratch register, initialized with the startaddress when
77ebd435 10677 not aligned, otherwise undefined
3f803cd9
SC
10678
10679 This is just the body. It needs the initialisations mentioned above and
10680 some address computing at the end. These things are done in i386.md. */
10681
0945b39d
JH
10682static void
10683ix86_expand_strlensi_unroll_1 (out, align_rtx)
10684 rtx out, align_rtx;
3f803cd9 10685{
e075ae69
RH
10686 int align;
10687 rtx tmp;
10688 rtx align_2_label = NULL_RTX;
10689 rtx align_3_label = NULL_RTX;
10690 rtx align_4_label = gen_label_rtx ();
10691 rtx end_0_label = gen_label_rtx ();
e075ae69 10692 rtx mem;
e2e52e1b 10693 rtx tmpreg = gen_reg_rtx (SImode);
0945b39d 10694 rtx scratch = gen_reg_rtx (SImode);
e075ae69
RH
10695
10696 align = 0;
10697 if (GET_CODE (align_rtx) == CONST_INT)
10698 align = INTVAL (align_rtx);
3f803cd9 10699
e9a25f70 10700 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
3f803cd9 10701
e9a25f70 10702 /* Is there a known alignment and is it less than 4? */
e075ae69 10703 if (align < 4)
3f803cd9 10704 {
0945b39d
JH
10705 rtx scratch1 = gen_reg_rtx (Pmode);
10706 emit_move_insn (scratch1, out);
e9a25f70 10707 /* Is there a known alignment and is it not 2? */
e075ae69 10708 if (align != 2)
3f803cd9 10709 {
e075ae69
RH
10710 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10711 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10712
10713 /* Leave just the 3 lower bits. */
0945b39d 10714 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
e075ae69
RH
10715 NULL_RTX, 0, OPTAB_WIDEN);
10716
9076b9c1 10717 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 10718 Pmode, 1, align_4_label);
9076b9c1 10719 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
d43e0b7d 10720 Pmode, 1, align_2_label);
9076b9c1 10721 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
d43e0b7d 10722 Pmode, 1, align_3_label);
3f803cd9
SC
10723 }
10724 else
10725 {
e9a25f70
JL
10726 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10727 check if is aligned to 4 - byte. */
e9a25f70 10728
0945b39d 10729 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
e075ae69
RH
10730 NULL_RTX, 0, OPTAB_WIDEN);
10731
9076b9c1 10732 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 10733 Pmode, 1, align_4_label);
3f803cd9
SC
10734 }
10735
e075ae69 10736 mem = gen_rtx_MEM (QImode, out);
e9a25f70 10737
e075ae69 10738 /* Now compare the bytes. */
e9a25f70 10739
0f290768 10740 /* Compare the first n unaligned byte on a byte per byte basis. */
9076b9c1 10741 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
d43e0b7d 10742 QImode, 1, end_0_label);
3f803cd9 10743
0f290768 10744 /* Increment the address. */
0945b39d
JH
10745 if (TARGET_64BIT)
10746 emit_insn (gen_adddi3 (out, out, const1_rtx));
10747 else
10748 emit_insn (gen_addsi3 (out, out, const1_rtx));
e9a25f70 10749
e075ae69
RH
10750 /* Not needed with an alignment of 2 */
10751 if (align != 2)
10752 {
10753 emit_label (align_2_label);
3f803cd9 10754
d43e0b7d
RK
10755 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10756 end_0_label);
e075ae69 10757
0945b39d
JH
10758 if (TARGET_64BIT)
10759 emit_insn (gen_adddi3 (out, out, const1_rtx));
10760 else
10761 emit_insn (gen_addsi3 (out, out, const1_rtx));
e075ae69
RH
10762
10763 emit_label (align_3_label);
10764 }
10765
d43e0b7d
RK
10766 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10767 end_0_label);
e075ae69 10768
0945b39d
JH
10769 if (TARGET_64BIT)
10770 emit_insn (gen_adddi3 (out, out, const1_rtx));
10771 else
10772 emit_insn (gen_addsi3 (out, out, const1_rtx));
3f803cd9
SC
10773 }
10774
e075ae69
RH
10775 /* Generate loop to check 4 bytes at a time. It is not a good idea to
10776 align this loop. It gives only huge programs, but does not help to
10777 speed up. */
10778 emit_label (align_4_label);
3f803cd9 10779
e075ae69
RH
10780 mem = gen_rtx_MEM (SImode, out);
10781 emit_move_insn (scratch, mem);
0945b39d
JH
10782 if (TARGET_64BIT)
10783 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10784 else
10785 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
e075ae69 10786
e2e52e1b
JH
10787 /* This formula yields a nonzero result iff one of the bytes is zero.
10788 This saves three branches inside loop and many cycles. */
10789
10790 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10791 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10792 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
7471a1f0 10793 emit_insn (gen_andsi3 (tmpreg, tmpreg,
d8bf17f9 10794 gen_int_mode (0x80808080, SImode)));
d43e0b7d
RK
10795 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10796 align_4_label);
e2e52e1b
JH
10797
10798 if (TARGET_CMOVE)
10799 {
10800 rtx reg = gen_reg_rtx (SImode);
0945b39d 10801 rtx reg2 = gen_reg_rtx (Pmode);
e2e52e1b
JH
10802 emit_move_insn (reg, tmpreg);
10803 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10804
0f290768 10805 /* If zero is not in the first two bytes, move two bytes forward. */
16189740 10806 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
10807 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10808 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10809 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10810 gen_rtx_IF_THEN_ELSE (SImode, tmp,
77ebd435
AJ
10811 reg,
10812 tmpreg)));
e2e52e1b 10813 /* Emit lea manually to avoid clobbering of flags. */
0945b39d
JH
10814 emit_insn (gen_rtx_SET (SImode, reg2,
10815 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
e2e52e1b
JH
10816
10817 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10818 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10819 emit_insn (gen_rtx_SET (VOIDmode, out,
0945b39d 10820 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
a4f31c00
AJ
10821 reg2,
10822 out)));
e2e52e1b
JH
10823
10824 }
10825 else
10826 {
10827 rtx end_2_label = gen_label_rtx ();
10828 /* Is zero in the first two bytes? */
10829
16189740 10830 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
10831 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10832 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10833 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10834 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10835 pc_rtx);
10836 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10837 JUMP_LABEL (tmp) = end_2_label;
10838
0f290768 10839 /* Not in the first two. Move two bytes forward. */
e2e52e1b 10840 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
0945b39d
JH
10841 if (TARGET_64BIT)
10842 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10843 else
10844 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
e2e52e1b
JH
10845
10846 emit_label (end_2_label);
10847
10848 }
10849
0f290768 10850 /* Avoid branch in fixing the byte. */
e2e52e1b 10851 tmpreg = gen_lowpart (QImode, tmpreg);
7e08e190 10852 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
0945b39d
JH
10853 if (TARGET_64BIT)
10854 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10855 else
10856 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
e075ae69
RH
10857
10858 emit_label (end_0_label);
10859}
0e07aff3
RH
10860
/* Expand a call instruction.

   RETVAL   - the value receiving the function result, or NULL for a
	      call with no used return value.
   FNADDR   - a MEM wrapping the address of the called function.
   CALLARG1 - the const_int argument-byte count attached to the CALL rtx.
   CALLARG2 - on 64-bit targets, a const_int loaded into AL before the
	      call (presumably the vararg SSE-register count mandated by
	      the x86-64 ABI -- TODO confirm against the caller).
   POP      - const_int number of stack bytes the callee pops, or
	      const0_rtx/NULL when the caller adjusts the stack.  */
void
ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
     rtx retval, fnaddr, callarg1, callarg2, pop;
{
  rtx use = NULL, call;

  /* Normalize "pop zero bytes" to "no pop", and reject callee-pop on
     64-bit, where the ABI never uses it.  */
  if (pop == const0_rtx)
    pop = NULL;
  if (TARGET_64BIT && pop)
    abort ();

#if TARGET_MACHO
  /* Darwin routes symbolic PIC calls through an indirection stub.  */
  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);
#else
  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);

  /* Load CALLARG2 into AL (QImode hard reg 0) and record the use so the
     scheduler keeps the assignment adjacent to the call.  */
  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }
#endif /* TARGET_MACHO */

  /* Force an address the call patterns cannot take into a register.  */
  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }

  /* Build (call ...), optionally wrapped in a SET for the return value
     and a PARALLEL describing the callee's stack pop.  */
  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}
fce5a9f2 10910
e075ae69 10911\f
e075ae69
RH
10912/* Clear stack slot assignments remembered from previous functions.
10913 This is called from INIT_EXPANDERS once before RTL is emitted for each
10914 function. */
10915
e2500fed
GK
10916static struct machine_function *
10917ix86_init_machine_status ()
37b15744 10918{
e2500fed 10919 return ggc_alloc_cleared (sizeof (struct machine_function));
1526a060
BS
10920}
10921
e075ae69
RH
10922/* Return a MEM corresponding to a stack slot with mode MODE.
10923 Allocate a new slot if necessary.
10924
10925 The RTL for a function can have several slots available: N is
10926 which slot to use. */
10927
10928rtx
10929assign_386_stack_local (mode, n)
10930 enum machine_mode mode;
10931 int n;
10932{
10933 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10934 abort ();
10935
10936 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10937 ix86_stack_locals[(int) mode][n]
10938 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10939
10940 return ix86_stack_locals[(int) mode][n];
10941}
f996902d
RH
10942
10943/* Construct the SYMBOL_REF for the tls_get_addr function. */
10944
e2500fed 10945static GTY(()) rtx ix86_tls_symbol;
f996902d
RH
10946rtx
10947ix86_tls_get_addr ()
10948{
f996902d 10949
e2500fed 10950 if (!ix86_tls_symbol)
f996902d 10951 {
75d38379
JJ
10952 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
10953 (TARGET_GNU_TLS && !TARGET_64BIT)
10954 ? "___tls_get_addr"
10955 : "__tls_get_addr");
f996902d
RH
10956 }
10957
e2500fed 10958 return ix86_tls_symbol;
f996902d 10959}
e075ae69
RH
10960\f
10961/* Calculate the length of the memory address in the instruction
10962 encoding. Does not include the one-byte modrm, opcode, or prefix. */
10963
static int
memory_address_length (addr)
     rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  /* Auto-modify addresses contribute no extra encoding bytes here
     (presumably their length is accounted for elsewhere -- NOTE(review):
     confirm against the length attributes in i386.md).  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* Special cases: ebp and esp need the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* 'K' accepts signed 8-bit immediates, which encode as a
	     one-byte displacement; anything else needs four bytes.  */
	  if (GET_CODE (disp) == CONST_INT
	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
	    len = 1;
	  else
	    len = 4;
	}

      /* An index requires the two-byte modrm form.  */
      if (index)
	len += 1;
    }

  return len;
}
79325812 11020
5bf0ebab
RH
11021/* Compute default value for "length_immediate" attribute. When SHORTFORM
11022 is set, expect that insn have 8bit immediate alternative. */
e075ae69 11023int
6ef67412 11024ix86_attr_length_immediate_default (insn, shortform)
e075ae69 11025 rtx insn;
6ef67412 11026 int shortform;
e075ae69 11027{
6ef67412
JH
11028 int len = 0;
11029 int i;
6c698a6d 11030 extract_insn_cached (insn);
6ef67412
JH
11031 for (i = recog_data.n_operands - 1; i >= 0; --i)
11032 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 11033 {
6ef67412 11034 if (len)
3071fab5 11035 abort ();
6ef67412
JH
11036 if (shortform
11037 && GET_CODE (recog_data.operand[i]) == CONST_INT
11038 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11039 len = 1;
11040 else
11041 {
11042 switch (get_attr_mode (insn))
11043 {
11044 case MODE_QI:
11045 len+=1;
11046 break;
11047 case MODE_HI:
11048 len+=2;
11049 break;
11050 case MODE_SI:
11051 len+=4;
11052 break;
14f73b5a
JH
11053 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11054 case MODE_DI:
11055 len+=4;
11056 break;
6ef67412 11057 default:
c725bd79 11058 fatal_insn ("unknown insn mode", insn);
6ef67412
JH
11059 }
11060 }
3071fab5 11061 }
6ef67412
JH
11062 return len;
11063}
11064/* Compute default value for "length_address" attribute. */
11065int
11066ix86_attr_length_address_default (insn)
11067 rtx insn;
11068{
11069 int i;
6c698a6d 11070 extract_insn_cached (insn);
1ccbefce
RH
11071 for (i = recog_data.n_operands - 1; i >= 0; --i)
11072 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 11073 {
6ef67412 11074 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
11075 break;
11076 }
6ef67412 11077 return 0;
3f803cd9 11078}
e075ae69
RH
11079\f
11080/* Return the maximum number of instructions a cpu can issue. */
b657fc39 11081
c237e94a 11082static int
e075ae69 11083ix86_issue_rate ()
b657fc39 11084{
e075ae69 11085 switch (ix86_cpu)
b657fc39 11086 {
e075ae69
RH
11087 case PROCESSOR_PENTIUM:
11088 case PROCESSOR_K6:
11089 return 2;
79325812 11090
e075ae69 11091 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
11092 case PROCESSOR_PENTIUM4:
11093 case PROCESSOR_ATHLON:
e075ae69 11094 return 3;
b657fc39 11095
b657fc39 11096 default:
e075ae69 11097 return 1;
b657fc39 11098 }
b657fc39
L
11099}
11100
e075ae69
RH
11101/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11102 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 11103
e075ae69
RH
11104static int
11105ix86_flags_dependant (insn, dep_insn, insn_type)
11106 rtx insn, dep_insn;
11107 enum attr_type insn_type;
11108{
11109 rtx set, set2;
b657fc39 11110
e075ae69
RH
11111 /* Simplify the test for uninteresting insns. */
11112 if (insn_type != TYPE_SETCC
11113 && insn_type != TYPE_ICMOV
11114 && insn_type != TYPE_FCMOV
11115 && insn_type != TYPE_IBR)
11116 return 0;
b657fc39 11117
e075ae69
RH
11118 if ((set = single_set (dep_insn)) != 0)
11119 {
11120 set = SET_DEST (set);
11121 set2 = NULL_RTX;
11122 }
11123 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11124 && XVECLEN (PATTERN (dep_insn), 0) == 2
11125 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11126 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11127 {
11128 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11129 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11130 }
78a0d70c
ZW
11131 else
11132 return 0;
b657fc39 11133
78a0d70c
ZW
11134 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11135 return 0;
b657fc39 11136
f5143c46 11137 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
11138 not any other potentially set register. */
11139 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11140 return 0;
11141
11142 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11143 return 0;
11144
11145 return 1;
e075ae69 11146}
b657fc39 11147
e075ae69
RH
11148/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11149 address with operands set by DEP_INSN. */
11150
static int
ix86_agi_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx addr;

  /* On Pentium, an LEA's address is its SET_SRC; the pattern may be a
     bare SET or a PARALLEL whose first element is the SET (e.g. with a
     clobber attached).  */
  if (insn_type == TYPE_LEA
      && TARGET_PENTIUM)
    {
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
	;
      else if (GET_CODE (addr) == PARALLEL
	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
	addr = XVECEXP (addr, 0, 0);
      else
	abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      /* Otherwise use the address of the first MEM operand; insns with
	 no memory operand cannot suffer an AGI stall.  */
      int i;
      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (GET_CODE (recog_data.operand[i]) == MEM)
	  {
	    addr = XEXP (recog_data.operand[i], 0);
	    goto found;
	  }
      return 0;
    found:;
    }

  /* The stall occurs when DEP_INSN writes a register the address uses.  */
  return modified_in_p (addr, dep_insn);
}
a269a03c 11187
c237e94a 11188static int
e075ae69 11189ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
11190 rtx insn, link, dep_insn;
11191 int cost;
11192{
e075ae69 11193 enum attr_type insn_type, dep_insn_type;
6ad48e84 11194 enum attr_memory memory, dep_memory;
e075ae69 11195 rtx set, set2;
9b00189f 11196 int dep_insn_code_number;
a269a03c 11197
309ada50 11198 /* Anti and output depenancies have zero cost on all CPUs. */
e075ae69 11199 if (REG_NOTE_KIND (link) != 0)
309ada50 11200 return 0;
a269a03c 11201
9b00189f
JH
11202 dep_insn_code_number = recog_memoized (dep_insn);
11203
e075ae69 11204 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 11205 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 11206 return cost;
a269a03c 11207
1c71e60e
JH
11208 insn_type = get_attr_type (insn);
11209 dep_insn_type = get_attr_type (dep_insn);
9b00189f 11210
a269a03c
JC
11211 switch (ix86_cpu)
11212 {
11213 case PROCESSOR_PENTIUM:
e075ae69
RH
11214 /* Address Generation Interlock adds a cycle of latency. */
11215 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11216 cost += 1;
11217
11218 /* ??? Compares pair with jump/setcc. */
11219 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11220 cost = 0;
11221
11222 /* Floating point stores require value to be ready one cycle ealier. */
0f290768 11223 if (insn_type == TYPE_FMOV
e075ae69
RH
11224 && get_attr_memory (insn) == MEMORY_STORE
11225 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11226 cost += 1;
11227 break;
a269a03c 11228
e075ae69 11229 case PROCESSOR_PENTIUMPRO:
6ad48e84
JH
11230 memory = get_attr_memory (insn);
11231 dep_memory = get_attr_memory (dep_insn);
11232
0f290768 11233 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
11234 increase the cost here for non-imov insns. */
11235 if (dep_insn_type != TYPE_IMOV
6ad48e84
JH
11236 && dep_insn_type != TYPE_FMOV
11237 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
e075ae69
RH
11238 cost += 1;
11239
11240 /* INT->FP conversion is expensive. */
11241 if (get_attr_fp_int_src (dep_insn))
11242 cost += 5;
11243
11244 /* There is one cycle extra latency between an FP op and a store. */
11245 if (insn_type == TYPE_FMOV
11246 && (set = single_set (dep_insn)) != NULL_RTX
11247 && (set2 = single_set (insn)) != NULL_RTX
11248 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11249 && GET_CODE (SET_DEST (set2)) == MEM)
11250 cost += 1;
6ad48e84
JH
11251
11252 /* Show ability of reorder buffer to hide latency of load by executing
11253 in parallel with previous instruction in case
11254 previous instruction is not needed to compute the address. */
11255 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11256 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11257 {
11258 /* Claim moves to take one cycle, as core can issue one load
11259 at time and the next load can start cycle later. */
11260 if (dep_insn_type == TYPE_IMOV
11261 || dep_insn_type == TYPE_FMOV)
11262 cost = 1;
11263 else if (cost > 1)
11264 cost--;
11265 }
e075ae69 11266 break;
a269a03c 11267
e075ae69 11268 case PROCESSOR_K6:
6ad48e84
JH
11269 memory = get_attr_memory (insn);
11270 dep_memory = get_attr_memory (dep_insn);
e075ae69
RH
11271 /* The esp dependency is resolved before the instruction is really
11272 finished. */
11273 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11274 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11275 return 1;
a269a03c 11276
0f290768 11277 /* Since we can't represent delayed latencies of load+operation,
e075ae69 11278 increase the cost here for non-imov insns. */
6ad48e84 11279 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
e075ae69
RH
11280 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11281
11282 /* INT->FP conversion is expensive. */
11283 if (get_attr_fp_int_src (dep_insn))
11284 cost += 5;
6ad48e84
JH
11285
11286 /* Show ability of reorder buffer to hide latency of load by executing
11287 in parallel with previous instruction in case
11288 previous instruction is not needed to compute the address. */
11289 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11290 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11291 {
11292 /* Claim moves to take one cycle, as core can issue one load
11293 at time and the next load can start cycle later. */
11294 if (dep_insn_type == TYPE_IMOV
11295 || dep_insn_type == TYPE_FMOV)
11296 cost = 1;
11297 else if (cost > 2)
11298 cost -= 2;
11299 else
11300 cost = 1;
11301 }
a14003ee 11302 break;
e075ae69 11303
309ada50 11304 case PROCESSOR_ATHLON:
6ad48e84
JH
11305 memory = get_attr_memory (insn);
11306 dep_memory = get_attr_memory (dep_insn);
11307
11308 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
0b5107cf
JH
11309 {
11310 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11311 cost += 2;
11312 else
11313 cost += 3;
11314 }
6ad48e84
JH
11315 /* Show ability of reorder buffer to hide latency of load by executing
11316 in parallel with previous instruction in case
11317 previous instruction is not needed to compute the address. */
11318 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11319 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11320 {
11321 /* Claim moves to take one cycle, as core can issue one load
11322 at time and the next load can start cycle later. */
11323 if (dep_insn_type == TYPE_IMOV
11324 || dep_insn_type == TYPE_FMOV)
11325 cost = 0;
11326 else if (cost >= 3)
11327 cost -= 3;
11328 else
11329 cost = 0;
11330 }
309ada50 11331
a269a03c 11332 default:
a269a03c
JC
11333 break;
11334 }
11335
11336 return cost;
11337}
0a726ef1 11338
e075ae69
RH
/* Per-cycle state carried by the machine-dependent scheduler hooks
   below; currently only the PentiumPro model uses it.  */
static union
{
  struct ppro_sched_data
  {
    /* Insns occupying the three PPro decoders this cycle (NULL = free).  */
    rtx decode[3];
    /* Number of insns issued in the current cycle.  */
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;
0a726ef1 11347
e075ae69
RH
11348static enum attr_ppro_uops
11349ix86_safe_ppro_uops (insn)
11350 rtx insn;
11351{
11352 if (recog_memoized (insn) >= 0)
11353 return get_attr_ppro_uops (insn);
11354 else
11355 return PPRO_UOPS_MANY;
11356}
0a726ef1 11357
e075ae69
RH
11358static void
11359ix86_dump_ppro_packet (dump)
11360 FILE *dump;
0a726ef1 11361{
e075ae69 11362 if (ix86_sched_data.ppro.decode[0])
0a726ef1 11363 {
e075ae69
RH
11364 fprintf (dump, "PPRO packet: %d",
11365 INSN_UID (ix86_sched_data.ppro.decode[0]));
11366 if (ix86_sched_data.ppro.decode[1])
11367 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11368 if (ix86_sched_data.ppro.decode[2])
11369 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11370 fputc ('\n', dump);
11371 }
11372}
0a726ef1 11373
e075ae69 11374/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 11375
c237e94a
ZW
static void
ix86_sched_init (dump, sched_verbose, veclen)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     int veclen ATTRIBUTE_UNUSED;
{
  /* Reset all scheduler bookkeeping (decoder slots, issue count) at the
     start of each scheduling region.  */
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
}
11384
11385/* Shift INSN to SLOT, and shift everything else down. */
11386
11387static void
11388ix86_reorder_insn (insnp, slot)
11389 rtx *insnp, *slot;
11390{
11391 if (insnp != slot)
11392 {
11393 rtx insn = *insnp;
0f290768 11394 do
e075ae69
RH
11395 insnp[0] = insnp[1];
11396 while (++insnp != slot);
11397 *insnp = insn;
0a726ef1 11398 }
e075ae69
RH
11399}
11400
/* Reorder the ready list READY..E_READY (E_READY is the highest-priority
   end) to match the PPro decoder template: one insn of up to 4 uops in
   decoder 0, then single-uop insns in decoders 1 and 2.  Updates
   ix86_sched_data.ppro.issued_this_cycle as a side effect.  */
static void
ix86_sched_reorder_ppro (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop unsn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	/* E_READY may have moved below READY once insns are consumed.  */
	if (ready > e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}
fb693d44 11485
0f290768 11486/* We are about to being issuing insns for this clock cycle.
78a0d70c 11487 Override the default sort algorithm to better slot instructions. */
c237e94a
ZW
/* Target hook TARGET_SCHED_REORDER: we are about to begin issuing insns
   for this clock cycle -- override the default sort of READY (length
   *N_READYP) to better slot instructions.  Returns the issue rate.  */
static int
ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int *n_readyp;
     int clock_var ATTRIBUTE_UNUSED;
{
  int n_ready = *n_readyp;
  rtx *e_ready = ready + n_ready - 1;	/* highest-priority end */

  /* Make sure to go ahead and initialize key items in
     ix86_sched_data if we are not going to bother trying to
     reorder the ready queue.  */
  if (n_ready < 2)
    {
      ix86_sched_data.ppro.issued_this_cycle = 1;
      goto out;
    }

  switch (ix86_cpu)
    {
    default:
      break;

    case PROCESSOR_PENTIUMPRO:
      /* Only the PPro model reorders; other CPUs keep the default order.  */
      ix86_sched_reorder_ppro (ready, e_ready);
      break;
    }

out:
  return ix86_issue_rate ();
}
fb693d44 11521
e075ae69
RH
11522/* We are about to issue INSN. Return the number of insns left on the
11523 ready queue that can be issued this cycle. */
b222082e 11524
/* Target hook TARGET_SCHED_VARIABLE_ISSUE: we are about to issue INSN.
   Return the number of insns that may still be issued this cycle,
   updating the PPro decoder model along the way.  */
static int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      /* Simple model: every insn consumes one issue slot.  */
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    /* A many-uop insn occupies the whole decode group by itself:
	       dump the packet in progress, then the singleton packet.
	       NOTE(review): the two dumps here look intentional (previous
	       packet, then this insn's own packet) -- confirm.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    /* A 2-4 uop insn starts a fresh packet in decoder 0.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    /* Single-uop insn: place it in the first free decoder.  */
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		/* Packet full: dump it and start over.  */
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}
9b690711
RH
11584
11585static int
11586ia32_use_dfa_pipeline_interface ()
11587{
11588 if (ix86_cpu == PROCESSOR_PENTIUM)
11589 return 1;
11590 return 0;
11591}
11592
11593/* How many alternative schedules to try. This should be as wide as the
11594 scheduling freedom in the DFA, but no wider. Making this value too
11595 large results extra work for the scheduler. */
11596
11597static int
11598ia32_multipass_dfa_lookahead ()
11599{
11600 if (ix86_cpu == PROCESSOR_PENTIUM)
11601 return 2;
11602 else
11603 return 0;
11604}
11605
a7180f70 11606\f
0e4970d7
RK
11607/* Walk through INSNS and look for MEM references whose address is DSTREG or
11608 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11609 appropriate. */
11610
11611void
11612ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11613 rtx insns;
11614 rtx dstref, srcref, dstreg, srcreg;
11615{
11616 rtx insn;
11617
11618 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11619 if (INSN_P (insn))
11620 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11621 dstreg, srcreg);
11622}
11623
11624/* Subroutine of above to actually do the updating by recursively walking
11625 the rtx. */
11626
/* Subroutine of ix86_set_move_mem_attrs: recursively walk X and copy the
   attributes of DSTREF (resp. SRCREF) onto every MEM whose address is
   exactly the register DSTREG (resp. SRCREG).  */
static void
ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
     rtx x;
     rtx dstref, srcref, dstreg, srcreg;
{
  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);
  int i, j;

  /* Note the pointer-equality test: only MEMs addressed directly by the
     given registers are updated, not derived addresses.  */
  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  /* Recurse into every rtx ('e') and rtx-vector ('E') operand.  */
  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
    {
      if (*format_ptr == 'e')
	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
				   dstreg, srcreg);
      else if (*format_ptr == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
				     dstreg, srcreg);
    }
}
11652\f
a7180f70
BS
11653/* Compute the alignment given to a constant that is being placed in memory.
11654 EXP is the constant and ALIGN is the alignment that the object would
11655 ordinarily have.
11656 The value of this function is used instead of that alignment to align
11657 the object. */
11658
11659int
11660ix86_constant_alignment (exp, align)
11661 tree exp;
11662 int align;
11663{
11664 if (TREE_CODE (exp) == REAL_CST)
11665 {
11666 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11667 return 64;
11668 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11669 return 128;
11670 }
11671 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11672 && align < 256)
11673 return 256;
11674
11675 return align;
11676}
11677
11678/* Compute the alignment for a static variable.
11679 TYPE is the data type, and ALIGN is the alignment that
11680 the object would ordinarily have. The value of this function is used
11681 instead of that alignment to align the object. */
11682
/* Compute the alignment for a static variable of type TYPE; ALIGN is the
   alignment the object would ordinarily have, and the returned value is
   used instead.  Returns a bit alignment (64/128/256) when the type
   benefits from extra alignment, otherwise ALIGN unchanged.  */
int
ix86_data_alignment (type, align)
     tree type;
     int align;
{
  /* Large aggregates (>= 256 bits, or with a high word in their size)
     are bumped to 256-bit alignment.  */
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  /* For the remaining cases the decision follows the element/field mode:
     DFmode data gets 64-bit alignment, 128-bit modes get 128.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Only the first field's mode is consulted here.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
11743
11744/* Compute the alignment for a local variable.
11745 TYPE is the data type, and ALIGN is the alignment that
11746 the object would ordinarily have. The value of this macro is used
11747 instead of that alignment to align the object. */
11748
/* Compute the alignment for a local (automatic) variable of type TYPE;
   ALIGN is the ordinary alignment and the returned bit alignment is used
   instead.  Mirrors ix86_data_alignment but without the 256-bit bump for
   large static aggregates.  */
int
ix86_local_alignment (type, align)
     tree type;
     int align;
{
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  /* As for statics, DFmode-based data gets 64-bit alignment and 128-bit
     modes get 128-bit alignment.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Only the first field's mode is consulted here.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {

      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
0ed08620
JH
11800\f
11801/* Emit RTL insns to initialize the variable parts of a trampoline.
11802 FNADDR is an RTX for the address of the function's pure code.
11803 CXT is an RTX for the static chain value for the function. */
11804void
11805x86_initialize_trampoline (tramp, fnaddr, cxt)
11806 rtx tramp, fnaddr, cxt;
11807{
11808 if (!TARGET_64BIT)
11809 {
11810 /* Compute offset from the end of the jmp to the target function. */
11811 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11812 plus_constant (tramp, 10),
11813 NULL_RTX, 1, OPTAB_DIRECT);
11814 emit_move_insn (gen_rtx_MEM (QImode, tramp),
d8bf17f9 11815 gen_int_mode (0xb9, QImode));
0ed08620
JH
11816 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11817 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
d8bf17f9 11818 gen_int_mode (0xe9, QImode));
0ed08620
JH
11819 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11820 }
11821 else
11822 {
11823 int offset = 0;
11824 /* Try to load address using shorter movl instead of movabs.
11825 We may want to support movq for kernel mode, but kernel does not use
11826 trampolines at the moment. */
11827 if (x86_64_zero_extended_value (fnaddr))
11828 {
11829 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11830 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11831 gen_int_mode (0xbb41, HImode));
0ed08620
JH
11832 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11833 gen_lowpart (SImode, fnaddr));
11834 offset += 6;
11835 }
11836 else
11837 {
11838 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11839 gen_int_mode (0xbb49, HImode));
0ed08620
JH
11840 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11841 fnaddr);
11842 offset += 10;
11843 }
11844 /* Load static chain using movabs to r10. */
11845 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11846 gen_int_mode (0xba49, HImode));
0ed08620
JH
11847 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11848 cxt);
11849 offset += 10;
11850 /* Jump to the r11 */
11851 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11852 gen_int_mode (0xff49, HImode));
0ed08620 11853 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
d8bf17f9 11854 gen_int_mode (0xe3, QImode));
0ed08620
JH
11855 offset += 3;
11856 if (offset > TRAMPOLINE_SIZE)
b531087a 11857 abort ();
0ed08620 11858 }
5791cc29
JT
11859
11860#ifdef TRANSFER_FROM_TRAMPOLINE
11861 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
11862 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
11863#endif
0ed08620 11864}
eeb06b1b 11865\f
6a2dd09a
RS
/* Register the md builtin NAME with signature TYPE and code CODE, but
   only when at least one of the target_flags bits in MASK is enabled
   on the command line (e.g. -mmmx, -msse, -msse2).  */
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags)					\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
		      NULL, NULL_TREE);				\
} while (0)
bd793c65 11872
bd793c65
BS
11873struct builtin_description
11874{
8b60264b
KG
11875 const unsigned int mask;
11876 const enum insn_code icode;
11877 const char *const name;
11878 const enum ix86_builtins code;
11879 const enum rtx_code comparison;
11880 const unsigned int flag;
bd793c65
BS
11881};
11882
fbe5eb6d
BS
/* Used for builtins that are enabled both by -msse and -msse2.  */
#define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11885
8b60264b 11886static const struct builtin_description bdesc_comi[] =
bd793c65 11887{
1194ca05
JH
11888 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
11889 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
11890 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
11891 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
11892 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
11893 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
11894 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
11895 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
11896 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
11897 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
11898 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
11899 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
11900 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
11901 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
11902 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
11903 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
11904 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
11905 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
11906 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
11907 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
11908 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
11909 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
11910 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
11911 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
bd793c65
BS
11912};
11913
8b60264b 11914static const struct builtin_description bdesc_2arg[] =
bd793c65
BS
11915{
11916 /* SSE */
fbe5eb6d
BS
11917 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11918 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11919 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11920 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11921 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11922 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11923 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11924 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11925
11926 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11927 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11928 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11929 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11930 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11931 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11932 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11933 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11934 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11935 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11936 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11937 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11938 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11939 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11940 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
fbe5eb6d
BS
11941 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11942 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11943 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11944 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
fbe5eb6d
BS
11945 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11946
11947 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11948 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11949 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11950 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11951
1877be45
JH
11952 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
11953 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
11954 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
11955 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
11956
fbe5eb6d
BS
11957 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11958 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11959 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11960 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11961 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
bd793c65
BS
11962
11963 /* MMX */
eeb06b1b
BS
11964 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11965 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11966 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11967 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11968 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11969 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11970
11971 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11972 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11973 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11974 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11975 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11976 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11977 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11978 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11979
11980 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11981 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
fbe5eb6d 11982 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
eeb06b1b
BS
11983
11984 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11985 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11986 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11987 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11988
fbe5eb6d
BS
11989 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11990 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
eeb06b1b
BS
11991
11992 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11993 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11994 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11995 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11996 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11997 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11998
fbe5eb6d
BS
11999 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12000 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12001 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12002 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
eeb06b1b
BS
12003
12004 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12005 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12006 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12007 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12008 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12009 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
bd793c65
BS
12010
12011 /* Special. */
eeb06b1b
BS
12012 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12013 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12014 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12015
fbe5eb6d
BS
12016 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12017 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
eeb06b1b
BS
12018
12019 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12020 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12021 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12022 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12023 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12024 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12025
12026 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12027 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12028 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12029 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12030 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12031 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12032
12033 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12034 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12035 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12036 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12037
fbe5eb6d
BS
12038 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12039 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12040
12041 /* SSE2 */
12042 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12043 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12044 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12045 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12046 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12047 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12048 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12049 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12050
12051 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12052 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12053 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12054 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12055 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12056 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12057 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12058 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12059 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12060 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12061 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12062 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12063 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12064 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12065 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
fbe5eb6d
BS
12066 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12067 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12068 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12069 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
fbe5eb6d
BS
12070 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12071
12072 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12073 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12074 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12075 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12076
1877be45
JH
12077 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12078 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12079 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12080 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
fbe5eb6d
BS
12081
12082 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12083 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12084 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12085
12086 /* SSE2 MMX */
12087 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12088 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12089 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12090 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12091 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12092 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12093 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12094 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12095
12096 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12097 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12098 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12099 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12100 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12101 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12102 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12103 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12104
12105 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12106 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12107 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12108 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12109
916b60b7
BS
12110 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12111 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12112 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12113 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
fbe5eb6d
BS
12114
12115 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12116 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12117
12118 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12119 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12120 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12121 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12122 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12123 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12124
12125 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12126 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12127 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12128 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12129
12130 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12131 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12132 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
077084dd 12133 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
fbe5eb6d
BS
12134 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12135 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12136 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
f02e1358 12137 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
fbe5eb6d 12138
916b60b7
BS
12139 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12140 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12141 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12142
12143 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12144 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12145
12146 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12147 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12148 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12149 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12150 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12151 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12152
12153 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12154 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12155 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12156 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12157 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12158 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12159
12160 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12161 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12162 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12163 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12164
12165 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12166
fbe5eb6d
BS
12167 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12168 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12169 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
bd793c65
BS
12170};
12171
8b60264b 12172static const struct builtin_description bdesc_1arg[] =
bd793c65 12173{
fbe5eb6d
BS
12174 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12175 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12176
12177 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12178 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12179 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12180
12181 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12182 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12183 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12184 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12185
12186 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12187 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12188 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
f02e1358 12189 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
fbe5eb6d
BS
12190
12191 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12192
12193 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12194 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
bd793c65 12195
fbe5eb6d
BS
12196 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12197 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12198 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12199 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12200 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
bd793c65 12201
fbe5eb6d 12202 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
bd793c65 12203
fbe5eb6d
BS
12204 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12205 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12206
12207 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12208 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
f02e1358
JH
12209 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12210
12211 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
bd793c65
BS
12212};
12213
f6155fda
SS
12214void
12215ix86_init_builtins ()
12216{
12217 if (TARGET_MMX)
12218 ix86_init_mmx_sse_builtins ();
12219}
12220
12221/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
bd793c65
BS
12222 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12223 builtins. */
e37af218 12224static void
f6155fda 12225ix86_init_mmx_sse_builtins ()
bd793c65 12226{
8b60264b 12227 const struct builtin_description * d;
77ebd435 12228 size_t i;
bd793c65
BS
12229
12230 tree pchar_type_node = build_pointer_type (char_type_node);
12231 tree pfloat_type_node = build_pointer_type (float_type_node);
12232 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
916b60b7 12233 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
bd793c65
BS
12234 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12235
12236 /* Comparisons. */
12237 tree int_ftype_v4sf_v4sf
b4de2f7d
AH
12238 = build_function_type_list (integer_type_node,
12239 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12240 tree v4si_ftype_v4sf_v4sf
b4de2f7d
AH
12241 = build_function_type_list (V4SI_type_node,
12242 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12243 /* MMX/SSE/integer conversions. */
bd793c65 12244 tree int_ftype_v4sf
b4de2f7d
AH
12245 = build_function_type_list (integer_type_node,
12246 V4SF_type_node, NULL_TREE);
bd793c65 12247 tree int_ftype_v8qi
b4de2f7d 12248 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
bd793c65 12249 tree v4sf_ftype_v4sf_int
b4de2f7d
AH
12250 = build_function_type_list (V4SF_type_node,
12251 V4SF_type_node, integer_type_node, NULL_TREE);
bd793c65 12252 tree v4sf_ftype_v4sf_v2si
b4de2f7d
AH
12253 = build_function_type_list (V4SF_type_node,
12254 V4SF_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12255 tree int_ftype_v4hi_int
b4de2f7d
AH
12256 = build_function_type_list (integer_type_node,
12257 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 12258 tree v4hi_ftype_v4hi_int_int
e7a60f56 12259 = build_function_type_list (V4HI_type_node, V4HI_type_node,
b4de2f7d
AH
12260 integer_type_node, integer_type_node,
12261 NULL_TREE);
bd793c65
BS
12262 /* Miscellaneous. */
12263 tree v8qi_ftype_v4hi_v4hi
b4de2f7d
AH
12264 = build_function_type_list (V8QI_type_node,
12265 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12266 tree v4hi_ftype_v2si_v2si
b4de2f7d
AH
12267 = build_function_type_list (V4HI_type_node,
12268 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12269 tree v4sf_ftype_v4sf_v4sf_int
b4de2f7d
AH
12270 = build_function_type_list (V4SF_type_node,
12271 V4SF_type_node, V4SF_type_node,
12272 integer_type_node, NULL_TREE);
bd793c65 12273 tree v2si_ftype_v4hi_v4hi
b4de2f7d
AH
12274 = build_function_type_list (V2SI_type_node,
12275 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12276 tree v4hi_ftype_v4hi_int
b4de2f7d 12277 = build_function_type_list (V4HI_type_node,
e7a60f56 12278 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 12279 tree v4hi_ftype_v4hi_di
b4de2f7d
AH
12280 = build_function_type_list (V4HI_type_node,
12281 V4HI_type_node, long_long_unsigned_type_node,
12282 NULL_TREE);
bd793c65 12283 tree v2si_ftype_v2si_di
b4de2f7d
AH
12284 = build_function_type_list (V2SI_type_node,
12285 V2SI_type_node, long_long_unsigned_type_node,
12286 NULL_TREE);
bd793c65 12287 tree void_ftype_void
b4de2f7d 12288 = build_function_type (void_type_node, void_list_node);
bd793c65 12289 tree void_ftype_unsigned
b4de2f7d 12290 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
bd793c65 12291 tree unsigned_ftype_void
b4de2f7d 12292 = build_function_type (unsigned_type_node, void_list_node);
bd793c65 12293 tree di_ftype_void
b4de2f7d 12294 = build_function_type (long_long_unsigned_type_node, void_list_node);
e37af218 12295 tree v4sf_ftype_void
b4de2f7d 12296 = build_function_type (V4SF_type_node, void_list_node);
bd793c65 12297 tree v2si_ftype_v4sf
b4de2f7d 12298 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12299 /* Loads/stores. */
bd793c65 12300 tree void_ftype_v8qi_v8qi_pchar
b4de2f7d
AH
12301 = build_function_type_list (void_type_node,
12302 V8QI_type_node, V8QI_type_node,
12303 pchar_type_node, NULL_TREE);
bd793c65 12304 tree v4sf_ftype_pfloat
b4de2f7d 12305 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
bd793c65
BS
12306 /* @@@ the type is bogus */
12307 tree v4sf_ftype_v4sf_pv2si
b4de2f7d 12308 = build_function_type_list (V4SF_type_node,
f8ca7923 12309 V4SF_type_node, pv2si_type_node, NULL_TREE);
1255c85c 12310 tree void_ftype_pv2si_v4sf
b4de2f7d 12311 = build_function_type_list (void_type_node,
f8ca7923 12312 pv2si_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12313 tree void_ftype_pfloat_v4sf
b4de2f7d
AH
12314 = build_function_type_list (void_type_node,
12315 pfloat_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12316 tree void_ftype_pdi_di
b4de2f7d
AH
12317 = build_function_type_list (void_type_node,
12318 pdi_type_node, long_long_unsigned_type_node,
12319 NULL_TREE);
916b60b7 12320 tree void_ftype_pv2di_v2di
b4de2f7d
AH
12321 = build_function_type_list (void_type_node,
12322 pv2di_type_node, V2DI_type_node, NULL_TREE);
bd793c65
BS
12323 /* Normal vector unops. */
12324 tree v4sf_ftype_v4sf
b4de2f7d 12325 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
0f290768 12326
bd793c65
BS
12327 /* Normal vector binops. */
12328 tree v4sf_ftype_v4sf_v4sf
b4de2f7d
AH
12329 = build_function_type_list (V4SF_type_node,
12330 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12331 tree v8qi_ftype_v8qi_v8qi
b4de2f7d
AH
12332 = build_function_type_list (V8QI_type_node,
12333 V8QI_type_node, V8QI_type_node, NULL_TREE);
bd793c65 12334 tree v4hi_ftype_v4hi_v4hi
b4de2f7d
AH
12335 = build_function_type_list (V4HI_type_node,
12336 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12337 tree v2si_ftype_v2si_v2si
b4de2f7d
AH
12338 = build_function_type_list (V2SI_type_node,
12339 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12340 tree di_ftype_di_di
b4de2f7d
AH
12341 = build_function_type_list (long_long_unsigned_type_node,
12342 long_long_unsigned_type_node,
12343 long_long_unsigned_type_node, NULL_TREE);
bd793c65 12344
47f339cf 12345 tree v2si_ftype_v2sf
ae3aa00d 12346 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12347 tree v2sf_ftype_v2si
b4de2f7d 12348 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
47f339cf 12349 tree v2si_ftype_v2si
b4de2f7d 12350 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
47f339cf 12351 tree v2sf_ftype_v2sf
b4de2f7d 12352 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12353 tree v2sf_ftype_v2sf_v2sf
b4de2f7d
AH
12354 = build_function_type_list (V2SF_type_node,
12355 V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12356 tree v2si_ftype_v2sf_v2sf
b4de2f7d
AH
12357 = build_function_type_list (V2SI_type_node,
12358 V2SF_type_node, V2SF_type_node, NULL_TREE);
fbe5eb6d
BS
12359 tree pint_type_node = build_pointer_type (integer_type_node);
12360 tree pdouble_type_node = build_pointer_type (double_type_node);
12361 tree int_ftype_v2df_v2df
b4de2f7d
AH
12362 = build_function_type_list (integer_type_node,
12363 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d
BS
12364
12365 tree ti_ftype_void
b4de2f7d 12366 = build_function_type (intTI_type_node, void_list_node);
f02e1358
JH
12367 tree v2di_ftype_void
12368 = build_function_type (V2DI_type_node, void_list_node);
fbe5eb6d 12369 tree ti_ftype_ti_ti
b4de2f7d
AH
12370 = build_function_type_list (intTI_type_node,
12371 intTI_type_node, intTI_type_node, NULL_TREE);
fbe5eb6d 12372 tree void_ftype_pvoid
b4de2f7d 12373 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
fbe5eb6d 12374 tree v2di_ftype_di
b4de2f7d
AH
12375 = build_function_type_list (V2DI_type_node,
12376 long_long_unsigned_type_node, NULL_TREE);
f02e1358
JH
12377 tree di_ftype_v2di
12378 = build_function_type_list (long_long_unsigned_type_node,
12379 V2DI_type_node, NULL_TREE);
fbe5eb6d 12380 tree v4sf_ftype_v4si
b4de2f7d 12381 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12382 tree v4si_ftype_v4sf
b4de2f7d 12383 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12384 tree v2df_ftype_v4si
b4de2f7d 12385 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12386 tree v4si_ftype_v2df
b4de2f7d 12387 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12388 tree v2si_ftype_v2df
b4de2f7d 12389 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12390 tree v4sf_ftype_v2df
b4de2f7d 12391 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12392 tree v2df_ftype_v2si
b4de2f7d 12393 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
fbe5eb6d 12394 tree v2df_ftype_v4sf
b4de2f7d 12395 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12396 tree int_ftype_v2df
b4de2f7d 12397 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12398 tree v2df_ftype_v2df_int
b4de2f7d
AH
12399 = build_function_type_list (V2DF_type_node,
12400 V2DF_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12401 tree v4sf_ftype_v4sf_v2df
b4de2f7d
AH
12402 = build_function_type_list (V4SF_type_node,
12403 V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12404 tree v2df_ftype_v2df_v4sf
b4de2f7d
AH
12405 = build_function_type_list (V2DF_type_node,
12406 V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12407 tree v2df_ftype_v2df_v2df_int
b4de2f7d
AH
12408 = build_function_type_list (V2DF_type_node,
12409 V2DF_type_node, V2DF_type_node,
12410 integer_type_node,
12411 NULL_TREE);
fbe5eb6d 12412 tree v2df_ftype_v2df_pv2si
b4de2f7d
AH
12413 = build_function_type_list (V2DF_type_node,
12414 V2DF_type_node, pv2si_type_node, NULL_TREE);
fbe5eb6d 12415 tree void_ftype_pv2si_v2df
b4de2f7d
AH
12416 = build_function_type_list (void_type_node,
12417 pv2si_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12418 tree void_ftype_pdouble_v2df
b4de2f7d
AH
12419 = build_function_type_list (void_type_node,
12420 pdouble_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12421 tree void_ftype_pint_int
b4de2f7d
AH
12422 = build_function_type_list (void_type_node,
12423 pint_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12424 tree void_ftype_v16qi_v16qi_pchar
b4de2f7d
AH
12425 = build_function_type_list (void_type_node,
12426 V16QI_type_node, V16QI_type_node,
12427 pchar_type_node, NULL_TREE);
fbe5eb6d 12428 tree v2df_ftype_pdouble
b4de2f7d 12429 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
fbe5eb6d 12430 tree v2df_ftype_v2df_v2df
b4de2f7d
AH
12431 = build_function_type_list (V2DF_type_node,
12432 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12433 tree v16qi_ftype_v16qi_v16qi
b4de2f7d
AH
12434 = build_function_type_list (V16QI_type_node,
12435 V16QI_type_node, V16QI_type_node, NULL_TREE);
fbe5eb6d 12436 tree v8hi_ftype_v8hi_v8hi
b4de2f7d
AH
12437 = build_function_type_list (V8HI_type_node,
12438 V8HI_type_node, V8HI_type_node, NULL_TREE);
fbe5eb6d 12439 tree v4si_ftype_v4si_v4si
b4de2f7d
AH
12440 = build_function_type_list (V4SI_type_node,
12441 V4SI_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12442 tree v2di_ftype_v2di_v2di
b4de2f7d
AH
12443 = build_function_type_list (V2DI_type_node,
12444 V2DI_type_node, V2DI_type_node, NULL_TREE);
fbe5eb6d 12445 tree v2di_ftype_v2df_v2df
b4de2f7d
AH
12446 = build_function_type_list (V2DI_type_node,
12447 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12448 tree v2df_ftype_v2df
b4de2f7d 12449 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12450 tree v2df_ftype_double
b4de2f7d 12451 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
fbe5eb6d 12452 tree v2df_ftype_double_double
b4de2f7d
AH
12453 = build_function_type_list (V2DF_type_node,
12454 double_type_node, double_type_node, NULL_TREE);
fbe5eb6d 12455 tree int_ftype_v8hi_int
b4de2f7d
AH
12456 = build_function_type_list (integer_type_node,
12457 V8HI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12458 tree v8hi_ftype_v8hi_int_int
b4de2f7d
AH
12459 = build_function_type_list (V8HI_type_node,
12460 V8HI_type_node, integer_type_node,
12461 integer_type_node, NULL_TREE);
916b60b7 12462 tree v2di_ftype_v2di_int
b4de2f7d
AH
12463 = build_function_type_list (V2DI_type_node,
12464 V2DI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12465 tree v4si_ftype_v4si_int
b4de2f7d
AH
12466 = build_function_type_list (V4SI_type_node,
12467 V4SI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12468 tree v8hi_ftype_v8hi_int
b4de2f7d
AH
12469 = build_function_type_list (V8HI_type_node,
12470 V8HI_type_node, integer_type_node, NULL_TREE);
916b60b7 12471 tree v8hi_ftype_v8hi_v2di
b4de2f7d
AH
12472 = build_function_type_list (V8HI_type_node,
12473 V8HI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 12474 tree v4si_ftype_v4si_v2di
b4de2f7d
AH
12475 = build_function_type_list (V4SI_type_node,
12476 V4SI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 12477 tree v4si_ftype_v8hi_v8hi
b4de2f7d
AH
12478 = build_function_type_list (V4SI_type_node,
12479 V8HI_type_node, V8HI_type_node, NULL_TREE);
916b60b7 12480 tree di_ftype_v8qi_v8qi
b4de2f7d
AH
12481 = build_function_type_list (long_long_unsigned_type_node,
12482 V8QI_type_node, V8QI_type_node, NULL_TREE);
916b60b7 12483 tree v2di_ftype_v16qi_v16qi
b4de2f7d
AH
12484 = build_function_type_list (V2DI_type_node,
12485 V16QI_type_node, V16QI_type_node, NULL_TREE);
916b60b7 12486 tree int_ftype_v16qi
b4de2f7d 12487 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
f02e1358
JH
12488 tree v16qi_ftype_pchar
12489 = build_function_type_list (V16QI_type_node, pchar_type_node, NULL_TREE);
12490 tree void_ftype_pchar_v16qi
12491 = build_function_type_list (void_type_node,
12492 pchar_type_node, V16QI_type_node, NULL_TREE);
12493 tree v4si_ftype_pchar
12494 = build_function_type_list (V4SI_type_node, pchar_type_node, NULL_TREE);
12495 tree void_ftype_pchar_v4si
12496 = build_function_type_list (void_type_node,
12497 pchar_type_node, V4SI_type_node, NULL_TREE);
12498 tree v2di_ftype_v2di
12499 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
47f339cf 12500
bd793c65
BS
12501 /* Add all builtins that are more or less simple operations on two
12502 operands. */
ca7558fc 12503 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
12504 {
12505 /* Use one of the operands; the target can have a different mode for
12506 mask-generating compares. */
12507 enum machine_mode mode;
12508 tree type;
12509
12510 if (d->name == 0)
12511 continue;
12512 mode = insn_data[d->icode].operand[1].mode;
12513
bd793c65
BS
12514 switch (mode)
12515 {
fbe5eb6d
BS
12516 case V16QImode:
12517 type = v16qi_ftype_v16qi_v16qi;
12518 break;
12519 case V8HImode:
12520 type = v8hi_ftype_v8hi_v8hi;
12521 break;
12522 case V4SImode:
12523 type = v4si_ftype_v4si_v4si;
12524 break;
12525 case V2DImode:
12526 type = v2di_ftype_v2di_v2di;
12527 break;
12528 case V2DFmode:
12529 type = v2df_ftype_v2df_v2df;
12530 break;
12531 case TImode:
12532 type = ti_ftype_ti_ti;
12533 break;
bd793c65
BS
12534 case V4SFmode:
12535 type = v4sf_ftype_v4sf_v4sf;
12536 break;
12537 case V8QImode:
12538 type = v8qi_ftype_v8qi_v8qi;
12539 break;
12540 case V4HImode:
12541 type = v4hi_ftype_v4hi_v4hi;
12542 break;
12543 case V2SImode:
12544 type = v2si_ftype_v2si_v2si;
12545 break;
bd793c65
BS
12546 case DImode:
12547 type = di_ftype_di_di;
12548 break;
12549
12550 default:
12551 abort ();
12552 }
0f290768 12553
bd793c65
BS
12554 /* Override for comparisons. */
12555 if (d->icode == CODE_FOR_maskcmpv4sf3
12556 || d->icode == CODE_FOR_maskncmpv4sf3
12557 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12558 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12559 type = v4si_ftype_v4sf_v4sf;
12560
fbe5eb6d
BS
12561 if (d->icode == CODE_FOR_maskcmpv2df3
12562 || d->icode == CODE_FOR_maskncmpv2df3
12563 || d->icode == CODE_FOR_vmmaskcmpv2df3
12564 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12565 type = v2di_ftype_v2df_v2df;
12566
eeb06b1b 12567 def_builtin (d->mask, d->name, type, d->code);
bd793c65
BS
12568 }
12569
12570 /* Add the remaining MMX insns with somewhat more complicated types. */
eeb06b1b
BS
12571 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12572 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12573 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12574 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12575 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12576 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12577 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12578
12579 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12580 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12581 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12582
12583 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12584 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12585
12586 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12587 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
0f290768 12588
bd793c65 12589 /* comi/ucomi insns. */
ca7558fc 12590 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
fbe5eb6d
BS
12591 if (d->mask == MASK_SSE2)
12592 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12593 else
12594 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 12595
1255c85c
BS
12596 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12597 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12598 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
bd793c65 12599
fbe5eb6d
BS
12600 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12601 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12602 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12603 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12604 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12605 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
bd793c65 12606
fbe5eb6d
BS
12607 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12608 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
bd793c65 12609
fbe5eb6d 12610 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
bd793c65 12611
fbe5eb6d
BS
12612 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12613 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12614 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12615 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12616 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12617 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
bd793c65 12618
fbe5eb6d
BS
12619 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12620 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12621 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12622 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
bd793c65 12623
fbe5eb6d
BS
12624 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12625 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12626 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12627 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
bd793c65 12628
fbe5eb6d 12629 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
bd793c65 12630
916b60b7 12631 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
bd793c65 12632
fbe5eb6d
BS
12633 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12634 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12635 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12636 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12637 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12638 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
bd793c65 12639
fbe5eb6d 12640 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
bd793c65 12641
47f339cf
BS
12642 /* Original 3DNow! */
12643 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12644 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12645 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12646 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12647 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12648 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12649 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12650 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12651 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12652 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12653 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12654 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12655 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12656 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12657 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12658 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12659 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12660 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12661 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12662 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
47f339cf
BS
12663
12664 /* 3DNow! extension as used in the Athlon CPU. */
12665 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12666 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12667 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12668 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12669 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12670 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12671
fbe5eb6d
BS
12672 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12673
12674 /* SSE2 */
12675 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12676 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12677
12678 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12679 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
f02e1358 12680 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
fbe5eb6d
BS
12681
12682 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12683 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12684 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12685 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12686 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12687 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12688
12689 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12690 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12691 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12692 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12693
12694 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
916b60b7 12695 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
fbe5eb6d
BS
12696 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12697 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
916b60b7 12698 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
fbe5eb6d
BS
12699
12700 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12701 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12702 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
916b60b7 12703 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
fbe5eb6d
BS
12704
12705 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12706 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12707
12708 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12709
12710 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
916b60b7 12711 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
fbe5eb6d
BS
12712
12713 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12714 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12715 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12716 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12717 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12718
12719 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12720
12721 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12722 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12723
12724 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12725 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12726 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12727
12728 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12729 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12730 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12731
12732 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12733 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12734 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12735 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12736 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12737 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12738 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12739
12740 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12741 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12742 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
916b60b7 12743
f02e1358
JH
12744 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQA);
12745 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQU);
12746 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pchar, IX86_BUILTIN_LOADD);
12747 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
12748 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
12749 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pchar_v4si, IX86_BUILTIN_STORED);
12750 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
12751
12752 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
12753
916b60b7
BS
12754 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12755 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12756 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12757
12758 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12759 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12760 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12761
12762 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12763 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12764
ab3146fd 12765 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
916b60b7
BS
12766 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12767 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12768 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12769
ab3146fd 12770 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
916b60b7
BS
12771 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12772 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12773 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12774
12775 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12776 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12777
12778 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
bd793c65
BS
12779}
12780
12781/* Errors in the source file can cause expand_expr to return const0_rtx
12782 where we expect a vector. To avoid crashing, use one of the vector
12783 clear instructions. */
12784static rtx
12785safe_vector_operand (x, mode)
12786 rtx x;
12787 enum machine_mode mode;
12788{
12789 if (x != const0_rtx)
12790 return x;
12791 x = gen_reg_rtx (mode);
12792
47f339cf 12793 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
bd793c65
BS
12794 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12795 : gen_rtx_SUBREG (DImode, x, 0)));
12796 else
e37af218
RH
12797 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12798 : gen_rtx_SUBREG (V4SFmode, x, 0)));
bd793c65
BS
12799 return x;
12800}
12801
12802/* Subroutine of ix86_expand_builtin to take care of binop insns. */
12803
12804static rtx
12805ix86_expand_binop_builtin (icode, arglist, target)
12806 enum insn_code icode;
12807 tree arglist;
12808 rtx target;
12809{
12810 rtx pat;
12811 tree arg0 = TREE_VALUE (arglist);
12812 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12813 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12814 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12815 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12816 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12817 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12818
12819 if (VECTOR_MODE_P (mode0))
12820 op0 = safe_vector_operand (op0, mode0);
12821 if (VECTOR_MODE_P (mode1))
12822 op1 = safe_vector_operand (op1, mode1);
12823
12824 if (! target
12825 || GET_MODE (target) != tmode
12826 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12827 target = gen_reg_rtx (tmode);
12828
12829 /* In case the insn wants input operands in modes different from
12830 the result, abort. */
12831 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12832 abort ();
12833
12834 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12835 op0 = copy_to_mode_reg (mode0, op0);
12836 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12837 op1 = copy_to_mode_reg (mode1, op1);
12838
59bef189
RH
12839 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12840 yet one of the two must not be a memory. This is normally enforced
12841 by expanders, but we didn't bother to create one here. */
12842 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12843 op0 = copy_to_mode_reg (mode0, op0);
12844
bd793c65
BS
12845 pat = GEN_FCN (icode) (target, op0, op1);
12846 if (! pat)
12847 return 0;
12848 emit_insn (pat);
12849 return target;
12850}
12851
12852/* Subroutine of ix86_expand_builtin to take care of stores. */
12853
12854static rtx
e37af218 12855ix86_expand_store_builtin (icode, arglist)
bd793c65
BS
12856 enum insn_code icode;
12857 tree arglist;
bd793c65
BS
12858{
12859 rtx pat;
12860 tree arg0 = TREE_VALUE (arglist);
12861 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12862 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12863 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12864 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12865 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12866
12867 if (VECTOR_MODE_P (mode1))
12868 op1 = safe_vector_operand (op1, mode1);
12869
12870 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
59bef189
RH
12871
12872 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12873 op1 = copy_to_mode_reg (mode1, op1);
12874
bd793c65
BS
12875 pat = GEN_FCN (icode) (op0, op1);
12876 if (pat)
12877 emit_insn (pat);
12878 return 0;
12879}
12880
12881/* Subroutine of ix86_expand_builtin to take care of unop insns. */
12882
12883static rtx
12884ix86_expand_unop_builtin (icode, arglist, target, do_load)
12885 enum insn_code icode;
12886 tree arglist;
12887 rtx target;
12888 int do_load;
12889{
12890 rtx pat;
12891 tree arg0 = TREE_VALUE (arglist);
12892 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12893 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12894 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12895
12896 if (! target
12897 || GET_MODE (target) != tmode
12898 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12899 target = gen_reg_rtx (tmode);
12900 if (do_load)
12901 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12902 else
12903 {
12904 if (VECTOR_MODE_P (mode0))
12905 op0 = safe_vector_operand (op0, mode0);
12906
12907 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12908 op0 = copy_to_mode_reg (mode0, op0);
12909 }
12910
12911 pat = GEN_FCN (icode) (target, op0);
12912 if (! pat)
12913 return 0;
12914 emit_insn (pat);
12915 return target;
12916}
12917
12918/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12919 sqrtss, rsqrtss, rcpss. */
12920
12921static rtx
12922ix86_expand_unop1_builtin (icode, arglist, target)
12923 enum insn_code icode;
12924 tree arglist;
12925 rtx target;
12926{
12927 rtx pat;
12928 tree arg0 = TREE_VALUE (arglist);
59bef189 12929 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
bd793c65
BS
12930 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12931 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12932
12933 if (! target
12934 || GET_MODE (target) != tmode
12935 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12936 target = gen_reg_rtx (tmode);
12937
12938 if (VECTOR_MODE_P (mode0))
12939 op0 = safe_vector_operand (op0, mode0);
12940
12941 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12942 op0 = copy_to_mode_reg (mode0, op0);
fce5a9f2 12943
59bef189
RH
12944 op1 = op0;
12945 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12946 op1 = copy_to_mode_reg (mode0, op1);
fce5a9f2 12947
59bef189 12948 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
12949 if (! pat)
12950 return 0;
12951 emit_insn (pat);
12952 return target;
12953}
12954
/* Subroutine of ix86_expand_builtin to take care of comparison insns.
   D describes the builtin (icode, comparison code, and the swap flag);
   returns the vector-mask result in TARGET (or a fresh pseudo).  */

static rtx
ix86_expand_sse_compare (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  /* Replace error placeholders by cleared vector registers.  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      /* Copy op1 into a fresh register before swapping, so the original
	 rtx is not disturbed.  NOTE(review): this relies on mode0 and
	 mode1 being the same mode for these compare insns.  */
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* The mask-compare patterns take the comparison rtx itself as a third
     operand.  */
  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
13006
13007/* Subroutine of ix86_expand_builtin to take care of comi insns. */
13008
13009static rtx
13010ix86_expand_sse_comi (d, arglist, target)
8b60264b 13011 const struct builtin_description *d;
bd793c65
BS
13012 tree arglist;
13013 rtx target;
13014{
13015 rtx pat;
13016 tree arg0 = TREE_VALUE (arglist);
13017 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13018 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13019 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13020 rtx op2;
13021 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13022 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13023 enum rtx_code comparison = d->comparison;
13024
13025 if (VECTOR_MODE_P (mode0))
13026 op0 = safe_vector_operand (op0, mode0);
13027 if (VECTOR_MODE_P (mode1))
13028 op1 = safe_vector_operand (op1, mode1);
13029
13030 /* Swap operands if we have a comparison that isn't available in
13031 hardware. */
13032 if (d->flag)
13033 {
13034 rtx tmp = op1;
13035 op1 = op0;
13036 op0 = tmp;
bd793c65
BS
13037 }
13038
13039 target = gen_reg_rtx (SImode);
13040 emit_move_insn (target, const0_rtx);
13041 target = gen_rtx_SUBREG (QImode, target, 0);
13042
13043 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13044 op0 = copy_to_mode_reg (mode0, op0);
13045 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13046 op1 = copy_to_mode_reg (mode1, op1);
13047
13048 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
1194ca05 13049 pat = GEN_FCN (d->icode) (op0, op1);
bd793c65
BS
13050 if (! pat)
13051 return 0;
13052 emit_insn (pat);
29628f27
BS
13053 emit_insn (gen_rtx_SET (VOIDmode,
13054 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13055 gen_rtx_fmt_ee (comparison, QImode,
1194ca05 13056 SET_DEST (pat),
29628f27 13057 const0_rtx)));
bd793c65 13058
6f1a6c5b 13059 return SUBREG_REG (target);
bd793c65
BS
13060}
13061
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.

   Builtins that need hand-written expansion are handled by the switch;
   all remaining MMX/SSE/SSE2 builtins are expanded table-driven via
   bdesc_2arg, bdesc_1arg and bdesc_comi at the bottom.  Returns the
   result rtx, or 0 for void builtins and on failure.  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    /* State-clearing / fence builtins take no operands and produce no
       value.  */
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    /* pextrw: the element selector (operand 2) must be an immediate.  */
    case IX86_BUILTIN_PEXTRW:
    case IX86_BUILTIN_PEXTRW128:
      icode = (fcode == IX86_BUILTIN_PEXTRW
	       ? CODE_FOR_mmx_pextrw
	       : CODE_FOR_sse2_pextrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  /* Return a dummy register so expansion can continue.  */
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    /* pinsrw: three operands, the selector (operand 3) must be an
       immediate.  */
    case IX86_BUILTIN_PINSRW:
    case IX86_BUILTIN_PINSRW128:
      icode = (fcode == IX86_BUILTIN_PINSRW
	       ? CODE_FOR_mmx_pinsrw
	       : CODE_FOR_sse2_pinsrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
	       : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
		  : CODE_FOR_sse2_maskmovdqu));
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    /* Scalar SSE unops that duplicate their input operand.  */
    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    /* SSE loads/stores: the load forms pass do_load == 1 so the pointer
       argument is dereferenced.  */
    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);

    /* movhps/movlps/movhpd/movlpd as loads: operand 2 is a memory
       reference built from the pointer argument.  */
    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
	       : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    /* Same patterns used as stores: the memory is the destination, and
       the source vector is also passed as the pass-through operand.  */
    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
    case IX86_BUILTIN_STOREHPD:
    case IX86_BUILTIN_STORELPD:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
	       : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
	       : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
	       : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    /* Non-temporal stores.  */
    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    /* MXCSR access goes through a stack slot.  */
    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    /* shufps/shufpd: the shuffle mask (operand 3) must be an
       immediate.  */
    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
	       ? CODE_FOR_sse_shufps
	       : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    /* pshufw/pshufd/pshufhw/pshuflw: one input plus an immediate
       shuffle mask.  */
    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
	       : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    /* Whole-register byte shifts; the patterns operate in TImode while
       the builtin's interface is V2DImode, hence the subregs.  */
    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (  fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
	       : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    /* 3DNow! builtins.  */
    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return NULL_RTX;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    /* 3DNow! extensions (Athlon).  */
    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);

    /* Register-clearing builtins.  */
    case IX86_BUILTIN_SSE_ZERO:
      target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target));
      return target;

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    case IX86_BUILTIN_CLRTI:
      target = gen_reg_rtx (V2DImode);
      emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
      return target;


    /* SSE2 double-precision builtins.  */
    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADAPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);

    case IX86_BUILTIN_STOREAPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_LOADSD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);

    case IX86_BUILTIN_STORESD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);

    /* Splat one double into both lanes via a stack slot and shufpd.  */
    case IX86_BUILTIN_SETPD1:
      target = assign_386_stack_local (DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, DFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
      emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
      return op0;

    /* Build a V2DF from two doubles through a stack temporary.  */
    case IX86_BUILTIN_SETPD:
      target = assign_386_stack_local (V2DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      emit_move_insn (adjust_address (target, DFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, DFmode, 8),
		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_movapd (op0, target));
      return op0;

    /* Load then swap lanes with shufpd(1).  */
    case IX86_BUILTIN_LOADRPD:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
					 gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
      return target;

    /* Load one double and broadcast it with shufpd(0).  */
    case IX86_BUILTIN_LOADPD1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
					 gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPD1:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STORERPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);

    case IX86_BUILTIN_CLRPD:
      target = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse_clrv2df (target));
      return target;

    /* SSE2 fences and cache control.  */
    case IX86_BUILTIN_MFENCE:
	emit_insn (gen_sse2_mfence ());
	return 0;
    case IX86_BUILTIN_LFENCE:
	emit_insn (gen_sse2_lfence ());
	return 0;

    case IX86_BUILTIN_CLFLUSH:
	arg0 = TREE_VALUE (arglist);
	op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
	icode = CODE_FOR_sse2_clflush;
	if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
	    op0 = copy_to_mode_reg (Pmode, op0);

	emit_insn (gen_sse2_clflush (op0));
	return 0;

    /* SSE2 non-temporal stores.  */
    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    /* SSE2 integer loads/stores.  */
    case IX86_BUILTIN_LOADDQA:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_LOADD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);

    case IX86_BUILTIN_STOREDQA:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
    case IX86_BUILTIN_STORED:
      return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);

    default:
      break;
    }

  /* Table-driven expansion for the remaining two-operand builtins;
     the mask-compare icodes are routed to the compare helper.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_maskcmpv4sf3
	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_maskncmpv4sf3
	    || d->icode == CODE_FOR_vmmaskncmpv4sf3
	    || d->icode == CODE_FOR_maskcmpv2df3
	    || d->icode == CODE_FOR_vmmaskcmpv2df3
	    || d->icode == CODE_FOR_maskncmpv2df3
	    || d->icode == CODE_FOR_vmmaskncmpv2df3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}
4211a8fb
JH
13614
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.
   Returns a MEM rtx through which the stored value can be reloaded.
   The three strategies are: store into the 64-bit red zone without
   moving the stack pointer; push as DImode on 64-bit without a red
   zone; or push as one or two SImode words on 32-bit.  */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;
  /* Only meaningful after reload; assign_stack_local can't be used.  */
  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      /* The red zone lets us store below the stack pointer without
	 adjusting it.  */
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      switch (mode)
	{
	/* Narrow modes are pushed as full DImode words.  */
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (DImode,
				      gen_rtx_PRE_DEC (DImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    /* On 32-bit targets, push the two halves separately;
	       high word first so the value ends up little-endian
	       contiguous at the final stack pointer.  */
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[1]));
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (GET_MODE (operand),
				      gen_rtx_PRE_DEC (SImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
13696
13697/* Free operand from the memory. */
13698void
13699ix86_free_from_memory (mode)
13700 enum machine_mode mode;
13701{
898d374d
JH
13702 if (!TARGET_64BIT || !TARGET_RED_ZONE)
13703 {
13704 int size;
13705
13706 if (mode == DImode || TARGET_64BIT)
13707 size = 8;
13708 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13709 size = 2;
13710 else
13711 size = 4;
13712 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13713 to pop or add instruction if registers are available. */
13714 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13715 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13716 GEN_INT (size))));
13717 }
4211a8fb 13718}
a946dd00 13719
f84aa48a
JH
13720/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13721 QImode must go into class Q_REGS.
13722 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
892a2d68 13723 movdf to do mem-to-mem moves through integer regs. */
f84aa48a
JH
13724enum reg_class
13725ix86_preferred_reload_class (x, class)
13726 rtx x;
13727 enum reg_class class;
13728{
1877be45
JH
13729 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
13730 return NO_REGS;
f84aa48a
JH
13731 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13732 {
13733 /* SSE can't load any constant directly yet. */
13734 if (SSE_CLASS_P (class))
13735 return NO_REGS;
13736 /* Floats can load 0 and 1. */
13737 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13738 {
13739 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13740 if (MAYBE_SSE_CLASS_P (class))
13741 return (reg_class_subset_p (class, GENERAL_REGS)
13742 ? GENERAL_REGS : FLOAT_REGS);
13743 else
13744 return class;
13745 }
13746 /* General regs can load everything. */
13747 if (reg_class_subset_p (class, GENERAL_REGS))
13748 return GENERAL_REGS;
13749 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13750 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13751 return NO_REGS;
13752 }
13753 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13754 return NO_REGS;
13755 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13756 return Q_REGS;
13757 return class;
13758}
13759
13760/* If we are copying between general and FP registers, we need a memory
13761 location. The same is true for SSE and MMX registers.
13762
13763 The macro can't work reliably when one of the CLASSES is class containing
13764 registers from multiple units (SSE, MMX, integer). We avoid this by never
13765 combining those units in single alternative in the machine description.
13766 Ensure that this constraint holds to avoid unexpected surprises.
13767
13768 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13769 enforce these sanity checks. */
13770int
13771ix86_secondary_memory_needed (class1, class2, mode, strict)
13772 enum reg_class class1, class2;
13773 enum machine_mode mode;
13774 int strict;
13775{
13776 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13777 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13778 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13779 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13780 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13781 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13782 {
13783 if (strict)
13784 abort ();
13785 else
13786 return 1;
13787 }
13788 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13789 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13790 && (mode) != SImode)
13791 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13792 && (mode) != SImode));
13793}
13794/* Return the cost of moving data from a register in class CLASS1 to
a4f31c00 13795 one in class CLASS2.
f84aa48a
JH
13796
13797 It is not required that the cost always equal 2 when FROM is the same as TO;
13798 on some machines it is expensive to move between registers if they are not
13799 general registers. */
13800int
13801ix86_register_move_cost (mode, class1, class2)
13802 enum machine_mode mode;
13803 enum reg_class class1, class2;
13804{
13805 /* In case we require secondary memory, compute cost of the store followed
d631b80a
RH
13806 by load. In order to avoid bad register allocation choices, we need
13807 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
13808
f84aa48a
JH
13809 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13810 {
d631b80a
RH
13811 int cost = 1;
13812
13813 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
13814 MEMORY_MOVE_COST (mode, class1, 1));
13815 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
13816 MEMORY_MOVE_COST (mode, class2, 1));
13817
13818 /* In case of copying from general_purpose_register we may emit multiple
13819 stores followed by single load causing memory size mismatch stall.
13820 Count this as arbitarily high cost of 20. */
62415523 13821 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
d631b80a
RH
13822 cost += 20;
13823
13824 /* In the case of FP/MMX moves, the registers actually overlap, and we
13825 have to switch modes in order to treat them differently. */
13826 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
13827 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
13828 cost += 20;
13829
13830 return cost;
f84aa48a 13831 }
d631b80a 13832
92d0fb09 13833 /* Moves between SSE/MMX and integer unit are expensive. */
62415523
JH
13834 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13835 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
fa79946e
JH
13836 return ix86_cost->mmxsse_to_integer;
13837 if (MAYBE_FLOAT_CLASS_P (class1))
13838 return ix86_cost->fp_move;
13839 if (MAYBE_SSE_CLASS_P (class1))
13840 return ix86_cost->sse_move;
13841 if (MAYBE_MMX_CLASS_P (class1))
13842 return ix86_cost->mmx_move;
f84aa48a
JH
13843 return 2;
13844}
13845
a946dd00
JH
13846/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13847int
13848ix86_hard_regno_mode_ok (regno, mode)
13849 int regno;
13850 enum machine_mode mode;
13851{
13852 /* Flags and only flags can only hold CCmode values. */
13853 if (CC_REGNO_P (regno))
13854 return GET_MODE_CLASS (mode) == MODE_CC;
13855 if (GET_MODE_CLASS (mode) == MODE_CC
13856 || GET_MODE_CLASS (mode) == MODE_RANDOM
13857 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13858 return 0;
13859 if (FP_REGNO_P (regno))
13860 return VALID_FP_MODE_P (mode);
13861 if (SSE_REGNO_P (regno))
13862 return VALID_SSE_REG_MODE (mode);
13863 if (MMX_REGNO_P (regno))
47f339cf 13864 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
a946dd00
JH
13865 /* We handle both integer and floats in the general purpose registers.
13866 In future we should be able to handle vector modes as well. */
13867 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13868 return 0;
13869 /* Take care for QImode values - they can be in non-QI regs, but then
13870 they do cause partial register stalls. */
d2836273 13871 if (regno < 4 || mode != QImode || TARGET_64BIT)
a946dd00
JH
13872 return 1;
13873 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13874}
fa79946e
JH
13875
13876/* Return the cost of moving data of mode M between a
13877 register and memory. A value of 2 is the default; this cost is
13878 relative to those in `REGISTER_MOVE_COST'.
13879
13880 If moving between registers and memory is more expensive than
13881 between two registers, you should define this macro to express the
a4f31c00
AJ
13882 relative cost.
13883
fa79946e
JH
13884 Model also increased moving costs of QImode registers in non
13885 Q_REGS classes.
13886 */
13887int
13888ix86_memory_move_cost (mode, class, in)
13889 enum machine_mode mode;
13890 enum reg_class class;
13891 int in;
13892{
13893 if (FLOAT_CLASS_P (class))
13894 {
13895 int index;
13896 switch (mode)
13897 {
13898 case SFmode:
13899 index = 0;
13900 break;
13901 case DFmode:
13902 index = 1;
13903 break;
13904 case XFmode:
13905 case TFmode:
13906 index = 2;
13907 break;
13908 default:
13909 return 100;
13910 }
13911 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13912 }
13913 if (SSE_CLASS_P (class))
13914 {
13915 int index;
13916 switch (GET_MODE_SIZE (mode))
13917 {
13918 case 4:
13919 index = 0;
13920 break;
13921 case 8:
13922 index = 1;
13923 break;
13924 case 16:
13925 index = 2;
13926 break;
13927 default:
13928 return 100;
13929 }
13930 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13931 }
13932 if (MMX_CLASS_P (class))
13933 {
13934 int index;
13935 switch (GET_MODE_SIZE (mode))
13936 {
13937 case 4:
13938 index = 0;
13939 break;
13940 case 8:
13941 index = 1;
13942 break;
13943 default:
13944 return 100;
13945 }
13946 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13947 }
13948 switch (GET_MODE_SIZE (mode))
13949 {
13950 case 1:
13951 if (in)
13952 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13953 : ix86_cost->movzbl_load);
13954 else
13955 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13956 : ix86_cost->int_store[0] + 4);
13957 break;
13958 case 2:
13959 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13960 default:
13961 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
13962 if (mode == TFmode)
13963 mode = XFmode;
3bb7e126 13964 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
d09e61b9
JH
13965 * ((int) GET_MODE_SIZE (mode)
13966 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
fa79946e
JH
13967 }
13968}
0ecf09f9 13969
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* Register SYMBOL as a constructor on SVR3-style targets by pushing
   its address in the init section.  PRIORITY is ignored.  */
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif
162f023b 13982
b069de3b
SS
#if TARGET_MACHO

/* Counter used to generate unique labels within PIC stubs.  */
static int current_machopic_label_num;

/* Write out to FILE the definition of the lazy-binding stub STUB for
   the symbol SYMB.  */

void
machopic_output_stub (file, symb, stub)
     FILE *file;
     const char *symb, *stub;
{
  unsigned int len;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Strip the funky name encoding so it does not contaminate the
     stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  len = strlen (stub);
  binder_name = alloca (len + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, len);

  len = strlen (symb);
  symbol_name = alloca (len + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, len);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      /* Pure PIC: compute the lazy-pointer address relative to the
	 current PC, then jump through it.  */
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  /* Emit the lazy pointer itself, initially resolving to the binder.  */
  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */
14047
162f023b
JH
14048/* Order the registers for register allocator. */
14049
14050void
14051x86_order_regs_for_local_alloc ()
14052{
14053 int pos = 0;
14054 int i;
14055
14056 /* First allocate the local general purpose registers. */
14057 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14058 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14059 reg_alloc_order [pos++] = i;
14060
14061 /* Global general purpose registers. */
14062 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14063 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14064 reg_alloc_order [pos++] = i;
14065
14066 /* x87 registers come first in case we are doing FP math
14067 using them. */
14068 if (!TARGET_SSE_MATH)
14069 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14070 reg_alloc_order [pos++] = i;
fce5a9f2 14071
162f023b
JH
14072 /* SSE registers. */
14073 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14074 reg_alloc_order [pos++] = i;
14075 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14076 reg_alloc_order [pos++] = i;
14077
14078 /* x87 registerts. */
14079 if (TARGET_SSE_MATH)
14080 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14081 reg_alloc_order [pos++] = i;
14082
14083 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14084 reg_alloc_order [pos++] = i;
14085
14086 /* Initialize the rest of array as we do not allocate some registers
14087 at all. */
14088 while (pos < FIRST_PSEUDO_REGISTER)
14089 reg_alloc_order [pos++] = 0;
14090}
194734e9 14091
483ab821
MM
14092/* Returns an expression indicating where the this parameter is
14093 located on entry to the FUNCTION. */
14094
14095static rtx
3961e8fe 14096x86_this_parameter (function)
483ab821
MM
14097 tree function;
14098{
14099 tree type = TREE_TYPE (function);
14100
3961e8fe
RH
14101 if (TARGET_64BIT)
14102 {
14103 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
14104 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14105 }
14106
483ab821
MM
14107 if (ix86_fntype_regparm (type) > 0)
14108 {
14109 tree parm;
14110
14111 parm = TYPE_ARG_TYPES (type);
14112 /* Figure out whether or not the function has a variable number of
14113 arguments. */
3961e8fe 14114 for (; parm; parm = TREE_CHAIN (parm))
483ab821
MM
14115 if (TREE_VALUE (parm) == void_type_node)
14116 break;
14117 /* If not, the this parameter is in %eax. */
14118 if (parm)
14119 return gen_rtx_REG (SImode, 0);
14120 }
14121
14122 if (aggregate_value_p (TREE_TYPE (type)))
14123 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14124 else
14125 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14126}
14127
3961e8fe
RH
14128/* Determine whether x86_output_mi_thunk can succeed. */
14129
14130static bool
14131x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
14132 tree thunk ATTRIBUTE_UNUSED;
14133 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
14134 HOST_WIDE_INT vcall_offset;
14135 tree function;
14136{
14137 /* 64-bit can handle anything. */
14138 if (TARGET_64BIT)
14139 return true;
14140
14141 /* For 32-bit, everything's fine if we have one free register. */
14142 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
14143 return true;
14144
14145 /* Need a free register for vcall_offset. */
14146 if (vcall_offset)
14147 return false;
14148
14149 /* Need a free register for GOT references. */
14150 if (flag_pic && !(*targetm.binds_local_p) (function))
14151 return false;
14152
14153 /* Otherwise ok. */
14154 return true;
14155}
14156
14157/* Output the assembler code for a thunk function. THUNK_DECL is the
14158 declaration for the thunk function itself, FUNCTION is the decl for
14159 the target function. DELTA is an immediate constant offset to be
14160 added to THIS. If VCALL_OFFSET is non-zero, the word at
14161 *(*this + vcall_offset) should be added to THIS. */
483ab821 14162
c590b625 14163static void
3961e8fe
RH
14164x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
14165 FILE *file ATTRIBUTE_UNUSED;
483ab821 14166 tree thunk ATTRIBUTE_UNUSED;
eb0424da 14167 HOST_WIDE_INT delta;
3961e8fe 14168 HOST_WIDE_INT vcall_offset;
194734e9
JH
14169 tree function;
14170{
194734e9 14171 rtx xops[3];
3961e8fe
RH
14172 rtx this = x86_this_parameter (function);
14173 rtx this_reg, tmp;
194734e9 14174
3961e8fe
RH
14175 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14176 pull it in now and let DELTA benefit. */
14177 if (REG_P (this))
14178 this_reg = this;
14179 else if (vcall_offset)
14180 {
14181 /* Put the this parameter into %eax. */
14182 xops[0] = this;
14183 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14184 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14185 }
14186 else
14187 this_reg = NULL_RTX;
14188
14189 /* Adjust the this parameter by a fixed constant. */
14190 if (delta)
194734e9 14191 {
483ab821 14192 xops[0] = GEN_INT (delta);
3961e8fe
RH
14193 xops[1] = this_reg ? this_reg : this;
14194 if (TARGET_64BIT)
194734e9 14195 {
3961e8fe
RH
14196 if (!x86_64_general_operand (xops[0], DImode))
14197 {
14198 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14199 xops[1] = tmp;
14200 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14201 xops[0] = tmp;
14202 xops[1] = this;
14203 }
14204 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
194734e9
JH
14205 }
14206 else
3961e8fe 14207 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
194734e9 14208 }
3961e8fe
RH
14209
14210 /* Adjust the this parameter by a value stored in the vtable. */
14211 if (vcall_offset)
194734e9 14212 {
3961e8fe
RH
14213 if (TARGET_64BIT)
14214 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14215 else
14216 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
483ab821 14217
3961e8fe
RH
14218 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14219 xops[1] = tmp;
14220 if (TARGET_64BIT)
14221 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14222 else
14223 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
483ab821 14224
3961e8fe
RH
14225 /* Adjust the this parameter. */
14226 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14227 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14228 {
14229 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14230 xops[0] = GEN_INT (vcall_offset);
14231 xops[1] = tmp2;
14232 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14233 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
483ab821 14234 }
3961e8fe
RH
14235 xops[1] = this_reg;
14236 if (TARGET_64BIT)
14237 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14238 else
14239 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14240 }
194734e9 14241
3961e8fe
RH
14242 /* If necessary, drop THIS back to its stack slot. */
14243 if (this_reg && this_reg != this)
14244 {
14245 xops[0] = this_reg;
14246 xops[1] = this;
14247 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14248 }
194734e9 14249
3961e8fe
RH
14250 xops[0] = DECL_RTL (function);
14251 if (TARGET_64BIT)
14252 {
14253 if (!flag_pic || (*targetm.binds_local_p) (function))
14254 output_asm_insn ("jmp\t%P0", xops);
14255 else
fcbe3b89
RH
14256 {
14257 tmp = XEXP (xops[0], 0);
14258 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
14259 tmp = gen_rtx_CONST (Pmode, tmp);
14260 tmp = gen_rtx_MEM (QImode, tmp);
14261 xops[0] = tmp;
14262 output_asm_insn ("jmp\t%A0", xops);
14263 }
3961e8fe
RH
14264 }
14265 else
14266 {
14267 if (!flag_pic || (*targetm.binds_local_p) (function))
14268 output_asm_insn ("jmp\t%P0", xops);
194734e9
JH
14269 else
14270 {
3961e8fe
RH
14271 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14272 output_set_got (tmp);
14273
14274 xops[1] = tmp;
14275 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14276 output_asm_insn ("jmp\t{*}%1", xops);
194734e9
JH
14277 }
14278 }
14279}
e2500fed 14280
e932b21b
JH
14281int
14282x86_field_alignment (field, computed)
14283 tree field;
14284 int computed;
14285{
14286 enum machine_mode mode;
ad9335eb
JJ
14287 tree type = TREE_TYPE (field);
14288
14289 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
e932b21b 14290 return computed;
ad9335eb
JJ
14291 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14292 ? get_inner_array_type (type) : type);
39e3a681
JJ
14293 if (mode == DFmode || mode == DCmode
14294 || GET_MODE_CLASS (mode) == MODE_INT
14295 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
e932b21b
JH
14296 return MIN (32, computed);
14297 return computed;
14298}
14299
a5fa1ecd
JH
14300/* Output assembler code to FILE to increment profiler label # LABELNO
14301 for profiling a function entry. */
14302void
14303x86_function_profiler (file, labelno)
14304 FILE *file;
14305 int labelno;
14306{
14307 if (TARGET_64BIT)
14308 if (flag_pic)
14309 {
14310#ifndef NO_PROFILE_COUNTERS
14311 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
14312#endif
14313 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
14314 }
14315 else
14316 {
14317#ifndef NO_PROFILE_COUNTERS
14318 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
14319#endif
14320 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14321 }
14322 else if (flag_pic)
14323 {
14324#ifndef NO_PROFILE_COUNTERS
14325 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14326 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
14327#endif
14328 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
14329 }
14330 else
14331 {
14332#ifndef NO_PROFILE_COUNTERS
14333 fprintf (file, "\tmovl\t$%sP%d,%%$s\n", LPREFIX, labelno,
14334 PROFILE_COUNT_REGISTER);
14335#endif
14336 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14337 }
14338}
14339
2a500b9e
JH
14340/* Implement machine specific optimizations.
14341 At the moment we implement single transformation: AMD Athlon works faster
14342 when RET is not destination of conditional jump or directly preceeded
14343 by other jump instruction. We avoid the penalty by inserting NOP just
14344 before the RET instructions in such cases. */
14345void
14346x86_machine_dependent_reorg (first)
14347 rtx first ATTRIBUTE_UNUSED;
14348{
14349 edge e;
14350
14351 if (!TARGET_ATHLON || !optimize || optimize_size)
14352 return;
14353 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
14354 {
14355 basic_block bb = e->src;
14356 rtx ret = bb->end;
14357 rtx prev;
14358 bool insert = false;
14359
14360 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
14361 continue;
14362 prev = prev_nonnote_insn (ret);
14363 if (prev && GET_CODE (prev) == CODE_LABEL)
14364 {
14365 edge e;
14366 for (e = bb->pred; e; e = e->pred_next)
14367 if (EDGE_FREQUENCY (e) && e->src->index > 0
14368 && !(e->flags & EDGE_FALLTHRU))
14369 insert = 1;
14370 }
14371 if (!insert)
14372 {
14373 prev = prev_real_insn (ret);
14374 if (prev && GET_CODE (prev) == JUMP_INSN
14375 && any_condjump_p (prev))
14376 insert = 1;
14377 }
14378 if (insert)
14379 emit_insn_before (gen_nop (), ret);
14380 }
14381}
14382
e2500fed 14383#include "gt-i386.h"