/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
   Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif

/* Processor costs (relative to an add) */
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,			/* cost of an add instruction */
  3,			/* cost of a lea instruction */
  2,			/* variable shift costs */
  3,			/* constant shift costs */
  3,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  3,			/* cost of a divide/mod */
  0,			/* "large" insn */
  2,			/* MOVE_RATIO */
  2,			/* cost for loading QImode using movzbl */
  {2, 2, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 2},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {2, 2, 2},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  3,			/* cost of moving MMX register */
  {3, 3},		/* cost of loading MMX registers
			   in SImode and DImode */
  {3, 3},		/* cost of storing MMX registers
			   in SImode and DImode */
  3,			/* cost of moving SSE register */
  {3, 3, 3},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {3, 3, 3},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
};
/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  6,			/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  23,			/* cost of a divide/mod */
  15,			/* "large" insn */
  3,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
};

struct processor_costs i486_cost = {	/* 486 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  12,			/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  40,			/* cost of a divide/mod */
  15,			/* "large" insn */
  3,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3			/* MMX or SSE register to integer */
};

struct processor_costs pentium_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  4,			/* variable shift costs */
  1,			/* constant shift costs */
  11,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  25,			/* cost of a divide/mod */
  8,			/* "large" insn */
  6,			/* MOVE_RATIO */
  6,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  8,			/* cost of moving MMX register */
  {8, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {8, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3			/* MMX or SSE register to integer */
};

struct processor_costs pentiumpro_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  4,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  17,			/* cost of a divide/mod */
  8,			/* "large" insn */
  6,			/* MOVE_RATIO */
  2,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3			/* MMX or SSE register to integer */
};

struct processor_costs k6_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  3,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  18,			/* cost of a divide/mod */
  8,			/* "large" insn */
  4,			/* MOVE_RATIO */
  3,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 4},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  6			/* MMX or SSE register to integer */
};

struct processor_costs athlon_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  5,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  42,			/* cost of a divide/mod */
  8,			/* "large" insn */
  9,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 20},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 16},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  6			/* MMX or SSE register to integer */
};

struct processor_costs pentium4_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  8,			/* variable shift costs */
  8,			/* constant shift costs */
  30,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  112,			/* cost of a divide/mod */
  16,			/* "large" insn */
  6,			/* MOVE_RATIO */
  2,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  12,			/* cost of moving SSE register */
  {12, 12, 12},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  10,			/* MMX or SSE register to integer */
};

struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_3dnow_a = m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;

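/* For illustration: each of the tunables above is a bitmask indexed by
   processor, so a feature is consulted by ANDing the mask with the bit
   for the CPU in use, as done later in this file:

     if (x86_3dnow_a & (1 << ix86_arch))
       target_flags |= MASK_3DNOW_A;

   or via the CPUMASK shorthand used below with
   x86_accumulate_outgoing_args, presumably (1 << ix86_cpu) defined in
   i386.h alongside the TARGET_* convenience macros.  */
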
/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30
/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;

#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))

static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};

/* The "default" register map used in 32-bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

static int x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
						1 /*RDX*/, 2 /*RCX*/,
						FIRST_REX_INT_REG /*R8 */,
						FIRST_REX_INT_REG + 1 /*R9 */};
static int x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDX*/, 5, 4};

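/* For clarity: the values above are GCC register numbers as laid out in
   regclass_map (ax, dx, cx, bx, si, di, ...), not hardware encodings;
   hence 5 is %rdi, 4 is %rsi, 1 is %rdx and 2 is %rcx.  */
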
/* The "default" register map used in 64-bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

struct rtx_def *ix86_compare_op0 = NULL_RTX;
struct rtx_def *ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					<- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					<- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
			)
   [va_arg registers]  (
			> to_allocate	<- FRAME_POINTER
   [frame]	       (
			)
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};

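/* Illustrative sketch of how the offsets relate, consistent with the
   diagram above (the authoritative computation is in
   ix86_compute_frame_layout, prototyped later in this file):

     frame_pointer_offset ~ hard_frame_pointer_offset
			    + nregs * UNITS_PER_WORD
			    + padding1 + va_arg_size
     stack_pointer_offset ~ frame_pointer_offset + frame + padding2
			    + outgoing_arguments_size  */
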
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;

/* which cpu are we scheduling for */
enum processor_type ix86_cpu;

/* which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;

static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
					  rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_free_machine_status PARAMS ((struct function *));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
					 rtx));
static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
					    rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					   rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static int ix86_save_reg PARAMS ((int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
const struct attribute_spec ix86_attribute_table[];
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));

#ifdef DO_GLOBAL_CTORS_BODY
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif
#if defined(TARGET_ELF) && defined(TARGET_COFF)
static void sco_asm_named_section PARAMS ((const char *, unsigned int));
static void sco_asm_out_constructor PARAMS ((rtx, int));
#endif
/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

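/* Worked example of the classification scheme: a structure such as

     struct s { double d; int i; };

   occupies two 8-byte words.  The first word (the double) classifies
   as X86_64_SSEDF_CLASS and the second (the int plus trailing padding)
   as X86_64_INTEGERSI_CLASS, so the argument is passed in one SSE and
   one integer register.  */
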
#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
				      enum x86_64_reg_class [MAX_CLASSES],
				      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
				     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
					int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
						    enum x86_64_reg_class));

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#if defined (OSF_OS) || defined (TARGET_OSF1ELF)
  static void ix86_osf_output_function_prologue PARAMS ((FILE *,
							 HOST_WIDE_INT));
# undef TARGET_ASM_FUNCTION_PROLOGUE
# define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder

struct gcc_target targetm = TARGET_INITIALIZER;

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      struct processor_costs *cost;	/* Processor costs */
      int target_enable;		/* Target flags to enable.  */
      int target_disable;		/* Target flags to disable.  */
      int align_loop;			/* Default alignments.  */
      int align_jump;
      int align_func;
      int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 2, 2, 2, 1},
      {&i486_cost, 0, 0, 4, 4, 4, 1},
      {&pentium_cost, 0, 0, -4, -4, -4, 1},
      {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
      {&k6_cost, 0, 0, -5, -5, 4, 1},
      {&athlon_cost, 0, 0, 4, -4, 4, 1},
      {&pentium4_cost, 0, 0, 2, 2, 2, 1}
    };

  static struct pta
    {
      const char *name;		/* processor name or nickname.  */
      enum processor_type processor;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386},
      {"i486", PROCESSOR_I486},
      {"i586", PROCESSOR_PENTIUM},
      {"pentium", PROCESSOR_PENTIUM},
      {"i686", PROCESSOR_PENTIUMPRO},
      {"pentiumpro", PROCESSOR_PENTIUMPRO},
      {"k6", PROCESSOR_K6},
      {"athlon", PROCESSOR_ATHLON},
      {"pentium4", PROCESSOR_PENTIUM4},
    };

  int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  ix86_arch = PROCESSOR_I386;
  ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("Code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("Code model `%s' not supported in the %s bit mode.",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("Code model `large' not supported yet.");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in.",
	   (target_flags & MASK_64BIT) ? 64 : 32);

  if (ix86_arch_string != 0)
    {
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
	  {
	    ix86_arch = processor_alias_table[i].processor;
	    /* Default cpu tuning to the architecture.  */
	    ix86_cpu = ix86_arch;
	    break;
	  }

      if (i == pta_size)
	error ("bad value (%s) for -march= switch", ix86_arch_string);
    }

  if (ix86_cpu_string != 0)
    {
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
	  {
	    ix86_cpu = processor_alias_table[i].processor;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
    }

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;
  free_machine_status = ix86_free_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else
    if (TARGET_64BIT)
      ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
#define abs(n) ((n) < 0 ? -(n) : (n))
  if (align_loops == 0)
    align_loops = 1 << abs (processor_target_table[ix86_cpu].align_loop);
  if (align_jumps == 0)
    align_jumps = 1 << abs (processor_target_table[ix86_cpu].align_jump);
  if (align_functions == 0)
    align_functions = 1 << abs (processor_target_table[ix86_cpu].align_func);

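  /* Worked example: with -mcpu=k6 the table entry is
     {&k6_cost, 0, 0, -5, -5, 4, 1}, so an unset align_functions
     defaults to 1 << abs (4) = 16 bytes, and align_loops/align_jumps
     default to 1 << abs (-5) = 32 bytes (the sign is discarded by the
     abs macro above).  */
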
  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128.  */
  ix86_preferred_stack_boundary = 128;
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 3 : 2) || i > 31)
	error ("-mpreferred-stack-boundary=%d is not between %d and 31", i,
	       TARGET_64BIT ? 3 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode.");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode.");
      /* Enable by default the SSE and MMX builtins.  */
      target_flags |= MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE;
    }

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    target_flags |= MASK_MMX;

  /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
  if (TARGET_3DNOW)
    {
      target_flags |= MASK_MMX;
      /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
	 extensions it adds.  */
      if (x86_3dnow_a & (1 << ix86_arch))
	target_flags |= MASK_3DNOW_A;
    }
  if ((x86_accumulate_outgoing_args & CPUMASK)
      && !(target_flags & MASK_NO_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
}

void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
  if (TARGET_64BIT && optimize >= 1)
    flag_omit_frame_pointer = 1;
  if (TARGET_64BIT)
    flag_pcc_struct_return = 0;
}

/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 1, 1, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 1, 1, false, false, false, ix86_handle_dll_attribute },
  { "shared", 1, 1, true, false, false, ix86_handle_shared_attribute },
#endif
  { NULL, 0, 0, false, false, false, NULL }
};

/* Handle a "cdecl" or "stdcall" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args ATTRIBUTE_UNUSED;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  if (TARGET_64BIT)
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle a "regparm" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning ("`%s' attribute requires an integer constant argument",
		   IDENTIFIER_POINTER (name));
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning ("argument to `%s' attribute larger than %d",
		   IDENTIFIER_POINTER (name), REGPARM_MAX);
	  *no_add_attrs = true;
	}
    }

  return NULL_TREE;
}

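/* User-level usage example for the attribute handled above:

     int sum (int a, int b) __attribute__ ((regparm (2)));

   asks for the first two integer arguments to be passed in registers
   (conventionally %eax, then %edx) instead of on the stack.  */
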
#if defined (OSF_OS) || defined (TARGET_OSF1ELF)

/* Generate the assembly code for function entry.  FILE is a stdio
   stream to output the code to.  SIZE is an int: how many units of
   temporary storage to allocate.

   Refer to the array `regs_ever_live' to determine which registers to
   save; `regs_ever_live[I]' is nonzero if register number I is ever
   used in the function.  This function is responsible for knowing
   which registers should not be saved even if used.

   We override it here to allow for the new profiling code to go before
   the prologue and the old mcount code to go after the prologue (and
   after %ebx has been set up for ELF shared library support).  */

static void
ix86_osf_output_function_prologue (file, size)
     FILE *file;
     HOST_WIDE_INT size;
{
  char *prefix = "";
  char *lprefix = LPREFIX;
  int labelno = profile_label_no;

#ifdef OSF_OS

  if (TARGET_UNDERSCORES)
    prefix = "_";

  if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic && !HALF_PIC_P ())
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else if (HALF_PIC_P ())
	{
	  rtx symref;

	  HALF_PIC_EXTERNAL ("_mcount_ptr");
	  symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
						     "_mcount_ptr"));

	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
		   XSTR (symref, 0));
	  fprintf (file, "\tcall *(%%eax)\n");
	}

      else
	{
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }

#else /* !OSF_OS */

  if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic)
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else
	{
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }
#endif /* !OSF_OS */

  function_prologue (file, size);
}

#endif /* OSF_OS || TARGET_OSF1ELF */

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}

/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {

    /* Stdcall functions will pop the stack if not variable args.  */
    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
      rtd = 1;

    if (rtd
	&& (TYPE_ARG_TYPES (funtype) == NULL_TREE
	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		== void_type_node)))
      return size;
  }

  /* Lose any fake structure return argument.  */
  if (aggregate_value_p (TREE_TYPE (funtype))
      && !TARGET_64BIT)
    return GET_MODE_SIZE (Pmode);

  return 0;
}
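
/* Worked example: for

     int __attribute__ ((stdcall)) f (int a, int b);

   SIZE is 8 and the argument list is fixed, so this returns 8 and the
   callee pops its own arguments with `ret $8'.  A plain cdecl function
   returns 0 and the caller pops.  */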
b08de47e
MM
1300\f
1301/* Argument support functions. */
1302
53c17031
JH
1303/* Return true when register may be used to pass function parameters. */
1304bool
1305ix86_function_arg_regno_p (regno)
1306 int regno;
1307{
1308 int i;
1309 if (!TARGET_64BIT)
1310 return regno < REGPARM_MAX || (TARGET_SSE && SSE_REGNO_P (regno));
1311 if (SSE_REGNO_P (regno) && TARGET_SSE)
1312 return true;
1313 /* RAX is used as hidden argument to va_arg functions. */
1314 if (!regno)
1315 return true;
1316 for (i = 0; i < REGPARM_MAX; i++)
1317 if (regno == x86_64_int_parameter_registers[i])
1318 return true;
1319 return false;
1320}
1321
b08de47e
MM
1322/* Initialize a variable CUM of type CUMULATIVE_ARGS
1323 for a call to a function whose data type is FNTYPE.
1324 For a library call, FNTYPE is 0. */
1325
1326void
1327init_cumulative_args (cum, fntype, libname)
e9a25f70 1328 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
b08de47e
MM
1329 tree fntype; /* tree ptr for function decl */
1330 rtx libname; /* SYMBOL_REF of library name or 0 */
1331{
1332 static CUMULATIVE_ARGS zero_cum;
1333 tree param, next_param;
1334
1335 if (TARGET_DEBUG_ARG)
1336 {
1337 fprintf (stderr, "\ninit_cumulative_args (");
1338 if (fntype)
e9a25f70
JL
1339 fprintf (stderr, "fntype code = %s, ret code = %s",
1340 tree_code_name[(int) TREE_CODE (fntype)],
1341 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
1342 else
1343 fprintf (stderr, "no fntype");
1344
1345 if (libname)
1346 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1347 }
1348
1349 *cum = zero_cum;
1350
1351 /* Set up the number of registers to use for passing arguments. */
e075ae69 1352 cum->nregs = ix86_regparm;
53c17031
JH
1353 cum->sse_nregs = SSE_REGPARM_MAX;
1354 if (fntype && !TARGET_64BIT)
b08de47e
MM
1355 {
1356 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 1357
b08de47e
MM
1358 if (attr)
1359 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1360 }
53c17031 1361 cum->maybe_vaarg = false;
b08de47e
MM
1362
1363 /* Determine if this function has variable arguments. This is
1364 indicated by the last argument being 'void_type_mode' if there
1365 are no variable arguments. If there are variable arguments, then
1366 we won't pass anything in registers */
1367
1368 if (cum->nregs)
1369 {
1370 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 1371 param != 0; param = next_param)
b08de47e
MM
1372 {
1373 next_param = TREE_CHAIN (param);
e9a25f70 1374 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
53c17031
JH
1375 {
1376 if (!TARGET_64BIT)
1377 cum->nregs = 0;
1378 cum->maybe_vaarg = true;
1379 }
b08de47e
MM
1380 }
1381 }
53c17031
JH
1382 if ((!fntype && !libname)
1383 || (fntype && !TYPE_ARG_TYPES (fntype)))
1384 cum->maybe_vaarg = 1;
b08de47e
MM
1385
1386 if (TARGET_DEBUG_ARG)
1387 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1388
1389 return;
1390}
1391
53c17031
JH
1392/* x86-64 register passing impleemntation. See x86-64 ABI for details. Goal
1393 of this code is to classify each 8bytes of incomming argument by the register
1394 class and assign registers accordingly. */
1395
1396/* Return the union class of CLASS1 and CLASS2.
1397 See the x86-64 PS ABI for details. */
1398
1399static enum x86_64_reg_class
1400merge_classes (class1, class2)
1401 enum x86_64_reg_class class1, class2;
1402{
1403 /* Rule #1: If both classes are equal, this is the resulting class. */
1404 if (class1 == class2)
1405 return class1;
1406
1407 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1408 the other class. */
1409 if (class1 == X86_64_NO_CLASS)
1410 return class2;
1411 if (class2 == X86_64_NO_CLASS)
1412 return class1;
1413
1414 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1415 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1416 return X86_64_MEMORY_CLASS;
1417
1418 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1419 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1420 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1421 return X86_64_INTEGERSI_CLASS;
1422 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1423 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1424 return X86_64_INTEGER_CLASS;
1425
1426 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1427 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1428 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1429 return X86_64_MEMORY_CLASS;
1430
1431 /* Rule #6: Otherwise class SSE is used. */
1432 return X86_64_SSE_CLASS;
1433}
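/* Two illustrative merges (hypothetical inputs): by rule #4, merging
   X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS yields
   X86_64_INTEGERSI_CLASS; by rule #5, merging X86_64_SSE_CLASS with
   X86_64_X87_CLASS yields X86_64_MEMORY_CLASS, forcing the argument
   onto the stack.  */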
1434
1435/* Classify the argument of type TYPE and mode MODE.
1436 CLASSES will be filled by the register class used to pass each word
1437 of the operand. The number of words is returned. In case the parameter
1438 should be passed in memory, 0 is returned. As a special case for zero
1439 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1440
1441 BIT_OFFSET is used internally for handling records and specifies the
1442 offset in bits modulo 256 to avoid overflow cases.
1443
1444 See the x86-64 PS ABI for details.
1445*/
1446
1447static int
1448classify_argument (mode, type, classes, bit_offset)
1449 enum machine_mode mode;
1450 tree type;
1451 enum x86_64_reg_class classes[MAX_CLASSES];
1452 int bit_offset;
1453{
1454 int bytes =
1455 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1456 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1457
1458 if (type && AGGREGATE_TYPE_P (type))
1459 {
1460 int i;
1461 tree field;
1462 enum x86_64_reg_class subclasses[MAX_CLASSES];
1463
1464 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1465 if (bytes > 16)
1466 return 0;
1467
1468 for (i = 0; i < words; i++)
1469 classes[i] = X86_64_NO_CLASS;
1470
1471 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
1472 signal the memory class, so handle this as a special case. */
1473 if (!words)
1474 {
1475 classes[0] = X86_64_NO_CLASS;
1476 return 1;
1477 }
1478
1479 /* Classify each field of the record and merge the classes. */
1480 if (TREE_CODE (type) == RECORD_TYPE)
1481 {
1482 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1483 {
1484 if (TREE_CODE (field) == FIELD_DECL)
1485 {
1486 int num;
1487
1488 /* Bitfields are always classified as integer. Handle them
1489 early, since later code would consider them to be
1490 misaligned integers. */
1491 if (DECL_BIT_FIELD (field))
1492 {
1493 for (i = int_bit_position (field) / 8 / 8;
1494 i < (int_bit_position (field)
1495 + tree_low_cst (DECL_SIZE (field), 0)
1496 + 63) / 8 / 8; i++)
1497 classes[i] =
1498 merge_classes (X86_64_INTEGER_CLASS,
1499 classes[i]);
1500 }
1501 else
1502 {
1503 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1504 TREE_TYPE (field), subclasses,
1505 (int_bit_position (field)
1506 + bit_offset) % 256);
1507 if (!num)
1508 return 0;
1509 for (i = 0; i < num; i++)
1510 {
1511 int pos =
1512 (int_bit_position (field) + bit_offset) / 8 / 8;
1513 classes[i + pos] =
1514 merge_classes (subclasses[i], classes[i + pos]);
1515 }
1516 }
1517 }
1518 }
1519 }
1520 /* Arrays are handled as small records. */
1521 else if (TREE_CODE (type) == ARRAY_TYPE)
1522 {
1523 int num;
1524 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1525 TREE_TYPE (type), subclasses, bit_offset);
1526 if (!num)
1527 return 0;
1528
1529 /* The partial classes are now full classes. */
1530 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1531 subclasses[0] = X86_64_SSE_CLASS;
1532 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1533 subclasses[0] = X86_64_INTEGER_CLASS;
1534
1535 for (i = 0; i < words; i++)
1536 classes[i] = subclasses[i % num];
1537 }
1538 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
1539 else if (TREE_CODE (type) == UNION_TYPE)
1540 {
1541 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1542 {
1543 if (TREE_CODE (field) == FIELD_DECL)
1544 {
1545 int num;
1546 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1547 TREE_TYPE (field), subclasses,
1548 bit_offset);
1549 if (!num)
1550 return 0;
1551 for (i = 0; i < num; i++)
1552 classes[i] = merge_classes (subclasses[i], classes[i]);
1553 }
1554 }
1555 }
1556 else
1557 abort ();
1558
1559 /* Final merger cleanup. */
1560 for (i = 0; i < words; i++)
1561 {
1562 /* If one class is MEMORY, everything should be passed in
1563 memory. */
1564 if (classes[i] == X86_64_MEMORY_CLASS)
1565 return 0;
1566
1567 /* X86_64_SSEUP_CLASS should always be preceded by
1568 X86_64_SSE_CLASS. */
1569 if (classes[i] == X86_64_SSEUP_CLASS
1570 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1571 classes[i] = X86_64_SSE_CLASS;
1572
1573 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1574 if (classes[i] == X86_64_X87UP_CLASS
1575 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1576 classes[i] = X86_64_SSE_CLASS;
1577 }
1578 return words;
1579 }
1580
1581 /* Compute the alignment needed. We align all types to their natural
1582 boundaries, with the exception of XFmode, which is aligned to 128 bits. */
1583 if (mode != VOIDmode && mode != BLKmode)
1584 {
1585 int mode_alignment = GET_MODE_BITSIZE (mode);
1586
1587 if (mode == XFmode)
1588 mode_alignment = 128;
1589 else if (mode == XCmode)
1590 mode_alignment = 256;
1591 /* Misaligned fields are always passed in memory. */
1592 if (bit_offset % mode_alignment)
1593 return 0;
1594 }
1595
1596 /* Classification of atomic types. */
1597 switch (mode)
1598 {
1599 case DImode:
1600 case SImode:
1601 case HImode:
1602 case QImode:
1603 case CSImode:
1604 case CHImode:
1605 case CQImode:
1606 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1607 classes[0] = X86_64_INTEGERSI_CLASS;
1608 else
1609 classes[0] = X86_64_INTEGER_CLASS;
1610 return 1;
1611 case CDImode:
1612 case TImode:
1613 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1614 return 2;
1615 case CTImode:
1616 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1617 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1618 return 4;
1619 case SFmode:
1620 if (!(bit_offset % 64))
1621 classes[0] = X86_64_SSESF_CLASS;
1622 else
1623 classes[0] = X86_64_SSE_CLASS;
1624 return 1;
1625 case DFmode:
1626 classes[0] = X86_64_SSEDF_CLASS;
1627 return 1;
1628 case TFmode:
1629 classes[0] = X86_64_X87_CLASS;
1630 classes[1] = X86_64_X87UP_CLASS;
1631 return 2;
1632 case TCmode:
1633 classes[0] = X86_64_X87_CLASS;
1634 classes[1] = X86_64_X87UP_CLASS;
1635 classes[2] = X86_64_X87_CLASS;
1636 classes[3] = X86_64_X87UP_CLASS;
1637 return 4;
1638 case DCmode:
1639 classes[0] = X86_64_SSEDF_CLASS;
1640 classes[1] = X86_64_SSEDF_CLASS;
1641 return 2;
1642 case SCmode:
1643 classes[0] = X86_64_SSE_CLASS;
1644 return 1;
1645 case BLKmode:
1646 return 0;
1647 default:
1648 abort ();
1649 }
1650}
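/* Sample classifications implied by the switch above (our own examples):
   a DImode scalar is a single X86_64_INTEGER_CLASS word, an SFmode
   scalar at a 64-bit aligned offset is X86_64_SSESF_CLASS, and TFmode
   always takes the X86_64_X87_CLASS / X86_64_X87UP_CLASS pair, which
   examine_argument below accepts only for return values.  */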
1651
1652/* Examine the argument and set the number of registers required in each
1653 class. Return 0 if the parameter should be passed in memory. */
1654static int
1655examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1656 enum machine_mode mode;
1657 tree type;
1658 int *int_nregs, *sse_nregs;
1659 int in_return;
1660{
1661 enum x86_64_reg_class class[MAX_CLASSES];
1662 int n = classify_argument (mode, type, class, 0);
1663
1664 *int_nregs = 0;
1665 *sse_nregs = 0;
1666 if (!n)
1667 return 0;
1668 for (n--; n >= 0; n--)
1669 switch (class[n])
1670 {
1671 case X86_64_INTEGER_CLASS:
1672 case X86_64_INTEGERSI_CLASS:
1673 (*int_nregs)++;
1674 break;
1675 case X86_64_SSE_CLASS:
1676 case X86_64_SSESF_CLASS:
1677 case X86_64_SSEDF_CLASS:
1678 (*sse_nregs)++;
1679 break;
1680 case X86_64_NO_CLASS:
1681 case X86_64_SSEUP_CLASS:
1682 break;
1683 case X86_64_X87_CLASS:
1684 case X86_64_X87UP_CLASS:
1685 if (!in_return)
1686 return 0;
1687 break;
1688 case X86_64_MEMORY_CLASS:
1689 abort ();
1690 }
1691 return 1;
1692}
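/* For instance (an assumed example): a struct classified as
   { X86_64_INTEGER_CLASS, X86_64_SSEDF_CLASS } comes back with
   *int_nregs = 1 and *sse_nregs = 1; callers compare these counts
   against the registers still available before committing to passing
   the argument in registers.  */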
1693/* Construct a container for the argument as used by the GCC interface. See
1694 FUNCTION_ARG for the detailed description. */
1695static rtx
1696construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1697 enum machine_mode mode;
1698 tree type;
1699 int in_return;
1700 int nintregs, nsseregs;
1701 int *intreg, sse_regno;
1702{
1703 enum machine_mode tmpmode;
1704 int bytes =
1705 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1706 enum x86_64_reg_class class[MAX_CLASSES];
1707 int n;
1708 int i;
1709 int nexps = 0;
1710 int needed_sseregs, needed_intregs;
1711 rtx exp[MAX_CLASSES];
1712 rtx ret;
1713
1714 n = classify_argument (mode, type, class, 0);
1715 if (TARGET_DEBUG_ARG)
1716 {
1717 if (!n)
1718 fprintf (stderr, "Memory class\n");
1719 else
1720 {
1721 fprintf (stderr, "Classes:");
1722 for (i = 0; i < n; i++)
1723 {
1724 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1725 }
1726 fprintf (stderr, "\n");
1727 }
1728 }
1729 if (!n)
1730 return NULL;
1731 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1732 return NULL;
1733 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1734 return NULL;
1735
1736 /* First construct the simple cases. Avoid SCmode, since we want to use
1737 a single register to pass this type. */
1738 if (n == 1 && mode != SCmode)
1739 switch (class[0])
1740 {
1741 case X86_64_INTEGER_CLASS:
1742 case X86_64_INTEGERSI_CLASS:
1743 return gen_rtx_REG (mode, intreg[0]);
1744 case X86_64_SSE_CLASS:
1745 case X86_64_SSESF_CLASS:
1746 case X86_64_SSEDF_CLASS:
1747 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1748 case X86_64_X87_CLASS:
1749 return gen_rtx_REG (mode, FIRST_STACK_REG);
1750 case X86_64_NO_CLASS:
1751 /* Zero sized array, struct or class. */
1752 return NULL;
1753 default:
1754 abort ();
1755 }
1756 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1757 return gen_rtx_REG (TImode, SSE_REGNO (sse_regno));
1758 if (n == 2
1759 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1760 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1761 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1762 && class[1] == X86_64_INTEGER_CLASS
1763 && (mode == CDImode || mode == TImode)
1764 && intreg[0] + 1 == intreg[1])
1765 return gen_rtx_REG (mode, intreg[0]);
1766 if (n == 4
1767 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1768 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1769 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1770
1771 /* Otherwise figure out the entries of the PARALLEL. */
1772 for (i = 0; i < n; i++)
1773 {
1774 switch (class[i])
1775 {
1776 case X86_64_NO_CLASS:
1777 break;
1778 case X86_64_INTEGER_CLASS:
1779 case X86_64_INTEGERSI_CLASS:
1780 /* Merge TImodes on aligned occasions here too. */
1781 if (i * 8 + 8 > bytes)
1782 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1783 else if (class[i] == X86_64_INTEGERSI_CLASS)
1784 tmpmode = SImode;
1785 else
1786 tmpmode = DImode;
1787 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
1788 if (tmpmode == BLKmode)
1789 tmpmode = DImode;
1790 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1791 gen_rtx_REG (tmpmode, *intreg),
1792 GEN_INT (i*8));
1793 intreg++;
1794 break;
1795 case X86_64_SSESF_CLASS:
1796 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1797 gen_rtx_REG (SFmode,
1798 SSE_REGNO (sse_regno)),
1799 GEN_INT (i*8));
1800 sse_regno++;
1801 break;
1802 case X86_64_SSEDF_CLASS:
1803 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1804 gen_rtx_REG (DFmode,
1805 SSE_REGNO (sse_regno)),
1806 GEN_INT (i*8));
1807 sse_regno++;
1808 break;
1809 case X86_64_SSE_CLASS:
1810 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
1811 tmpmode = TImode, i++;
1812 else
1813 tmpmode = DImode;
1814 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1815 gen_rtx_REG (tmpmode,
1816 SSE_REGNO (sse_regno)),
1817 GEN_INT (i*8));
1818 sse_regno++;
1819 break;
1820 default:
1821 abort ();
1822 }
1823 }
1824 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
1825 for (i = 0; i < nexps; i++)
1826 XVECEXP (ret, 0, i) = exp [i];
1827 return ret;
1828}
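/* A sketch of the PARALLEL built above for the mixed case (register
   choices are illustrative): a 16-byte struct classified
   { INTEGER, SSEDF } with %rdi and %xmm0 free would yield

       (parallel [(expr_list (reg:DI di) (const_int 0))
		  (expr_list (reg:DF xmm0) (const_int 8))])

   mapping each eightbyte of the argument to the register carrying it.  */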
1829
1830/* Update the data in CUM to advance over an argument
1831 of mode MODE and data type TYPE.
1832 (TYPE is null for libcalls where that information may not be available.) */
1833
1834void
1835function_arg_advance (cum, mode, type, named)
1836 CUMULATIVE_ARGS *cum; /* current arg information */
1837 enum machine_mode mode; /* current arg mode */
1838 tree type; /* type of the argument or 0 if lib support */
1839 int named; /* whether or not the argument was named */
1840{
1841 int bytes =
1842 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1843 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1844
1845 if (TARGET_DEBUG_ARG)
1846 fprintf (stderr,
e9a25f70 1847 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e 1848 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
53c17031 1849 if (TARGET_64BIT)
b08de47e 1850 {
1851 int int_nregs, sse_nregs;
1852 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
1853 cum->words += words;
1854 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 1855 {
1856 cum->nregs -= int_nregs;
1857 cum->sse_nregs -= sse_nregs;
1858 cum->regno += int_nregs;
1859 cum->sse_regno += sse_nregs;
82a127a9 1860 }
1861 else
1862 cum->words += words;
b08de47e 1863 }
a4f31c00 1864 else
82a127a9 1865 {
1866 if (TARGET_SSE && mode == TImode)
1867 {
1868 cum->sse_words += words;
1869 cum->sse_nregs -= 1;
1870 cum->sse_regno += 1;
1871 if (cum->sse_nregs <= 0)
1872 {
1873 cum->sse_nregs = 0;
1874 cum->sse_regno = 0;
1875 }
1876 }
1877 else
82a127a9 1878 {
1879 cum->words += words;
1880 cum->nregs -= words;
1881 cum->regno += words;
1882
1883 if (cum->nregs <= 0)
1884 {
1885 cum->nregs = 0;
1886 cum->regno = 0;
1887 }
1888 }
1889 }
1890 return;
1891}
1892
1893/* Define where to put the arguments to a function.
1894 Value is zero to push the argument on the stack,
1895 or a hard register in which to store the argument.
1896
1897 MODE is the argument's machine mode.
1898 TYPE is the data type of the argument (as a tree).
1899 This is null for libcalls where that information may
1900 not be available.
1901 CUM is a variable of type CUMULATIVE_ARGS which gives info about
1902 the preceding args and about the function being called.
1903 NAMED is nonzero if this argument is a named parameter
1904 (otherwise it is an extra parameter matching an ellipsis). */
1905
1906struct rtx_def *
1907function_arg (cum, mode, type, named)
1908 CUMULATIVE_ARGS *cum; /* current arg information */
1909 enum machine_mode mode; /* current arg mode */
1910 tree type; /* type of the argument or 0 if lib support */
1911 int named; /* != 0 for normal args, == 0 for ... args */
1912{
1913 rtx ret = NULL_RTX;
1914 int bytes =
1915 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1916 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1917
1918 /* Handle the hidden AL argument containing the number of SSE registers
1919 used by varargs x86-64 functions. For the i386 ABI just return
1920 constm1_rtx to avoid any AL settings. */
32ee7d1d 1921 if (mode == VOIDmode)
b08de47e 1922 {
1923 if (TARGET_64BIT)
1924 return GEN_INT (cum->maybe_vaarg
1925 ? (cum->sse_nregs < 0
1926 ? SSE_REGPARM_MAX
1927 : cum->sse_regno)
1928 : -1);
1929 else
1930 return constm1_rtx;
b08de47e 1931 }
1932 if (TARGET_64BIT)
1933 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
1934 &x86_64_int_parameter_registers [cum->regno],
1935 cum->sse_regno);
1936 else
1937 switch (mode)
1938 {
1939 /* For now, pass fp/complex values on the stack. */
1940 default:
1941 break;
1942
1943 case BLKmode:
1944 case DImode:
1945 case SImode:
1946 case HImode:
1947 case QImode:
1948 if (words <= cum->nregs)
1949 ret = gen_rtx_REG (mode, cum->regno);
1950 break;
1951 case TImode:
1952 if (cum->sse_nregs)
1953 ret = gen_rtx_REG (mode, cum->sse_regno);
1954 break;
1955 }
1956
1957 if (TARGET_DEBUG_ARG)
1958 {
1959 fprintf (stderr,
e9a25f70 1960 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
1961 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1962
1963 if (ret)
1964 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
1965 else
1966 fprintf (stderr, ", stack");
1967
1968 fprintf (stderr, " )\n");
1969 }
1970
1971 return ret;
1972}
1973
1974/* Gives the alignment boundary, in bits, of an argument with the specified mode
1975 and type. */
1976
1977int
1978ix86_function_arg_boundary (mode, type)
1979 enum machine_mode mode;
1980 tree type;
1981{
1982 int align;
1983 if (!TARGET_64BIT)
1984 return PARM_BOUNDARY;
1985 if (type)
1986 align = TYPE_ALIGN (type);
1987 else
1988 align = GET_MODE_ALIGNMENT (mode);
1989 if (align < PARM_BOUNDARY)
1990 align = PARM_BOUNDARY;
1991 if (align > 128)
1992 align = 128;
1993 return align;
1994}
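/* Examples of the clamping above (our own illustration): on x86-64 a
   TImode argument keeps its 128-bit alignment, a char is rounded up to
   PARM_BOUNDARY, and a hypothetical 256-bit-aligned type would still
   be capped at 128 bits; on i386 everything gets PARM_BOUNDARY.  */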
1995
1996/* Return true if REGNO is a possible register number for a function value. */
1997bool
1998ix86_function_value_regno_p (regno)
1999 int regno;
2000{
2001 if (!TARGET_64BIT)
2002 {
2003 return ((regno) == 0
2004 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2005 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2006 }
2007 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2008 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2009 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2010}
2011
2012/* Define how to find the value returned by a function.
2013 VALTYPE is the data type of the value (as a tree).
2014 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2015 otherwise, FUNC is 0. */
2016rtx
2017ix86_function_value (valtype)
2018 tree valtype;
2019{
2020 if (TARGET_64BIT)
2021 {
2022 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2023 REGPARM_MAX, SSE_REGPARM_MAX,
2024 x86_64_int_return_registers, 0);
2025 /* For zero-sized structures, construct_container returns NULL, but we need
2026 to keep the rest of the compiler happy by returning a meaningful value. */
2027 if (!ret)
2028 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2029 return ret;
2030 }
2031 else
2032 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2033}
2034
2035/* Return nonzero iff TYPE is returned in memory. */
2036int
2037ix86_return_in_memory (type)
2038 tree type;
2039{
2040 int needed_intregs, needed_sseregs;
2041 if (TARGET_64BIT)
2042 {
2043 return !examine_argument (TYPE_MODE (type), type, 1,
2044 &needed_intregs, &needed_sseregs);
2045 }
2046 else
2047 {
2048 if (TYPE_MODE (type) == BLKmode
2049 || (VECTOR_MODE_P (TYPE_MODE (type))
2050 && int_size_in_bytes (type) == 8)
2051 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2052 && TYPE_MODE (type) != TFmode
2053 && !VECTOR_MODE_P (TYPE_MODE (type))))
2054 return 1;
2055 return 0;
2056 }
2057}
2058
2059/* Define how to find the value returned by a library function
2060 assuming the value has mode MODE. */
2061rtx
2062ix86_libcall_value (mode)
2063 enum machine_mode mode;
2064{
2065 if (TARGET_64BIT)
2066 {
2067 switch (mode)
2068 {
2069 case SFmode:
2070 case SCmode:
2071 case DFmode:
2072 case DCmode:
2073 return gen_rtx_REG (mode, FIRST_SSE_REG);
2074 case TFmode:
2075 case TCmode:
2076 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2077 default:
2078 return gen_rtx_REG (mode, 0);
2079 }
2080 }
2081 else
2082 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2083}
2084\f
2085/* Create the va_list data type. */
53c17031 2086
2087tree
2088ix86_build_va_list ()
2089{
2090 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 2091
2092 /* For i386 we use a plain pointer to the argument area. */
2093 if (!TARGET_64BIT)
2094 return build_pointer_type (char_type_node);
2095
2096 record = make_lang_type (RECORD_TYPE);
2097 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2098
2099 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2100 unsigned_type_node);
2101 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2102 unsigned_type_node);
2103 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2104 ptr_type_node);
2105 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2106 ptr_type_node);
2107
2108 DECL_FIELD_CONTEXT (f_gpr) = record;
2109 DECL_FIELD_CONTEXT (f_fpr) = record;
2110 DECL_FIELD_CONTEXT (f_ovf) = record;
2111 DECL_FIELD_CONTEXT (f_sav) = record;
2112
2113 TREE_CHAIN (record) = type_decl;
2114 TYPE_NAME (record) = type_decl;
2115 TYPE_FIELDS (record) = f_gpr;
2116 TREE_CHAIN (f_gpr) = f_fpr;
2117 TREE_CHAIN (f_fpr) = f_ovf;
2118 TREE_CHAIN (f_ovf) = f_sav;
2119
2120 layout_type (record);
2121
2122 /* The correct type is an array type of one element. */
2123 return build_array_type (record, build_index_type (size_zero_node));
2124}
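/* The record built above corresponds to this C-level declaration (a
   sketch of the x86-64 ABI va_list; the field names match the
   FIELD_DECLs created above):

       typedef struct {
	 unsigned int gp_offset;
	 unsigned int fp_offset;
	 void *overflow_arg_area;
	 void *reg_save_area;
       } __va_list_tag[1];  */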
2125
2126/* Perform any actions needed for a function that is receiving a
2127 variable number of arguments.
2128
2129 CUM is as above.
2130
2131 MODE and TYPE are the mode and type of the current parameter.
2132
2133 PRETEND_SIZE is a variable that should be set to the amount of stack
2134 that must be pushed by the prolog to pretend that our caller pushed
2135 it.
2136
2137 Normally, this macro will push all remaining incoming registers on the
2138 stack and set PRETEND_SIZE to the length of the registers pushed. */
2139
2140void
2141ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2142 CUMULATIVE_ARGS *cum;
2143 enum machine_mode mode;
2144 tree type;
2145 int *pretend_size ATTRIBUTE_UNUSED;
2146 int no_rtl;
2147
2148{
2149 CUMULATIVE_ARGS next_cum;
2150 rtx save_area = NULL_RTX, mem;
2151 rtx label;
2152 rtx label_ref;
2153 rtx tmp_reg;
2154 rtx nsse_reg;
2155 int set;
2156 tree fntype;
2157 int stdarg_p;
2158 int i;
2159
2160 if (!TARGET_64BIT)
2161 return;
2162
2163 /* Indicate that we need stack space for the varargs save area. */
2164 ix86_save_varrargs_registers = 1;
2165
2166 fntype = TREE_TYPE (current_function_decl);
2167 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2168 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2169 != void_type_node));
2170
2171 /* For varargs, we do not want to skip the dummy va_dcl argument.
2172 For stdargs, we do want to skip the last named argument. */
2173 next_cum = *cum;
2174 if (stdarg_p)
2175 function_arg_advance (&next_cum, mode, type, 1);
2176
2177 if (!no_rtl)
2178 save_area = frame_pointer_rtx;
2179
2180 set = get_varargs_alias_set ();
2181
2182 for (i = next_cum.regno; i < ix86_regparm; i++)
2183 {
2184 mem = gen_rtx_MEM (Pmode,
2185 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 2186 set_mem_alias_set (mem, set);
2187 emit_move_insn (mem, gen_rtx_REG (Pmode,
2188 x86_64_int_parameter_registers[i]));
2189 }
2190
2191 if (next_cum.sse_nregs)
2192 {
2193 /* Now emit code to save SSE registers. The AX parameter contains the
2194 number of SSE parameter registers used to call this function. We use
2195 the sse_prologue_save insn template, which produces a computed jump
2196 across the SSE saves. We need some preparation work to get this working. */
2197
2198 label = gen_label_rtx ();
2199 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2200
2201 /* Compute the address to jump to:
2202 label - 4*eax + nnamed_sse_arguments*4 */
2203 tmp_reg = gen_reg_rtx (Pmode);
2204 nsse_reg = gen_reg_rtx (Pmode);
2205 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2206 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2207 gen_rtx_MULT (VOIDmode, nsse_reg,
2208 GEN_INT (4))));
2209 if (next_cum.sse_regno)
2210 emit_move_insn
2211 (nsse_reg,
2212 gen_rtx_CONST (DImode,
2213 gen_rtx_PLUS (DImode,
2214 label_ref,
2215 GEN_INT (next_cum.sse_regno * 4))));
2216 else
2217 emit_move_insn (nsse_reg, label_ref);
2218 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2219
2220 /* Compute the address of the memory block we save into. We always use
2221 a pointer pointing 127 bytes after the first byte to store to - this
2222 is needed to keep the instruction size limited to 4 bytes. */
2223 tmp_reg = gen_reg_rtx (Pmode);
2224 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2225 plus_constant (save_area, 8 * REGPARM_MAX + 127)));
2226 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 2227 set_mem_alias_set (mem, set);
2228
2229 /* And finally do the dirty job! */
2230 emit_insn (gen_sse_prologue_save (mem, nsse_reg, GEN_INT (next_cum.sse_regno),
2231 label));
2232 }
2233
2234}
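/* The computed jump emitted above relies on each save insn in the
   sse_prologue_save template having the same 4-byte encoding; biasing
   the store pointer by 127 bytes keeps every displacement within a
   signed byte, which is what holds the insns to that size.  See the
   sse_prologue_save template in i386.md for the insns jumped across.  */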
2235
2236/* Implement va_start. */
2237
2238void
2239ix86_va_start (stdarg_p, valist, nextarg)
2240 int stdarg_p;
2241 tree valist;
2242 rtx nextarg;
2243{
2244 HOST_WIDE_INT words, n_gpr, n_fpr;
2245 tree f_gpr, f_fpr, f_ovf, f_sav;
2246 tree gpr, fpr, ovf, sav, t;
2247
2248 /* Only 64bit target needs something special. */
2249 if (!TARGET_64BIT)
2250 {
2251 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2252 return;
2253 }
2254
2255 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2256 f_fpr = TREE_CHAIN (f_gpr);
2257 f_ovf = TREE_CHAIN (f_fpr);
2258 f_sav = TREE_CHAIN (f_ovf);
2259
2260 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2261 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2262 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2263 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2264 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2265
2266 /* Count number of gp and fp argument registers used. */
2267 words = current_function_args_info.words;
2268 n_gpr = current_function_args_info.regno;
2269 n_fpr = current_function_args_info.sse_regno;
2270
2271 if (TARGET_DEBUG_ARG)
2272 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
14f73b5a 2273 (int)words, (int)n_gpr, (int)n_fpr);
2274
2275 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2276 build_int_2 (n_gpr * 8, 0));
2277 TREE_SIDE_EFFECTS (t) = 1;
2278 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2279
2280 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2281 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2282 TREE_SIDE_EFFECTS (t) = 1;
2283 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2284
2285 /* Find the overflow area. */
2286 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2287 if (words != 0)
2288 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2289 build_int_2 (words * UNITS_PER_WORD, 0));
2290 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2291 TREE_SIDE_EFFECTS (t) = 1;
2292 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2293
2294 /* Find the register save area.
2295 The function prologue saves it right above the stack frame. */
2296 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2297 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2298 TREE_SIDE_EFFECTS (t) = 1;
2299 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2300}
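/* A concrete instance (our own numbers): if the prologue consumed one
   integer register and one SSE register for named arguments, the code
   above stores gp_offset = 8 and fp_offset = 8*REGPARM_MAX + 16, points
   overflow_arg_area just past the named stack arguments, and points
   reg_save_area at the save block laid out above the stack frame.  */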
2301
2302/* Implement va_arg. */
2303rtx
2304ix86_va_arg (valist, type)
2305 tree valist, type;
2306{
2307 static int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2308 tree f_gpr, f_fpr, f_ovf, f_sav;
2309 tree gpr, fpr, ovf, sav, t;
2310 int indirect_p = 0, size, rsize;
2311 rtx lab_false, lab_over = NULL_RTX;
2312 rtx addr_rtx, r;
2313 rtx container;
2314
2315 /* Only 64bit target needs something special. */
2316 if (!TARGET_64BIT)
2317 {
2318 return std_expand_builtin_va_arg (valist, type);
2319 }
2320
2321 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2322 f_fpr = TREE_CHAIN (f_gpr);
2323 f_ovf = TREE_CHAIN (f_fpr);
2324 f_sav = TREE_CHAIN (f_ovf);
2325
2326 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2327 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2328 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2329 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2330 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2331
2332 size = int_size_in_bytes (type);
2333 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2334
2335 container = construct_container (TYPE_MODE (type), type, 0,
2336 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2337 /* Pull the value out of the saved registers ... */
2340
2341 addr_rtx = gen_reg_rtx (Pmode);
2342
2343 if (container)
2344 {
2345 rtx int_addr_rtx, sse_addr_rtx;
2346 int needed_intregs, needed_sseregs;
2347 int need_temp;
2348
2349 lab_over = gen_label_rtx ();
2350 lab_false = gen_label_rtx ();
8bad7136 2351
2352 examine_argument (TYPE_MODE (type), type, 0,
2353 &needed_intregs, &needed_sseregs);
2354
2355
2356 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2357 || TYPE_ALIGN (type) > 128);
2358
2359 /* When passing a structure, verify that it occupies a consecutive block
2360 in the register save area. If not, we need to do moves. */
2361 if (!need_temp && !REG_P (container))
2362 {
2363 /* Verify that all registers are strictly consecutive. */
2364 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2365 {
2366 int i;
2367
2368 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2369 {
2370 rtx slot = XVECEXP (container, 0, i);
2371 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int)i
2372 || INTVAL (XEXP (slot, 1)) != i * 16)
2373 need_temp = 1;
2374 }
2375 }
2376 else
2377 {
2378 int i;
2379
2380 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2381 {
2382 rtx slot = XVECEXP (container, 0, i);
2383 if (REGNO (XEXP (slot, 0)) != (unsigned int)i
2384 || INTVAL (XEXP (slot, 1)) != i * 8)
2385 need_temp = 1;
2386 }
2387 }
2388 }
2389 if (!need_temp)
2390 {
2391 int_addr_rtx = addr_rtx;
2392 sse_addr_rtx = addr_rtx;
2393 }
2394 else
2395 {
2396 int_addr_rtx = gen_reg_rtx (Pmode);
2397 sse_addr_rtx = gen_reg_rtx (Pmode);
2398 }
2399 /* First ensure that we fit completely in registers. */
2400 if (needed_intregs)
2401 {
2402 emit_cmp_and_jump_insns (expand_expr
2403 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2404 GEN_INT ((REGPARM_MAX - needed_intregs +
2405 1) * 8), GE, const1_rtx, SImode,
2406 1, 1, lab_false);
2407 }
2408 if (needed_sseregs)
2409 {
2410 emit_cmp_and_jump_insns (expand_expr
2411 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2412 GEN_INT ((SSE_REGPARM_MAX -
2413 needed_sseregs + 1) * 16 +
2414 REGPARM_MAX * 8), GE, const1_rtx,
2415 SImode, 1, 1, lab_false);
2416 }
2417
2418 /* Compute index to start of area used for integer regs. */
2419 if (needed_intregs)
2420 {
2421 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2422 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2423 if (r != int_addr_rtx)
2424 emit_move_insn (int_addr_rtx, r);
2425 }
2426 if (needed_sseregs)
2427 {
2428 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2429 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2430 if (r != sse_addr_rtx)
2431 emit_move_insn (sse_addr_rtx, r);
2432 }
2433 if (need_temp)
2434 {
2435 int i;
2436 rtx mem;
2437
2438 mem = assign_temp (type, 0, 1, 0);
0692acba 2439 set_mem_alias_set (mem, get_varargs_alias_set ());
2440 addr_rtx = XEXP (mem, 0);
2441 for (i = 0; i < XVECLEN (container, 0); i++)
2442 {
2443 rtx slot = XVECEXP (container, 0, i);
2444 rtx reg = XEXP (slot, 0);
2445 enum machine_mode mode = GET_MODE (reg);
2446 rtx src_addr;
2447 rtx src_mem;
2448 int src_offset;
2449 rtx dest_mem;
2450
2451 if (SSE_REGNO_P (REGNO (reg)))
2452 {
2453 src_addr = sse_addr_rtx;
2454 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2455 }
2456 else
2457 {
2458 src_addr = int_addr_rtx;
2459 src_offset = REGNO (reg) * 8;
2460 }
2461 src_mem = gen_rtx_MEM (mode, src_addr);
0692acba 2462 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2463 src_mem = adjust_address (src_mem, mode, src_offset);
2464 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2465 PUT_MODE (dest_mem, mode);
2466 /* ??? Break out TImode moves from integer registers? */
2467 emit_move_insn (dest_mem, src_mem);
2468 }
2469 }
2470
2471 if (needed_intregs)
2472 {
2473 t =
2474 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2475 build_int_2 (needed_intregs * 8, 0));
2476 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2477 TREE_SIDE_EFFECTS (t) = 1;
2478 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2479 }
2480 if (needed_sseregs)
2481 {
2482 t =
2483 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2484 build_int_2 (needed_sseregs * 16, 0));
2485 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2486 TREE_SIDE_EFFECTS (t) = 1;
2487 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2488 }
2489
2490 emit_jump_insn (gen_jump (lab_over));
2491 emit_barrier ();
2492 emit_label (lab_false);
2493 }
2494
2495 /* ... otherwise out of the overflow area. */
2496
2497 /* Care for on-stack alignment if needed. */
2498 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2499 t = ovf;
2500 else
2501 {
2502 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2503 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2504 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2505 }
2506 t = save_expr (t);
2507
2508 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2509 if (r != addr_rtx)
2510 emit_move_insn (addr_rtx, r);
2511
2512 t =
2513 build (PLUS_EXPR, TREE_TYPE (t), t,
2514 build_int_2 (rsize * UNITS_PER_WORD, 0));
2515 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2516 TREE_SIDE_EFFECTS (t) = 1;
2517 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2518
2519 if (container)
2520 emit_label (lab_over);
2521
2522 if (indirect_p)
2523 {
2524 abort ();
2525 r = gen_rtx_MEM (Pmode, addr_rtx);
0692acba 2526 set_mem_alias_set (r, get_varargs_alias_set ());
2527 emit_move_insn (addr_rtx, r);
2528 }
2529
2530 return addr_rtx;
2531}
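/* Summarizing the two paths above: when gp_offset/fp_offset show that
   enough registers remain, the value is read out of reg_save_area
   (through a stack temporary when it spans both register files or is
   over-aligned); otherwise control falls through to the overflow area,
   whose pointer is first rounded up to the argument's alignment and
   then advanced past the argument's rounded size.  */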
2532\f
2533/* Return nonzero if OP is general operand representable on x86_64. */
2534
2535int
2536x86_64_general_operand (op, mode)
2537 rtx op;
2538 enum machine_mode mode;
2539{
2540 if (!TARGET_64BIT)
2541 return general_operand (op, mode);
2542 if (nonimmediate_operand (op, mode))
2543 return 1;
2544 return x86_64_sign_extended_value (op);
2545}
2546
2547/* Return nonzero if OP is a general operand representable on x86_64
2548 as either a sign-extended or zero-extended constant. */
2549
2550int
2551x86_64_szext_general_operand (op, mode)
2552 rtx op;
2553 enum machine_mode mode;
2554{
2555 if (!TARGET_64BIT)
2556 return general_operand (op, mode);
2557 if (nonimmediate_operand (op, mode))
2558 return 1;
2559 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2560}
2561
2562/* Return nonzero if OP is nonmemory operand representable on x86_64. */
2563
2564int
2565x86_64_nonmemory_operand (op, mode)
2566 rtx op;
2567 enum machine_mode mode;
2568{
2569 if (!TARGET_64BIT)
2570 return nonmemory_operand (op, mode);
2571 if (register_operand (op, mode))
2572 return 1;
2573 return x86_64_sign_extended_value (op);
2574}
2575
2576/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2577
2578int
2579x86_64_movabs_operand (op, mode)
2580 rtx op;
2581 enum machine_mode mode;
2582{
2583 if (!TARGET_64BIT || !flag_pic)
2584 return nonmemory_operand (op, mode);
2585 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2586 return 1;
2587 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2588 return 1;
2589 return 0;
2590}
2591
2592/* Return nonzero if OP is nonmemory operand representable on x86_64. */
2593
2594int
2595x86_64_szext_nonmemory_operand (op, mode)
2596 rtx op;
2597 enum machine_mode mode;
2598{
2599 if (!TARGET_64BIT)
2600 return nonmemory_operand (op, mode);
2601 if (register_operand (op, mode))
2602 return 1;
2603 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2604}
2605
2606/* Return nonzero if OP is immediate operand representable on x86_64. */
2607
2608int
2609x86_64_immediate_operand (op, mode)
2610 rtx op;
2611 enum machine_mode mode;
2612{
2613 if (!TARGET_64BIT)
2614 return immediate_operand (op, mode);
2615 return x86_64_sign_extended_value (op);
2616}
2617
2618/* Return nonzero if OP is an immediate operand representable on x86_64 as a zero-extended constant. */
2619
2620int
2621x86_64_zext_immediate_operand (op, mode)
2622 rtx op;
2623 enum machine_mode mode ATTRIBUTE_UNUSED;
2624{
2625 return x86_64_zero_extended_value (op);
2626}
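/* Illustrative constants for the two extension predicates (our own
   examples): (const_int 0x7fffffff) is both sign- and zero-extendable
   to 64 bits, (const_int -1) is only sign-extendable, and the 64-bit
   value 0x80000000 is only zero-extendable.  */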
2627
2628/* Return nonzero if OP is (const_int 1), else return zero. */
2629
2630int
2631const_int_1_operand (op, mode)
2632 rtx op;
2633 enum machine_mode mode ATTRIBUTE_UNUSED;
2634{
2635 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2636}
2637
2638/* Returns 1 if OP is either a symbol reference or a sum of a symbol
2639 reference and a constant. */
2640
2641int
2642symbolic_operand (op, mode)
2643 register rtx op;
2644 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 2645{
e075ae69 2646 switch (GET_CODE (op))
2a2ab3f9 2647 {
2648 case SYMBOL_REF:
2649 case LABEL_REF:
2650 return 1;
2651
2652 case CONST:
2653 op = XEXP (op, 0);
2654 if (GET_CODE (op) == SYMBOL_REF
2655 || GET_CODE (op) == LABEL_REF
2656 || (GET_CODE (op) == UNSPEC
2657 && (XINT (op, 1) == 6
2658 || XINT (op, 1) == 7
2659 || XINT (op, 1) == 15)))
2660 return 1;
2661 if (GET_CODE (op) != PLUS
2662 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2663 return 0;
2664
2665 op = XEXP (op, 0);
2666 if (GET_CODE (op) == SYMBOL_REF
2667 || GET_CODE (op) == LABEL_REF)
2668 return 1;
2669 /* Only @GOTOFF gets offsets. */
2670 if (GET_CODE (op) != UNSPEC
2671 || XINT (op, 1) != 7)
2672 return 0;
2673
2674 op = XVECEXP (op, 0, 0);
2675 if (GET_CODE (op) == SYMBOL_REF
2676 || GET_CODE (op) == LABEL_REF)
2677 return 1;
2678 return 0;
2679
2680 default:
2681 return 0;
2682 }
2683}
2a2ab3f9 2684
e075ae69 2685/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 2686
2687int
2688pic_symbolic_operand (op, mode)
2689 register rtx op;
2690 enum machine_mode mode ATTRIBUTE_UNUSED;
2691{
2692 if (GET_CODE (op) != CONST)
2693 return 0;
2694 op = XEXP (op, 0);
2695 if (TARGET_64BIT)
2696 {
2697 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2698 return 1;
2699 }
2700 else
2a2ab3f9 2701 {
2702 if (GET_CODE (op) == UNSPEC)
2703 return 1;
2704 if (GET_CODE (op) != PLUS
2705 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2706 return 0;
2707 op = XEXP (op, 0);
2708 if (GET_CODE (op) == UNSPEC)
2709 return 1;
2a2ab3f9 2710 }
e075ae69 2711 return 0;
2a2ab3f9 2712}
2a2ab3f9 2713
2714/* Return true if OP is a symbolic operand that resolves locally. */
2715
2716static int
2717local_symbolic_operand (op, mode)
2718 rtx op;
2719 enum machine_mode mode ATTRIBUTE_UNUSED;
2720{
2721 if (GET_CODE (op) == LABEL_REF)
2722 return 1;
2723
2724 if (GET_CODE (op) == CONST
2725 && GET_CODE (XEXP (op, 0)) == PLUS
2726 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2727 op = XEXP (XEXP (op, 0), 0);
2728
2729 if (GET_CODE (op) != SYMBOL_REF)
2730 return 0;
2731
2732 /* These we've been told are local by varasm and encode_section_info
2733 respectively. */
2734 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2735 return 1;
2736
2737 /* There is, however, a not insubstantial body of code in the rest of
2738 the compiler that assumes it can just stick the results of
2739 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2740 /* ??? This is a hack. Should update the body of the compiler to
2741 always create a DECL and invoke ENCODE_SECTION_INFO. */
2742 if (strncmp (XSTR (op, 0), internal_label_prefix,
2743 internal_label_prefix_len) == 0)
2744 return 1;
2745
2746 return 0;
2747}
2748
2749/* Test for a valid operand for a call instruction. Don't allow the
2750 arg pointer register or virtual regs since they may decay into
2751 reg + const, which the patterns can't handle. */
2a2ab3f9 2752
2753int
2754call_insn_operand (op, mode)
2755 rtx op;
2756 enum machine_mode mode ATTRIBUTE_UNUSED;
2757{
2758 /* Disallow indirect through a virtual register. This leads to
2759 compiler aborts when trying to eliminate them. */
2760 if (GET_CODE (op) == REG
2761 && (op == arg_pointer_rtx
564d80f4 2762 || op == frame_pointer_rtx
2763 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
2764 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
2765 return 0;
2a2ab3f9 2766
2767 /* Disallow `call 1234'. Due to varying assembler lameness this
2768 gets either rejected or translated to `call .+1234'. */
2769 if (GET_CODE (op) == CONST_INT)
2770 return 0;
2771
2772 /* Explicitly allow SYMBOL_REF even if pic. */
2773 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 2774 return 1;
2a2ab3f9 2775
2776 /* Half-pic doesn't allow anything but registers and constants.
2777 We've just taken care of the latter. */
2778 if (HALF_PIC_P ())
2779 return register_operand (op, Pmode);
2780
2781 /* Otherwise we can allow any general_operand in the address. */
2782 return general_operand (op, Pmode);
e075ae69 2783}
79325812 2784
2785int
2786constant_call_address_operand (op, mode)
2787 rtx op;
2788 enum machine_mode mode ATTRIBUTE_UNUSED;
2789{
2790 if (GET_CODE (op) == CONST
2791 && GET_CODE (XEXP (op, 0)) == PLUS
2792 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2793 op = XEXP (XEXP (op, 0), 0);
e1ff012c 2794 return GET_CODE (op) == SYMBOL_REF;
e075ae69 2795}
2a2ab3f9 2796
e075ae69 2797/* Match exactly zero and one. */
e9a25f70 2798
0f290768 2799int
2800const0_operand (op, mode)
2801 register rtx op;
2802 enum machine_mode mode;
2803{
2804 return op == CONST0_RTX (mode);
2805}
e9a25f70 2806
0f290768 2807int
2808const1_operand (op, mode)
2809 register rtx op;
2810 enum machine_mode mode ATTRIBUTE_UNUSED;
2811{
2812 return op == const1_rtx;
2813}
2a2ab3f9 2814
e075ae69 2815/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 2816
2817int
2818const248_operand (op, mode)
2819 register rtx op;
2820 enum machine_mode mode ATTRIBUTE_UNUSED;
2821{
2822 return (GET_CODE (op) == CONST_INT
2823 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
2824}
e9a25f70 2825
e075ae69 2826/* True if this is a constant appropriate for an increment or decrement. */
81fd0956 2827
2828int
2829incdec_operand (op, mode)
2830 register rtx op;
0631e0bf 2831 enum machine_mode mode ATTRIBUTE_UNUSED;
e075ae69 2832{
2833 /* On the Pentium 4, the inc and dec operations cause an extra dependency
2834 on the flags register, since the carry flag is not set. */
2835 if (TARGET_PENTIUM4 && !optimize_size)
2836 return 0;
2b1c08f5 2837 return op == const1_rtx || op == constm1_rtx;
e075ae69 2838}
2a2ab3f9 2839
2840/* Return nonzero if OP is acceptable as operand of DImode shift
2841 expander. */
2842
2843int
2844shiftdi_operand (op, mode)
2845 rtx op;
2846 enum machine_mode mode ATTRIBUTE_UNUSED;
2847{
2848 if (TARGET_64BIT)
2849 return nonimmediate_operand (op, mode);
2850 else
2851 return register_operand (op, mode);
2852}
2853
0f290768 2854/* Return false if this is the stack pointer, or any other fake
2855 register eliminable to the stack pointer. Otherwise, this is
2856 a register operand.
2a2ab3f9 2857
2858 This is used to prevent esp from being used as an index reg,
2859 which would only happen in pathological cases. */
5f1ec3e6 2860
2861int
2862reg_no_sp_operand (op, mode)
2863 register rtx op;
2864 enum machine_mode mode;
2865{
2866 rtx t = op;
2867 if (GET_CODE (t) == SUBREG)
2868 t = SUBREG_REG (t);
564d80f4 2869 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 2870 return 0;
2a2ab3f9 2871
e075ae69 2872 return register_operand (op, mode);
2a2ab3f9 2873}
b840bfb0 2874
2875int
2876mmx_reg_operand (op, mode)
2877 register rtx op;
bd793c65 2878 enum machine_mode mode ATTRIBUTE_UNUSED;
2879{
2880 return MMX_REG_P (op);
2881}
2882
2883/* Return false if this is any eliminable register. Otherwise
2884 general_operand. */
2885
2886int
2887general_no_elim_operand (op, mode)
2888 register rtx op;
2889 enum machine_mode mode;
2890{
2891 rtx t = op;
2892 if (GET_CODE (t) == SUBREG)
2893 t = SUBREG_REG (t);
2894 if (t == arg_pointer_rtx || t == frame_pointer_rtx
2895 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
2896 || t == virtual_stack_dynamic_rtx)
2897 return 0;
2898 if (REG_P (t)
2899 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
2900 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
2901 return 0;
2902
2903 return general_operand (op, mode);
2904}
2905
2906/* Return false if this is any eliminable register. Otherwise
2907 register_operand or const_int. */
2908
2909int
2910nonmemory_no_elim_operand (op, mode)
2911 register rtx op;
2912 enum machine_mode mode;
2913{
2914 rtx t = op;
2915 if (GET_CODE (t) == SUBREG)
2916 t = SUBREG_REG (t);
2917 if (t == arg_pointer_rtx || t == frame_pointer_rtx
2918 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
2919 || t == virtual_stack_dynamic_rtx)
2920 return 0;
2921
2922 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
2923}
2924
e075ae69 2925/* Return true if op is a Q_REGS class register. */
b840bfb0 2926
2927int
2928q_regs_operand (op, mode)
2929 register rtx op;
2930 enum machine_mode mode;
b840bfb0 2931{
2932 if (mode != VOIDmode && GET_MODE (op) != mode)
2933 return 0;
2934 if (GET_CODE (op) == SUBREG)
2935 op = SUBREG_REG (op);
2936 return QI_REG_P (op);
0f290768 2937}
b840bfb0 2938
e075ae69 2939/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 2940
2941int
2942non_q_regs_operand (op, mode)
2943 register rtx op;
2944 enum machine_mode mode;
2945{
2946 if (mode != VOIDmode && GET_MODE (op) != mode)
2947 return 0;
2948 if (GET_CODE (op) == SUBREG)
2949 op = SUBREG_REG (op);
2950 return NON_QI_REG_P (op);
0f290768 2951}
b840bfb0 2952
2953/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
2954 insns. */
2955int
2956sse_comparison_operator (op, mode)
2957 rtx op;
2958 enum machine_mode mode ATTRIBUTE_UNUSED;
2959{
2960 enum rtx_code code = GET_CODE (op);
2961 switch (code)
2962 {
2963 /* Operations supported directly. */
2964 case EQ:
2965 case LT:
2966 case LE:
2967 case UNORDERED:
2968 case NE:
2969 case UNGE:
2970 case UNGT:
2971 case ORDERED:
2972 return 1;
2973 /* These are equivalent to ones above in non-IEEE comparisons. */
2974 case UNEQ:
2975 case UNLT:
2976 case UNLE:
2977 case LTGT:
2978 case GE:
2979 case GT:
2980 return !TARGET_IEEE_FP;
2981 default:
2982 return 0;
2983 }
915119a5 2984}
9076b9c1 2985/* Return 1 if OP is a valid comparison operator in valid mode. */
e075ae69 2986int
2987ix86_comparison_operator (op, mode)
2988 register rtx op;
2989 enum machine_mode mode;
e075ae69 2990{
9076b9c1 2991 enum machine_mode inmode;
9a915772 2992 enum rtx_code code = GET_CODE (op);
2993 if (mode != VOIDmode && GET_MODE (op) != mode)
2994 return 0;
2995 if (GET_RTX_CLASS (code) != '<')
2996 return 0;
2997 inmode = GET_MODE (XEXP (op, 0));
2998
2999 if (inmode == CCFPmode || inmode == CCFPUmode)
3000 {
3001 enum rtx_code second_code, bypass_code;
3002 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3003 return (bypass_code == NIL && second_code == NIL);
3004 }
3005 switch (code)
3006 {
3007 case EQ: case NE:
3a3677ff 3008 return 1;
9076b9c1 3009 case LT: case GE:
7e08e190 3010 if (inmode == CCmode || inmode == CCGCmode
3011 || inmode == CCGOCmode || inmode == CCNOmode)
3012 return 1;
3013 return 0;
7e08e190 3014 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
7e08e190 3015 if (inmode == CCmode)
3016 return 1;
3017 return 0;
3018 case GT: case LE:
7e08e190 3019 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3020 return 1;
3021 return 0;
3022 default:
3023 return 0;
3024 }
3025}
3026
9076b9c1 3027/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3a3677ff 3028
3029int
3030fcmov_comparison_operator (op, mode)
3031 register rtx op;
3032 enum machine_mode mode;
3033{
b62d22a2 3034 enum machine_mode inmode;
9a915772 3035 enum rtx_code code = GET_CODE (op);
3036 if (mode != VOIDmode && GET_MODE (op) != mode)
3037 return 0;
3038 if (GET_RTX_CLASS (code) != '<')
3039 return 0;
3040 inmode = GET_MODE (XEXP (op, 0));
3041 if (inmode == CCFPmode || inmode == CCFPUmode)
3a3677ff 3042 {
3043 enum rtx_code second_code, bypass_code;
3044 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3045 if (bypass_code != NIL || second_code != NIL)
3046 return 0;
3047 code = ix86_fp_compare_code_to_integer (code);
3048 }
3049 /* The i387 supports just a limited set of condition codes. */
3050 switch (code)
3051 {
3052 case LTU: case GTU: case LEU: case GEU:
3053 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3054 return 1;
3055 return 0;
3056 case ORDERED: case UNORDERED:
3057 case EQ: case NE:
3058 return 1;
3059 default:
3060 return 0;
3061 }
e075ae69 3062}
b840bfb0 3063
3064/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3065
3066int
3067promotable_binary_operator (op, mode)
3068 register rtx op;
3069 enum machine_mode mode ATTRIBUTE_UNUSED;
3070{
3071 switch (GET_CODE (op))
3072 {
3073 case MULT:
3074 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3075 but the 386 and 486 do HImode multiplies faster. */
3076 return ix86_cpu > PROCESSOR_I486;
3077 case PLUS:
3078 case AND:
3079 case IOR:
3080 case XOR:
3081 case ASHIFT:
3082 return 1;
3083 default:
3084 return 0;
3085 }
3086}
3087
3088/* Nearly general operand, but accept any const_double, since we wish
3089 to be able to drop them into memory rather than have them get pulled
3090 into registers. */
b840bfb0 3091
2a2ab3f9 3092int
3093cmp_fp_expander_operand (op, mode)
3094 register rtx op;
3095 enum machine_mode mode;
2a2ab3f9 3096{
e075ae69 3097 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 3098 return 0;
e075ae69 3099 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 3100 return 1;
e075ae69 3101 return general_operand (op, mode);
3102}
3103
e075ae69 3104/* Match an SI or HImode register for a zero_extract. */
3105
3106int
e075ae69 3107ext_register_operand (op, mode)
2a2ab3f9 3108 register rtx op;
bb5177ac 3109 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 3110{
3522082b 3111 int regno;
3112 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3113 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
e075ae69 3114 return 0;
3115
3116 if (!register_operand (op, VOIDmode))
3117 return 0;
3118
3119 /* Be careful to accept only registers having upper parts. */
3120 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3121 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3122}
3123
3124/* Return 1 if this is a valid binary floating-point operation.
0f290768 3125 OP is the expression matched, and MODE is its mode. */
3126
3127int
3128binary_fp_operator (op, mode)
3129 register rtx op;
3130 enum machine_mode mode;
3131{
3132 if (mode != VOIDmode && mode != GET_MODE (op))
3133 return 0;
3134
2a2ab3f9
JVA
3135 switch (GET_CODE (op))
3136 {
3137 case PLUS:
3138 case MINUS:
3139 case MULT:
3140 case DIV:
3141 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 3142
3143 default:
3144 return 0;
3145 }
3146}
fee2770d 3147
3148int
3149mult_operator (op, mode)
3150 register rtx op;
3151 enum machine_mode mode ATTRIBUTE_UNUSED;
3152{
3153 return GET_CODE (op) == MULT;
3154}
3155
3156int
3157div_operator (op, mode)
3158 register rtx op;
3159 enum machine_mode mode ATTRIBUTE_UNUSED;
3160{
3161 return GET_CODE (op) == DIV;
3162}
3163
3164int
3165arith_or_logical_operator (op, mode)
3166 rtx op;
3167 enum machine_mode mode;
0a726ef1 3168{
3169 return ((mode == VOIDmode || GET_MODE (op) == mode)
3170 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3171 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3172}
3173
e075ae69 3174/* Returns 1 if OP is memory operand with a displacement. */
3175
3176int
3177memory_displacement_operand (op, mode)
3178 register rtx op;
3179 enum machine_mode mode;
4f2c8ebb 3180{
e075ae69 3181 struct ix86_address parts;
e9a25f70 3182
3183 if (! memory_operand (op, mode))
3184 return 0;
3185
3186 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3187 abort ();
3188
3189 return parts.disp != NULL_RTX;
3190}
3191
16189740 3192/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3193 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3194
3195 ??? It seems likely that this will only work because cmpsi is an
3196 expander, and no actual insns use this. */
3197
3198int
3199cmpsi_operand (op, mode)
3200 rtx op;
3201 enum machine_mode mode;
fee2770d 3202{
b9b2c339 3203 if (nonimmediate_operand (op, mode))
3204 return 1;
3205
3206 if (GET_CODE (op) == AND
3207 && GET_MODE (op) == SImode
3208 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3209 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3210 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3211 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3212 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3213 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 3214 return 1;
e9a25f70 3215
3216 return 0;
3217}
d784886d 3218
3219/* Returns 1 if OP is a memory operand that cannot be represented by the
3220 modRM array. */
3221
3222int
e075ae69 3223long_memory_operand (op, mode)
3224 register rtx op;
3225 enum machine_mode mode;
3226{
e075ae69 3227 if (! memory_operand (op, mode))
3228 return 0;
3229
e075ae69 3230 return memory_address_length (op) != 0;
d784886d 3231}
3232
3233/* Return nonzero if the rtx is known aligned. */
3234
3235int
3236aligned_operand (op, mode)
3237 rtx op;
3238 enum machine_mode mode;
3239{
3240 struct ix86_address parts;
3241
3242 if (!general_operand (op, mode))
3243 return 0;
3244
0f290768 3245 /* Registers and immediate operands are always "aligned". */
3246 if (GET_CODE (op) != MEM)
3247 return 1;
3248
0f290768 3249 /* Don't even try to do any aligned optimizations with volatiles. */
3250 if (MEM_VOLATILE_P (op))
3251 return 0;
3252
3253 op = XEXP (op, 0);
3254
3255 /* Pushes and pops are only valid on the stack pointer. */
3256 if (GET_CODE (op) == PRE_DEC
3257 || GET_CODE (op) == POST_INC)
3258 return 1;
3259
3260 /* Decode the address. */
3261 if (! ix86_decompose_address (op, &parts))
3262 abort ();
3263
3264 /* Look for some component that isn't known to be aligned. */
3265 if (parts.index)
3266 {
3267 if (parts.scale < 4
bdb429a5 3268 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3269 return 0;
3270 }
3271 if (parts.base)
3272 {
bdb429a5 3273 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3274 return 0;
3275 }
3276 if (parts.disp)
3277 {
3278 if (GET_CODE (parts.disp) != CONST_INT
3279 || (INTVAL (parts.disp) & 3) != 0)
3280 return 0;
3281 }
3282
3283 /* Didn't find one -- this must be an aligned address. */
3284 return 1;
3285}
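/* For example (assumed RTL): (mem:SI (plus:SI (reg:SI bp) (const_int 4)))
   counts as aligned when REGNO_POINTER_ALIGN reports the base register
   as 32-bit aligned, while the same address with (const_int 2) fails
   the displacement check above.  */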
3286\f
3287/* Return true if the constant is something that can be loaded with
3288 a special instruction. Only handle 0.0 and 1.0; others are less
3289 worthwhile. */
3290
3291int
3292standard_80387_constant_p (x)
3293 rtx x;
57dbca5e 3294{
2b04e52b 3295 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 3296 return -1;
3297 /* Note that on the 80387 there are other constants, such as pi, that we
3298 should support too. On some machines these are much slower to load as a
3299 standard constant than to load from doubles in memory. */
3300 if (x == CONST0_RTX (GET_MODE (x)))
3301 return 1;
3302 if (x == CONST1_RTX (GET_MODE (x)))
3303 return 2;
e075ae69 3304 return 0;
3305}
3306
3307/* Return 1 if X is an FP constant we can load into an SSE register
3308 without using memory. */
3309int
3310standard_sse_constant_p (x)
3311 rtx x;
3312{
3313 if (GET_CODE (x) != CONST_DOUBLE)
3314 return -1;
3315 return (x == CONST0_RTX (GET_MODE (x)));
3316}
3317
3318/* Returns 1 if OP contains a symbol reference. */
3319
3320int
3321symbolic_reference_mentioned_p (op)
3322 rtx op;
3323{
6f7d635c 3324 register const char *fmt;
3325 register int i;
3326
3327 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3328 return 1;
3329
3330 fmt = GET_RTX_FORMAT (GET_CODE (op));
3331 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3332 {
3333 if (fmt[i] == 'E')
3334 {
3335 register int j;
3336
3337 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3338 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3339 return 1;
3340 }
e9a25f70 3341
3342 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3343 return 1;
3344 }
3345
3346 return 0;
3347}

/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  struct ix86_frame frame;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif
#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32k bytes of popped arguments, since that's
     all we can pop with a single `ret' instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}
\f
/* Return 1 if VALUE can be stored in the sign-extended immediate field.  */
int
x86_64_sign_extended_value (value)
     rtx value;
{
  switch (GET_CODE (value))
    {
      /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
         to be at least 32 and thus all acceptable constants are
         represented as CONST_INT.  */
      case CONST_INT:
        if (HOST_BITS_PER_WIDE_INT == 32)
          return 1;
        else
          {
            HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
            return trunc_int_for_mode (val, SImode) == val;
          }
        break;

      /* For certain code models, the symbolic references are known to fit.  */
      case SYMBOL_REF:
        return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;

      /* For certain code models, the code is near as well.  */
      case LABEL_REF:
        return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;

      /* We may also accept offsetted memory references in certain special
         cases.  */
      case CONST:
        if (GET_CODE (XEXP (value, 0)) == UNSPEC
            && XVECLEN (XEXP (value, 0), 0) == 1
            && XINT (XEXP (value, 0), 1) == 15)
          return 1;
        else if (GET_CODE (XEXP (value, 0)) == PLUS)
          {
            rtx op1 = XEXP (XEXP (value, 0), 0);
            rtx op2 = XEXP (XEXP (value, 0), 1);
            HOST_WIDE_INT offset;

            if (ix86_cmodel == CM_LARGE)
              return 0;
            if (GET_CODE (op2) != CONST_INT)
              return 0;
            offset = trunc_int_for_mode (INTVAL (op2), DImode);
            switch (GET_CODE (op1))
              {
                case SYMBOL_REF:
                  /* For CM_SMALL assume that the latest object is 1MB
                     before the end of the 31-bit boundary.  We may also
                     accept pretty large negative constants knowing that
                     all objects are in the positive half of the address
                     space.  */
                  if (ix86_cmodel == CM_SMALL
                      && offset < 1024*1024*1024
                      && trunc_int_for_mode (offset, SImode) == offset)
                    return 1;
                  /* For CM_KERNEL we know that all objects reside in the
                     negative half of the 32-bit address space.  We may not
                     accept negative offsets, since they may be just off,
                     and we may accept pretty large positive ones.  */
                  if (ix86_cmodel == CM_KERNEL
                      && offset > 0
                      && trunc_int_for_mode (offset, SImode) == offset)
                    return 1;
                  break;
                case LABEL_REF:
                  /* These conditions are similar to the SYMBOL_REF ones,
                     just the constraints for code models differ.  */
                  if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
                      && offset < 1024*1024*1024
                      && trunc_int_for_mode (offset, SImode) == offset)
                    return 1;
                  if (ix86_cmodel == CM_KERNEL
                      && offset > 0
                      && trunc_int_for_mode (offset, SImode) == offset)
                    return 1;
                  break;
                default:
                  return 0;
              }
          }
        return 0;
      default:
        return 0;
    }
}
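
/* (Worked example: the DImode constant -1 passes the sign-extension
   test above, since it is reproduced exactly by sign-extending its low
   32 bits, but 0x80000000 does not -- as a 32-bit immediate it would
   sign-extend to 0xffffffff80000000.  The zero-extension test below
   accepts the complementary set: 0x80000000 passes, -1 does not.)  */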

/* Return 1 if VALUE can be stored in the zero-extended immediate field.  */
int
x86_64_zero_extended_value (value)
     rtx value;
{
  switch (GET_CODE (value))
    {
      case CONST_DOUBLE:
        if (HOST_BITS_PER_WIDE_INT == 32)
          return (GET_MODE (value) == VOIDmode
                  && !CONST_DOUBLE_HIGH (value));
        else
          return 0;
      case CONST_INT:
        if (HOST_BITS_PER_WIDE_INT == 32)
          return INTVAL (value) >= 0;
        else
          return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);

      /* For certain code models, the symbolic references are known to fit.  */
      case SYMBOL_REF:
        return ix86_cmodel == CM_SMALL;

      /* For certain code models, the code is near as well.  */
      case LABEL_REF:
        return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;

      /* We may also accept offsetted memory references in certain special
         cases.  */
      case CONST:
        if (GET_CODE (XEXP (value, 0)) == PLUS)
          {
            rtx op1 = XEXP (XEXP (value, 0), 0);
            rtx op2 = XEXP (XEXP (value, 0), 1);

            if (ix86_cmodel == CM_LARGE)
              return 0;
            switch (GET_CODE (op1))
              {
                case SYMBOL_REF:
                  /* For the small code model we may accept pretty large
                     positive offsets, since one bit is available for free.
                     Negative offsets are limited by the size of the NULL
                     pointer area specified by the ABI.  */
                  if (ix86_cmodel == CM_SMALL
                      && GET_CODE (op2) == CONST_INT
                      && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
                      && (trunc_int_for_mode (INTVAL (op2), SImode)
                          == INTVAL (op2)))
                    return 1;
                  /* ??? For the kernel, we may accept adjustment of
                     -0x10000000, since we know that it will just convert
                     negative address space to positive, but perhaps this
                     is not worthwhile.  */
                  break;
                case LABEL_REF:
                  /* These conditions are similar to the SYMBOL_REF ones,
                     just the constraints for code models differ.  */
                  if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
                      && GET_CODE (op2) == CONST_INT
                      && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
                      && (trunc_int_for_mode (INTVAL (op2), SImode)
                          == INTVAL (op2)))
                    return 1;
                  break;
                default:
                  return 0;
              }
          }
        return 0;
      default:
        return 0;
    }
}

/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

int
ix86_frame_pointer_required ()
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return 1;

  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return 1;

  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
     the frame pointer by default.  Turn it back on now if we've not
     got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
    return 1;

  return 0;
}

/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses ()
{
  cfun->machine->accesses_prev_frame = 1;
}
\f
static char pic_label_name[32];

/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

void
ix86_asm_file_end (file)
     FILE *file;
{
  rtx xops[2];

  if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
    return;

  /* ??? Binutils 2.10 and earlier have a linkonce elimination bug related
     to updating relocations to a section being discarded, such that this
     doesn't work.  Ought to detect this at configure time.  */
#if 0
  /* The trick here is to create a linkonce section containing the
     pic label thunk, but to refer to it with an internal label.
     Because the label is internal, we don't have inter-dso name
     binding issues on hosts that don't support ".hidden".

     In order to use these macros, however, we must create a fake
     function decl.  */
  if (targetm.have_named_sections)
    {
      tree decl = build_decl (FUNCTION_DECL,
                              get_identifier ("i686.get_pc_thunk"),
                              error_mark_node);
      DECL_ONE_ONLY (decl) = 1;
      UNIQUE_SECTION (decl, 0);
      named_section (decl, NULL);
    }
  else
#else
  text_section ();
#endif

  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
     internal (non-global) label that's being emitted, it didn't make
     sense to have .type information for local labels.  This caused
     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
     me debug info for a label that you're declaring non-global?), so
     this was changed to call ASM_OUTPUT_LABEL() instead.  */

  ASM_OUTPUT_LABEL (file, pic_label_name);

  xops[0] = pic_offset_table_rtx;
  xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
  output_asm_insn ("ret", xops);
}
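
/* (Given the operands set up above, the emitted thunk is simply the
   internal "LPR" label followed by

        movl    (%esp), %ebx
        ret

   copying the caller's return address into the PIC base register.)  */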

/* Emit the insns that load the PIC register with the address of the
   global offset table.  */

void
load_pic_register ()
{
  rtx gotsym, pclab;

  if (TARGET_64BIT)
    abort ();

  gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");

  if (TARGET_DEEP_BRANCH_PREDICTION)
    {
      if (! pic_label_name[0])
        ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
      pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
    }
  else
    {
      pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
    }

  emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));

  if (! TARGET_DEEP_BRANCH_PREDICTION)
    emit_insn (gen_popsi1 (pic_offset_table_rtx));

  emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
}

/* Generate a "push" pattern for input ARG.  */

static rtx
gen_push (arg)
     rtx arg;
{
  return gen_rtx_SET (VOIDmode,
                      gen_rtx_MEM (Pmode,
                                   gen_rtx_PRE_DEC (Pmode,
                                                    stack_pointer_rtx)),
                      arg);
}
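
/* (With Pmode == SImode this builds

     (set (mem:SI (pre_dec:SI (reg:SI esp)))
          (reg:SI arg))

   which is the form the push patterns match.)  */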

/* Return 1 if we need to save REGNO.  */
static int
ix86_save_reg (regno, maybe_eh_return)
     int regno;
     int maybe_eh_return;
{
  if (flag_pic
      && ! TARGET_64BIT
      && regno == PIC_OFFSET_TABLE_REGNUM
      && (current_function_uses_pic_offset_table
          || current_function_uses_const_pool
          || current_function_calls_eh_return))
    return 1;

  if (current_function_calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
        {
          unsigned test = EH_RETURN_DATA_REGNO (i);
          if (test == INVALID_REGNUM)
            break;
          if (test == (unsigned) regno)
            return 1;
        }
    }

  return (regs_ever_live[regno]
          && !call_used_regs[regno]
          && !fixed_regs[regno]
          && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}

/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs ()
{
  int nregs = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      nregs++;
  return nregs;
}

/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (from, to)
     int from;
     int to;
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
           && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      if (to != STACK_POINTER_REGNUM)
        abort ();
      else if (from == ARG_POINTER_REGNUM)
        return frame.stack_pointer_offset;
      else if (from != FRAME_POINTER_REGNUM)
        abort ();
      else
        return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}

/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (frame)
     struct ix86_frame *frame;
{
  HOST_WIDE_INT total_size;
  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  int offset;
  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();
  total_size = size;

  /* Skip return value and save base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since the i386 port is the only one using
     these features that may break easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;

  /* Va-arg area */
  if (ix86_save_varrargs_registers)
    {
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    }
  else
    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
                     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  */
  if (ACCUMULATE_OUTGOING_ARGS)
    {
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  */
  frame->padding2 = ((offset + preferred_alignment - 1)
                     & -preferred_alignment) - offset;

  offset += frame->padding2;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf)
    {
      frame->red_zone_size = frame->to_allocate;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
        frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
           frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
#endif
}
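
/* (A sketch of the layout computed above, higher addresses first:

        incoming arguments
        return address
        saved %ebp (if frame_pointer_needed)    <- hard_frame_pointer_offset
        saved registers (frame->nregs words)
        va-arg save area (64-bit varargs only)
        padding1
        local variables                         <- frame_pointer_offset
        outgoing args (if ACCUMULATE_OUTGOING_ARGS)
        padding2                                <- stack_pointer_offset

   frame->to_allocate is everything past the register save area, minus
   whatever a 64-bit leaf function can leave in the red zone.)  */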

/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs ()
{
  register int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      {
        insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
        RTX_FRAME_RELATED_P (insn) = 1;
      }
}

/* Emit code to save registers using MOV insns.  The first register
   is stored at POINTER + OFFSET.  */
static void
ix86_emit_save_regs_using_mov (pointer, offset)
     rtx pointer;
     HOST_WIDE_INT offset;
{
  int regno;
  rtx insn;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
        insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
                                               Pmode, offset),
                               gen_rtx_REG (Pmode, regno));
        RTX_FRAME_RELATED_P (insn) = 1;
        offset += UNITS_PER_WORD;
      }
}

/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue ()
{
  rtx insn;
  int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
                                   || current_function_uses_const_pool)
                      && !TARGET_64BIT);
  struct ix86_frame frame;
  int use_mov = 0;
  HOST_WIDE_INT allocate;

  if (!optimize_size)
    {
      use_fast_prologue_epilogue
        = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
      if (TARGET_PROLOGUE_USING_MOVE)
        use_mov = use_fast_prologue_epilogue;
    }
  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;
  /* In case we are dealing only with a single register and an empty
     frame, push is equivalent to the mov+add sequence.  */
  if (allocate == 0 && frame.nregs <= 1)
    use_mov = 0;

  if (!use_mov)
    ix86_emit_save_regs ();
  else
    allocate += frame.nregs * UNITS_PER_WORD;

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    {
      insn = emit_insn (gen_pro_epilogue_adjust_stack
                        (stack_pointer_rtx, stack_pointer_rtx,
                         GEN_INT (-allocate)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* ??? Is this only valid for Win32?  */

      rtx arg0, sym;

      if (TARGET_64BIT)
        abort ();

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (allocate));

      sym = gen_rtx_MEM (FUNCTION_MODE,
                         gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));

      CALL_INSN_FUNCTION_USAGE (insn)
        = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
                             CALL_INSN_FUNCTION_USAGE (insn));
    }
  if (use_mov)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
                                       -frame.nregs * UNITS_PER_WORD);
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  if (pic_reg_used)
    load_pic_register ();

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  However, if -fpic, the above call will have
     done that.  */
  if ((profile_flag || profile_block_flag) && ! pic_reg_used)
    emit_insn (gen_blockage ());
}
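
/* (Typical output for a small frame-pointer function on the push-based
   path above:

        pushl   %ebp
        movl    %esp, %ebp
        pushl   %ebx
        subl    $frame.to_allocate, %esp

   The use_mov variant instead folds the register saves into one bigger
   stack adjustment and stores each register with a plain mov.)  */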

/* Emit code to restore saved registers using MOV insns.  The first
   register is restored from POINTER + OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
     rtx pointer;
     int offset;
     int maybe_eh_return;
{
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
        emit_move_insn (gen_rtx_REG (Pmode, regno),
                        adjust_address (gen_rtx_MEM (Pmode, pointer),
                                        Pmode, offset));
        offset += UNITS_PER_WORD;
      }
}

/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (style)
     int style;
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    {
      FUNCTION_BLOCK_PROFILER_EXIT;
    }
#endif

  /* If we're only restoring one register and sp is not valid, then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.

     The default code results in a stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well -- especially when there are no registers to restore.  We
     also use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
          && use_fast_prologue_epilogue
          && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
          && use_fast_prologue_epilogue && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
         locations.  If both are available, default to ebp, since offsets
         are known to be small.  The only exception is esp pointing
         directly to the end of the block of saved registers, where we
         may simplify the addressing mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
        ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
                                          frame.to_allocate, style == 2);
      else
        ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
                                          offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
        {
          rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

          if (frame_pointer_needed)
            {
              tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
              tmp = plus_constant (tmp, UNITS_PER_WORD);
              emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

              tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
              emit_move_insn (hard_frame_pointer_rtx, tmp);

              emit_insn (gen_pro_epilogue_adjust_stack
                         (stack_pointer_rtx, sa, const0_rtx));
            }
          else
            {
              tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
              tmp = plus_constant (tmp, (frame.to_allocate
                                         + frame.nregs * UNITS_PER_WORD));
              emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
            }
        }
      else if (!frame_pointer_needed)
        emit_insn (gen_pro_epilogue_adjust_stack
                   (stack_pointer_rtx, stack_pointer_rtx,
                    GEN_INT (frame.to_allocate
                             + frame.nregs * UNITS_PER_WORD)));
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
        emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
        {
          emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
                                                    hard_frame_pointer_rtx,
                                                    const0_rtx));
          if (TARGET_64BIT)
            emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
          else
            emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
        }
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
         pop the registers.  */
      if (!sp_valid)
        {
          if (!frame_pointer_needed)
            abort ();
          emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
                                                    hard_frame_pointer_rtx,
                                                    GEN_INT (offset)));
        }
      else if (frame.to_allocate)
        emit_insn (gen_pro_epilogue_adjust_stack
                   (stack_pointer_rtx, stack_pointer_rtx,
                    GEN_INT (frame.to_allocate)));

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
        if (ix86_save_reg (regno, false))
          {
            if (TARGET_64BIT)
              emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
            else
              emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
          }
      if (frame_pointer_needed)
        {
          /* Leave results in shorter dependency chains on CPUs that are
             able to grok it fast.  */
          if (TARGET_USE_LEAVE)
            emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
          else if (TARGET_64BIT)
            emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
          else
            emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
        }
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
         return address, do explicit add, and jump indirectly to the
         caller.  */

      if (current_function_pops_args >= 65536)
        {
          rtx ecx = gen_rtx_REG (SImode, 2);

          /* There is no "pascal" calling convention in the 64-bit ABI.  */
          if (TARGET_64BIT)
            abort ();

          emit_insn (gen_popsi1 (ecx));
          emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
          emit_jump_insn (gen_return_indirect_internal (ecx));
        }
      else
        emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
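
/* (Correspondingly, the fast frame-pointer path above typically boils
   down to

        movl    -4(%ebp), %ebx
        leave
        ret

   while the pop-based path rewinds %esp first and then issues one pop
   per saved register.)  */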
\f
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return false if the structure of the address is
   grossly off.  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
        {
          if (code1 == REG || code1 == SUBREG)
            index = op0, base = op1;    /* index + base */
          else
            base = op0, disp = op1;     /* base + displacement */
        }
      else if (code0 == MULT)
        {
          index = XEXP (op0, 0);
          scale_rtx = XEXP (op0, 1);
          if (code1 == REG || code1 == SUBREG)
            base = op1;                 /* index*scale + base */
          else
            disp = op1;                 /* index*scale + disp */
        }
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
        {
          index = XEXP (XEXP (op0, 0), 0);      /* index*scale + base + disp */
          scale_rtx = XEXP (XEXP (op0, 0), 1);
          base = XEXP (op0, 1);
          disp = op1;
        }
      else if (code0 == PLUS)
        {
          index = XEXP (op0, 0);        /* index + base + disp */
          base = XEXP (op0, 1);
          disp = op1;
        }
      else
        return FALSE;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);           /* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
        return FALSE;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
        return FALSE;
      scale = 1 << scale;
    }
  else
    disp = addr;                        /* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
        return FALSE;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
          || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return TRUE;
}
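
/* (Example, with a and b standing for arbitrary registers: the address

        (plus:SI (plus:SI (mult:SI (reg:SI a) (const_int 4))
                          (reg:SI b))
                 (const_int 12))

   decomposes into base = b, index = a, scale = 4, disp = 12, i.e. the
   operand written 12(%b,%a,4) in AT&T syntax.)  */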
\f
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
int
ix86_address_cost (x)
     rtx x;
{
  struct ix86_address parts;
  int cost = 1;

  if (!ix86_decompose_address (x, &parts))
    abort ();

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
          && (!REG_P (parts.index)
              || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoding.  Increase the cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be
     worthwhile to split such addresses or even refuse such addresses
     at all.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

   The first and last case may be avoidable by explicitly coding the zero in
   memory address, but I don't have AMD-K6 machine handy to check this
   theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
          || (parts.disp && !parts.base && parts.index && parts.scale != 1)
          || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
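
/* (Working the rules above: "4(%ebp)" starts at cost 1, drops to 0 for
   its nonzero displacement, and its hard register base adds nothing
   back; "(%eax,%ebx)" stays at cost 1 on most processors but becomes
   11 on the K6 through the penalty just described.)  */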
\f
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (x)
     rtx x;
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
        return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
          && (GET_CODE (XEXP (term, 1)) == CONST_INT
              || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
        term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
          || XVECLEN (term, 0) != 1
          || XINT (term, 1) != 15)
        return x;

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
          && GET_CODE (term) != LABEL_REF)
        return x;

      return term;
    }

  if (GET_CODE (x) != PLUS
      || XEXP (x, 0) != pic_offset_table_rtx
      || GET_CODE (XEXP (x, 1)) != CONST)
    return x;

  term = XEXP (XEXP (x, 1), 0);

  if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
    term = XEXP (term, 0);

  if (GET_CODE (term) != UNSPEC
      || XVECLEN (term, 0) != 1
      || XINT (term, 1) != 7)
    return x;

  term = XVECEXP (term, 0, 0);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}
\f
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (disp)
     register rtx disp;
{
  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx x = disp;
      if (GET_CODE (disp) == CONST)
        x = XEXP (disp, 0);
      /* ??? Handle PIC code models */
      if (GET_CODE (x) == PLUS
          && (GET_CODE (XEXP (x, 1)) == CONST_INT
              && ix86_cmodel == CM_SMALL_PIC
              && INTVAL (XEXP (x, 1)) < 1024*1024*1024
              && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
        x = XEXP (x, 0);
      if (local_symbolic_operand (x, Pmode))
        return 1;
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions; this would limit the
         allowed distance of GOT table references.  We should not need
         these anyway.  */
      if (GET_CODE (disp) != UNSPEC
          || XVECLEN (disp, 0) != 1
          || XINT (disp, 1) != 15)
        return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
          && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
        return 0;
      return 1;
    }

  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
        return 0;
      disp = XEXP (disp, 0);
    }

  if (GET_CODE (disp) != UNSPEC
      || XVECLEN (disp, 0) != 1)
    return 0;

  /* Must be @GOT or @GOTOFF.  */
  switch (XINT (disp, 1))
    {
    case 6: /* @GOT */
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;

    case 7: /* @GOTOFF */
      return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}

/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
               "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
               GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (! ix86_decompose_address (addr, &parts))
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
        {
          reason = "base is not a register";
          goto report_error;
        }

      if (GET_MODE (base) != Pmode)
        {
          reason = "base is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
          || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
        {
          reason = "base is not valid";
          goto report_error;
        }
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
        {
          reason = "index is not a register";
          goto report_error;
        }

      if (GET_MODE (index) != Pmode)
        {
          reason = "index is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
          || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
        {
          reason = "index is not valid";
          goto report_error;
        }
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
        {
          reason = "scale without index";
          goto report_error;
        }

      if (scale != 2 && scale != 4 && scale != 8)
        {
          reason = "scale is not a valid multiplier";
          goto report_error;
        }
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (!CONSTANT_ADDRESS_P (disp))
        {
          reason = "displacement is not constant";
          goto report_error;
        }

      if (TARGET_64BIT)
        {
          if (!x86_64_sign_extended_value (disp))
            {
              reason = "displacement is out of range";
              goto report_error;
            }
        }
      else
        {
          if (GET_CODE (disp) == CONST_DOUBLE)
            {
              reason = "displacement is a const_double";
              goto report_error;
            }
        }

      if (flag_pic && SYMBOLIC_CONST (disp))
        {
          if (TARGET_64BIT && (index || base))
            {
              reason = "non-constant pic memory reference";
              goto report_error;
            }
          if (! legitimate_pic_address_disp_p (disp))
            {
              reason = "displacement is an invalid pic construct";
              goto report_error;
            }

          /* This code used to verify that a symbolic pic displacement
             includes the pic_offset_table_rtx register.

             While this is a good idea, unfortunately these constructs may
             be created by the "adds using lea" optimization for incorrect
             code like:

             int a;
             int foo(int i)
               {
                 return *(&a+i);
               }

             This code is nonsensical, but results in addressing the
             GOT table with a pic_offset_table_rtx base.  We can't
             just refuse it easily, since it gets matched by the
             "addsi3" pattern, which later gets split to lea in the
             case the output register differs from the input.  While
             this could be handled by a separate addsi pattern for this
             case that never results in lea, disabling this test seems
             to be the easier and correct fix for the crash.  */
        }
      else if (HALF_PIC_P ())
        {
          if (! HALF_PIC_ADDRESS_P (disp)
              || (base != NULL_RTX || index != NULL_RTX))
            {
              reason = "displacement is an invalid half-pic reference";
              goto report_error;
            }
        }
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
\f
/* Return a unique alias set for the GOT.  */

static HOST_WIDE_INT
ix86_GOT_alias_set ()
{
  static HOST_WIDE_INT set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}

/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (orig, reg)
     rtx orig;
     rtx reg;
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

  if (local_symbolic_operand (addr, Pmode))
    {
      /* In 64bit mode we can address such objects directly.  */
      if (TARGET_64BIT)
        new = addr;
      else
        {
          /* This symbol may be referenced via a displacement from the PIC
             base address (@GOTOFF).  */

          current_function_uses_pic_offset_table = 1;
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
          new = gen_rtx_CONST (Pmode, new);
          new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

          if (reg != 0)
            {
              emit_move_insn (reg, new);
              new = reg;
            }
        }
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
        {
          current_function_uses_pic_offset_table = 1;
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15);
          new = gen_rtx_CONST (Pmode, new);
          new = gen_rtx_MEM (Pmode, new);
          RTX_UNCHANGING_P (new) = 1;
          set_mem_alias_set (new, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          /* Use gen_movsi directly; otherwise the address would be
             loaded into a register for CSE.  We don't want to CSE these
             addresses -- instead we CSE the addresses loaded from the
             GOT table -- so skip this.  */
          emit_insn (gen_movsi (reg, new));
          new = reg;
        }
      else
        {
          /* This symbol must be referenced via a load from the
             Global Offset Table (@GOT).  */

          current_function_uses_pic_offset_table = 1;
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
          new = gen_rtx_CONST (Pmode, new);
          new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
          new = gen_rtx_MEM (Pmode, new);
          RTX_UNCHANGING_P (new) = 1;
          set_mem_alias_set (new, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          emit_move_insn (reg, new);
          new = reg;
        }
    }
  else
    {
      if (GET_CODE (addr) == CONST)
        {
          addr = XEXP (addr, 0);
          if (GET_CODE (addr) == UNSPEC)
            {
              /* Check that the unspec is one of the ones we generate?  */
            }
          else if (GET_CODE (addr) != PLUS)
            abort ();
        }
      if (GET_CODE (addr) == PLUS)
        {
          rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

          /* Check first to see if this is a constant offset from a @GOTOFF
             symbol reference.  */
          if (local_symbolic_operand (op0, Pmode)
              && GET_CODE (op1) == CONST_INT)
            {
              if (!TARGET_64BIT)
                {
                  current_function_uses_pic_offset_table = 1;
                  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
                  new = gen_rtx_PLUS (Pmode, new, op1);
                  new = gen_rtx_CONST (Pmode, new);
                  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

                  if (reg != 0)
                    {
                      emit_move_insn (reg, new);
                      new = reg;
                    }
                }
              else
                {
                  /* ??? We need to limit offsets here.  */
                }
            }
          else
            {
              base = legitimize_pic_address (XEXP (addr, 0), reg);
              new = legitimize_pic_address (XEXP (addr, 1),
                                            base == reg ? NULL_RTX : reg);

              if (GET_CODE (new) == CONST_INT)
                new = plus_constant (base, INTVAL (new));
              else
                {
                  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
                    {
                      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
                      new = XEXP (new, 1);
                    }
                  new = gen_rtx_PLUS (Pmode, base, new);
                }
            }
        }
    }
  return new;
}
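
/* (Concretely, with %reg standing for whatever register is chosen: a
   local/static symbol `s' becomes the @GOTOFF form,

        leal    s@GOTOFF(%ebx), %reg

   computed off the PIC base, while a global `g' goes through the table,

        movl    g@GOT(%ebx), %reg

   matching the unspec 7 and unspec 6 constants constructed above.)  */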
\f
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (x, oldx, mode)
     register rtx x;
     register rtx oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
               GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
                        GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
          && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
          && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
        {
          changed = 1;
          XEXP (x, 0) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
                                      GEN_INT (1 << log));
        }

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
          && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
          && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
        {
          changed = 1;
          XEXP (x, 1) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
                                      GEN_INT (1 << log));
        }

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          rtx tmp = XEXP (x, 0);
          XEXP (x, 0) = XEXP (x, 1);
          XEXP (x, 1) = tmp;
          changed = 1;
        }

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
         created by virtual register instantiation, register elimination, and
         similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
        {
          changed = 1;
          x = gen_rtx_PLUS (Pmode,
                            gen_rtx_PLUS (Pmode, XEXP (x, 0),
                                          XEXP (XEXP (x, 1), 0)),
                            XEXP (XEXP (x, 1), 1));
        }

      /* Canonicalize
         (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
               && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
               && CONSTANT_P (XEXP (x, 1)))
        {
          rtx constant;
          rtx other = NULL_RTX;

          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              constant = XEXP (x, 1);
              other = XEXP (XEXP (XEXP (x, 0), 1), 1);
            }
          else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
            {
              constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
              other = XEXP (x, 1);
            }
          else
            constant = 0;

          if (constant)
            {
              changed = 1;
              x = gen_rtx_PLUS (Pmode,
                                gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
                                              XEXP (XEXP (XEXP (x, 0), 1), 0)),
                                plus_constant (other, INTVAL (constant)));
            }
        }

      if (changed && legitimate_address_p (mode, x, FALSE))
        return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
        {
          changed = 1;
          XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
        }

      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          changed = 1;
          XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
        }

      if (changed
          && GET_CODE (XEXP (x, 1)) == REG
          && GET_CODE (XEXP (x, 0)) == REG)
        return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
        {
          changed = 1;
          x = legitimize_pic_address (x, 0);
        }

      if (changed && legitimate_address_p (mode, x, FALSE))
        return x;

      if (GET_CODE (XEXP (x, 0)) == REG)
        {
          register rtx temp = gen_reg_rtx (Pmode);
          register rtx val = force_operand (XEXP (x, 1), temp);
          if (val != temp)
            emit_move_insn (temp, val);

          XEXP (x, 1) = temp;
          return x;
        }

      else if (GET_CODE (XEXP (x, 1)) == REG)
        {
          register rtx temp = gen_reg_rtx (Pmode);
          register rtx val = force_operand (XEXP (x, 0), temp);
          if (val != temp)
            emit_move_insn (temp, val);

          XEXP (x, 0) = temp;
          return x;
        }
    }

  return x;
}
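
/* (E.g. (plus (ashift (reg) (const_int 2)) (reg)) is rewritten above
   into (plus (mult (reg) (const_int 4)) (reg)), which
   ix86_decompose_address then accepts as a scaled-index address.)  */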
2a2ab3f9
JVA
\f
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      if (code == 'P' && ! SYMBOL_REF_FLAG (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      putc (ASSEMBLER_DIALECT ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case 6:
	  fputs ("@GOT", file);
	  break;
	case 7:
	  fputs ("@GOTOFF", file);
	  break;
	case 8:
	  fputs ("@PLT", file);
	  break;
	case 15:
	  fputs ("@GOTPCREL(%RIP)", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
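
/* Worked example (illustrative only; the symbol names are hypothetical):
   for (const (unspec [(symbol_ref "foo")] 7)) the function above emits
   `foo@GOTOFF', and a SYMBOL_REF `bar' printed with code 'P' and no
   SYMBOL_REF_FLAG emits `bar@PLT'.  */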

/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
   We need to handle our special PIC relocations.  */

void
i386_dwarf_output_addr_const (file, x)
     FILE *file;
     rtx x;
{
#ifdef ASM_QUAD
  fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : INT_ASM_OP);
#else
  if (TARGET_64BIT)
    abort ();
  fprintf (file, "%s", INT_ASM_OP);
#endif
  if (flag_pic)
    output_pic_addr_const (file, x, '\0');
  else
    output_addr_const (file, x);
  fputc ('\n', file);
}

/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.  */

rtx
i386_simplify_dwarf_addr (orig_x)
     rtx orig_x;
{
  rtx x = orig_x;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || XINT (XEXP (x, 0), 1) != 15)
	return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 0)) != REG
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == UNSPEC
      && (XINT (x, 1) == 6
	  || XINT (x, 1) == 7))
    return XVECEXP (x, 0, 0);

  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (XINT (XEXP (x, 0), 1) == 6
	  || XINT (XEXP (x, 0), 1) == 7))
    return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));

  return orig_x;
}
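
/* Worked example (illustrative only): given the PIC form
   (plus (reg pic) (const (unspec [(symbol_ref "foo")] 7))), the function
   above strips the UNSPEC wrapper and returns the bare
   (symbol_ref "foo"), plus any constant offset if one was present, so
   the debug info can reference the symbol directly.  */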
\f
static void
put_condition_code (code, mode, reverse, fp, file)
     enum rtx_code code;
     enum machine_mode mode;
     int reverse, fp;
     FILE *file;
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	abort ();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
	abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	abort ();
      break;
    case LTU:
      if (mode != CCmode)
	abort ();
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	abort ();
      break;
    case GEU:
      /* ??? As above.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
	abort ();
      suffix = "le";
      break;
    case LEU:
      if (mode != CCmode)
	abort ();
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}
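
/* Worked example (illustrative only): a signed greater-than test in
   CCGCmode emits the suffix "g", so a "set%C0" template expands to
   `setg'; unsigned GTU in CCmode emits "a" (`seta'), or "nbe" when the
   fp/fcmov form is requested.  */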

void
print_reg (x, code, file)
     rtx x;
     int code;
     FILE *file;
{
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use a different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      if (!TARGET_64BIT)
	abort ();
      switch (code)
	{
	case 0:
	  error ("Extended registers have no high halves\n");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("Unsupported operand size for extended register.\n");
	  break;
	}
      return;
    }
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}
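
/* Worked example (illustrative only): for (reg:SI 0), the ax register,
   code 'b' prints `%al', code 'w' prints `%ax', code 'h' prints `%ah',
   and no code at all selects the full SImode name `%eax'.  In 64-bit
   mode a REX register such as r8 printed with code 'k' yields `%r8d'.  */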

/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   X -- don't print any sort of PIC '@' suffix for a symbol.
 */
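
/* Worked example (illustrative only): in an output template such as
   "add{l}\t{%2, %0|%0, %2}", the braces select between AT&T and Intel
   operand order, %k2 would force the SImode register name, and %z0
   supplies the size suffix derived from operand 0's mode.  */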

void
print_operand (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  if (code)
    {
      switch (code)
	{
	case '*':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('*', file);
	  return;

	case 'A':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('*', file);
	  else if (ASSEMBLER_DIALECT == 1)
	    {
	      /* Intel syntax.  For absolute addresses, registers should not
		 be surrounded by braces.  */
	      if (GET_CODE (x) != REG)
		{
		  putc ('[', file);
		  PRINT_OPERAND (file, x, 0);
		  putc (']', file);
		  return;
		}
	    }

	  PRINT_OPERAND (file, x, 0);
	  return;

	case 'L':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('t', file);
	  return;

	case 'z':
	  /* 387 opcodes don't get size suffixes if the operands are
	     registers.  */

	  if (STACK_REG_P (x))
	    return;

	  /* This is the size of the op, taken from the size of the
	     operand.  */
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 2:
#ifdef HAVE_GAS_FILDS_FISTS
	      putc ('s', file);
#endif
	      return;

	    case 4:
	      if (GET_MODE (x) == SFmode)
		{
		  putc ('s', file);
		  return;
		}
	      else
		putc ('l', file);
	      return;

	    case 12:
	    case 16:
	      putc ('t', file);
	      return;

	    case 8:
	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
		{
#ifdef GAS_MNEMONICS
		  putc ('q', file);
#else
		  putc ('l', file);
		  putc ('l', file);
#endif
		}
	      else
		putc ('l', file);
	      return;

	    default:
	      abort ();
	    }

	case 'b':
	case 'w':
	case 'k':
	case 'q':
	case 'h':
	case 'y':
	case 'X':
	case 'P':
	  break;

	case 's':
	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      PRINT_OPERAND (file, x, 0);
	      putc (',', file);
	    }
	  return;

	case 'D':
	  /* A little bit of braindamage here: the SSE compare instructions
	     use completely different names for the comparisons than the
	     fp conditional moves do.  */
	  switch (GET_CODE (x))
	    {
	    case EQ:
	    case UNEQ:
	      fputs ("eq", file);
	      break;
	    case LT:
	    case UNLT:
	      fputs ("lt", file);
	      break;
	    case LE:
	    case UNLE:
	      fputs ("le", file);
	      break;
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case NE:
	    case LTGT:
	      fputs ("neq", file);
	      break;
	    case UNGE:
	    case GE:
	      fputs ("nlt", file);
	      break;
	    case UNGT:
	    case GT:
	      fputs ("nle", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    default:
	      abort ();
	      break;
	    }
	  return;
	case 'C':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
	  return;
	case 'F':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
	  return;

	  /* Like above, but reverse condition.  */
	case 'c':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
	  return;
	case 'f':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
	  return;
	case '+':
	  {
	    rtx x;

	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
	      return;

	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	    if (x)
	      {
		int pred_val = INTVAL (XEXP (x, 0));

		if (pred_val < REG_BR_PROB_BASE * 45 / 100
		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
		  {
		    int taken = pred_val > REG_BR_PROB_BASE / 2;
		    int cputaken = final_forward_branch_p (current_output_insn) == 0;

		    /* Emit hints only in the cases where the default branch
		       prediction heuristics would fail.  */
		    if (taken != cputaken)
		      {
			/* We use 3e (DS) prefix for taken branches and
			   2e (CS) prefix for not taken branches.  */
			if (taken)
			  fputs ("ds ; ", file);
			else
			  fputs ("cs ; ", file);
		      }
		  }
	      }
	    return;
	  }
	default:
	  {
	    char str[50];

	    sprintf (str, "invalid operand code `%c'", code);
	    output_operand_lossage (str);
	  }
	}
    }

  if (GET_CODE (x) == REG)
    {
      PRINT_REG (x, code, file);
    }

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
	{
	  const char * size;
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "XWORD"; break;
	    case 16: size = "XMMWORD"; break;
	    default:
	      abort ();
	    }

	  /* Check for explicit size override (codes 'b', 'w' and 'k').  */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";

	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      x = XEXP (x, 0);
      if (flag_pic && CONSTANT_ADDRESS_P (x))
	output_pic_addr_const (file, x, code);
      /* Avoid (%rip) for call operands.  */
      else if (CONSTANT_ADDRESS_P (x) && code == 'P'
	       && GET_CODE (x) != CONST_INT)
	output_addr_const (file, x);
      else
	output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == 0)
	putc ('$', file);
      fprintf (file, "0x%lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
	   && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }
  else
    {
      if (code != 'P')
	{
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}
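
/* Worked example (illustrative only): an SFmode constant 1.0 used as an
   immediate is printed from its bit pattern as `$0x3f800000' in AT&T
   syntax, while a symbolic immediate in Intel syntax gets the
   `OFFSET FLAT:' prefix instead of `$'.  */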
\f
/* Print a memory operand whose address is ADDR.  */

void
print_operand_address (file, addr)
     FILE *file;
     register rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
	{
	  if (ASSEMBLER_DIALECT != 0)
	    {
	      if (USER_LABEL_PREFIX[0] == 0)
		putc ('%', file);
	      fputs ("ds:", file);
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
	}
      else if (flag_pic)
	output_pic_addr_const (file, addr, 0);
      else
	output_addr_const (file, addr);

      /* Use one byte shorter RIP relative addressing for 64bit mode.  */
      if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
	fputs ("(%rip)", file);
    }
  else
    {
      if (ASSEMBLER_DIALECT == 0)
	{
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    PRINT_REG (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (GET_CODE (disp) == CONST_INT)
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      PRINT_REG (base, 0, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
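
/* Worked example (illustrative only): base %ebp, index %eax, scale 4 and
   displacement 8 print as `8(%ebp,%eax,4)' in AT&T syntax and as
   `[ebp+eax*4+8]' in Intel syntax.  */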
\f
/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_di (operands, num, lo_half, hi_half)
     rtx operands[];
     int num;
     rtx lo_half[], hi_half[];
{
  while (num--)
    {
      rtx op = operands[num];
      if (CONSTANT_P (op))
	split_double (op, &lo_half[num], &hi_half[num]);
      else if (! reload_completed)
	{
	  lo_half[num] = gen_lowpart (SImode, op);
	  hi_half[num] = gen_highpart (SImode, op);
	}
      else if (GET_CODE (op) == REG)
	{
	  if (TARGET_64BIT)
	    abort ();
	  lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
	  hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
	}
      else if (offsettable_memref_p (op))
	{
	  lo_half[num] = adjust_address (op, SImode, 0);
	  hi_half[num] = adjust_address (op, SImode, 4);
	}
      else
	abort ();
    }
}
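
/* Worked example (illustrative only): after reload, a DImode value held
   in hard registers 0 and 1 (%eax and %edx on this port) splits into the
   SImode low part %eax and high part %edx, while an offsettable memory
   operand splits into the words at offsets 0 and 4.  */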
\f
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (insn, operands)
     rtx insn;
     rtx *operands;
{
  static char buf[30];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else if (!is_sse)
    abort ();
#endif

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "add";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "sub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "mul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "div";
      break;

    default:
      abort ();
    }

  if (is_sse)
    {
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
	strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
	strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* We now know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (GET_CODE (operands[1]) == MEM)
	{
	  p = "r%z1\t%1";
	  break;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      abort ();
    }

  strcat (buf, p);
  return buf;
}
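
/* Worked example (illustrative only): an SFmode PLUS with all operands
   in SSE registers returns "addss\t{%2, %0|%0, %2}", i.e.
   `addss %xmm1, %xmm0' in AT&T syntax; a 387 PLUS whose second operand
   is in memory returns "fadd%z2\t%2".  */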

/* Output code to initialize control word copies used by the
   trunc?f?i patterns.  NORMAL is set to the current control word, while
   ROUND_DOWN is set to a control word that rounds downwards.  */

void
emit_i387_cw_initialization (normal, round_down)
     rtx normal, round_down;
{
  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (normal));
  emit_move_insn (reg, normal);
  if (!TARGET_PARTIAL_REG_STALL && !optimize_size
      && !TARGET_64BIT)
    emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
  else
    emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
  emit_move_insn (round_down, reg);
}
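
/* Sketch of what the sequence above achieves (illustrative only): both
   branches set the two rounding-control bits (bits 10 and 11) of the
   387 control word -- one by storing 0xc into its second byte, the
   other by OR-ing in 0xc00 -- selecting round-toward-zero, which is
   what the trunc?f?i patterns need.  */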

/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (insn, operands)
     rtx insn;
     rtx *operands;
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if (dimode_p && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  if (!STACK_TOP_P (operands[1]))
    abort ();

  if (GET_CODE (operands[0]) != MEM)
    abort ();

  output_asm_insn ("fldcw\t%3", operands);
  if (stack_top_dies || dimode_p)
    output_asm_insn ("fistp%z0\t%0", operands);
  else
    output_asm_insn ("fist%z0\t%0", operands);
  output_asm_insn ("fldcw\t%2", operands);

  return "";
}

/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
   when fucom should be used.  */

const char *
output_fp_compare (insn, operands, eflags_p, unordered_p)
     rtx insn;
     rtx *operands;
     int eflags_p, unordered_p;
{
  int stack_top_dies;
  rtx cmp_op0 = operands[0];
  rtx cmp_op1 = operands[1];
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);

  if (eflags_p == 2)
    {
      cmp_op0 = cmp_op1;
      cmp_op1 = operands[2];
    }
  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "ucomiss\t{%1, %0|%0, %1}";
	else
	  return "comiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "ucomisd\t{%1, %0|%0, %1}";
	else
	  return "comisd\t{%1, %0|%0, %1}";
    }

  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If the top of the 387 stack dies, and the other operand is
	 also a stack register that dies, then this must be an
	 `fcompp' float compare.  */

      if (eflags_p == 1)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return "fstp\t%y0";
	}
      else
	{
	  if (eflags_p == 2)
	    {
	      if (unordered_p)
		return "fucompp\n\tfnstsw\t%0";
	      else
		return "fcompp\n\tfnstsw\t%0";
	    }
	  else
	    {
	      if (unordered_p)
		return "fucompp";
	      else
		return "fcompp";
	    }
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[24] =
      {
	"fcom%z1\t%y1",
	"fcomp%z1\t%y1",
	"fucom%z1\t%y1",
	"fucomp%z1\t%y1",

	"ficom%z1\t%y1",
	"ficomp%z1\t%y1",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL,

	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      if (mask >= 24)
	abort ();
      ret = alt[mask];
      if (ret == NULL)
	abort ();

      return ret;
    }
}
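
/* Worked example (illustrative only): with eflags_p == 1, a non-integer
   operand, unordered_p true and a dying stack top, the mask is
   8 + 0 + 2 + 1 = 11, selecting "fucomip\t{%y1, %0|%0, %y1}" from the
   table above.  */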

/* Output assembler code to FILE to initialize basic-block profiling.

   If profile_block_flag == 2

	Output code to call the subroutine `__bb_init_trace_func'
	and pass two parameters to it.  The first parameter is
	the address of a block allocated in the object module.
	The second parameter is the number of the first basic block
	of the function.

	The name of the block is a local symbol made with this statement:

	    ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);

	Of course, since you are writing the definition of
	`ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
	can take a short cut in the definition of this macro and use the
	name that you know will result.

	The number of the first basic block of the function is
	passed to the macro in BLOCK_OR_LABEL.

	If described in a virtual assembler language the code to be
	output looks like:

		parameter1 <- LPBX0
		parameter2 <- BLOCK_OR_LABEL
		call __bb_init_trace_func

   else if profile_block_flag != 0

	Output code to call the subroutine `__bb_init_func'
	and pass one single parameter to it, which is the same
	as the first parameter to `__bb_init_trace_func'.

	The first word of this parameter is a flag which will be nonzero if
	the object module has already been initialized.  So test this word
	first, and do not call `__bb_init_func' if the flag is nonzero.
	Note: When profile_block_flag == 2 the test need not be done
	but `__bb_init_trace_func' *must* be called.

	BLOCK_OR_LABEL may be used to generate a label number as a
	branch destination in case `__bb_init_func' will not be called.

	If described in a virtual assembler language the code to be
	output looks like:

		cmp (LPBX0),0
		jne local_label
		parameter1 <- LPBX0
		call __bb_init_func
	    local_label:
*/

void
ix86_output_function_block_profiler (file, block_or_label)
     FILE *file;
     int block_or_label;
{
  static int num_func = 0;
  rtx xops[8];
  char block_table[80], false_label[80];

  ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);

  xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
  xops[5] = stack_pointer_rtx;
  xops[7] = gen_rtx_REG (Pmode, 0);	/* eax */

  CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;

  switch (profile_block_flag)
    {
    case 2:
      xops[2] = GEN_INT (block_or_label);
      xops[3] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
      xops[6] = GEN_INT (8);

      output_asm_insn ("push{l}\t%2", xops);
      if (!flag_pic)
	output_asm_insn ("push{l}\t%1", xops);
      else
	{
	  output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
	  output_asm_insn ("push{l}\t%7", xops);
	}
      output_asm_insn ("call\t%P3", xops);
      output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
      break;

    default:
      ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);

      xops[0] = const0_rtx;
      xops[2] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (VOIDmode, false_label));
      xops[3] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
      xops[4] = gen_rtx_MEM (Pmode, xops[1]);
      xops[6] = GEN_INT (4);

      CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;

      output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
      output_asm_insn ("jne\t%2", xops);

      if (!flag_pic)
	output_asm_insn ("push{l}\t%1", xops);
      else
	{
	  output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
	  output_asm_insn ("push{l}\t%7", xops);
	}
      output_asm_insn ("call\t%P3", xops);
      output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
      ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
      num_func++;
      break;
    }
}
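
/* Sketch of the non-PIC sequence emitted for profile_block_flag == 2
   (illustrative only), in AT&T syntax:

	pushl	$BLOCK_OR_LABEL
	pushl	$LPBX0
	call	__bb_init_trace_func
	addl	$8, %esp
*/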

/* Output assembler code to FILE to increment a counter associated
   with basic block number BLOCKNO.

   If profile_block_flag == 2

	Output code to initialize the global structure `__bb' and
	call the function `__bb_trace_func' which will increment the
	counter.

	`__bb' consists of two words.  In the first word the number
	of the basic block has to be stored.  In the second word
	the address of a block allocated in the object module
	has to be stored.

	The basic block number is given by BLOCKNO.

	The address of the block is given by the label created with

	    ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);

	by FUNCTION_BLOCK_PROFILER.

	Of course, since you are writing the definition of
	`ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
	can take a short cut in the definition of this macro and use the
	name that you know will result.

	If described in a virtual assembler language the code to be
	output looks like:

		move BLOCKNO -> (__bb)
		move LPBX0 -> (__bb+4)
		call __bb_trace_func

	Note that function `__bb_trace_func' must not change the
	machine state, especially the flag register.  To grant
	this, you must output code to save and restore registers
	either in this macro or in the macros MACHINE_STATE_SAVE
	and MACHINE_STATE_RESTORE.  The last two macros will be
	used in the function `__bb_trace_func', so you must make
	sure that the function prologue does not change any
	register prior to saving it with MACHINE_STATE_SAVE.

   else if profile_block_flag != 0

	Output code to increment the counter directly.
	Basic blocks are numbered separately from zero within each
	compiled object module.  The count associated with block number
	BLOCKNO is at index BLOCKNO in an array of words; the name of
	this array is a local symbol made with this statement:

	    ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);

	Of course, since you are writing the definition of
	`ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
	can take a short cut in the definition of this macro and use the
	name that you know will result.

	If described in a virtual assembler language the code to be
	output looks like:

		inc (LPBX2+4*BLOCKNO)
*/

void
ix86_output_block_profiler (file, blockno)
     FILE *file ATTRIBUTE_UNUSED;
     int blockno;
{
  rtx xops[8], cnt_rtx;
  char counts[80];
  char *block_table = counts;

  switch (profile_block_flag)
    {
    case 2:
      ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);

      xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
      xops[2] = GEN_INT (blockno);
      xops[3] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
      xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
      xops[5] = plus_constant (xops[4], 4);
      xops[0] = gen_rtx_MEM (SImode, xops[4]);
      xops[6] = gen_rtx_MEM (SImode, xops[5]);

      CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;

      output_asm_insn ("pushf", xops);
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      if (flag_pic)
	{
	  xops[7] = gen_rtx_REG (Pmode, 0);	/* eax */
	  output_asm_insn ("push{l}\t%7", xops);
	  output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
	  output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
	  output_asm_insn ("pop{l}\t%7", xops);
	}
      else
	output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
      output_asm_insn ("call\t%P3", xops);
      output_asm_insn ("popf", xops);

      break;

    default:
      ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
      cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
      SYMBOL_REF_FLAG (cnt_rtx) = TRUE;

      if (blockno)
	cnt_rtx = plus_constant (cnt_rtx, blockno * 4);

      if (flag_pic)
	cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);

      xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
      output_asm_insn ("inc{l}\t%0", xops);

      break;
    }
}
\f
void
ix86_expand_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  int strict = (reload_in_progress || reload_completed);
  rtx insn;

  if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
    {
      /* Emit insns to move operands[1] into operands[0].  */

      if (GET_CODE (operands[0]) == MEM)
	operands[1] = force_reg (Pmode, operands[1]);
      else
	{
	  rtx temp = operands[0];
	  if (GET_CODE (temp) != REG)
	    temp = gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (operands[1], temp);
	  if (temp == operands[0])
	    return;
	  operands[1] = temp;
	}
    }
  else
    {
      if (GET_CODE (operands[0]) == MEM
	  && (GET_MODE (operands[0]) == QImode
	      || !push_operand (operands[0], mode))
	  && GET_CODE (operands[1]) == MEM)
	operands[1] = force_reg (mode, operands[1]);

      if (push_operand (operands[0], mode)
	  && ! general_no_elim_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (operands[1]) == CONST_DOUBLE
		   && register_operand (operands[0], mode))
	    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
	}
    }

  insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);

  emit_insn (insn);
}

/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators.  */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE.  */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
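
/* Usage sketch (illustrative only): expanding a PLUS whose memory
   destination matches src1 emits a single PARALLEL of the form

	[(set (mem:SI X) (plus:SI (mem:SI X) (reg:SI Y)))
	 (clobber (reg:CC flags))]

   which matches the two-address `addl' patterns; the flags clobber is
   there because i386 arithmetic instructions always overwrite the
   flags register.  */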

/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

int
ix86_binary_operator_ok (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[3];
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutable, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
	    || (GET_RTX_CLASS (code) == 'c'
		&& rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutable and source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != 'c'
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;
  return 1;
}

/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When the source operand is memory, the destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE.  */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (code, mode, operands)
     enum rtx_code code ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[2] ATTRIBUTE_UNUSED;
{
  /* If one of the operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}

/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (insn, req_mode)
     rtx insn;
     enum machine_mode req_mode;
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}

/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
6916
3a3677ff
RH
6917/* Figure out whether to use ordered or unordered fp comparisons.
6918 Return the appropriate mode to use. */
e075ae69 6919
b1cdafbb 6920enum machine_mode
3a3677ff 6921ix86_fp_compare_mode (code)
8752c357 6922 enum rtx_code code ATTRIBUTE_UNUSED;
e075ae69 6923{
9e7adcb3
JH
6924 /* ??? In order to make all comparisons reversible, we do all comparisons
6925 non-trapping when compiling for IEEE. Once gcc is able to distinguish
 6926   all forms of trapping and non-trapping comparisons, we can make inequality
6927 comparisons trapping again, since it results in better code when using
6928 FCOM based compares. */
6929 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
6930}
6931
9076b9c1
JH
6932enum machine_mode
6933ix86_cc_mode (code, op0, op1)
6934 enum rtx_code code;
6935 rtx op0, op1;
6936{
6937 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
6938 return ix86_fp_compare_mode (code);
6939 switch (code)
6940 {
6941 /* Only zero flag is needed. */
6942 case EQ: /* ZF=0 */
6943 case NE: /* ZF!=0 */
6944 return CCZmode;
6945 /* Codes needing carry flag. */
265dab10
JH
6946 case GEU: /* CF=0 */
6947 case GTU: /* CF=0 & ZF=0 */
7e08e190
JH
6948 case LTU: /* CF=1 */
6949 case LEU: /* CF=1 | ZF=1 */
265dab10 6950 return CCmode;
9076b9c1
JH
6951 /* Codes possibly doable only with sign flag when
6952 comparing against zero. */
6953 case GE: /* SF=OF or SF=0 */
7e08e190 6954 case LT: /* SF<>OF or SF=1 */
9076b9c1
JH
6955 if (op1 == const0_rtx)
6956 return CCGOCmode;
6957 else
 6958	/* For other cases the carry flag is not required.  */
 6959	return CCGCmode;
 6960      /* Codes doable only with the sign flag when comparing
 6961         against zero, but we lack a jump instruction for that,
 6962         so we need to use relational tests against overflow,
 6963         which thus needs to be zero.  */
6964 case GT: /* ZF=0 & SF=OF */
6965 case LE: /* ZF=1 | SF<>OF */
6966 if (op1 == const0_rtx)
6967 return CCNOmode;
6968 else
6969 return CCGCmode;
6970 default:
0f290768 6971 abort ();
9076b9c1
JH
6972 }
6973}
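/* Editorial illustration (not from the original source): an unsigned
   "x > y" (GTU) needs both CF and ZF, so it gets the full CCmode,
   while a signed "x >= 0" (GE against const0_rtx) can be decided from
   the sign flag alone and gets the cheaper CCGOCmode, leaving the
   optimizers more freedom in choosing flag-setting insns.  */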
6974
3a3677ff
RH
6975/* Return true if we should use an FCOMI instruction for this fp comparison. */
6976
a940d8bd 6977int
3a3677ff 6978ix86_use_fcomi_compare (code)
9e7adcb3 6979 enum rtx_code code ATTRIBUTE_UNUSED;
3a3677ff 6980{
9e7adcb3
JH
6981 enum rtx_code swapped_code = swap_condition (code);
6982 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
6983 || (ix86_fp_comparison_cost (swapped_code)
6984 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
6985}
6986
0f290768 6987/* Swap, force into registers, or otherwise massage the two operands
3a3677ff
RH
6988 to a fp comparison. The operands are updated in place; the new
 6989   comparison code is returned.  */
6990
6991static enum rtx_code
6992ix86_prepare_fp_compare_args (code, pop0, pop1)
6993 enum rtx_code code;
6994 rtx *pop0, *pop1;
6995{
6996 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
6997 rtx op0 = *pop0, op1 = *pop1;
6998 enum machine_mode op_mode = GET_MODE (op0);
0644b628 6999 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
3a3677ff 7000
e075ae69 7001 /* All of the unordered compare instructions only work on registers.
3a3677ff
RH
 7002   The same is true of the XFmode compare instructions and of the
 7003   fcomi compare instructions.  */
7004
0644b628
JH
7005 if (!is_sse
7006 && (fpcmp_mode == CCFPUmode
7007 || op_mode == XFmode
7008 || op_mode == TFmode
7009 || ix86_use_fcomi_compare (code)))
e075ae69 7010 {
3a3677ff
RH
7011 op0 = force_reg (op_mode, op0);
7012 op1 = force_reg (op_mode, op1);
e075ae69
RH
7013 }
7014 else
7015 {
7016 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7017 things around if they appear profitable, otherwise force op0
7018 into a register. */
7019
7020 if (standard_80387_constant_p (op0) == 0
7021 || (GET_CODE (op0) == MEM
7022 && ! (standard_80387_constant_p (op1) == 0
7023 || GET_CODE (op1) == MEM)))
32b5b1aa 7024 {
e075ae69
RH
7025 rtx tmp;
7026 tmp = op0, op0 = op1, op1 = tmp;
7027 code = swap_condition (code);
7028 }
7029
7030 if (GET_CODE (op0) != REG)
3a3677ff 7031 op0 = force_reg (op_mode, op0);
e075ae69
RH
7032
7033 if (CONSTANT_P (op1))
7034 {
7035 if (standard_80387_constant_p (op1))
3a3677ff 7036 op1 = force_reg (op_mode, op1);
e075ae69 7037 else
3a3677ff 7038 op1 = validize_mem (force_const_mem (op_mode, op1));
32b5b1aa
SC
7039 }
7040 }
e9a25f70 7041
9e7adcb3
JH
7042 /* Try to rearrange the comparison to make it cheaper. */
7043 if (ix86_fp_comparison_cost (code)
7044 > ix86_fp_comparison_cost (swap_condition (code))
7045 && (GET_CODE (op0) == REG || !reload_completed))
7046 {
7047 rtx tmp;
7048 tmp = op0, op0 = op1, op1 = tmp;
7049 code = swap_condition (code);
7050 if (GET_CODE (op0) != REG)
7051 op0 = force_reg (op_mode, op0);
7052 }
7053
3a3677ff
RH
7054 *pop0 = op0;
7055 *pop1 = op1;
7056 return code;
7057}
7058
c0c102a9
JH
 7059/* Convert the comparison codes we use to represent an FP comparison to the
 7060   integer code that will result in a proper branch.  Return UNKNOWN if no such code
7061 is available. */
7062static enum rtx_code
7063ix86_fp_compare_code_to_integer (code)
7064 enum rtx_code code;
7065{
7066 switch (code)
7067 {
7068 case GT:
7069 return GTU;
7070 case GE:
7071 return GEU;
7072 case ORDERED:
7073 case UNORDERED:
7074 return code;
7075 break;
7076 case UNEQ:
7077 return EQ;
7078 break;
7079 case UNLT:
7080 return LTU;
7081 break;
7082 case UNLE:
7083 return LEU;
7084 break;
7085 case LTGT:
7086 return NE;
7087 break;
7088 default:
7089 return UNKNOWN;
7090 }
7091}
7092
7093/* Split comparison code CODE into comparisons we can do using branch
 7094   instructions.  BYPASS_CODE is the comparison code for the branch that will
 7095   branch around FIRST_CODE and SECOND_CODE.  If one of the branches
 7096   is not required, its code is set to NIL.
7097 We never require more than two branches. */
7098static void
7099ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7100 enum rtx_code code, *bypass_code, *first_code, *second_code;
7101{
7102 *first_code = code;
7103 *bypass_code = NIL;
7104 *second_code = NIL;
7105
7106 /* The fcomi comparison sets flags as follows:
7107
7108 cmp ZF PF CF
7109 > 0 0 0
7110 < 0 0 1
7111 = 1 0 0
7112 un 1 1 1 */
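	 /* An editorial reading of the table: "<" and "unordered" both
	    set CF, so a plain "jb" for LT would also be taken on a NaN.
	    That is why the LT case below is rewritten as UNLT with an
	    UNORDERED bypass branch around it, while NE, which must
	    succeed on NaNs, instead gets UNORDERED as a second branch
	    to the same target.  */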
7113
7114 switch (code)
7115 {
7116 case GT: /* GTU - CF=0 & ZF=0 */
7117 case GE: /* GEU - CF=0 */
7118 case ORDERED: /* PF=0 */
7119 case UNORDERED: /* PF=1 */
7120 case UNEQ: /* EQ - ZF=1 */
7121 case UNLT: /* LTU - CF=1 */
7122 case UNLE: /* LEU - CF=1 | ZF=1 */
7123 case LTGT: /* EQ - ZF=0 */
7124 break;
7125 case LT: /* LTU - CF=1 - fails on unordered */
7126 *first_code = UNLT;
7127 *bypass_code = UNORDERED;
7128 break;
7129 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7130 *first_code = UNLE;
7131 *bypass_code = UNORDERED;
7132 break;
7133 case EQ: /* EQ - ZF=1 - fails on unordered */
7134 *first_code = UNEQ;
7135 *bypass_code = UNORDERED;
7136 break;
7137 case NE: /* NE - ZF=0 - fails on unordered */
7138 *first_code = LTGT;
7139 *second_code = UNORDERED;
7140 break;
7141 case UNGE: /* GEU - CF=0 - fails on unordered */
7142 *first_code = GE;
7143 *second_code = UNORDERED;
7144 break;
7145 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7146 *first_code = GT;
7147 *second_code = UNORDERED;
7148 break;
7149 default:
7150 abort ();
7151 }
7152 if (!TARGET_IEEE_FP)
7153 {
7154 *second_code = NIL;
7155 *bypass_code = NIL;
7156 }
7157}
7158
9e7adcb3
JH
 7159/* Return the cost of a comparison done using fcom + arithmetic operations on AX.
 7160   All the following functions use the number of instructions as the cost metric.
 7161   In the future this should be tweaked to compute bytes for optimize_size and
 7162   take into account the performance of various instructions on various CPUs.  */
7163static int
7164ix86_fp_comparison_arithmetics_cost (code)
7165 enum rtx_code code;
7166{
7167 if (!TARGET_IEEE_FP)
7168 return 4;
7169 /* The cost of code output by ix86_expand_fp_compare. */
7170 switch (code)
7171 {
7172 case UNLE:
7173 case UNLT:
7174 case LTGT:
7175 case GT:
7176 case GE:
7177 case UNORDERED:
7178 case ORDERED:
7179 case UNEQ:
7180 return 4;
7181 break;
7182 case LT:
7183 case NE:
7184 case EQ:
7185 case UNGE:
7186 return 5;
7187 break;
7188 case LE:
7189 case UNGT:
7190 return 6;
7191 break;
7192 default:
7193 abort ();
7194 }
7195}
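/* Worked example (editorial sketch): with TARGET_IEEE_FP, an EQ
   comparison costs 5 by the AH-arithmetic method above, while the
   fcomi variant below costs 2 + 1 = 3 (EQ needs an UNORDERED bypass)
   and the sahf variant costs 3 + 1 = 4; ix86_fp_comparison_cost thus
   returns 3 on a TARGET_CMOVE machine and fcomi wins.  */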
7196
7197/* Return cost of comparison done using fcomi operation.
7198 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7199static int
7200ix86_fp_comparison_fcomi_cost (code)
7201 enum rtx_code code;
7202{
7203 enum rtx_code bypass_code, first_code, second_code;
 7204  /* Return an arbitrarily high cost when the instruction is not supported - this
7205 prevents gcc from using it. */
7206 if (!TARGET_CMOVE)
7207 return 1024;
7208 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7209 return (bypass_code != NIL || second_code != NIL) + 2;
7210}
7211
7212/* Return cost of comparison done using sahf operation.
7213 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7214static int
7215ix86_fp_comparison_sahf_cost (code)
7216 enum rtx_code code;
7217{
7218 enum rtx_code bypass_code, first_code, second_code;
 7219  /* Return an arbitrarily high cost when the instruction is not preferred - this
 7220     keeps gcc from using it.  */
7221 if (!TARGET_USE_SAHF && !optimize_size)
7222 return 1024;
7223 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7224 return (bypass_code != NIL || second_code != NIL) + 3;
7225}
7226
 7227/* Compute the cost of the comparison done by the cheapest available method.
7228 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7229static int
7230ix86_fp_comparison_cost (code)
7231 enum rtx_code code;
7232{
7233 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
7234 int min;
7235
7236 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
7237 sahf_cost = ix86_fp_comparison_sahf_cost (code);
7238
7239 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
7240 if (min > sahf_cost)
7241 min = sahf_cost;
7242 if (min > fcomi_cost)
7243 min = fcomi_cost;
7244 return min;
7245}
c0c102a9 7246
3a3677ff
RH
7247/* Generate insn patterns to do a floating point compare of OPERANDS. */
7248
9e7adcb3
JH
7249static rtx
7250ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
3a3677ff
RH
7251 enum rtx_code code;
7252 rtx op0, op1, scratch;
9e7adcb3
JH
7253 rtx *second_test;
7254 rtx *bypass_test;
3a3677ff
RH
7255{
7256 enum machine_mode fpcmp_mode, intcmp_mode;
c0c102a9 7257 rtx tmp, tmp2;
9e7adcb3 7258 int cost = ix86_fp_comparison_cost (code);
c0c102a9 7259 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
7260
7261 fpcmp_mode = ix86_fp_compare_mode (code);
7262 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
7263
9e7adcb3
JH
7264 if (second_test)
7265 *second_test = NULL_RTX;
7266 if (bypass_test)
7267 *bypass_test = NULL_RTX;
7268
c0c102a9
JH
7269 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7270
9e7adcb3
JH
7271 /* Do fcomi/sahf based test when profitable. */
7272 if ((bypass_code == NIL || bypass_test)
7273 && (second_code == NIL || second_test)
7274 && ix86_fp_comparison_arithmetics_cost (code) > cost)
32b5b1aa 7275 {
c0c102a9
JH
7276 if (TARGET_CMOVE)
7277 {
7278 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7279 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
7280 tmp);
7281 emit_insn (tmp);
7282 }
7283 else
7284 {
7285 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7286 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
bf71a4f8
JH
7287 if (!scratch)
7288 scratch = gen_reg_rtx (HImode);
c0c102a9
JH
7289 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7290 emit_insn (gen_x86_sahf_1 (scratch));
7291 }
e075ae69
RH
7292
7293 /* The FP codes work out to act like unsigned. */
9a915772 7294 intcmp_mode = fpcmp_mode;
9e7adcb3
JH
7295 code = first_code;
7296 if (bypass_code != NIL)
7297 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
7298 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7299 const0_rtx);
7300 if (second_code != NIL)
7301 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
7302 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7303 const0_rtx);
e075ae69
RH
7304 }
7305 else
7306 {
7307 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e075ae69
RH
7308 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7309 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
bf71a4f8
JH
7310 if (!scratch)
7311 scratch = gen_reg_rtx (HImode);
3a3677ff 7312 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 7313
9a915772
JH
7314 /* In the unordered case, we have to check C2 for NaN's, which
7315 doesn't happen to work out to anything nice combination-wise.
7316 So do some bit twiddling on the value we've got in AH to come
7317 up with an appropriate set of condition codes. */
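	 /* Editorial note on the masks below: fnstsw leaves the FPU
	    status word in AX, so AH holds FSW bits 8-15, where the
	    condition bits land at C0 = 0x01, C2 = 0x04 and C3 = 0x40;
	    0x45 is therefore C0|C2|C3, all the bits an fcom can set.  */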
e075ae69 7318
9a915772
JH
7319 intcmp_mode = CCNOmode;
7320 switch (code)
32b5b1aa 7321 {
9a915772
JH
7322 case GT:
7323 case UNGT:
7324 if (code == GT || !TARGET_IEEE_FP)
32b5b1aa 7325 {
3a3677ff 7326 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69 7327 code = EQ;
9a915772
JH
7328 }
7329 else
7330 {
7331 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7332 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7333 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
7334 intcmp_mode = CCmode;
7335 code = GEU;
7336 }
7337 break;
7338 case LT:
7339 case UNLT:
7340 if (code == LT && TARGET_IEEE_FP)
7341 {
3a3677ff
RH
7342 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7343 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
e075ae69
RH
7344 intcmp_mode = CCmode;
7345 code = EQ;
9a915772
JH
7346 }
7347 else
7348 {
7349 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
7350 code = NE;
7351 }
7352 break;
7353 case GE:
7354 case UNGE:
7355 if (code == GE || !TARGET_IEEE_FP)
7356 {
3a3677ff 7357 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69 7358 code = EQ;
9a915772
JH
7359 }
7360 else
7361 {
7362 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7363 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7364 GEN_INT (0x01)));
7365 code = NE;
7366 }
7367 break;
7368 case LE:
7369 case UNLE:
7370 if (code == LE && TARGET_IEEE_FP)
7371 {
3a3677ff
RH
7372 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7373 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7374 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
7375 intcmp_mode = CCmode;
7376 code = LTU;
9a915772
JH
7377 }
7378 else
7379 {
7380 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7381 code = NE;
7382 }
7383 break;
7384 case EQ:
7385 case UNEQ:
7386 if (code == EQ && TARGET_IEEE_FP)
7387 {
3a3677ff
RH
7388 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7389 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
7390 intcmp_mode = CCmode;
7391 code = EQ;
9a915772
JH
7392 }
7393 else
7394 {
3a3677ff
RH
7395 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7396 code = NE;
7397 break;
9a915772
JH
7398 }
7399 break;
7400 case NE:
7401 case LTGT:
7402 if (code == NE && TARGET_IEEE_FP)
7403 {
3a3677ff 7404 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9a915772
JH
7405 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7406 GEN_INT (0x40)));
3a3677ff 7407 code = NE;
9a915772
JH
7408 }
7409 else
7410 {
3a3677ff
RH
7411 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7412 code = EQ;
32b5b1aa 7413 }
9a915772
JH
7414 break;
7415
7416 case UNORDERED:
7417 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7418 code = NE;
7419 break;
7420 case ORDERED:
7421 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7422 code = EQ;
7423 break;
7424
7425 default:
7426 abort ();
32b5b1aa 7427 }
32b5b1aa 7428 }
e075ae69
RH
7429
7430 /* Return the test that should be put into the flags user, i.e.
7431 the bcc, scc, or cmov instruction. */
7432 return gen_rtx_fmt_ee (code, VOIDmode,
7433 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7434 const0_rtx);
7435}
7436
9e3e266c 7437rtx
a1b8572c 7438ix86_expand_compare (code, second_test, bypass_test)
e075ae69 7439 enum rtx_code code;
a1b8572c 7440 rtx *second_test, *bypass_test;
e075ae69
RH
7441{
7442 rtx op0, op1, ret;
7443 op0 = ix86_compare_op0;
7444 op1 = ix86_compare_op1;
7445
a1b8572c
JH
7446 if (second_test)
7447 *second_test = NULL_RTX;
7448 if (bypass_test)
7449 *bypass_test = NULL_RTX;
7450
e075ae69 7451 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 7452 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 7453 second_test, bypass_test);
32b5b1aa 7454 else
e075ae69
RH
7455 ret = ix86_expand_int_compare (code, op0, op1);
7456
7457 return ret;
7458}
7459
03598dea
JH
7460/* Return true if the CODE will result in nontrivial jump sequence. */
7461bool
7462ix86_fp_jump_nontrivial_p (code)
7463 enum rtx_code code;
7464{
7465 enum rtx_code bypass_code, first_code, second_code;
7466 if (!TARGET_CMOVE)
7467 return true;
7468 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7469 return bypass_code != NIL || second_code != NIL;
7470}
7471
e075ae69 7472void
3a3677ff 7473ix86_expand_branch (code, label)
e075ae69 7474 enum rtx_code code;
e075ae69
RH
7475 rtx label;
7476{
3a3677ff 7477 rtx tmp;
e075ae69 7478
3a3677ff 7479 switch (GET_MODE (ix86_compare_op0))
32b5b1aa 7480 {
3a3677ff
RH
7481 case QImode:
7482 case HImode:
7483 case SImode:
0d7d98ee 7484 simple:
a1b8572c 7485 tmp = ix86_expand_compare (code, NULL, NULL);
e075ae69
RH
7486 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7487 gen_rtx_LABEL_REF (VOIDmode, label),
7488 pc_rtx);
7489 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa 7490 return;
e075ae69 7491
3a3677ff
RH
7492 case SFmode:
7493 case DFmode:
0f290768 7494 case XFmode:
2b589241 7495 case TFmode:
3a3677ff
RH
7496 {
7497 rtvec vec;
7498 int use_fcomi;
03598dea 7499 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
7500
7501 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
7502 &ix86_compare_op1);
03598dea
JH
7503
7504 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7505
7506 /* Check whether we will use the natural sequence with one jump. If
 7507	   so, we can expand the jump early.  Otherwise delay expansion by
 7508	   creating a compound insn so as not to confuse the optimizers.  */
7509 if (bypass_code == NIL && second_code == NIL
7510 && TARGET_CMOVE)
7511 {
7512 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
7513 gen_rtx_LABEL_REF (VOIDmode, label),
7514 pc_rtx, NULL_RTX);
7515 }
7516 else
7517 {
7518 tmp = gen_rtx_fmt_ee (code, VOIDmode,
7519 ix86_compare_op0, ix86_compare_op1);
7520 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7521 gen_rtx_LABEL_REF (VOIDmode, label),
7522 pc_rtx);
7523 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
7524
7525 use_fcomi = ix86_use_fcomi_compare (code);
7526 vec = rtvec_alloc (3 + !use_fcomi);
7527 RTVEC_ELT (vec, 0) = tmp;
7528 RTVEC_ELT (vec, 1)
7529 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
7530 RTVEC_ELT (vec, 2)
7531 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
7532 if (! use_fcomi)
7533 RTVEC_ELT (vec, 3)
7534 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
7535
7536 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
7537 }
3a3677ff
RH
7538 return;
7539 }
32b5b1aa 7540
3a3677ff 7541 case DImode:
0d7d98ee
JH
7542 if (TARGET_64BIT)
7543 goto simple;
3a3677ff
RH
7544 /* Expand DImode branch into multiple compare+branch. */
7545 {
7546 rtx lo[2], hi[2], label2;
7547 enum rtx_code code1, code2, code3;
32b5b1aa 7548
3a3677ff
RH
7549 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
7550 {
7551 tmp = ix86_compare_op0;
7552 ix86_compare_op0 = ix86_compare_op1;
7553 ix86_compare_op1 = tmp;
7554 code = swap_condition (code);
7555 }
7556 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
7557 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
32b5b1aa 7558
3a3677ff
RH
7559 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7560 avoid two branches. This costs one extra insn, so disable when
7561 optimizing for size. */
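	/* Worked instance (editorial): for a == b, the value
	   (hi(a)^hi(b)) | (lo(a)^lo(b)) is zero iff both halves match,
	   so the DImode equality collapses to xor/xor/or plus a single
	   test against zero, instead of two compares and two jumps.  */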
32b5b1aa 7562
3a3677ff
RH
7563 if ((code == EQ || code == NE)
7564 && (!optimize_size
7565 || hi[1] == const0_rtx || lo[1] == const0_rtx))
7566 {
7567 rtx xor0, xor1;
32b5b1aa 7568
3a3677ff
RH
7569 xor1 = hi[0];
7570 if (hi[1] != const0_rtx)
7571 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
7572 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 7573
3a3677ff
RH
7574 xor0 = lo[0];
7575 if (lo[1] != const0_rtx)
7576 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
7577 NULL_RTX, 0, OPTAB_WIDEN);
e075ae69 7578
3a3677ff
RH
7579 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
7580 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 7581
3a3677ff
RH
7582 ix86_compare_op0 = tmp;
7583 ix86_compare_op1 = const0_rtx;
7584 ix86_expand_branch (code, label);
7585 return;
7586 }
e075ae69 7587
1f9124e4
JJ
 7588	/* Otherwise, if we are doing a less-than or greater-or-equal-than
 7589	   comparison, op1 is a constant, and the low word is zero, we can just
7590 examine the high word. */
32b5b1aa 7591
1f9124e4
JJ
7592 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
7593 switch (code)
7594 {
7595 case LT: case LTU: case GE: case GEU:
7596 ix86_compare_op0 = hi[0];
7597 ix86_compare_op1 = hi[1];
7598 ix86_expand_branch (code, label);
7599 return;
7600 default:
7601 break;
7602 }
e075ae69 7603
3a3677ff 7604 /* Otherwise, we need two or three jumps. */
e075ae69 7605
3a3677ff 7606 label2 = gen_label_rtx ();
e075ae69 7607
3a3677ff
RH
7608 code1 = code;
7609 code2 = swap_condition (code);
7610 code3 = unsigned_condition (code);
e075ae69 7611
3a3677ff
RH
7612 switch (code)
7613 {
7614 case LT: case GT: case LTU: case GTU:
7615 break;
e075ae69 7616
3a3677ff
RH
7617 case LE: code1 = LT; code2 = GT; break;
7618 case GE: code1 = GT; code2 = LT; break;
7619 case LEU: code1 = LTU; code2 = GTU; break;
7620 case GEU: code1 = GTU; code2 = LTU; break;
e075ae69 7621
3a3677ff
RH
7622 case EQ: code1 = NIL; code2 = NE; break;
7623 case NE: code2 = NIL; break;
e075ae69 7624
3a3677ff
RH
7625 default:
7626 abort ();
7627 }
e075ae69 7628
3a3677ff
RH
7629 /*
7630 * a < b =>
7631 * if (hi(a) < hi(b)) goto true;
7632 * if (hi(a) > hi(b)) goto false;
7633 * if (lo(a) < lo(b)) goto true;
7634 * false:
7635 */
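	  /* Editorial example: for a <= b the switch above picked
	     code1 = LT and code2 = GT for the high words, and
	     code3 = unsigned_condition (LE) = LEU for the low words,
	     since the low words always compare as unsigned whatever the
	     signedness of the whole DImode comparison.  */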
7636
7637 ix86_compare_op0 = hi[0];
7638 ix86_compare_op1 = hi[1];
7639
7640 if (code1 != NIL)
7641 ix86_expand_branch (code1, label);
7642 if (code2 != NIL)
7643 ix86_expand_branch (code2, label2);
7644
7645 ix86_compare_op0 = lo[0];
7646 ix86_compare_op1 = lo[1];
7647 ix86_expand_branch (code3, label);
7648
7649 if (code2 != NIL)
7650 emit_label (label2);
7651 return;
7652 }
e075ae69 7653
3a3677ff
RH
7654 default:
7655 abort ();
7656 }
32b5b1aa 7657}
e075ae69 7658
9e7adcb3
JH
7659/* Split branch based on floating point condition. */
7660void
03598dea
JH
7661ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
7662 enum rtx_code code;
7663 rtx op1, op2, target1, target2, tmp;
9e7adcb3
JH
7664{
7665 rtx second, bypass;
7666 rtx label = NULL_RTX;
03598dea 7667 rtx condition;
6b24c259
JH
7668 int bypass_probability = -1, second_probability = -1, probability = -1;
7669 rtx i;
9e7adcb3
JH
7670
7671 if (target2 != pc_rtx)
7672 {
7673 rtx tmp = target2;
7674 code = reverse_condition_maybe_unordered (code);
7675 target2 = target1;
7676 target1 = tmp;
7677 }
7678
7679 condition = ix86_expand_fp_compare (code, op1, op2,
7680 tmp, &second, &bypass);
6b24c259
JH
7681
7682 if (split_branch_probability >= 0)
7683 {
7684 /* Distribute the probabilities across the jumps.
 7685	 Assume that the BYPASS and SECOND tests are always
 7686	 for UNORDERED.  */
7687 probability = split_branch_probability;
7688
 7689      /* A value of 1 is low enough that the probability does not need
 7690	 to be updated.  Later we may run some experiments and see
 7691	 whether unordered values are more frequent in practice.  */
7692 if (bypass)
7693 bypass_probability = 1;
7694 if (second)
7695 second_probability = 1;
7696 }
9e7adcb3
JH
7697 if (bypass != NULL_RTX)
7698 {
7699 label = gen_label_rtx ();
6b24c259
JH
7700 i = emit_jump_insn (gen_rtx_SET
7701 (VOIDmode, pc_rtx,
7702 gen_rtx_IF_THEN_ELSE (VOIDmode,
7703 bypass,
7704 gen_rtx_LABEL_REF (VOIDmode,
7705 label),
7706 pc_rtx)));
7707 if (bypass_probability >= 0)
7708 REG_NOTES (i)
7709 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7710 GEN_INT (bypass_probability),
7711 REG_NOTES (i));
7712 }
7713 i = emit_jump_insn (gen_rtx_SET
9e7adcb3
JH
7714 (VOIDmode, pc_rtx,
7715 gen_rtx_IF_THEN_ELSE (VOIDmode,
6b24c259
JH
7716 condition, target1, target2)));
7717 if (probability >= 0)
7718 REG_NOTES (i)
7719 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7720 GEN_INT (probability),
7721 REG_NOTES (i));
7722 if (second != NULL_RTX)
9e7adcb3 7723 {
6b24c259
JH
7724 i = emit_jump_insn (gen_rtx_SET
7725 (VOIDmode, pc_rtx,
7726 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
7727 target2)));
7728 if (second_probability >= 0)
7729 REG_NOTES (i)
7730 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7731 GEN_INT (second_probability),
7732 REG_NOTES (i));
9e7adcb3 7733 }
9e7adcb3
JH
7734 if (label != NULL_RTX)
7735 emit_label (label);
7736}
7737
32b5b1aa 7738int
3a3677ff 7739ix86_expand_setcc (code, dest)
e075ae69 7740 enum rtx_code code;
e075ae69 7741 rtx dest;
32b5b1aa 7742{
a1b8572c
JH
7743 rtx ret, tmp, tmpreg;
7744 rtx second_test, bypass_test;
e075ae69
RH
7745 int type;
7746
885a70fd
JH
7747 if (GET_MODE (ix86_compare_op0) == DImode
7748 && !TARGET_64BIT)
e075ae69
RH
7749 return 0; /* FAIL */
7750
7751 /* Three modes of generation:
7752 0 -- destination does not overlap compare sources:
7753 clear dest first, emit strict_low_part setcc.
7754 1 -- destination does overlap compare sources:
7755 emit subreg setcc, zero extend.
7756 2 -- destination is in QImode:
7757 emit setcc only.
e075ae69 7758
c50e5bc0
RH
7759 We don't use mode 0 early in compilation because it confuses CSE.
7760 There are peepholes to turn mode 1 into mode 0 if things work out
7761 nicely after reload. */
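	  /* An editorial sketch of mode 0, the ideal final shape:

		xorl  %eax, %eax
		cmpl  %edx, %ecx
		setl  %al

	     i.e. clear the full register first, then set only the low
	     byte, so no separate zero extension is needed.  */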
7762
7763 type = cse_not_expected ? 0 : 1;
e075ae69
RH
7764
7765 if (GET_MODE (dest) == QImode)
7766 type = 2;
7767 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
a500c31b 7768 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
e075ae69
RH
7769 type = 1;
7770
7771 if (type == 0)
7772 emit_move_insn (dest, const0_rtx);
7773
a1b8572c 7774 ret = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
7775 PUT_MODE (ret, QImode);
7776
7777 tmp = dest;
a1b8572c 7778 tmpreg = dest;
e075ae69 7779 if (type == 0)
32b5b1aa 7780 {
e075ae69 7781 tmp = gen_lowpart (QImode, dest);
a1b8572c 7782 tmpreg = tmp;
e075ae69
RH
7783 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
7784 }
7785 else if (type == 1)
7786 {
7787 if (!cse_not_expected)
7788 tmp = gen_reg_rtx (QImode);
7789 else
7790 tmp = gen_lowpart (QImode, dest);
a1b8572c 7791 tmpreg = tmp;
e075ae69 7792 }
32b5b1aa 7793
e075ae69 7794 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
a1b8572c
JH
7795 if (bypass_test || second_test)
7796 {
7797 rtx test = second_test;
7798 int bypass = 0;
7799 rtx tmp2 = gen_reg_rtx (QImode);
7800 if (bypass_test)
7801 {
7802 if (second_test)
7803 abort();
7804 test = bypass_test;
7805 bypass = 1;
7806 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
7807 }
7808 PUT_MODE (test, QImode);
7809 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
7810
7811 if (bypass)
7812 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
7813 else
7814 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
7815 }
e075ae69
RH
7816
7817 if (type == 1)
7818 {
7819 rtx clob;
7820
7821 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
7822 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
7823 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7824 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7825 emit_insn (tmp);
32b5b1aa 7826 }
e075ae69
RH
7827
7828 return 1; /* DONE */
32b5b1aa 7829}
e075ae69 7830
32b5b1aa 7831int
e075ae69
RH
7832ix86_expand_int_movcc (operands)
7833 rtx operands[];
32b5b1aa 7834{
e075ae69
RH
7835 enum rtx_code code = GET_CODE (operands[1]), compare_code;
7836 rtx compare_seq, compare_op;
a1b8572c 7837 rtx second_test, bypass_test;
32b5b1aa 7838
36583fea
JH
 7839  /* When the compare code is not LTU or GEU, we can not use the sbbl case.
 7840     When the comparison is done with an immediate, we can convert it to LTU
 7841     or GEU by adjusting the constant.  */
7842
7843 if ((code == LEU || code == GTU)
7844 && GET_CODE (ix86_compare_op1) == CONST_INT
7845 && GET_MODE (operands[0]) != HImode
7846 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
0f290768 7847 && GET_CODE (operands[2]) == CONST_INT
36583fea
JH
7848 && GET_CODE (operands[3]) == CONST_INT)
7849 {
7850 if (code == LEU)
7851 code = LTU;
7852 else
7853 code = GEU;
7854 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
7855 }
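  /* Editorial example: an unsigned "x <= 5" (LEU) becomes "x < 6"
     (LTU), which the sbb sequences below implement straight from the
     carry flag; the 0xffffffff guard above excludes the one immediate
     for which adding 1 would wrap around.  */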
3a3677ff 7856
e075ae69 7857 start_sequence ();
a1b8572c 7858 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
7859 compare_seq = gen_sequence ();
7860 end_sequence ();
7861
7862 compare_code = GET_CODE (compare_op);
7863
7864 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
7865 HImode insns, we'd be swallowed in word prefix ops. */
7866
7867 if (GET_MODE (operands[0]) != HImode
14f73b5a 7868 && (GET_MODE (operands[0]) != DImode || TARGET_64BIT)
0f290768 7869 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
7870 && GET_CODE (operands[3]) == CONST_INT)
7871 {
7872 rtx out = operands[0];
7873 HOST_WIDE_INT ct = INTVAL (operands[2]);
7874 HOST_WIDE_INT cf = INTVAL (operands[3]);
7875 HOST_WIDE_INT diff;
7876
a1b8572c
JH
7877 if ((compare_code == LTU || compare_code == GEU)
7878 && !second_test && !bypass_test)
e075ae69 7879 {
e075ae69
RH
7880
7881 /* Detect overlap between destination and compare sources. */
7882 rtx tmp = out;
7883
0f290768 7884	  /* To simplify the rest of the code, restrict to the GEU case.  */
36583fea
JH
7885 if (compare_code == LTU)
7886 {
7887 int tmp = ct;
7888 ct = cf;
7889 cf = tmp;
7890 compare_code = reverse_condition (compare_code);
7891 code = reverse_condition (code);
7892 }
7893 diff = ct - cf;
7894
e075ae69 7895 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
a500c31b 7896 || reg_overlap_mentioned_p (out, ix86_compare_op1))
14f73b5a 7897 tmp = gen_reg_rtx (GET_MODE (operands[0]));
e075ae69
RH
7898
7899 emit_insn (compare_seq);
14f73b5a
JH
7900 if (GET_MODE (tmp) == DImode)
7901 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
7902 else
7903 emit_insn (gen_x86_movsicc_0_m1 (tmp));
e075ae69 7904
36583fea
JH
7905 if (diff == 1)
7906 {
7907 /*
7908 * cmpl op0,op1
7909 * sbbl dest,dest
7910 * [addl dest, ct]
7911 *
7912 * Size 5 - 8.
7913 */
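	      /* Editorial note: sbbl dest,dest materializes the carry
		 as 0 or -1 (GEU true or false after the normalization
		 above), so with ct - cf == 1 a single addl of ct gives
		 ct in the true case and ct - 1 == cf otherwise.  */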
7914 if (ct)
14f73b5a
JH
7915 {
7916 if (GET_MODE (tmp) == DImode)
7917 emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (ct)));
7918 else
7919 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
7920 }
36583fea
JH
7921 }
7922 else if (cf == -1)
7923 {
7924 /*
7925 * cmpl op0,op1
7926 * sbbl dest,dest
7927 * orl $ct, dest
7928 *
7929 * Size 8.
7930 */
14f73b5a
JH
7931 if (GET_MODE (tmp) == DImode)
7932 emit_insn (gen_iordi3 (tmp, tmp, GEN_INT (ct)));
7933 else
7934 emit_insn (gen_iorsi3 (tmp, tmp, GEN_INT (ct)));
36583fea
JH
7935 }
7936 else if (diff == -1 && ct)
7937 {
7938 /*
7939 * cmpl op0,op1
7940 * sbbl dest,dest
7941 * xorl $-1, dest
7942 * [addl dest, cf]
7943 *
7944 * Size 8 - 11.
7945 */
14f73b5a
JH
7946 if (GET_MODE (tmp) == DImode)
7947 {
7948 emit_insn (gen_one_cmpldi2 (tmp, tmp));
7949 if (cf)
7950 emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (cf)));
7951 }
7952 else
7953 {
7954 emit_insn (gen_one_cmplsi2 (tmp, tmp));
7955 if (cf)
7956 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (cf)));
7957 }
36583fea
JH
7958 }
7959 else
7960 {
7961 /*
7962 * cmpl op0,op1
7963 * sbbl dest,dest
7964 * andl cf - ct, dest
7965 * [addl dest, ct]
7966 *
7967 * Size 8 - 11.
7968 */
14f73b5a
JH
7969 if (GET_MODE (tmp) == DImode)
7970 {
7971 emit_insn (gen_anddi3 (tmp, tmp, GEN_INT (trunc_int_for_mode
7972 (cf - ct, DImode))));
7973 if (ct)
7974 emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (ct)));
7975 }
7976 else
7977 {
7978 emit_insn (gen_andsi3 (tmp, tmp, GEN_INT (trunc_int_for_mode
7979 (cf - ct, SImode))));
7980 if (ct)
7981 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
7982 }
36583fea 7983 }
e075ae69
RH
7984
7985 if (tmp != out)
7986 emit_move_insn (out, tmp);
7987
7988 return 1; /* DONE */
7989 }
7990
7991 diff = ct - cf;
7992 if (diff < 0)
7993 {
7994 HOST_WIDE_INT tmp;
7995 tmp = ct, ct = cf, cf = tmp;
7996 diff = -diff;
734dba19
JH
7997 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
7998 {
 7999	      /* We may be reversing an unordered compare to a normal compare, which
 8000	         is not valid in general (we may convert a non-trapping condition
 8001	         to a trapping one); however, on i386 we currently emit all
 8002	         comparisons unordered.  */
8003 compare_code = reverse_condition_maybe_unordered (compare_code);
8004 code = reverse_condition_maybe_unordered (code);
8005 }
8006 else
8007 {
8008 compare_code = reverse_condition (compare_code);
8009 code = reverse_condition (code);
8010 }
e075ae69
RH
8011 }
8012 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
8013 || diff == 3 || diff == 5 || diff == 9)
8014 {
8015 /*
8016 * xorl dest,dest
8017 * cmpl op1,op2
8018 * setcc dest
8019 * lea cf(dest*(ct-cf)),dest
8020 *
8021 * Size 14.
8022 *
8023 * This also catches the degenerate setcc-only case.
8024 */
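      /* Editorial instance: (a < b ? 5 : 2) has ct - cf == 3, so
	 after "setl %al" the result is formed by one
	 "leal 2(%eax,%eax,2), %eax", computing cf + dest * (ct - cf)
	 without any branch.  */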
8025
8026 rtx tmp;
8027 int nops;
8028
8029 out = emit_store_flag (out, code, ix86_compare_op0,
8030 ix86_compare_op1, VOIDmode, 0, 1);
8031
8032 nops = 0;
885a70fd
JH
 8033      /* On x86_64 the lea instruction operates on Pmode, so we need the
 8034	 arithmetic done in the proper mode to match.  */
e075ae69 8035 if (diff == 1)
14f73b5a 8036 tmp = out;
e075ae69
RH
8037 else
8038 {
885a70fd 8039 rtx out1;
14f73b5a
JH
8040 out1 = out;
8041 tmp = gen_rtx_MULT (GET_MODE (out), out1, GEN_INT (diff & ~1));
e075ae69
RH
8042 nops++;
8043 if (diff & 1)
8044 {
14f73b5a 8045 tmp = gen_rtx_PLUS (GET_MODE (out), tmp, out1);
e075ae69
RH
8046 nops++;
8047 }
8048 }
8049 if (cf != 0)
8050 {
14f73b5a 8051 tmp = gen_rtx_PLUS (GET_MODE (out), tmp, GEN_INT (cf));
e075ae69
RH
8052 nops++;
8053 }
885a70fd
JH
8054 if (tmp != out
8055 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
e075ae69 8056 {
14f73b5a 8057 if (nops == 1)
e075ae69
RH
8058 {
8059 rtx clob;
8060
8061 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8062 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8063
8064 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8065 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8066 emit_insn (tmp);
8067 }
8068 else
8069 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8070 }
8071 if (out != operands[0])
8072 emit_move_insn (operands[0], out);
8073
8074 return 1; /* DONE */
8075 }
8076
8077 /*
8078 * General case: Jumpful:
8079 * xorl dest,dest cmpl op1, op2
8080 * cmpl op1, op2 movl ct, dest
8081 * setcc dest jcc 1f
8082 * decl dest movl cf, dest
8083 * andl (cf-ct),dest 1:
8084 * addl ct,dest
0f290768 8085 *
e075ae69
RH
8086 * Size 20. Size 14.
8087 *
8088 * This is reasonably steep, but branch mispredict costs are
8089 * high on modern cpus, so consider failing only if optimizing
8090 * for space.
8091 *
8092 * %%% Parameterize branch_cost on the tuning architecture, then
8093 * use that. The 80386 couldn't care less about mispredicts.
8094 */
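      /* Editorial instance: (a < b ? 7 : 3) in the branchless column
	 becomes setcc, then decl (giving 0 or -1), then andl $-4
	 (cf - ct), then addl $7; four cheap insns and no jcc to
	 mispredict.  */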
8095
8096 if (!optimize_size && !TARGET_CMOVE)
8097 {
8098 if (ct == 0)
8099 {
8100 ct = cf;
8101 cf = 0;
734dba19
JH
8102 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8103 {
 8104	      /* We may be reversing an unordered compare to a normal
 8105	         compare, which is not valid in general (we may convert a
 8106	         non-trapping condition to a trapping one); however, on
 8107	         i386 we currently emit all comparisons unordered.  */
8108 compare_code = reverse_condition_maybe_unordered (compare_code);
8109 code = reverse_condition_maybe_unordered (code);
8110 }
8111 else
8112 {
8113 compare_code = reverse_condition (compare_code);
8114 code = reverse_condition (code);
8115 }
e075ae69
RH
8116 }
8117
8118 out = emit_store_flag (out, code, ix86_compare_op0,
8119 ix86_compare_op1, VOIDmode, 0, 1);
8120
8121 emit_insn (gen_addsi3 (out, out, constm1_rtx));
7471a1f0
AO
8122 emit_insn (gen_andsi3 (out, out, GEN_INT (trunc_int_for_mode
8123 (cf - ct, SImode))));
e075ae69
RH
8124 if (ct != 0)
8125 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
8126 if (out != operands[0])
8127 emit_move_insn (operands[0], out);
8128
8129 return 1; /* DONE */
8130 }
8131 }
8132
8133 if (!TARGET_CMOVE)
8134 {
8135 /* Try a few things more with specific constants and a variable. */
8136
78a0d70c 8137 optab op;
e075ae69
RH
8138 rtx var, orig_out, out, tmp;
8139
8140 if (optimize_size)
8141 return 0; /* FAIL */
8142
0f290768 8143	      /* If one of the two operands is an interesting constant, load the
e075ae69 8144	 other constant by recursion and mask in the variable with a logical operation.  */
0f290768 8145
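      /* Editorial example: for (cond ? 0 : var) the recursion below
	 first materializes (cond ? 0 : -1) as an all-zeros/all-ones
	 mask and then ANDs var into it; the (cond ? -1 : var) case
	 dually uses IOR with a -1/0 mask.  */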
e075ae69
RH
8146 if (GET_CODE (operands[2]) == CONST_INT)
8147 {
8148 var = operands[3];
8149 if (INTVAL (operands[2]) == 0)
8150 operands[3] = constm1_rtx, op = and_optab;
8151 else if (INTVAL (operands[2]) == -1)
8152 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
8153 else
8154 return 0; /* FAIL */
e075ae69
RH
8155 }
8156 else if (GET_CODE (operands[3]) == CONST_INT)
8157 {
8158 var = operands[2];
8159 if (INTVAL (operands[3]) == 0)
8160 operands[2] = constm1_rtx, op = and_optab;
8161 else if (INTVAL (operands[3]) == -1)
8162 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
8163 else
8164 return 0; /* FAIL */
e075ae69 8165 }
78a0d70c 8166 else
e075ae69
RH
8167 return 0; /* FAIL */
8168
8169 orig_out = operands[0];
8170 tmp = gen_reg_rtx (GET_MODE (orig_out));
8171 operands[0] = tmp;
8172
8173 /* Recurse to get the constant loaded. */
8174 if (ix86_expand_int_movcc (operands) == 0)
8175 return 0; /* FAIL */
8176
8177 /* Mask in the interesting variable. */
8178 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
8179 OPTAB_WIDEN);
8180 if (out != orig_out)
8181 emit_move_insn (orig_out, out);
8182
8183 return 1; /* DONE */
8184 }
8185
8186 /*
8187 * For comparison with above,
8188 *
8189 * movl cf,dest
8190 * movl ct,tmp
8191 * cmpl op1,op2
8192 * cmovcc tmp,dest
8193 *
8194 * Size 15.
8195 */
8196
8197 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
8198 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
8199 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
8200 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
8201
a1b8572c
JH
8202 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8203 {
8204 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
8205 emit_move_insn (tmp, operands[3]);
8206 operands[3] = tmp;
8207 }
8208 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8209 {
8210 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
8211 emit_move_insn (tmp, operands[2]);
8212 operands[2] = tmp;
8213 }
c9682caf
JH
8214 if (! register_operand (operands[2], VOIDmode)
8215 && ! register_operand (operands[3], VOIDmode))
8216 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
a1b8572c 8217
e075ae69
RH
8218 emit_insn (compare_seq);
8219 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8220 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8221 compare_op, operands[2],
8222 operands[3])));
a1b8572c
JH
8223 if (bypass_test)
8224 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8225 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8226 bypass_test,
8227 operands[3],
8228 operands[0])));
8229 if (second_test)
8230 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8231 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8232 second_test,
8233 operands[2],
8234 operands[0])));
e075ae69
RH
8235
8236 return 1; /* DONE */
e9a25f70 8237}
e075ae69 8238
32b5b1aa 8239int
e075ae69
RH
8240ix86_expand_fp_movcc (operands)
8241 rtx operands[];
32b5b1aa 8242{
e075ae69 8243 enum rtx_code code;
e075ae69 8244 rtx tmp;
a1b8572c 8245 rtx compare_op, second_test, bypass_test;
32b5b1aa 8246
0073023d
JH
8247 /* For SF/DFmode conditional moves based on comparisons
 8248     in the same mode, we may want to use SSE min/max instructions.  */
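  /* Editorial example: "x < y ? x : y" in SFmode matches the min
     pattern below and becomes a single SSE minss, folding compare and
     select into one instruction, subject to the IEEE/NaN guards that
     follow.  */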
8249 if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
8250 || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
8251 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
fa9f36a1
JH
 8252	    /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
8253 && (!TARGET_IEEE_FP
8254 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
0073023d
JH
8255 /* We may be called from the post-reload splitter. */
8256 && (!REG_P (operands[0])
8257 || SSE_REG_P (operands[0])
52a661a6 8258 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
0073023d
JH
8259 {
8260 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
8261 code = GET_CODE (operands[1]);
8262
8263 /* See if we have (cross) match between comparison operands and
8264 conditional move operands. */
8265 if (rtx_equal_p (operands[2], op1))
8266 {
8267 rtx tmp = op0;
8268 op0 = op1;
8269 op1 = tmp;
8270 code = reverse_condition_maybe_unordered (code);
8271 }
8272 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
8273 {
8274 /* Check for min operation. */
8275 if (code == LT)
8276 {
8277 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8278 if (memory_operand (op0, VOIDmode))
8279 op0 = force_reg (GET_MODE (operands[0]), op0);
8280 if (GET_MODE (operands[0]) == SFmode)
8281 emit_insn (gen_minsf3 (operands[0], op0, op1));
8282 else
8283 emit_insn (gen_mindf3 (operands[0], op0, op1));
8284 return 1;
8285 }
8286 /* Check for max operation. */
8287 if (code == GT)
8288 {
8289 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8290 if (memory_operand (op0, VOIDmode))
8291 op0 = force_reg (GET_MODE (operands[0]), op0);
8292 if (GET_MODE (operands[0]) == SFmode)
8293 emit_insn (gen_maxsf3 (operands[0], op0, op1));
8294 else
8295 emit_insn (gen_maxdf3 (operands[0], op0, op1));
8296 return 1;
8297 }
8298 }
 8299      /* Arrange for the condition to be an sse_comparison_operator.  When we
 8300         are in non-IEEE mode, try to canonicalize the destination operand
 8301         to be first in the comparison - this helps reload avoid extra
8302 moves. */
8303 if (!sse_comparison_operator (operands[1], VOIDmode)
8304 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
8305 {
8306 rtx tmp = ix86_compare_op0;
8307 ix86_compare_op0 = ix86_compare_op1;
8308 ix86_compare_op1 = tmp;
8309 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
8310 VOIDmode, ix86_compare_op0,
8311 ix86_compare_op1);
8312 }
 8313      /* Similarly, try to arrange for the result to be the first operand of the conditional
fa9f36a1
JH
8314 move. We also don't support the NE comparison on SSE, so try to
8315 avoid it. */
037f20f1
JH
8316 if ((rtx_equal_p (operands[0], operands[3])
8317 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
8318 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
0073023d
JH
8319 {
8320 rtx tmp = operands[2];
8321 operands[2] = operands[3];
92d0fb09 8322 operands[3] = tmp;
0073023d
JH
8323 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8324 (GET_CODE (operands[1])),
8325 VOIDmode, ix86_compare_op0,
8326 ix86_compare_op1);
8327 }
8328 if (GET_MODE (operands[0]) == SFmode)
8329 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
8330 operands[2], operands[3],
8331 ix86_compare_op0, ix86_compare_op1));
8332 else
8333 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
8334 operands[2], operands[3],
8335 ix86_compare_op0, ix86_compare_op1));
8336 return 1;
8337 }
8338
e075ae69 8339 /* The floating point conditional move instructions don't directly
0f290768 8340 support conditions resulting from a signed integer comparison. */
32b5b1aa 8341
e075ae69 8342 code = GET_CODE (operands[1]);
a1b8572c 8343 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9e7adcb3
JH
8344
8345 /* The floating point conditional move instructions don't directly
8346 support signed integer comparisons. */
8347
a1b8572c 8348 if (!fcmov_comparison_operator (compare_op, VOIDmode))
e075ae69 8349 {
a1b8572c
JH
8350 if (second_test != NULL || bypass_test != NULL)
8351 abort();
e075ae69 8352 tmp = gen_reg_rtx (QImode);
3a3677ff 8353 ix86_expand_setcc (code, tmp);
e075ae69
RH
8354 code = NE;
8355 ix86_compare_op0 = tmp;
8356 ix86_compare_op1 = const0_rtx;
a1b8572c
JH
8357 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8358 }
8359 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8360 {
8361 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8362 emit_move_insn (tmp, operands[3]);
8363 operands[3] = tmp;
8364 }
8365 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8366 {
8367 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8368 emit_move_insn (tmp, operands[2]);
8369 operands[2] = tmp;
e075ae69 8370 }
e9a25f70 8371
e075ae69
RH
8372 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8373 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9e7adcb3 8374 compare_op,
e075ae69
RH
8375 operands[2],
8376 operands[3])));
a1b8572c
JH
8377 if (bypass_test)
8378 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8379 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8380 bypass_test,
8381 operands[3],
8382 operands[0])));
8383 if (second_test)
8384 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8385 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8386 second_test,
8387 operands[2],
8388 operands[0])));
32b5b1aa 8389
e075ae69 8390 return 1;
32b5b1aa
SC
8391}
8392
2450a057
JH
8393/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
 8394   works for floating point parameters and non-offsettable memories.
 8395   For pushes, it returns just stack offsets; the values will be saved
 8396   in the right order.  At most three parts are generated.  */
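/* Editorial example: after reload, a DFmode value held in a register
   pair splits into parts[0] = (reg:SI n) and parts[1] = (reg:SI n+1);
   XFmode and TFmode values yield a third SImode part.  */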
8397
2b589241 8398static int
2450a057
JH
8399ix86_split_to_parts (operand, parts, mode)
8400 rtx operand;
8401 rtx *parts;
8402 enum machine_mode mode;
32b5b1aa 8403{
26e5b205
JH
8404 int size;
8405
8406 if (!TARGET_64BIT)
8407 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
8408 else
8409 size = (GET_MODE_SIZE (mode) + 4) / 8;
2450a057 8410
a7180f70
BS
8411 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
8412 abort ();
2450a057
JH
8413 if (size < 2 || size > 3)
8414 abort ();
8415
d7a29404
JH
 8416  /* Optimize constant pool references to immediates.  This is used by fp moves,
 8417     which force all constants to memory to allow combining.  */
8418
8419 if (GET_CODE (operand) == MEM
8420 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
8421 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
8422 operand = get_pool_constant (XEXP (operand, 0));
8423
2450a057 8424 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
e075ae69 8425 {
2450a057
JH
8426 /* The only non-offsetable memories we handle are pushes. */
8427 if (! push_operand (operand, VOIDmode))
8428 abort ();
8429
26e5b205
JH
8430 operand = copy_rtx (operand);
8431 PUT_MODE (operand, Pmode);
2450a057
JH
8432 parts[0] = parts[1] = parts[2] = operand;
8433 }
26e5b205 8434 else if (!TARGET_64BIT)
2450a057
JH
8435 {
8436 if (mode == DImode)
8437 split_di (&operand, 1, &parts[0], &parts[1]);
8438 else
e075ae69 8439 {
2450a057
JH
8440 if (REG_P (operand))
8441 {
8442 if (!reload_completed)
8443 abort ();
8444 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
8445 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8446 if (size == 3)
8447 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
8448 }
8449 else if (offsettable_memref_p (operand))
8450 {
f4ef873c 8451 operand = adjust_address (operand, SImode, 0);
2450a057 8452 parts[0] = operand;
b72f00af 8453 parts[1] = adjust_address (operand, SImode, 4);
2450a057 8454 if (size == 3)
b72f00af 8455 parts[2] = adjust_address (operand, SImode, 8);
2450a057
JH
8456 }
8457 else if (GET_CODE (operand) == CONST_DOUBLE)
8458 {
8459 REAL_VALUE_TYPE r;
2b589241 8460 long l[4];
2450a057
JH
8461
8462 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8463 switch (mode)
8464 {
8465 case XFmode:
2b589241 8466 case TFmode:
2450a057
JH
8467 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8468 parts[2] = GEN_INT (l[2]);
8469 break;
8470 case DFmode:
8471 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
8472 break;
8473 default:
8474 abort ();
8475 }
8476 parts[1] = GEN_INT (l[1]);
8477 parts[0] = GEN_INT (l[0]);
8478 }
8479 else
8480 abort ();
e075ae69 8481 }
2450a057 8482 }
26e5b205
JH
8483 else
8484 {
8485 if (mode == XFmode || mode == TFmode)
8486 {
8487 if (REG_P (operand))
8488 {
8489 if (!reload_completed)
8490 abort ();
8491 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
8492 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8493 }
8494 else if (offsettable_memref_p (operand))
8495 {
b72f00af 8496 operand = adjust_address (operand, DImode, 0);
26e5b205 8497 parts[0] = operand;
b72f00af 8498 parts[1] = adjust_address (operand, SImode, 8);
26e5b205
JH
8499 }
8500 else if (GET_CODE (operand) == CONST_DOUBLE)
8501 {
8502 REAL_VALUE_TYPE r;
8503 long l[3];
8504
8505 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8506 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8507 /* Do not use shift by 32 to avoid warning on 32bit systems. */
8508 if (HOST_BITS_PER_WIDE_INT >= 64)
8509 parts[0] = GEN_INT (l[0] + ((l[1] << 31) << 1));
8510 else
8511 parts[0] = immed_double_const (l[0], l[1], DImode);
8512 parts[1] = GEN_INT (l[2]);
8513 }
8514 else
8515 abort ();
8516 }
8517 }
2450a057 8518
2b589241 8519 return size;
2450a057
JH
8520}
8521
8522/* Emit insns to perform a move or push of DI, DF, and XF values.
 8523   All required insns are emitted here.  Operands 2-4 contain the
 8524   input values in the correct order; operands 5-7 contain the
 8525   output values.  */
8526
26e5b205
JH
8527void
8528ix86_split_long_move (operands)
8529 rtx operands[];
2450a057
JH
8530{
8531 rtx part[2][3];
26e5b205 8532 int nparts;
2450a057
JH
8533 int push = 0;
8534 int collisions = 0;
26e5b205
JH
8535 enum machine_mode mode = GET_MODE (operands[0]);
8536
 8537  /* The DFmode expanders may ask us to move a double.
 8538     For a 64-bit target this is a single move.  By hiding that fact
 8539     here we simplify the i386.md splitters.  */
8540 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
8541 {
 8542      /* Optimize constant pool references to immediates.  This is used by fp moves,
 8543	 which force all constants to memory to allow combining.  */
8544
8545 if (GET_CODE (operands[1]) == MEM
8546 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8547 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
8548 operands[1] = get_pool_constant (XEXP (operands[1], 0));
8549 if (push_operand (operands[0], VOIDmode))
b47b4f21
JH
8550 {
8551 operands[0] = copy_rtx (operands[0]);
8552 PUT_MODE (operands[0], Pmode);
8553 }
26e5b205
JH
8554 else
8555 operands[0] = gen_lowpart (DImode, operands[0]);
8556 operands[1] = gen_lowpart (DImode, operands[1]);
8557 emit_move_insn (operands[0], operands[1]);
8558 return;
8559 }
2450a057 8560
2450a057
JH
8561 /* The only non-offsettable memory we handle is push. */
8562 if (push_operand (operands[0], VOIDmode))
8563 push = 1;
8564 else if (GET_CODE (operands[0]) == MEM
8565 && ! offsettable_memref_p (operands[0]))
8566 abort ();
8567
26e5b205
JH
8568 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
8569 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
2450a057
JH
8570
 8571  /* When emitting a push, take care with source operands on the stack.  */
8572 if (push && GET_CODE (operands[1]) == MEM
8573 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
8574 {
26e5b205 8575 if (nparts == 3)
886cbb88
JH
8576 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
8577 XEXP (part[1][2], 0));
8578 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
8579 XEXP (part[1][1], 0));
2450a057
JH
8580 }
8581
0f290768 8582 /* We need to do copy in the right order in case an address register
2450a057
JH
8583 of the source overlaps the destination. */
8584 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
8585 {
8586 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
8587 collisions++;
8588 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8589 collisions++;
26e5b205 8590 if (nparts == 3
2450a057
JH
8591 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
8592 collisions++;
8593
8594 /* Collision in the middle part can be handled by reordering. */
26e5b205 8595 if (collisions == 1 && nparts == 3
2450a057 8596 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
e075ae69 8597 {
2450a057
JH
8598 rtx tmp;
8599 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
8600 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
8601 }
e075ae69 8602
2450a057
JH
 8603      /* If there are more collisions, we can't handle them by reordering.
8604 Do an lea to the last part and use only one colliding move. */
8605 else if (collisions > 1)
8606 {
8607 collisions = 1;
26e5b205 8608 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
2450a057 8609 XEXP (part[1][0], 0)));
26e5b205
JH
8610 part[1][0] = change_address (part[1][0],
8611 TARGET_64BIT ? DImode : SImode,
8612 part[0][nparts - 1]);
b72f00af 8613 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
26e5b205 8614 if (nparts == 3)
b72f00af 8615 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
2450a057
JH
8616 }
8617 }
8618
8619 if (push)
8620 {
26e5b205 8621 if (!TARGET_64BIT)
2b589241 8622 {
26e5b205
JH
8623 if (nparts == 3)
8624 {
 8625	      /* We use only the first 12 bytes of a TFmode value, but for pushing
 8626	         we are required to adjust the stack as if we were pushing a real
 8627	         16-byte value.  */
8628 if (mode == TFmode && !TARGET_64BIT)
8629 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
8630 GEN_INT (-4)));
8631 emit_move_insn (part[0][2], part[1][2]);
8632 }
2b589241 8633 }
26e5b205
JH
8634 else
8635 {
 8636	  /* In 64-bit mode we don't have a 32-bit push available.  If this is
 8637	     a register, that is OK: we just use the larger counterpart.  We also
 8638	     retype memory; this comes from an attempt to avoid a REX prefix when
 8639	     moving the second half of a TFmode value.  */
8640 if (GET_MODE (part[1][1]) == SImode)
8641 {
8642 if (GET_CODE (part[1][1]) == MEM)
f4ef873c 8643 part[1][1] = adjust_address (part[1][1], DImode, 0);
26e5b205
JH
8644 else if (REG_P (part[1][1]))
8645 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
8646 else
8647 abort();
886cbb88
JH
8648 if (GET_MODE (part[1][0]) == SImode)
8649 part[1][0] = part[1][1];
26e5b205
JH
8650 }
8651 }
8652 emit_move_insn (part[0][1], part[1][1]);
8653 emit_move_insn (part[0][0], part[1][0]);
8654 return;
2450a057
JH
8655 }
8656
 8657  /* Choose the correct order so as not to overwrite the source before it is copied.  */
8658 if ((REG_P (part[0][0])
8659 && REG_P (part[1][1])
8660 && (REGNO (part[0][0]) == REGNO (part[1][1])
26e5b205 8661 || (nparts == 3
2450a057
JH
8662 && REGNO (part[0][0]) == REGNO (part[1][2]))))
8663 || (collisions > 0
8664 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
8665 {
26e5b205 8666 if (nparts == 3)
2450a057 8667 {
26e5b205
JH
8668 operands[2] = part[0][2];
8669 operands[3] = part[0][1];
8670 operands[4] = part[0][0];
8671 operands[5] = part[1][2];
8672 operands[6] = part[1][1];
8673 operands[7] = part[1][0];
2450a057
JH
8674 }
8675 else
8676 {
26e5b205
JH
8677 operands[2] = part[0][1];
8678 operands[3] = part[0][0];
8679 operands[5] = part[1][1];
8680 operands[6] = part[1][0];
2450a057
JH
8681 }
8682 }
8683 else
8684 {
26e5b205 8685 if (nparts == 3)
2450a057 8686 {
26e5b205
JH
8687 operands[2] = part[0][0];
8688 operands[3] = part[0][1];
8689 operands[4] = part[0][2];
8690 operands[5] = part[1][0];
8691 operands[6] = part[1][1];
8692 operands[7] = part[1][2];
2450a057
JH
8693 }
8694 else
8695 {
26e5b205
JH
8696 operands[2] = part[0][0];
8697 operands[3] = part[0][1];
8698 operands[5] = part[1][0];
8699 operands[6] = part[1][1];
e075ae69
RH
8700 }
8701 }
26e5b205
JH
8702 emit_move_insn (operands[2], operands[5]);
8703 emit_move_insn (operands[3], operands[6]);
8704 if (nparts == 3)
8705 emit_move_insn (operands[4], operands[7]);
32b5b1aa 8706
26e5b205 8707 return;
32b5b1aa 8708}
32b5b1aa 8709
e075ae69
RH
8710void
8711ix86_split_ashldi (operands, scratch)
8712 rtx *operands, scratch;
32b5b1aa 8713{
e075ae69
RH
8714 rtx low[2], high[2];
8715 int count;
b985a30f 8716
e075ae69
RH
8717 if (GET_CODE (operands[2]) == CONST_INT)
8718 {
8719 split_di (operands, 2, low, high);
8720 count = INTVAL (operands[2]) & 63;
32b5b1aa 8721
8722 if (count >= 32)
8723 {
8724 emit_move_insn (high[0], low[1]);
8725 emit_move_insn (low[0], const0_rtx);
b985a30f 8726
8727 if (count > 32)
8728 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
8729 }
8730 else
8731 {
8732 if (!rtx_equal_p (operands[0], operands[1]))
8733 emit_move_insn (operands[0], operands[1]);
8734 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
8735 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
8736 }
8737 }
8738 else
8739 {
8740 if (!rtx_equal_p (operands[0], operands[1]))
8741 emit_move_insn (operands[0], operands[1]);
b985a30f 8742
e075ae69 8743 split_di (operands, 1, low, high);
b985a30f 8744
8745 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
8746 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 8747
fe577e58 8748 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 8749 {
fe577e58 8750 if (! no_new_pseudos)
8751 scratch = force_reg (SImode, const0_rtx);
8752 else
8753 emit_move_insn (scratch, const0_rtx);
8754
8755 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
8756 scratch));
8757 }
8758 else
8759 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
8760 }
e9a25f70 8761}
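
/* Illustrative sketch (not part of GCC): the decomposition performed by
   ix86_split_ashldi for a constant shift count, written on plain 32-bit
   halves.  For counts of 32 or more the low word simply becomes the high
   word (shifted further if needed); smaller counts use an SHLD-style
   double shift.  A hypothetical stand-alone model, not compiler code: */

static void
shl64_sketch (unsigned int *lo, unsigned int *hi, int count)
{
  count &= 63;
  if (count >= 32)
    {
      *hi = *lo << (count - 32);	/* high = low, shifted the rest */
      *lo = 0;
    }
  else if (count > 0)
    {
      *hi = (*hi << count) | (*lo >> (32 - count));	/* like shld */
      *lo <<= count;
    }
}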
32b5b1aa 8762
8763void
8764ix86_split_ashrdi (operands, scratch)
8765 rtx *operands, scratch;
32b5b1aa 8766{
8767 rtx low[2], high[2];
8768 int count;
32b5b1aa 8769
8770 if (GET_CODE (operands[2]) == CONST_INT)
8771 {
8772 split_di (operands, 2, low, high);
8773 count = INTVAL (operands[2]) & 63;
32b5b1aa 8774
8775 if (count >= 32)
8776 {
8777 emit_move_insn (low[0], high[1]);
32b5b1aa 8778
8779 if (! reload_completed)
8780 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
8781 else
8782 {
8783 emit_move_insn (high[0], low[0]);
8784 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
8785 }
8786
8787 if (count > 32)
8788 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
8789 }
8790 else
8791 {
8792 if (!rtx_equal_p (operands[0], operands[1]))
8793 emit_move_insn (operands[0], operands[1]);
8794 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8795 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
8796 }
8797 }
8798 else
32b5b1aa 8799 {
8800 if (!rtx_equal_p (operands[0], operands[1]))
8801 emit_move_insn (operands[0], operands[1]);
8802
8803 split_di (operands, 1, low, high);
8804
8805 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8806 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
8807
fe577e58 8808 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 8809 {
fe577e58 8810 if (! no_new_pseudos)
8811 scratch = gen_reg_rtx (SImode);
8812 emit_move_insn (scratch, high[0]);
8813 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
8814 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8815 scratch));
8816 }
8817 else
8818 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 8819 }
e075ae69 8820}
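
/* Illustrative sketch (not part of GCC): the matching constant-count
   arithmetic right shift split performed above, assuming ">>" on signed
   int is an arithmetic shift as on i386.  For counts of 32 or more the
   high word collapses to its sign, obtained by a 31-bit shift: */

static void
ashr64_sketch (int *lo, int *hi, int count)
{
  count &= 63;
  if (count >= 32)
    {
      *lo = *hi >> (count - 32);	/* low = high, shifted the rest */
      *hi >>= 31;			/* replicate the sign bit */
    }
  else if (count > 0)
    {
      *lo = (int) (((unsigned int) *lo >> count)
		   | ((unsigned int) *hi << (32 - count)));	/* like shrd */
      *hi >>= count;
    }
}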
32b5b1aa 8821
8822void
8823ix86_split_lshrdi (operands, scratch)
8824 rtx *operands, scratch;
8825{
8826 rtx low[2], high[2];
8827 int count;
32b5b1aa 8828
e075ae69 8829 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 8830 {
8831 split_di (operands, 2, low, high);
8832 count = INTVAL (operands[2]) & 63;
8833
8834 if (count >= 32)
c7271385 8835 {
8836 emit_move_insn (low[0], high[1]);
8837 emit_move_insn (high[0], const0_rtx);
32b5b1aa 8838
8839 if (count > 32)
8840 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
8841 }
8842 else
8843 {
8844 if (!rtx_equal_p (operands[0], operands[1]))
8845 emit_move_insn (operands[0], operands[1]);
8846 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8847 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
8848 }
32b5b1aa 8849 }
8850 else
8851 {
8852 if (!rtx_equal_p (operands[0], operands[1]))
8853 emit_move_insn (operands[0], operands[1]);
32b5b1aa 8854
8855 split_di (operands, 1, low, high);
8856
8857 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8858 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
8859
8860 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 8861 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 8862 {
fe577e58 8863 if (! no_new_pseudos)
8864 scratch = force_reg (SImode, const0_rtx);
8865 else
8866 emit_move_insn (scratch, const0_rtx);
8867
8868 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8869 scratch));
8870 }
8871 else
8872 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
8873 }
32b5b1aa 8874}
3f803cd9 8875
0407c02b 8876/* Helper function for the string operations below. Test VARIABLE for
 8877 whether it is aligned to VALUE bytes. If so, jump to the returned label. */
8878static rtx
8879ix86_expand_aligntest (variable, value)
8880 rtx variable;
8881 int value;
8882{
8883 rtx label = gen_label_rtx ();
8884 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
8885 if (GET_MODE (variable) == DImode)
8886 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
8887 else
8888 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
8889 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
8890 1, 0, label);
8891 return label;
8892}
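
/* Illustrative sketch (not part of GCC): the control flow that
   ix86_expand_aligntest emits, as plain C.  The caller places the
   returned label after its fixup code, so the branch skips the fixup
   when the tested alignment bits are already clear: */

static void
aligntest_sketch (unsigned long variable, unsigned long value)
{
  if ((variable & value) == 0)
    goto aligned;	/* the conditional jump emitted above */
  /* ... the caller's fixup code (e.g. move one byte) runs here ... */
 aligned:
  ;
}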
8893
8894/* Adjust COUNTER by the VALUE. */
8895static void
8896ix86_adjust_counter (countreg, value)
8897 rtx countreg;
8898 HOST_WIDE_INT value;
8899{
8900 if (GET_MODE (countreg) == DImode)
8901 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
8902 else
8903 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
8904}
8905
8906/* Zero extend possibly SImode EXP to Pmode register. */
d24b3457 8907rtx
8908ix86_zero_extend_to_Pmode (exp)
8909 rtx exp;
8910{
8911 rtx r;
8912 if (GET_MODE (exp) == VOIDmode)
8913 return force_reg (Pmode, exp);
8914 if (GET_MODE (exp) == Pmode)
8915 return copy_to_mode_reg (Pmode, exp);
8916 r = gen_reg_rtx (Pmode);
8917 emit_insn (gen_zero_extendsidi2 (r, exp));
8918 return r;
8919}
8920
8921/* Expand string move (memcpy) operation. Use i386 string operations when
8922 profitable. expand_clrstr contains similar code. */
8923int
8924ix86_expand_movstr (dst, src, count_exp, align_exp)
8925 rtx dst, src, count_exp, align_exp;
8926{
8927 rtx srcreg, destreg, countreg;
8928 enum machine_mode counter_mode;
8929 HOST_WIDE_INT align = 0;
8930 unsigned HOST_WIDE_INT count = 0;
8931 rtx insns;
8932
8933 start_sequence ();
8934
8935 if (GET_CODE (align_exp) == CONST_INT)
8936 align = INTVAL (align_exp);
8937
 8938 /* This simple hack avoids all inlining code and simplifies the code below. */
8939 if (!TARGET_ALIGN_STRINGOPS)
8940 align = 64;
8941
8942 if (GET_CODE (count_exp) == CONST_INT)
8943 count = INTVAL (count_exp);
8944
 8945 /* Figure out the proper mode for the counter. For 32bit targets it is
 8946 always SImode; for 64bit targets use SImode when possible, otherwise DImode.
 8947 Set count to the number of bytes copied when known at compile time. */
8948 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
8949 || x86_64_zero_extended_value (count_exp))
8950 counter_mode = SImode;
8951 else
8952 counter_mode = DImode;
8953
8954 if (counter_mode != SImode && counter_mode != DImode)
8955 abort ();
8956
8957 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
8958 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
8959
8960 emit_insn (gen_cld ());
8961
 8962 /* When optimizing for size, emit a simple rep ; movsb instruction for
 8963 counts not divisible by 4. */
8964
8965 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
8966 {
8967 countreg = ix86_zero_extend_to_Pmode (count_exp);
8968 if (TARGET_64BIT)
8969 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
8970 destreg, srcreg, countreg));
8971 else
8972 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
8973 destreg, srcreg, countreg));
8974 }
8975
8976 /* For constant aligned (or small unaligned) copies use rep movsl
8977 followed by code copying the rest. For PentiumPro ensure 8 byte
8978 alignment to allow rep movsl acceleration. */
8979
8980 else if (count != 0
8981 && (align >= 8
8982 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
8983 || optimize_size || count < (unsigned int)64))
8984 {
8985 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
8986 if (count & ~(size - 1))
8987 {
8988 countreg = copy_to_mode_reg (counter_mode,
8989 GEN_INT ((count >> (size == 4 ? 2 : 3))
8990 & (TARGET_64BIT ? -1 : 0x3fffffff)));
8991 countreg = ix86_zero_extend_to_Pmode (countreg);
8992 if (size == 4)
8993 {
8994 if (TARGET_64BIT)
8995 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
8996 destreg, srcreg, countreg));
8997 else
8998 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
8999 destreg, srcreg, countreg));
9000 }
9001 else
9002 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9003 destreg, srcreg, countreg));
9004 }
9005 if (size == 8 && (count & 0x04))
9006 emit_insn (gen_strmovsi (destreg, srcreg));
9007 if (count & 0x02)
9008 emit_insn (gen_strmovhi (destreg, srcreg));
9009 if (count & 0x01)
9010 emit_insn (gen_strmovqi (destreg, srcreg));
9011 }
 9012 /* The generic code is based on the glibc implementation:
9013 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9014 allowing accelerated copying there)
9015 - copy the data using rep movsl
9016 - copy the rest. */
9017 else
9018 {
9019 rtx countreg2;
9020 rtx label = NULL;
9021
 9022 /* In case we don't know anything about the alignment, default to the
 9023 library version, since it is usually equally fast and results in
 9024 shorter code. */
9025 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9026 {
9027 end_sequence ();
9028 return 0;
9029 }
9030
9031 if (TARGET_SINGLE_STRINGOP)
9032 emit_insn (gen_cld ());
9033
9034 countreg2 = gen_reg_rtx (Pmode);
9035 countreg = copy_to_mode_reg (counter_mode, count_exp);
9036
9037 /* We don't use loops to align destination and to copy parts smaller
9038 than 4 bytes, because gcc is able to optimize such code better (in
9039 the case the destination or the count really is aligned, gcc is often
9040 able to predict the branches) and also it is friendlier to the
a4f31c00 9041 hardware branch prediction.
9042
 9043 Using loops is beneficial for the generic case, because we can
 9044 handle small counts using the loops. Many CPUs (such as Athlon)
 9045 have large REP prefix setup costs.
 9046
 9047 This is quite costly. Maybe we can revisit this decision later or
 9048 add some customizability to this code. */
9049
9050 if (count == 0
9051 && align < (TARGET_PENTIUMPRO && (count == 0
9052 || count >= (unsigned int)260)
9053 ? 8 : UNITS_PER_WORD))
9054 {
9055 label = gen_label_rtx ();
9056 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9057 LEU, 0, counter_mode, 1, 0, label);
9058 }
9059 if (align <= 1)
9060 {
9061 rtx label = ix86_expand_aligntest (destreg, 1);
9062 emit_insn (gen_strmovqi (destreg, srcreg));
9063 ix86_adjust_counter (countreg, 1);
9064 emit_label (label);
9065 LABEL_NUSES (label) = 1;
9066 }
9067 if (align <= 2)
9068 {
9069 rtx label = ix86_expand_aligntest (destreg, 2);
9070 emit_insn (gen_strmovhi (destreg, srcreg));
9071 ix86_adjust_counter (countreg, 2);
9072 emit_label (label);
9073 LABEL_NUSES (label) = 1;
9074 }
9075 if (align <= 4
9076 && ((TARGET_PENTIUMPRO && (count == 0
9077 || count >= (unsigned int)260))
9078 || TARGET_64BIT))
9079 {
9080 rtx label = ix86_expand_aligntest (destreg, 4);
9081 emit_insn (gen_strmovsi (destreg, srcreg));
9082 ix86_adjust_counter (countreg, 4);
9083 emit_label (label);
9084 LABEL_NUSES (label) = 1;
9085 }
9086
9087 if (!TARGET_SINGLE_STRINGOP)
9088 emit_insn (gen_cld ());
9089 if (TARGET_64BIT)
9090 {
9091 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9092 GEN_INT (3)));
9093 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9094 destreg, srcreg, countreg2));
9095 }
9096 else
9097 {
9098 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9099 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9100 destreg, srcreg, countreg2));
9101 }
9102
9103 if (label)
9104 {
9105 emit_label (label);
9106 LABEL_NUSES (label) = 1;
9107 }
9108 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9109 emit_insn (gen_strmovsi (destreg, srcreg));
9110 if ((align <= 4 || count == 0) && TARGET_64BIT)
9111 {
9112 rtx label = ix86_expand_aligntest (countreg, 4);
9113 emit_insn (gen_strmovsi (destreg, srcreg));
9114 emit_label (label);
9115 LABEL_NUSES (label) = 1;
9116 }
9117 if (align > 2 && count != 0 && (count & 2))
9118 emit_insn (gen_strmovhi (destreg, srcreg));
9119 if (align <= 2 || count == 0)
9120 {
9121 rtx label = ix86_expand_aligntest (countreg, 2);
9122 emit_insn (gen_strmovhi (destreg, srcreg));
9123 emit_label (label);
9124 LABEL_NUSES (label) = 1;
9125 }
9126 if (align > 1 && count != 0 && (count & 1))
9127 emit_insn (gen_strmovqi (destreg, srcreg));
9128 if (align <= 1 || count == 0)
9129 {
9130 rtx label = ix86_expand_aligntest (countreg, 1);
9131 emit_insn (gen_strmovqi (destreg, srcreg));
9132 emit_label (label);
9133 LABEL_NUSES (label) = 1;
9134 }
9135 }
9136
9137 insns = get_insns ();
9138 end_sequence ();
9139
9140 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
9141 emit_insns (insns);
9142 return 1;
9143}
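
/* Illustrative sketch (not part of GCC): the overall strategy of
   ix86_expand_movstr in the generic case, written as plain C over
   bytes.  The real expander emits rep-prefixed string insns instead of
   these loops and uses 8-byte words on 64-bit targets; this model
   assumes the 32-bit path: */

static void
movstr_sketch (unsigned char *dst, const unsigned char *src, unsigned long n)
{
  /* Align the destination, one byte at a time.  */
  while (n > 0 && ((unsigned long) dst & 3) != 0)
    {
      *dst++ = *src++;
      n--;
    }
  /* Bulk copy, a word at a time (the "rep movsl" part).  */
  while (n >= 4)
    {
      *dst++ = *src++; *dst++ = *src++; *dst++ = *src++; *dst++ = *src++;
      n -= 4;
    }
  /* Copy the remaining tail bytes.  */
  while (n-- > 0)
    *dst++ = *src++;
}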
9144
9145/* Expand string clear operation (bzero). Use i386 string operations when
9146 profitable. expand_movstr contains similar code. */
9147int
9148ix86_expand_clrstr (src, count_exp, align_exp)
9149 rtx src, count_exp, align_exp;
9150{
9151 rtx destreg, zeroreg, countreg;
9152 enum machine_mode counter_mode;
9153 HOST_WIDE_INT align = 0;
9154 unsigned HOST_WIDE_INT count = 0;
9155
9156 if (GET_CODE (align_exp) == CONST_INT)
9157 align = INTVAL (align_exp);
9158
 9159 /* This simple hack avoids all inlining code and simplifies the code below. */
9160 if (!TARGET_ALIGN_STRINGOPS)
9161 align = 32;
9162
9163 if (GET_CODE (count_exp) == CONST_INT)
9164 count = INTVAL (count_exp);
 9165 /* Figure out the proper mode for the counter. For 32bit targets it is
 9166 always SImode; for 64bit targets use SImode when possible, otherwise DImode.
 9167 Set count to the number of bytes cleared when known at compile time. */
9168 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9169 || x86_64_zero_extended_value (count_exp))
9170 counter_mode = SImode;
9171 else
9172 counter_mode = DImode;
9173
9174 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9175
9176 emit_insn (gen_cld ());
9177
 9178 /* When optimizing for size, emit a simple rep ; stosb instruction for
 9179 counts not divisible by 4. */
9180
9181 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9182 {
9183 countreg = ix86_zero_extend_to_Pmode (count_exp);
9184 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
9185 if (TARGET_64BIT)
9186 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
9187 destreg, countreg));
9188 else
9189 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
9190 destreg, countreg));
9191 }
9192 else if (count != 0
9193 && (align >= 8
9194 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9195 || optimize_size || count < (unsigned int)64))
9196 {
9197 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9198 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
9199 if (count & ~(size - 1))
9200 {
9201 countreg = copy_to_mode_reg (counter_mode,
9202 GEN_INT ((count >> (size == 4 ? 2 : 3))
9203 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9204 countreg = ix86_zero_extend_to_Pmode (countreg);
9205 if (size == 4)
9206 {
9207 if (TARGET_64BIT)
9208 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
9209 destreg, countreg));
9210 else
9211 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
9212 destreg, countreg));
9213 }
9214 else
9215 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
9216 destreg, countreg));
9217 }
9218 if (size == 8 && (count & 0x04))
9219 emit_insn (gen_strsetsi (destreg,
9220 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9221 if (count & 0x02)
9222 emit_insn (gen_strsethi (destreg,
9223 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9224 if (count & 0x01)
9225 emit_insn (gen_strsetqi (destreg,
9226 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9227 }
9228 else
9229 {
9230 rtx countreg2;
9231 rtx label = NULL;
9232
 9233 /* In case we don't know anything about the alignment, default to the
 9234 library version, since it is usually equally fast and results in
 9235 shorter code. */
9236 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9237 return 0;
9238
9239 if (TARGET_SINGLE_STRINGOP)
9240 emit_insn (gen_cld ());
9241
9242 countreg2 = gen_reg_rtx (Pmode);
9243 countreg = copy_to_mode_reg (counter_mode, count_exp);
9244 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
9245
9246 if (count == 0
9247 && align < (TARGET_PENTIUMPRO && (count == 0
9248 || count >= (unsigned int)260)
9249 ? 8 : UNITS_PER_WORD))
9250 {
9251 label = gen_label_rtx ();
9252 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9253 LEU, 0, counter_mode, 1, 0, label);
9254 }
9255 if (align <= 1)
9256 {
9257 rtx label = ix86_expand_aligntest (destreg, 1);
9258 emit_insn (gen_strsetqi (destreg,
9259 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9260 ix86_adjust_counter (countreg, 1);
9261 emit_label (label);
9262 LABEL_NUSES (label) = 1;
9263 }
9264 if (align <= 2)
9265 {
9266 rtx label = ix86_expand_aligntest (destreg, 2);
9267 emit_insn (gen_strsethi (destreg,
9268 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9269 ix86_adjust_counter (countreg, 2);
9270 emit_label (label);
9271 LABEL_NUSES (label) = 1;
9272 }
9273 if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
9274 || count >= (unsigned int)260))
9275 {
9276 rtx label = ix86_expand_aligntest (destreg, 4);
9277 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
9278 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
9279 : zeroreg)));
9280 ix86_adjust_counter (countreg, 4);
9281 emit_label (label);
9282 LABEL_NUSES (label) = 1;
9283 }
9284
9285 if (!TARGET_SINGLE_STRINGOP)
9286 emit_insn (gen_cld ());
9287 if (TARGET_64BIT)
9288 {
9289 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9290 GEN_INT (3)));
9291 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
9292 destreg, countreg2));
9293 }
9294 else
9295 {
9296 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9297 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
9298 destreg, countreg2));
9299 }
9300
9301 if (label)
9302 {
9303 emit_label (label);
9304 LABEL_NUSES (label) = 1;
9305 }
9306 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9307 emit_insn (gen_strsetsi (destreg,
9308 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9309 if (TARGET_64BIT && (align <= 4 || count == 0))
9310 {
9311 rtx label = ix86_expand_aligntest (destreg, 2);
9312 emit_insn (gen_strsetsi (destreg,
9313 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9314 emit_label (label);
9315 LABEL_NUSES (label) = 1;
9316 }
9317 if (align > 2 && count != 0 && (count & 2))
9318 emit_insn (gen_strsethi (destreg,
9319 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9320 if (align <= 2 || count == 0)
9321 {
9322 rtx label = ix86_expand_aligntest (destreg, 2);
9323 emit_insn (gen_strsethi (destreg,
9324 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9325 emit_label (label);
9326 LABEL_NUSES (label) = 1;
9327 }
9328 if (align > 1 && count != 0 && (count & 1))
9329 emit_insn (gen_strsetqi (destreg,
9330 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9331 if (align <= 1 || count == 0)
9332 {
9333 rtx label = ix86_expand_aligntest (destreg, 1);
9334 emit_insn (gen_strsetqi (destreg,
9335 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9336 emit_label (label);
9337 LABEL_NUSES (label) = 1;
9338 }
9339 }
9340 return 1;
9341}
9342/* Expand strlen. */
9343int
9344ix86_expand_strlen (out, src, eoschar, align)
9345 rtx out, src, eoschar, align;
9346{
9347 rtx addr, scratch1, scratch2, scratch3, scratch4;
9348
 9349 /* The generic case of the strlen expander is long. Avoid expanding it
 9350 unless TARGET_INLINE_ALL_STRINGOPS. */
9351
9352 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9353 && !TARGET_INLINE_ALL_STRINGOPS
9354 && !optimize_size
9355 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
9356 return 0;
9357
9358 addr = force_reg (Pmode, XEXP (src, 0));
9359 scratch1 = gen_reg_rtx (Pmode);
9360
9361 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9362 && !optimize_size)
9363 {
9364 /* Well it seems that some optimizer does not combine a call like
9365 foo(strlen(bar), strlen(bar));
 9366 when the move and the subtraction are done here. It does calculate
9367 the length just once when these instructions are done inside of
9368 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
9369 often used and I use one fewer register for the lifetime of
9370 output_strlen_unroll() this is better. */
9371
9372 emit_move_insn (out, addr);
9373
9374 ix86_expand_strlensi_unroll_1 (out, align);
9375
9376 /* strlensi_unroll_1 returns the address of the zero at the end of
9377 the string, like memchr(), so compute the length by subtracting
9378 the start address. */
9379 if (TARGET_64BIT)
9380 emit_insn (gen_subdi3 (out, out, addr));
9381 else
9382 emit_insn (gen_subsi3 (out, out, addr));
9383 }
9384 else
9385 {
9386 scratch2 = gen_reg_rtx (Pmode);
9387 scratch3 = gen_reg_rtx (Pmode);
9388 scratch4 = force_reg (Pmode, constm1_rtx);
9389
9390 emit_move_insn (scratch3, addr);
9391 eoschar = force_reg (QImode, eoschar);
9392
9393 emit_insn (gen_cld ());
9394 if (TARGET_64BIT)
9395 {
9396 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
9397 align, scratch4, scratch3));
9398 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
9399 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
9400 }
9401 else
9402 {
9403 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
9404 align, scratch4, scratch3));
9405 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
9406 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
9407 }
9408 }
9409 return 1;
9410}
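
/* Illustrative note (not in the original source): in the repnz scasb
   path above, the count register starts at -1 and is decremented once
   per scanned byte, including the terminator, so it ends at -(len + 2).
   The one_cmpl and add -1 insns recover the length; a minimal model: */

static unsigned int
strlen_from_scas_count_sketch (unsigned int count_after_scas)
{
  /* ~count - 1: e.g. len 3 scans 4 bytes, count = -5, ~(-5) - 1 = 3.  */
  return ~count_after_scas - 1;
}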
9411
9412/* Expand the appropriate insns for doing strlen if not just doing
9413 repnz; scasb
9414
9415 out = result, initialized with the start address
9416 align_rtx = alignment of the address.
 9417 scratch = scratch register, initialized with the start address when
 9418 not aligned, otherwise undefined
9419
9420 This is just the body. It needs the initialisations mentioned above and
9421 some address computing at the end. These things are done in i386.md. */
9422
9423static void
9424ix86_expand_strlensi_unroll_1 (out, align_rtx)
9425 rtx out, align_rtx;
3f803cd9 9426{
9427 int align;
9428 rtx tmp;
9429 rtx align_2_label = NULL_RTX;
9430 rtx align_3_label = NULL_RTX;
9431 rtx align_4_label = gen_label_rtx ();
9432 rtx end_0_label = gen_label_rtx ();
e075ae69 9433 rtx mem;
e2e52e1b 9434 rtx tmpreg = gen_reg_rtx (SImode);
0945b39d 9435 rtx scratch = gen_reg_rtx (SImode);
9436
9437 align = 0;
9438 if (GET_CODE (align_rtx) == CONST_INT)
9439 align = INTVAL (align_rtx);
3f803cd9 9440
e9a25f70 9441 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
3f803cd9 9442
e9a25f70 9443 /* Is there a known alignment and is it less than 4? */
e075ae69 9444 if (align < 4)
3f803cd9 9445 {
9446 rtx scratch1 = gen_reg_rtx (Pmode);
9447 emit_move_insn (scratch1, out);
e9a25f70 9448 /* Is there a known alignment and is it not 2? */
e075ae69 9449 if (align != 2)
3f803cd9 9450 {
9451 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
9452 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
9453
9454 /* Leave just the 3 lower bits. */
0945b39d 9455 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
9456 NULL_RTX, 0, OPTAB_WIDEN);
9457
9076b9c1 9458 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
0945b39d 9459 Pmode, 1, 0, align_4_label);
9076b9c1 9460 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
0945b39d 9461 Pmode, 1, 0, align_2_label);
9076b9c1 9462 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
0945b39d 9463 Pmode, 1, 0, align_3_label);
9464 }
9465 else
9466 {
 9467 /* Since the alignment is 2, we have to check 2 or 0 bytes;
 9468 check whether it is aligned to a 4-byte boundary. */
e9a25f70 9469
0945b39d 9470 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
9471 NULL_RTX, 0, OPTAB_WIDEN);
9472
9076b9c1 9473 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
0945b39d 9474 Pmode, 1, 0, align_4_label);
9475 }
9476
e075ae69 9477 mem = gen_rtx_MEM (QImode, out);
e9a25f70 9478
e075ae69 9479 /* Now compare the bytes. */
e9a25f70 9480
 9481 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
9482 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9483 QImode, 1, 0, end_0_label);
3f803cd9 9484
0f290768 9485 /* Increment the address. */
9486 if (TARGET_64BIT)
9487 emit_insn (gen_adddi3 (out, out, const1_rtx));
9488 else
9489 emit_insn (gen_addsi3 (out, out, const1_rtx));
e9a25f70 9490
9491 /* Not needed with an alignment of 2 */
9492 if (align != 2)
9493 {
9494 emit_label (align_2_label);
3f803cd9 9495
9496 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9497 QImode, 1, 0, end_0_label);
e075ae69 9498
9499 if (TARGET_64BIT)
9500 emit_insn (gen_adddi3 (out, out, const1_rtx));
9501 else
9502 emit_insn (gen_addsi3 (out, out, const1_rtx));
9503
9504 emit_label (align_3_label);
9505 }
9506
9507 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9508 QImode, 1, 0, end_0_label);
e075ae69 9509
9510 if (TARGET_64BIT)
9511 emit_insn (gen_adddi3 (out, out, const1_rtx));
9512 else
9513 emit_insn (gen_addsi3 (out, out, const1_rtx));
9514 }
9515
 9516 /* Generate a loop to check 4 bytes at a time. It is not a good idea
 9517 to align this loop; it only makes programs larger and does not help
 9518 speed. */
9519 emit_label (align_4_label);
3f803cd9 9520
9521 mem = gen_rtx_MEM (SImode, out);
9522 emit_move_insn (scratch, mem);
9523 if (TARGET_64BIT)
9524 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
9525 else
9526 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
e075ae69 9527
 9528 /* This formula yields a nonzero result iff one of the bytes is zero.
 9529 This saves three branches inside the loop and many cycles. */
9530
9531 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
9532 emit_insn (gen_one_cmplsi2 (scratch, scratch));
9533 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
9534 emit_insn (gen_andsi3 (tmpreg, tmpreg,
9535 GEN_INT (trunc_int_for_mode
9536 (0x80808080, SImode))));
9537 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
9538 SImode, 1, 0, align_4_label);
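	  /* Illustrative note (not in the original source): the insns just
	     emitted compute the classic zero-byte test; in plain C,

		 (x - 0x01010101U) & ~x & 0x80808080U

	     is nonzero iff at least one byte of x is zero, so the scan
	     loop needs only one branch per 4-byte word. */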
9539
9540 if (TARGET_CMOVE)
9541 {
9542 rtx reg = gen_reg_rtx (SImode);
0945b39d 9543 rtx reg2 = gen_reg_rtx (Pmode);
9544 emit_move_insn (reg, tmpreg);
9545 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
9546
0f290768 9547 /* If zero is not in the first two bytes, move two bytes forward. */
16189740 9548 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9549 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9550 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9551 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
9552 gen_rtx_IF_THEN_ELSE (SImode, tmp,
9553 reg,
9554 tmpreg)));
e2e52e1b 9555 /* Emit lea manually to avoid clobbering of flags. */
9556 emit_insn (gen_rtx_SET (SImode, reg2,
9557 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
9558
9559 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9560 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9561 emit_insn (gen_rtx_SET (VOIDmode, out,
0945b39d 9562 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
9563 reg2,
9564 out)));
9565
9566 }
9567 else
9568 {
9569 rtx end_2_label = gen_label_rtx ();
9570 /* Is zero in the first two bytes? */
9571
16189740 9572 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9573 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9574 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
9575 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9576 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
9577 pc_rtx);
9578 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9579 JUMP_LABEL (tmp) = end_2_label;
9580
0f290768 9581 /* Not in the first two. Move two bytes forward. */
e2e52e1b 9582 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
9583 if (TARGET_64BIT)
9584 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
9585 else
9586 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
9587
9588 emit_label (end_2_label);
9589
9590 }
9591
0f290768 9592 /* Avoid branch in fixing the byte. */
e2e52e1b 9593 tmpreg = gen_lowpart (QImode, tmpreg);
7e08e190 9594 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
9595 if (TARGET_64BIT)
9596 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
9597 else
9598 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
9599
9600 emit_label (end_0_label);
9601}
9602\f
9603/* Clear stack slot assignments remembered from previous functions.
9604 This is called from INIT_EXPANDERS once before RTL is emitted for each
9605 function. */
9606
9607static void
9608ix86_init_machine_status (p)
1526a060 9609 struct function *p;
e075ae69 9610{
9611 p->machine = (struct machine_function *)
9612 xcalloc (1, sizeof (struct machine_function));
9613}
9614
9615/* Mark machine specific bits of P for GC. */
9616static void
9617ix86_mark_machine_status (p)
9618 struct function *p;
9619{
37b15744 9620 struct machine_function *machine = p->machine;
9621 enum machine_mode mode;
9622 int n;
9623
9624 if (! machine)
9625 return;
9626
9627 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
9628 mode = (enum machine_mode) ((int) mode + 1))
9629 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
9630 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
9631}
9632
9633static void
9634ix86_free_machine_status (p)
9635 struct function *p;
9636{
9637 free (p->machine);
9638 p->machine = NULL;
9639}
9640
9641/* Return a MEM corresponding to a stack slot with mode MODE.
9642 Allocate a new slot if necessary.
9643
9644 The RTL for a function can have several slots available: N is
9645 which slot to use. */
9646
9647rtx
9648assign_386_stack_local (mode, n)
9649 enum machine_mode mode;
9650 int n;
9651{
9652 if (n < 0 || n >= MAX_386_STACK_LOCALS)
9653 abort ();
9654
9655 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
9656 ix86_stack_locals[(int) mode][n]
9657 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
9658
9659 return ix86_stack_locals[(int) mode][n];
9660}
9661\f
9662/* Calculate the length of the memory address in the instruction
9663 encoding. Does not include the one-byte modrm, opcode, or prefix. */
9664
9665static int
9666memory_address_length (addr)
9667 rtx addr;
9668{
9669 struct ix86_address parts;
9670 rtx base, index, disp;
9671 int len;
9672
9673 if (GET_CODE (addr) == PRE_DEC
0d7d98ee
JH
9674 || GET_CODE (addr) == POST_INC
9675 || GET_CODE (addr) == PRE_MODIFY
9676 || GET_CODE (addr) == POST_MODIFY)
e075ae69 9677 return 0;
3f803cd9 9678
9679 if (! ix86_decompose_address (addr, &parts))
9680 abort ();
3f803cd9 9681
e075ae69
RH
9682 base = parts.base;
9683 index = parts.index;
9684 disp = parts.disp;
9685 len = 0;
3f803cd9 9686
9687 /* Register Indirect. */
9688 if (base && !index && !disp)
9689 {
9690 /* Special cases: ebp and esp need the two-byte modrm form. */
9691 if (addr == stack_pointer_rtx
9692 || addr == arg_pointer_rtx
9693 || addr == frame_pointer_rtx
9694 || addr == hard_frame_pointer_rtx)
e075ae69 9695 len = 1;
3f803cd9 9696 }
e9a25f70 9697
9698 /* Direct Addressing. */
9699 else if (disp && !base && !index)
9700 len = 4;
9701
9702 else
9703 {
9704 /* Find the length of the displacement constant. */
9705 if (disp)
9706 {
9707 if (GET_CODE (disp) == CONST_INT
9708 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
9709 len = 1;
9710 else
9711 len = 4;
9712 }
3f803cd9 9713
9714 /* An index requires the two-byte modrm form. */
9715 if (index)
9716 len += 1;
9717 }
9718
9719 return len;
9720}
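
/* Illustrative examples (not part of GCC) of the lengths computed above,
   in AT&T syntax, excluding the one-byte modrm itself:

	(%eax)		-> 0	register indirect
	(%esp)		-> 1	stack pointer forces the two-byte form
	0x1234		-> 4	direct 32-bit displacement
	8(%ebx)		-> 1	disp8 fits the 'K' constraint
	8(%ebx,%ecx,2)	-> 2	disp8 plus the SIB byte for the index */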
79325812 9721
 9722/* Compute the default value for the "length_immediate" attribute. When
 9723 SHORTFORM is set, expect that the insn has an 8bit immediate alternative. */
e075ae69 9724int
6ef67412 9725ix86_attr_length_immediate_default (insn, shortform)
e075ae69 9726 rtx insn;
6ef67412 9727 int shortform;
e075ae69 9728{
9729 int len = 0;
9730 int i;
6c698a6d 9731 extract_insn_cached (insn);
9732 for (i = recog_data.n_operands - 1; i >= 0; --i)
9733 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 9734 {
6ef67412 9735 if (len)
3071fab5 9736 abort ();
9737 if (shortform
9738 && GET_CODE (recog_data.operand[i]) == CONST_INT
9739 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
9740 len = 1;
9741 else
9742 {
9743 switch (get_attr_mode (insn))
9744 {
9745 case MODE_QI:
9746 len+=1;
9747 break;
9748 case MODE_HI:
9749 len+=2;
9750 break;
9751 case MODE_SI:
9752 len+=4;
9753 break;
9754 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
9755 case MODE_DI:
9756 len+=4;
9757 break;
9758 default:
9759 fatal_insn ("Unknown insn mode", insn);
9760 }
9761 }
3071fab5 9762 }
9763 return len;
9764}
9765/* Compute default value for "length_address" attribute. */
9766int
9767ix86_attr_length_address_default (insn)
9768 rtx insn;
9769{
9770 int i;
6c698a6d 9771 extract_insn_cached (insn);
9772 for (i = recog_data.n_operands - 1; i >= 0; --i)
9773 if (GET_CODE (recog_data.operand[i]) == MEM)
 9775 return memory_address_length (XEXP (recog_data.operand[i], 0));
6ef67412 9778 return 0;
3f803cd9 9779}
9780\f
9781/* Return the maximum number of instructions a cpu can issue. */
b657fc39 9782
c237e94a 9783static int
e075ae69 9784ix86_issue_rate ()
b657fc39 9785{
e075ae69 9786 switch (ix86_cpu)
b657fc39 9787 {
9788 case PROCESSOR_PENTIUM:
9789 case PROCESSOR_K6:
9790 return 2;
79325812 9791
e075ae69 9792 case PROCESSOR_PENTIUMPRO:
9793 case PROCESSOR_PENTIUM4:
9794 case PROCESSOR_ATHLON:
e075ae69 9795 return 3;
b657fc39 9796
b657fc39 9797 default:
e075ae69 9798 return 1;
b657fc39 9799 }
9800}
9801
 9802/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
 9803 set by DEP_INSN and nothing else set by DEP_INSN. */
b657fc39 9804
9805static int
9806ix86_flags_dependant (insn, dep_insn, insn_type)
9807 rtx insn, dep_insn;
9808 enum attr_type insn_type;
9809{
9810 rtx set, set2;
b657fc39 9811
9812 /* Simplify the test for uninteresting insns. */
9813 if (insn_type != TYPE_SETCC
9814 && insn_type != TYPE_ICMOV
9815 && insn_type != TYPE_FCMOV
9816 && insn_type != TYPE_IBR)
9817 return 0;
b657fc39 9818
9819 if ((set = single_set (dep_insn)) != 0)
9820 {
9821 set = SET_DEST (set);
9822 set2 = NULL_RTX;
9823 }
9824 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
9825 && XVECLEN (PATTERN (dep_insn), 0) == 2
9826 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
9827 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
9828 {
9829 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
 9830 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
9831 }
9832 else
9833 return 0;
b657fc39 9834
9835 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
9836 return 0;
b657fc39 9837
 9838 /* This test is true if the dependent insn reads the flags but
 9839 not any other potentially set register. */
9840 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
9841 return 0;
9842
9843 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
9844 return 0;
9845
9846 return 1;
e075ae69 9847}
b657fc39 9848
9849/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
9850 address with operands set by DEP_INSN. */
9851
9852static int
9853ix86_agi_dependant (insn, dep_insn, insn_type)
9854 rtx insn, dep_insn;
9855 enum attr_type insn_type;
9856{
9857 rtx addr;
9858
9859 if (insn_type == TYPE_LEA
9860 && TARGET_PENTIUM)
9861 {
9862 addr = PATTERN (insn);
9863 if (GET_CODE (addr) == SET)
9864 ;
9865 else if (GET_CODE (addr) == PARALLEL
9866 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
9867 addr = XVECEXP (addr, 0, 0);
9868 else
9869 abort ();
9870 addr = SET_SRC (addr);
9871 }
9872 else
9873 {
9874 int i;
6c698a6d 9875 extract_insn_cached (insn);
9876 for (i = recog_data.n_operands - 1; i >= 0; --i)
9877 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 9878 {
1ccbefce 9879 addr = XEXP (recog_data.operand[i], 0);
9880 goto found;
9881 }
9882 return 0;
9883 found:;
9884 }
9885
e075ae69 9886 return modified_in_p (addr, dep_insn);
b657fc39 9887}
a269a03c 9888
c237e94a 9889static int
e075ae69 9890ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
9891 rtx insn, link, dep_insn;
9892 int cost;
9893{
e075ae69 9894 enum attr_type insn_type, dep_insn_type;
6ad48e84 9895 enum attr_memory memory, dep_memory;
e075ae69 9896 rtx set, set2;
9b00189f 9897 int dep_insn_code_number;
a269a03c 9898
 9899 /* Anti and output dependencies have zero cost on all CPUs. */
e075ae69 9900 if (REG_NOTE_KIND (link) != 0)
309ada50 9901 return 0;
a269a03c 9902
9903 dep_insn_code_number = recog_memoized (dep_insn);
9904
e075ae69 9905 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 9906 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 9907 return cost;
a269a03c 9908
9909 insn_type = get_attr_type (insn);
9910 dep_insn_type = get_attr_type (dep_insn);
9b00189f 9911
9912 switch (ix86_cpu)
9913 {
9914 case PROCESSOR_PENTIUM:
9915 /* Address Generation Interlock adds a cycle of latency. */
9916 if (ix86_agi_dependant (insn, dep_insn, insn_type))
9917 cost += 1;
9918
9919 /* ??? Compares pair with jump/setcc. */
9920 if (ix86_flags_dependant (insn, dep_insn, insn_type))
9921 cost = 0;
9922
 9923 /* Floating point stores require the value to be ready one cycle earlier. */
0f290768 9924 if (insn_type == TYPE_FMOV
9925 && get_attr_memory (insn) == MEMORY_STORE
9926 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9927 cost += 1;
9928 break;
a269a03c 9929
e075ae69 9930 case PROCESSOR_PENTIUMPRO:
9931 memory = get_attr_memory (insn);
9932 dep_memory = get_attr_memory (dep_insn);
9933
0f290768 9934 /* Since we can't represent delayed latencies of load+operation,
9935 increase the cost here for non-imov insns. */
9936 if (dep_insn_type != TYPE_IMOV
6ad48e84
JH
9937 && dep_insn_type != TYPE_FMOV
9938 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
e075ae69
RH
9939 cost += 1;
9940
9941 /* INT->FP conversion is expensive. */
9942 if (get_attr_fp_int_src (dep_insn))
9943 cost += 5;
9944
9945 /* There is one cycle extra latency between an FP op and a store. */
9946 if (insn_type == TYPE_FMOV
9947 && (set = single_set (dep_insn)) != NULL_RTX
9948 && (set2 = single_set (insn)) != NULL_RTX
9949 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
9950 && GET_CODE (SET_DEST (set2)) == MEM)
9951 cost += 1;
9952
 9953 /* Show the ability of the reorder buffer to hide the latency of a load
 9954 by executing it in parallel with the previous instruction when the
 9955 previous instruction is not needed to compute the address. */
9956 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
9957 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9958 {
 9959 /* Claim moves to take one cycle, as the core can issue one load
 9960 at a time and the next load can start a cycle later. */
9961 if (dep_insn_type == TYPE_IMOV
9962 || dep_insn_type == TYPE_FMOV)
9963 cost = 1;
9964 else if (cost > 1)
9965 cost--;
9966 }
e075ae69 9967 break;
a269a03c 9968
e075ae69 9969 case PROCESSOR_K6:
9970 memory = get_attr_memory (insn);
9971 dep_memory = get_attr_memory (dep_insn);
e075ae69
RH
9972 /* The esp dependency is resolved before the instruction is really
9973 finished. */
9974 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
9975 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
9976 return 1;
a269a03c 9977
0f290768 9978 /* Since we can't represent delayed latencies of load+operation,
e075ae69 9979 increase the cost here for non-imov insns. */
6ad48e84 9980 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
e075ae69
RH
9981 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
9982
9983 /* INT->FP conversion is expensive. */
9984 if (get_attr_fp_int_src (dep_insn))
9985 cost += 5;
9986
 9987 /* Show the ability of the reorder buffer to hide the latency of a load
 9988 by executing it in parallel with the previous instruction when the
 9989 previous instruction is not needed to compute the address. */
9990 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
9991 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9992 {
 9993 /* Claim moves to take one cycle, as the core can issue one load
 9994 at a time and the next load can start a cycle later. */
9995 if (dep_insn_type == TYPE_IMOV
9996 || dep_insn_type == TYPE_FMOV)
9997 cost = 1;
9998 else if (cost > 2)
9999 cost -= 2;
10000 else
10001 cost = 1;
10002 }
a14003ee 10003 break;
e075ae69 10004
309ada50 10005 case PROCESSOR_ATHLON:
10006 memory = get_attr_memory (insn);
10007 dep_memory = get_attr_memory (dep_insn);
10008
10009 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10010 {
10011 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10012 cost += 2;
10013 else
10014 cost += 3;
10015 }
 10016 /* Show the ability of the reorder buffer to hide the latency of a load
 10017 by executing it in parallel with the previous instruction when the
 10018 previous instruction is not needed to compute the address. */
10019 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10020 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10021 {
 10022 /* Claim moves to take one cycle, as the core can issue one load
 10023 at a time and the next load can start a cycle later. */
10024 if (dep_insn_type == TYPE_IMOV
10025 || dep_insn_type == TYPE_FMOV)
10026 cost = 0;
10027 else if (cost >= 3)
10028 cost -= 3;
10029 else
10030 cost = 0;
10031 }
 10032 break;
a269a03c 10033 default:
10034 break;
10035 }
10036
10037 return cost;
10038}
0a726ef1 10039
10040static union
10041{
10042 struct ppro_sched_data
10043 {
10044 rtx decode[3];
10045 int issued_this_cycle;
10046 } ppro;
10047} ix86_sched_data;
0a726ef1 10048
10049static int
10050ix86_safe_length (insn)
10051 rtx insn;
10052{
10053 if (recog_memoized (insn) >= 0)
10054 return get_attr_length(insn);
10055 else
10056 return 128;
10057}
0a726ef1 10058
10059static int
10060ix86_safe_length_prefix (insn)
10061 rtx insn;
10062{
10063 if (recog_memoized (insn) >= 0)
10064 return get_attr_length(insn);
10065 else
10066 return 0;
10067}
10068
10069static enum attr_memory
10070ix86_safe_memory (insn)
10071 rtx insn;
10072{
10073 if (recog_memoized (insn) >= 0)
10074 return get_attr_memory(insn);
10075 else
10076 return MEMORY_UNKNOWN;
10077}
0a726ef1 10078
10079static enum attr_pent_pair
10080ix86_safe_pent_pair (insn)
10081 rtx insn;
10082{
10083 if (recog_memoized (insn) >= 0)
10084 return get_attr_pent_pair(insn);
10085 else
10086 return PENT_PAIR_NP;
10087}
0a726ef1 10088
10089static enum attr_ppro_uops
10090ix86_safe_ppro_uops (insn)
10091 rtx insn;
10092{
10093 if (recog_memoized (insn) >= 0)
10094 return get_attr_ppro_uops (insn);
10095 else
10096 return PPRO_UOPS_MANY;
10097}
0a726ef1 10098
10099static void
10100ix86_dump_ppro_packet (dump)
10101 FILE *dump;
0a726ef1 10102{
e075ae69 10103 if (ix86_sched_data.ppro.decode[0])
0a726ef1 10104 {
10105 fprintf (dump, "PPRO packet: %d",
10106 INSN_UID (ix86_sched_data.ppro.decode[0]));
10107 if (ix86_sched_data.ppro.decode[1])
10108 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10109 if (ix86_sched_data.ppro.decode[2])
10110 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10111 fputc ('\n', dump);
10112 }
10113}
0a726ef1 10114
e075ae69 10115/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 10116
10117static void
10118ix86_sched_init (dump, sched_verbose, veclen)
e075ae69
RH
10119 FILE *dump ATTRIBUTE_UNUSED;
10120 int sched_verbose ATTRIBUTE_UNUSED;
c237e94a 10121 int veclen ATTRIBUTE_UNUSED;
10122{
10123 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
10124}
10125
10126/* Shift INSN to SLOT, and shift everything else down. */
10127
10128static void
10129ix86_reorder_insn (insnp, slot)
10130 rtx *insnp, *slot;
10131{
10132 if (insnp != slot)
10133 {
10134 rtx insn = *insnp;
0f290768 10135 do
e075ae69
RH
10136 insnp[0] = insnp[1];
10137 while (++insnp != slot);
10138 *insnp = insn;
0a726ef1 10139 }
10140}
10141
10142/* Find an instruction with given pairability and minimal amount of cycles
10143 lost by the fact that the CPU waits for both pipelines to finish before
10144 reading next instructions. Also take care that both instructions together
10145 can not exceed 7 bytes. */
10146
10147static rtx *
10148ix86_pent_find_pair (e_ready, ready, type, first)
10149 rtx *e_ready;
10150 rtx *ready;
10151 enum attr_pent_pair type;
10152 rtx first;
10153{
10154 int mincycles, cycles;
10155 enum attr_pent_pair tmp;
10156 enum attr_memory memory;
10157 rtx *insnp, *bestinsnp = NULL;
0a726ef1 10158
10159 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
10160 return NULL;
0a726ef1 10161
10162 memory = ix86_safe_memory (first);
10163 cycles = result_ready_cost (first);
10164 mincycles = INT_MAX;
10165
10166 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
10167 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
10168 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
6ec6d558 10169 {
10170 enum attr_memory second_memory;
10171 int secondcycles, currentcycles;
10172
10173 second_memory = ix86_safe_memory (*insnp);
10174 secondcycles = result_ready_cost (*insnp);
10175 currentcycles = abs (cycles - secondcycles);
10176
10177 if (secondcycles >= 1 && cycles >= 1)
6ec6d558 10178 {
10179 /* Two read/modify/write instructions together takes two
10180 cycles longer. */
10181 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
10182 currentcycles += 2;
0f290768 10183
10184 /* Read modify/write instruction followed by read/modify
10185 takes one cycle longer. */
10186 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
10187 && tmp != PENT_PAIR_UV
10188 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
10189 currentcycles += 1;
6ec6d558 10190 }
10191 if (currentcycles < mincycles)
10192 bestinsnp = insnp, mincycles = currentcycles;
6ec6d558 10193 }
0a726ef1 10194
10195 return bestinsnp;
10196}
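
/* Illustrative note (not in the original source): both Pentium pipes
   must drain before the next pair issues, so pairing a 1-cycle insn
   with a 3-cycle insn wastes two cycles in the shorter pipe.  That gap,
   abs (cycles - secondcycles) above, is what the search minimizes. */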
10197
78a0d70c 10198/* Subroutines of ix86_sched_reorder. */
e075ae69 10199
c6991660 10200static void
78a0d70c 10201ix86_sched_reorder_pentium (ready, e_ready)
e075ae69 10202 rtx *ready;
78a0d70c 10203 rtx *e_ready;
e075ae69 10204{
78a0d70c 10205 enum attr_pent_pair pair1, pair2;
e075ae69 10206 rtx *insnp;
e075ae69 10207
 10208 /* This wouldn't be necessary if Haifa knew that static insn ordering
 10209 is important for which pipe an insn is issued to. So we have to make
 10210 some minor rearrangements. */
e075ae69 10211
10212 pair1 = ix86_safe_pent_pair (*e_ready);
10213
10214 /* If the first insn is non-pairable, let it be. */
10215 if (pair1 == PENT_PAIR_NP)
10216 return;
10217
10218 pair2 = PENT_PAIR_NP;
10219 insnp = 0;
10220
10221 /* If the first insn is UV or PV pairable, search for a PU
10222 insn to go with. */
10223 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
e075ae69 10224 {
10225 insnp = ix86_pent_find_pair (e_ready-1, ready,
10226 PENT_PAIR_PU, *e_ready);
10227 if (insnp)
10228 pair2 = PENT_PAIR_PU;
10229 }
e075ae69 10230
10231 /* If the first insn is PU or UV pairable, search for a PV
10232 insn to go with. */
10233 if (pair2 == PENT_PAIR_NP
10234 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
10235 {
10236 insnp = ix86_pent_find_pair (e_ready-1, ready,
10237 PENT_PAIR_PV, *e_ready);
10238 if (insnp)
10239 pair2 = PENT_PAIR_PV;
10240 }
e075ae69 10241
10242 /* If the first insn is pairable, search for a UV
10243 insn to go with. */
10244 if (pair2 == PENT_PAIR_NP)
10245 {
10246 insnp = ix86_pent_find_pair (e_ready-1, ready,
10247 PENT_PAIR_UV, *e_ready);
10248 if (insnp)
10249 pair2 = PENT_PAIR_UV;
10250 }
e075ae69 10251
10252 if (pair2 == PENT_PAIR_NP)
10253 return;
e075ae69 10254
10255 /* Found something! Decide if we need to swap the order. */
10256 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
10257 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
10258 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
10259 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
10260 ix86_reorder_insn (insnp, e_ready);
10261 else
10262 ix86_reorder_insn (insnp, e_ready - 1);
10263}
e075ae69 10264
c6991660 10265static void
10266ix86_sched_reorder_ppro (ready, e_ready)
10267 rtx *ready;
10268 rtx *e_ready;
10269{
10270 rtx decode[3];
10271 enum attr_ppro_uops cur_uops;
10272 int issued_this_cycle;
10273 rtx *insnp;
10274 int i;
e075ae69 10275
 10276 /* At this point .ppro.decode contains the state of the three
 10277 decoders from the last "cycle". That is, those insns that were
 10278 actually independent. But here we're scheduling for the
 10279 decoder, and we may find things that are decodable in the
 10280 same cycle. */
e075ae69 10281
0f290768 10282 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
78a0d70c 10283 issued_this_cycle = 0;
e075ae69 10284
10285 insnp = e_ready;
10286 cur_uops = ix86_safe_ppro_uops (*insnp);
0a726ef1 10287
10288 /* If the decoders are empty, and we've a complex insn at the
10289 head of the priority queue, let it issue without complaint. */
10290 if (decode[0] == NULL)
10291 {
10292 if (cur_uops == PPRO_UOPS_MANY)
10293 {
10294 decode[0] = *insnp;
10295 goto ppro_done;
10296 }
10297
 10298 /* Otherwise, search for a 2-4 uop insn to issue. */
10299 while (cur_uops != PPRO_UOPS_FEW)
10300 {
10301 if (insnp == ready)
10302 break;
10303 cur_uops = ix86_safe_ppro_uops (*--insnp);
10304 }
10305
10306 /* If so, move it to the head of the line. */
10307 if (cur_uops == PPRO_UOPS_FEW)
10308 ix86_reorder_insn (insnp, e_ready);
0a726ef1 10309
10310 /* Issue the head of the queue. */
10311 issued_this_cycle = 1;
10312 decode[0] = *e_ready--;
10313 }
fb693d44 10314
10315 /* Look for simple insns to fill in the other two slots. */
10316 for (i = 1; i < 3; ++i)
10317 if (decode[i] == NULL)
10318 {
10319 if (ready >= e_ready)
10320 goto ppro_done;
fb693d44 10321
10322 insnp = e_ready;
10323 cur_uops = ix86_safe_ppro_uops (*insnp);
10324 while (cur_uops != PPRO_UOPS_ONE)
10325 {
10326 if (insnp == ready)
10327 break;
10328 cur_uops = ix86_safe_ppro_uops (*--insnp);
10329 }
fb693d44 10330
10331 /* Found one. Move it to the head of the queue and issue it. */
10332 if (cur_uops == PPRO_UOPS_ONE)
e075ae69 10333 {
10334 ix86_reorder_insn (insnp, e_ready);
10335 decode[i] = *e_ready--;
10336 issued_this_cycle++;
10337 continue;
10338 }
fb693d44 10339
10340 /* ??? Didn't find one. Ideally, here we would do a lazy split
10341 of 2-uop insns, issue one and queue the other. */
10342 }
fb693d44 10343
10344 ppro_done:
10345 if (issued_this_cycle == 0)
10346 issued_this_cycle = 1;
10347 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
10348}
fb693d44 10349
 10350/* We are about to begin issuing insns for this clock cycle.
78a0d70c 10351 Override the default sort algorithm to better slot instructions. */
10352static int
10353ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
10354 FILE *dump ATTRIBUTE_UNUSED;
10355 int sched_verbose ATTRIBUTE_UNUSED;
10356 rtx *ready;
c237e94a 10357 int *n_readyp;
10358 int clock_var ATTRIBUTE_UNUSED;
10359{
c237e94a 10360 int n_ready = *n_readyp;
78a0d70c 10361 rtx *e_ready = ready + n_ready - 1;
fb693d44 10362
10363 if (n_ready < 2)
10364 goto out;
e075ae69 10365
10366 switch (ix86_cpu)
10367 {
10368 default:
10369 break;
e075ae69 10370
10371 case PROCESSOR_PENTIUM:
10372 ix86_sched_reorder_pentium (ready, e_ready);
10373 break;
e075ae69 10374
10375 case PROCESSOR_PENTIUMPRO:
10376 ix86_sched_reorder_ppro (ready, e_ready);
e075ae69 10377 break;
fb693d44
RH
10378 }
10379
10380out:
10381 return ix86_issue_rate ();
10382}
fb693d44 10383
10384/* We are about to issue INSN. Return the number of insns left on the
10385 ready queue that can be issued this cycle. */
b222082e 10386
c237e94a 10387static int
10388ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
10389 FILE *dump;
10390 int sched_verbose;
10391 rtx insn;
10392 int can_issue_more;
10393{
10394 int i;
10395 switch (ix86_cpu)
fb693d44 10396 {
10397 default:
10398 return can_issue_more - 1;
fb693d44 10399
10400 case PROCESSOR_PENTIUMPRO:
10401 {
10402 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
fb693d44 10403
10404 if (uops == PPRO_UOPS_MANY)
10405 {
10406 if (sched_verbose)
10407 ix86_dump_ppro_packet (dump);
10408 ix86_sched_data.ppro.decode[0] = insn;
10409 ix86_sched_data.ppro.decode[1] = NULL;
10410 ix86_sched_data.ppro.decode[2] = NULL;
10411 if (sched_verbose)
10412 ix86_dump_ppro_packet (dump);
10413 ix86_sched_data.ppro.decode[0] = NULL;
10414 }
10415 else if (uops == PPRO_UOPS_FEW)
10416 {
10417 if (sched_verbose)
10418 ix86_dump_ppro_packet (dump);
10419 ix86_sched_data.ppro.decode[0] = insn;
10420 ix86_sched_data.ppro.decode[1] = NULL;
10421 ix86_sched_data.ppro.decode[2] = NULL;
10422 }
10423 else
10424 {
10425 for (i = 0; i < 3; ++i)
10426 if (ix86_sched_data.ppro.decode[i] == NULL)
10427 {
10428 ix86_sched_data.ppro.decode[i] = insn;
10429 break;
10430 }
10431 if (i == 3)
10432 abort ();
10433 if (i == 2)
10434 {
10435 if (sched_verbose)
10436 ix86_dump_ppro_packet (dump);
10437 ix86_sched_data.ppro.decode[0] = NULL;
10438 ix86_sched_data.ppro.decode[1] = NULL;
10439 ix86_sched_data.ppro.decode[2] = NULL;
10440 }
10441 }
10442 }
10443 return --ix86_sched_data.ppro.issued_this_cycle;
10444 }
fb693d44 10445}
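
/* Illustrative note (not in the original source): the bookkeeping above
   models the PentiumPro 4-1-1 decode template -- one complex decoder
   plus two single-uop decoders per cycle.  That is why a PPRO_UOPS_MANY
   insn flushes the packet and issues alone, while PPRO_UOPS_ONE insns
   can fill the two remaining slots. */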
a7180f70 10446\f
10447/* Walk through INSNS and look for MEM references whose address is DSTREG or
10448 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
10449 appropriate. */
10450
10451void
10452ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
10453 rtx insns;
10454 rtx dstref, srcref, dstreg, srcreg;
10455{
10456 rtx insn;
10457
10458 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
10459 if (INSN_P (insn))
10460 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
10461 dstreg, srcreg);
10462}
10463
10464/* Subroutine of above to actually do the updating by recursively walking
10465 the rtx. */
10466
10467static void
10468ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
10469 rtx x;
10470 rtx dstref, srcref, dstreg, srcreg;
10471{
10472 enum rtx_code code = GET_CODE (x);
10473 const char *format_ptr = GET_RTX_FORMAT (code);
10474 int i, j;
10475
10476 if (code == MEM && XEXP (x, 0) == dstreg)
10477 MEM_COPY_ATTRIBUTES (x, dstref);
10478 else if (code == MEM && XEXP (x, 0) == srcreg)
10479 MEM_COPY_ATTRIBUTES (x, srcref);
10480
10481 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
10482 {
10483 if (*format_ptr == 'e')
10484 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
10485 dstreg, srcreg);
10486 else if (*format_ptr == 'E')
10487 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
397be6cf 10488 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
0e4970d7
RK
10489 dstreg, srcreg);
10490 }
10491}
\f
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.  The value of this function is used instead of that
   alignment to align the object.  */

int
ix86_constant_alignment (exp, align)
     tree exp;
     int align;
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
        return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
           && align < 256)
    return 256;

  return align;
}
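
/* Illustrative example (editor's note, not in the original source): a
   DFmode literal such as the 3.14 in

     double f (double x) { return x + 3.14; }

   goes to the constant pool with a default 32-bit alignment on ia32; the
   hook above raises that to 64 bits so the FP load cannot straddle a
   cache line.  String constants of 31 or more characters are likewise
   raised to a 256-bit boundary.  */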

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (type, align)
     tree type;
     int align;
{
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
          || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
        return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }

  return align;
}
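
/* Illustrative example (editor's note): TYPE_SIZE is measured in bits, so
   the first test above fires for any aggregate of 32 bytes or more.  E.g.

     static char buf[64];

   ordinarily has byte alignment, but ix86_data_alignment returns 256
   (bits), giving the array a 32-byte boundary that helps block moves and,
   on x86-64, satisfies the ABI rule quoted above.  */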

/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (type, align)
     tree type;
     int align;
{
  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
        return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }
  return align;
}
\f
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */

void
x86_initialize_trampoline (tramp, fnaddr, cxt)
     rtx tramp, fnaddr, cxt;
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
                               plus_constant (tramp, 10),
                               NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
                      GEN_INT (trunc_int_for_mode (0xb9, QImode)));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
                      GEN_INT (trunc_int_for_mode (0xe9, QImode)));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;

      /* Try to load the function address using the shorter movl instead
         of movabs.  We may want to support movq for kernel mode, but the
         kernel does not use trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
        {
          fnaddr = copy_to_mode_reg (DImode, fnaddr);
          emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                          GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
          emit_move_insn (gen_rtx_MEM (SImode,
                                       plus_constant (tramp, offset + 2)),
                          gen_lowpart (SImode, fnaddr));
          offset += 6;
        }
      else
        {
          emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                          GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
          emit_move_insn (gen_rtx_MEM (DImode,
                                       plus_constant (tramp, offset + 2)),
                          fnaddr);
          offset += 10;
        }

      /* Load the static chain into r10 using movabs.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                      GEN_INT (trunc_int_for_mode (0xba49, HImode)));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
                      cxt);
      offset += 10;

      /* Jump to r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                      GEN_INT (trunc_int_for_mode (0xff49, HImode)));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
                      GEN_INT (trunc_int_for_mode (0xe3, QImode)));
      offset += 3;

      if (offset > TRAMPOLINE_SIZE)
        abort ();
    }
}
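
/* Byte layout sketch (editor's note, derived from the opcodes above).
   The 32-bit trampoline is 10 bytes:

     offset 0:  b9 <cxt:4>     movl $cxt, %ecx
     offset 5:  e9 <disp:4>    jmp  fnaddr     (disp is relative to tramp+10)

   The 64-bit variant loads FNADDR into %r11 (41 bb imm32 when the address
   zero-extends, else 49 bb imm64), loads the static chain into %r10
   (49 ba imm64), and finishes with 49 ff e3, i.e. jmp *%r11.  */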
\f
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags)					\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
} while (0)
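
/* Usage sketch (editor's note): the mask ties a builtin to a command-line
   ISA switch.  E.g.

     def_builtin (MASK_SSE, "__builtin_ia32_addss", v4sf_ftype_v4sf_v4sf,
                  IX86_BUILTIN_ADDSS);

   registers the builtin only when -msse has put MASK_SSE into
   target_flags; without the switch the name simply does not exist.  */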

struct builtin_description
{
  unsigned int mask;
  enum insn_code icode;
  const char *name;
  enum ix86_builtins code;
  enum rtx_code comparison;
  unsigned int flag;
};

static struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
};

static struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  /* MMX */
  { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },

  { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.  */
  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },

  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { MASK_SSE, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
};

static struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
};

void
ix86_init_builtins ()
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  If TARGET_SSE is not set either, only the MMX builtins are
   enabled, since def_builtin checks the masks.  */

void
ix86_init_mmx_sse_builtins ()
{
  struct builtin_description *d;
  size_t i;
  tree endlink = void_list_node;

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE, V4SF_type_node,
                                                 endlink)));
  tree v4si_ftype_v4sf_v4sf
    = build_function_type (V4SI_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE, V4SF_type_node,
                                                 endlink)));

  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      endlink));
  tree int_ftype_v8qi
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      endlink));
  tree int_ftype_v2si
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      endlink));
  tree v2si_ftype_int
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, integer_type_node,
                                      endlink));
  tree v4sf_ftype_v4sf_int
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree v4sf_ftype_v4sf_v2si
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE, V2SI_type_node,
                                                 endlink)));
  tree int_ftype_v4hi_int
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree v4hi_ftype_v4hi_int_int
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));

  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type (V8QI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));
  tree v4hi_ftype_v2si_v2si
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE, V2SI_type_node,
                                                 endlink)));
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE, V4SF_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));
  tree v4hi_ftype_v8qi_v8qi
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      tree_cons (NULL_TREE, V8QI_type_node,
                                                 endlink)));
  tree v2si_ftype_v4hi_v4hi
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));
  tree v4hi_ftype_v4hi_int
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree v4hi_ftype_v4hi_di
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_integer_type_node,
                                                 endlink)));
  tree v2si_ftype_v2si_di
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_integer_type_node,
                                                 endlink)));
  tree void_ftype_void
    = build_function_type (void_type_node, endlink);
  tree void_ftype_pchar_int
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, pchar_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree void_ftype_unsigned
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, unsigned_type_node,
                                      endlink));
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, endlink);
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, endlink);
  tree ti_ftype_void
    = build_function_type (intTI_type_node, endlink);
  tree v2si_ftype_v4sf
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      endlink));

  /* Loads/stores.  */
  tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
                                  tree_cons (NULL_TREE, V8QI_type_node,
                                             tree_cons (NULL_TREE,
                                                        pchar_type_node,
                                                        endlink)));
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type (void_type_node, maskmovq_args);
  tree v4sf_ftype_pfloat
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, pfloat_type_node,
                                      endlink));
  tree v4sf_ftype_float
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, float_type_node,
                                      endlink));
  tree v4sf_ftype_float_float_float_float
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, float_type_node,
                                      tree_cons (NULL_TREE, float_type_node,
                                                 tree_cons (NULL_TREE,
                                                            float_type_node,
                                                            tree_cons (NULL_TREE,
                                                                       float_type_node,
                                                                       endlink)))));
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE, pv2si_type_node,
                                                 endlink)));
  tree void_ftype_pv2si_v4sf
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, pv2si_type_node,
                                      tree_cons (NULL_TREE, V4SF_type_node,
                                                 endlink)));
  tree void_ftype_pfloat_v4sf
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, pfloat_type_node,
                                      tree_cons (NULL_TREE, V4SF_type_node,
                                                 endlink)));
  tree void_ftype_pdi_di
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, pdi_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_unsigned_type_node,
                                                 endlink)));

  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      endlink));

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE, V4SF_type_node,
                                                 endlink)));
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type (V8QI_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      tree_cons (NULL_TREE, V8QI_type_node,
                                                 endlink)));
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));
  tree v2si_ftype_v2si_v2si
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE, V2SI_type_node,
                                                 endlink)));
  tree ti_ftype_ti_ti
    = build_function_type (intTI_type_node,
                           tree_cons (NULL_TREE, intTI_type_node,
                                      tree_cons (NULL_TREE, intTI_type_node,
                                                 endlink)));
  tree di_ftype_di_di
    = build_function_type (long_long_unsigned_type_node,
                           tree_cons (NULL_TREE, long_long_unsigned_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_unsigned_type_node,
                                                 endlink)));

  tree v2si_ftype_v2sf
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SF_type_node,
                                      endlink));
  tree v2sf_ftype_v2si
    = build_function_type (V2SF_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      endlink));
  tree v2si_ftype_v2si
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      endlink));
  tree v2sf_ftype_v2sf
    = build_function_type (V2SF_type_node,
                           tree_cons (NULL_TREE, V2SF_type_node,
                                      endlink));
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type (V2SF_type_node,
                           tree_cons (NULL_TREE, V2SF_type_node,
                                      tree_cons (NULL_TREE, V2SF_type_node,
                                                 endlink)));
  tree v2si_ftype_v2sf_v2sf
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SF_type_node,
                                      tree_cons (NULL_TREE, V2SF_type_node,
                                                 endlink)));

  tree void_ftype_pchar
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, pchar_type_node,
                                      endlink));

  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V4SFmode:
          type = v4sf_ftype_v4sf_v4sf;
          break;
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case TImode:
          type = ti_ftype_ti_ti;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          abort ();
        }

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
          || d->icode == CODE_FOR_maskncmpv4sf3
          || d->icode == CODE_FOR_vmmaskcmpv4sf3
          || d->icode == CODE_FOR_vmmaskncmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;

      def_builtin (d->mask, d->name, type, d->code);
    }
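
  /* Illustrative trace (editor's note): for the bdesc_2arg entry
     { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", ... }, the
     loop reads operand 1 of the addv4sf3 pattern, sees V4SFmode, selects
     v4sf_ftype_v4sf_v4sf, and registers the builtin.  The four mask
     compare icodes are re-typed to return V4SI because they produce a
     bit mask rather than a float vector.  */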

  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
  def_builtin (MASK_MMX, "__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
    def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);

  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
  def_builtin (MASK_3DNOW, "__builtin_ia32_prefetch_3dnow", void_ftype_pchar, IX86_BUILTIN_PREFETCH_3DNOW);
  def_builtin (MASK_3DNOW, "__builtin_ia32_prefetchw", void_ftype_pchar, IX86_BUILTIN_PREFETCHW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  /* Composite intrinsics.  */
  def_builtin (MASK_SSE, "__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
  def_builtin (MASK_SSE, "__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
  def_builtin (MASK_SSE, "__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
  def_builtin (MASK_SSE, "__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
  def_builtin (MASK_SSE, "__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (x, mode)
     rtx x;
     enum machine_mode mode;
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
                              : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrti (mode == TImode ? x
                              : gen_rtx_SUBREG (TImode, x, 0)));
  return x;
}
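
/* Editor's note: this path only runs after an error has already been
   reported; e.g. if a call such as __builtin_ia32_addps (v, 42) fails
   type checking, expand_expr can hand back const0_rtx, and substituting
   a cleared vector register lets expansion continue without crashing.  */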

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
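
/* Illustrative trace (editor's note): for __builtin_ia32_addps the caller
   passes CODE_FOR_addv4sf3, so tmode, mode0 and mode1 are all V4SFmode;
   after the operands are forced into registers that satisfy the pattern's
   predicates, GEN_FCN (icode) (target, op0, op1) emits the addv4sf3 insn
   and the target register is returned as the builtin's value.  */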

/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (icode, arglist, shuffle)
     enum insn_code icode;
     tree arglist;
     int shuffle;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);
  if (shuffle >= 0)
    emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (icode, arglist, target, do_load)
     enum insn_code icode;
     tree arglist;
     rtx target;
     int do_load;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (d, arglist, target)
     struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
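
/* Illustrative trace (editor's note): SSE has no cmpgtps encoding, so the
   bdesc table entry for __builtin_ia32_cmpgtps uses comparison LT with
   flag 1; the swap above turns cmpgt (a, b) into cmplt (b, a), which the
   hardware can express, without changing the result mask.  */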

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (d, arglist, target)
     struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_setcc_2 (target, op2));

  return target;
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2, arg3;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_M_FROM_INT:
      target = gen_reg_rtx (DImode);
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
      return target;

    case IX86_BUILTIN_M_TO_INT:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      op0 = copy_to_mode_reg (DImode, op0);
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
      return target;

    case IX86_BUILTIN_PEXTRW:
      icode = CODE_FOR_mmx_pextrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        {
          /* @@@ better error message */
          error ("selector must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PINSRW:
      icode = CODE_FOR_mmx_pinsrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("selector must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
      icode = CODE_FOR_mmx_maskmovq;
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
      icode = (fcode == IX86_BUILTIN_LOADHPS
               ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS
               ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);

11780 case IX86_BUILTIN_LDMXCSR:
11781 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11782 target = assign_386_stack_local (SImode, 0);
11783 emit_move_insn (target, op0);
11784 emit_insn (gen_ldmxcsr (target));
11785 return 0;
11786
11787 case IX86_BUILTIN_STMXCSR:
11788 target = assign_386_stack_local (SImode, 0);
11789 emit_insn (gen_stmxcsr (target));
11790 return copy_to_mode_reg (SImode, target);
11791
11792 case IX86_BUILTIN_PREFETCH:
11793 icode = CODE_FOR_prefetch;
11794 arg0 = TREE_VALUE (arglist);
11795 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11796 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11797 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
332316cd
BS
11798 mode0 = insn_data[icode].operand[0].mode;
11799 mode1 = insn_data[icode].operand[1].mode;
bd793c65 11800
332316cd 11801 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
bd793c65
BS
11802 {
11803 /* @@@ better error message */
11804 error ("selector must be an immediate");
11805 return const0_rtx;
11806 }
11807
332316cd 11808 op0 = copy_to_mode_reg (Pmode, op0);
bd793c65
BS
11809 pat = GEN_FCN (icode) (op0, op1);
11810 if (! pat)
11811 return 0;
11812 emit_insn (pat);
11813 return target;
0f290768 11814
bd793c65
BS
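      /* The selector above is encoded into the prefetch instruction
	 itself, which is why anything but a compile-time immediate is
	 rejected rather than copied into a register.  User code typically
	 reaches this builtin through a macro such as xmmintrin.h's
	 _mm_prefetch (p, _MM_HINT_T0), which keeps the hint an immediate.  */
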
    case IX86_BUILTIN_SHUFPS:
      icode = CODE_FOR_sse_shufps;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
      icode = CODE_FOR_mmx_pshufw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[2].mode;
      mode1 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

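      /* For both shuffles above the mask is an immediate encoding two bits
	 per destination element.  For example, 0x1b (binary 00011011)
	 selects source elements 3,2,1,0 and therefore reverses the vector;
	 the LOADRPS/STORERPS composites below rely on exactly that value.  */
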
    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return NULL_RTX;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PREFETCH_3DNOW:
      icode = CODE_FOR_prefetch_3dnow;
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0));
      if (! pat)
	return NULL_RTX;
      emit_insn (pat);
      return NULL_RTX;

    case IX86_BUILTIN_PREFETCHW:
      icode = CODE_FOR_prefetchw;
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0));
      if (! pat)
	return NULL_RTX;
      emit_insn (pat);
      return NULL_RTX;

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);

    /* Composite intrinsics.  */
    case IX86_BUILTIN_SETPS1:
      target = assign_386_stack_local (SFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, SFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_loadss (op0, adjust_address (target, V4SFmode, 0)));
      emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
      return op0;

    case IX86_BUILTIN_SETPS:
      target = assign_386_stack_local (V4SFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
      emit_move_insn (adjust_address (target, SFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, SFmode, 4),
		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, SFmode, 8),
		      expand_expr (arg2, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, SFmode, 12),
		      expand_expr (arg3, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_movaps (op0, target));
      return op0;

    case IX86_BUILTIN_CLRPS:
      target = gen_reg_rtx (TImode);
      emit_insn (gen_sse_clrti (target));
      return target;

    case IX86_BUILTIN_LOADRPS:
      target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
					 gen_reg_rtx (V4SFmode), 1);
      emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
      return target;

    case IX86_BUILTIN_LOADPS1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
					 gen_reg_rtx (V4SFmode), 1);
      emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPS1:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
    case IX86_BUILTIN_STORERPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    default:
      break;
    }

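  /* Nothing matched an explicit case above, so fall through to the
     descriptor tables: each bdesc_* entry pairs a builtin code with the
     insn pattern that implements it, which keeps the common one- and
     two-operand builtins out of the switch entirely.  */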
  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_maskcmpv4sf3
	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_maskncmpv4sf3
	    || d->icode == CODE_FOR_vmmaskncmpv4sf3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}

/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;
  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (DImode,
				      gen_rtx_PRE_DEC (DImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[1]));
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (GET_MODE (operand),
				      gen_rtx_PRE_DEC (SImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}

/* Free operand from the memory.  */
void
ix86_free_from_memory (mode)
     enum machine_mode mode;
{
  if (!TARGET_64BIT || !TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
	size = 2;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be
	 converted to a pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}
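
/* The two routines above are intended as a bracketed pair once reload has
   run.  A minimal sketch of a hypothetical caller (illustration only, not
   code from this file):

       rtx mem = ix86_force_to_memory (SImode, operand);
       ... emit insns that use MEM as a memory reference ...
       ix86_free_from_memory (SImode);

   In 64-bit mode with a red zone the slot lives below the stack pointer and
   nothing needs releasing; otherwise the value is pushed and the space is
   reclaimed by the LEA emitted in ix86_free_from_memory.  */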

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (x, class)
     rtx x;
     enum reg_class class;
{
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
	return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
	{
	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
	  if (MAYBE_SSE_CLASS_P (class))
	    return (reg_class_subset_p (class, GENERAL_REGS)
		    ? GENERAL_REGS : FLOAT_REGS);
	  else
	    return class;
	}
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
	return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}
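
/* For example (behavior implied by the tests above): reloading the
   CONST_DOUBLE 1.0 into an SSE class returns NO_REGS, forcing the constant
   into the constant pool, while the same constant aimed at a maybe-float
   class may stay in FLOAT_REGS, because the 387 can materialize 0.0 and 1.0
   directly (standard_80387_constant_p).  */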

/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We avoid
   this by never combining those units in a single alternative in the
   machine description.  Ensure that this constraint holds to avoid
   unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so
   do not enforce these sanity checks.  */
int
ix86_secondary_memory_needed (class1, class2, mode, strict)
     enum reg_class class1, class2;
     enum machine_mode mode;
     int strict;
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
	abort ();
      else
	return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
	  || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
	      && (mode) != SImode)
	  || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
	      && (mode) != SImode));
}
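
/* For example: a DFmode copy between FLOAT_REGS and SSE_REGS always needs a
   memory intermediate, since no instruction moves data directly between the
   387 stack and the XMM registers; copies within GENERAL_REGS never do.  */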
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as
   TO; on some machines it is expensive to move between registers if they
   are not general registers.  */
int
ix86_register_move_cost (mode, class1, class2)
     enum machine_mode mode;
     enum reg_class class1, class2;
{
  /* In case we require secondary memory, compute the cost of the store
     followed by the load.  In case of copying from a general purpose
     register we may emit multiple stores followed by a single load, causing
     a memory size mismatch stall.  Count this as an arbitrarily high cost
     of 20.  */
  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int add_cost = 0;
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	add_cost = 20;
      return (MEMORY_MOVE_COST (mode, class1, 0)
	      + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
    }
  /* Moves between SSE/MMX and the integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
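
/* A worked example against the active ix86_cost table: an SImode move
   between GENERAL_REGS and MMX_REGS is charged mmxsse_to_integer, while a
   DFmode move from FLOAT_REGS to SSE_REGS requires secondary memory and is
   costed as a store plus a load via MEMORY_MOVE_COST, plus 20 when the
   source class occupies more hard registers than the destination class.  */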

/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (regno, mode)
     int regno;
     enum machine_mode mode;
{
  /* Flags, and only flags, can hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return VALID_SSE_REG_MODE (mode);
  if (MMX_REGNO_P (regno))
    return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
  /* We handle both integers and floats in the general purpose registers.
     In future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care with QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}
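
/* For example: DImode fails VALID_FP_MODE_P and so is rejected for the 387
   stack but accepted in MMX registers, and a QImode value in %esi or %edi
   (regno >= 4) is allowed only once reload has started or when partial
   register stalls are not a concern, because only the first four integer
   registers have byte subregisters in 32-bit mode.  */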

/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Also model the increased moving costs of QImode registers in
   non-Q_REGS classes.  */
int
ix86_memory_move_cost (mode, class, in)
     enum machine_mode mode;
     enum reg_class class;
     int in;
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
	{
	case SFmode:
	  index = 0;
	  break;
	case DFmode:
	  index = 1;
	  break;
	case XFmode:
	case TFmode:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	case 16:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
	return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
		: ix86_cost->movzbl_load);
      else
	return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
		: ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute the number of 32-bit moves needed.  TFmode is moved as
	 XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
	      * (int) GET_MODE_SIZE (mode) / 4);
    }
}
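
/* A worked example against the active cost table: an SFmode load into
   FLOAT_REGS is charged fp_load[0], a 16-byte SSE store sse_store[2], and a
   QImode load into a non-Q_REGS class the movzbl_load cost, since a
   register without a byte part must be loaded with movzbl.  */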

#ifdef DO_GLOBAL_CTORS_BODY
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif

#if defined(TARGET_ELF) && defined(TARGET_COFF)
static void
sco_asm_named_section (name, flags)
     const char *name;
     unsigned int flags;
{
  if (TARGET_ELF)
    default_elf_asm_named_section (name, flags);
  else
    default_coff_asm_named_section (name, flags);
}

static void
sco_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority;
{
  if (TARGET_ELF)
    default_named_section_asm_out_constructor (symbol, priority);
  else
    ix86_svr3_asm_out_constructor (symbol, priority);
}
#endif