1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
3 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45
46 #ifndef CHECK_STACK_LIMIT
47 #define CHECK_STACK_LIMIT -1
48 #endif
49
50 /* Processor costs (relative to an add) */
51 struct processor_costs size_cost = {    /* costs for tuning for size */
52 2, /* cost of an add instruction */
53 3, /* cost of a lea instruction */
54 2, /* variable shift costs */
55 3, /* constant shift costs */
56 3, /* cost of starting a multiply */
57 0, /* cost of multiply per each bit set */
58 3, /* cost of a divide/mod */
59 0, /* "large" insn */
60 2, /* MOVE_RATIO */
61 2, /* cost for loading QImode using movzbl */
62 {2, 2, 2}, /* cost of loading integer registers
63 in QImode, HImode and SImode.
64 Relative to reg-reg move (2). */
65 {2, 2, 2}, /* cost of storing integer registers */
66 2, /* cost of reg,reg fld/fst */
67 {2, 2, 2}, /* cost of loading fp registers
68 in SFmode, DFmode and XFmode */
69   {2, 2, 2},                            /* cost of storing fp registers
70                                            in SFmode, DFmode and XFmode */
70 3, /* cost of moving MMX register */
71 {3, 3}, /* cost of loading MMX registers
72 in SImode and DImode */
73 {3, 3}, /* cost of storing MMX registers
74 in SImode and DImode */
75 3, /* cost of moving SSE register */
76 {3, 3, 3}, /* cost of loading SSE registers
77 in SImode, DImode and TImode */
78 {3, 3, 3}, /* cost of storing SSE registers
79 in SImode, DImode and TImode */
80 3, /* MMX or SSE register to integer */
81 };
82 /* Processor costs (relative to an add) */
83 struct processor_costs i386_cost = { /* 386 specific costs */
84 1, /* cost of an add instruction */
85 1, /* cost of a lea instruction */
86 3, /* variable shift costs */
87 2, /* constant shift costs */
88 6, /* cost of starting a multiply */
89 1, /* cost of multiply per each bit set */
90 23, /* cost of a divide/mod */
91 15, /* "large" insn */
92 3, /* MOVE_RATIO */
93 4, /* cost for loading QImode using movzbl */
94 {2, 4, 2}, /* cost of loading integer registers
95 in QImode, HImode and SImode.
96 Relative to reg-reg move (2). */
97 {2, 4, 2}, /* cost of storing integer registers */
98 2, /* cost of reg,reg fld/fst */
99 {8, 8, 8}, /* cost of loading fp registers
100 in SFmode, DFmode and XFmode */
101   {8, 8, 8},                            /* cost of storing fp registers
102                                            in SFmode, DFmode and XFmode */
102 2, /* cost of moving MMX register */
103 {4, 8}, /* cost of loading MMX registers
104 in SImode and DImode */
105 {4, 8}, /* cost of storing MMX registers
106 in SImode and DImode */
107 2, /* cost of moving SSE register */
108 {4, 8, 16}, /* cost of loading SSE registers
109 in SImode, DImode and TImode */
110 {4, 8, 16}, /* cost of storing SSE registers
111 in SImode, DImode and TImode */
112 3, /* MMX or SSE register to integer */
113 };
114
115 struct processor_costs i486_cost = { /* 486 specific costs */
116 1, /* cost of an add instruction */
117 1, /* cost of a lea instruction */
118 3, /* variable shift costs */
119 2, /* constant shift costs */
120 12, /* cost of starting a multiply */
121 1, /* cost of multiply per each bit set */
122 40, /* cost of a divide/mod */
123 15, /* "large" insn */
124 3, /* MOVE_RATIO */
125 4, /* cost for loading QImode using movzbl */
126 {2, 4, 2}, /* cost of loading integer registers
127 in QImode, HImode and SImode.
128 Relative to reg-reg move (2). */
129 {2, 4, 2}, /* cost of storing integer registers */
130 2, /* cost of reg,reg fld/fst */
131 {8, 8, 8}, /* cost of loading fp registers
132 in SFmode, DFmode and XFmode */
133   {8, 8, 8},                            /* cost of storing fp registers
134                                            in SFmode, DFmode and XFmode */
134 2, /* cost of moving MMX register */
135 {4, 8}, /* cost of loading MMX registers
136 in SImode and DImode */
137 {4, 8}, /* cost of storing MMX registers
138 in SImode and DImode */
139 2, /* cost of moving SSE register */
140 {4, 8, 16}, /* cost of loading SSE registers
141 in SImode, DImode and TImode */
142 {4, 8, 16}, /* cost of storing SSE registers
143 in SImode, DImode and TImode */
144 3 /* MMX or SSE register to integer */
145 };
146
147 struct processor_costs pentium_cost = {
148 1, /* cost of an add instruction */
149 1, /* cost of a lea instruction */
150 4, /* variable shift costs */
151 1, /* constant shift costs */
152 11, /* cost of starting a multiply */
153 0, /* cost of multiply per each bit set */
154 25, /* cost of a divide/mod */
155 8, /* "large" insn */
156 6, /* MOVE_RATIO */
157 6, /* cost for loading QImode using movzbl */
158 {2, 4, 2}, /* cost of loading integer registers
159 in QImode, HImode and SImode.
160 Relative to reg-reg move (2). */
161 {2, 4, 2}, /* cost of storing integer registers */
162 2, /* cost of reg,reg fld/fst */
163 {2, 2, 6}, /* cost of loading fp registers
164 in SFmode, DFmode and XFmode */
165   {4, 4, 6},                            /* cost of storing fp registers
166                                            in SFmode, DFmode and XFmode */
166 8, /* cost of moving MMX register */
167 {8, 8}, /* cost of loading MMX registers
168 in SImode and DImode */
169 {8, 8}, /* cost of storing MMX registers
170 in SImode and DImode */
171 2, /* cost of moving SSE register */
172 {4, 8, 16}, /* cost of loading SSE registers
173 in SImode, DImode and TImode */
174 {4, 8, 16}, /* cost of storing SSE registers
175 in SImode, DImode and TImode */
176 3 /* MMX or SSE register to integer */
177 };
178
179 struct processor_costs pentiumpro_cost = {
180 1, /* cost of an add instruction */
181 1, /* cost of a lea instruction */
182 1, /* variable shift costs */
183 1, /* constant shift costs */
184 4, /* cost of starting a multiply */
185 0, /* cost of multiply per each bit set */
186 17, /* cost of a divide/mod */
187 8, /* "large" insn */
188 6, /* MOVE_RATIO */
189 2, /* cost for loading QImode using movzbl */
190 {4, 4, 4}, /* cost of loading integer registers
191 in QImode, HImode and SImode.
192 Relative to reg-reg move (2). */
193 {2, 2, 2}, /* cost of storing integer registers */
194 2, /* cost of reg,reg fld/fst */
195 {2, 2, 6}, /* cost of loading fp registers
196 in SFmode, DFmode and XFmode */
197   {4, 4, 6},                            /* cost of storing fp registers
198                                            in SFmode, DFmode and XFmode */
198 2, /* cost of moving MMX register */
199 {2, 2}, /* cost of loading MMX registers
200 in SImode and DImode */
201 {2, 2}, /* cost of storing MMX registers
202 in SImode and DImode */
203 2, /* cost of moving SSE register */
204 {2, 2, 8}, /* cost of loading SSE registers
205 in SImode, DImode and TImode */
206 {2, 2, 8}, /* cost of storing SSE registers
207 in SImode, DImode and TImode */
208 3 /* MMX or SSE register to integer */
209 };
210
211 struct processor_costs k6_cost = {
212 1, /* cost of an add instruction */
213 2, /* cost of a lea instruction */
214 1, /* variable shift costs */
215 1, /* constant shift costs */
216 3, /* cost of starting a multiply */
217 0, /* cost of multiply per each bit set */
218 18, /* cost of a divide/mod */
219 8, /* "large" insn */
220 4, /* MOVE_RATIO */
221 3, /* cost for loading QImode using movzbl */
222 {4, 5, 4}, /* cost of loading integer registers
223 in QImode, HImode and SImode.
224 Relative to reg-reg move (2). */
225 {2, 3, 2}, /* cost of storing integer registers */
226 4, /* cost of reg,reg fld/fst */
227 {6, 6, 6}, /* cost of loading fp registers
228 in SFmode, DFmode and XFmode */
229   {4, 4, 4},                            /* cost of storing fp registers
230                                            in SFmode, DFmode and XFmode */
230 2, /* cost of moving MMX register */
231 {2, 2}, /* cost of loading MMX registers
232 in SImode and DImode */
233 {2, 2}, /* cost of storing MMX registers
234 in SImode and DImode */
235 2, /* cost of moving SSE register */
236 {2, 2, 8}, /* cost of loading SSE registers
237 in SImode, DImode and TImode */
238 {2, 2, 8}, /* cost of storing SSE registers
239 in SImode, DImode and TImode */
240 6 /* MMX or SSE register to integer */
241 };
242
243 struct processor_costs athlon_cost = {
244 1, /* cost of an add instruction */
245 2, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 5, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 42, /* cost of a divide/mod */
251 8, /* "large" insn */
252 9, /* MOVE_RATIO */
253 4, /* cost for loading QImode using movzbl */
254 {4, 5, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 3, 2}, /* cost of storing integer registers */
258 4, /* cost of reg,reg fld/fst */
259 {6, 6, 20}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261   {4, 4, 16},                           /* cost of storing fp registers
262                                            in SFmode, DFmode and XFmode */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 6 /* MMX or SSE register to integer */
273 };
274
275 struct processor_costs pentium4_cost = {
276 1, /* cost of an add instruction */
277 1, /* cost of a lea instruction */
278 8, /* variable shift costs */
279 8, /* constant shift costs */
280 30, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 112, /* cost of a divide/mod */
283 16, /* "large" insn */
284 6, /* MOVE_RATIO */
285 2, /* cost for loading QImode using movzbl */
286 {4, 5, 4}, /* cost of loading integer registers
287 in QImode, HImode and SImode.
288 Relative to reg-reg move (2). */
289 {2, 3, 2}, /* cost of storing integer registers */
290 2, /* cost of reg,reg fld/fst */
291 {2, 2, 6}, /* cost of loading fp registers
292 in SFmode, DFmode and XFmode */
293   {4, 4, 6},                            /* cost of storing fp registers
294                                            in SFmode, DFmode and XFmode */
294 2, /* cost of moving MMX register */
295 {2, 2}, /* cost of loading MMX registers
296 in SImode and DImode */
297 {2, 2}, /* cost of storing MMX registers
298 in SImode and DImode */
299 12, /* cost of moving SSE register */
300 {12, 12, 12}, /* cost of loading SSE registers
301 in SImode, DImode and TImode */
302 {2, 2, 8}, /* cost of storing SSE registers
303 in SImode, DImode and TImode */
304 10, /* MMX or SSE register to integer */
305 };
306
307 struct processor_costs *ix86_cost = &pentium_cost;
308
309 /* Processor feature/optimization bitmasks. */
310 #define m_386 (1<<PROCESSOR_I386)
311 #define m_486 (1<<PROCESSOR_I486)
312 #define m_PENT (1<<PROCESSOR_PENTIUM)
313 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
314 #define m_K6 (1<<PROCESSOR_K6)
315 #define m_ATHLON (1<<PROCESSOR_ATHLON)
316 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
317
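/* A rough sketch of how the tuning bitmasks below are consumed (assuming
   the usual definitions in i386.h, with CPUMASK as (1 << ix86_cpu)):

       #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)

   so, for example, x86_use_leave enables use of the "leave" instruction
   when tuning for the 386, K6 or Athlon.  */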
318 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
319 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
320 const int x86_zero_extend_with_and = m_486 | m_PENT;
321 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
322 const int x86_double_with_add = ~m_386;
323 const int x86_use_bit_test = m_386;
324 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
325 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
326 const int x86_3dnow_a = m_ATHLON;
327 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
328 const int x86_branch_hints = m_PENT4;
329 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
330 const int x86_partial_reg_stall = m_PPRO;
331 const int x86_use_loop = m_K6;
332 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
333 const int x86_use_mov0 = m_K6;
334 const int x86_use_cltd = ~(m_PENT | m_K6);
335 const int x86_read_modify_write = ~m_PENT;
336 const int x86_read_modify = ~(m_PENT | m_PPRO);
337 const int x86_split_long_moves = m_PPRO;
338 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
339 const int x86_single_stringop = m_386 | m_PENT4;
340 const int x86_qimode_math = ~(0);
341 const int x86_promote_qi_regs = 0;
342 const int x86_himode_math = ~(m_PPRO);
343 const int x86_promote_hi_regs = m_PPRO;
344 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
345 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
346 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
347 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
348 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
349 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
350 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
351 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
352 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
353 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
354
355 /* In case the average insn count for a single function invocation is
356    lower than this constant, emit fast (but longer) prologue and
357    epilogue code.  */
358 #define FAST_PROLOGUE_INSN_COUNT 30
359 /* Set by prologue expander and used by epilogue expander to determine
360 the style used. */
361 static int use_fast_prologue_epilogue;
362
363 #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
364
365 static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */
366 static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */
367 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */
368
369 /* Array of the smallest class containing reg number REGNO, indexed by
370 REGNO. Used by REGNO_REG_CLASS in i386.h. */
371
372 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
373 {
374 /* ax, dx, cx, bx */
375 AREG, DREG, CREG, BREG,
376 /* si, di, bp, sp */
377 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
378 /* FP registers */
379 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
380 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
381 /* arg pointer */
382 NON_Q_REGS,
383 /* flags, fpsr, dirflag, frame */
384 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
385 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
386 SSE_REGS, SSE_REGS,
387 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
388 MMX_REGS, MMX_REGS,
389 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
390 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
391 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
392 SSE_REGS, SSE_REGS,
393 };
394
395 /* The "default" register map used in 32bit mode. */
396
397 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
398 {
399 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
400 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
401 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
402 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
403 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
404 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
405 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
406 };
407
408 static int x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
409 1 /*RDX*/, 2 /*RCX*/,
410 FIRST_REX_INT_REG /*R8 */,
411 FIRST_REX_INT_REG + 1 /*R9 */};
412 static int x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/};
413
414 /* The "default" register map used in 64bit mode. */
415 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
416 {
417 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
418   33, 34, 35, 36, 37, 38, 39, 40,       /* fp regs */
419 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
420 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
421 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
422   8, 9, 10, 11, 12, 13, 14, 15,         /* extended integer registers */
423 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
424 };
425
426 /* Define the register numbers to be used in Dwarf debugging information.
427 The SVR4 reference port C compiler uses the following register numbers
428 in its Dwarf output code:
429 0 for %eax (gcc regno = 0)
430 1 for %ecx (gcc regno = 2)
431 2 for %edx (gcc regno = 1)
432 3 for %ebx (gcc regno = 3)
433 4 for %esp (gcc regno = 7)
434 5 for %ebp (gcc regno = 6)
435 6 for %esi (gcc regno = 4)
436 7 for %edi (gcc regno = 5)
437 The following three DWARF register numbers are never generated by
438 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
439 believes these numbers have these meanings.
440 8 for %eip (no gcc equivalent)
441 9 for %eflags (gcc regno = 17)
442 10 for %trapno (no gcc equivalent)
443 It is not at all clear how we should number the FP stack registers
444 for the x86 architecture. If the version of SDB on x86/svr4 were
445 a bit less brain dead with respect to floating-point then we would
446 have a precedent to follow with respect to DWARF register numbers
447 for x86 FP registers, but the SDB on x86/svr4 is so completely
448 broken with respect to FP registers that it is hardly worth thinking
449 of it as something to strive for compatibility with.
450 The version of x86/svr4 SDB I have at the moment does (partially)
451 seem to believe that DWARF register number 11 is associated with
452 the x86 register %st(0), but that's about all. Higher DWARF
453 register numbers don't seem to be associated with anything in
454 particular, and even for DWARF regno 11, SDB only seems to under-
455 stand that it should say that a variable lives in %st(0) (when
456 asked via an `=' command) if we said it was in DWARF regno 11,
457 but SDB still prints garbage when asked for the value of the
458 variable in question (via a `/' command).
459 (Also note that the labels SDB prints for various FP stack regs
460 when doing an `x' command are all wrong.)
461 Note that these problems generally don't affect the native SVR4
462 C compiler because it doesn't allow the use of -O with -g and
463 because when it is *not* optimizing, it allocates a memory
464 location for each floating-point variable, and the memory
465 location is what gets described in the DWARF AT_location
466 attribute for the variable in question.
467 Regardless of the severe mental illness of the x86/svr4 SDB, we
468 do something sensible here and we use the following DWARF
469 register numbers. Note that these are all stack-top-relative
470 numbers.
471 11 for %st(0) (gcc regno = 8)
472 12 for %st(1) (gcc regno = 9)
473 13 for %st(2) (gcc regno = 10)
474 14 for %st(3) (gcc regno = 11)
475 15 for %st(4) (gcc regno = 12)
476 16 for %st(5) (gcc regno = 13)
477 17 for %st(6) (gcc regno = 14)
478 18 for %st(7) (gcc regno = 15)
479 */
480 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
481 {
482 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
483 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
484 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
485 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
486 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
487   -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
488   -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
489 };
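/* For example, under the SVR4 map above %ebp (gcc regno 6) is emitted as
   DWARF register 5, whereas the default 32-bit map earlier in this file
   emits it as register 4; DBX_REGISTER_NUMBER in i386.h is assumed to be
   the consumer of these tables.  */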
490
491 /* Test and compare insns in i386.md store the information needed to
492 generate branch and scc insns here. */
493
494 struct rtx_def *ix86_compare_op0 = NULL_RTX;
495 struct rtx_def *ix86_compare_op1 = NULL_RTX;
496
497 #define MAX_386_STACK_LOCALS 3
498 /* Size of the register save area. */
499 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
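/* With the usual 64-bit values assumed here (REGPARM_MAX = 6 integer
   registers, SSE_REGPARM_MAX = 8, UNITS_PER_WORD = 8 bytes), this works
   out to 6*8 + 8*16 = 176 bytes, the size of the psABI register save
   area used for varargs.  */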
500
501 /* Define the structure for the machine field in struct function. */
502 struct machine_function
503 {
504 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
505 int save_varrargs_registers;
506 int accesses_prev_frame;
507 };
508
509 #define ix86_stack_locals (cfun->machine->stack_locals)
510 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
511
512 /* Structure describing stack frame layout.
513 Stack grows downward:
514
515 [arguments]
516 <- ARG_POINTER
517 saved pc
518
519 saved frame pointer if frame_pointer_needed
520 <- HARD_FRAME_POINTER
521 [saved regs]
522
523 [padding1] \
524 )
525 [va_arg registers] (
526 > to_allocate <- FRAME_POINTER
527 [frame] (
528 )
529 [padding2] /
530 */
531 struct ix86_frame
532 {
533 int nregs;
534 int padding1;
535 int va_arg_size;
536 HOST_WIDE_INT frame;
537 int padding2;
538 int outgoing_arguments_size;
539 int red_zone_size;
540
541 HOST_WIDE_INT to_allocate;
542 /* The offsets relative to ARG_POINTER. */
543 HOST_WIDE_INT frame_pointer_offset;
544 HOST_WIDE_INT hard_frame_pointer_offset;
545 HOST_WIDE_INT stack_pointer_offset;
546 };
547
548 /* Code model option as passed by user. */
549 const char *ix86_cmodel_string;
550 /* Parsed value. */
551 enum cmodel ix86_cmodel;
552
553 /* which cpu are we scheduling for */
554 enum processor_type ix86_cpu;
555
556 /* which instruction set architecture to use. */
557 int ix86_arch;
558
559 /* Strings to hold which cpu and instruction set architecture to use. */
560 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
561 const char *ix86_arch_string; /* for -march=<xxx> */
562
563 /* # of registers to use to pass arguments. */
564 const char *ix86_regparm_string;
565
566 /* ix86_regparm_string as a number */
567 int ix86_regparm;
568
569 /* Alignment to use for loops and jumps: */
570
571 /* Power of two alignment for loops. */
572 const char *ix86_align_loops_string;
573
574 /* Power of two alignment for non-loop jumps. */
575 const char *ix86_align_jumps_string;
576
577 /* Power of two alignment for stack boundary in bytes. */
578 const char *ix86_preferred_stack_boundary_string;
579
580 /* Preferred alignment for stack boundary in bits. */
581 int ix86_preferred_stack_boundary;
582
583 /* Values 1-5: see jump.c */
584 int ix86_branch_cost;
585 const char *ix86_branch_cost_string;
586
587 /* Power of two alignment for functions. */
588 const char *ix86_align_funcs_string;
589
590 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
591 static char internal_label_prefix[16];
592 static int internal_label_prefix_len;
593 \f
594 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
595 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
596 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
597 int, int, FILE *));
598 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
599 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
600 rtx *, rtx *));
601 static rtx gen_push PARAMS ((rtx));
602 static int memory_address_length PARAMS ((rtx addr));
603 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
604 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
605 static int ix86_safe_length PARAMS ((rtx));
606 static enum attr_memory ix86_safe_memory PARAMS ((rtx));
607 static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
608 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
609 static void ix86_dump_ppro_packet PARAMS ((FILE *));
610 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
611 static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
612 rtx));
613 static void ix86_init_machine_status PARAMS ((struct function *));
614 static void ix86_mark_machine_status PARAMS ((struct function *));
615 static void ix86_free_machine_status PARAMS ((struct function *));
616 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
617 static int ix86_safe_length_prefix PARAMS ((rtx));
618 static int ix86_nsaved_regs PARAMS((void));
619 static void ix86_emit_save_regs PARAMS((void));
620 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
621 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
622 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
623 static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
624 static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
625 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
626 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
627 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
628 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
629 static int ix86_issue_rate PARAMS ((void));
630 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
631 static void ix86_sched_init PARAMS ((FILE *, int, int));
632 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
633 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
634
635 struct ix86_address
636 {
637 rtx base, index, disp;
638 HOST_WIDE_INT scale;
639 };
640
641 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
642
643 struct builtin_description;
644 static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
645 rtx));
646 static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
647 rtx));
648 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
649 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
650 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
651 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
652 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
653 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
654 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
655 enum rtx_code *,
656 enum rtx_code *,
657 enum rtx_code *));
658 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
659 rtx *, rtx *));
660 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
661 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
662 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
663 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
664 static int ix86_save_reg PARAMS ((int, int));
665 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
666 static int ix86_comp_type_attributes PARAMS ((tree, tree));
667 const struct attribute_spec ix86_attribute_table[];
668 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
669 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
670
671 #ifdef DO_GLOBAL_CTORS_BODY
672 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
673 #endif
674 #if defined(TARGET_ELF) && defined(TARGET_COFF)
675 static void sco_asm_named_section PARAMS ((const char *, unsigned int));
676 static void sco_asm_out_constructor PARAMS ((rtx, int));
677 #endif
678 /* Register class used for passing a given 64-bit part of the argument.
679    These represent classes as documented by the PS ABI, with the exception
680    of the SSESF and SSEDF classes, which are basically the SSE class, except
681    that gcc uses an SFmode or DFmode move instead of DImode to avoid
682    reformatting penalties.
683 
684    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
685    whenever possible (the upper half contains only padding).  */
686 enum x86_64_reg_class
687 {
688 X86_64_NO_CLASS,
689 X86_64_INTEGER_CLASS,
690 X86_64_INTEGERSI_CLASS,
691 X86_64_SSE_CLASS,
692 X86_64_SSESF_CLASS,
693 X86_64_SSEDF_CLASS,
694 X86_64_SSEUP_CLASS,
695 X86_64_X87_CLASS,
696 X86_64_X87UP_CLASS,
697 X86_64_MEMORY_CLASS
698 };
699 const char * const x86_64_reg_class_name[] =
700 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
701
702 #define MAX_CLASSES 4
703 static int classify_argument PARAMS ((enum machine_mode, tree,
704 enum x86_64_reg_class [MAX_CLASSES],
705 int));
706 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
707 int *));
708 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
709 int *, int));
710 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
711 enum x86_64_reg_class));
712 \f
713 /* Initialize the GCC target structure. */
714 #undef TARGET_ATTRIBUTE_TABLE
715 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
716 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
717 # undef TARGET_MERGE_DECL_ATTRIBUTES
718 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
719 #endif
720
721 #undef TARGET_COMP_TYPE_ATTRIBUTES
722 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
723
724 #undef TARGET_INIT_BUILTINS
725 #define TARGET_INIT_BUILTINS ix86_init_builtins
726
727 #undef TARGET_EXPAND_BUILTIN
728 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
729
730 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
731 static void ix86_osf_output_function_prologue PARAMS ((FILE *,
732 HOST_WIDE_INT));
733 # undef TARGET_ASM_FUNCTION_PROLOGUE
734 # define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
735 #endif
736
737 #undef TARGET_ASM_OPEN_PAREN
738 #define TARGET_ASM_OPEN_PAREN ""
739 #undef TARGET_ASM_CLOSE_PAREN
740 #define TARGET_ASM_CLOSE_PAREN ""
741
742 #undef TARGET_SCHED_ADJUST_COST
743 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
744 #undef TARGET_SCHED_ISSUE_RATE
745 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
746 #undef TARGET_SCHED_VARIABLE_ISSUE
747 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
748 #undef TARGET_SCHED_INIT
749 #define TARGET_SCHED_INIT ix86_sched_init
750 #undef TARGET_SCHED_REORDER
751 #define TARGET_SCHED_REORDER ix86_sched_reorder
752
753 struct gcc_target targetm = TARGET_INITIALIZER;
754 \f
755 /* Sometimes certain combinations of command options do not make
756 sense on a particular target machine. You can define a macro
757 `OVERRIDE_OPTIONS' to take account of this. This macro, if
758 defined, is executed once just after all the command options have
759 been parsed.
760
761 Don't use this macro to turn on various extra optimizations for
762 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
763
764 void
765 override_options ()
766 {
767 int i;
768 /* Comes from final.c -- no real reason to change it. */
769 #define MAX_CODE_ALIGN 16
770
771 static struct ptt
772 {
773 struct processor_costs *cost; /* Processor costs */
774 int target_enable; /* Target flags to enable. */
775 int target_disable; /* Target flags to disable. */
776 int align_loop; /* Default alignments. */
777 int align_jump;
778 int align_func;
779 int branch_cost;
780 }
781 const processor_target_table[PROCESSOR_max] =
782 {
783 {&i386_cost, 0, 0, 2, 2, 2, 1},
784 {&i486_cost, 0, 0, 4, 4, 4, 1},
785 {&pentium_cost, 0, 0, -4, -4, -4, 1},
786 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
787 {&k6_cost, 0, 0, -5, -5, 4, 1},
788 {&athlon_cost, 0, 0, 4, -4, 4, 1},
789 {&pentium4_cost, 0, 0, 2, 2, 2, 1}
790 };
791
792 static struct pta
793 {
794 const char *name; /* processor name or nickname. */
795 enum processor_type processor;
796 }
797 const processor_alias_table[] =
798 {
799 {"i386", PROCESSOR_I386},
800 {"i486", PROCESSOR_I486},
801 {"i586", PROCESSOR_PENTIUM},
802 {"pentium", PROCESSOR_PENTIUM},
803 {"i686", PROCESSOR_PENTIUMPRO},
804 {"pentiumpro", PROCESSOR_PENTIUMPRO},
805 {"k6", PROCESSOR_K6},
806 {"athlon", PROCESSOR_ATHLON},
807 {"pentium4", PROCESSOR_PENTIUM4},
808 };
809
810 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
811
812 #ifdef SUBTARGET_OVERRIDE_OPTIONS
813 SUBTARGET_OVERRIDE_OPTIONS;
814 #endif
815
816 ix86_arch = PROCESSOR_I386;
817 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
818
819 if (ix86_cmodel_string != 0)
820 {
821 if (!strcmp (ix86_cmodel_string, "small"))
822 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
823 else if (flag_pic)
824 sorry ("Code model %s not supported in PIC mode", ix86_cmodel_string);
825 else if (!strcmp (ix86_cmodel_string, "32"))
826 ix86_cmodel = CM_32;
827 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
828 ix86_cmodel = CM_KERNEL;
829 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
830 ix86_cmodel = CM_MEDIUM;
831 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
832 ix86_cmodel = CM_LARGE;
833 else
834 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
835 }
836 else
837 {
838 ix86_cmodel = CM_32;
839 if (TARGET_64BIT)
840 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
841 }
842 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
843 error ("Code model `%s' not supported in the %s bit mode.",
844 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
845 if (ix86_cmodel == CM_LARGE)
846 sorry ("Code model `large' not supported yet.");
847 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
848 sorry ("%i-bit mode not compiled in.",
849 (target_flags & MASK_64BIT) ? 64 : 32);
850
851 if (ix86_arch_string != 0)
852 {
853 for (i = 0; i < pta_size; i++)
854 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
855 {
856 ix86_arch = processor_alias_table[i].processor;
857 /* Default cpu tuning to the architecture. */
858 ix86_cpu = ix86_arch;
859 break;
860 }
861
862 if (i == pta_size)
863 error ("bad value (%s) for -march= switch", ix86_arch_string);
864 }
865
866 if (ix86_cpu_string != 0)
867 {
868 for (i = 0; i < pta_size; i++)
869 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
870 {
871 ix86_cpu = processor_alias_table[i].processor;
872 break;
873 }
874 if (i == pta_size)
875 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
876 }
877
878 if (optimize_size)
879 ix86_cost = &size_cost;
880 else
881 ix86_cost = processor_target_table[ix86_cpu].cost;
882 target_flags |= processor_target_table[ix86_cpu].target_enable;
883 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
884
885   /* Arrange to set up ix86_stack_locals for all functions.  */
886 init_machine_status = ix86_init_machine_status;
887 mark_machine_status = ix86_mark_machine_status;
888 free_machine_status = ix86_free_machine_status;
889
890 /* Validate -mregparm= value. */
891 if (ix86_regparm_string)
892 {
893 i = atoi (ix86_regparm_string);
894 if (i < 0 || i > REGPARM_MAX)
895 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
896 else
897 ix86_regparm = i;
898 }
899 else
900 if (TARGET_64BIT)
901 ix86_regparm = REGPARM_MAX;
902
903 /* If the user has provided any of the -malign-* options,
904 warn and use that value only if -falign-* is not set.
905 Remove this code in GCC 3.2 or later. */
906 if (ix86_align_loops_string)
907 {
908 warning ("-malign-loops is obsolete, use -falign-loops");
909 if (align_loops == 0)
910 {
911 i = atoi (ix86_align_loops_string);
912 if (i < 0 || i > MAX_CODE_ALIGN)
913 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
914 else
915 align_loops = 1 << i;
916 }
917 }
918
919 if (ix86_align_jumps_string)
920 {
921 warning ("-malign-jumps is obsolete, use -falign-jumps");
922 if (align_jumps == 0)
923 {
924 i = atoi (ix86_align_jumps_string);
925 if (i < 0 || i > MAX_CODE_ALIGN)
926 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
927 else
928 align_jumps = 1 << i;
929 }
930 }
931
932 if (ix86_align_funcs_string)
933 {
934 warning ("-malign-functions is obsolete, use -falign-functions");
935 if (align_functions == 0)
936 {
937 i = atoi (ix86_align_funcs_string);
938 if (i < 0 || i > MAX_CODE_ALIGN)
939 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
940 else
941 align_functions = 1 << i;
942 }
943 }
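/* Note the mapping implied above: the -malign-* options take a
   power-of-two exponent, so e.g. -malign-loops=4 corresponds to
   -falign-loops=16 (align_loops is set to 1 << 4).  */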
944
945 /* Default align_* from the processor table. */
946 #define abs(n) (n < 0 ? -n : n)
947 if (align_loops == 0)
948 align_loops = 1 << abs (processor_target_table[ix86_cpu].align_loop);
949 if (align_jumps == 0)
950 align_jumps = 1 << abs (processor_target_table[ix86_cpu].align_jump);
951 if (align_functions == 0)
952 align_functions = 1 << abs (processor_target_table[ix86_cpu].align_func);
953
954 /* Validate -mpreferred-stack-boundary= value, or provide default.
955 The default of 128 bits is for Pentium III's SSE __m128. */
956 ix86_preferred_stack_boundary = 128;
957 if (ix86_preferred_stack_boundary_string)
958 {
959 i = atoi (ix86_preferred_stack_boundary_string);
960 if (i < (TARGET_64BIT ? 3 : 2) || i > 31)
961 error ("-mpreferred-stack-boundary=%d is not between %d and 31", i,
962 TARGET_64BIT ? 3 : 2);
963 else
964 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
965 }
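/* For example, -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 128 bits of stack alignment, the same as the
   __m128-friendly default set just above.  */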
966
967 /* Validate -mbranch-cost= value, or provide default. */
968 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
969 if (ix86_branch_cost_string)
970 {
971 i = atoi (ix86_branch_cost_string);
972 if (i < 0 || i > 5)
973 error ("-mbranch-cost=%d is not between 0 and 5", i);
974 else
975 ix86_branch_cost = i;
976 }
977
978 /* Keep nonleaf frame pointers. */
979 if (TARGET_OMIT_LEAF_FRAME_POINTER)
980 flag_omit_frame_pointer = 1;
981
982 /* If we're doing fast math, we don't care about comparison order
983 wrt NaNs. This lets us use a shorter comparison sequence. */
984 if (flag_unsafe_math_optimizations)
985 target_flags &= ~MASK_IEEE_FP;
986
987 if (TARGET_64BIT)
988 {
989 if (TARGET_ALIGN_DOUBLE)
990 error ("-malign-double makes no sense in the 64bit mode.");
991 if (TARGET_RTD)
992 error ("-mrtd calling convention not supported in the 64bit mode.");
993 /* Enable by default the SSE and MMX builtins. */
994 target_flags |= MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE;
995 }
996
997 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
998 on by -msse. */
999 if (TARGET_SSE)
1000 target_flags |= MASK_MMX;
1001
1002 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1003 if (TARGET_3DNOW)
1004 {
1005 target_flags |= MASK_MMX;
1006       /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1007 	 extensions it adds.  */
1008 if (x86_3dnow_a & (1 << ix86_arch))
1009 target_flags |= MASK_3DNOW_A;
1010 }
1011 if ((x86_accumulate_outgoing_args & CPUMASK)
1012 && !(target_flags & MASK_NO_ACCUMULATE_OUTGOING_ARGS)
1013 && !optimize_size)
1014 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1015
1016 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1017 {
1018 char *p;
1019 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1020 p = strchr (internal_label_prefix, 'X');
1021 internal_label_prefix_len = p - internal_label_prefix;
1022 *p = '\0';
1023 }
1024 }
1025 \f
1026 void
1027 optimization_options (level, size)
1028 int level;
1029 int size ATTRIBUTE_UNUSED;
1030 {
1031 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1032 make the problem with not enough registers even worse. */
1033 #ifdef INSN_SCHEDULING
1034 if (level > 1)
1035 flag_schedule_insns = 0;
1036 #endif
1037 if (TARGET_64BIT && optimize >= 1)
1038 flag_omit_frame_pointer = 1;
1039 if (TARGET_64BIT)
1040 flag_pcc_struct_return = 0;
1041 }
1042 \f
1043 /* Table of valid machine attributes. */
1044 const struct attribute_spec ix86_attribute_table[] =
1045 {
1046 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1047 /* Stdcall attribute says callee is responsible for popping arguments
1048 if they are not variable. */
1049 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1050 /* Cdecl attribute says the callee is a normal C declaration */
1051 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1052 /* Regparm attribute specifies how many integer arguments are to be
1053 passed in registers. */
1054 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1055 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1056 { "dllimport", 1, 1, false, false, false, ix86_handle_dll_attribute },
1057 { "dllexport", 1, 1, false, false, false, ix86_handle_dll_attribute },
1058 { "shared", 1, 1, true, false, false, ix86_handle_shared_attribute },
1059 #endif
1060 { NULL, 0, 0, false, false, false, NULL }
1061 };
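/* Usage sketch for these attributes (illustrative):

       int __attribute__ ((regparm (3))) f (int a, int b, int c);
       int __attribute__ ((stdcall)) g (int a, int b);

   f receives its first three integer arguments in registers; g pops its
   own arguments on return, much like -mrtd.  */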
1062
1063 /* Handle a "cdecl" or "stdcall" attribute;
1064 arguments as in struct attribute_spec.handler. */
1065 static tree
1066 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1067 tree *node;
1068 tree name;
1069 tree args ATTRIBUTE_UNUSED;
1070 int flags ATTRIBUTE_UNUSED;
1071 bool *no_add_attrs;
1072 {
1073 if (TREE_CODE (*node) != FUNCTION_TYPE
1074 && TREE_CODE (*node) != METHOD_TYPE
1075 && TREE_CODE (*node) != FIELD_DECL
1076 && TREE_CODE (*node) != TYPE_DECL)
1077 {
1078 warning ("`%s' attribute only applies to functions",
1079 IDENTIFIER_POINTER (name));
1080 *no_add_attrs = true;
1081 }
1082
1083 if (TARGET_64BIT)
1084 {
1085 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1086 *no_add_attrs = true;
1087 }
1088
1089 return NULL_TREE;
1090 }
1091
1092 /* Handle a "regparm" attribute;
1093 arguments as in struct attribute_spec.handler. */
1094 static tree
1095 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1096 tree *node;
1097 tree name;
1098 tree args;
1099 int flags ATTRIBUTE_UNUSED;
1100 bool *no_add_attrs;
1101 {
1102 if (TREE_CODE (*node) != FUNCTION_TYPE
1103 && TREE_CODE (*node) != METHOD_TYPE
1104 && TREE_CODE (*node) != FIELD_DECL
1105 && TREE_CODE (*node) != TYPE_DECL)
1106 {
1107 warning ("`%s' attribute only applies to functions",
1108 IDENTIFIER_POINTER (name));
1109 *no_add_attrs = true;
1110 }
1111 else
1112 {
1113 tree cst;
1114
1115 cst = TREE_VALUE (args);
1116 if (TREE_CODE (cst) != INTEGER_CST)
1117 {
1118 warning ("`%s' attribute requires an integer constant argument",
1119 IDENTIFIER_POINTER (name));
1120 *no_add_attrs = true;
1121 }
1122 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1123 {
1124 warning ("argument to `%s' attribute larger than %d",
1125 IDENTIFIER_POINTER (name), REGPARM_MAX);
1126 *no_add_attrs = true;
1127 }
1128 }
1129
1130 return NULL_TREE;
1131 }
1132
1133 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
1134
1135 /* Generate the assembly code for function entry. FILE is a stdio
1136 stream to output the code to. SIZE is an int: how many units of
1137 temporary storage to allocate.
1138
1139 Refer to the array `regs_ever_live' to determine which registers to
1140 save; `regs_ever_live[I]' is nonzero if register number I is ever
1141 used in the function. This function is responsible for knowing
1142 which registers should not be saved even if used.
1143
1144 We override it here to allow for the new profiling code to go before
1145 the prologue and the old mcount code to go after the prologue (and
1146 after %ebx has been set up for ELF shared library support). */
1147
1148 static void
1149 ix86_osf_output_function_prologue (file, size)
1150 FILE *file;
1151 HOST_WIDE_INT size;
1152 {
1153 char *prefix = "";
1154 char *lprefix = LPREFIX;
1155 int labelno = profile_label_no;
1156
1157 #ifdef OSF_OS
1158
1159 if (TARGET_UNDERSCORES)
1160 prefix = "_";
1161
1162 if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
1163 {
1164 if (!flag_pic && !HALF_PIC_P ())
1165 {
1166 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1167 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1168 }
1169
1170 else if (HALF_PIC_P ())
1171 {
1172 rtx symref;
1173
1174 HALF_PIC_EXTERNAL ("_mcount_ptr");
1175 symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
1176 "_mcount_ptr"));
1177
1178 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1179 fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
1180 XSTR (symref, 0));
1181 fprintf (file, "\tcall *(%%eax)\n");
1182 }
1183
1184 else
1185 {
1186 static int call_no = 0;
1187
1188 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1189 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1190 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1191 lprefix, call_no++);
1192 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1193 lprefix, labelno);
1194 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1195 prefix);
1196 fprintf (file, "\tcall *(%%eax)\n");
1197 }
1198 }
1199
1200 #else /* !OSF_OS */
1201
1202 if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
1203 {
1204 if (!flag_pic)
1205 {
1206 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1207 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1208 }
1209
1210 else
1211 {
1212 static int call_no = 0;
1213
1214 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1215 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1216 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1217 lprefix, call_no++);
1218 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1219 lprefix, labelno);
1220 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1221 prefix);
1222 fprintf (file, "\tcall *(%%eax)\n");
1223 }
1224 }
1225 #endif /* !OSF_OS */
1226
1227 function_prologue (file, size);
1228 }
1229
1230 #endif /* OSF_OS || TARGET_OSF1ELF */
1231
1232 /* Return 0 if the attributes for two types are incompatible, 1 if they
1233 are compatible, and 2 if they are nearly compatible (which causes a
1234 warning to be generated). */
1235
1236 static int
1237 ix86_comp_type_attributes (type1, type2)
1238 tree type1;
1239 tree type2;
1240 {
1241 /* Check for mismatch of non-default calling convention. */
1242 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1243
1244 if (TREE_CODE (type1) != FUNCTION_TYPE)
1245 return 1;
1246
1247 /* Check for mismatched return types (cdecl vs stdcall). */
1248 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1249 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1250 return 0;
1251 return 1;
1252 }
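/* For example, with the default (cdecl) convention a stdcall function
   type and a plain function type compare as incompatible here (return
   value 0), so assigning one to a pointer of the other type is expected
   to be diagnosed.  */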
1253 \f
1254 /* Value is the number of bytes of arguments automatically
1255 popped when returning from a subroutine call.
1256 FUNDECL is the declaration node of the function (as a tree),
1257 FUNTYPE is the data type of the function (as a tree),
1258 or for a library call it is an identifier node for the subroutine name.
1259 SIZE is the number of bytes of arguments passed on the stack.
1260
1261 On the 80386, the RTD insn may be used to pop them if the number
1262 of args is fixed, but if the number is variable then the caller
1263 must pop them all. RTD can't be used for library calls now
1264 because the library is compiled with the Unix compiler.
1265 Use of RTD is a selectable option, since it is incompatible with
1266 standard Unix calling sequences. If the option is not selected,
1267 the caller must always pop the args.
1268
1269 The attribute stdcall is equivalent to RTD on a per module basis. */
1270
1271 int
1272 ix86_return_pops_args (fundecl, funtype, size)
1273 tree fundecl;
1274 tree funtype;
1275 int size;
1276 {
1277 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1278
1279 /* Cdecl functions override -mrtd, and never pop the stack. */
1280 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1281
1282 /* Stdcall functions will pop the stack if not variable args. */
1283 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1284 rtd = 1;
1285
1286 if (rtd
1287 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1288 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1289 == void_type_node)))
1290 return size;
1291 }
1292
1293 /* Lose any fake structure return argument. */
1294 if (aggregate_value_p (TREE_TYPE (funtype))
1295 && !TARGET_64BIT)
1296 return GET_MODE_SIZE (Pmode);
1297
1298 return 0;
1299 }
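/* Illustration: for a non-variadic stdcall function taking two ints this
   returns 8, so the callee pops its own arguments (ret $8); for a cdecl
   or variadic function it returns 0 and the caller adjusts the stack.  */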
1300 \f
1301 /* Argument support functions. */
1302
1303 /* Return true when register may be used to pass function parameters. */
1304 bool
1305 ix86_function_arg_regno_p (regno)
1306 int regno;
1307 {
1308 int i;
1309 if (!TARGET_64BIT)
1310 return regno < REGPARM_MAX || (TARGET_SSE && SSE_REGNO_P (regno));
1311 if (SSE_REGNO_P (regno) && TARGET_SSE)
1312 return true;
1313   /* RAX is used as a hidden argument to va_arg functions.  */
1314 if (!regno)
1315 return true;
1316 for (i = 0; i < REGPARM_MAX; i++)
1317 if (regno == x86_64_int_parameter_registers[i])
1318 return true;
1319 return false;
1320 }
1321
1322 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1323 for a call to a function whose data type is FNTYPE.
1324 For a library call, FNTYPE is 0. */
1325
1326 void
1327 init_cumulative_args (cum, fntype, libname)
1328 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1329 tree fntype; /* tree ptr for function decl */
1330 rtx libname; /* SYMBOL_REF of library name or 0 */
1331 {
1332 static CUMULATIVE_ARGS zero_cum;
1333 tree param, next_param;
1334
1335 if (TARGET_DEBUG_ARG)
1336 {
1337 fprintf (stderr, "\ninit_cumulative_args (");
1338 if (fntype)
1339 fprintf (stderr, "fntype code = %s, ret code = %s",
1340 tree_code_name[(int) TREE_CODE (fntype)],
1341 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1342 else
1343 fprintf (stderr, "no fntype");
1344
1345 if (libname)
1346 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1347 }
1348
1349 *cum = zero_cum;
1350
1351 /* Set up the number of registers to use for passing arguments. */
1352 cum->nregs = ix86_regparm;
1353 cum->sse_nregs = SSE_REGPARM_MAX;
1354 if (fntype && !TARGET_64BIT)
1355 {
1356 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1357
1358 if (attr)
1359 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1360 }
1361 cum->maybe_vaarg = false;
1362
1363   /* Determine if this function has variable arguments.  This is
1364      indicated by the last argument being 'void_type_node' if there
1365      are no variable arguments.  If there are variable arguments, then
1366      we won't pass anything in registers.  */
1367
1368 if (cum->nregs)
1369 {
1370 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1371 param != 0; param = next_param)
1372 {
1373 next_param = TREE_CHAIN (param);
1374 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1375 {
1376 if (!TARGET_64BIT)
1377 cum->nregs = 0;
1378 cum->maybe_vaarg = true;
1379 }
1380 }
1381 }
1382 if ((!fntype && !libname)
1383 || (fntype && !TYPE_ARG_TYPES (fntype)))
1384 cum->maybe_vaarg = 1;
1385
1386 if (TARGET_DEBUG_ARG)
1387 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1388
1389 return;
1390 }
1391
1392 /* x86-64 register passing implementation.  See the x86-64 ABI for details.
1393    The goal of this code is to classify each 8-byte chunk of the incoming
1394    argument by register class and assign registers accordingly.  */
1395
1396 /* Return the union class of CLASS1 and CLASS2.
1397 See the x86-64 PS ABI for details. */
1398
1399 static enum x86_64_reg_class
1400 merge_classes (class1, class2)
1401 enum x86_64_reg_class class1, class2;
1402 {
1403 /* Rule #1: If both classes are equal, this is the resulting class. */
1404 if (class1 == class2)
1405 return class1;
1406
1407 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1408 the other class. */
1409 if (class1 == X86_64_NO_CLASS)
1410 return class2;
1411 if (class2 == X86_64_NO_CLASS)
1412 return class1;
1413
1414 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1415 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1416 return X86_64_MEMORY_CLASS;
1417
1418 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1419 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1420 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1421 return X86_64_INTEGERSI_CLASS;
1422 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1423 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1424 return X86_64_INTEGER_CLASS;
1425
1426 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1427 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1428 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1429 return X86_64_MEMORY_CLASS;
1430
1431 /* Rule #6: Otherwise class SSE is used. */
1432 return X86_64_SSE_CLASS;
1433 }
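/* Worked example of the rules above: for union { int i; float f; } both
   fields start at offset 0 and classify as X86_64_INTEGERSI_CLASS and
   X86_64_SSESF_CLASS respectively; rule #4 merges them to
   X86_64_INTEGERSI_CLASS, so the union travels in the low half of an
   integer register.  */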
1434
1435 /* Classify the argument of type TYPE and mode MODE.
1436 CLASSES will be filled by the register class used to pass each word
1437 of the operand. The number of words is returned. In case the parameter
1438 should be passed in memory, 0 is returned. As a special case for zero
1439 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1440
1441    BIT_OFFSET is used internally for handling records; it specifies the
1442    offset of the operand in bits, modulo 256, to avoid overflow cases.
1443
1444 See the x86-64 PS ABI for details.
1445 */
1446
1447 static int
1448 classify_argument (mode, type, classes, bit_offset)
1449 enum machine_mode mode;
1450 tree type;
1451 enum x86_64_reg_class classes[MAX_CLASSES];
1452 int bit_offset;
1453 {
1454 int bytes =
1455 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1456 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1457
1458 if (type && AGGREGATE_TYPE_P (type))
1459 {
1460 int i;
1461 tree field;
1462 enum x86_64_reg_class subclasses[MAX_CLASSES];
1463
1464 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1465 if (bytes > 16)
1466 return 0;
1467
1468 for (i = 0; i < words; i++)
1469 classes[i] = X86_64_NO_CLASS;
1470
1471       /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
1472 	 signal the memory class, so handle this as a special case.  */
1473 if (!words)
1474 {
1475 classes[0] = X86_64_NO_CLASS;
1476 return 1;
1477 }
1478
1479 /* Classify each field of record and merge classes. */
1480 if (TREE_CODE (type) == RECORD_TYPE)
1481 {
1482 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1483 {
1484 if (TREE_CODE (field) == FIELD_DECL)
1485 {
1486 int num;
1487
1488 /* Bitfields are always classified as integer. Handle them
1489 early, since later code would consider them to be
1490 misaligned integers. */
1491 if (DECL_BIT_FIELD (field))
1492 {
1493 for (i = int_bit_position (field) / 8 / 8;
1494 i < (int_bit_position (field)
1495 + tree_low_cst (DECL_SIZE (field), 0)
1496 + 63) / 8 / 8; i++)
1497 classes[i] =
1498 merge_classes (X86_64_INTEGER_CLASS,
1499 classes[i]);
1500 }
1501 else
1502 {
1503 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1504 TREE_TYPE (field), subclasses,
1505 (int_bit_position (field)
1506 + bit_offset) % 256);
1507 if (!num)
1508 return 0;
1509 for (i = 0; i < num; i++)
1510 {
1511 int pos =
1512 (int_bit_position (field) + bit_offset) / 8 / 8;
1513 classes[i + pos] =
1514 merge_classes (subclasses[i], classes[i + pos]);
1515 }
1516 }
1517 }
1518 }
1519 }
1520 /* Arrays are handled as small records. */
1521 else if (TREE_CODE (type) == ARRAY_TYPE)
1522 {
1523 int num;
1524 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1525 TREE_TYPE (type), subclasses, bit_offset);
1526 if (!num)
1527 return 0;
1528
1529 /* The partial classes are now full classes. */
1530 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1531 subclasses[0] = X86_64_SSE_CLASS;
1532 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1533 subclasses[0] = X86_64_INTEGER_CLASS;
1534
1535 for (i = 0; i < words; i++)
1536 classes[i] = subclasses[i % num];
1537 }
1538 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1539 else if (TREE_CODE (type) == UNION_TYPE)
1540 {
1541 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1542 {
1543 if (TREE_CODE (field) == FIELD_DECL)
1544 {
1545 int num;
1546 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1547 TREE_TYPE (field), subclasses,
1548 bit_offset);
1549 if (!num)
1550 return 0;
1551 for (i = 0; i < num; i++)
1552 classes[i] = merge_classes (subclasses[i], classes[i]);
1553 }
1554 }
1555 }
1556 else
1557 abort ();
1558
1559 /* Final merger cleanup. */
1560 for (i = 0; i < words; i++)
1561 {
1562 /* If one class is MEMORY, everything should be passed in
1563 memory. */
1564 if (classes[i] == X86_64_MEMORY_CLASS)
1565 return 0;
1566
1567 	  /* The X86_64_SSEUP_CLASS should always be preceded by
1568 	     X86_64_SSE_CLASS.  */
1569 if (classes[i] == X86_64_SSEUP_CLASS
1570 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1571 classes[i] = X86_64_SSE_CLASS;
1572
1573 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1574 if (classes[i] == X86_64_X87UP_CLASS
1575 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1576 classes[i] = X86_64_SSE_CLASS;
1577 }
1578 return words;
1579 }
1580
1581 /* Compute the alignment needed. We align all types to their natural
1582 boundaries, with the exception of XFmode (128 bits) and XCmode (256 bits). */
1583 if (mode != VOIDmode && mode != BLKmode)
1584 {
1585 int mode_alignment = GET_MODE_BITSIZE (mode);
1586
1587 if (mode == XFmode)
1588 mode_alignment = 128;
1589 else if (mode == XCmode)
1590 mode_alignment = 256;
1591 /* Misaligned fields are always returned in memory. */
1592 if (bit_offset % mode_alignment)
1593 return 0;
1594 }
1595
1596 /* Classification of atomic types. */
1597 switch (mode)
1598 {
1599 case DImode:
1600 case SImode:
1601 case HImode:
1602 case QImode:
1603 case CSImode:
1604 case CHImode:
1605 case CQImode:
1606 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1607 classes[0] = X86_64_INTEGERSI_CLASS;
1608 else
1609 classes[0] = X86_64_INTEGER_CLASS;
1610 return 1;
1611 case CDImode:
1612 case TImode:
1613 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1614 return 2;
1615 case CTImode:
1616 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1617 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1618 return 4;
1619 case SFmode:
1620 if (!(bit_offset % 64))
1621 classes[0] = X86_64_SSESF_CLASS;
1622 else
1623 classes[0] = X86_64_SSE_CLASS;
1624 return 1;
1625 case DFmode:
1626 classes[0] = X86_64_SSEDF_CLASS;
1627 return 1;
1628 case TFmode:
1629 classes[0] = X86_64_X87_CLASS;
1630 classes[1] = X86_64_X87UP_CLASS;
1631 return 2;
1632 case TCmode:
1633 classes[0] = X86_64_X87_CLASS;
1634 classes[1] = X86_64_X87UP_CLASS;
1635 classes[2] = X86_64_X87_CLASS;
1636 classes[3] = X86_64_X87UP_CLASS;
1637 return 4;
1638 case DCmode:
1639 classes[0] = X86_64_SSEDF_CLASS;
1640 classes[1] = X86_64_SSEDF_CLASS;
1641 return 2;
1642 case SCmode:
1643 classes[0] = X86_64_SSE_CLASS;
1644 return 1;
1645 case BLKmode:
1646 return 0;
1647 default:
1648 abort ();
1649 }
1650 }
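/* As an illustration of the classification above (a sketch, assuming the
   standard x86-64 parameter passing rules; class names shown without the
   X86_64_..._CLASS decoration):

       struct { int a; int b; }        ->  { INTEGER }
       struct { double d; long l; }    ->  { SSEDF, INTEGER }
       struct { double d; double e; }  ->  { SSEDF, SSEDF }

   so the first fits in one general purpose register, the second needs one
   SSE and one integer register, and the third needs two SSE registers.
   Anything larger than 16 bytes bails out early above and is passed in
   memory.  */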
1651
1652 /* Examine the argument and set the number of registers required in each
1653 class. Return 0 if the parameter should be passed in memory. */
1654 static int
1655 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1656 enum machine_mode mode;
1657 tree type;
1658 int *int_nregs, *sse_nregs;
1659 int in_return;
1660 {
1661 enum x86_64_reg_class class[MAX_CLASSES];
1662 int n = classify_argument (mode, type, class, 0);
1663
1664 *int_nregs = 0;
1665 *sse_nregs = 0;
1666 if (!n)
1667 return 0;
1668 for (n--; n >= 0; n--)
1669 switch (class[n])
1670 {
1671 case X86_64_INTEGER_CLASS:
1672 case X86_64_INTEGERSI_CLASS:
1673 (*int_nregs)++;
1674 break;
1675 case X86_64_SSE_CLASS:
1676 case X86_64_SSESF_CLASS:
1677 case X86_64_SSEDF_CLASS:
1678 (*sse_nregs)++;
1679 break;
1680 case X86_64_NO_CLASS:
1681 case X86_64_SSEUP_CLASS:
1682 break;
1683 case X86_64_X87_CLASS:
1684 case X86_64_X87UP_CLASS:
1685 if (!in_return)
1686 return 0;
1687 break;
1688 case X86_64_MEMORY_CLASS:
1689 abort ();
1690 }
1691 return 1;
1692 }
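/* For instance, under the classification sketched above, an argument of
   type struct { double d; long l; } comes back with *sse_nregs == 1 and
   *int_nregs == 1, while a long double (TFmode) argument lands in the
   x87 classes and is therefore forced to memory unless it is a return
   value.  */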
1693 /* Construct container for the argument used by GCC interface. See
1694 FUNCTION_ARG for the detailed description. */
1695 static rtx
1696 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1697 enum machine_mode mode;
1698 tree type;
1699 int in_return;
1700 int nintregs, nsseregs;
1701 int *intreg, sse_regno;
1702 {
1703 enum machine_mode tmpmode;
1704 int bytes =
1705 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1706 enum x86_64_reg_class class[MAX_CLASSES];
1707 int n;
1708 int i;
1709 int nexps = 0;
1710 int needed_sseregs, needed_intregs;
1711 rtx exp[MAX_CLASSES];
1712 rtx ret;
1713
1714 n = classify_argument (mode, type, class, 0);
1715 if (TARGET_DEBUG_ARG)
1716 {
1717 if (!n)
1718 fprintf (stderr, "Memory class\n");
1719 else
1720 {
1721 fprintf (stderr, "Classes:");
1722 for (i = 0; i < n; i++)
1723 {
1724 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1725 }
1726 fprintf (stderr, "\n");
1727 }
1728 }
1729 if (!n)
1730 return NULL;
1731 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1732 return NULL;
1733 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1734 return NULL;
1735
1736 /* First construct simple cases. Avoid SCmode, since we want to use
1737 a single register to pass this type. */
1738 if (n == 1 && mode != SCmode)
1739 switch (class[0])
1740 {
1741 case X86_64_INTEGER_CLASS:
1742 case X86_64_INTEGERSI_CLASS:
1743 return gen_rtx_REG (mode, intreg[0]);
1744 case X86_64_SSE_CLASS:
1745 case X86_64_SSESF_CLASS:
1746 case X86_64_SSEDF_CLASS:
1747 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1748 case X86_64_X87_CLASS:
1749 return gen_rtx_REG (mode, FIRST_STACK_REG);
1750 case X86_64_NO_CLASS:
1751 /* Zero sized array, struct or class. */
1752 return NULL;
1753 default:
1754 abort ();
1755 }
1756 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1757 return gen_rtx_REG (TImode, SSE_REGNO (sse_regno));
1758 if (n == 2
1759 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1760 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1761 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1762 && class[1] == X86_64_INTEGER_CLASS
1763 && (mode == CDImode || mode == TImode)
1764 && intreg[0] + 1 == intreg[1])
1765 return gen_rtx_REG (mode, intreg[0]);
1766 if (n == 4
1767 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1768 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1769 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1770
1771 /* Otherwise figure out the entries of the PARALLEL. */
1772 for (i = 0; i < n; i++)
1773 {
1774 switch (class[i])
1775 {
1776 case X86_64_NO_CLASS:
1777 break;
1778 case X86_64_INTEGER_CLASS:
1779 case X86_64_INTEGERSI_CLASS:
1780 /* Merge TImodes on aligned occasions here too. */
1781 if (i * 8 + 8 > bytes)
1782 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1783 else if (class[i] == X86_64_INTEGERSI_CLASS)
1784 tmpmode = SImode;
1785 else
1786 tmpmode = DImode;
1787 /* We've requested a size, such as 24 bits, that no integer mode provides. Use DImode. */
1788 if (tmpmode == BLKmode)
1789 tmpmode = DImode;
1790 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1791 gen_rtx_REG (tmpmode, *intreg),
1792 GEN_INT (i*8));
1793 intreg++;
1794 break;
1795 case X86_64_SSESF_CLASS:
1796 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1797 gen_rtx_REG (SFmode,
1798 SSE_REGNO (sse_regno)),
1799 GEN_INT (i*8));
1800 sse_regno++;
1801 break;
1802 case X86_64_SSEDF_CLASS:
1803 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1804 gen_rtx_REG (DFmode,
1805 SSE_REGNO (sse_regno)),
1806 GEN_INT (i*8));
1807 sse_regno++;
1808 break;
1809 case X86_64_SSE_CLASS:
1810 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
1811 tmpmode = TImode, i++;
1812 else
1813 tmpmode = DImode;
1814 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1815 gen_rtx_REG (tmpmode,
1816 SSE_REGNO (sse_regno)),
1817 GEN_INT (i*8));
1818 sse_regno++;
1819 break;
1820 default:
1821 abort ();
1822 }
1823 }
1824 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
1825 for (i = 0; i < nexps; i++)
1826 XVECEXP (ret, 0, i) = exp [i];
1827 return ret;
1828 }
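/* A rough example of the container built here: for an argument of type
   struct { double d; long l; }, classified as { SSEDF, INTEGER }, the
   result is a PARALLEL along the lines of

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di)   (const_int 8))])

   assuming the first SSE and integer argument registers are still
   available; the offsets give the position of each piece within the
   structure.  */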
1829
1830 /* Update the data in CUM to advance over an argument
1831 of mode MODE and data type TYPE.
1832 (TYPE is null for libcalls where that information may not be available.) */
1833
1834 void
1835 function_arg_advance (cum, mode, type, named)
1836 CUMULATIVE_ARGS *cum; /* current arg information */
1837 enum machine_mode mode; /* current arg mode */
1838 tree type; /* type of the argument or 0 if lib support */
1839 int named; /* whether or not the argument was named */
1840 {
1841 int bytes =
1842 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1843 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1844
1845 if (TARGET_DEBUG_ARG)
1846 fprintf (stderr,
1847 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
1848 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1849 if (TARGET_64BIT)
1850 {
1851 int int_nregs, sse_nregs;
1852 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
1853 cum->words += words;
1854 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
1855 {
1856 cum->nregs -= int_nregs;
1857 cum->sse_nregs -= sse_nregs;
1858 cum->regno += int_nregs;
1859 cum->sse_regno += sse_nregs;
1860 }
1861 else
1862 cum->words += words;
1863 }
1864 else
1865 {
1866 if (TARGET_SSE && mode == TImode)
1867 {
1868 cum->sse_words += words;
1869 cum->sse_nregs -= 1;
1870 cum->sse_regno += 1;
1871 if (cum->sse_nregs <= 0)
1872 {
1873 cum->sse_nregs = 0;
1874 cum->sse_regno = 0;
1875 }
1876 }
1877 else
1878 {
1879 cum->words += words;
1880 cum->nregs -= words;
1881 cum->regno += words;
1882
1883 if (cum->nregs <= 0)
1884 {
1885 cum->nregs = 0;
1886 cum->regno = 0;
1887 }
1888 }
1889 }
1890 return;
1891 }
1892
1893 /* Define where to put the arguments to a function.
1894 Value is zero to push the argument on the stack,
1895 or a hard register in which to store the argument.
1896
1897 MODE is the argument's machine mode.
1898 TYPE is the data type of the argument (as a tree).
1899 This is null for libcalls where that information may
1900 not be available.
1901 CUM is a variable of type CUMULATIVE_ARGS which gives info about
1902 the preceding args and about the function being called.
1903 NAMED is nonzero if this argument is a named parameter
1904 (otherwise it is an extra parameter matching an ellipsis). */
1905
1906 struct rtx_def *
1907 function_arg (cum, mode, type, named)
1908 CUMULATIVE_ARGS *cum; /* current arg information */
1909 enum machine_mode mode; /* current arg mode */
1910 tree type; /* type of the argument or 0 if lib support */
1911 int named; /* != 0 for normal args, == 0 for ... args */
1912 {
1913 rtx ret = NULL_RTX;
1914 int bytes =
1915 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1916 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1917
1918 /* Handle a hidden AL argument containing the number of SSE registers used by
1919 varargs x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
1920 any AL settings. */
1921 if (mode == VOIDmode)
1922 {
1923 if (TARGET_64BIT)
1924 return GEN_INT (cum->maybe_vaarg
1925 ? (cum->sse_nregs < 0
1926 ? SSE_REGPARM_MAX
1927 : cum->sse_regno)
1928 : -1);
1929 else
1930 return constm1_rtx;
1931 }
1932 if (TARGET_64BIT)
1933 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
1934 &x86_64_int_parameter_registers [cum->regno],
1935 cum->sse_regno);
1936 else
1937 switch (mode)
1938 {
1939 /* For now, pass fp/complex values on the stack. */
1940 default:
1941 break;
1942
1943 case BLKmode:
1944 case DImode:
1945 case SImode:
1946 case HImode:
1947 case QImode:
1948 if (words <= cum->nregs)
1949 ret = gen_rtx_REG (mode, cum->regno);
1950 break;
1951 case TImode:
1952 if (cum->sse_nregs)
1953 ret = gen_rtx_REG (mode, cum->sse_regno);
1954 break;
1955 }
1956
1957 if (TARGET_DEBUG_ARG)
1958 {
1959 fprintf (stderr,
1960 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
1961 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1962
1963 if (ret)
1964 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
1965 else
1966 fprintf (stderr, ", stack");
1967
1968 fprintf (stderr, " )\n");
1969 }
1970
1971 return ret;
1972 }
1973
1974 /* Gives the alignment boundary, in bits, of an argument with the specified mode
1975 and type. */
1976
1977 int
1978 ix86_function_arg_boundary (mode, type)
1979 enum machine_mode mode;
1980 tree type;
1981 {
1982 int align;
1983 if (!TARGET_64BIT)
1984 return PARM_BOUNDARY;
1985 if (type)
1986 align = TYPE_ALIGN (type);
1987 else
1988 align = GET_MODE_ALIGNMENT (mode);
1989 if (align < PARM_BOUNDARY)
1990 align = PARM_BOUNDARY;
1991 if (align > 128)
1992 align = 128;
1993 return align;
1994 }
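/* So on x86-64 an __m128 or other 16-byte-aligned argument is placed on a
   128 bit boundary, scalar arguments with smaller alignment still get at
   least PARM_BOUNDARY (64 bits), and alignments above 128 bits are clamped
   down to 128.  */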
1995
1996 /* Return true if N is a possible register number of function value. */
1997 bool
1998 ix86_function_value_regno_p (regno)
1999 int regno;
2000 {
2001 if (!TARGET_64BIT)
2002 {
2003 return ((regno) == 0
2004 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2005 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2006 }
2007 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2008 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2009 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2010 }
2011
2012 /* Define how to find the value returned by a function.
2013 VALTYPE is the data type of the value (as a tree).
2014 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2015 otherwise, FUNC is 0. */
2016 rtx
2017 ix86_function_value (valtype)
2018 tree valtype;
2019 {
2020 if (TARGET_64BIT)
2021 {
2022 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2023 REGPARM_MAX, SSE_REGPARM_MAX,
2024 x86_64_int_return_registers, 0);
2025 /* For zero sized structures, construct_container returns NULL, but we need
2026 to keep the rest of the compiler happy by returning a meaningful value. */
2027 if (!ret)
2028 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2029 return ret;
2030 }
2031 else
2032 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2033 }
2034
2035 /* Return nonzero if TYPE should be returned in memory. */
2036 int
2037 ix86_return_in_memory (type)
2038 tree type;
2039 {
2040 int needed_intregs, needed_sseregs;
2041 if (TARGET_64BIT)
2042 {
2043 return !examine_argument (TYPE_MODE (type), type, 1,
2044 &needed_intregs, &needed_sseregs);
2045 }
2046 else
2047 {
2048 if (TYPE_MODE (type) == BLKmode
2049 || (VECTOR_MODE_P (TYPE_MODE (type))
2050 && int_size_in_bytes (type) == 8)
2051 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2052 && TYPE_MODE (type) != TFmode
2053 && !VECTOR_MODE_P (TYPE_MODE (type))))
2054 return 1;
2055 return 0;
2056 }
2057 }
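/* For example, on x86-64 struct { double d; long l; } is small enough to be
   classified into registers and is returned in them, while a 32 byte
   structure exceeds the 16 byte classification limit and is returned in
   memory.  */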
2058
2059 /* Define how to find the value returned by a library function
2060 assuming the value has mode MODE. */
2061 rtx
2062 ix86_libcall_value (mode)
2063 enum machine_mode mode;
2064 {
2065 if (TARGET_64BIT)
2066 {
2067 switch (mode)
2068 {
2069 case SFmode:
2070 case SCmode:
2071 case DFmode:
2072 case DCmode:
2073 return gen_rtx_REG (mode, FIRST_SSE_REG);
2074 case TFmode:
2075 case TCmode:
2076 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2077 default:
2078 return gen_rtx_REG (mode, 0);
2079 }
2080 }
2081 else
2082 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2083 }
2084 \f
2085 /* Create the va_list data type. */
2086
2087 tree
2088 ix86_build_va_list ()
2089 {
2090 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2091
2092 /* For i386 we use a plain pointer to the argument area. */
2093 if (!TARGET_64BIT)
2094 return build_pointer_type (char_type_node);
2095
2096 record = make_lang_type (RECORD_TYPE);
2097 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2098
2099 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2100 unsigned_type_node);
2101 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2102 unsigned_type_node);
2103 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2104 ptr_type_node);
2105 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2106 ptr_type_node);
2107
2108 DECL_FIELD_CONTEXT (f_gpr) = record;
2109 DECL_FIELD_CONTEXT (f_fpr) = record;
2110 DECL_FIELD_CONTEXT (f_ovf) = record;
2111 DECL_FIELD_CONTEXT (f_sav) = record;
2112
2113 TREE_CHAIN (record) = type_decl;
2114 TYPE_NAME (record) = type_decl;
2115 TYPE_FIELDS (record) = f_gpr;
2116 TREE_CHAIN (f_gpr) = f_fpr;
2117 TREE_CHAIN (f_fpr) = f_ovf;
2118 TREE_CHAIN (f_ovf) = f_sav;
2119
2120 layout_type (record);
2121
2122 /* The correct type is an array type of one element. */
2123 return build_array_type (record, build_index_type (size_zero_node));
2124 }
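/* The record built above corresponds roughly to the C declaration used by
   the x86-64 ABI:

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;

   with va_list itself being a one element array of this record.  */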
2125
2126 /* Perform any actions needed for a function that is receiving a
2127 variable number of arguments.
2128
2129 CUM is as above.
2130
2131 MODE and TYPE are the mode and type of the current parameter.
2132
2133 PRETEND_SIZE is a variable that should be set to the amount of stack
2134 that must be pushed by the prolog to pretend that our caller pushed
2135 it.
2136
2137 Normally, this macro will push all remaining incoming registers on the
2138 stack and set PRETEND_SIZE to the length of the registers pushed. */
2139
2140 void
2141 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2142 CUMULATIVE_ARGS *cum;
2143 enum machine_mode mode;
2144 tree type;
2145 int *pretend_size ATTRIBUTE_UNUSED;
2146 int no_rtl;
2147
2148 {
2149 CUMULATIVE_ARGS next_cum;
2150 rtx save_area = NULL_RTX, mem;
2151 rtx label;
2152 rtx label_ref;
2153 rtx tmp_reg;
2154 rtx nsse_reg;
2155 int set;
2156 tree fntype;
2157 int stdarg_p;
2158 int i;
2159
2160 if (!TARGET_64BIT)
2161 return;
2162
2163 /* Indicate that we need to allocate space on the stack for the varargs save area. */
2164 ix86_save_varrargs_registers = 1;
2165
2166 fntype = TREE_TYPE (current_function_decl);
2167 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2168 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2169 != void_type_node));
2170
2171 /* For varargs, we do not want to skip the dummy va_dcl argument.
2172 For stdargs, we do want to skip the last named argument. */
2173 next_cum = *cum;
2174 if (stdarg_p)
2175 function_arg_advance (&next_cum, mode, type, 1);
2176
2177 if (!no_rtl)
2178 save_area = frame_pointer_rtx;
2179
2180 set = get_varargs_alias_set ();
2181
2182 for (i = next_cum.regno; i < ix86_regparm; i++)
2183 {
2184 mem = gen_rtx_MEM (Pmode,
2185 plus_constant (save_area, i * UNITS_PER_WORD));
2186 set_mem_alias_set (mem, set);
2187 emit_move_insn (mem, gen_rtx_REG (Pmode,
2188 x86_64_int_parameter_registers[i]));
2189 }
2190
2191 if (next_cum.sse_nregs)
2192 {
2193 /* Now emit code to save SSE registers. The AX parameter contains the number
2194 of SSE parameter registers used to call this function. We use the
2195 sse_prologue_save insn template that produces a computed jump across the
2196 SSE saves. We need some preparation work to get this working. */
2197
2198 label = gen_label_rtx ();
2199 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2200
2201 /* Compute address to jump to :
2202 label - 5*eax + nnamed_sse_arguments*5 */
2203 tmp_reg = gen_reg_rtx (Pmode);
2204 nsse_reg = gen_reg_rtx (Pmode);
2205 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2206 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2207 gen_rtx_MULT (VOIDmode, nsse_reg,
2208 GEN_INT (4))));
2209 if (next_cum.sse_regno)
2210 emit_move_insn
2211 (nsse_reg,
2212 gen_rtx_CONST (DImode,
2213 gen_rtx_PLUS (DImode,
2214 label_ref,
2215 GEN_INT (next_cum.sse_regno * 4))));
2216 else
2217 emit_move_insn (nsse_reg, label_ref);
2218 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2219
2220 /* Compute the address of the memory block we save into. We always use a
2221 pointer pointing 127 bytes after the first byte to store - this is needed
2222 to keep the instruction size limited to 4 bytes. */
2223 tmp_reg = gen_reg_rtx (Pmode);
2224 emit_insn (gen_rtx_SET(VOIDmode, tmp_reg,
2225 plus_constant (save_area, 8 * REGPARM_MAX + 127)));
2226 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2227 set_mem_alias_set (mem, set);
2228
2229 /* And finally do the dirty job! */
2230 emit_insn (gen_sse_prologue_save (mem, nsse_reg, GEN_INT (next_cum.sse_regno),
2231 label));
2232 }
2233
2234 }
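/* The register save area set up above therefore holds REGPARM_MAX eightbyte
   slots for the integer argument registers followed by SSE_REGPARM_MAX
   sixteen byte slots for the SSE registers (6 and 8 respectively in the
   x86-64 ABI); the gp_offset and fp_offset fields initialized by
   ix86_va_start below index into this block.  */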
2235
2236 /* Implement va_start. */
2237
2238 void
2239 ix86_va_start (stdarg_p, valist, nextarg)
2240 int stdarg_p;
2241 tree valist;
2242 rtx nextarg;
2243 {
2244 HOST_WIDE_INT words, n_gpr, n_fpr;
2245 tree f_gpr, f_fpr, f_ovf, f_sav;
2246 tree gpr, fpr, ovf, sav, t;
2247
2248 /* Only 64bit target needs something special. */
2249 if (!TARGET_64BIT)
2250 {
2251 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2252 return;
2253 }
2254
2255 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2256 f_fpr = TREE_CHAIN (f_gpr);
2257 f_ovf = TREE_CHAIN (f_fpr);
2258 f_sav = TREE_CHAIN (f_ovf);
2259
2260 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2261 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2262 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2263 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2264 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2265
2266 /* Count number of gp and fp argument registers used. */
2267 words = current_function_args_info.words;
2268 n_gpr = current_function_args_info.regno;
2269 n_fpr = current_function_args_info.sse_regno;
2270
2271 if (TARGET_DEBUG_ARG)
2272 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2273 (int)words, (int)n_gpr, (int)n_fpr);
2274
2275 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2276 build_int_2 (n_gpr * 8, 0));
2277 TREE_SIDE_EFFECTS (t) = 1;
2278 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2279
2280 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2281 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2282 TREE_SIDE_EFFECTS (t) = 1;
2283 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2284
2285 /* Find the overflow area. */
2286 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2287 if (words != 0)
2288 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2289 build_int_2 (words * UNITS_PER_WORD, 0));
2290 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2291 TREE_SIDE_EFFECTS (t) = 1;
2292 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2293
2294 /* Find the register save area.
2295 The prologue of the function saves it right above the stack frame. */
2296 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2297 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2298 TREE_SIDE_EFFECTS (t) = 1;
2299 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2300 }
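/* As an example, for a function whose named parameters consume two integer
   registers and one SSE register, the code above initializes gp_offset to
   16, fp_offset to 8 * REGPARM_MAX + 16, overflow_arg_area to the first
   stack-passed argument, and reg_save_area to the block saved by
   ix86_setup_incoming_varargs.  */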
2301
2302 /* Implement va_arg. */
2303 rtx
2304 ix86_va_arg (valist, type)
2305 tree valist, type;
2306 {
2307 static int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2308 tree f_gpr, f_fpr, f_ovf, f_sav;
2309 tree gpr, fpr, ovf, sav, t;
2310 int indirect_p = 0, size, rsize;
2311 rtx lab_false, lab_over = NULL_RTX;
2312 rtx addr_rtx, r;
2313 rtx container;
2314
2315 /* Only 64bit target needs something special. */
2316 if (!TARGET_64BIT)
2317 {
2318 return std_expand_builtin_va_arg (valist, type);
2319 }
2320
2321 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2322 f_fpr = TREE_CHAIN (f_gpr);
2323 f_ovf = TREE_CHAIN (f_fpr);
2324 f_sav = TREE_CHAIN (f_ovf);
2325
2326 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2327 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2328 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2329 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2330 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2331
2332 size = int_size_in_bytes (type);
2333 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2334
2335 container = construct_container (TYPE_MODE (type), type, 0,
2336 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2337 /*
2338 * Pull the value out of the saved registers ...
2339 */
2340
2341 addr_rtx = gen_reg_rtx (Pmode);
2342
2343 if (container)
2344 {
2345 rtx int_addr_rtx, sse_addr_rtx;
2346 int needed_intregs, needed_sseregs;
2347 int need_temp;
2348
2349 lab_over = gen_label_rtx ();
2350 lab_false = gen_label_rtx ();
2351
2352 examine_argument (TYPE_MODE (type), type, 0,
2353 &needed_intregs, &needed_sseregs);
2354
2355
2356 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2357 || TYPE_ALIGN (type) > 128);
2358
2359 /* In case we are passing a structure, verify that it is a consecutive block
2360 in the register save area. If not, we need to do moves. */
2361 if (!need_temp && !REG_P (container))
2362 {
2363 /* Verify that all registers are strictly consecutive. */
2364 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2365 {
2366 int i;
2367
2368 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2369 {
2370 rtx slot = XVECEXP (container, 0, i);
2371 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int)i
2372 || INTVAL (XEXP (slot, 1)) != i * 16)
2373 need_temp = 1;
2374 }
2375 }
2376 else
2377 {
2378 int i;
2379
2380 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2381 {
2382 rtx slot = XVECEXP (container, 0, i);
2383 if (REGNO (XEXP (slot, 0)) != (unsigned int)i
2384 || INTVAL (XEXP (slot, 1)) != i * 8)
2385 need_temp = 1;
2386 }
2387 }
2388 }
2389 if (!need_temp)
2390 {
2391 int_addr_rtx = addr_rtx;
2392 sse_addr_rtx = addr_rtx;
2393 }
2394 else
2395 {
2396 int_addr_rtx = gen_reg_rtx (Pmode);
2397 sse_addr_rtx = gen_reg_rtx (Pmode);
2398 }
2399 /* First ensure that we fit completely in registers. */
2400 if (needed_intregs)
2401 {
2402 emit_cmp_and_jump_insns (expand_expr
2403 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2404 GEN_INT ((REGPARM_MAX - needed_intregs +
2405 1) * 8), GE, const1_rtx, SImode,
2406 1, 1, lab_false);
2407 }
2408 if (needed_sseregs)
2409 {
2410 emit_cmp_and_jump_insns (expand_expr
2411 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2412 GEN_INT ((SSE_REGPARM_MAX -
2413 needed_sseregs + 1) * 16 +
2414 REGPARM_MAX * 8), GE, const1_rtx,
2415 SImode, 1, 1, lab_false);
2416 }
2417
2418 /* Compute index to start of area used for integer regs. */
2419 if (needed_intregs)
2420 {
2421 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2422 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2423 if (r != int_addr_rtx)
2424 emit_move_insn (int_addr_rtx, r);
2425 }
2426 if (needed_sseregs)
2427 {
2428 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2429 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2430 if (r != sse_addr_rtx)
2431 emit_move_insn (sse_addr_rtx, r);
2432 }
2433 if (need_temp)
2434 {
2435 int i;
2436 rtx mem;
2437
2438 mem = assign_temp (type, 0, 1, 0);
2439 set_mem_alias_set (mem, get_varargs_alias_set ());
2440 addr_rtx = XEXP (mem, 0);
2441 for (i = 0; i < XVECLEN (container, 0); i++)
2442 {
2443 rtx slot = XVECEXP (container, 0, i);
2444 rtx reg = XEXP (slot, 0);
2445 enum machine_mode mode = GET_MODE (reg);
2446 rtx src_addr;
2447 rtx src_mem;
2448 int src_offset;
2449 rtx dest_mem;
2450
2451 if (SSE_REGNO_P (REGNO (reg)))
2452 {
2453 src_addr = sse_addr_rtx;
2454 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2455 }
2456 else
2457 {
2458 src_addr = int_addr_rtx;
2459 src_offset = REGNO (reg) * 8;
2460 }
2461 src_mem = gen_rtx_MEM (mode, src_addr);
2462 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2463 src_mem = adjust_address (src_mem, mode, src_offset);
2464 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2465 PUT_MODE (dest_mem, mode);
2466 /* ??? Break out TImode moves from integer registers? */
2467 emit_move_insn (dest_mem, src_mem);
2468 }
2469 }
2470
2471 if (needed_intregs)
2472 {
2473 t =
2474 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2475 build_int_2 (needed_intregs * 8, 0));
2476 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2477 TREE_SIDE_EFFECTS (t) = 1;
2478 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2479 }
2480 if (needed_sseregs)
2481 {
2482 t =
2483 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2484 build_int_2 (needed_sseregs * 16, 0));
2485 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2486 TREE_SIDE_EFFECTS (t) = 1;
2487 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2488 }
2489
2490 emit_jump_insn (gen_jump (lab_over));
2491 emit_barrier ();
2492 emit_label (lab_false);
2493 }
2494
2495 /* ... otherwise out of the overflow area. */
2496
2497 /* Care for on-stack alignment if needed. */
2498 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2499 t = ovf;
2500 else
2501 {
2502 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2503 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2504 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2505 }
2506 t = save_expr (t);
2507
2508 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2509 if (r != addr_rtx)
2510 emit_move_insn (addr_rtx, r);
2511
2512 t =
2513 build (PLUS_EXPR, TREE_TYPE (t), t,
2514 build_int_2 (rsize * UNITS_PER_WORD, 0));
2515 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2516 TREE_SIDE_EFFECTS (t) = 1;
2517 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2518
2519 if (container)
2520 emit_label (lab_over);
2521
2522 if (indirect_p)
2523 {
2524 abort ();
2525 r = gen_rtx_MEM (Pmode, addr_rtx);
2526 set_mem_alias_set (r, get_varargs_alias_set ());
2527 emit_move_insn (addr_rtx, r);
2528 }
2529
2530 return addr_rtx;
2531 }
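/* In outline, the code emitted above for a type that needs registers
   behaves roughly like the following sketch (not the literal RTL):

       if (gp_offset >= (REGPARM_MAX - needed_intregs + 1) * 8
           || fp_offset >= (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
                           + REGPARM_MAX * 8)
         goto overflow;                          -- branch to lab_false
       addr = reg_save_area + offset;            -- possibly via a temporary
       gp_offset += needed_intregs * 8;
       fp_offset += needed_sseregs * 16;
       goto done;                                -- jump to lab_over
     overflow:
       addr = align (overflow_arg_area, FUNCTION_ARG_BOUNDARY (VOIDmode, type));
       overflow_arg_area = addr + rsize * UNITS_PER_WORD;
     done:
       return addr;

   Each register check is only emitted when the corresponding register kind
   is actually needed.  */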
2532 \f
2533 /* Return nonzero if OP is general operand representable on x86_64. */
2534
2535 int
2536 x86_64_general_operand (op, mode)
2537 rtx op;
2538 enum machine_mode mode;
2539 {
2540 if (!TARGET_64BIT)
2541 return general_operand (op, mode);
2542 if (nonimmediate_operand (op, mode))
2543 return 1;
2544 return x86_64_sign_extended_value (op);
2545 }
2546
2547 /* Return nonzero if OP is a general operand representable on x86_64
2548 as either a sign extended or zero extended constant. */
2549
2550 int
2551 x86_64_szext_general_operand (op, mode)
2552 rtx op;
2553 enum machine_mode mode;
2554 {
2555 if (!TARGET_64BIT)
2556 return general_operand (op, mode);
2557 if (nonimmediate_operand (op, mode))
2558 return 1;
2559 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2560 }
2561
2562 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2563
2564 int
2565 x86_64_nonmemory_operand (op, mode)
2566 rtx op;
2567 enum machine_mode mode;
2568 {
2569 if (!TARGET_64BIT)
2570 return nonmemory_operand (op, mode);
2571 if (register_operand (op, mode))
2572 return 1;
2573 return x86_64_sign_extended_value (op);
2574 }
2575
2576 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2577
2578 int
2579 x86_64_movabs_operand (op, mode)
2580 rtx op;
2581 enum machine_mode mode;
2582 {
2583 if (!TARGET_64BIT || !flag_pic)
2584 return nonmemory_operand (op, mode);
2585 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2586 return 1;
2587 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2588 return 1;
2589 return 0;
2590 }
2591
2592 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2593
2594 int
2595 x86_64_szext_nonmemory_operand (op, mode)
2596 rtx op;
2597 enum machine_mode mode;
2598 {
2599 if (!TARGET_64BIT)
2600 return nonmemory_operand (op, mode);
2601 if (register_operand (op, mode))
2602 return 1;
2603 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2604 }
2605
2606 /* Return nonzero if OP is immediate operand representable on x86_64. */
2607
2608 int
2609 x86_64_immediate_operand (op, mode)
2610 rtx op;
2611 enum machine_mode mode;
2612 {
2613 if (!TARGET_64BIT)
2614 return immediate_operand (op, mode);
2615 return x86_64_sign_extended_value (op);
2616 }
2617
2618 /* Return nonzero if OP is an immediate operand representable on x86_64 as a zero extended constant. */
2619
2620 int
2621 x86_64_zext_immediate_operand (op, mode)
2622 rtx op;
2623 enum machine_mode mode ATTRIBUTE_UNUSED;
2624 {
2625 return x86_64_zero_extended_value (op);
2626 }
2627
2628 /* Return nonzero if OP is (const_int 1), else return zero. */
2629
2630 int
2631 const_int_1_operand (op, mode)
2632 rtx op;
2633 enum machine_mode mode ATTRIBUTE_UNUSED;
2634 {
2635 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2636 }
2637
2638 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2639 reference and a constant. */
2640
2641 int
2642 symbolic_operand (op, mode)
2643 register rtx op;
2644 enum machine_mode mode ATTRIBUTE_UNUSED;
2645 {
2646 switch (GET_CODE (op))
2647 {
2648 case SYMBOL_REF:
2649 case LABEL_REF:
2650 return 1;
2651
2652 case CONST:
2653 op = XEXP (op, 0);
2654 if (GET_CODE (op) == SYMBOL_REF
2655 || GET_CODE (op) == LABEL_REF
2656 || (GET_CODE (op) == UNSPEC
2657 && (XINT (op, 1) == 6
2658 || XINT (op, 1) == 7
2659 || XINT (op, 1) == 15)))
2660 return 1;
2661 if (GET_CODE (op) != PLUS
2662 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2663 return 0;
2664
2665 op = XEXP (op, 0);
2666 if (GET_CODE (op) == SYMBOL_REF
2667 || GET_CODE (op) == LABEL_REF)
2668 return 1;
2669 /* Only @GOTOFF gets offsets. */
2670 if (GET_CODE (op) != UNSPEC
2671 || XINT (op, 1) != 7)
2672 return 0;
2673
2674 op = XVECEXP (op, 0, 0);
2675 if (GET_CODE (op) == SYMBOL_REF
2676 || GET_CODE (op) == LABEL_REF)
2677 return 1;
2678 return 0;
2679
2680 default:
2681 return 0;
2682 }
2683 }
2684
2685 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2686
2687 int
2688 pic_symbolic_operand (op, mode)
2689 register rtx op;
2690 enum machine_mode mode ATTRIBUTE_UNUSED;
2691 {
2692 if (GET_CODE (op) != CONST)
2693 return 0;
2694 op = XEXP (op, 0);
2695 if (TARGET_64BIT)
2696 {
2697 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2698 return 1;
2699 }
2700 else
2701 {
2702 if (GET_CODE (op) == UNSPEC)
2703 return 1;
2704 if (GET_CODE (op) != PLUS
2705 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2706 return 0;
2707 op = XEXP (op, 0);
2708 if (GET_CODE (op) == UNSPEC)
2709 return 1;
2710 }
2711 return 0;
2712 }
2713
2714 /* Return true if OP is a symbolic operand that resolves locally. */
2715
2716 static int
2717 local_symbolic_operand (op, mode)
2718 rtx op;
2719 enum machine_mode mode ATTRIBUTE_UNUSED;
2720 {
2721 if (GET_CODE (op) == LABEL_REF)
2722 return 1;
2723
2724 if (GET_CODE (op) == CONST
2725 && GET_CODE (XEXP (op, 0)) == PLUS
2726 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2727 op = XEXP (XEXP (op, 0), 0);
2728
2729 if (GET_CODE (op) != SYMBOL_REF)
2730 return 0;
2731
2732 /* These we've been told are local by varasm and encode_section_info
2733 respectively. */
2734 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2735 return 1;
2736
2737 /* There is, however, a not insubstantial body of code in the rest of
2738 the compiler that assumes it can just stick the results of
2739 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2740 /* ??? This is a hack. Should update the body of the compiler to
2741 always create a DECL and invoke ENCODE_SECTION_INFO. */
2742 if (strncmp (XSTR (op, 0), internal_label_prefix,
2743 internal_label_prefix_len) == 0)
2744 return 1;
2745
2746 return 0;
2747 }
2748
2749 /* Test for a valid operand for a call instruction. Don't allow the
2750 arg pointer register or virtual regs since they may decay into
2751 reg + const, which the patterns can't handle. */
2752
2753 int
2754 call_insn_operand (op, mode)
2755 rtx op;
2756 enum machine_mode mode ATTRIBUTE_UNUSED;
2757 {
2758 /* Disallow indirect through a virtual register. This leads to
2759 compiler aborts when trying to eliminate them. */
2760 if (GET_CODE (op) == REG
2761 && (op == arg_pointer_rtx
2762 || op == frame_pointer_rtx
2763 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
2764 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
2765 return 0;
2766
2767 /* Disallow `call 1234'. Due to varying assembler lameness this
2768 gets either rejected or translated to `call .+1234'. */
2769 if (GET_CODE (op) == CONST_INT)
2770 return 0;
2771
2772 /* Explicitly allow SYMBOL_REF even if pic. */
2773 if (GET_CODE (op) == SYMBOL_REF)
2774 return 1;
2775
2776 /* Half-pic doesn't allow anything but registers and constants.
2777 We've just taken care of the latter. */
2778 if (HALF_PIC_P ())
2779 return register_operand (op, Pmode);
2780
2781 /* Otherwise we can allow any general_operand in the address. */
2782 return general_operand (op, Pmode);
2783 }
2784
2785 int
2786 constant_call_address_operand (op, mode)
2787 rtx op;
2788 enum machine_mode mode ATTRIBUTE_UNUSED;
2789 {
2790 if (GET_CODE (op) == CONST
2791 && GET_CODE (XEXP (op, 0)) == PLUS
2792 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2793 op = XEXP (XEXP (op, 0), 0);
2794 return GET_CODE (op) == SYMBOL_REF;
2795 }
2796
2797 /* Match exactly zero and one. */
2798
2799 int
2800 const0_operand (op, mode)
2801 register rtx op;
2802 enum machine_mode mode;
2803 {
2804 return op == CONST0_RTX (mode);
2805 }
2806
2807 int
2808 const1_operand (op, mode)
2809 register rtx op;
2810 enum machine_mode mode ATTRIBUTE_UNUSED;
2811 {
2812 return op == const1_rtx;
2813 }
2814
2815 /* Match 2, 4, or 8. Used for leal multiplicands. */
2816
2817 int
2818 const248_operand (op, mode)
2819 register rtx op;
2820 enum machine_mode mode ATTRIBUTE_UNUSED;
2821 {
2822 return (GET_CODE (op) == CONST_INT
2823 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
2824 }
2825
2826 /* True if this is a constant appropriate for an increment or decrement. */
2827
2828 int
2829 incdec_operand (op, mode)
2830 register rtx op;
2831 enum machine_mode mode ATTRIBUTE_UNUSED;
2832 {
2833 /* On Pentium4, the inc and dec operations cause an extra dependency on the
2834 flags register, since the carry flag is not set. */
2835 if (TARGET_PENTIUM4 && !optimize_size)
2836 return 0;
2837 return op == const1_rtx || op == constm1_rtx;
2838 }
2839
2840 /* Return nonzero if OP is acceptable as operand of DImode shift
2841 expander. */
2842
2843 int
2844 shiftdi_operand (op, mode)
2845 rtx op;
2846 enum machine_mode mode ATTRIBUTE_UNUSED;
2847 {
2848 if (TARGET_64BIT)
2849 return nonimmediate_operand (op, mode);
2850 else
2851 return register_operand (op, mode);
2852 }
2853
2854 /* Return false if this is the stack pointer, or any other fake
2855 register eliminable to the stack pointer. Otherwise, this is
2856 a register operand.
2857
2858 This is used to prevent esp from being used as an index reg.
2859 Which would only happen in pathological cases. */
2860
2861 int
2862 reg_no_sp_operand (op, mode)
2863 register rtx op;
2864 enum machine_mode mode;
2865 {
2866 rtx t = op;
2867 if (GET_CODE (t) == SUBREG)
2868 t = SUBREG_REG (t);
2869 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
2870 return 0;
2871
2872 return register_operand (op, mode);
2873 }
2874
2875 int
2876 mmx_reg_operand (op, mode)
2877 register rtx op;
2878 enum machine_mode mode ATTRIBUTE_UNUSED;
2879 {
2880 return MMX_REG_P (op);
2881 }
2882
2883 /* Return false if this is any eliminable register. Otherwise
2884 general_operand. */
2885
2886 int
2887 general_no_elim_operand (op, mode)
2888 register rtx op;
2889 enum machine_mode mode;
2890 {
2891 rtx t = op;
2892 if (GET_CODE (t) == SUBREG)
2893 t = SUBREG_REG (t);
2894 if (t == arg_pointer_rtx || t == frame_pointer_rtx
2895 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
2896 || t == virtual_stack_dynamic_rtx)
2897 return 0;
2898 if (REG_P (t)
2899 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
2900 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
2901 return 0;
2902
2903 return general_operand (op, mode);
2904 }
2905
2906 /* Return false if this is any eliminable register. Otherwise
2907 register_operand or const_int. */
2908
2909 int
2910 nonmemory_no_elim_operand (op, mode)
2911 register rtx op;
2912 enum machine_mode mode;
2913 {
2914 rtx t = op;
2915 if (GET_CODE (t) == SUBREG)
2916 t = SUBREG_REG (t);
2917 if (t == arg_pointer_rtx || t == frame_pointer_rtx
2918 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
2919 || t == virtual_stack_dynamic_rtx)
2920 return 0;
2921
2922 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
2923 }
2924
2925 /* Return true if op is a Q_REGS class register. */
2926
2927 int
2928 q_regs_operand (op, mode)
2929 register rtx op;
2930 enum machine_mode mode;
2931 {
2932 if (mode != VOIDmode && GET_MODE (op) != mode)
2933 return 0;
2934 if (GET_CODE (op) == SUBREG)
2935 op = SUBREG_REG (op);
2936 return QI_REG_P (op);
2937 }
2938
2939 /* Return true if op is a NON_Q_REGS class register. */
2940
2941 int
2942 non_q_regs_operand (op, mode)
2943 register rtx op;
2944 enum machine_mode mode;
2945 {
2946 if (mode != VOIDmode && GET_MODE (op) != mode)
2947 return 0;
2948 if (GET_CODE (op) == SUBREG)
2949 op = SUBREG_REG (op);
2950 return NON_QI_REG_P (op);
2951 }
2952
2953 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
2954 insns. */
2955 int
2956 sse_comparison_operator (op, mode)
2957 rtx op;
2958 enum machine_mode mode ATTRIBUTE_UNUSED;
2959 {
2960 enum rtx_code code = GET_CODE (op);
2961 switch (code)
2962 {
2963 /* Operations supported directly. */
2964 case EQ:
2965 case LT:
2966 case LE:
2967 case UNORDERED:
2968 case NE:
2969 case UNGE:
2970 case UNGT:
2971 case ORDERED:
2972 return 1;
2973 /* These are equivalent to ones above in non-IEEE comparisons. */
2974 case UNEQ:
2975 case UNLT:
2976 case UNLE:
2977 case LTGT:
2978 case GE:
2979 case GT:
2980 return !TARGET_IEEE_FP;
2981 default:
2982 return 0;
2983 }
2984 }
2985 /* Return 1 if OP is a valid comparison operator in valid mode. */
2986 int
2987 ix86_comparison_operator (op, mode)
2988 register rtx op;
2989 enum machine_mode mode;
2990 {
2991 enum machine_mode inmode;
2992 enum rtx_code code = GET_CODE (op);
2993 if (mode != VOIDmode && GET_MODE (op) != mode)
2994 return 0;
2995 if (GET_RTX_CLASS (code) != '<')
2996 return 0;
2997 inmode = GET_MODE (XEXP (op, 0));
2998
2999 if (inmode == CCFPmode || inmode == CCFPUmode)
3000 {
3001 enum rtx_code second_code, bypass_code;
3002 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3003 return (bypass_code == NIL && second_code == NIL);
3004 }
3005 switch (code)
3006 {
3007 case EQ: case NE:
3008 return 1;
3009 case LT: case GE:
3010 if (inmode == CCmode || inmode == CCGCmode
3011 || inmode == CCGOCmode || inmode == CCNOmode)
3012 return 1;
3013 return 0;
3014 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3015 if (inmode == CCmode)
3016 return 1;
3017 return 0;
3018 case GT: case LE:
3019 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3020 return 1;
3021 return 0;
3022 default:
3023 return 0;
3024 }
3025 }
3026
3027 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3028
3029 int
3030 fcmov_comparison_operator (op, mode)
3031 register rtx op;
3032 enum machine_mode mode;
3033 {
3034 enum machine_mode inmode;
3035 enum rtx_code code = GET_CODE (op);
3036 if (mode != VOIDmode && GET_MODE (op) != mode)
3037 return 0;
3038 if (GET_RTX_CLASS (code) != '<')
3039 return 0;
3040 inmode = GET_MODE (XEXP (op, 0));
3041 if (inmode == CCFPmode || inmode == CCFPUmode)
3042 {
3043 enum rtx_code second_code, bypass_code;
3044 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3045 if (bypass_code != NIL || second_code != NIL)
3046 return 0;
3047 code = ix86_fp_compare_code_to_integer (code);
3048 }
3049 /* The i387 supports just a limited set of condition codes. */
3050 switch (code)
3051 {
3052 case LTU: case GTU: case LEU: case GEU:
3053 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3054 return 1;
3055 return 0;
3056 case ORDERED: case UNORDERED:
3057 case EQ: case NE:
3058 return 1;
3059 default:
3060 return 0;
3061 }
3062 }
3063
3064 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3065
3066 int
3067 promotable_binary_operator (op, mode)
3068 register rtx op;
3069 enum machine_mode mode ATTRIBUTE_UNUSED;
3070 {
3071 switch (GET_CODE (op))
3072 {
3073 case MULT:
3074 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3075 but the 386 and 486 do HImode multiplies faster. */
3076 return ix86_cpu > PROCESSOR_I486;
3077 case PLUS:
3078 case AND:
3079 case IOR:
3080 case XOR:
3081 case ASHIFT:
3082 return 1;
3083 default:
3084 return 0;
3085 }
3086 }
3087
3088 /* Nearly general operand, but accept any const_double, since we wish
3089 to be able to drop them into memory rather than have them get pulled
3090 into registers. */
3091
3092 int
3093 cmp_fp_expander_operand (op, mode)
3094 register rtx op;
3095 enum machine_mode mode;
3096 {
3097 if (mode != VOIDmode && mode != GET_MODE (op))
3098 return 0;
3099 if (GET_CODE (op) == CONST_DOUBLE)
3100 return 1;
3101 return general_operand (op, mode);
3102 }
3103
3104 /* Match an SI or HImode register for a zero_extract. */
3105
3106 int
3107 ext_register_operand (op, mode)
3108 register rtx op;
3109 enum machine_mode mode ATTRIBUTE_UNUSED;
3110 {
3111 int regno;
3112 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3113 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3114 return 0;
3115
3116 if (!register_operand (op, VOIDmode))
3117 return 0;
3118
3119 /* Be careful to accept only registers having upper parts. */
3120 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3121 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3122 }
3123
3124 /* Return 1 if this is a valid binary floating-point operation.
3125 OP is the expression matched, and MODE is its mode. */
3126
3127 int
3128 binary_fp_operator (op, mode)
3129 register rtx op;
3130 enum machine_mode mode;
3131 {
3132 if (mode != VOIDmode && mode != GET_MODE (op))
3133 return 0;
3134
3135 switch (GET_CODE (op))
3136 {
3137 case PLUS:
3138 case MINUS:
3139 case MULT:
3140 case DIV:
3141 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3142
3143 default:
3144 return 0;
3145 }
3146 }
3147
3148 int
3149 mult_operator(op, mode)
3150 register rtx op;
3151 enum machine_mode mode ATTRIBUTE_UNUSED;
3152 {
3153 return GET_CODE (op) == MULT;
3154 }
3155
3156 int
3157 div_operator(op, mode)
3158 register rtx op;
3159 enum machine_mode mode ATTRIBUTE_UNUSED;
3160 {
3161 return GET_CODE (op) == DIV;
3162 }
3163
3164 int
3165 arith_or_logical_operator (op, mode)
3166 rtx op;
3167 enum machine_mode mode;
3168 {
3169 return ((mode == VOIDmode || GET_MODE (op) == mode)
3170 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3171 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3172 }
3173
3174 /* Returns 1 if OP is memory operand with a displacement. */
3175
3176 int
3177 memory_displacement_operand (op, mode)
3178 register rtx op;
3179 enum machine_mode mode;
3180 {
3181 struct ix86_address parts;
3182
3183 if (! memory_operand (op, mode))
3184 return 0;
3185
3186 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3187 abort ();
3188
3189 return parts.disp != NULL_RTX;
3190 }
3191
3192 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3193 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3194
3195 ??? It seems likely that this will only work because cmpsi is an
3196 expander, and no actual insns use this. */
3197
3198 int
3199 cmpsi_operand (op, mode)
3200 rtx op;
3201 enum machine_mode mode;
3202 {
3203 if (nonimmediate_operand (op, mode))
3204 return 1;
3205
3206 if (GET_CODE (op) == AND
3207 && GET_MODE (op) == SImode
3208 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3209 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3210 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3211 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3212 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3213 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3214 return 1;
3215
3216 return 0;
3217 }
3218
3219 /* Returns 1 if OP is a memory operand that cannot be represented by the
3220 modRM array. */
3221
3222 int
3223 long_memory_operand (op, mode)
3224 register rtx op;
3225 enum machine_mode mode;
3226 {
3227 if (! memory_operand (op, mode))
3228 return 0;
3229
3230 return memory_address_length (op) != 0;
3231 }
3232
3233 /* Return nonzero if the rtx is known aligned. */
3234
3235 int
3236 aligned_operand (op, mode)
3237 rtx op;
3238 enum machine_mode mode;
3239 {
3240 struct ix86_address parts;
3241
3242 if (!general_operand (op, mode))
3243 return 0;
3244
3245 /* Registers and immediate operands are always "aligned". */
3246 if (GET_CODE (op) != MEM)
3247 return 1;
3248
3249 /* Don't even try to do any aligned optimizations with volatiles. */
3250 if (MEM_VOLATILE_P (op))
3251 return 0;
3252
3253 op = XEXP (op, 0);
3254
3255 /* Pushes and pops are only valid on the stack pointer. */
3256 if (GET_CODE (op) == PRE_DEC
3257 || GET_CODE (op) == POST_INC)
3258 return 1;
3259
3260 /* Decode the address. */
3261 if (! ix86_decompose_address (op, &parts))
3262 abort ();
3263
3264 /* Look for some component that isn't known to be aligned. */
3265 if (parts.index)
3266 {
3267 if (parts.scale < 4
3268 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3269 return 0;
3270 }
3271 if (parts.base)
3272 {
3273 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3274 return 0;
3275 }
3276 if (parts.disp)
3277 {
3278 if (GET_CODE (parts.disp) != CONST_INT
3279 || (INTVAL (parts.disp) & 3) != 0)
3280 return 0;
3281 }
3282
3283 /* Didn't find one -- this must be an aligned address. */
3284 return 1;
3285 }
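/* For instance, a memory reference whose address decomposes to
   (plus (reg) (const_int 3)) is not considered aligned here, since the
   displacement is not a multiple of four, while a plain pseudo register or
   an immediate always counts as aligned.  */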
3286 \f
3287 /* Return true if the constant is something that can be loaded with
3288 a special instruction. Only handle 0.0 and 1.0; others are less
3289 worthwhile. */
3290
3291 int
3292 standard_80387_constant_p (x)
3293 rtx x;
3294 {
3295 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3296 return -1;
3297 /* Note that the 80387 supports other constants, such as pi, that we should
3298 support too. On some machines these are much slower to load as a standard
3299 constant than to load from doubles in memory. */
3300 if (x == CONST0_RTX (GET_MODE (x)))
3301 return 1;
3302 if (x == CONST1_RTX (GET_MODE (x)))
3303 return 2;
3304 return 0;
3305 }
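/* The return values 1 and 2 let the move patterns emit fldz and fld1
   respectively rather than loading the constant from memory.  */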
3306
3307 /* Return 1 if X is an FP constant we can load into an SSE register without
3308 using memory. */
3309 int
3310 standard_sse_constant_p (x)
3311 rtx x;
3312 {
3313 if (GET_CODE (x) != CONST_DOUBLE)
3314 return -1;
3315 return (x == CONST0_RTX (GET_MODE (x)));
3316 }
3317
3318 /* Returns 1 if OP contains a symbol reference */
3319
3320 int
3321 symbolic_reference_mentioned_p (op)
3322 rtx op;
3323 {
3324 register const char *fmt;
3325 register int i;
3326
3327 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3328 return 1;
3329
3330 fmt = GET_RTX_FORMAT (GET_CODE (op));
3331 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3332 {
3333 if (fmt[i] == 'E')
3334 {
3335 register int j;
3336
3337 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3338 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3339 return 1;
3340 }
3341
3342 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3343 return 1;
3344 }
3345
3346 return 0;
3347 }
3348
3349 /* Return 1 if it is appropriate to emit `ret' instructions in the
3350 body of a function. Do this only if the epilogue is simple, needing a
3351 couple of insns. Prior to reloading, we can't tell how many registers
3352 must be saved, so return 0 then. Return 0 if there is no frame
3353 marker to de-allocate.
3354
3355 If NON_SAVING_SETJMP is defined and true, then it is not possible
3356 for the epilogue to be simple, so return 0. This is a special case
3357 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3358 until final, but jump_optimize may need to know sooner if a
3359 `return' is OK. */
3360
3361 int
3362 ix86_can_use_return_insn_p ()
3363 {
3364 struct ix86_frame frame;
3365
3366 #ifdef NON_SAVING_SETJMP
3367 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3368 return 0;
3369 #endif
3370 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
3371 if (profile_block_flag == 2)
3372 return 0;
3373 #endif
3374
3375 if (! reload_completed || frame_pointer_needed)
3376 return 0;
3377
3378 /* Don't allow more than 32768 bytes to pop, since that's all we can do
3379 with one instruction. */
3380 if (current_function_pops_args
3381 && current_function_args_size >= 32768)
3382 return 0;
3383
3384 ix86_compute_frame_layout (&frame);
3385 return frame.to_allocate == 0 && frame.nregs == 0;
3386 }
3387 \f
3388 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3389 int
3390 x86_64_sign_extended_value (value)
3391 rtx value;
3392 {
3393 switch (GET_CODE (value))
3394 {
3395 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3396 to be at least 32 and thus all acceptable constants are
3397 represented as CONST_INT. */
3398 case CONST_INT:
3399 if (HOST_BITS_PER_WIDE_INT == 32)
3400 return 1;
3401 else
3402 {
3403 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3404 return trunc_int_for_mode (val, SImode) == val;
3405 }
3406 break;
3407
3408 /* For certain code models, the symbolic references are known to fit. */
3409 case SYMBOL_REF:
3410 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3411
3412 /* For certain code models, the code is near as well. */
3413 case LABEL_REF:
3414 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3415
3416 /* We also may accept the offsetted memory references in certain special
3417 cases. */
3418 case CONST:
3419 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3420 && XVECLEN (XEXP (value, 0), 0) == 1
3421 && XINT (XEXP (value, 0), 1) == 15)
3422 return 1;
3423 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3424 {
3425 rtx op1 = XEXP (XEXP (value, 0), 0);
3426 rtx op2 = XEXP (XEXP (value, 0), 1);
3427 HOST_WIDE_INT offset;
3428
3429 if (ix86_cmodel == CM_LARGE)
3430 return 0;
3431 if (GET_CODE (op2) != CONST_INT)
3432 return 0;
3433 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3434 switch (GET_CODE (op1))
3435 {
3436 case SYMBOL_REF:
3437 /* For CM_SMALL assume that the latest object is 1MB before the
3438 end of the 31-bit boundary. We may also accept pretty
3439 large negative constants knowing that all objects are
3440 in the positive half of the address space. */
3441 if (ix86_cmodel == CM_SMALL
3442 && offset < 1024*1024*1024
3443 && trunc_int_for_mode (offset, SImode) == offset)
3444 return 1;
3445 /* For CM_KERNEL we know that all objects reside in the
3446 negative half of the 32-bit address space. We may not
3447 accept negative offsets, since they may be just off,
3448 and we may accept pretty large positive ones. */
3449 if (ix86_cmodel == CM_KERNEL
3450 && offset > 0
3451 && trunc_int_for_mode (offset, SImode) == offset)
3452 return 1;
3453 break;
3454 case LABEL_REF:
3455 /* These conditions are similar to SYMBOL_REF ones, just the
3456 constraints for code models differ. */
3457 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3458 && offset < 1024*1024*1024
3459 && trunc_int_for_mode (offset, SImode) == offset)
3460 return 1;
3461 if (ix86_cmodel == CM_KERNEL
3462 && offset > 0
3463 && trunc_int_for_mode (offset, SImode) == offset)
3464 return 1;
3465 break;
3466 default:
3467 return 0;
3468 }
3469 }
3470 return 0;
3471 default:
3472 return 0;
3473 }
3474 }
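/* For example, (const_int -1) and (const_int 0x7fffffff) are accepted here,
   while (const_int 0x80000000) is rejected: sign extending its low 32 bits
   would yield a negative value rather than the original constant.  */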
3475
3476 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3477 int
3478 x86_64_zero_extended_value (value)
3479 rtx value;
3480 {
3481 switch (GET_CODE (value))
3482 {
3483 case CONST_DOUBLE:
3484 if (HOST_BITS_PER_WIDE_INT == 32)
3485 return (GET_MODE (value) == VOIDmode
3486 && !CONST_DOUBLE_HIGH (value));
3487 else
3488 return 0;
3489 case CONST_INT:
3490 if (HOST_BITS_PER_WIDE_INT == 32)
3491 return INTVAL (value) >= 0;
3492 else
3493 return !(INTVAL (value) & ~(HOST_WIDE_INT)0xffffffff);
3494 break;
3495
3496 /* For certain code models, the symbolic references are known to fit. */
3497 case SYMBOL_REF:
3498 return ix86_cmodel == CM_SMALL;
3499
3500 /* For certain code models, the code is near as well. */
3501 case LABEL_REF:
3502 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3503
3504 /* We also may accept the offsetted memory references in certain special
3505 cases. */
3506 case CONST:
3507 if (GET_CODE (XEXP (value, 0)) == PLUS)
3508 {
3509 rtx op1 = XEXP (XEXP (value, 0), 0);
3510 rtx op2 = XEXP (XEXP (value, 0), 1);
3511
3512 if (ix86_cmodel == CM_LARGE)
3513 return 0;
3514 switch (GET_CODE (op1))
3515 {
3516 case SYMBOL_REF:
3517 return 0;
3518 /* For the small code model we may accept pretty large positive
3519 offsets, since one bit is available for free. Negative
3520 offsets are limited by the size of the NULL pointer area
3521 specified by the ABI. */
3522 if (ix86_cmodel == CM_SMALL
3523 && GET_CODE (op2) == CONST_INT
3524 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3525 && (trunc_int_for_mode (INTVAL (op2), SImode)
3526 == INTVAL (op2)))
3527 return 1;
3528 /* ??? For the kernel, we may accept adjustment of
3529 -0x10000000, since we know that it will just convert
3530 negative address space to positive, but perhaps this
3531 is not worthwhile. */
3532 break;
3533 case LABEL_REF:
3534 /* These conditions are similar to SYMBOL_REF ones, just the
3535 constraints for code models differ. */
3536 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3537 && GET_CODE (op2) == CONST_INT
3538 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3539 && (trunc_int_for_mode (INTVAL (op2), SImode)
3540 == INTVAL (op2)))
3541 return 1;
3542 break;
3543 default:
3544 return 0;
3545 }
3546 }
3547 return 0;
3548 default:
3549 return 0;
3550 }
3551 }
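/* Conversely, (const_int 0xffffffff) is a valid zero extended immediate
   while (const_int -1) is not, since its upper 32 bits are all set.  */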
3552
3553 /* Value should be nonzero if functions must have frame pointers.
3554 Zero means the frame pointer need not be set up (and parms may
3555 be accessed via the stack pointer) in functions that seem suitable. */
3556
3557 int
3558 ix86_frame_pointer_required ()
3559 {
3560 /* If we accessed previous frames, then the generated code expects
3561 to be able to access the saved ebp value in our frame. */
3562 if (cfun->machine->accesses_prev_frame)
3563 return 1;
3564
3565 /* Several x86 OSes need a frame pointer for other reasons,
3566 usually pertaining to setjmp. */
3567 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3568 return 1;
3569
3570 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3571 the frame pointer by default. Turn it back on now if we've not
3572 got a leaf function. */
3573 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
3574 return 1;
3575
3576 return 0;
3577 }
3578
3579 /* Record that the current function accesses previous call frames. */
3580
3581 void
3582 ix86_setup_frame_addresses ()
3583 {
3584 cfun->machine->accesses_prev_frame = 1;
3585 }
3586 \f
3587 static char pic_label_name[32];
3588
3589 /* This function generates code for -fpic that loads %ebx with
3590 the return address of the caller and then returns. */
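/* The thunk emitted below amounts to roughly

	movl	(%esp), %ebx
	ret

   i.e. it copies the caller's return address, sitting on top of the
   stack at the call site, into the PIC register.  */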
3591
3592 void
3593 ix86_asm_file_end (file)
3594 FILE *file;
3595 {
3596 rtx xops[2];
3597
3598 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
3599 return;
3600
3601 /* ??? Binutils 2.10 and earlier have a linkonce elimination bug related
3602 to updating relocations against a section being discarded, such that this
3603 doesn't work. We ought to detect this at configure time. */
3604 #if 0
3605 /* The trick here is to create a linkonce section containing the
3606 pic label thunk, but to refer to it with an internal label.
3607 Because the label is internal, we don't have inter-dso name
3608 binding issues on hosts that don't support ".hidden".
3609
3610 In order to use these macros, however, we must create a fake
3611 function decl. */
3612 if (targetm.have_named_sections)
3613 {
3614 tree decl = build_decl (FUNCTION_DECL,
3615 get_identifier ("i686.get_pc_thunk"),
3616 error_mark_node);
3617 DECL_ONE_ONLY (decl) = 1;
3618 UNIQUE_SECTION (decl, 0);
3619 named_section (decl, NULL);
3620 }
3621 else
3622 #else
3623 text_section ();
3624 #endif
3625
3626 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3627 internal (non-global) label that's being emitted, it didn't make
3628 sense to have .type information for local labels. This caused
3629 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3630 me debug info for a label that you're declaring non-global?), it
3631 was changed to call ASM_OUTPUT_LABEL() instead. */
3632
3633 ASM_OUTPUT_LABEL (file, pic_label_name);
3634
3635 xops[0] = pic_offset_table_rtx;
3636 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3637 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3638 output_asm_insn ("ret", xops);
3639 }
3640
3641 void
3642 load_pic_register ()
3643 {
3644 rtx gotsym, pclab;
3645
3646 if (TARGET_64BIT)
3647 abort();
3648
3649 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3650
3651 if (TARGET_DEEP_BRANCH_PREDICTION)
3652 {
3653 if (! pic_label_name[0])
3654 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
3655 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
3656 }
3657 else
3658 {
3659 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
3660 }
3661
3662 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
3663
3664 if (! TARGET_DEEP_BRANCH_PREDICTION)
3665 emit_insn (gen_popsi1 (pic_offset_table_rtx));
3666
3667 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
3668 }
3669
3670 /* Generate a "push" pattern for input ARG. */
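/* The SET built here stores ARG into (mem (pre_dec sp)), the RTL form
   that the backend's push patterns match; it would typically be emitted
   as a plain push of ARG.  */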
3671
3672 static rtx
3673 gen_push (arg)
3674 rtx arg;
3675 {
3676 return gen_rtx_SET (VOIDmode,
3677 gen_rtx_MEM (Pmode,
3678 gen_rtx_PRE_DEC (Pmode,
3679 stack_pointer_rtx)),
3680 arg);
3681 }
3682
3683 /* Return 1 if we need to save REGNO. */
3684 static int
3685 ix86_save_reg (regno, maybe_eh_return)
3686 int regno;
3687 int maybe_eh_return;
3688 {
3689 if (flag_pic
3690 && ! TARGET_64BIT
3691 && regno == PIC_OFFSET_TABLE_REGNUM
3692 && (current_function_uses_pic_offset_table
3693 || current_function_uses_const_pool
3694 || current_function_calls_eh_return))
3695 return 1;
3696
3697 if (current_function_calls_eh_return && maybe_eh_return)
3698 {
3699 unsigned i;
3700 for (i = 0; ; i++)
3701 {
3702 unsigned test = EH_RETURN_DATA_REGNO(i);
3703 if (test == INVALID_REGNUM)
3704 break;
3705 if (test == (unsigned) regno)
3706 return 1;
3707 }
3708 }
3709
3710 return (regs_ever_live[regno]
3711 && !call_used_regs[regno]
3712 && !fixed_regs[regno]
3713 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3714 }
3715
3716 /* Return number of registers to be saved on the stack. */
3717
3718 static int
3719 ix86_nsaved_regs ()
3720 {
3721 int nregs = 0;
3722 int regno;
3723
3724 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3725 if (ix86_save_reg (regno, true))
3726 nregs++;
3727 return nregs;
3728 }
3729
3730 /* Return the offset between two registers, one to be eliminated, and the other
3731 its replacement, at the start of a routine. */
3732
3733 HOST_WIDE_INT
3734 ix86_initial_elimination_offset (from, to)
3735 int from;
3736 int to;
3737 {
3738 struct ix86_frame frame;
3739 ix86_compute_frame_layout (&frame);
3740
3741 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3742 return frame.hard_frame_pointer_offset;
3743 else if (from == FRAME_POINTER_REGNUM
3744 && to == HARD_FRAME_POINTER_REGNUM)
3745 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3746 else
3747 {
3748 if (to != STACK_POINTER_REGNUM)
3749 abort ();
3750 else if (from == ARG_POINTER_REGNUM)
3751 return frame.stack_pointer_offset;
3752 else if (from != FRAME_POINTER_REGNUM)
3753 abort ();
3754 else
3755 return frame.stack_pointer_offset - frame.frame_pointer_offset;
3756 }
3757 }
3758
3759 /* Fill the ix86_frame structure describing the frame of the current function. */
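/* A rough sketch of the layout computed below, from higher addresses
   down (not every area is present in every function):

	[ return address     ]
	[ saved %ebp         ]	<- hard_frame_pointer_offset
	[ register save area ]
	[ va-arg save area   ]
	[ padding1           ]	<- frame_pointer_offset
	[ local variables    ]
	[ outgoing arguments ]
	[ padding2           ]	<- stack_pointer_offset

   to_allocate is everything below the register save area that the
   prologue still needs to subtract from the stack pointer, less any red
   zone used by x86-64 leaf functions.  */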
3760
3761 static void
3762 ix86_compute_frame_layout (frame)
3763 struct ix86_frame *frame;
3764 {
3765 HOST_WIDE_INT total_size;
3766 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
3767 int offset;
3768 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
3769 HOST_WIDE_INT size = get_frame_size ();
3770
3771 frame->nregs = ix86_nsaved_regs ();
3772 total_size = size;
3773
3774 /* Skip return value and save base pointer. */
3775 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
3776
3777 frame->hard_frame_pointer_offset = offset;
3778
3779 /* Do some sanity checking of stack_alignment_needed and
3780 preferred_alignment, since the i386 port is the only one using these
3781 features, and they may break easily. */
3782
3783 if (size && !stack_alignment_needed)
3784 abort ();
3785 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
3786 abort ();
3787 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3788 abort ();
3789 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3790 abort ();
3791
3792 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
3793 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
3794
3795 /* Register save area */
3796 offset += frame->nregs * UNITS_PER_WORD;
3797
3798 /* Va-arg area */
3799 if (ix86_save_varrargs_registers)
3800 {
3801 offset += X86_64_VARARGS_SIZE;
3802 frame->va_arg_size = X86_64_VARARGS_SIZE;
3803 }
3804 else
3805 frame->va_arg_size = 0;
3806
3807 /* Align start of frame for local function. */
3808 frame->padding1 = ((offset + stack_alignment_needed - 1)
3809 & -stack_alignment_needed) - offset;
3810
3811 offset += frame->padding1;
3812
3813 /* Frame pointer points here. */
3814 frame->frame_pointer_offset = offset;
3815
3816 offset += size;
3817
3818 /* Add outgoing arguments area. */
3819 if (ACCUMULATE_OUTGOING_ARGS)
3820 {
3821 offset += current_function_outgoing_args_size;
3822 frame->outgoing_arguments_size = current_function_outgoing_args_size;
3823 }
3824 else
3825 frame->outgoing_arguments_size = 0;
3826
3827 /* Align stack boundary. */
3828 frame->padding2 = ((offset + preferred_alignment - 1)
3829 & -preferred_alignment) - offset;
3830
3831 offset += frame->padding2;
3832
3833 /* We've reached end of stack frame. */
3834 frame->stack_pointer_offset = offset;
3835
3836 /* Size prologue needs to allocate. */
3837 frame->to_allocate =
3838 (size + frame->padding1 + frame->padding2
3839 + frame->outgoing_arguments_size + frame->va_arg_size);
3840
3841 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
3842 && current_function_is_leaf)
3843 {
3844 frame->red_zone_size = frame->to_allocate;
3845 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
3846 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
3847 }
3848 else
3849 frame->red_zone_size = 0;
3850 frame->to_allocate -= frame->red_zone_size;
3851 frame->stack_pointer_offset -= frame->red_zone_size;
3852 #if 0
3853 fprintf (stderr, "nregs: %i\n", frame->nregs);
3854 fprintf (stderr, "size: %i\n", size);
3855 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
3856 fprintf (stderr, "padding1: %i\n", frame->padding1);
3857 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
3858 fprintf (stderr, "padding2: %i\n", frame->padding2);
3859 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
3860 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
3861 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
3862 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
3863 frame->hard_frame_pointer_offset);
3864 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
3865 #endif
3866 }
3867
3868 /* Emit code to save registers in the prologue. */
3869
3870 static void
3871 ix86_emit_save_regs ()
3872 {
3873 register int regno;
3874 rtx insn;
3875
3876 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3877 if (ix86_save_reg (regno, true))
3878 {
3879 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
3880 RTX_FRAME_RELATED_P (insn) = 1;
3881 }
3882 }
3883
3884 /* Emit code to save registers using MOV insns. The first register
3885 is saved at POINTER + OFFSET. */
3886 static void
3887 ix86_emit_save_regs_using_mov (pointer, offset)
3888 rtx pointer;
3889 HOST_WIDE_INT offset;
3890 {
3891 int regno;
3892 rtx insn;
3893
3894 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3895 if (ix86_save_reg (regno, true))
3896 {
3897 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
3898 Pmode, offset),
3899 gen_rtx_REG (Pmode, regno));
3900 RTX_FRAME_RELATED_P (insn) = 1;
3901 offset += UNITS_PER_WORD;
3902 }
3903 }
3904
3905 /* Expand the prologue into a bunch of separate insns. */
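/* In outline, the code built below amounts to (when the pieces apply):

	push	%ebp			; if frame_pointer_needed
	mov	%esp, %ebp
	push	<callee-saved regs>	; or mov-based saves after the
					;   stack adjustment
	sub	$to_allocate, %esp	; or a call to _alloca when stack
					;   probing is needed
	<load PIC register>		; for -fpic on 32-bit targets  */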
3906
3907 void
3908 ix86_expand_prologue ()
3909 {
3910 rtx insn;
3911 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
3912 || current_function_uses_const_pool)
3913 && !TARGET_64BIT);
3914 struct ix86_frame frame;
3915 int use_mov = 0;
3916 HOST_WIDE_INT allocate;
3917
3918 if (!optimize_size)
3919 {
3920 use_fast_prologue_epilogue
3921 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
3922 if (TARGET_PROLOGUE_USING_MOVE)
3923 use_mov = use_fast_prologue_epilogue;
3924 }
3925 ix86_compute_frame_layout (&frame);
3926
3927 /* Note: AT&T enter does NOT have reversed args. Enter is probably
3928 slower on all targets. Also sdb doesn't like it. */
3929
3930 if (frame_pointer_needed)
3931 {
3932 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
3933 RTX_FRAME_RELATED_P (insn) = 1;
3934
3935 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3936 RTX_FRAME_RELATED_P (insn) = 1;
3937 }
3938
3939 allocate = frame.to_allocate;
3940 /* In the case where we are dealing with only a single register and an
3941 empty frame, a push is equivalent to the mov+add sequence. */
3942 if (allocate == 0 && frame.nregs <= 1)
3943 use_mov = 0;
3944
3945 if (!use_mov)
3946 ix86_emit_save_regs ();
3947 else
3948 allocate += frame.nregs * UNITS_PER_WORD;
3949
3950 if (allocate == 0)
3951 ;
3952 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
3953 {
3954 insn = emit_insn (gen_pro_epilogue_adjust_stack
3955 (stack_pointer_rtx, stack_pointer_rtx,
3956 GEN_INT (-allocate)));
3957 RTX_FRAME_RELATED_P (insn) = 1;
3958 }
3959 else
3960 {
3961 /* ??? Is this only valid for Win32? */
3962
3963 rtx arg0, sym;
3964
3965 if (TARGET_64BIT)
3966 abort();
3967
3968 arg0 = gen_rtx_REG (SImode, 0);
3969 emit_move_insn (arg0, GEN_INT (allocate));
3970
3971 sym = gen_rtx_MEM (FUNCTION_MODE,
3972 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
3973 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
3974
3975 CALL_INSN_FUNCTION_USAGE (insn)
3976 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
3977 CALL_INSN_FUNCTION_USAGE (insn));
3978 }
3979 if (use_mov)
3980 {
3981 if (!frame_pointer_needed || !frame.to_allocate)
3982 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
3983 else
3984 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
3985 -frame.nregs * UNITS_PER_WORD);
3986 }
3987
3988 #ifdef SUBTARGET_PROLOGUE
3989 SUBTARGET_PROLOGUE;
3990 #endif
3991
3992 if (pic_reg_used)
3993 load_pic_register ();
3994
3995 /* If we are profiling, make sure no instructions are scheduled before
3996 the call to mcount. However, if -fpic, the above call will have
3997 done that. */
3998 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
3999 emit_insn (gen_blockage ());
4000 }
4001
4002 /* Emit code to restore saved registers using MOV insns. First register
4003 is restored from POINTER + OFFSET. */
4004 static void
4005 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4006 rtx pointer;
4007 int offset;
4008 int maybe_eh_return;
4009 {
4010 int regno;
4011
4012 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4013 if (ix86_save_reg (regno, maybe_eh_return))
4014 {
4015 emit_move_insn (gen_rtx_REG (Pmode, regno),
4016 adjust_address (gen_rtx_MEM (Pmode, pointer),
4017 Pmode, offset));
4018 offset += UNITS_PER_WORD;
4019 }
4020 }
4021
4022 /* Restore function stack, frame, and registers. */
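/* Two strategies are used below: either restore the saved registers
   with moves relative to %ebp/%esp and rewind the stack in one step
   (possibly via "leave"), or deallocate the frame first and pop the
   registers one by one.  The choice is driven by the heuristics in the
   first "if" below.  */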
4023
4024 void
4025 ix86_expand_epilogue (style)
4026 int style;
4027 {
4028 int regno;
4029 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4030 struct ix86_frame frame;
4031 HOST_WIDE_INT offset;
4032
4033 ix86_compute_frame_layout (&frame);
4034
4035 /* Calculate start of saved registers relative to ebp. Special care
4036 must be taken for the normal return case of a function using
4037 eh_return: the eax and edx registers are marked as saved, but not
4038 restored along this path. */
4039 offset = frame.nregs;
4040 if (current_function_calls_eh_return && style != 2)
4041 offset -= 2;
4042 offset *= -UNITS_PER_WORD;
4043
4044 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
4045 if (profile_block_flag == 2)
4046 {
4047 FUNCTION_BLOCK_PROFILER_EXIT;
4048 }
4049 #endif
4050
4051 /* If we're only restoring one register and sp is not valid then
4052 using a move instruction to restore the register is less work
4053 than reloading sp and popping the register.
4054
4055 The default code results in a stack adjustment using an add/lea instruction,
4056 while this code results in a LEAVE instruction (or discrete equivalent),
4057 so it is profitable in some other cases as well, especially when there
4058 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4059 and there is exactly one register to pop. This heuristic may need some
4060 tuning in the future. */
4061 if ((!sp_valid && frame.nregs <= 1)
4062 || (TARGET_EPILOGUE_USING_MOVE
4063 && use_fast_prologue_epilogue
4064 && (frame.nregs > 1 || frame.to_allocate))
4065 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4066 || (frame_pointer_needed && TARGET_USE_LEAVE
4067 && use_fast_prologue_epilogue && frame.nregs == 1)
4068 || current_function_calls_eh_return)
4069 {
4070 /* Restore registers. We can use ebp or esp to address the memory
4071 locations. If both are available, default to ebp, since offsets
4072 are known to be small. The only exception is esp pointing directly to
4073 the end of the block of saved registers, where we may simplify the
4074 addressing mode. */
4075
4076 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4077 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4078 frame.to_allocate, style == 2);
4079 else
4080 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4081 offset, style == 2);
4082
4083 /* eh_return epilogues need %ecx added to the stack pointer. */
4084 if (style == 2)
4085 {
4086 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4087
4088 if (frame_pointer_needed)
4089 {
4090 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4091 tmp = plus_constant (tmp, UNITS_PER_WORD);
4092 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4093
4094 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4095 emit_move_insn (hard_frame_pointer_rtx, tmp);
4096
4097 emit_insn (gen_pro_epilogue_adjust_stack
4098 (stack_pointer_rtx, sa, const0_rtx));
4099 }
4100 else
4101 {
4102 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4103 tmp = plus_constant (tmp, (frame.to_allocate
4104 + frame.nregs * UNITS_PER_WORD));
4105 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4106 }
4107 }
4108 else if (!frame_pointer_needed)
4109 emit_insn (gen_pro_epilogue_adjust_stack
4110 (stack_pointer_rtx, stack_pointer_rtx,
4111 GEN_INT (frame.to_allocate
4112 + frame.nregs * UNITS_PER_WORD)));
4113 /* If not an i386, mov & pop is faster than "leave". */
4114 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4115 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4116 else
4117 {
4118 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4119 hard_frame_pointer_rtx,
4120 const0_rtx));
4121 if (TARGET_64BIT)
4122 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4123 else
4124 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4125 }
4126 }
4127 else
4128 {
4129 /* First step is to deallocate the stack frame so that we can
4130 pop the registers. */
4131 if (!sp_valid)
4132 {
4133 if (!frame_pointer_needed)
4134 abort ();
4135 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4136 hard_frame_pointer_rtx,
4137 GEN_INT (offset)));
4138 }
4139 else if (frame.to_allocate)
4140 emit_insn (gen_pro_epilogue_adjust_stack
4141 (stack_pointer_rtx, stack_pointer_rtx,
4142 GEN_INT (frame.to_allocate)));
4143
4144 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4145 if (ix86_save_reg (regno, false))
4146 {
4147 if (TARGET_64BIT)
4148 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4149 else
4150 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4151 }
4152 if (frame_pointer_needed)
4153 {
4154 /* Leave results in shorter dependency chains on CPUs that are
4155 able to grok it fast. */
4156 if (TARGET_USE_LEAVE)
4157 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4158 else if (TARGET_64BIT)
4159 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4160 else
4161 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4162 }
4163 }
4164
4165 /* Sibcall epilogues don't want a return instruction. */
4166 if (style == 0)
4167 return;
4168
4169 if (current_function_pops_args && current_function_args_size)
4170 {
4171 rtx popc = GEN_INT (current_function_pops_args);
4172
4173 /* The i386 can only pop 64K bytes. If asked to pop more, pop the
4174 return address, do an explicit add, and jump indirectly to the
4175 caller. */
4176
4177 if (current_function_pops_args >= 65536)
4178 {
4179 rtx ecx = gen_rtx_REG (SImode, 2);
4180
4181 /* There is no "pascal" calling convention in the 64bit ABI. */
4182 if (TARGET_64BIT)
4183 abort();
4184
4185 emit_insn (gen_popsi1 (ecx));
4186 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4187 emit_jump_insn (gen_return_indirect_internal (ecx));
4188 }
4189 else
4190 emit_jump_insn (gen_return_pop_internal (popc));
4191 }
4192 else
4193 emit_jump_insn (gen_return_internal ());
4194 }
4195 \f
4196 /* Extract the parts of an RTL expression that is a valid memory address
4197 for an instruction. Return false if the structure of the address is
4198 grossly off. */
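/* For illustration, an address in canonical form such as

	(plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 12))

   decomposes into index = B, scale = 4, base = A, disp = 12, i.e. the
   operand that would be written as "12(%eax,%ebx,4)" if A were %eax and
   B were %ebx.  */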
4199
4200 static int
4201 ix86_decompose_address (addr, out)
4202 register rtx addr;
4203 struct ix86_address *out;
4204 {
4205 rtx base = NULL_RTX;
4206 rtx index = NULL_RTX;
4207 rtx disp = NULL_RTX;
4208 HOST_WIDE_INT scale = 1;
4209 rtx scale_rtx = NULL_RTX;
4210
4211 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4212 base = addr;
4213 else if (GET_CODE (addr) == PLUS)
4214 {
4215 rtx op0 = XEXP (addr, 0);
4216 rtx op1 = XEXP (addr, 1);
4217 enum rtx_code code0 = GET_CODE (op0);
4218 enum rtx_code code1 = GET_CODE (op1);
4219
4220 if (code0 == REG || code0 == SUBREG)
4221 {
4222 if (code1 == REG || code1 == SUBREG)
4223 index = op0, base = op1; /* index + base */
4224 else
4225 base = op0, disp = op1; /* base + displacement */
4226 }
4227 else if (code0 == MULT)
4228 {
4229 index = XEXP (op0, 0);
4230 scale_rtx = XEXP (op0, 1);
4231 if (code1 == REG || code1 == SUBREG)
4232 base = op1; /* index*scale + base */
4233 else
4234 disp = op1; /* index*scale + disp */
4235 }
4236 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4237 {
4238 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4239 scale_rtx = XEXP (XEXP (op0, 0), 1);
4240 base = XEXP (op0, 1);
4241 disp = op1;
4242 }
4243 else if (code0 == PLUS)
4244 {
4245 index = XEXP (op0, 0); /* index + base + disp */
4246 base = XEXP (op0, 1);
4247 disp = op1;
4248 }
4249 else
4250 return FALSE;
4251 }
4252 else if (GET_CODE (addr) == MULT)
4253 {
4254 index = XEXP (addr, 0); /* index*scale */
4255 scale_rtx = XEXP (addr, 1);
4256 }
4257 else if (GET_CODE (addr) == ASHIFT)
4258 {
4259 rtx tmp;
4260
4261 /* We're called for lea too, which implements ashift on occasion. */
4262 index = XEXP (addr, 0);
4263 tmp = XEXP (addr, 1);
4264 if (GET_CODE (tmp) != CONST_INT)
4265 return FALSE;
4266 scale = INTVAL (tmp);
4267 if ((unsigned HOST_WIDE_INT) scale > 3)
4268 return FALSE;
4269 scale = 1 << scale;
4270 }
4271 else
4272 disp = addr; /* displacement */
4273
4274 /* Extract the integral value of scale. */
4275 if (scale_rtx)
4276 {
4277 if (GET_CODE (scale_rtx) != CONST_INT)
4278 return FALSE;
4279 scale = INTVAL (scale_rtx);
4280 }
4281
4282 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
4283 if (base && index && scale == 1
4284 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4285 || index == stack_pointer_rtx))
4286 {
4287 rtx tmp = base;
4288 base = index;
4289 index = tmp;
4290 }
4291
4292 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4293 if ((base == hard_frame_pointer_rtx
4294 || base == frame_pointer_rtx
4295 || base == arg_pointer_rtx) && !disp)
4296 disp = const0_rtx;
4297
4298 /* Special case: on the K6, [%esi] causes the instruction to be vector
4299 decoded. Avoid this by transforming it to [%esi+0]. */
4300 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4301 && base && !index && !disp
4302 && REG_P (base)
4303 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4304 disp = const0_rtx;
4305
4306 /* Special case: encode reg+reg instead of reg*2. */
4307 if (!base && index && scale && scale == 2)
4308 base = index, scale = 1;
4309
4310 /* Special case: scaling cannot be encoded without base or displacement. */
4311 if (!base && !disp && index && scale != 1)
4312 disp = const0_rtx;
4313
4314 out->base = base;
4315 out->index = index;
4316 out->disp = disp;
4317 out->scale = scale;
4318
4319 return TRUE;
4320 }
4321 \f
4322 /* Return cost of the memory address x.
4323 For i386, it is better to use a complex address than let gcc copy
4324 the address into a reg and make a new pseudo. But not if the address
4325 requires two regs - that would mean more pseudos with longer
4326 lifetimes. */
4327 int
4328 ix86_address_cost (x)
4329 rtx x;
4330 {
4331 struct ix86_address parts;
4332 int cost = 1;
4333
4334 if (!ix86_decompose_address (x, &parts))
4335 abort ();
4336
4337 /* More complex memory references are better. */
4338 if (parts.disp && parts.disp != const0_rtx)
4339 cost--;
4340
4341 /* Attempt to minimize number of registers in the address. */
4342 if ((parts.base
4343 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4344 || (parts.index
4345 && (!REG_P (parts.index)
4346 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4347 cost++;
4348
4349 if (parts.base
4350 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4351 && parts.index
4352 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4353 && parts.base != parts.index)
4354 cost++;
4355
4356 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4357 since its predecode logic can't detect the length of instructions
4358 and decoding degenerates to vector decoded. Increase the cost of such
4359 addresses here. The penalty is at least 2 cycles. It may be worthwhile
4360 to split such addresses or even refuse them entirely.
4361
4362 The following addressing modes are affected:
4363 [base+scale*index]
4364 [scale*index+disp]
4365 [base+index]
4366
4367 The first and last case may be avoidable by explicitly coding the zero
4368 into the memory address, but I don't have an AMD K6 machine handy to
4369 check this theory. */
4370
4371 if (TARGET_K6
4372 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4373 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4374 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4375 cost += 10;
4376
4377 return cost;
4378 }
4379 \f
4380 /* If X is a machine specific address (i.e. a symbol or label being
4381 referenced as a displacement from the GOT implemented using an
4382 UNSPEC), then return the base term. Otherwise return X. */
4383
4384 rtx
4385 ix86_find_base_term (x)
4386 rtx x;
4387 {
4388 rtx term;
4389
4390 if (TARGET_64BIT)
4391 {
4392 if (GET_CODE (x) != CONST)
4393 return x;
4394 term = XEXP (x, 0);
4395 if (GET_CODE (term) == PLUS
4396 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4397 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4398 term = XEXP (term, 0);
4399 if (GET_CODE (term) != UNSPEC
4400 || XVECLEN (term, 0) != 1
4401 || XINT (term, 1) != 15)
4402 return x;
4403
4404 term = XVECEXP (term, 0, 0);
4405
4406 if (GET_CODE (term) != SYMBOL_REF
4407 && GET_CODE (term) != LABEL_REF)
4408 return x;
4409
4410 return term;
4411 }
4412
4413 if (GET_CODE (x) != PLUS
4414 || XEXP (x, 0) != pic_offset_table_rtx
4415 || GET_CODE (XEXP (x, 1)) != CONST)
4416 return x;
4417
4418 term = XEXP (XEXP (x, 1), 0);
4419
4420 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4421 term = XEXP (term, 0);
4422
4423 if (GET_CODE (term) != UNSPEC
4424 || XVECLEN (term, 0) != 1
4425 || XINT (term, 1) != 7)
4426 return x;
4427
4428 term = XVECEXP (term, 0, 0);
4429
4430 if (GET_CODE (term) != SYMBOL_REF
4431 && GET_CODE (term) != LABEL_REF)
4432 return x;
4433
4434 return term;
4435 }
4436 \f
4437 /* Determine if a given CONST RTX is a valid memory displacement
4438 in PIC mode. */
4439
4440 int
4441 legitimate_pic_address_disp_p (disp)
4442 register rtx disp;
4443 {
4444 /* In 64bit mode we can allow direct addresses of symbols and labels
4445 when they are not dynamic symbols. */
4446 if (TARGET_64BIT)
4447 {
4448 rtx x = disp;
4449 if (GET_CODE (disp) == CONST)
4450 x = XEXP (disp, 0);
4451 /* ??? Handle PIC code models */
4452 if (GET_CODE (x) == PLUS
4453 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4454 && ix86_cmodel == CM_SMALL_PIC
4455 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4456 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4457 x = XEXP (x, 0);
4458 if (local_symbolic_operand (x, Pmode))
4459 return 1;
4460 }
4461 if (GET_CODE (disp) != CONST)
4462 return 0;
4463 disp = XEXP (disp, 0);
4464
4465 if (TARGET_64BIT)
4466 {
4467 /* It is not safe for us to allow PLUS expressions. This limits the
4468 allowed distance of GOT tables. We should not need these anyway. */
4469 if (GET_CODE (disp) != UNSPEC
4470 || XVECLEN (disp, 0) != 1
4471 || XINT (disp, 1) != 15)
4472 return 0;
4473
4474 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4475 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4476 return 0;
4477 return 1;
4478 }
4479
4480 if (GET_CODE (disp) == PLUS)
4481 {
4482 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4483 return 0;
4484 disp = XEXP (disp, 0);
4485 }
4486
4487 if (GET_CODE (disp) != UNSPEC
4488 || XVECLEN (disp, 0) != 1)
4489 return 0;
4490
4491 /* Must be @GOT or @GOTOFF. */
4492 switch (XINT (disp, 1))
4493 {
4494 case 6: /* @GOT */
4495 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4496
4497 case 7: /* @GOTOFF */
4498 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4499 }
4500
4501 return 0;
4502 }
4503
4504 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4505 memory address for an instruction. The MODE argument is the machine mode
4506 for the MEM expression that wants to use this address.
4507
4508 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
4509 convert common non-canonical forms to canonical form so that they will
4510 be recognized. */
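/* Roughly, the canonical form accepted below is

	base + index*scale + disp

   where base and index are registers in Pmode, scale is 1, 2, 4 or 8,
   and disp is a constant, with extra restrictions applied for PIC
   displacements and for the 64-bit sign-extended displacement range.  */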
4511
4512 int
4513 legitimate_address_p (mode, addr, strict)
4514 enum machine_mode mode;
4515 register rtx addr;
4516 int strict;
4517 {
4518 struct ix86_address parts;
4519 rtx base, index, disp;
4520 HOST_WIDE_INT scale;
4521 const char *reason = NULL;
4522 rtx reason_rtx = NULL_RTX;
4523
4524 if (TARGET_DEBUG_ADDR)
4525 {
4526 fprintf (stderr,
4527 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
4528 GET_MODE_NAME (mode), strict);
4529 debug_rtx (addr);
4530 }
4531
4532 if (! ix86_decompose_address (addr, &parts))
4533 {
4534 reason = "decomposition failed";
4535 goto report_error;
4536 }
4537
4538 base = parts.base;
4539 index = parts.index;
4540 disp = parts.disp;
4541 scale = parts.scale;
4542
4543 /* Validate base register.
4544
4545 Don't allow SUBREGs here; it can lead to spill failures when the base
4546 is one word out of a two-word structure, which is represented internally
4547 as a DImode int. */
4548
4549 if (base)
4550 {
4551 reason_rtx = base;
4552
4553 if (GET_CODE (base) != REG)
4554 {
4555 reason = "base is not a register";
4556 goto report_error;
4557 }
4558
4559 if (GET_MODE (base) != Pmode)
4560 {
4561 reason = "base is not in Pmode";
4562 goto report_error;
4563 }
4564
4565 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
4566 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
4567 {
4568 reason = "base is not valid";
4569 goto report_error;
4570 }
4571 }
4572
4573 /* Validate index register.
4574
4575 Don't allow SUBREGs here; it can lead to spill failures when the index
4576 is one word out of a two-word structure, which is represented internally
4577 as a DImode int. */
4578
4579 if (index)
4580 {
4581 reason_rtx = index;
4582
4583 if (GET_CODE (index) != REG)
4584 {
4585 reason = "index is not a register";
4586 goto report_error;
4587 }
4588
4589 if (GET_MODE (index) != Pmode)
4590 {
4591 reason = "index is not in Pmode";
4592 goto report_error;
4593 }
4594
4595 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
4596 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
4597 {
4598 reason = "index is not valid";
4599 goto report_error;
4600 }
4601 }
4602
4603 /* Validate scale factor. */
4604 if (scale != 1)
4605 {
4606 reason_rtx = GEN_INT (scale);
4607 if (!index)
4608 {
4609 reason = "scale without index";
4610 goto report_error;
4611 }
4612
4613 if (scale != 2 && scale != 4 && scale != 8)
4614 {
4615 reason = "scale is not a valid multiplier";
4616 goto report_error;
4617 }
4618 }
4619
4620 /* Validate displacement. */
4621 if (disp)
4622 {
4623 reason_rtx = disp;
4624
4625 if (!CONSTANT_ADDRESS_P (disp))
4626 {
4627 reason = "displacement is not constant";
4628 goto report_error;
4629 }
4630
4631 if (TARGET_64BIT)
4632 {
4633 if (!x86_64_sign_extended_value (disp))
4634 {
4635 reason = "displacement is out of range";
4636 goto report_error;
4637 }
4638 }
4639 else
4640 {
4641 if (GET_CODE (disp) == CONST_DOUBLE)
4642 {
4643 reason = "displacement is a const_double";
4644 goto report_error;
4645 }
4646 }
4647
4648 if (flag_pic && SYMBOLIC_CONST (disp))
4649 {
4650 if (TARGET_64BIT && (index || base))
4651 {
4652 reason = "non-constant pic memory reference";
4653 goto report_error;
4654 }
4655 if (! legitimate_pic_address_disp_p (disp))
4656 {
4657 reason = "displacement is an invalid pic construct";
4658 goto report_error;
4659 }
4660
4661 /* This code used to verify that a symbolic pic displacement
4662 includes the pic_offset_table_rtx register.
4663
4664 While this is a good idea, unfortunately these constructs may
4665 be created by the "adds using lea" optimization for incorrect
4666 code like:
4667
4668 int a;
4669 int foo(int i)
4670 {
4671 return *(&a+i);
4672 }
4673
4674 This code is nonsensical, but results in addressing the
4675 GOT table with a pic_offset_table_rtx base. We can't
4676 just refuse it easily, since it gets matched by the
4677 "addsi3" pattern, which later gets split to lea when the
4678 output register differs from the input. While this
4679 could be handled by a separate addsi pattern for this case
4680 that never results in lea, disabling this test seems to be
4681 the easier and correct fix for the crash. */
4682 }
4683 else if (HALF_PIC_P ())
4684 {
4685 if (! HALF_PIC_ADDRESS_P (disp)
4686 || (base != NULL_RTX || index != NULL_RTX))
4687 {
4688 reason = "displacement is an invalid half-pic reference";
4689 goto report_error;
4690 }
4691 }
4692 }
4693
4694 /* Everything looks valid. */
4695 if (TARGET_DEBUG_ADDR)
4696 fprintf (stderr, "Success.\n");
4697 return TRUE;
4698
4699 report_error:
4700 if (TARGET_DEBUG_ADDR)
4701 {
4702 fprintf (stderr, "Error: %s\n", reason);
4703 debug_rtx (reason_rtx);
4704 }
4705 return FALSE;
4706 }
4707 \f
4708 /* Return a unique alias set for the GOT. */
4709
4710 static HOST_WIDE_INT
4711 ix86_GOT_alias_set ()
4712 {
4713 static HOST_WIDE_INT set = -1;
4714 if (set == -1)
4715 set = new_alias_set ();
4716 return set;
4717 }
4718
4719 /* Return a legitimate reference for ORIG (an address) using the
4720 register REG. If REG is 0, a new pseudo is generated.
4721
4722 There are two types of references that must be handled:
4723
4724 1. Global data references must load the address from the GOT, via
4725 the PIC reg. An insn is emitted to do this load, and the reg is
4726 returned.
4727
4728 2. Static data references, constant pool addresses, and code labels
4729 compute the address as an offset from the GOT, whose base is in
4730 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
4731 differentiate them from global data objects. The returned
4732 address is the PIC reg + an unspec constant.
4733
4734 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
4735 reg also appears in the address. */
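/* For illustration of the 32-bit case: a global symbol SYM becomes a
   load through the GOT, roughly (mem (plus %ebx (const (unspec [SYM] 6)))),
   printed as "SYM@GOT(%ebx)", while a local symbol becomes the sum
   (plus %ebx (const (unspec [SYM] 7))), printed as "SYM@GOTOFF(%ebx)".  */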
4736
4737 rtx
4738 legitimize_pic_address (orig, reg)
4739 rtx orig;
4740 rtx reg;
4741 {
4742 rtx addr = orig;
4743 rtx new = orig;
4744 rtx base;
4745
4746 if (local_symbolic_operand (addr, Pmode))
4747 {
4748 /* In 64bit mode we can address such objects directly. */
4749 if (TARGET_64BIT)
4750 new = addr;
4751 else
4752 {
4753 /* This symbol may be referenced via a displacement from the PIC
4754 base address (@GOTOFF). */
4755
4756 current_function_uses_pic_offset_table = 1;
4757 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
4758 new = gen_rtx_CONST (Pmode, new);
4759 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4760
4761 if (reg != 0)
4762 {
4763 emit_move_insn (reg, new);
4764 new = reg;
4765 }
4766 }
4767 }
4768 else if (GET_CODE (addr) == SYMBOL_REF)
4769 {
4770 if (TARGET_64BIT)
4771 {
4772 current_function_uses_pic_offset_table = 1;
4773 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15);
4774 new = gen_rtx_CONST (Pmode, new);
4775 new = gen_rtx_MEM (Pmode, new);
4776 RTX_UNCHANGING_P (new) = 1;
4777 set_mem_alias_set (new, ix86_GOT_alias_set ());
4778
4779 if (reg == 0)
4780 reg = gen_reg_rtx (Pmode);
4781 /* Use gen_movsi directly, otherwise the address is loaded
4782 into a register for CSE. We don't want to CSE these addresses;
4783 instead we CSE addresses from the GOT table, so skip this. */
4784 emit_insn (gen_movsi (reg, new));
4785 new = reg;
4786 }
4787 else
4788 {
4789 /* This symbol must be referenced via a load from the
4790 Global Offset Table (@GOT). */
4791
4792 current_function_uses_pic_offset_table = 1;
4793 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
4794 new = gen_rtx_CONST (Pmode, new);
4795 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4796 new = gen_rtx_MEM (Pmode, new);
4797 RTX_UNCHANGING_P (new) = 1;
4798 set_mem_alias_set (new, ix86_GOT_alias_set ());
4799
4800 if (reg == 0)
4801 reg = gen_reg_rtx (Pmode);
4802 emit_move_insn (reg, new);
4803 new = reg;
4804 }
4805 }
4806 else
4807 {
4808 if (GET_CODE (addr) == CONST)
4809 {
4810 addr = XEXP (addr, 0);
4811 if (GET_CODE (addr) == UNSPEC)
4812 {
4813 /* Check that the unspec is one of the ones we generate? */
4814 }
4815 else if (GET_CODE (addr) != PLUS)
4816 abort ();
4817 }
4818 if (GET_CODE (addr) == PLUS)
4819 {
4820 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
4821
4822 /* Check first to see if this is a constant offset from a @GOTOFF
4823 symbol reference. */
4824 if (local_symbolic_operand (op0, Pmode)
4825 && GET_CODE (op1) == CONST_INT)
4826 {
4827 if (!TARGET_64BIT)
4828 {
4829 current_function_uses_pic_offset_table = 1;
4830 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
4831 new = gen_rtx_PLUS (Pmode, new, op1);
4832 new = gen_rtx_CONST (Pmode, new);
4833 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4834
4835 if (reg != 0)
4836 {
4837 emit_move_insn (reg, new);
4838 new = reg;
4839 }
4840 }
4841 else
4842 {
4843 /* ??? We need to limit offsets here. */
4844 }
4845 }
4846 else
4847 {
4848 base = legitimize_pic_address (XEXP (addr, 0), reg);
4849 new = legitimize_pic_address (XEXP (addr, 1),
4850 base == reg ? NULL_RTX : reg);
4851
4852 if (GET_CODE (new) == CONST_INT)
4853 new = plus_constant (base, INTVAL (new));
4854 else
4855 {
4856 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
4857 {
4858 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
4859 new = XEXP (new, 1);
4860 }
4861 new = gen_rtx_PLUS (Pmode, base, new);
4862 }
4863 }
4864 }
4865 }
4866 return new;
4867 }
4868 \f
4869 /* Try machine-dependent ways of modifying an illegitimate address
4870 to be legitimate. If we find one, return the new, valid address.
4871 This macro is used in only one place: `memory_address' in explow.c.
4872
4873 OLDX is the address as it was before break_out_memory_refs was called.
4874 In some cases it is useful to look at this to decide what needs to be done.
4875
4876 MODE and WIN are passed so that this macro can use
4877 GO_IF_LEGITIMATE_ADDRESS.
4878
4879 It is always safe for this macro to do nothing. It exists to recognize
4880 opportunities to optimize the output.
4881
4882 For the 80386, we handle X+REG by loading X into a register R and
4883 using R+REG. R will go in a general reg and indexing will be used.
4884 However, if REG is a broken-out memory address or multiplication,
4885 nothing needs to be done because REG can certainly go in a general reg.
4886
4887 When -fpic is used, special handling is needed for symbolic references.
4888 See comments by legitimize_pic_address in i386.c for details. */
4889
4890 rtx
4891 legitimize_address (x, oldx, mode)
4892 register rtx x;
4893 register rtx oldx ATTRIBUTE_UNUSED;
4894 enum machine_mode mode;
4895 {
4896 int changed = 0;
4897 unsigned log;
4898
4899 if (TARGET_DEBUG_ADDR)
4900 {
4901 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
4902 GET_MODE_NAME (mode));
4903 debug_rtx (x);
4904 }
4905
4906 if (flag_pic && SYMBOLIC_CONST (x))
4907 return legitimize_pic_address (x, 0);
4908
4909 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
4910 if (GET_CODE (x) == ASHIFT
4911 && GET_CODE (XEXP (x, 1)) == CONST_INT
4912 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
4913 {
4914 changed = 1;
4915 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
4916 GEN_INT (1 << log));
4917 }
4918
4919 if (GET_CODE (x) == PLUS)
4920 {
4921 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
4922
4923 if (GET_CODE (XEXP (x, 0)) == ASHIFT
4924 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4925 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
4926 {
4927 changed = 1;
4928 XEXP (x, 0) = gen_rtx_MULT (Pmode,
4929 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
4930 GEN_INT (1 << log));
4931 }
4932
4933 if (GET_CODE (XEXP (x, 1)) == ASHIFT
4934 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4935 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
4936 {
4937 changed = 1;
4938 XEXP (x, 1) = gen_rtx_MULT (Pmode,
4939 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
4940 GEN_INT (1 << log));
4941 }
4942
4943 /* Put multiply first if it isn't already. */
4944 if (GET_CODE (XEXP (x, 1)) == MULT)
4945 {
4946 rtx tmp = XEXP (x, 0);
4947 XEXP (x, 0) = XEXP (x, 1);
4948 XEXP (x, 1) = tmp;
4949 changed = 1;
4950 }
4951
4952 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
4953 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
4954 created by virtual register instantiation, register elimination, and
4955 similar optimizations. */
4956 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
4957 {
4958 changed = 1;
4959 x = gen_rtx_PLUS (Pmode,
4960 gen_rtx_PLUS (Pmode, XEXP (x, 0),
4961 XEXP (XEXP (x, 1), 0)),
4962 XEXP (XEXP (x, 1), 1));
4963 }
4964
4965 /* Canonicalize
4966 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
4967 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
4968 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
4969 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4970 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
4971 && CONSTANT_P (XEXP (x, 1)))
4972 {
4973 rtx constant;
4974 rtx other = NULL_RTX;
4975
4976 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4977 {
4978 constant = XEXP (x, 1);
4979 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
4980 }
4981 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
4982 {
4983 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
4984 other = XEXP (x, 1);
4985 }
4986 else
4987 constant = 0;
4988
4989 if (constant)
4990 {
4991 changed = 1;
4992 x = gen_rtx_PLUS (Pmode,
4993 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
4994 XEXP (XEXP (XEXP (x, 0), 1), 0)),
4995 plus_constant (other, INTVAL (constant)));
4996 }
4997 }
4998
4999 if (changed && legitimate_address_p (mode, x, FALSE))
5000 return x;
5001
5002 if (GET_CODE (XEXP (x, 0)) == MULT)
5003 {
5004 changed = 1;
5005 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5006 }
5007
5008 if (GET_CODE (XEXP (x, 1)) == MULT)
5009 {
5010 changed = 1;
5011 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5012 }
5013
5014 if (changed
5015 && GET_CODE (XEXP (x, 1)) == REG
5016 && GET_CODE (XEXP (x, 0)) == REG)
5017 return x;
5018
5019 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5020 {
5021 changed = 1;
5022 x = legitimize_pic_address (x, 0);
5023 }
5024
5025 if (changed && legitimate_address_p (mode, x, FALSE))
5026 return x;
5027
5028 if (GET_CODE (XEXP (x, 0)) == REG)
5029 {
5030 register rtx temp = gen_reg_rtx (Pmode);
5031 register rtx val = force_operand (XEXP (x, 1), temp);
5032 if (val != temp)
5033 emit_move_insn (temp, val);
5034
5035 XEXP (x, 1) = temp;
5036 return x;
5037 }
5038
5039 else if (GET_CODE (XEXP (x, 1)) == REG)
5040 {
5041 register rtx temp = gen_reg_rtx (Pmode);
5042 register rtx val = force_operand (XEXP (x, 0), temp);
5043 if (val != temp)
5044 emit_move_insn (temp, val);
5045
5046 XEXP (x, 0) = temp;
5047 return x;
5048 }
5049 }
5050
5051 return x;
5052 }
5053 \f
5054 /* Print an integer constant expression in assembler syntax. Addition
5055 and subtraction are the only arithmetic that may appear in these
5056 expressions. FILE is the stdio stream to write to, X is the rtx, and
5057 CODE is the operand print code from the output string. */
5058
5059 static void
5060 output_pic_addr_const (file, x, code)
5061 FILE *file;
5062 rtx x;
5063 int code;
5064 {
5065 char buf[256];
5066
5067 switch (GET_CODE (x))
5068 {
5069 case PC:
5070 if (flag_pic)
5071 putc ('.', file);
5072 else
5073 abort ();
5074 break;
5075
5076 case SYMBOL_REF:
5077 assemble_name (file, XSTR (x, 0));
5078 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5079 fputs ("@PLT", file);
5080 break;
5081
5082 case LABEL_REF:
5083 x = XEXP (x, 0);
5084 /* FALLTHRU */
5085 case CODE_LABEL:
5086 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5087 assemble_name (asm_out_file, buf);
5088 break;
5089
5090 case CONST_INT:
5091 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5092 break;
5093
5094 case CONST:
5095 /* This used to output parentheses around the expression,
5096 but that does not work on the 386 (either ATT or BSD assembler). */
5097 output_pic_addr_const (file, XEXP (x, 0), code);
5098 break;
5099
5100 case CONST_DOUBLE:
5101 if (GET_MODE (x) == VOIDmode)
5102 {
5103 /* We can use %d if the number is <32 bits and positive. */
5104 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5105 fprintf (file, "0x%lx%08lx",
5106 (unsigned long) CONST_DOUBLE_HIGH (x),
5107 (unsigned long) CONST_DOUBLE_LOW (x));
5108 else
5109 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5110 }
5111 else
5112 /* We can't handle floating point constants;
5113 PRINT_OPERAND must handle them. */
5114 output_operand_lossage ("floating constant misused");
5115 break;
5116
5117 case PLUS:
5118 /* Some assemblers need integer constants to appear first. */
5119 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5120 {
5121 output_pic_addr_const (file, XEXP (x, 0), code);
5122 putc ('+', file);
5123 output_pic_addr_const (file, XEXP (x, 1), code);
5124 }
5125 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5126 {
5127 output_pic_addr_const (file, XEXP (x, 1), code);
5128 putc ('+', file);
5129 output_pic_addr_const (file, XEXP (x, 0), code);
5130 }
5131 else
5132 abort ();
5133 break;
5134
5135 case MINUS:
5136 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
5137 output_pic_addr_const (file, XEXP (x, 0), code);
5138 putc ('-', file);
5139 output_pic_addr_const (file, XEXP (x, 1), code);
5140 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
5141 break;
5142
5143 case UNSPEC:
5144 if (XVECLEN (x, 0) != 1)
5145 abort ();
5146 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5147 switch (XINT (x, 1))
5148 {
5149 case 6:
5150 fputs ("@GOT", file);
5151 break;
5152 case 7:
5153 fputs ("@GOTOFF", file);
5154 break;
5155 case 8:
5156 fputs ("@PLT", file);
5157 break;
5158 case 15:
5159 fputs ("@GOTPCREL(%RIP)", file);
5160 break;
5161 default:
5162 output_operand_lossage ("invalid UNSPEC as operand");
5163 break;
5164 }
5165 break;
5166
5167 default:
5168 output_operand_lossage ("invalid expression as operand");
5169 }
5170 }
5171
5172 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5173 We need to handle our special PIC relocations. */
5174
5175 void
5176 i386_dwarf_output_addr_const (file, x)
5177 FILE *file;
5178 rtx x;
5179 {
5180 #ifdef ASM_QUAD
5181 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : INT_ASM_OP);
5182 #else
5183 if (TARGET_64BIT)
5184 abort ();
5185 fprintf (file, "%s", INT_ASM_OP);
5186 #endif
5187 if (flag_pic)
5188 output_pic_addr_const (file, x, '\0');
5189 else
5190 output_addr_const (file, x);
5191 fputc ('\n', file);
5192 }
5193
5194 /* In the name of slightly smaller debug output, and to cater to
5195 general assembler lossage, recognize PIC+GOTOFF and turn it back
5196 into a direct symbol reference. */
5197
5198 rtx
5199 i386_simplify_dwarf_addr (orig_x)
5200 rtx orig_x;
5201 {
5202 rtx x = orig_x;
5203
5204 if (TARGET_64BIT)
5205 {
5206 if (GET_CODE (x) != CONST
5207 || GET_CODE (XEXP (x, 0)) != UNSPEC
5208 || XINT (XEXP (x, 0), 1) != 15)
5209 return orig_x;
5210 return XVECEXP (XEXP (x, 0), 0, 0);
5211 }
5212
5213 if (GET_CODE (x) != PLUS
5214 || GET_CODE (XEXP (x, 0)) != REG
5215 || GET_CODE (XEXP (x, 1)) != CONST)
5216 return orig_x;
5217
5218 x = XEXP (XEXP (x, 1), 0);
5219 if (GET_CODE (x) == UNSPEC
5220 && (XINT (x, 1) == 6
5221 || XINT (x, 1) == 7))
5222 return XVECEXP (x, 0, 0);
5223
5224 if (GET_CODE (x) == PLUS
5225 && GET_CODE (XEXP (x, 0)) == UNSPEC
5226 && GET_CODE (XEXP (x, 1)) == CONST_INT
5227 && (XINT (XEXP (x, 0), 1) == 6
5228 || XINT (XEXP (x, 0), 1) == 7))
5229 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5230
5231 return orig_x;
5232 }
5233 \f
5234 static void
5235 put_condition_code (code, mode, reverse, fp, file)
5236 enum rtx_code code;
5237 enum machine_mode mode;
5238 int reverse, fp;
5239 FILE *file;
5240 {
5241 const char *suffix;
5242
5243 if (mode == CCFPmode || mode == CCFPUmode)
5244 {
5245 enum rtx_code second_code, bypass_code;
5246 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5247 if (bypass_code != NIL || second_code != NIL)
5248 abort();
5249 code = ix86_fp_compare_code_to_integer (code);
5250 mode = CCmode;
5251 }
5252 if (reverse)
5253 code = reverse_condition (code);
5254
5255 switch (code)
5256 {
5257 case EQ:
5258 suffix = "e";
5259 break;
5260 case NE:
5261 suffix = "ne";
5262 break;
5263 case GT:
5264 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
5265 abort ();
5266 suffix = "g";
5267 break;
5268 case GTU:
5269 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
5270 Those same assemblers have the same but opposite lossage on cmov. */
5271 if (mode != CCmode)
5272 abort ();
5273 suffix = fp ? "nbe" : "a";
5274 break;
5275 case LT:
5276 if (mode == CCNOmode || mode == CCGOCmode)
5277 suffix = "s";
5278 else if (mode == CCmode || mode == CCGCmode)
5279 suffix = "l";
5280 else
5281 abort ();
5282 break;
5283 case LTU:
5284 if (mode != CCmode)
5285 abort ();
5286 suffix = "b";
5287 break;
5288 case GE:
5289 if (mode == CCNOmode || mode == CCGOCmode)
5290 suffix = "ns";
5291 else if (mode == CCmode || mode == CCGCmode)
5292 suffix = "ge";
5293 else
5294 abort ();
5295 break;
5296 case GEU:
5297 /* ??? As above. */
5298 if (mode != CCmode)
5299 abort ();
5300 suffix = fp ? "nb" : "ae";
5301 break;
5302 case LE:
5303 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
5304 abort ();
5305 suffix = "le";
5306 break;
5307 case LEU:
5308 if (mode != CCmode)
5309 abort ();
5310 suffix = "be";
5311 break;
5312 case UNORDERED:
5313 suffix = fp ? "u" : "p";
5314 break;
5315 case ORDERED:
5316 suffix = fp ? "nu" : "np";
5317 break;
5318 default:
5319 abort ();
5320 }
5321 fputs (suffix, file);
5322 }
5323
5324 void
5325 print_reg (x, code, file)
5326 rtx x;
5327 int code;
5328 FILE *file;
5329 {
5330 if (REGNO (x) == ARG_POINTER_REGNUM
5331 || REGNO (x) == FRAME_POINTER_REGNUM
5332 || REGNO (x) == FLAGS_REG
5333 || REGNO (x) == FPSR_REG)
5334 abort ();
5335
5336 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
5337 putc ('%', file);
5338
5339 if (code == 'w' || MMX_REG_P (x))
5340 code = 2;
5341 else if (code == 'b')
5342 code = 1;
5343 else if (code == 'k')
5344 code = 4;
5345 else if (code == 'q')
5346 code = 8;
5347 else if (code == 'y')
5348 code = 3;
5349 else if (code == 'h')
5350 code = 0;
5351 else
5352 code = GET_MODE_SIZE (GET_MODE (x));
5353
5354 /* Irritatingly, AMD extended registers use a different naming
5355 convention from the normal registers. */
5356 if (REX_INT_REG_P (x))
5357 {
5358 if (!TARGET_64BIT)
5359 abort ();
5360 switch (code)
5361 {
5362 case 0:
5363 error ("Extended registers have no high halves\n");
5364 break;
5365 case 1:
5366 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
5367 break;
5368 case 2:
5369 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
5370 break;
5371 case 4:
5372 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
5373 break;
5374 case 8:
5375 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
5376 break;
5377 default:
5378 error ("Unsupported operand size for extended register.\n");
5379 break;
5380 }
5381 return;
5382 }
5383 switch (code)
5384 {
5385 case 3:
5386 if (STACK_TOP_P (x))
5387 {
5388 fputs ("st(0)", file);
5389 break;
5390 }
5391 /* FALLTHRU */
5392 case 8:
5393 case 4:
5394 case 12:
5395 if (! ANY_FP_REG_P (x))
5396 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5397 /* FALLTHRU */
5398 case 16:
5399 case 2:
5400 fputs (hi_reg_name[REGNO (x)], file);
5401 break;
5402 case 1:
5403 fputs (qi_reg_name[REGNO (x)], file);
5404 break;
5405 case 0:
5406 fputs (qi_high_reg_name[REGNO (x)], file);
5407 break;
5408 default:
5409 abort ();
5410 }
5411 }
5412
5413 /* Meaning of CODE:
5414 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
5415 C -- print opcode suffix for set/cmov insn.
5416 c -- like C, but print reversed condition
5417 F,f -- likewise, but for floating-point.
5418 R -- print the prefix for register names.
5419 z -- print the opcode suffix for the size of the current operand.
5420 * -- print a star (in certain assembler syntax)
5421 A -- print an absolute memory reference.
5422 w -- print the operand as if it's a "word" (HImode) even if it isn't.
5423 s -- print a shift double count, followed by the assembler's argument
5424 delimiter.
5425 b -- print the QImode name of the register for the indicated operand.
5426 %b0 would print %al if operands[0] is reg 0.
5427 w -- likewise, print the HImode name of the register.
5428 k -- likewise, print the SImode name of the register.
5429 q -- likewise, print the DImode name of the register.
5430 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5431 y -- print "st(0)" instead of "st" as a register.
5432 D -- print condition for SSE cmp instruction.
5433 P -- if PIC, print an @PLT suffix.
5434 X -- don't print any sort of PIC '@' suffix for a symbol.
5435 */
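/* For instance, with operand 0 in %eax and operand 1 a DFmode memory
   reference, a template along the lines of "fadd%z1\t%1" would emit
   "faddl" followed by the memory operand, "%k0" would print "%eax" and
   "%b0" would print "%al".  */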
5436
5437 void
5438 print_operand (file, x, code)
5439 FILE *file;
5440 rtx x;
5441 int code;
5442 {
5443 if (code)
5444 {
5445 switch (code)
5446 {
5447 case '*':
5448 if (ASSEMBLER_DIALECT == 0)
5449 putc ('*', file);
5450 return;
5451
5452 case 'A':
5453 if (ASSEMBLER_DIALECT == 0)
5454 putc ('*', file);
5455 else if (ASSEMBLER_DIALECT == 1)
5456 {
5457 /* Intel syntax. For absolute addresses, registers should not
5458 be surrounded by brackets. */
5459 if (GET_CODE (x) != REG)
5460 {
5461 putc ('[', file);
5462 PRINT_OPERAND (file, x, 0);
5463 putc (']', file);
5464 return;
5465 }
5466 }
5467
5468 PRINT_OPERAND (file, x, 0);
5469 return;
5470
5471
5472 case 'L':
5473 if (ASSEMBLER_DIALECT == 0)
5474 putc ('l', file);
5475 return;
5476
5477 case 'W':
5478 if (ASSEMBLER_DIALECT == 0)
5479 putc ('w', file);
5480 return;
5481
5482 case 'B':
5483 if (ASSEMBLER_DIALECT == 0)
5484 putc ('b', file);
5485 return;
5486
5487 case 'Q':
5488 if (ASSEMBLER_DIALECT == 0)
5489 putc ('l', file);
5490 return;
5491
5492 case 'S':
5493 if (ASSEMBLER_DIALECT == 0)
5494 putc ('s', file);
5495 return;
5496
5497 case 'T':
5498 if (ASSEMBLER_DIALECT == 0)
5499 putc ('t', file);
5500 return;
5501
5502 case 'z':
5503 /* 387 opcodes don't get size suffixes if the operands are
5504 registers. */
5505
5506 if (STACK_REG_P (x))
5507 return;
5508
5509 /* Derive the size of the op from the size of the operand. */
5510 switch (GET_MODE_SIZE (GET_MODE (x)))
5511 {
5512 case 2:
5513 #ifdef HAVE_GAS_FILDS_FISTS
5514 putc ('s', file);
5515 #endif
5516 return;
5517
5518 case 4:
5519 if (GET_MODE (x) == SFmode)
5520 {
5521 putc ('s', file);
5522 return;
5523 }
5524 else
5525 putc ('l', file);
5526 return;
5527
5528 case 12:
5529 case 16:
5530 putc ('t', file);
5531 return;
5532
5533 case 8:
5534 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5535 {
5536 #ifdef GAS_MNEMONICS
5537 putc ('q', file);
5538 #else
5539 putc ('l', file);
5540 putc ('l', file);
5541 #endif
5542 }
5543 else
5544 putc ('l', file);
5545 return;
5546
5547 default:
5548 abort ();
5549 }
5550
5551 case 'b':
5552 case 'w':
5553 case 'k':
5554 case 'q':
5555 case 'h':
5556 case 'y':
5557 case 'X':
5558 case 'P':
5559 break;
5560
5561 case 's':
5562 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
5563 {
5564 PRINT_OPERAND (file, x, 0);
5565 putc (',', file);
5566 }
5567 return;
5568
5569 case 'D':
5570 /* A little bit of braindamage here. The SSE compare instructions
5571 use completely different names for the comparisons than the
5572 fp conditional moves do. */
5573 switch (GET_CODE (x))
5574 {
5575 case EQ:
5576 case UNEQ:
5577 fputs ("eq", file);
5578 break;
5579 case LT:
5580 case UNLT:
5581 fputs ("lt", file);
5582 break;
5583 case LE:
5584 case UNLE:
5585 fputs ("le", file);
5586 break;
5587 case UNORDERED:
5588 fputs ("unord", file);
5589 break;
5590 case NE:
5591 case LTGT:
5592 fputs ("neq", file);
5593 break;
5594 case UNGE:
5595 case GE:
5596 fputs ("nlt", file);
5597 break;
5598 case UNGT:
5599 case GT:
5600 fputs ("nle", file);
5601 break;
5602 case ORDERED:
5603 fputs ("ord", file);
5604 break;
5605 default:
5606 abort ();
5607 break;
5608 }
5609 return;
5610 case 'C':
5611 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
5612 return;
5613 case 'F':
5614 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
5615 return;
5616
5617 /* Like above, but reverse condition */
5618 case 'c':
5619 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
5620 return;
5621 case 'f':
5622 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
5623 return;
5624 case '+':
5625 {
5626 rtx x;
5627
5628 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
5629 return;
5630
5631 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5632 if (x)
5633 {
5634 int pred_val = INTVAL (XEXP (x, 0));
5635
5636 if (pred_val < REG_BR_PROB_BASE * 45 / 100
5637 || pred_val > REG_BR_PROB_BASE * 55 / 100)
5638 {
5639 int taken = pred_val > REG_BR_PROB_BASE / 2;
5640 int cputaken = final_forward_branch_p (current_output_insn) == 0;
5641
5642 /* Emit hints only in the case the default branch prediction
5643 heuristics would fail.  */
5644 if (taken != cputaken)
5645 {
5646 /* We use 3e (DS) prefix for taken branches and
5647 2e (CS) prefix for not taken branches. */
5648 if (taken)
5649 fputs ("ds ; ", file);
5650 else
5651 fputs ("cs ; ", file);
5652 }
5653 }
5654 }
5655 return;
5656 }
5657 default:
5658 {
5659 char str[50];
5660 sprintf (str, "invalid operand code `%c'", code);
5661 output_operand_lossage (str);
5662 }
5663 }
5664 }
5665
5666 if (GET_CODE (x) == REG)
5667 {
5668 PRINT_REG (x, code, file);
5669 }
5670
5671 else if (GET_CODE (x) == MEM)
5672 {
5673 /* No `byte ptr' prefix for call instructions. */
5674 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
5675 {
5676 const char * size;
5677 switch (GET_MODE_SIZE (GET_MODE (x)))
5678 {
5679 case 1: size = "BYTE"; break;
5680 case 2: size = "WORD"; break;
5681 case 4: size = "DWORD"; break;
5682 case 8: size = "QWORD"; break;
5683 case 12: size = "XWORD"; break;
5684 case 16: size = "XMMWORD"; break;
5685 default:
5686 abort ();
5687 }
5688
5689 /* Check for explicit size override (codes 'b', 'w' and 'k') */
5690 if (code == 'b')
5691 size = "BYTE";
5692 else if (code == 'w')
5693 size = "WORD";
5694 else if (code == 'k')
5695 size = "DWORD";
5696
5697 fputs (size, file);
5698 fputs (" PTR ", file);
5699 }
5700
5701 x = XEXP (x, 0);
5702 if (flag_pic && CONSTANT_ADDRESS_P (x))
5703 output_pic_addr_const (file, x, code);
5704 /* Avoid (%rip) for call operands. */
5705 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
5706 && GET_CODE (x) != CONST_INT)
5707 output_addr_const (file, x);
5708 else
5709 output_address (x);
5710 }
5711
5712 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
5713 {
5714 REAL_VALUE_TYPE r;
5715 long l;
5716
5717 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5718 REAL_VALUE_TO_TARGET_SINGLE (r, l);
5719
5720 if (ASSEMBLER_DIALECT == 0)
5721 putc ('$', file);
5722 fprintf (file, "0x%lx", l);
5723 }
5724
5725 /* These float cases don't actually occur as immediate operands. */
5726 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5727 {
5728 REAL_VALUE_TYPE r;
5729 char dstr[30];
5730
5731 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5732 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5733 fprintf (file, "%s", dstr);
5734 }
5735
5736 else if (GET_CODE (x) == CONST_DOUBLE
5737 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
5738 {
5739 REAL_VALUE_TYPE r;
5740 char dstr[30];
5741
5742 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5743 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5744 fprintf (file, "%s", dstr);
5745 }
5746 else
5747 {
5748 if (code != 'P')
5749 {
5750 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
5751 {
5752 if (ASSEMBLER_DIALECT == 0)
5753 putc ('$', file);
5754 }
5755 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
5756 || GET_CODE (x) == LABEL_REF)
5757 {
5758 if (ASSEMBLER_DIALECT == 0)
5759 putc ('$', file);
5760 else
5761 fputs ("OFFSET FLAT:", file);
5762 }
5763 }
5764 if (GET_CODE (x) == CONST_INT)
5765 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5766 else if (flag_pic)
5767 output_pic_addr_const (file, x, code);
5768 else
5769 output_addr_const (file, x);
5770 }
5771 }
5772 \f
5773 /* Print a memory operand whose address is ADDR. */
5774
5775 void
5776 print_operand_address (file, addr)
5777 FILE *file;
5778 register rtx addr;
5779 {
5780 struct ix86_address parts;
5781 rtx base, index, disp;
5782 int scale;
5783
5784 if (! ix86_decompose_address (addr, &parts))
5785 abort ();
5786
5787 base = parts.base;
5788 index = parts.index;
5789 disp = parts.disp;
5790 scale = parts.scale;
5791
5792 if (!base && !index)
5793 {
5794 /* A displacement-only address requires special attention.  */
5795
5796 if (GET_CODE (disp) == CONST_INT)
5797 {
5798 if (ASSEMBLER_DIALECT != 0)
5799 {
5800 if (USER_LABEL_PREFIX[0] == 0)
5801 putc ('%', file);
5802 fputs ("ds:", file);
5803 }
5804 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
5805 }
5806 else if (flag_pic)
5807 output_pic_addr_const (file, addr, 0);
5808 else
5809 output_addr_const (file, addr);
5810
5811 /* Use the one byte shorter RIP-relative addressing for 64-bit mode.  */
5812 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
5813 fputs ("(%rip)", file);
5814 }
5815 else
5816 {
5817 if (ASSEMBLER_DIALECT == 0)
5818 {
5819 if (disp)
5820 {
5821 if (flag_pic)
5822 output_pic_addr_const (file, disp, 0);
5823 else if (GET_CODE (disp) == LABEL_REF)
5824 output_asm_label (disp);
5825 else
5826 output_addr_const (file, disp);
5827 }
5828
5829 putc ('(', file);
5830 if (base)
5831 PRINT_REG (base, 0, file);
5832 if (index)
5833 {
5834 putc (',', file);
5835 PRINT_REG (index, 0, file);
5836 if (scale != 1)
5837 fprintf (file, ",%d", scale);
5838 }
5839 putc (')', file);
5840 }
5841 else
5842 {
5843 rtx offset = NULL_RTX;
5844
5845 if (disp)
5846 {
5847 /* Pull out the offset of a symbol; print any symbol itself. */
5848 if (GET_CODE (disp) == CONST
5849 && GET_CODE (XEXP (disp, 0)) == PLUS
5850 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
5851 {
5852 offset = XEXP (XEXP (disp, 0), 1);
5853 disp = gen_rtx_CONST (VOIDmode,
5854 XEXP (XEXP (disp, 0), 0));
5855 }
5856
5857 if (flag_pic)
5858 output_pic_addr_const (file, disp, 0);
5859 else if (GET_CODE (disp) == LABEL_REF)
5860 output_asm_label (disp);
5861 else if (GET_CODE (disp) == CONST_INT)
5862 offset = disp;
5863 else
5864 output_addr_const (file, disp);
5865 }
5866
5867 putc ('[', file);
5868 if (base)
5869 {
5870 PRINT_REG (base, 0, file);
5871 if (offset)
5872 {
5873 if (INTVAL (offset) >= 0)
5874 putc ('+', file);
5875 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
5876 }
5877 }
5878 else if (offset)
5879 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
5880 else
5881 putc ('0', file);
5882
5883 if (index)
5884 {
5885 putc ('+', file);
5886 PRINT_REG (index, 0, file);
5887 if (scale != 1)
5888 fprintf (file, "*%d", scale);
5889 }
5890 putc (']', file);
5891 }
5892 }
5893 }
5894 \f
5895 /* Split one or more DImode RTL references into pairs of SImode
5896 references. The RTL can be REG, offsettable MEM, integer constant, or
5897 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
5898 split and "num" is its length. lo_half and hi_half are output arrays
5899 that parallel "operands". */
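/* A minimal usage sketch (variable names here are illustrative only):

     rtx lo, hi;
     split_di (&operands[1], 1, &lo, &hi);

   After reload, a DImode hard register operand becomes the SImode pair
   (REGNO, REGNO + 1), and an offsettable MEM becomes two SImode MEMs at
   offsets 0 and 4, as the code below shows.  */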
5900
5901 void
5902 split_di (operands, num, lo_half, hi_half)
5903 rtx operands[];
5904 int num;
5905 rtx lo_half[], hi_half[];
5906 {
5907 while (num--)
5908 {
5909 rtx op = operands[num];
5910 if (CONSTANT_P (op))
5911 split_double (op, &lo_half[num], &hi_half[num]);
5912 else if (! reload_completed)
5913 {
5914 lo_half[num] = gen_lowpart (SImode, op);
5915 hi_half[num] = gen_highpart (SImode, op);
5916 }
5917 else if (GET_CODE (op) == REG)
5918 {
5919 if (TARGET_64BIT)
5920 abort ();
5921 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
5922 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
5923 }
5924 else if (offsettable_memref_p (op))
5925 {
5926 lo_half[num] = adjust_address (op, SImode, 0);
5927 hi_half[num] = adjust_address (op, SImode, 4);
5928 }
5929 else
5930 abort ();
5931 }
5932 }
5933 \f
5934 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
5935 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
5936 is the expression of the binary operation. The output may either be
5937 emitted here, or returned to the caller, like all output_* functions.
5938
5939 There is no guarantee that the operands are the same mode, as they
5940 might be within FLOAT or FLOAT_EXTEND expressions. */
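/* For a rough feel of the output (a sketch, not an enumeration of all
   cases): an SFmode SSE add returns "addss\t{%2, %0|%0, %2}", while a 387
   add with the destination on top of the stack returns
   "fadd\t{%y2, %0|%0, %y2}", gaining a "p" (pop) suffix when the other
   stack operand dies.  */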
5941
5942 #ifndef SYSV386_COMPAT
5943 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
5944 wants to fix the assemblers because that causes incompatibility
5945 with gcc. No-one wants to fix gcc because that causes
5946 incompatibility with assemblers... You can use the option of
5947 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
5948 #define SYSV386_COMPAT 1
5949 #endif
5950
5951 const char *
5952 output_387_binary_op (insn, operands)
5953 rtx insn;
5954 rtx *operands;
5955 {
5956 static char buf[30];
5957 const char *p;
5958 const char *ssep;
5959 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
5960
5961 #ifdef ENABLE_CHECKING
5962 /* Even if we do not want to check the inputs, this documents input
5963 constraints. Which helps in understanding the following code. */
5964 if (STACK_REG_P (operands[0])
5965 && ((REG_P (operands[1])
5966 && REGNO (operands[0]) == REGNO (operands[1])
5967 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
5968 || (REG_P (operands[2])
5969 && REGNO (operands[0]) == REGNO (operands[2])
5970 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
5971 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
5972 ; /* ok */
5973 else if (!is_sse)
5974 abort ();
5975 #endif
5976
5977 switch (GET_CODE (operands[3]))
5978 {
5979 case PLUS:
5980 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
5981 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
5982 p = "fiadd";
5983 else
5984 p = "fadd";
5985 ssep = "add";
5986 break;
5987
5988 case MINUS:
5989 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
5990 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
5991 p = "fisub";
5992 else
5993 p = "fsub";
5994 ssep = "sub";
5995 break;
5996
5997 case MULT:
5998 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
5999 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6000 p = "fimul";
6001 else
6002 p = "fmul";
6003 ssep = "mul";
6004 break;
6005
6006 case DIV:
6007 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6008 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6009 p = "fidiv";
6010 else
6011 p = "fdiv";
6012 ssep = "div";
6013 break;
6014
6015 default:
6016 abort ();
6017 }
6018
6019 if (is_sse)
6020 {
6021 strcpy (buf, ssep);
6022 if (GET_MODE (operands[0]) == SFmode)
6023 strcat (buf, "ss\t{%2, %0|%0, %2}");
6024 else
6025 strcat (buf, "sd\t{%2, %0|%0, %2}");
6026 return buf;
6027 }
6028 strcpy (buf, p);
6029
6030 switch (GET_CODE (operands[3]))
6031 {
6032 case MULT:
6033 case PLUS:
6034 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6035 {
6036 rtx temp = operands[2];
6037 operands[2] = operands[1];
6038 operands[1] = temp;
6039 }
6040
6041 /* We know operands[0] == operands[1].  */
6042
6043 if (GET_CODE (operands[2]) == MEM)
6044 {
6045 p = "%z2\t%2";
6046 break;
6047 }
6048
6049 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6050 {
6051 if (STACK_TOP_P (operands[0]))
6052 /* How is it that we are storing to a dead operand[2]?
6053 Well, presumably operands[1] is dead too. We can't
6054 store the result to st(0) as st(0) gets popped on this
6055 instruction. Instead store to operands[2] (which I
6056 think has to be st(1)). st(1) will be popped later.
6057 gcc <= 2.8.1 didn't have this check and generated
6058 assembly code that the Unixware assembler rejected. */
6059 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6060 else
6061 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6062 break;
6063 }
6064
6065 if (STACK_TOP_P (operands[0]))
6066 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6067 else
6068 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6069 break;
6070
6071 case MINUS:
6072 case DIV:
6073 if (GET_CODE (operands[1]) == MEM)
6074 {
6075 p = "r%z1\t%1";
6076 break;
6077 }
6078
6079 if (GET_CODE (operands[2]) == MEM)
6080 {
6081 p = "%z2\t%2";
6082 break;
6083 }
6084
6085 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6086 {
6087 #if SYSV386_COMPAT
6088 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6089 derived assemblers, confusingly reverse the direction of
6090 the operation for fsub{r} and fdiv{r} when the
6091 destination register is not st(0). The Intel assembler
6092 doesn't have this brain damage. Read !SYSV386_COMPAT to
6093 figure out what the hardware really does. */
6094 if (STACK_TOP_P (operands[0]))
6095 p = "{p\t%0, %2|rp\t%2, %0}";
6096 else
6097 p = "{rp\t%2, %0|p\t%0, %2}";
6098 #else
6099 if (STACK_TOP_P (operands[0]))
6100 /* As above for fmul/fadd, we can't store to st(0). */
6101 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6102 else
6103 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6104 #endif
6105 break;
6106 }
6107
6108 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6109 {
6110 #if SYSV386_COMPAT
6111 if (STACK_TOP_P (operands[0]))
6112 p = "{rp\t%0, %1|p\t%1, %0}";
6113 else
6114 p = "{p\t%1, %0|rp\t%0, %1}";
6115 #else
6116 if (STACK_TOP_P (operands[0]))
6117 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6118 else
6119 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6120 #endif
6121 break;
6122 }
6123
6124 if (STACK_TOP_P (operands[0]))
6125 {
6126 if (STACK_TOP_P (operands[1]))
6127 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6128 else
6129 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
6130 break;
6131 }
6132 else if (STACK_TOP_P (operands[1]))
6133 {
6134 #if SYSV386_COMPAT
6135 p = "{\t%1, %0|r\t%0, %1}";
6136 #else
6137 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
6138 #endif
6139 }
6140 else
6141 {
6142 #if SYSV386_COMPAT
6143 p = "{r\t%2, %0|\t%0, %2}";
6144 #else
6145 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6146 #endif
6147 }
6148 break;
6149
6150 default:
6151 abort ();
6152 }
6153
6154 strcat (buf, p);
6155 return buf;
6156 }
6157
6158 /* Output code to initialize the control word copies used by the
6159 trunc?f?i patterns.  NORMAL is set to the current control word, while
6160 ROUND_DOWN is set to a control word that rounds toward zero (truncation).  */
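/* A sketch of the bit twiddling involved: the x87 rounding-control field
   occupies bits 10-11 of the control word, and setting both bits, i.e.

     round_down = normal | 0x0c00;

   (or inserting 0x0c into the high byte, as done below when partial
   register stalls are not a concern) selects round-toward-zero, which is
   what the truncating conversions require.  */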
6161 void
6162 emit_i387_cw_initialization (normal, round_down)
6163 rtx normal, round_down;
6164 {
6165 rtx reg = gen_reg_rtx (HImode);
6166
6167 emit_insn (gen_x86_fnstcw_1 (normal));
6168 emit_move_insn (reg, normal);
6169 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
6170 && !TARGET_64BIT)
6171 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
6172 else
6173 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
6174 emit_move_insn (round_down, reg);
6175 }
6176
6177 /* Output code for INSN to convert a float to a signed int. OPERANDS
6178 are the insn operands. The output may be [HSD]Imode and the input
6179 operand may be [SDX]Fmode. */
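/* The emitted sequence, sketched for the common SImode case where the
   stack top dies, is roughly

     fldcw  %3       ; switch to the truncating control word
     fistpl %0       ; store (and pop) the converted integer
     fldcw  %2       ; restore the original control word

   with an extra "fld %y1" up front in the DImode case, since there is no
   non-popping 64-bit store.  */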
6180
6181 const char *
6182 output_fix_trunc (insn, operands)
6183 rtx insn;
6184 rtx *operands;
6185 {
6186 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6187 int dimode_p = GET_MODE (operands[0]) == DImode;
6188
6189 /* Jump through a hoop or two for DImode, since the hardware has no
6190 non-popping instruction. We used to do this a different way, but
6191 that was somewhat fragile and broke with post-reload splitters. */
6192 if (dimode_p && !stack_top_dies)
6193 output_asm_insn ("fld\t%y1", operands);
6194
6195 if (!STACK_TOP_P (operands[1]))
6196 abort ();
6197
6198 if (GET_CODE (operands[0]) != MEM)
6199 abort ();
6200
6201 output_asm_insn ("fldcw\t%3", operands);
6202 if (stack_top_dies || dimode_p)
6203 output_asm_insn ("fistp%z0\t%0", operands);
6204 else
6205 output_asm_insn ("fist%z0\t%0", operands);
6206 output_asm_insn ("fldcw\t%2", operands);
6207
6208 return "";
6209 }
6210
6211 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
6212 should be used and 2 when fnstsw should be used. UNORDERED_P is true
6213 when fucom should be used. */
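/* As a sketch of the two main shapes produced below: with EFLAGS_P == 1
   and a live stack top the result is "fcomi\t{%y1, %0|%0, %y1}", while
   with EFLAGS_P == 2 the status word is funneled through AX, e.g.
   "fcom%z2\t%y2\n\tfnstsw\t%0", and the caller then inspects the bits via
   sahf or test.  */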
6214
6215 const char *
6216 output_fp_compare (insn, operands, eflags_p, unordered_p)
6217 rtx insn;
6218 rtx *operands;
6219 int eflags_p, unordered_p;
6220 {
6221 int stack_top_dies;
6222 rtx cmp_op0 = operands[0];
6223 rtx cmp_op1 = operands[1];
6224 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
6225
6226 if (eflags_p == 2)
6227 {
6228 cmp_op0 = cmp_op1;
6229 cmp_op1 = operands[2];
6230 }
6231 if (is_sse)
6232 {
6233 if (GET_MODE (operands[0]) == SFmode)
6234 if (unordered_p)
6235 return "ucomiss\t{%1, %0|%0, %1}";
6236 else
6237 return "comiss\t{%1, %0|%0, %y}";
6238 else
6239 if (unordered_p)
6240 return "ucomisd\t{%1, %0|%0, %1}";
6241 else
6242 return "comisd\t{%1, %0|%0, %y}";
6243 }
6244
6245 if (! STACK_TOP_P (cmp_op0))
6246 abort ();
6247
6248 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6249
6250 if (STACK_REG_P (cmp_op1)
6251 && stack_top_dies
6252 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
6253 && REGNO (cmp_op1) != FIRST_STACK_REG)
6254 {
6255 /* If the top of the 387 stack dies, and the other operand
6256 is also a stack register that dies, then this must be a
6257 `fcompp' float compare.  */
6258
6259 if (eflags_p == 1)
6260 {
6261 /* There is no double popping fcomi variant. Fortunately,
6262 eflags is immune from the fstp's cc clobbering. */
6263 if (unordered_p)
6264 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
6265 else
6266 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
6267 return "fstp\t%y0";
6268 }
6269 else
6270 {
6271 if (eflags_p == 2)
6272 {
6273 if (unordered_p)
6274 return "fucompp\n\tfnstsw\t%0";
6275 else
6276 return "fcompp\n\tfnstsw\t%0";
6277 }
6278 else
6279 {
6280 if (unordered_p)
6281 return "fucompp";
6282 else
6283 return "fcompp";
6284 }
6285 }
6286 }
6287 else
6288 {
6289 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
6290
6291 static const char * const alt[24] =
6292 {
6293 "fcom%z1\t%y1",
6294 "fcomp%z1\t%y1",
6295 "fucom%z1\t%y1",
6296 "fucomp%z1\t%y1",
6297
6298 "ficom%z1\t%y1",
6299 "ficomp%z1\t%y1",
6300 NULL,
6301 NULL,
6302
6303 "fcomi\t{%y1, %0|%0, %y1}",
6304 "fcomip\t{%y1, %0|%0, %y1}",
6305 "fucomi\t{%y1, %0|%0, %y1}",
6306 "fucomip\t{%y1, %0|%0, %y1}",
6307
6308 NULL,
6309 NULL,
6310 NULL,
6311 NULL,
6312
6313 "fcom%z2\t%y2\n\tfnstsw\t%0",
6314 "fcomp%z2\t%y2\n\tfnstsw\t%0",
6315 "fucom%z2\t%y2\n\tfnstsw\t%0",
6316 "fucomp%z2\t%y2\n\tfnstsw\t%0",
6317
6318 "ficom%z2\t%y2\n\tfnstsw\t%0",
6319 "ficomp%z2\t%y2\n\tfnstsw\t%0",
6320 NULL,
6321 NULL
6322 };
6323
6324 int mask;
6325 const char *ret;
6326
6327 mask = eflags_p << 3;
6328 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
6329 mask |= unordered_p << 1;
6330 mask |= stack_top_dies;
6331
6332 if (mask >= 24)
6333 abort ();
6334 ret = alt[mask];
6335 if (ret == NULL)
6336 abort ();
6337
6338 return ret;
6339 }
6340 }
6341
6342 /* Output assembler code to FILE to initialize basic-block profiling.
6343
6344 If profile_block_flag == 2
6345
6346 Output code to call the subroutine `__bb_init_trace_func'
6347 and pass two parameters to it. The first parameter is
6348 the address of a block allocated in the object module.
6349 The second parameter is the number of the first basic block
6350 of the function.
6351
6352 The name of the block is a local symbol made with this statement:
6353
6354 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
6355
6356 Of course, since you are writing the definition of
6357 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
6358 can take a short cut in the definition of this macro and use the
6359 name that you know will result.
6360
6361 The number of the first basic block of the function is
6362 passed to the macro in BLOCK_OR_LABEL.
6363
6364 If described in a virtual assembler language the code to be
6365 output looks like:
6366
6367 parameter1 <- LPBX0
6368 parameter2 <- BLOCK_OR_LABEL
6369 call __bb_init_trace_func
6370
6371 else if profile_block_flag != 0
6372
6373 Output code to call the subroutine `__bb_init_func'
6374 and pass one single parameter to it, which is the same
6375 as the first parameter to `__bb_init_trace_func'.
6376
6377 The first word of this parameter is a flag which will be nonzero if
6378 the object module has already been initialized. So test this word
6379 first, and do not call `__bb_init_func' if the flag is nonzero.
6380 Note: When profile_block_flag == 2 the test need not be done
6381 but `__bb_init_trace_func' *must* be called.
6382
6383 BLOCK_OR_LABEL may be used to generate a label number as a
6384 branch destination in case `__bb_init_func' will not be called.
6385
6386 If described in a virtual assembler language the code to be
6387 output looks like:
6388
6389 cmp (LPBX0),0
6390 jne local_label
6391 parameter1 <- LPBX0
6392 call __bb_init_func
6393 local_label:
6394 */
6395
6396 void
6397 ix86_output_function_block_profiler (file, block_or_label)
6398 FILE *file;
6399 int block_or_label;
6400 {
6401 static int num_func = 0;
6402 rtx xops[8];
6403 char block_table[80], false_label[80];
6404
6405 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
6406
6407 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
6408 xops[5] = stack_pointer_rtx;
6409 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
6410
6411 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
6412
6413 switch (profile_block_flag)
6414 {
6415 case 2:
6416 xops[2] = GEN_INT (block_or_label);
6417 xops[3] = gen_rtx_MEM (Pmode,
6418 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
6419 xops[6] = GEN_INT (8);
6420
6421 output_asm_insn ("push{l}\t%2", xops);
6422 if (!flag_pic)
6423 output_asm_insn ("push{l}\t%1", xops);
6424 else
6425 {
6426 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
6427 output_asm_insn ("push{l}\t%7", xops);
6428 }
6429 output_asm_insn ("call\t%P3", xops);
6430 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
6431 break;
6432
6433 default:
6434 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
6435
6436 xops[0] = const0_rtx;
6437 xops[2] = gen_rtx_MEM (Pmode,
6438 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
6439 xops[3] = gen_rtx_MEM (Pmode,
6440 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
6441 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
6442 xops[6] = GEN_INT (4);
6443
6444 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
6445
6446 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
6447 output_asm_insn ("jne\t%2", xops);
6448
6449 if (!flag_pic)
6450 output_asm_insn ("push{l}\t%1", xops);
6451 else
6452 {
6453 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
6454 output_asm_insn ("push{l}\t%7", xops);
6455 }
6456 output_asm_insn ("call\t%P3", xops);
6457 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
6458 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
6459 num_func++;
6460 break;
6461 }
6462 }
6463
6464 /* Output assembler code to FILE to increment a counter associated
6465 with basic block number BLOCKNO.
6466
6467 If profile_block_flag == 2
6468
6469 Output code to initialize the global structure `__bb' and
6470 call the function `__bb_trace_func' which will increment the
6471 counter.
6472
6473 `__bb' consists of two words. In the first word the number
6474 of the basic block has to be stored. In the second word
6475 the address of a block allocated in the object module
6476 has to be stored.
6477
6478 The basic block number is given by BLOCKNO.
6479
6480 The address of the block is given by the label created with
6481
6482 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
6483
6484 by FUNCTION_BLOCK_PROFILER.
6485
6486 Of course, since you are writing the definition of
6487 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
6488 can take a short cut in the definition of this macro and use the
6489 name that you know will result.
6490
6491 If described in a virtual assembler language the code to be
6492 output looks like:
6493
6494 move BLOCKNO -> (__bb)
6495 move LPBX0 -> (__bb+4)
6496 call __bb_trace_func
6497
6498 Note that function `__bb_trace_func' must not change the
6499 machine state, especially the flag register. To grant
6500 this, you must output code to save and restore registers
6501 either in this macro or in the macros MACHINE_STATE_SAVE
6502 and MACHINE_STATE_RESTORE. The last two macros will be
6503 used in the function `__bb_trace_func', so you must make
6504 sure that the function prologue does not change any
6505 register prior to saving it with MACHINE_STATE_SAVE.
6506
6507 else if profile_block_flag != 0
6508
6509 Output code to increment the counter directly.
6510 Basic blocks are numbered separately from zero within each
6511 compiled object module. The count associated with block number
6512 BLOCKNO is at index BLOCKNO in an array of words; the name of
6513 this array is a local symbol made with this statement:
6514
6515 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
6516
6517 Of course, since you are writing the definition of
6518 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
6519 can take a short cut in the definition of this macro and use the
6520 name that you know will result.
6521
6522 If described in a virtual assembler language the code to be
6523 output looks like:
6524
6525 inc (LPBX2+4*BLOCKNO)
6526 */
6527
6528 void
6529 ix86_output_block_profiler (file, blockno)
6530 FILE *file ATTRIBUTE_UNUSED;
6531 int blockno;
6532 {
6533 rtx xops[8], cnt_rtx;
6534 char counts[80];
6535 char *block_table = counts;
6536
6537 switch (profile_block_flag)
6538 {
6539 case 2:
6540 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
6541
6542 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
6543 xops[2] = GEN_INT (blockno);
6544 xops[3] = gen_rtx_MEM (Pmode,
6545 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
6546 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
6547 xops[5] = plus_constant (xops[4], 4);
6548 xops[0] = gen_rtx_MEM (SImode, xops[4]);
6549 xops[6] = gen_rtx_MEM (SImode, xops[5]);
6550
6551 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
6552
6553 output_asm_insn ("pushf", xops);
6554 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6555 if (flag_pic)
6556 {
6557 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
6558 output_asm_insn ("push{l}\t%7", xops);
6559 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
6560 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
6561 output_asm_insn ("pop{l}\t%7", xops);
6562 }
6563 else
6564 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
6565 output_asm_insn ("call\t%P3", xops);
6566 output_asm_insn ("popf", xops);
6567
6568 break;
6569
6570 default:
6571 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
6572 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
6573 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
6574
6575 if (blockno)
6576 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
6577
6578 if (flag_pic)
6579 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
6580
6581 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
6582 output_asm_insn ("inc{l}\t%0", xops);
6583
6584 break;
6585 }
6586 }
6587 \f
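/* Expand a move of MODE from operands[1] to operands[0].  Symbolic PIC
   sources are legitimized first, memory-to-memory moves get the source
   forced into a register, and (outside of reload) floating point constants
   headed for a register are forced into the constant pool, since that
   gives better code out of the back end.  */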
6588 void
6589 ix86_expand_move (mode, operands)
6590 enum machine_mode mode;
6591 rtx operands[];
6592 {
6593 int strict = (reload_in_progress || reload_completed);
6594 rtx insn;
6595
6596 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
6597 {
6598 /* Emit insns to move operands[1] into operands[0]. */
6599
6600 if (GET_CODE (operands[0]) == MEM)
6601 operands[1] = force_reg (Pmode, operands[1]);
6602 else
6603 {
6604 rtx temp = operands[0];
6605 if (GET_CODE (temp) != REG)
6606 temp = gen_reg_rtx (Pmode);
6607 temp = legitimize_pic_address (operands[1], temp);
6608 if (temp == operands[0])
6609 return;
6610 operands[1] = temp;
6611 }
6612 }
6613 else
6614 {
6615 if (GET_CODE (operands[0]) == MEM
6616 && (GET_MODE (operands[0]) == QImode
6617 || !push_operand (operands[0], mode))
6618 && GET_CODE (operands[1]) == MEM)
6619 operands[1] = force_reg (mode, operands[1]);
6620
6621 if (push_operand (operands[0], mode)
6622 && ! general_no_elim_operand (operands[1], mode))
6623 operands[1] = copy_to_mode_reg (mode, operands[1]);
6624
6625 if (FLOAT_MODE_P (mode))
6626 {
6627 /* If we are loading a floating point constant to a register,
6628 force the value to memory now, since we'll get better code
6629 out the back end. */
6630
6631 if (strict)
6632 ;
6633 else if (GET_CODE (operands[1]) == CONST_DOUBLE
6634 && register_operand (operands[0], mode))
6635 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
6636 }
6637 }
6638
6639 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
6640
6641 emit_insn (insn);
6642 }
6643
6644 /* Attempt to expand a binary operator.  Make the expansion closer to the
6645 actual machine than just general_operand, which would allow 3 separate
6646 memory references (one output, two inputs) in a single insn.  */
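/* A concrete sketch: for "a = a + b" with both a and b in memory, x86 has
   no mem,mem add, so b is forced into a register first and the emitted
   insn carries an explicit clobber of the flags register, roughly

     (parallel [(set (mem:SI a) (plus:SI (mem:SI a) (reg:SI tmp)))
                (clobber (reg:CC flags))])

   where "a" and "tmp" are purely illustrative names.  */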
6647
6648 void
6649 ix86_expand_binary_operator (code, mode, operands)
6650 enum rtx_code code;
6651 enum machine_mode mode;
6652 rtx operands[];
6653 {
6654 int matching_memory;
6655 rtx src1, src2, dst, op, clob;
6656
6657 dst = operands[0];
6658 src1 = operands[1];
6659 src2 = operands[2];
6660
6661 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6662 if (GET_RTX_CLASS (code) == 'c'
6663 && (rtx_equal_p (dst, src2)
6664 || immediate_operand (src1, mode)))
6665 {
6666 rtx temp = src1;
6667 src1 = src2;
6668 src2 = temp;
6669 }
6670
6671 /* If the destination is memory, and we do not have matching source
6672 operands, do things in registers. */
6673 matching_memory = 0;
6674 if (GET_CODE (dst) == MEM)
6675 {
6676 if (rtx_equal_p (dst, src1))
6677 matching_memory = 1;
6678 else if (GET_RTX_CLASS (code) == 'c'
6679 && rtx_equal_p (dst, src2))
6680 matching_memory = 2;
6681 else
6682 dst = gen_reg_rtx (mode);
6683 }
6684
6685 /* Both source operands cannot be in memory. */
6686 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
6687 {
6688 if (matching_memory != 2)
6689 src2 = force_reg (mode, src2);
6690 else
6691 src1 = force_reg (mode, src1);
6692 }
6693
6694 /* If the operation is not commutative, source 1 cannot be a constant
6695 or non-matching memory.  */
6696 if ((CONSTANT_P (src1)
6697 || (!matching_memory && GET_CODE (src1) == MEM))
6698 && GET_RTX_CLASS (code) != 'c')
6699 src1 = force_reg (mode, src1);
6700
6701 /* If optimizing, copy to regs to improve CSE */
6702 if (optimize && ! no_new_pseudos)
6703 {
6704 if (GET_CODE (dst) == MEM)
6705 dst = gen_reg_rtx (mode);
6706 if (GET_CODE (src1) == MEM)
6707 src1 = force_reg (mode, src1);
6708 if (GET_CODE (src2) == MEM)
6709 src2 = force_reg (mode, src2);
6710 }
6711
6712 /* Emit the instruction. */
6713
6714 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
6715 if (reload_in_progress)
6716 {
6717 /* Reload doesn't know about the flags register, and doesn't know that
6718 it doesn't want to clobber it. We can only do this with PLUS. */
6719 if (code != PLUS)
6720 abort ();
6721 emit_insn (op);
6722 }
6723 else
6724 {
6725 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6726 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6727 }
6728
6729 /* Fix up the destination if needed. */
6730 if (dst != operands[0])
6731 emit_move_insn (operands[0], dst);
6732 }
6733
6734 /* Return TRUE or FALSE depending on whether the binary operator meets the
6735 appropriate constraints. */
6736
6737 int
6738 ix86_binary_operator_ok (code, mode, operands)
6739 enum rtx_code code;
6740 enum machine_mode mode ATTRIBUTE_UNUSED;
6741 rtx operands[3];
6742 {
6743 /* Both source operands cannot be in memory. */
6744 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
6745 return 0;
6746 /* If the operation is not commutative, source 1 cannot be a constant.  */
6747 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
6748 return 0;
6749 /* If the destination is memory, we must have a matching source operand. */
6750 if (GET_CODE (operands[0]) == MEM
6751 && ! (rtx_equal_p (operands[0], operands[1])
6752 || (GET_RTX_CLASS (code) == 'c'
6753 && rtx_equal_p (operands[0], operands[2]))))
6754 return 0;
6755 /* If the operation is not commutative and source 1 is memory, we must
6756 have a matching destination.  */
6757 if (GET_CODE (operands[1]) == MEM
6758 && GET_RTX_CLASS (code) != 'c'
6759 && ! rtx_equal_p (operands[0], operands[1]))
6760 return 0;
6761 return 1;
6762 }
6763
6764 /* Attempt to expand a unary operator.  Make the expansion closer to the
6765 actual machine than just general_operand, which would allow 2 separate
6766 memory references (one output, one input) in a single insn.  */
6767
6768 void
6769 ix86_expand_unary_operator (code, mode, operands)
6770 enum rtx_code code;
6771 enum machine_mode mode;
6772 rtx operands[];
6773 {
6774 int matching_memory;
6775 rtx src, dst, op, clob;
6776
6777 dst = operands[0];
6778 src = operands[1];
6779
6780 /* If the destination is memory, and we do not have matching source
6781 operands, do things in registers. */
6782 matching_memory = 0;
6783 if (GET_CODE (dst) == MEM)
6784 {
6785 if (rtx_equal_p (dst, src))
6786 matching_memory = 1;
6787 else
6788 dst = gen_reg_rtx (mode);
6789 }
6790
6791 /* When source operand is memory, destination must match. */
6792 if (!matching_memory && GET_CODE (src) == MEM)
6793 src = force_reg (mode, src);
6794
6795 /* If optimizing, copy to regs to improve CSE */
6796 if (optimize && ! no_new_pseudos)
6797 {
6798 if (GET_CODE (dst) == MEM)
6799 dst = gen_reg_rtx (mode);
6800 if (GET_CODE (src) == MEM)
6801 src = force_reg (mode, src);
6802 }
6803
6804 /* Emit the instruction. */
6805
6806 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
6807 if (reload_in_progress || code == NOT)
6808 {
6809 /* Reload doesn't know about the flags register, and doesn't know that
6810 it doesn't want to clobber it. */
6811 if (code != NOT)
6812 abort ();
6813 emit_insn (op);
6814 }
6815 else
6816 {
6817 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6818 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6819 }
6820
6821 /* Fix up the destination if needed. */
6822 if (dst != operands[0])
6823 emit_move_insn (operands[0], dst);
6824 }
6825
6826 /* Return TRUE or FALSE depending on whether the unary operator meets the
6827 appropriate constraints. */
6828
6829 int
6830 ix86_unary_operator_ok (code, mode, operands)
6831 enum rtx_code code ATTRIBUTE_UNUSED;
6832 enum machine_mode mode ATTRIBUTE_UNUSED;
6833 rtx operands[2] ATTRIBUTE_UNUSED;
6834 {
6835 /* If one of operands is memory, source and destination must match. */
6836 if ((GET_CODE (operands[0]) == MEM
6837 || GET_CODE (operands[1]) == MEM)
6838 && ! rtx_equal_p (operands[0], operands[1]))
6839 return FALSE;
6840 return TRUE;
6841 }
6842
6843 /* Return TRUE or FALSE depending on whether the first SET in INSN
6844 has source and destination with matching CC modes, and that the
6845 CC mode is at least as constrained as REQ_MODE. */
6846
6847 int
6848 ix86_match_ccmode (insn, req_mode)
6849 rtx insn;
6850 enum machine_mode req_mode;
6851 {
6852 rtx set;
6853 enum machine_mode set_mode;
6854
6855 set = PATTERN (insn);
6856 if (GET_CODE (set) == PARALLEL)
6857 set = XVECEXP (set, 0, 0);
6858 if (GET_CODE (set) != SET)
6859 abort ();
6860 if (GET_CODE (SET_SRC (set)) != COMPARE)
6861 abort ();
6862
6863 set_mode = GET_MODE (SET_DEST (set));
6864 switch (set_mode)
6865 {
6866 case CCNOmode:
6867 if (req_mode != CCNOmode
6868 && (req_mode != CCmode
6869 || XEXP (SET_SRC (set), 1) != const0_rtx))
6870 return 0;
6871 break;
6872 case CCmode:
6873 if (req_mode == CCGCmode)
6874 return 0;
6875 /* FALLTHRU */
6876 case CCGCmode:
6877 if (req_mode == CCGOCmode || req_mode == CCNOmode)
6878 return 0;
6879 /* FALLTHRU */
6880 case CCGOCmode:
6881 if (req_mode == CCZmode)
6882 return 0;
6883 /* FALLTHRU */
6884 case CCZmode:
6885 break;
6886
6887 default:
6888 abort ();
6889 }
6890
6891 return (GET_MODE (SET_SRC (set)) == set_mode);
6892 }
6893
6894 /* Generate insn patterns to do an integer compare of OPERANDS. */
6895
6896 static rtx
6897 ix86_expand_int_compare (code, op0, op1)
6898 enum rtx_code code;
6899 rtx op0, op1;
6900 {
6901 enum machine_mode cmpmode;
6902 rtx tmp, flags;
6903
6904 cmpmode = SELECT_CC_MODE (code, op0, op1);
6905 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
6906
6907 /* This is very simple, but making the interface the same as in the
6908 FP case makes the rest of the code easier. */
6909 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
6910 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
6911
6912 /* Return the test that should be put into the flags user, i.e.
6913 the bcc, scc, or cmov instruction. */
6914 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
6915 }
6916
6917 /* Figure out whether to use ordered or unordered fp comparisons.
6918 Return the appropriate mode to use. */
6919
6920 enum machine_mode
6921 ix86_fp_compare_mode (code)
6922 enum rtx_code code ATTRIBUTE_UNUSED;
6923 {
6924 /* ??? In order to make all comparisons reversible, we do all comparisons
6925 non-trapping when compiling for IEEE.  Once gcc is able to distinguish
6926 between the trapping and nontrapping forms of all comparisons, we can make inequality
6927 comparisons trapping again, since it results in better code when using
6928 FCOM based compares. */
6929 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
6930 }
6931
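/* Select the condition code mode for comparing OP0 with OP1 using CODE;
   for instance EQ/NE only need the zero flag (CCZmode), and a signed
   GE/LT against zero can do without the carry flag (CCGOCmode).  */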
6932 enum machine_mode
6933 ix86_cc_mode (code, op0, op1)
6934 enum rtx_code code;
6935 rtx op0, op1;
6936 {
6937 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
6938 return ix86_fp_compare_mode (code);
6939 switch (code)
6940 {
6941 /* Only zero flag is needed. */
6942 case EQ: /* ZF=0 */
6943 case NE: /* ZF!=0 */
6944 return CCZmode;
6945 /* Codes needing carry flag. */
6946 case GEU: /* CF=0 */
6947 case GTU: /* CF=0 & ZF=0 */
6948 case LTU: /* CF=1 */
6949 case LEU: /* CF=1 | ZF=1 */
6950 return CCmode;
6951 /* Codes possibly doable only with sign flag when
6952 comparing against zero. */
6953 case GE: /* SF=OF or SF=0 */
6954 case LT: /* SF<>OF or SF=1 */
6955 if (op1 == const0_rtx)
6956 return CCGOCmode;
6957 else
6958 /* For other cases Carry flag is not required. */
6959 return CCGCmode;
6960 /* Codes doable only with sign flag when comparing
6961 against zero, but we miss a jump instruction for it,
6962 so we need to use relational tests against overflow,
6963 which thus needs to be zero.  */
6964 case GT: /* ZF=0 & SF=OF */
6965 case LE: /* ZF=1 | SF<>OF */
6966 if (op1 == const0_rtx)
6967 return CCNOmode;
6968 else
6969 return CCGCmode;
6970 default:
6971 abort ();
6972 }
6973 }
6974
6975 /* Return true if we should use an FCOMI instruction for this fp comparison. */
6976
6977 int
6978 ix86_use_fcomi_compare (code)
6979 enum rtx_code code ATTRIBUTE_UNUSED;
6980 {
6981 enum rtx_code swapped_code = swap_condition (code);
6982 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
6983 || (ix86_fp_comparison_cost (swapped_code)
6984 == ix86_fp_comparison_fcomi_cost (swapped_code)));
6985 }
6986
6987 /* Swap, force into registers, or otherwise massage the two operands
6988 to a fp comparison. The operands are updated in place; the new
6989 comparison code is returned.  */
6990
6991 static enum rtx_code
6992 ix86_prepare_fp_compare_args (code, pop0, pop1)
6993 enum rtx_code code;
6994 rtx *pop0, *pop1;
6995 {
6996 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
6997 rtx op0 = *pop0, op1 = *pop1;
6998 enum machine_mode op_mode = GET_MODE (op0);
6999 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7000
7001 /* All of the unordered compare instructions only work on registers.
7002 The same is true of the XFmode compare instructions. The same is
7003 true of the fcomi compare instructions. */
7004
7005 if (!is_sse
7006 && (fpcmp_mode == CCFPUmode
7007 || op_mode == XFmode
7008 || op_mode == TFmode
7009 || ix86_use_fcomi_compare (code)))
7010 {
7011 op0 = force_reg (op_mode, op0);
7012 op1 = force_reg (op_mode, op1);
7013 }
7014 else
7015 {
7016 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7017 things around if they appear profitable, otherwise force op0
7018 into a register. */
7019
7020 if (standard_80387_constant_p (op0) == 0
7021 || (GET_CODE (op0) == MEM
7022 && ! (standard_80387_constant_p (op1) == 0
7023 || GET_CODE (op1) == MEM)))
7024 {
7025 rtx tmp;
7026 tmp = op0, op0 = op1, op1 = tmp;
7027 code = swap_condition (code);
7028 }
7029
7030 if (GET_CODE (op0) != REG)
7031 op0 = force_reg (op_mode, op0);
7032
7033 if (CONSTANT_P (op1))
7034 {
7035 if (standard_80387_constant_p (op1))
7036 op1 = force_reg (op_mode, op1);
7037 else
7038 op1 = validize_mem (force_const_mem (op_mode, op1));
7039 }
7040 }
7041
7042 /* Try to rearrange the comparison to make it cheaper. */
7043 if (ix86_fp_comparison_cost (code)
7044 > ix86_fp_comparison_cost (swap_condition (code))
7045 && (GET_CODE (op0) == REG || !reload_completed))
7046 {
7047 rtx tmp;
7048 tmp = op0, op0 = op1, op1 = tmp;
7049 code = swap_condition (code);
7050 if (GET_CODE (op0) != REG)
7051 op0 = force_reg (op_mode, op0);
7052 }
7053
7054 *pop0 = op0;
7055 *pop1 = op1;
7056 return code;
7057 }
7058
7059 /* Convert comparison codes we use to represent FP comparison to integer
7060 code that will result in proper branch. Return UNKNOWN if no such code
7061 is available. */
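/* For instance, after an fcomi-style compare GT is tested as GTU ("ja",
   CF=0 and ZF=0) and GE as GEU ("jae", CF=0), matching the flag layout
   shown in the table inside ix86_fp_comparison_codes below.  */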
7062 static enum rtx_code
7063 ix86_fp_compare_code_to_integer (code)
7064 enum rtx_code code;
7065 {
7066 switch (code)
7067 {
7068 case GT:
7069 return GTU;
7070 case GE:
7071 return GEU;
7072 case ORDERED:
7073 case UNORDERED:
7074 return code;
7075 break;
7076 case UNEQ:
7077 return EQ;
7078 break;
7079 case UNLT:
7080 return LTU;
7081 break;
7082 case UNLE:
7083 return LEU;
7084 break;
7085 case LTGT:
7086 return NE;
7087 break;
7088 default:
7089 return UNKNOWN;
7090 }
7091 }
7092
7093 /* Split comparison code CODE into comparisons we can do using branch
7094 instructions.  BYPASS_CODE is the comparison code for a branch that will
7095 branch around FIRST_CODE and SECOND_CODE.  If one of the branches
7096 is not required, its value is set to NIL.
7097 We never require more than two branches. */
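/* For example, with TARGET_IEEE_FP an EQ comparison comes back as
   *first_code = UNEQ with *bypass_code = UNORDERED (branch around the
   equality test when the operands compare unordered), while NE comes back
   as *first_code = LTGT with *second_code = UNORDERED (also take the
   branch when unordered).  Without TARGET_IEEE_FP both extra codes stay
   NIL.  */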
7098 static void
7099 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7100 enum rtx_code code, *bypass_code, *first_code, *second_code;
7101 {
7102 *first_code = code;
7103 *bypass_code = NIL;
7104 *second_code = NIL;
7105
7106 /* The fcomi comparison sets flags as follows:
7107
7108 cmp ZF PF CF
7109 > 0 0 0
7110 < 0 0 1
7111 = 1 0 0
7112 un 1 1 1 */
7113
7114 switch (code)
7115 {
7116 case GT: /* GTU - CF=0 & ZF=0 */
7117 case GE: /* GEU - CF=0 */
7118 case ORDERED: /* PF=0 */
7119 case UNORDERED: /* PF=1 */
7120 case UNEQ: /* EQ - ZF=1 */
7121 case UNLT: /* LTU - CF=1 */
7122 case UNLE: /* LEU - CF=1 | ZF=1 */
7123 case LTGT: /* EQ - ZF=0 */
7124 break;
7125 case LT: /* LTU - CF=1 - fails on unordered */
7126 *first_code = UNLT;
7127 *bypass_code = UNORDERED;
7128 break;
7129 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7130 *first_code = UNLE;
7131 *bypass_code = UNORDERED;
7132 break;
7133 case EQ: /* EQ - ZF=1 - fails on unordered */
7134 *first_code = UNEQ;
7135 *bypass_code = UNORDERED;
7136 break;
7137 case NE: /* NE - ZF=0 - fails on unordered */
7138 *first_code = LTGT;
7139 *second_code = UNORDERED;
7140 break;
7141 case UNGE: /* GEU - CF=0 - fails on unordered */
7142 *first_code = GE;
7143 *second_code = UNORDERED;
7144 break;
7145 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7146 *first_code = GT;
7147 *second_code = UNORDERED;
7148 break;
7149 default:
7150 abort ();
7151 }
7152 if (!TARGET_IEEE_FP)
7153 {
7154 *second_code = NIL;
7155 *bypass_code = NIL;
7156 }
7157 }
7158
7159 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
7160 All following functions use the number of instructions as a cost metric.
7161 In the future this should be tweaked to compute bytes for optimize_size and
7162 take into account the performance of various instructions on various CPUs.  */
7163 static int
7164 ix86_fp_comparison_arithmetics_cost (code)
7165 enum rtx_code code;
7166 {
7167 if (!TARGET_IEEE_FP)
7168 return 4;
7169 /* The cost of code output by ix86_expand_fp_compare. */
7170 switch (code)
7171 {
7172 case UNLE:
7173 case UNLT:
7174 case LTGT:
7175 case GT:
7176 case GE:
7177 case UNORDERED:
7178 case ORDERED:
7179 case UNEQ:
7180 return 4;
7181 break;
7182 case LT:
7183 case NE:
7184 case EQ:
7185 case UNGE:
7186 return 5;
7187 break;
7188 case LE:
7189 case UNGT:
7190 return 6;
7191 break;
7192 default:
7193 abort ();
7194 }
7195 }
7196
7197 /* Return cost of comparison done using fcomi operation.
7198 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7199 static int
7200 ix86_fp_comparison_fcomi_cost (code)
7201 enum rtx_code code;
7202 {
7203 enum rtx_code bypass_code, first_code, second_code;
7204 /* Return an arbitrarily high cost when the instruction is not supported - this
7205 prevents gcc from using it.  */
7206 if (!TARGET_CMOVE)
7207 return 1024;
7208 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7209 return (bypass_code != NIL || second_code != NIL) + 2;
7210 }
7211
7212 /* Return cost of comparison done using sahf operation.
7213 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7214 static int
7215 ix86_fp_comparison_sahf_cost (code)
7216 enum rtx_code code;
7217 {
7218 enum rtx_code bypass_code, first_code, second_code;
7219 /* Return an arbitrarily high cost when the instruction is not preferred - this
7220 keeps gcc from using it.  */
7221 if (!TARGET_USE_SAHF && !optimize_size)
7222 return 1024;
7223 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7224 return (bypass_code != NIL || second_code != NIL) + 3;
7225 }
7226
7227 /* Compute cost of the comparison done using any method.
7228 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7229 static int
7230 ix86_fp_comparison_cost (code)
7231 enum rtx_code code;
7232 {
7233 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
7234 int min;
7235
7236 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
7237 sahf_cost = ix86_fp_comparison_sahf_cost (code);
7238
7239 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
7240 if (min > sahf_cost)
7241 min = sahf_cost;
7242 if (min > fcomi_cost)
7243 min = fcomi_cost;
7244 return min;
7245 }
7246
7247 /* Generate insn patterns to do a floating point compare of OPERANDS. */
7248
7249 static rtx
7250 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
7251 enum rtx_code code;
7252 rtx op0, op1, scratch;
7253 rtx *second_test;
7254 rtx *bypass_test;
7255 {
7256 enum machine_mode fpcmp_mode, intcmp_mode;
7257 rtx tmp, tmp2;
7258 int cost = ix86_fp_comparison_cost (code);
7259 enum rtx_code bypass_code, first_code, second_code;
7260
7261 fpcmp_mode = ix86_fp_compare_mode (code);
7262 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
7263
7264 if (second_test)
7265 *second_test = NULL_RTX;
7266 if (bypass_test)
7267 *bypass_test = NULL_RTX;
7268
7269 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7270
7271 /* Do fcomi/sahf based test when profitable. */
7272 if ((bypass_code == NIL || bypass_test)
7273 && (second_code == NIL || second_test)
7274 && ix86_fp_comparison_arithmetics_cost (code) > cost)
7275 {
7276 if (TARGET_CMOVE)
7277 {
7278 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7279 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
7280 tmp);
7281 emit_insn (tmp);
7282 }
7283 else
7284 {
7285 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7286 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7287 if (!scratch)
7288 scratch = gen_reg_rtx (HImode);
7289 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7290 emit_insn (gen_x86_sahf_1 (scratch));
7291 }
7292
7293 /* The FP codes work out to act like unsigned. */
7294 intcmp_mode = fpcmp_mode;
7295 code = first_code;
7296 if (bypass_code != NIL)
7297 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
7298 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7299 const0_rtx);
7300 if (second_code != NIL)
7301 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
7302 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7303 const0_rtx);
7304 }
7305 else
7306 {
7307 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
7308 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7309 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7310 if (!scratch)
7311 scratch = gen_reg_rtx (HImode);
7312 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7313
7314 /* In the unordered case, we have to check C2 for NaN's, which
7315 doesn't happen to work out to anything nice combination-wise.
7316 So do some bit twiddling on the value we've got in AH to come
7317 up with an appropriate set of condition codes. */
7318
7319 intcmp_mode = CCNOmode;
7320 switch (code)
7321 {
7322 case GT:
7323 case UNGT:
7324 if (code == GT || !TARGET_IEEE_FP)
7325 {
7326 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7327 code = EQ;
7328 }
7329 else
7330 {
7331 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7332 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7333 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
7334 intcmp_mode = CCmode;
7335 code = GEU;
7336 }
7337 break;
7338 case LT:
7339 case UNLT:
7340 if (code == LT && TARGET_IEEE_FP)
7341 {
7342 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7343 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
7344 intcmp_mode = CCmode;
7345 code = EQ;
7346 }
7347 else
7348 {
7349 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
7350 code = NE;
7351 }
7352 break;
7353 case GE:
7354 case UNGE:
7355 if (code == GE || !TARGET_IEEE_FP)
7356 {
7357 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
7358 code = EQ;
7359 }
7360 else
7361 {
7362 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7363 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7364 GEN_INT (0x01)));
7365 code = NE;
7366 }
7367 break;
7368 case LE:
7369 case UNLE:
7370 if (code == LE && TARGET_IEEE_FP)
7371 {
7372 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7373 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7374 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7375 intcmp_mode = CCmode;
7376 code = LTU;
7377 }
7378 else
7379 {
7380 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7381 code = NE;
7382 }
7383 break;
7384 case EQ:
7385 case UNEQ:
7386 if (code == EQ && TARGET_IEEE_FP)
7387 {
7388 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7389 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7390 intcmp_mode = CCmode;
7391 code = EQ;
7392 }
7393 else
7394 {
7395 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7396 code = NE;
7397 break;
7398 }
7399 break;
7400 case NE:
7401 case LTGT:
7402 if (code == NE && TARGET_IEEE_FP)
7403 {
7404 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7405 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7406 GEN_INT (0x40)));
7407 code = NE;
7408 }
7409 else
7410 {
7411 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7412 code = EQ;
7413 }
7414 break;
7415
7416 case UNORDERED:
7417 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7418 code = NE;
7419 break;
7420 case ORDERED:
7421 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7422 code = EQ;
7423 break;
7424
7425 default:
7426 abort ();
7427 }
7428 }
7429
7430 /* Return the test that should be put into the flags user, i.e.
7431 the bcc, scc, or cmov instruction. */
7432 return gen_rtx_fmt_ee (code, VOIDmode,
7433 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7434 const0_rtx);
7435 }
7436
7437 rtx
7438 ix86_expand_compare (code, second_test, bypass_test)
7439 enum rtx_code code;
7440 rtx *second_test, *bypass_test;
7441 {
7442 rtx op0, op1, ret;
7443 op0 = ix86_compare_op0;
7444 op1 = ix86_compare_op1;
7445
7446 if (second_test)
7447 *second_test = NULL_RTX;
7448 if (bypass_test)
7449 *bypass_test = NULL_RTX;
7450
7451 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7452 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
7453 second_test, bypass_test);
7454 else
7455 ret = ix86_expand_int_compare (code, op0, op1);
7456
7457 return ret;
7458 }
7459
7460 /* Return true if the CODE will result in a nontrivial jump sequence.  */
7461 bool
7462 ix86_fp_jump_nontrivial_p (code)
7463 enum rtx_code code;
7464 {
7465 enum rtx_code bypass_code, first_code, second_code;
7466 if (!TARGET_CMOVE)
7467 return true;
7468 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7469 return bypass_code != NIL || second_code != NIL;
7470 }
7471
7472 void
7473 ix86_expand_branch (code, label)
7474 enum rtx_code code;
7475 rtx label;
7476 {
7477 rtx tmp;
7478
7479 switch (GET_MODE (ix86_compare_op0))
7480 {
7481 case QImode:
7482 case HImode:
7483 case SImode:
7484 simple:
7485 tmp = ix86_expand_compare (code, NULL, NULL);
7486 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7487 gen_rtx_LABEL_REF (VOIDmode, label),
7488 pc_rtx);
7489 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
7490 return;
7491
7492 case SFmode:
7493 case DFmode:
7494 case XFmode:
7495 case TFmode:
7496 {
7497 rtvec vec;
7498 int use_fcomi;
7499 enum rtx_code bypass_code, first_code, second_code;
7500
7501 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
7502 &ix86_compare_op1);
7503
7504 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7505
7506 /* Check whether we will use the natural sequence with one jump. If
7507 so, we can expand the jump early.  Otherwise delay expansion by
7508 creating a compound insn so as not to confuse the optimizers.  */
7509 if (bypass_code == NIL && second_code == NIL
7510 && TARGET_CMOVE)
7511 {
7512 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
7513 gen_rtx_LABEL_REF (VOIDmode, label),
7514 pc_rtx, NULL_RTX);
7515 }
7516 else
7517 {
7518 tmp = gen_rtx_fmt_ee (code, VOIDmode,
7519 ix86_compare_op0, ix86_compare_op1);
7520 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7521 gen_rtx_LABEL_REF (VOIDmode, label),
7522 pc_rtx);
7523 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
7524
7525 use_fcomi = ix86_use_fcomi_compare (code);
7526 vec = rtvec_alloc (3 + !use_fcomi);
7527 RTVEC_ELT (vec, 0) = tmp;
7528 RTVEC_ELT (vec, 1)
7529 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
7530 RTVEC_ELT (vec, 2)
7531 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
7532 if (! use_fcomi)
7533 RTVEC_ELT (vec, 3)
7534 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
7535
7536 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
7537 }
7538 return;
7539 }
7540
7541 case DImode:
7542 if (TARGET_64BIT)
7543 goto simple;
7544 /* Expand DImode branch into multiple compare+branch. */
7545 {
7546 rtx lo[2], hi[2], label2;
7547 enum rtx_code code1, code2, code3;
7548
7549 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
7550 {
7551 tmp = ix86_compare_op0;
7552 ix86_compare_op0 = ix86_compare_op1;
7553 ix86_compare_op1 = tmp;
7554 code = swap_condition (code);
7555 }
7556 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
7557 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
7558
7559 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7560 avoid two branches. This costs one extra insn, so disable when
7561 optimizing for size. */
7562
7563 if ((code == EQ || code == NE)
7564 && (!optimize_size
7565 || hi[1] == const0_rtx || lo[1] == const0_rtx))
7566 {
7567 rtx xor0, xor1;
7568
7569 xor1 = hi[0];
7570 if (hi[1] != const0_rtx)
7571 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
7572 NULL_RTX, 0, OPTAB_WIDEN);
7573
7574 xor0 = lo[0];
7575 if (lo[1] != const0_rtx)
7576 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
7577 NULL_RTX, 0, OPTAB_WIDEN);
7578
7579 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
7580 NULL_RTX, 0, OPTAB_WIDEN);
7581
7582 ix86_compare_op0 = tmp;
7583 ix86_compare_op1 = const0_rtx;
7584 ix86_expand_branch (code, label);
7585 return;
7586 }
7587
7588 /* Otherwise, if we are doing less-than or greater-or-equal-than,
7589 op1 is a constant and the low word is zero, then we can just
7590 examine the high word. */
7591
7592 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
7593 switch (code)
7594 {
7595 case LT: case LTU: case GE: case GEU:
7596 ix86_compare_op0 = hi[0];
7597 ix86_compare_op1 = hi[1];
7598 ix86_expand_branch (code, label);
7599 return;
7600 default:
7601 break;
7602 }
7603
7604 /* Otherwise, we need two or three jumps. */
7605
7606 label2 = gen_label_rtx ();
7607
7608 code1 = code;
7609 code2 = swap_condition (code);
7610 code3 = unsigned_condition (code);
7611
7612 switch (code)
7613 {
7614 case LT: case GT: case LTU: case GTU:
7615 break;
7616
7617 case LE: code1 = LT; code2 = GT; break;
7618 case GE: code1 = GT; code2 = LT; break;
7619 case LEU: code1 = LTU; code2 = GTU; break;
7620 case GEU: code1 = GTU; code2 = LTU; break;
7621
7622 case EQ: code1 = NIL; code2 = NE; break;
7623 case NE: code2 = NIL; break;
7624
7625 default:
7626 abort ();
7627 }
7628
7629 /*
7630 * a < b =>
7631 * if (hi(a) < hi(b)) goto true;
7632 * if (hi(a) > hi(b)) goto false;
7633 * if (lo(a) < lo(b)) goto true;
7634 * false:
7635 */
7636
7637 ix86_compare_op0 = hi[0];
7638 ix86_compare_op1 = hi[1];
7639
7640 if (code1 != NIL)
7641 ix86_expand_branch (code1, label);
7642 if (code2 != NIL)
7643 ix86_expand_branch (code2, label2);
7644
7645 ix86_compare_op0 = lo[0];
7646 ix86_compare_op1 = lo[1];
7647 ix86_expand_branch (code3, label);
7648
7649 if (code2 != NIL)
7650 emit_label (label2);
7651 return;
7652 }
7653
7654 default:
7655 abort ();
7656 }
7657 }
7658
7659 /* Split branch based on floating point condition. */
7660 void
7661 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
7662 enum rtx_code code;
7663 rtx op1, op2, target1, target2, tmp;
7664 {
7665 rtx second, bypass;
7666 rtx label = NULL_RTX;
7667 rtx condition;
7668 int bypass_probability = -1, second_probability = -1, probability = -1;
7669 rtx i;
7670
7671 if (target2 != pc_rtx)
7672 {
7673 rtx tmp = target2;
7674 code = reverse_condition_maybe_unordered (code);
7675 target2 = target1;
7676 target1 = tmp;
7677 }
7678
7679 condition = ix86_expand_fp_compare (code, op1, op2,
7680 tmp, &second, &bypass);
7681
7682 if (split_branch_probability >= 0)
7683 {
7684 /* Distribute the probabilities across the jumps.
7685 Assume BYPASS and SECOND always test
7686 for UNORDERED. */
7687 probability = split_branch_probability;
7688
7689 /* A value of 1 is low enough that the probability needs no
7690 updating. Later we may run some experiments and see
7691 if unordered values are more frequent in practice. */
7692 if (bypass)
7693 bypass_probability = 1;
7694 if (second)
7695 second_probability = 1;
7696 }
7697 if (bypass != NULL_RTX)
7698 {
7699 label = gen_label_rtx ();
7700 i = emit_jump_insn (gen_rtx_SET
7701 (VOIDmode, pc_rtx,
7702 gen_rtx_IF_THEN_ELSE (VOIDmode,
7703 bypass,
7704 gen_rtx_LABEL_REF (VOIDmode,
7705 label),
7706 pc_rtx)));
7707 if (bypass_probability >= 0)
7708 REG_NOTES (i)
7709 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7710 GEN_INT (bypass_probability),
7711 REG_NOTES (i));
7712 }
7713 i = emit_jump_insn (gen_rtx_SET
7714 (VOIDmode, pc_rtx,
7715 gen_rtx_IF_THEN_ELSE (VOIDmode,
7716 condition, target1, target2)));
7717 if (probability >= 0)
7718 REG_NOTES (i)
7719 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7720 GEN_INT (probability),
7721 REG_NOTES (i));
7722 if (second != NULL_RTX)
7723 {
7724 i = emit_jump_insn (gen_rtx_SET
7725 (VOIDmode, pc_rtx,
7726 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
7727 target2)));
7728 if (second_probability >= 0)
7729 REG_NOTES (i)
7730 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7731 GEN_INT (second_probability),
7732 REG_NOTES (i));
7733 }
7734 if (label != NULL_RTX)
7735 emit_label (label);
7736 }
7737
7738 int
7739 ix86_expand_setcc (code, dest)
7740 enum rtx_code code;
7741 rtx dest;
7742 {
7743 rtx ret, tmp, tmpreg;
7744 rtx second_test, bypass_test;
7745 int type;
7746
7747 if (GET_MODE (ix86_compare_op0) == DImode
7748 && !TARGET_64BIT)
7749 return 0; /* FAIL */
7750
7751 /* Three modes of generation:
7752 0 -- destination does not overlap compare sources:
7753 clear dest first, emit strict_low_part setcc.
7754 1 -- destination does overlap compare sources:
7755 emit subreg setcc, zero extend.
7756 2 -- destination is in QImode:
7757 emit setcc only.
7758
7759 We don't use mode 0 early in compilation because it confuses CSE.
7760 There are peepholes to turn mode 1 into mode 0 if things work out
7761 nicely after reload. */
7762
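/* Rough illustration of the three modes (registers arbitrary):
     mode 0:  xorl %eax, %eax ; set<cc> %al
     mode 1:  set<cc> %al     ; movzbl %al, %eax
     mode 2:  set<cc> %al
   This is a sketch only; the exact RTL is built below.  */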
7763 type = cse_not_expected ? 0 : 1;
7764
7765 if (GET_MODE (dest) == QImode)
7766 type = 2;
7767 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
7768 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
7769 type = 1;
7770
7771 if (type == 0)
7772 emit_move_insn (dest, const0_rtx);
7773
7774 ret = ix86_expand_compare (code, &second_test, &bypass_test);
7775 PUT_MODE (ret, QImode);
7776
7777 tmp = dest;
7778 tmpreg = dest;
7779 if (type == 0)
7780 {
7781 tmp = gen_lowpart (QImode, dest);
7782 tmpreg = tmp;
7783 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
7784 }
7785 else if (type == 1)
7786 {
7787 if (!cse_not_expected)
7788 tmp = gen_reg_rtx (QImode);
7789 else
7790 tmp = gen_lowpart (QImode, dest);
7791 tmpreg = tmp;
7792 }
7793
7794 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
7795 if (bypass_test || second_test)
7796 {
7797 rtx test = second_test;
7798 int bypass = 0;
7799 rtx tmp2 = gen_reg_rtx (QImode);
7800 if (bypass_test)
7801 {
7802 if (second_test)
7803 abort();
7804 test = bypass_test;
7805 bypass = 1;
7806 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
7807 }
7808 PUT_MODE (test, QImode);
7809 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
7810
7811 if (bypass)
7812 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
7813 else
7814 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
7815 }
7816
7817 if (type == 1)
7818 {
7819 rtx clob;
7820
7821 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
7822 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
7823 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7824 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7825 emit_insn (tmp);
7826 }
7827
7828 return 1; /* DONE */
7829 }
7830
7831 int
7832 ix86_expand_int_movcc (operands)
7833 rtx operands[];
7834 {
7835 enum rtx_code code = GET_CODE (operands[1]), compare_code;
7836 rtx compare_seq, compare_op;
7837 rtx second_test, bypass_test;
7838
7839 /* When the compare code is not LTU or GEU, we can not use the sbbl case.
7840 When the comparison is done against an immediate, we can convert it to LTU or
7841 GEU by adjusting the constant. */
7842
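/* For example, (x <= 5) [LEU] becomes (x < 6) [LTU], and
   (x > 5) [GTU] becomes (x >= 6) [GEU], which the sbb-based
   sequences below can handle.  */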
7843 if ((code == LEU || code == GTU)
7844 && GET_CODE (ix86_compare_op1) == CONST_INT
7845 && GET_MODE (operands[0]) != HImode
7846 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
7847 && GET_CODE (operands[2]) == CONST_INT
7848 && GET_CODE (operands[3]) == CONST_INT)
7849 {
7850 if (code == LEU)
7851 code = LTU;
7852 else
7853 code = GEU;
7854 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
7855 }
7856
7857 start_sequence ();
7858 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
7859 compare_seq = gen_sequence ();
7860 end_sequence ();
7861
7862 compare_code = GET_CODE (compare_op);
7863
7864 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
7865 HImode insns, we'd be swallowed in word prefix ops. */
7866
7867 if (GET_MODE (operands[0]) != HImode
7868 && (GET_MODE (operands[0]) != DImode || TARGET_64BIT)
7869 && GET_CODE (operands[2]) == CONST_INT
7870 && GET_CODE (operands[3]) == CONST_INT)
7871 {
7872 rtx out = operands[0];
7873 HOST_WIDE_INT ct = INTVAL (operands[2]);
7874 HOST_WIDE_INT cf = INTVAL (operands[3]);
7875 HOST_WIDE_INT diff;
7876
7877 if ((compare_code == LTU || compare_code == GEU)
7878 && !second_test && !bypass_test)
7879 {
7880
7881 /* Detect overlap between destination and compare sources. */
7882 rtx tmp = out;
7883
7884 /* To simplify rest of code, restrict to the GEU case. */
7885 if (compare_code == LTU)
7886 {
7887 HOST_WIDE_INT tmp = ct;
7888 ct = cf;
7889 cf = tmp;
7890 compare_code = reverse_condition (compare_code);
7891 code = reverse_condition (code);
7892 }
7893 diff = ct - cf;
7894
7895 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
7896 || reg_overlap_mentioned_p (out, ix86_compare_op1))
7897 tmp = gen_reg_rtx (GET_MODE (operands[0]));
7898
7899 emit_insn (compare_seq);
7900 if (GET_MODE (tmp) == DImode)
7901 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
7902 else
7903 emit_insn (gen_x86_movsicc_0_m1 (tmp));
7904
7905 if (diff == 1)
7906 {
7907 /*
7908 * cmpl op0,op1
7909 * sbbl dest,dest
7910 * [addl dest, ct]
7911 *
7912 * Size 5 - 8.
7913 */
7914 if (ct)
7915 {
7916 if (GET_MODE (tmp) == DImode)
7917 emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (ct)));
7918 else
7919 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
7920 }
7921 }
7922 else if (cf == -1)
7923 {
7924 /*
7925 * cmpl op0,op1
7926 * sbbl dest,dest
7927 * orl $ct, dest
7928 *
7929 * Size 8.
7930 */
7931 if (GET_MODE (tmp) == DImode)
7932 emit_insn (gen_iordi3 (tmp, tmp, GEN_INT (ct)));
7933 else
7934 emit_insn (gen_iorsi3 (tmp, tmp, GEN_INT (ct)));
7935 }
7936 else if (diff == -1 && ct)
7937 {
7938 /*
7939 * cmpl op0,op1
7940 * sbbl dest,dest
7941 * xorl $-1, dest
7942 * [addl dest, cf]
7943 *
7944 * Size 8 - 11.
7945 */
7946 if (GET_MODE (tmp) == DImode)
7947 {
7948 emit_insn (gen_one_cmpldi2 (tmp, tmp));
7949 if (cf)
7950 emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (cf)));
7951 }
7952 else
7953 {
7954 emit_insn (gen_one_cmplsi2 (tmp, tmp));
7955 if (cf)
7956 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (cf)));
7957 }
7958 }
7959 else
7960 {
7961 /*
7962 * cmpl op0,op1
7963 * sbbl dest,dest
7964 * andl cf - ct, dest
7965 * [addl dest, ct]
7966 *
7967 * Size 8 - 11.
7968 */
7969 if (GET_MODE (tmp) == DImode)
7970 {
7971 emit_insn (gen_anddi3 (tmp, tmp, GEN_INT (trunc_int_for_mode
7972 (cf - ct, DImode))));
7973 if (ct)
7974 emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (ct)));
7975 }
7976 else
7977 {
7978 emit_insn (gen_andsi3 (tmp, tmp, GEN_INT (trunc_int_for_mode
7979 (cf - ct, SImode))));
7980 if (ct)
7981 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
7982 }
7983 }
7984
7985 if (tmp != out)
7986 emit_move_insn (out, tmp);
7987
7988 return 1; /* DONE */
7989 }
7990
7991 diff = ct - cf;
7992 if (diff < 0)
7993 {
7994 HOST_WIDE_INT tmp;
7995 tmp = ct, ct = cf, cf = tmp;
7996 diff = -diff;
7997 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
7998 {
7999 /* We may be reversing an unordered compare to a normal compare, which
8000 is not valid in general (we may convert a non-trapping condition
8001 to a trapping one); however, on i386 we currently emit all
8002 comparisons unordered. */
8003 compare_code = reverse_condition_maybe_unordered (compare_code);
8004 code = reverse_condition_maybe_unordered (code);
8005 }
8006 else
8007 {
8008 compare_code = reverse_condition (compare_code);
8009 code = reverse_condition (code);
8010 }
8011 }
8012 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
8013 || diff == 3 || diff == 5 || diff == 9)
8014 {
8015 /*
8016 * xorl dest,dest
8017 * cmpl op1,op2
8018 * setcc dest
8019 * lea cf(dest*(ct-cf)),dest
8020 *
8021 * Size 14.
8022 *
8023 * This also catches the degenerate setcc-only case.
8024 */
8025
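/* Worked example (registers arbitrary): with ct = 7, cf = 3 we get
   diff = 4; emit_store_flag leaves 0 or 1 in the destination and
       leal 3(,%eax,4), %eax
   turns that into 3 or 7 with no branch.  */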
8026 rtx tmp;
8027 int nops;
8028
8029 out = emit_store_flag (out, code, ix86_compare_op0,
8030 ix86_compare_op1, VOIDmode, 0, 1);
8031
8032 nops = 0;
8033 /* On x86_64 the lea instruction operates on Pmode, so we need the
8034 arithmetic done in the proper mode to match. */
8035 if (diff == 1)
8036 tmp = out;
8037 else
8038 {
8039 rtx out1;
8040 out1 = out;
8041 tmp = gen_rtx_MULT (GET_MODE (out), out1, GEN_INT (diff & ~1));
8042 nops++;
8043 if (diff & 1)
8044 {
8045 tmp = gen_rtx_PLUS (GET_MODE (out), tmp, out1);
8046 nops++;
8047 }
8048 }
8049 if (cf != 0)
8050 {
8051 tmp = gen_rtx_PLUS (GET_MODE (out), tmp, GEN_INT (cf));
8052 nops++;
8053 }
8054 if (tmp != out
8055 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8056 {
8057 if (nops == 1)
8058 {
8059 rtx clob;
8060
8061 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8062 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8063
8064 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8065 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8066 emit_insn (tmp);
8067 }
8068 else
8069 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8070 }
8071 if (out != operands[0])
8072 emit_move_insn (operands[0], out);
8073
8074 return 1; /* DONE */
8075 }
8076
8077 /*
8078 * General case: Jumpful:
8079 * xorl dest,dest cmpl op1, op2
8080 * cmpl op1, op2 movl ct, dest
8081 * setcc dest jcc 1f
8082 * decl dest movl cf, dest
8083 * andl (cf-ct),dest 1:
8084 * addl ct,dest
8085 *
8086 * Size 20. Size 14.
8087 *
8088 * This is reasonably steep, but branch mispredict costs are
8089 * high on modern cpus, so consider failing only if optimizing
8090 * for space.
8091 *
8092 * %%% Parameterize branch_cost on the tuning architecture, then
8093 * use that. The 80386 couldn't care less about mispredicts.
8094 */
8095
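/* Walking the jumpless sequence through: setcc leaves dest = 1 when
   the condition holds and 0 otherwise; decl turns that into 0 or -1;
   andl (cf-ct) gives 0 or cf-ct; the final addl ct yields ct or cf
   respectively.  */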
8096 if (!optimize_size && !TARGET_CMOVE)
8097 {
8098 if (ct == 0)
8099 {
8100 ct = cf;
8101 cf = 0;
8102 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8103 {
8104 /* We may be reversing an unordered compare to a normal compare,
8105 which is not valid in general (we may convert a non-trapping
8106 condition to a trapping one); however, on i386 we currently
8107 emit all comparisons unordered. */
8108 compare_code = reverse_condition_maybe_unordered (compare_code);
8109 code = reverse_condition_maybe_unordered (code);
8110 }
8111 else
8112 {
8113 compare_code = reverse_condition (compare_code);
8114 code = reverse_condition (code);
8115 }
8116 }
8117
8118 out = emit_store_flag (out, code, ix86_compare_op0,
8119 ix86_compare_op1, VOIDmode, 0, 1);
8120
8121 emit_insn (gen_addsi3 (out, out, constm1_rtx));
8122 emit_insn (gen_andsi3 (out, out, GEN_INT (trunc_int_for_mode
8123 (cf - ct, SImode))));
8124 if (ct != 0)
8125 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
8126 if (out != operands[0])
8127 emit_move_insn (operands[0], out);
8128
8129 return 1; /* DONE */
8130 }
8131 }
8132
8133 if (!TARGET_CMOVE)
8134 {
8135 /* Try a few things more with specific constants and a variable. */
8136
8137 optab op;
8138 rtx var, orig_out, out, tmp;
8139
8140 if (optimize_size)
8141 return 0; /* FAIL */
8142
8143 /* If one of the two operands is an interesting constant, load a
8144 constant pair via the recursion below, then mask in the variable with a logical operation. */
8145
8146 if (GET_CODE (operands[2]) == CONST_INT)
8147 {
8148 var = operands[3];
8149 if (INTVAL (operands[2]) == 0)
8150 operands[3] = constm1_rtx, op = and_optab;
8151 else if (INTVAL (operands[2]) == -1)
8152 operands[3] = const0_rtx, op = ior_optab;
8153 else
8154 return 0; /* FAIL */
8155 }
8156 else if (GET_CODE (operands[3]) == CONST_INT)
8157 {
8158 var = operands[2];
8159 if (INTVAL (operands[3]) == 0)
8160 operands[2] = constm1_rtx, op = and_optab;
8161 else if (INTVAL (operands[3]) == -1)
8162 operands[2] = const0_rtx, op = ior_optab;
8163 else
8164 return 0; /* FAIL */
8165 }
8166 else
8167 return 0; /* FAIL */
8168
8169 orig_out = operands[0];
8170 tmp = gen_reg_rtx (GET_MODE (orig_out));
8171 operands[0] = tmp;
8172
8173 /* Recurse to get the constant loaded. */
8174 if (ix86_expand_int_movcc (operands) == 0)
8175 return 0; /* FAIL */
8176
8177 /* Mask in the interesting variable. */
8178 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
8179 OPTAB_WIDEN);
8180 if (out != orig_out)
8181 emit_move_insn (orig_out, out);
8182
8183 return 1; /* DONE */
8184 }
8185
8186 /*
8187 * For comparison with above,
8188 *
8189 * movl cf,dest
8190 * movl ct,tmp
8191 * cmpl op1,op2
8192 * cmovcc tmp,dest
8193 *
8194 * Size 15.
8195 */
8196
8197 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
8198 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
8199 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
8200 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
8201
8202 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8203 {
8204 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
8205 emit_move_insn (tmp, operands[3]);
8206 operands[3] = tmp;
8207 }
8208 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8209 {
8210 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
8211 emit_move_insn (tmp, operands[2]);
8212 operands[2] = tmp;
8213 }
8214 if (! register_operand (operands[2], VOIDmode)
8215 && ! register_operand (operands[3], VOIDmode))
8216 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
8217
8218 emit_insn (compare_seq);
8219 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8220 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8221 compare_op, operands[2],
8222 operands[3])));
8223 if (bypass_test)
8224 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8225 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8226 bypass_test,
8227 operands[3],
8228 operands[0])));
8229 if (second_test)
8230 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8231 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8232 second_test,
8233 operands[2],
8234 operands[0])));
8235
8236 return 1; /* DONE */
8237 }
8238
8239 int
8240 ix86_expand_fp_movcc (operands)
8241 rtx operands[];
8242 {
8243 enum rtx_code code;
8244 rtx tmp;
8245 rtx compare_op, second_test, bypass_test;
8246
8247 /* For SF/DFmode conditional moves based on comparisons
8248 in the same mode, we may want to use SSE min/max instructions. */
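/* For example (sketch only), (a < b ? a : b) in SFmode can often be
   emitted as a single minss instead of a compare plus fcmov; the
   conditions below guard the cases where the SSE min/max semantics
   would differ from the conditional move.  */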
8249 if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
8250 || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
8251 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
8252 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
8253 && (!TARGET_IEEE_FP
8254 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
8255 /* We may be called from the post-reload splitter. */
8256 && (!REG_P (operands[0])
8257 || SSE_REG_P (operands[0])
8258 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
8259 {
8260 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
8261 code = GET_CODE (operands[1]);
8262
8263 /* See if we have (cross) match between comparison operands and
8264 conditional move operands. */
8265 if (rtx_equal_p (operands[2], op1))
8266 {
8267 rtx tmp = op0;
8268 op0 = op1;
8269 op1 = tmp;
8270 code = reverse_condition_maybe_unordered (code);
8271 }
8272 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
8273 {
8274 /* Check for min operation. */
8275 if (code == LT)
8276 {
8277 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8278 if (memory_operand (op0, VOIDmode))
8279 op0 = force_reg (GET_MODE (operands[0]), op0);
8280 if (GET_MODE (operands[0]) == SFmode)
8281 emit_insn (gen_minsf3 (operands[0], op0, op1));
8282 else
8283 emit_insn (gen_mindf3 (operands[0], op0, op1));
8284 return 1;
8285 }
8286 /* Check for max operation. */
8287 if (code == GT)
8288 {
8289 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8290 if (memory_operand (op0, VOIDmode))
8291 op0 = force_reg (GET_MODE (operands[0]), op0);
8292 if (GET_MODE (operands[0]) == SFmode)
8293 emit_insn (gen_maxsf3 (operands[0], op0, op1));
8294 else
8295 emit_insn (gen_maxdf3 (operands[0], op0, op1));
8296 return 1;
8297 }
8298 }
8299 /* Arrange for the condition to be an sse_comparison_operator. When
8300 in non-IEEE mode, try to canonicalize the destination operand
8301 to be first in the comparison - this helps reload avoid extra
8302 moves. */
8303 if (!sse_comparison_operator (operands[1], VOIDmode)
8304 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
8305 {
8306 rtx tmp = ix86_compare_op0;
8307 ix86_compare_op0 = ix86_compare_op1;
8308 ix86_compare_op1 = tmp;
8309 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
8310 VOIDmode, ix86_compare_op0,
8311 ix86_compare_op1);
8312 }
8313 /* Similarly, try to arrange for the result to be the first operand of the
8314 conditional move. We also don't support the NE comparison on SSE, so try to
8315 avoid it. */
8316 if ((rtx_equal_p (operands[0], operands[3])
8317 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
8318 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
8319 {
8320 rtx tmp = operands[2];
8321 operands[2] = operands[3];
8322 operands[3] = tmp;
8323 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8324 (GET_CODE (operands[1])),
8325 VOIDmode, ix86_compare_op0,
8326 ix86_compare_op1);
8327 }
8328 if (GET_MODE (operands[0]) == SFmode)
8329 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
8330 operands[2], operands[3],
8331 ix86_compare_op0, ix86_compare_op1));
8332 else
8333 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
8334 operands[2], operands[3],
8335 ix86_compare_op0, ix86_compare_op1));
8336 return 1;
8337 }
8338
8339 /* The floating point conditional move instructions don't directly
8340 support conditions resulting from a signed integer comparison. */
8341
8342 code = GET_CODE (operands[1]);
8343 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8344
8345 /* The floating point conditional move instructions don't directly
8346 support signed integer comparisons. */
8347
8348 if (!fcmov_comparison_operator (compare_op, VOIDmode))
8349 {
8350 if (second_test != NULL || bypass_test != NULL)
8351 abort();
8352 tmp = gen_reg_rtx (QImode);
8353 ix86_expand_setcc (code, tmp);
8354 code = NE;
8355 ix86_compare_op0 = tmp;
8356 ix86_compare_op1 = const0_rtx;
8357 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8358 }
8359 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8360 {
8361 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8362 emit_move_insn (tmp, operands[3]);
8363 operands[3] = tmp;
8364 }
8365 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8366 {
8367 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8368 emit_move_insn (tmp, operands[2]);
8369 operands[2] = tmp;
8370 }
8371
8372 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8373 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8374 compare_op,
8375 operands[2],
8376 operands[3])));
8377 if (bypass_test)
8378 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8379 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8380 bypass_test,
8381 operands[3],
8382 operands[0])));
8383 if (second_test)
8384 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8385 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8386 second_test,
8387 operands[2],
8388 operands[0])));
8389
8390 return 1;
8391 }
8392
8393 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
8394 works for floating point parameters and non-offsettable memories.
8395 For pushes, it returns just stack offsets; the values will be saved
8396 in the right order. At most three parts are generated. */
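/* For instance, on ia32 a DFmode value splits into 2 SImode parts and
   an XFmode/TFmode value into 3, while on x86-64 an XFmode/TFmode
   value splits into a DImode part plus an SImode part (see the cases
   handled below).  */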
8397
8398 static int
8399 ix86_split_to_parts (operand, parts, mode)
8400 rtx operand;
8401 rtx *parts;
8402 enum machine_mode mode;
8403 {
8404 int size;
8405
8406 if (!TARGET_64BIT)
8407 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
8408 else
8409 size = (GET_MODE_SIZE (mode) + 4) / 8;
8410
8411 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
8412 abort ();
8413 if (size < 2 || size > 3)
8414 abort ();
8415
8416 /* Optimize constant pool references to immediates. This is used by fp moves,
8417 which force all constants to memory to allow combining. */
8418
8419 if (GET_CODE (operand) == MEM
8420 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
8421 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
8422 operand = get_pool_constant (XEXP (operand, 0));
8423
8424 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
8425 {
8426 /* The only non-offsettable memories we handle are pushes. */
8427 if (! push_operand (operand, VOIDmode))
8428 abort ();
8429
8430 operand = copy_rtx (operand);
8431 PUT_MODE (operand, Pmode);
8432 parts[0] = parts[1] = parts[2] = operand;
8433 }
8434 else if (!TARGET_64BIT)
8435 {
8436 if (mode == DImode)
8437 split_di (&operand, 1, &parts[0], &parts[1]);
8438 else
8439 {
8440 if (REG_P (operand))
8441 {
8442 if (!reload_completed)
8443 abort ();
8444 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
8445 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8446 if (size == 3)
8447 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
8448 }
8449 else if (offsettable_memref_p (operand))
8450 {
8451 operand = adjust_address (operand, SImode, 0);
8452 parts[0] = operand;
8453 parts[1] = adjust_address (operand, SImode, 4);
8454 if (size == 3)
8455 parts[2] = adjust_address (operand, SImode, 8);
8456 }
8457 else if (GET_CODE (operand) == CONST_DOUBLE)
8458 {
8459 REAL_VALUE_TYPE r;
8460 long l[4];
8461
8462 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8463 switch (mode)
8464 {
8465 case XFmode:
8466 case TFmode:
8467 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8468 parts[2] = GEN_INT (l[2]);
8469 break;
8470 case DFmode:
8471 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
8472 break;
8473 default:
8474 abort ();
8475 }
8476 parts[1] = GEN_INT (l[1]);
8477 parts[0] = GEN_INT (l[0]);
8478 }
8479 else
8480 abort ();
8481 }
8482 }
8483 else
8484 {
8485 if (mode == XFmode || mode == TFmode)
8486 {
8487 if (REG_P (operand))
8488 {
8489 if (!reload_completed)
8490 abort ();
8491 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
8492 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8493 }
8494 else if (offsettable_memref_p (operand))
8495 {
8496 operand = adjust_address (operand, DImode, 0);
8497 parts[0] = operand;
8498 parts[1] = adjust_address (operand, SImode, 8);
8499 }
8500 else if (GET_CODE (operand) == CONST_DOUBLE)
8501 {
8502 REAL_VALUE_TYPE r;
8503 long l[3];
8504
8505 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8506 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8507 /* Do not use shift by 32 to avoid warning on 32bit systems. */
8508 if (HOST_BITS_PER_WIDE_INT >= 64)
8509 parts[0] = GEN_INT (l[0] + ((l[1] << 31) << 1));
8510 else
8511 parts[0] = immed_double_const (l[0], l[1], DImode);
8512 parts[1] = GEN_INT (l[2]);
8513 }
8514 else
8515 abort ();
8516 }
8517 }
8518
8519 return size;
8520 }
8521
8522 /* Emit insns to perform a move or push of DI, DF, and XF values.
8523 Return false when normal moves are needed; true when all required
8524 insns have been emitted. Operands 2-4 contain the input values
8525 in the correct order; operands 5-7 contain the output values. */
8526
8527 void
8528 ix86_split_long_move (operands)
8529 rtx operands[];
8530 {
8531 rtx part[2][3];
8532 int nparts;
8533 int push = 0;
8534 int collisions = 0;
8535 enum machine_mode mode = GET_MODE (operands[0]);
8536
8537 /* The DFmode expanders may ask us to move a double.
8538 For a 64-bit target this is a single move. By hiding that fact
8539 here we simplify the i386.md splitters. */
8540 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
8541 {
8542 /* Optimize constant pool references to immediates. This is used by fp moves,
8543 which force all constants to memory to allow combining. */
8544
8545 if (GET_CODE (operands[1]) == MEM
8546 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8547 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
8548 operands[1] = get_pool_constant (XEXP (operands[1], 0));
8549 if (push_operand (operands[0], VOIDmode))
8550 {
8551 operands[0] = copy_rtx (operands[0]);
8552 PUT_MODE (operands[0], Pmode);
8553 }
8554 else
8555 operands[0] = gen_lowpart (DImode, operands[0]);
8556 operands[1] = gen_lowpart (DImode, operands[1]);
8557 emit_move_insn (operands[0], operands[1]);
8558 return;
8559 }
8560
8561 /* The only non-offsettable memory we handle is push. */
8562 if (push_operand (operands[0], VOIDmode))
8563 push = 1;
8564 else if (GET_CODE (operands[0]) == MEM
8565 && ! offsettable_memref_p (operands[0]))
8566 abort ();
8567
8568 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
8569 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
8570
8571 /* When emitting push, take care for source operands on the stack. */
8572 if (push && GET_CODE (operands[1]) == MEM
8573 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
8574 {
8575 if (nparts == 3)
8576 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
8577 XEXP (part[1][2], 0));
8578 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
8579 XEXP (part[1][1], 0));
8580 }
8581
8582 /* We need to do the copy in the right order in case an address register
8583 of the source overlaps the destination. */
8584 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
8585 {
8586 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
8587 collisions++;
8588 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8589 collisions++;
8590 if (nparts == 3
8591 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
8592 collisions++;
8593
8594 /* Collision in the middle part can be handled by reordering. */
8595 if (collisions == 1 && nparts == 3
8596 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8597 {
8598 rtx tmp;
8599 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
8600 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
8601 }
8602
8603 /* If there are more collisions, we can't handle it by reordering.
8604 Do an lea to the last part and use only one colliding move. */
8605 else if (collisions > 1)
8606 {
8607 collisions = 1;
8608 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
8609 XEXP (part[1][0], 0)));
8610 part[1][0] = change_address (part[1][0],
8611 TARGET_64BIT ? DImode : SImode,
8612 part[0][nparts - 1]);
8613 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
8614 if (nparts == 3)
8615 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
8616 }
8617 }
8618
8619 if (push)
8620 {
8621 if (!TARGET_64BIT)
8622 {
8623 if (nparts == 3)
8624 {
8625 /* We use only the first 12 bytes of the TFmode value, but for pushing we
8626 are required to adjust the stack as if we were pushing a real 16-byte
8627 value. */
8628 if (mode == TFmode && !TARGET_64BIT)
8629 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
8630 GEN_INT (-4)));
8631 emit_move_insn (part[0][2], part[1][2]);
8632 }
8633 }
8634 else
8635 {
8636 /* In 64-bit mode we don't have a 32-bit push available. In case this is a
8637 register, that is OK - we will just use the larger counterpart. We also
8638 retype the memory - this comes from an attempt to avoid a REX prefix on
8639 moving the second half of a TFmode value. */
8640 if (GET_MODE (part[1][1]) == SImode)
8641 {
8642 if (GET_CODE (part[1][1]) == MEM)
8643 part[1][1] = adjust_address (part[1][1], DImode, 0);
8644 else if (REG_P (part[1][1]))
8645 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
8646 else
8647 abort();
8648 if (GET_MODE (part[1][0]) == SImode)
8649 part[1][0] = part[1][1];
8650 }
8651 }
8652 emit_move_insn (part[0][1], part[1][1]);
8653 emit_move_insn (part[0][0], part[1][0]);
8654 return;
8655 }
8656
8657 /* Choose correct order to not overwrite the source before it is copied. */
8658 if ((REG_P (part[0][0])
8659 && REG_P (part[1][1])
8660 && (REGNO (part[0][0]) == REGNO (part[1][1])
8661 || (nparts == 3
8662 && REGNO (part[0][0]) == REGNO (part[1][2]))))
8663 || (collisions > 0
8664 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
8665 {
8666 if (nparts == 3)
8667 {
8668 operands[2] = part[0][2];
8669 operands[3] = part[0][1];
8670 operands[4] = part[0][0];
8671 operands[5] = part[1][2];
8672 operands[6] = part[1][1];
8673 operands[7] = part[1][0];
8674 }
8675 else
8676 {
8677 operands[2] = part[0][1];
8678 operands[3] = part[0][0];
8679 operands[5] = part[1][1];
8680 operands[6] = part[1][0];
8681 }
8682 }
8683 else
8684 {
8685 if (nparts == 3)
8686 {
8687 operands[2] = part[0][0];
8688 operands[3] = part[0][1];
8689 operands[4] = part[0][2];
8690 operands[5] = part[1][0];
8691 operands[6] = part[1][1];
8692 operands[7] = part[1][2];
8693 }
8694 else
8695 {
8696 operands[2] = part[0][0];
8697 operands[3] = part[0][1];
8698 operands[5] = part[1][0];
8699 operands[6] = part[1][1];
8700 }
8701 }
8702 emit_move_insn (operands[2], operands[5]);
8703 emit_move_insn (operands[3], operands[6]);
8704 if (nparts == 3)
8705 emit_move_insn (operands[4], operands[7]);
8706
8707 return;
8708 }
8709
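/* Split a DImode left shift into SImode operations on the two 32-bit
   halves (summary of the code below): for a constant count >= 32 the
   low word is moved into the high word, the low word is cleared and
   any remainder is shifted with ashl; for smaller constant counts a
   shld/ashl pair is used; for a variable count the shld/ashl pair is
   followed by a runtime adjustment for counts of 32 or more.  */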
8710 void
8711 ix86_split_ashldi (operands, scratch)
8712 rtx *operands, scratch;
8713 {
8714 rtx low[2], high[2];
8715 int count;
8716
8717 if (GET_CODE (operands[2]) == CONST_INT)
8718 {
8719 split_di (operands, 2, low, high);
8720 count = INTVAL (operands[2]) & 63;
8721
8722 if (count >= 32)
8723 {
8724 emit_move_insn (high[0], low[1]);
8725 emit_move_insn (low[0], const0_rtx);
8726
8727 if (count > 32)
8728 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
8729 }
8730 else
8731 {
8732 if (!rtx_equal_p (operands[0], operands[1]))
8733 emit_move_insn (operands[0], operands[1]);
8734 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
8735 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
8736 }
8737 }
8738 else
8739 {
8740 if (!rtx_equal_p (operands[0], operands[1]))
8741 emit_move_insn (operands[0], operands[1]);
8742
8743 split_di (operands, 1, low, high);
8744
8745 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
8746 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
8747
8748 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8749 {
8750 if (! no_new_pseudos)
8751 scratch = force_reg (SImode, const0_rtx);
8752 else
8753 emit_move_insn (scratch, const0_rtx);
8754
8755 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
8756 scratch));
8757 }
8758 else
8759 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
8760 }
8761 }
8762
8763 void
8764 ix86_split_ashrdi (operands, scratch)
8765 rtx *operands, scratch;
8766 {
8767 rtx low[2], high[2];
8768 int count;
8769
8770 if (GET_CODE (operands[2]) == CONST_INT)
8771 {
8772 split_di (operands, 2, low, high);
8773 count = INTVAL (operands[2]) & 63;
8774
8775 if (count >= 32)
8776 {
8777 emit_move_insn (low[0], high[1]);
8778
8779 if (! reload_completed)
8780 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
8781 else
8782 {
8783 emit_move_insn (high[0], low[0]);
8784 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
8785 }
8786
8787 if (count > 32)
8788 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
8789 }
8790 else
8791 {
8792 if (!rtx_equal_p (operands[0], operands[1]))
8793 emit_move_insn (operands[0], operands[1]);
8794 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8795 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
8796 }
8797 }
8798 else
8799 {
8800 if (!rtx_equal_p (operands[0], operands[1]))
8801 emit_move_insn (operands[0], operands[1]);
8802
8803 split_di (operands, 1, low, high);
8804
8805 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8806 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
8807
8808 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8809 {
8810 if (! no_new_pseudos)
8811 scratch = gen_reg_rtx (SImode);
8812 emit_move_insn (scratch, high[0]);
8813 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
8814 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8815 scratch));
8816 }
8817 else
8818 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
8819 }
8820 }
8821
8822 void
8823 ix86_split_lshrdi (operands, scratch)
8824 rtx *operands, scratch;
8825 {
8826 rtx low[2], high[2];
8827 int count;
8828
8829 if (GET_CODE (operands[2]) == CONST_INT)
8830 {
8831 split_di (operands, 2, low, high);
8832 count = INTVAL (operands[2]) & 63;
8833
8834 if (count >= 32)
8835 {
8836 emit_move_insn (low[0], high[1]);
8837 emit_move_insn (high[0], const0_rtx);
8838
8839 if (count > 32)
8840 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
8841 }
8842 else
8843 {
8844 if (!rtx_equal_p (operands[0], operands[1]))
8845 emit_move_insn (operands[0], operands[1]);
8846 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8847 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
8848 }
8849 }
8850 else
8851 {
8852 if (!rtx_equal_p (operands[0], operands[1]))
8853 emit_move_insn (operands[0], operands[1]);
8854
8855 split_di (operands, 1, low, high);
8856
8857 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8858 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
8859
8860 /* Heh. By reversing the arguments, we can reuse this pattern. */
8861 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8862 {
8863 if (! no_new_pseudos)
8864 scratch = force_reg (SImode, const0_rtx);
8865 else
8866 emit_move_insn (scratch, const0_rtx);
8867
8868 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8869 scratch));
8870 }
8871 else
8872 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
8873 }
8874 }
8875
8876 /* Helper function for the string operations below. Test whether the bits
8877 of VARIABLE selected by VALUE are zero; if so, jump to the returned label. */
8878 static rtx
8879 ix86_expand_aligntest (variable, value)
8880 rtx variable;
8881 int value;
8882 {
8883 rtx label = gen_label_rtx ();
8884 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
8885 if (GET_MODE (variable) == DImode)
8886 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
8887 else
8888 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
8889 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
8890 1, 0, label);
8891 return label;
8892 }
8893
8894 /* Decrease COUNTREG by VALUE. */
8895 static void
8896 ix86_adjust_counter (countreg, value)
8897 rtx countreg;
8898 HOST_WIDE_INT value;
8899 {
8900 if (GET_MODE (countreg) == DImode)
8901 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
8902 else
8903 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
8904 }
8905
8906 /* Zero extend a possibly SImode EXP into a Pmode register. */
8907 rtx
8908 ix86_zero_extend_to_Pmode (exp)
8909 rtx exp;
8910 {
8911 rtx r;
8912 if (GET_MODE (exp) == VOIDmode)
8913 return force_reg (Pmode, exp);
8914 if (GET_MODE (exp) == Pmode)
8915 return copy_to_mode_reg (Pmode, exp);
8916 r = gen_reg_rtx (Pmode);
8917 emit_insn (gen_zero_extendsidi2 (r, exp));
8918 return r;
8919 }
8920
8921 /* Expand string move (memcpy) operation. Use i386 string operations when
8922 profitable. expand_clrstr contains similar code. */
8923 int
8924 ix86_expand_movstr (dst, src, count_exp, align_exp)
8925 rtx dst, src, count_exp, align_exp;
8926 {
8927 rtx srcreg, destreg, countreg;
8928 enum machine_mode counter_mode;
8929 HOST_WIDE_INT align = 0;
8930 unsigned HOST_WIDE_INT count = 0;
8931 rtx insns;
8932
8933 start_sequence ();
8934
8935 if (GET_CODE (align_exp) == CONST_INT)
8936 align = INTVAL (align_exp);
8937
8938 /* This simple hack avoids all the inlining code and simplifies the code below. */
8939 if (!TARGET_ALIGN_STRINGOPS)
8940 align = 64;
8941
8942 if (GET_CODE (count_exp) == CONST_INT)
8943 count = INTVAL (count_exp);
8944
8945 /* Figure out the proper mode for the counter. For 32 bits it is always SImode;
8946 for 64 bits use SImode when possible, otherwise DImode.
8947 Set count to the number of bytes copied when known at compile time. */
8948 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
8949 || x86_64_zero_extended_value (count_exp))
8950 counter_mode = SImode;
8951 else
8952 counter_mode = DImode;
8953
8954 if (counter_mode != SImode && counter_mode != DImode)
8955 abort ();
8956
8957 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
8958 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
8959
8960 emit_insn (gen_cld ());
8961
8962 /* When optimizing for size, emit a simple rep ; movsb instruction for
8963 counts not divisible by 4. */
8964
8965 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
8966 {
8967 countreg = ix86_zero_extend_to_Pmode (count_exp);
8968 if (TARGET_64BIT)
8969 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
8970 destreg, srcreg, countreg));
8971 else
8972 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
8973 destreg, srcreg, countreg));
8974 }
8975
8976 /* For constant aligned (or small unaligned) copies use rep movsl
8977 followed by code copying the rest. For PentiumPro ensure 8 byte
8978 alignment to allow rep movsl acceleration. */
8979
8980 else if (count != 0
8981 && (align >= 8
8982 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
8983 || optimize_size || count < (unsigned int)64))
8984 {
8985 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
8986 if (count & ~(size - 1))
8987 {
8988 countreg = copy_to_mode_reg (counter_mode,
8989 GEN_INT ((count >> (size == 4 ? 2 : 3))
8990 & (TARGET_64BIT ? -1 : 0x3fffffff)));
8991 countreg = ix86_zero_extend_to_Pmode (countreg);
8992 if (size == 4)
8993 {
8994 if (TARGET_64BIT)
8995 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
8996 destreg, srcreg, countreg));
8997 else
8998 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
8999 destreg, srcreg, countreg));
9000 }
9001 else
9002 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9003 destreg, srcreg, countreg));
9004 }
9005 if (size == 8 && (count & 0x04))
9006 emit_insn (gen_strmovsi (destreg, srcreg));
9007 if (count & 0x02)
9008 emit_insn (gen_strmovhi (destreg, srcreg));
9009 if (count & 0x01)
9010 emit_insn (gen_strmovqi (destreg, srcreg));
9011 }
9012 /* The generic code based on the glibc implementation:
9013 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9014 allowing accelerated copying there)
9015 - copy the data using rep movsl
9016 - copy the rest. */
9017 else
9018 {
9019 rtx countreg2;
9020 rtx label = NULL;
9021
9022 /* In case we don't know anything about the alignment, default to the
9023 library version, since it is usually equally fast and results in
9024 shorter code. */
9025 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9026 {
9027 end_sequence ();
9028 return 0;
9029 }
9030
9031 if (TARGET_SINGLE_STRINGOP)
9032 emit_insn (gen_cld ());
9033
9034 countreg2 = gen_reg_rtx (Pmode);
9035 countreg = copy_to_mode_reg (counter_mode, count_exp);
9036
9037 /* We don't use loops to align the destination or to copy parts smaller
9038 than 4 bytes, because gcc is able to optimize such code better (when
9039 the destination or the count really is aligned, gcc is often
9040 able to predict the branches) and also it is friendlier to the
9041 hardware branch prediction.
9042
9043 Using loops is beneficial for the generic case, because we can
9044 handle small counts using the loops. Many CPUs (such as Athlon)
9045 have large REP prefix setup costs.
9046
9047 This is quite costly. Maybe we can revisit this decision later or
9048 add some customizability to this code. */
9049
9050 if (count == 0
9051 && align < (TARGET_PENTIUMPRO && (count == 0
9052 || count >= (unsigned int)260)
9053 ? 8 : UNITS_PER_WORD))
9054 {
9055 label = gen_label_rtx ();
9056 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9057 LEU, 0, counter_mode, 1, 0, label);
9058 }
9059 if (align <= 1)
9060 {
9061 rtx label = ix86_expand_aligntest (destreg, 1);
9062 emit_insn (gen_strmovqi (destreg, srcreg));
9063 ix86_adjust_counter (countreg, 1);
9064 emit_label (label);
9065 LABEL_NUSES (label) = 1;
9066 }
9067 if (align <= 2)
9068 {
9069 rtx label = ix86_expand_aligntest (destreg, 2);
9070 emit_insn (gen_strmovhi (destreg, srcreg));
9071 ix86_adjust_counter (countreg, 2);
9072 emit_label (label);
9073 LABEL_NUSES (label) = 1;
9074 }
9075 if (align <= 4
9076 && ((TARGET_PENTIUMPRO && (count == 0
9077 || count >= (unsigned int)260))
9078 || TARGET_64BIT))
9079 {
9080 rtx label = ix86_expand_aligntest (destreg, 4);
9081 emit_insn (gen_strmovsi (destreg, srcreg));
9082 ix86_adjust_counter (countreg, 4);
9083 emit_label (label);
9084 LABEL_NUSES (label) = 1;
9085 }
9086
9087 if (!TARGET_SINGLE_STRINGOP)
9088 emit_insn (gen_cld ());
9089 if (TARGET_64BIT)
9090 {
9091 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9092 GEN_INT (3)));
9093 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9094 destreg, srcreg, countreg2));
9095 }
9096 else
9097 {
9098 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9099 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9100 destreg, srcreg, countreg2));
9101 }
9102
9103 if (label)
9104 {
9105 emit_label (label);
9106 LABEL_NUSES (label) = 1;
9107 }
9108 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9109 emit_insn (gen_strmovsi (destreg, srcreg));
9110 if ((align <= 4 || count == 0) && TARGET_64BIT)
9111 {
9112 rtx label = ix86_expand_aligntest (countreg, 4);
9113 emit_insn (gen_strmovsi (destreg, srcreg));
9114 emit_label (label);
9115 LABEL_NUSES (label) = 1;
9116 }
9117 if (align > 2 && count != 0 && (count & 2))
9118 emit_insn (gen_strmovhi (destreg, srcreg));
9119 if (align <= 2 || count == 0)
9120 {
9121 rtx label = ix86_expand_aligntest (countreg, 2);
9122 emit_insn (gen_strmovhi (destreg, srcreg));
9123 emit_label (label);
9124 LABEL_NUSES (label) = 1;
9125 }
9126 if (align > 1 && count != 0 && (count & 1))
9127 emit_insn (gen_strmovqi (destreg, srcreg));
9128 if (align <= 1 || count == 0)
9129 {
9130 rtx label = ix86_expand_aligntest (countreg, 1);
9131 emit_insn (gen_strmovqi (destreg, srcreg));
9132 emit_label (label);
9133 LABEL_NUSES (label) = 1;
9134 }
9135 }
9136
9137 insns = get_insns ();
9138 end_sequence ();
9139
9140 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
9141 emit_insns (insns);
9142 return 1;
9143 }
9144
9145 /* Expand string clear operation (bzero). Use i386 string operations when
9146 profitable. expand_movstr contains similar code. */
9147 int
9148 ix86_expand_clrstr (src, count_exp, align_exp)
9149 rtx src, count_exp, align_exp;
9150 {
9151 rtx destreg, zeroreg, countreg;
9152 enum machine_mode counter_mode;
9153 HOST_WIDE_INT align = 0;
9154 unsigned HOST_WIDE_INT count = 0;
9155
9156 if (GET_CODE (align_exp) == CONST_INT)
9157 align = INTVAL (align_exp);
9158
9159 /* This simple hack avoids all the inlining code and simplifies the code below. */
9160 if (!TARGET_ALIGN_STRINGOPS)
9161 align = 32;
9162
9163 if (GET_CODE (count_exp) == CONST_INT)
9164 count = INTVAL (count_exp);
9165 /* Figure out the proper mode for the counter. For 32 bits it is always SImode;
9166 for 64 bits use SImode when possible, otherwise DImode.
9167 Set count to the number of bytes copied when known at compile time. */
9168 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9169 || x86_64_zero_extended_value (count_exp))
9170 counter_mode = SImode;
9171 else
9172 counter_mode = DImode;
9173
9174 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9175
9176 emit_insn (gen_cld ());
9177
9178 /* When optimizing for size, emit a simple rep ; stosb instruction for
9179 counts not divisible by 4. */
9180
9181 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9182 {
9183 countreg = ix86_zero_extend_to_Pmode (count_exp);
9184 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
9185 if (TARGET_64BIT)
9186 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
9187 destreg, countreg));
9188 else
9189 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
9190 destreg, countreg));
9191 }
9192 else if (count != 0
9193 && (align >= 8
9194 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9195 || optimize_size || count < (unsigned int)64))
9196 {
9197 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9198 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
9199 if (count & ~(size - 1))
9200 {
9201 countreg = copy_to_mode_reg (counter_mode,
9202 GEN_INT ((count >> (size == 4 ? 2 : 3))
9203 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9204 countreg = ix86_zero_extend_to_Pmode (countreg);
9205 if (size == 4)
9206 {
9207 if (TARGET_64BIT)
9208 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
9209 destreg, countreg));
9210 else
9211 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
9212 destreg, countreg));
9213 }
9214 else
9215 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
9216 destreg, countreg));
9217 }
9218 if (size == 8 && (count & 0x04))
9219 emit_insn (gen_strsetsi (destreg,
9220 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9221 if (count & 0x02)
9222 emit_insn (gen_strsethi (destreg,
9223 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9224 if (count & 0x01)
9225 emit_insn (gen_strsetqi (destreg,
9226 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9227 }
9228 else
9229 {
9230 rtx countreg2;
9231 rtx label = NULL;
9232
9233 /* In case we don't know anything about the alignment, default to the
9234 library version, since it is usually equally fast and results in
9235 shorter code. */
9236 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9237 return 0;
9238
9239 if (TARGET_SINGLE_STRINGOP)
9240 emit_insn (gen_cld ());
9241
9242 countreg2 = gen_reg_rtx (Pmode);
9243 countreg = copy_to_mode_reg (counter_mode, count_exp);
9244 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
9245
9246 if (count == 0
9247 && align < (TARGET_PENTIUMPRO && (count == 0
9248 || count >= (unsigned int)260)
9249 ? 8 : UNITS_PER_WORD))
9250 {
9251 label = gen_label_rtx ();
9252 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9253 LEU, 0, counter_mode, 1, 0, label);
9254 }
9255 if (align <= 1)
9256 {
9257 rtx label = ix86_expand_aligntest (destreg, 1);
9258 emit_insn (gen_strsetqi (destreg,
9259 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9260 ix86_adjust_counter (countreg, 1);
9261 emit_label (label);
9262 LABEL_NUSES (label) = 1;
9263 }
9264 if (align <= 2)
9265 {
9266 rtx label = ix86_expand_aligntest (destreg, 2);
9267 emit_insn (gen_strsethi (destreg,
9268 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9269 ix86_adjust_counter (countreg, 2);
9270 emit_label (label);
9271 LABEL_NUSES (label) = 1;
9272 }
9273 if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
9274 || count >= (unsigned int)260))
9275 {
9276 rtx label = ix86_expand_aligntest (destreg, 4);
9277 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
9278 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
9279 : zeroreg)));
9280 ix86_adjust_counter (countreg, 4);
9281 emit_label (label);
9282 LABEL_NUSES (label) = 1;
9283 }
9284
9285 if (!TARGET_SINGLE_STRINGOP)
9286 emit_insn (gen_cld ());
9287 if (TARGET_64BIT)
9288 {
9289 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9290 GEN_INT (3)));
9291 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
9292 destreg, countreg2));
9293 }
9294 else
9295 {
9296 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9297 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
9298 destreg, countreg2));
9299 }
9300
9301 if (label)
9302 {
9303 emit_label (label);
9304 LABEL_NUSES (label) = 1;
9305 }
9306 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9307 emit_insn (gen_strsetsi (destreg,
9308 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9309 if (TARGET_64BIT && (align <= 4 || count == 0))
9310 {
9311 rtx label = ix86_expand_aligntest (destreg, 2);
9312 emit_insn (gen_strsetsi (destreg,
9313 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9314 emit_label (label);
9315 LABEL_NUSES (label) = 1;
9316 }
9317 if (align > 2 && count != 0 && (count & 2))
9318 emit_insn (gen_strsethi (destreg,
9319 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9320 if (align <= 2 || count == 0)
9321 {
9322 rtx label = ix86_expand_aligntest (destreg, 2);
9323 emit_insn (gen_strsethi (destreg,
9324 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9325 emit_label (label);
9326 LABEL_NUSES (label) = 1;
9327 }
9328 if (align > 1 && count != 0 && (count & 1))
9329 emit_insn (gen_strsetqi (destreg,
9330 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9331 if (align <= 1 || count == 0)
9332 {
9333 rtx label = ix86_expand_aligntest (destreg, 1);
9334 emit_insn (gen_strsetqi (destreg,
9335 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9336 emit_label (label);
9337 LABEL_NUSES (label) = 1;
9338 }
9339 }
9340 return 1;
9341 }
9342 /* Expand strlen. */
9343 int
9344 ix86_expand_strlen (out, src, eoschar, align)
9345 rtx out, src, eoschar, align;
9346 {
9347 rtx addr, scratch1, scratch2, scratch3, scratch4;
9348
9349 /* The generic case of the strlen expander is long. Avoid its
9350 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
9351
9352 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9353 && !TARGET_INLINE_ALL_STRINGOPS
9354 && !optimize_size
9355 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
9356 return 0;
9357
9358 addr = force_reg (Pmode, XEXP (src, 0));
9359 scratch1 = gen_reg_rtx (Pmode);
9360
9361 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9362 && !optimize_size)
9363 {
9364 /* Well it seems that some optimizer does not combine a call like
9365 foo(strlen(bar), strlen(bar));
9366 when the move and the subtraction are done here. It does calculate
9367 the length just once when these instructions are done inside
9368 output_strlen_unroll(). But since &bar[strlen(bar)] is
9369 often used and this uses one fewer register for the lifetime of
9370 output_strlen_unroll(), this is better. */
9371
9372 emit_move_insn (out, addr);
9373
9374 ix86_expand_strlensi_unroll_1 (out, align);
9375
9376 /* strlensi_unroll_1 returns the address of the zero at the end of
9377 the string, like memchr(), so compute the length by subtracting
9378 the start address. */
9379 if (TARGET_64BIT)
9380 emit_insn (gen_subdi3 (out, out, addr));
9381 else
9382 emit_insn (gen_subsi3 (out, out, addr));
9383 }
9384 else
9385 {
9386 scratch2 = gen_reg_rtx (Pmode);
9387 scratch3 = gen_reg_rtx (Pmode);
9388 scratch4 = force_reg (Pmode, constm1_rtx);
9389
9390 emit_move_insn (scratch3, addr);
9391 eoschar = force_reg (QImode, eoschar);
9392
9393 emit_insn (gen_cld ());
9394 if (TARGET_64BIT)
9395 {
9396 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
9397 align, scratch4, scratch3));
9398 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
9399 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
9400 }
9401 else
9402 {
9403 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
9404 align, scratch4, scratch3));
9405 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
9406 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
9407 }
9408 }
9409 return 1;
9410 }
9411
9412 /* Expand the appropriate insns for doing strlen if not just doing
9413 repnz; scasb
9414
9415 out = result, initialized with the start address
9416 align_rtx = alignment of the address.
9417 scratch = scratch register, initialized with the start address when
9418 not aligned, otherwise undefined
9419
9420 This is just the body. It needs the initialisations mentioned above and
9421 some address computation at the end. These things are done in i386.md. */
9422
9423 static void
9424 ix86_expand_strlensi_unroll_1 (out, align_rtx)
9425 rtx out, align_rtx;
9426 {
9427 int align;
9428 rtx tmp;
9429 rtx align_2_label = NULL_RTX;
9430 rtx align_3_label = NULL_RTX;
9431 rtx align_4_label = gen_label_rtx ();
9432 rtx end_0_label = gen_label_rtx ();
9433 rtx mem;
9434 rtx tmpreg = gen_reg_rtx (SImode);
9435 rtx scratch = gen_reg_rtx (SImode);
9436
9437 align = 0;
9438 if (GET_CODE (align_rtx) == CONST_INT)
9439 align = INTVAL (align_rtx);
9440
9441 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
9442
9443 /* Is there a known alignment and is it less than 4? */
9444 if (align < 4)
9445 {
9446 rtx scratch1 = gen_reg_rtx (Pmode);
9447 emit_move_insn (scratch1, out);
9448 /* Is there a known alignment and is it not 2? */
9449 if (align != 2)
9450 {
9451 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
9452 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
9453
9454 /* Leave just the 3 lower bits. */
9455 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
9456 NULL_RTX, 0, OPTAB_WIDEN);
9457
9458 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9459 Pmode, 1, 0, align_4_label);
9460 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
9461 Pmode, 1, 0, align_2_label);
9462 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
9463 Pmode, 1, 0, align_3_label);
9464 }
9465 else
9466 {
9467 /* Since the alignment is 2, we have to check 2 or 0 bytes;
9468 check whether it is aligned to a 4-byte boundary. */
9469
9470 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
9471 NULL_RTX, 0, OPTAB_WIDEN);
9472
9473 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9474 Pmode, 1, 0, align_4_label);
9475 }
9476
9477 mem = gen_rtx_MEM (QImode, out);
9478
9479 /* Now compare the bytes. */
9480
9481 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
9482 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9483 QImode, 1, 0, end_0_label);
9484
9485 /* Increment the address. */
9486 if (TARGET_64BIT)
9487 emit_insn (gen_adddi3 (out, out, const1_rtx));
9488 else
9489 emit_insn (gen_addsi3 (out, out, const1_rtx));
9490
9491 /* Not needed with an alignment of 2 */
9492 if (align != 2)
9493 {
9494 emit_label (align_2_label);
9495
9496 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9497 QImode, 1, 0, end_0_label);
9498
9499 if (TARGET_64BIT)
9500 emit_insn (gen_adddi3 (out, out, const1_rtx));
9501 else
9502 emit_insn (gen_addsi3 (out, out, const1_rtx));
9503
9504 emit_label (align_3_label);
9505 }
9506
9507 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9508 QImode, 1, 0, end_0_label);
9509
9510 if (TARGET_64BIT)
9511 emit_insn (gen_adddi3 (out, out, const1_rtx));
9512 else
9513 emit_insn (gen_addsi3 (out, out, const1_rtx));
9514 }
9515
9516 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
9517 align this loop: it only makes the code larger and does not help to
9518 speed it up. */
9519 emit_label (align_4_label);
9520
9521 mem = gen_rtx_MEM (SImode, out);
9522 emit_move_insn (scratch, mem);
9523 if (TARGET_64BIT)
9524 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
9525 else
9526 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
9527
9528 /* This formula yields a nonzero result iff one of the bytes is zero.
9529 This saves three branches inside the loop and many cycles. */
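/* What is computed below is (X - 0x01010101) & ~X & 0x80808080: for each
   byte B of the word X, bit 7 of the corresponding result byte is set iff
   B is zero, except that the borrow out of a lower zero byte may also set
   bits above it.  That is enough here, since we only need to know whether
   a zero byte exists, and the bit for the lowest zero byte is always exact. */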
9530
9531 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
9532 emit_insn (gen_one_cmplsi2 (scratch, scratch));
9533 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
9534 emit_insn (gen_andsi3 (tmpreg, tmpreg,
9535 GEN_INT (trunc_int_for_mode
9536 (0x80808080, SImode))));
9537 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
9538 SImode, 1, 0, align_4_label);
9539
9540 if (TARGET_CMOVE)
9541 {
9542 rtx reg = gen_reg_rtx (SImode);
9543 rtx reg2 = gen_reg_rtx (Pmode);
9544 emit_move_insn (reg, tmpreg);
9545 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
9546
9547 /* If zero is not in the first two bytes, move two bytes forward. */
9548 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9549 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9550 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9551 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
9552 gen_rtx_IF_THEN_ELSE (SImode, tmp,
9553 reg,
9554 tmpreg)));
9555 /* Emit lea manually to avoid clobbering of flags. */
9556 emit_insn (gen_rtx_SET (SImode, reg2,
9557 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
9558
9559 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9560 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9561 emit_insn (gen_rtx_SET (VOIDmode, out,
9562 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
9563 reg2,
9564 out)));
9565
9566 }
9567 else
9568 {
9569 rtx end_2_label = gen_label_rtx ();
9570 /* Is zero in the first two bytes? */
9571
9572 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9573 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9574 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
9575 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9576 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
9577 pc_rtx);
9578 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9579 JUMP_LABEL (tmp) = end_2_label;
9580
9581 /* Not in the first two. Move two bytes forward. */
9582 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
9583 if (TARGET_64BIT)
9584 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
9585 else
9586 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
9587
9588 emit_label (end_2_label);
9589
9590 }
9591
9592 /* Avoid branch in fixing the byte. */
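/* Bit 7 of TMPREG's low byte is set iff the zero byte is the lower of the
   two remaining candidate bytes.  Adding the byte to itself moves that bit
   into the carry flag, so the subtract-with-borrow below decrements OUT by
   either 3 or 4, leaving it pointing at the terminating zero byte without
   a conditional branch. */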
9593 tmpreg = gen_lowpart (QImode, tmpreg);
9594 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
9595 if (TARGET_64BIT)
9596 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
9597 else
9598 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
9599
9600 emit_label (end_0_label);
9601 }
9602 \f
9603 /* Clear stack slot assignments remembered from previous functions.
9604 This is called from INIT_EXPANDERS once before RTL is emitted for each
9605 function. */
9606
9607 static void
9608 ix86_init_machine_status (p)
9609 struct function *p;
9610 {
9611 p->machine = (struct machine_function *)
9612 xcalloc (1, sizeof (struct machine_function));
9613 }
9614
9615 /* Mark machine specific bits of P for GC. */
9616 static void
9617 ix86_mark_machine_status (p)
9618 struct function *p;
9619 {
9620 struct machine_function *machine = p->machine;
9621 enum machine_mode mode;
9622 int n;
9623
9624 if (! machine)
9625 return;
9626
9627 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
9628 mode = (enum machine_mode) ((int) mode + 1))
9629 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
9630 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
9631 }
9632
9633 static void
9634 ix86_free_machine_status (p)
9635 struct function *p;
9636 {
9637 free (p->machine);
9638 p->machine = NULL;
9639 }
9640
9641 /* Return a MEM corresponding to a stack slot with mode MODE.
9642 Allocate a new slot if necessary.
9643
9644 The RTL for a function can have several slots available: N is
9645 which slot to use. */
9646
9647 rtx
9648 assign_386_stack_local (mode, n)
9649 enum machine_mode mode;
9650 int n;
9651 {
9652 if (n < 0 || n >= MAX_386_STACK_LOCALS)
9653 abort ();
9654
9655 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
9656 ix86_stack_locals[(int) mode][n]
9657 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
9658
9659 return ix86_stack_locals[(int) mode][n];
9660 }
9661 \f
9662 /* Calculate the length of the memory address in the instruction
9663 encoding. Does not include the one-byte modrm, opcode, or prefix. */
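/* Derived from the cases below: a plain register-indirect address normally
   adds nothing, a register plus an 8-bit displacement adds one byte, an
   absolute address or a 32-bit displacement adds four, and an index
   register adds one more byte for the SIB byte. */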
9664
9665 static int
9666 memory_address_length (addr)
9667 rtx addr;
9668 {
9669 struct ix86_address parts;
9670 rtx base, index, disp;
9671 int len;
9672
9673 if (GET_CODE (addr) == PRE_DEC
9674 || GET_CODE (addr) == POST_INC
9675 || GET_CODE (addr) == PRE_MODIFY
9676 || GET_CODE (addr) == POST_MODIFY)
9677 return 0;
9678
9679 if (! ix86_decompose_address (addr, &parts))
9680 abort ();
9681
9682 base = parts.base;
9683 index = parts.index;
9684 disp = parts.disp;
9685 len = 0;
9686
9687 /* Register Indirect. */
9688 if (base && !index && !disp)
9689 {
9690 /* Special cases: ebp and esp need the two-byte modrm form. */
9691 if (addr == stack_pointer_rtx
9692 || addr == arg_pointer_rtx
9693 || addr == frame_pointer_rtx
9694 || addr == hard_frame_pointer_rtx)
9695 len = 1;
9696 }
9697
9698 /* Direct Addressing. */
9699 else if (disp && !base && !index)
9700 len = 4;
9701
9702 else
9703 {
9704 /* Find the length of the displacement constant. */
9705 if (disp)
9706 {
9707 if (GET_CODE (disp) == CONST_INT
9708 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
9709 len = 1;
9710 else
9711 len = 4;
9712 }
9713
9714 /* An index register requires the SIB byte, i.e. the two-byte modrm form. */
9715 if (index)
9716 len += 1;
9717 }
9718
9719 return len;
9720 }
9721
9722 /* Compute the default value for the "length_immediate" attribute. When
9723 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
9724 int
9725 ix86_attr_length_immediate_default (insn, shortform)
9726 rtx insn;
9727 int shortform;
9728 {
9729 int len = 0;
9730 int i;
9731 extract_insn_cached (insn);
9732 for (i = recog_data.n_operands - 1; i >= 0; --i)
9733 if (CONSTANT_P (recog_data.operand[i]))
9734 {
9735 if (len)
9736 abort ();
9737 if (shortform
9738 && GET_CODE (recog_data.operand[i]) == CONST_INT
9739 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
9740 len = 1;
9741 else
9742 {
9743 switch (get_attr_mode (insn))
9744 {
9745 case MODE_QI:
9746 len += 1;
9747 break;
9748 case MODE_HI:
9749 len += 2;
9750 break;
9751 case MODE_SI:
9752 len += 4;
9753 break;
9754 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
9755 case MODE_DI:
9756 len += 4;
9757 break;
9758 default:
9759 fatal_insn ("Unknown insn mode", insn);
9760 }
9761 }
9762 }
9763 return len;
9764 }
9765 /* Compute the default value for the "length_address" attribute. */
9766 int
9767 ix86_attr_length_address_default (insn)
9768 rtx insn;
9769 {
9770 int i;
9771 extract_insn_cached (insn);
9772 for (i = recog_data.n_operands - 1; i >= 0; --i)
9773 if (GET_CODE (recog_data.operand[i]) == MEM)
9774 {
9775 return memory_address_length (XEXP (recog_data.operand[i], 0));
9776 break;
9777 }
9778 return 0;
9779 }
9780 \f
9781 /* Return the maximum number of instructions a cpu can issue. */
9782
9783 static int
9784 ix86_issue_rate ()
9785 {
9786 switch (ix86_cpu)
9787 {
9788 case PROCESSOR_PENTIUM:
9789 case PROCESSOR_K6:
9790 return 2;
9791
9792 case PROCESSOR_PENTIUMPRO:
9793 case PROCESSOR_PENTIUM4:
9794 case PROCESSOR_ATHLON:
9795 return 3;
9796
9797 default:
9798 return 1;
9799 }
9800 }
9801
9802 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
9803 set by DEP_INSN and nothing else set by DEP_INSN. */
9804
9805 static int
9806 ix86_flags_dependant (insn, dep_insn, insn_type)
9807 rtx insn, dep_insn;
9808 enum attr_type insn_type;
9809 {
9810 rtx set, set2;
9811
9812 /* Simplify the test for uninteresting insns. */
9813 if (insn_type != TYPE_SETCC
9814 && insn_type != TYPE_ICMOV
9815 && insn_type != TYPE_FCMOV
9816 && insn_type != TYPE_IBR)
9817 return 0;
9818
9819 if ((set = single_set (dep_insn)) != 0)
9820 {
9821 set = SET_DEST (set);
9822 set2 = NULL_RTX;
9823 }
9824 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
9825 && XVECLEN (PATTERN (dep_insn), 0) == 2
9826 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
9827 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
9828 {
9829 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
9830 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
9831 }
9832 else
9833 return 0;
9834
9835 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
9836 return 0;
9837
9838 /* This test is true if the dependent insn reads the flags but
9839 not any other potentially set register. */
9840 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
9841 return 0;
9842
9843 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
9844 return 0;
9845
9846 return 1;
9847 }
9848
9849 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
9850 address with operands set by DEP_INSN. */
9851
9852 static int
9853 ix86_agi_dependant (insn, dep_insn, insn_type)
9854 rtx insn, dep_insn;
9855 enum attr_type insn_type;
9856 {
9857 rtx addr;
9858
9859 if (insn_type == TYPE_LEA
9860 && TARGET_PENTIUM)
9861 {
9862 addr = PATTERN (insn);
9863 if (GET_CODE (addr) == SET)
9864 ;
9865 else if (GET_CODE (addr) == PARALLEL
9866 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
9867 addr = XVECEXP (addr, 0, 0);
9868 else
9869 abort ();
9870 addr = SET_SRC (addr);
9871 }
9872 else
9873 {
9874 int i;
9875 extract_insn_cached (insn);
9876 for (i = recog_data.n_operands - 1; i >= 0; --i)
9877 if (GET_CODE (recog_data.operand[i]) == MEM)
9878 {
9879 addr = XEXP (recog_data.operand[i], 0);
9880 goto found;
9881 }
9882 return 0;
9883 found:;
9884 }
9885
9886 return modified_in_p (addr, dep_insn);
9887 }
9888
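/* Adjust COST, the number of cycles the scheduler charges for the
   dependence LINK between DEP_INSN and INSN, according to the pipeline
   model of the CPU being scheduled for. */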
9889 static int
9890 ix86_adjust_cost (insn, link, dep_insn, cost)
9891 rtx insn, link, dep_insn;
9892 int cost;
9893 {
9894 enum attr_type insn_type, dep_insn_type;
9895 enum attr_memory memory, dep_memory;
9896 rtx set, set2;
9897 int dep_insn_code_number;
9898
9899 /* Anti and output dependencies have zero cost on all CPUs. */
9900 if (REG_NOTE_KIND (link) != 0)
9901 return 0;
9902
9903 dep_insn_code_number = recog_memoized (dep_insn);
9904
9905 /* If we can't recognize the insns, we can't really do anything. */
9906 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
9907 return cost;
9908
9909 insn_type = get_attr_type (insn);
9910 dep_insn_type = get_attr_type (dep_insn);
9911
9912 switch (ix86_cpu)
9913 {
9914 case PROCESSOR_PENTIUM:
9915 /* Address Generation Interlock adds a cycle of latency. */
9916 if (ix86_agi_dependant (insn, dep_insn, insn_type))
9917 cost += 1;
9918
9919 /* ??? Compares pair with jump/setcc. */
9920 if (ix86_flags_dependant (insn, dep_insn, insn_type))
9921 cost = 0;
9922
9923 /* Floating point stores require the value to be ready one cycle earlier. */
9924 if (insn_type == TYPE_FMOV
9925 && get_attr_memory (insn) == MEMORY_STORE
9926 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9927 cost += 1;
9928 break;
9929
9930 case PROCESSOR_PENTIUMPRO:
9931 memory = get_attr_memory (insn);
9932 dep_memory = get_attr_memory (dep_insn);
9933
9934 /* Since we can't represent delayed latencies of load+operation,
9935 increase the cost here for non-imov insns. */
9936 if (dep_insn_type != TYPE_IMOV
9937 && dep_insn_type != TYPE_FMOV
9938 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
9939 cost += 1;
9940
9941 /* INT->FP conversion is expensive. */
9942 if (get_attr_fp_int_src (dep_insn))
9943 cost += 5;
9944
9945 /* There is one cycle extra latency between an FP op and a store. */
9946 if (insn_type == TYPE_FMOV
9947 && (set = single_set (dep_insn)) != NULL_RTX
9948 && (set2 = single_set (insn)) != NULL_RTX
9949 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
9950 && GET_CODE (SET_DEST (set2)) == MEM)
9951 cost += 1;
9952
9953 /* Show the ability of the reorder buffer to hide the latency of a load by
9954 executing it in parallel with the previous instruction when the previous
9955 instruction is not needed to compute the address. */
9956 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
9957 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9958 {
9959 /* Claim that moves take one cycle, as the core can issue one load
9960 at a time and the next load can start a cycle later. */
9961 if (dep_insn_type == TYPE_IMOV
9962 || dep_insn_type == TYPE_FMOV)
9963 cost = 1;
9964 else if (cost > 1)
9965 cost--;
9966 }
9967 break;
9968
9969 case PROCESSOR_K6:
9970 memory = get_attr_memory (insn);
9971 dep_memory = get_attr_memory (dep_insn);
9972 /* The esp dependency is resolved before the instruction is really
9973 finished. */
9974 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
9975 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
9976 return 1;
9977
9978 /* Since we can't represent delayed latencies of load+operation,
9979 increase the cost here for non-imov insns. */
9980 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
9981 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
9982
9983 /* INT->FP conversion is expensive. */
9984 if (get_attr_fp_int_src (dep_insn))
9985 cost += 5;
9986
9987 /* Show the ability of the reorder buffer to hide the latency of a load by
9988 executing it in parallel with the previous instruction when the previous
9989 instruction is not needed to compute the address. */
9990 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
9991 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9992 {
9993 /* Claim that moves take one cycle, as the core can issue one load
9994 at a time and the next load can start a cycle later. */
9995 if (dep_insn_type == TYPE_IMOV
9996 || dep_insn_type == TYPE_FMOV)
9997 cost = 1;
9998 else if (cost > 2)
9999 cost -= 2;
10000 else
10001 cost = 1;
10002 }
10003 break;
10004
10005 case PROCESSOR_ATHLON:
10006 memory = get_attr_memory (insn);
10007 dep_memory = get_attr_memory (dep_insn);
10008
10009 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10010 {
10011 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10012 cost += 2;
10013 else
10014 cost += 3;
10015 }
10016 /* Show the ability of the reorder buffer to hide the latency of a load by
10017 executing it in parallel with the previous instruction when the previous
10018 instruction is not needed to compute the address. */
10019 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10020 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10021 {
10022 /* Claim that moves take one cycle, as the core can issue one load
10023 at a time and the next load can start a cycle later. */
10024 if (dep_insn_type == TYPE_IMOV
10025 || dep_insn_type == TYPE_FMOV)
10026 cost = 0;
10027 else if (cost >= 3)
10028 cost -= 3;
10029 else
10030 cost = 0;
10031 }
10032
10033 default:
10034 break;
10035 }
10036
10037 return cost;
10038 }
10039
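/* Per-cycle state used by the scheduling hooks below; at present only the
   PentiumPro decoder model keeps any. */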
10040 static union
10041 {
10042 struct ppro_sched_data
10043 {
10044 rtx decode[3];
10045 int issued_this_cycle;
10046 } ppro;
10047 } ix86_sched_data;
10048
10049 static int
10050 ix86_safe_length (insn)
10051 rtx insn;
10052 {
10053 if (recog_memoized (insn) >= 0)
10054 return get_attr_length(insn);
10055 else
10056 return 128;
10057 }
10058
10059 static int
10060 ix86_safe_length_prefix (insn)
10061 rtx insn;
10062 {
10063 if (recog_memoized (insn) >= 0)
10064 return get_attr_length(insn);
10065 else
10066 return 0;
10067 }
10068
10069 static enum attr_memory
10070 ix86_safe_memory (insn)
10071 rtx insn;
10072 {
10073 if (recog_memoized (insn) >= 0)
10074 return get_attr_memory(insn);
10075 else
10076 return MEMORY_UNKNOWN;
10077 }
10078
10079 static enum attr_pent_pair
10080 ix86_safe_pent_pair (insn)
10081 rtx insn;
10082 {
10083 if (recog_memoized (insn) >= 0)
10084 return get_attr_pent_pair(insn);
10085 else
10086 return PENT_PAIR_NP;
10087 }
10088
10089 static enum attr_ppro_uops
10090 ix86_safe_ppro_uops (insn)
10091 rtx insn;
10092 {
10093 if (recog_memoized (insn) >= 0)
10094 return get_attr_ppro_uops (insn);
10095 else
10096 return PPRO_UOPS_MANY;
10097 }
10098
10099 static void
10100 ix86_dump_ppro_packet (dump)
10101 FILE *dump;
10102 {
10103 if (ix86_sched_data.ppro.decode[0])
10104 {
10105 fprintf (dump, "PPRO packet: %d",
10106 INSN_UID (ix86_sched_data.ppro.decode[0]));
10107 if (ix86_sched_data.ppro.decode[1])
10108 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10109 if (ix86_sched_data.ppro.decode[2])
10110 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10111 fputc ('\n', dump);
10112 }
10113 }
10114
10115 /* We're beginning a new block. Initialize data structures as necessary. */
10116
10117 static void
10118 ix86_sched_init (dump, sched_verbose, veclen)
10119 FILE *dump ATTRIBUTE_UNUSED;
10120 int sched_verbose ATTRIBUTE_UNUSED;
10121 int veclen ATTRIBUTE_UNUSED;
10122 {
10123 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
10124 }
10125
10126 /* Shift INSN to SLOT, and shift everything else down. */
10127
10128 static void
10129 ix86_reorder_insn (insnp, slot)
10130 rtx *insnp, *slot;
10131 {
10132 if (insnp != slot)
10133 {
10134 rtx insn = *insnp;
10135 do
10136 insnp[0] = insnp[1];
10137 while (++insnp != slot);
10138 *insnp = insn;
10139 }
10140 }
10141
10142 /* Find an instruction with the given pairability and the minimal number of
10143 cycles lost to the fact that the CPU waits for both pipelines to finish
10144 before reading the next instructions. Also take care that the two
10145 instructions together do not exceed 7 bytes. */
10146
10147 static rtx *
10148 ix86_pent_find_pair (e_ready, ready, type, first)
10149 rtx *e_ready;
10150 rtx *ready;
10151 enum attr_pent_pair type;
10152 rtx first;
10153 {
10154 int mincycles, cycles;
10155 enum attr_pent_pair tmp;
10156 enum attr_memory memory;
10157 rtx *insnp, *bestinsnp = NULL;
10158
10159 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
10160 return NULL;
10161
10162 memory = ix86_safe_memory (first);
10163 cycles = result_ready_cost (first);
10164 mincycles = INT_MAX;
10165
10166 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
10167 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
10168 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
10169 {
10170 enum attr_memory second_memory;
10171 int secondcycles, currentcycles;
10172
10173 second_memory = ix86_safe_memory (*insnp);
10174 secondcycles = result_ready_cost (*insnp);
10175 currentcycles = abs (cycles - secondcycles);
10176
10177 if (secondcycles >= 1 && cycles >= 1)
10178 {
10179 /* Two read/modify/write instructions together take two
10180 cycles longer. */
10181 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
10182 currentcycles += 2;
10183
10184 /* A read/modify/write instruction followed by a read/modify
10185 instruction takes one cycle longer. */
10186 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
10187 && tmp != PENT_PAIR_UV
10188 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
10189 currentcycles += 1;
10190 }
10191 if (currentcycles < mincycles)
10192 bestinsnp = insnp, mincycles = currentcycles;
10193 }
10194
10195 return bestinsnp;
10196 }
10197
10198 /* Subroutines of ix86_sched_reorder. */
10199
10200 static void
10201 ix86_sched_reorder_pentium (ready, e_ready)
10202 rtx *ready;
10203 rtx *e_ready;
10204 {
10205 enum attr_pent_pair pair1, pair2;
10206 rtx *insnp;
10207
10208 /* This wouldn't be necessary if Haifa knew that static insn ordering
10209 determines which pipe an insn is issued to. So we have to make
10210 some minor rearrangements. */
10211
10212 pair1 = ix86_safe_pent_pair (*e_ready);
10213
10214 /* If the first insn is non-pairable, let it be. */
10215 if (pair1 == PENT_PAIR_NP)
10216 return;
10217
10218 pair2 = PENT_PAIR_NP;
10219 insnp = 0;
10220
10221 /* If the first insn is UV or PV pairable, search for a PU
10222 insn to go with. */
10223 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
10224 {
10225 insnp = ix86_pent_find_pair (e_ready-1, ready,
10226 PENT_PAIR_PU, *e_ready);
10227 if (insnp)
10228 pair2 = PENT_PAIR_PU;
10229 }
10230
10231 /* If the first insn is PU or UV pairable, search for a PV
10232 insn to go with. */
10233 if (pair2 == PENT_PAIR_NP
10234 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
10235 {
10236 insnp = ix86_pent_find_pair (e_ready-1, ready,
10237 PENT_PAIR_PV, *e_ready);
10238 if (insnp)
10239 pair2 = PENT_PAIR_PV;
10240 }
10241
10242 /* If the first insn is pairable, search for a UV
10243 insn to go with. */
10244 if (pair2 == PENT_PAIR_NP)
10245 {
10246 insnp = ix86_pent_find_pair (e_ready-1, ready,
10247 PENT_PAIR_UV, *e_ready);
10248 if (insnp)
10249 pair2 = PENT_PAIR_UV;
10250 }
10251
10252 if (pair2 == PENT_PAIR_NP)
10253 return;
10254
10255 /* Found something! Decide if we need to swap the order. */
10256 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
10257 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
10258 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
10259 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
10260 ix86_reorder_insn (insnp, e_ready);
10261 else
10262 ix86_reorder_insn (insnp, e_ready - 1);
10263 }
10264
10265 static void
10266 ix86_sched_reorder_ppro (ready, e_ready)
10267 rtx *ready;
10268 rtx *e_ready;
10269 {
10270 rtx decode[3];
10271 enum attr_ppro_uops cur_uops;
10272 int issued_this_cycle;
10273 rtx *insnp;
10274 int i;
10275
10276 /* At this point .ppro.decode contains the state of the three
10277 decoders from the last "cycle". That is, those insns that were
10278 actually independent. But here we're scheduling for the
10279 decoder, and we may find things that are decodable in the
10280 same cycle. */
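/* The PPro front end has one "complex" decoder that handles insns of up to
   four uops and two "simple" decoders limited to single-uop insns (the
   4-1-1 decode template); the three slots below are filled accordingly. */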
10281
10282 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
10283 issued_this_cycle = 0;
10284
10285 insnp = e_ready;
10286 cur_uops = ix86_safe_ppro_uops (*insnp);
10287
10288 /* If the decoders are empty, and we have a complex insn at the
10289 head of the priority queue, let it issue without complaint. */
10290 if (decode[0] == NULL)
10291 {
10292 if (cur_uops == PPRO_UOPS_MANY)
10293 {
10294 decode[0] = *insnp;
10295 goto ppro_done;
10296 }
10297
10298 /* Otherwise, search for a 2-4 uop insn to issue. */
10299 while (cur_uops != PPRO_UOPS_FEW)
10300 {
10301 if (insnp == ready)
10302 break;
10303 cur_uops = ix86_safe_ppro_uops (*--insnp);
10304 }
10305
10306 /* If so, move it to the head of the line. */
10307 if (cur_uops == PPRO_UOPS_FEW)
10308 ix86_reorder_insn (insnp, e_ready);
10309
10310 /* Issue the head of the queue. */
10311 issued_this_cycle = 1;
10312 decode[0] = *e_ready--;
10313 }
10314
10315 /* Look for simple insns to fill in the other two slots. */
10316 for (i = 1; i < 3; ++i)
10317 if (decode[i] == NULL)
10318 {
10319 if (ready >= e_ready)
10320 goto ppro_done;
10321
10322 insnp = e_ready;
10323 cur_uops = ix86_safe_ppro_uops (*insnp);
10324 while (cur_uops != PPRO_UOPS_ONE)
10325 {
10326 if (insnp == ready)
10327 break;
10328 cur_uops = ix86_safe_ppro_uops (*--insnp);
10329 }
10330
10331 /* Found one. Move it to the head of the queue and issue it. */
10332 if (cur_uops == PPRO_UOPS_ONE)
10333 {
10334 ix86_reorder_insn (insnp, e_ready);
10335 decode[i] = *e_ready--;
10336 issued_this_cycle++;
10337 continue;
10338 }
10339
10340 /* ??? Didn't find one. Ideally, here we would do a lazy split
10341 of 2-uop insns, issue one and queue the other. */
10342 }
10343
10344 ppro_done:
10345 if (issued_this_cycle == 0)
10346 issued_this_cycle = 1;
10347 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
10348 }
10349
10350 /* We are about to begin issuing insns for this clock cycle.
10351 Override the default sort algorithm to better slot instructions. */
10352 static int
10353 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
10354 FILE *dump ATTRIBUTE_UNUSED;
10355 int sched_verbose ATTRIBUTE_UNUSED;
10356 rtx *ready;
10357 int *n_readyp;
10358 int clock_var ATTRIBUTE_UNUSED;
10359 {
10360 int n_ready = *n_readyp;
10361 rtx *e_ready = ready + n_ready - 1;
10362
10363 if (n_ready < 2)
10364 goto out;
10365
10366 switch (ix86_cpu)
10367 {
10368 default:
10369 break;
10370
10371 case PROCESSOR_PENTIUM:
10372 ix86_sched_reorder_pentium (ready, e_ready);
10373 break;
10374
10375 case PROCESSOR_PENTIUMPRO:
10376 ix86_sched_reorder_ppro (ready, e_ready);
10377 break;
10378 }
10379
10380 out:
10381 return ix86_issue_rate ();
10382 }
10383
10384 /* We are about to issue INSN. Return the number of insns left on the
10385 ready queue that can be issued this cycle. */
10386
10387 static int
10388 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
10389 FILE *dump;
10390 int sched_verbose;
10391 rtx insn;
10392 int can_issue_more;
10393 {
10394 int i;
10395 switch (ix86_cpu)
10396 {
10397 default:
10398 return can_issue_more - 1;
10399
10400 case PROCESSOR_PENTIUMPRO:
10401 {
10402 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
10403
10404 if (uops == PPRO_UOPS_MANY)
10405 {
10406 if (sched_verbose)
10407 ix86_dump_ppro_packet (dump);
10408 ix86_sched_data.ppro.decode[0] = insn;
10409 ix86_sched_data.ppro.decode[1] = NULL;
10410 ix86_sched_data.ppro.decode[2] = NULL;
10411 if (sched_verbose)
10412 ix86_dump_ppro_packet (dump);
10413 ix86_sched_data.ppro.decode[0] = NULL;
10414 }
10415 else if (uops == PPRO_UOPS_FEW)
10416 {
10417 if (sched_verbose)
10418 ix86_dump_ppro_packet (dump);
10419 ix86_sched_data.ppro.decode[0] = insn;
10420 ix86_sched_data.ppro.decode[1] = NULL;
10421 ix86_sched_data.ppro.decode[2] = NULL;
10422 }
10423 else
10424 {
10425 for (i = 0; i < 3; ++i)
10426 if (ix86_sched_data.ppro.decode[i] == NULL)
10427 {
10428 ix86_sched_data.ppro.decode[i] = insn;
10429 break;
10430 }
10431 if (i == 3)
10432 abort ();
10433 if (i == 2)
10434 {
10435 if (sched_verbose)
10436 ix86_dump_ppro_packet (dump);
10437 ix86_sched_data.ppro.decode[0] = NULL;
10438 ix86_sched_data.ppro.decode[1] = NULL;
10439 ix86_sched_data.ppro.decode[2] = NULL;
10440 }
10441 }
10442 }
10443 return --ix86_sched_data.ppro.issued_this_cycle;
10444 }
10445 }
10446 \f
10447 /* Walk through INSNS and look for MEM references whose address is DSTREG or
10448 SRCREG and set the memory attributes to those of DSTREF or SRCREF, as
10449 appropriate. */
10450
10451 void
10452 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
10453 rtx insns;
10454 rtx dstref, srcref, dstreg, srcreg;
10455 {
10456 rtx insn;
10457
10458 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
10459 if (INSN_P (insn))
10460 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
10461 dstreg, srcreg);
10462 }
10463
10464 /* Subroutine of above to actually do the updating by recursively walking
10465 the rtx. */
10466
10467 static void
10468 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
10469 rtx x;
10470 rtx dstref, srcref, dstreg, srcreg;
10471 {
10472 enum rtx_code code = GET_CODE (x);
10473 const char *format_ptr = GET_RTX_FORMAT (code);
10474 int i, j;
10475
10476 if (code == MEM && XEXP (x, 0) == dstreg)
10477 MEM_COPY_ATTRIBUTES (x, dstref);
10478 else if (code == MEM && XEXP (x, 0) == srcreg)
10479 MEM_COPY_ATTRIBUTES (x, srcref);
10480
10481 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
10482 {
10483 if (*format_ptr == 'e')
10484 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
10485 dstreg, srcreg);
10486 else if (*format_ptr == 'E')
10487 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10488 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
10489 dstreg, srcreg);
10490 }
10491 }
10492 \f
10493 /* Compute the alignment given to a constant that is being placed in memory.
10494 EXP is the constant and ALIGN is the alignment that the object would
10495 ordinarily have.
10496 The value of this function is used instead of that alignment to align
10497 the object. */
10498
10499 int
10500 ix86_constant_alignment (exp, align)
10501 tree exp;
10502 int align;
10503 {
10504 if (TREE_CODE (exp) == REAL_CST)
10505 {
10506 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
10507 return 64;
10508 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
10509 return 128;
10510 }
10511 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
10512 && align < 256)
10513 return 256;
10514
10515 return align;
10516 }
10517
10518 /* Compute the alignment for a static variable.
10519 TYPE is the data type, and ALIGN is the alignment that
10520 the object would ordinarily have. The value of this function is used
10521 instead of that alignment to align the object. */
10522
10523 int
10524 ix86_data_alignment (type, align)
10525 tree type;
10526 int align;
10527 {
10528 if (AGGREGATE_TYPE_P (type)
10529 && TYPE_SIZE (type)
10530 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10531 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
10532 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
10533 return 256;
10534
10535 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
10536 to a 16-byte boundary. */
10537 if (TARGET_64BIT)
10538 {
10539 if (AGGREGATE_TYPE_P (type)
10540 && TYPE_SIZE (type)
10541 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10542 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
10543 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10544 return 128;
10545 }
10546
10547 if (TREE_CODE (type) == ARRAY_TYPE)
10548 {
10549 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10550 return 64;
10551 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10552 return 128;
10553 }
10554 else if (TREE_CODE (type) == COMPLEX_TYPE)
10555 {
10556
10557 if (TYPE_MODE (type) == DCmode && align < 64)
10558 return 64;
10559 if (TYPE_MODE (type) == XCmode && align < 128)
10560 return 128;
10561 }
10562 else if ((TREE_CODE (type) == RECORD_TYPE
10563 || TREE_CODE (type) == UNION_TYPE
10564 || TREE_CODE (type) == QUAL_UNION_TYPE)
10565 && TYPE_FIELDS (type))
10566 {
10567 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10568 return 64;
10569 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10570 return 128;
10571 }
10572 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10573 || TREE_CODE (type) == INTEGER_TYPE)
10574 {
10575 if (TYPE_MODE (type) == DFmode && align < 64)
10576 return 64;
10577 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10578 return 128;
10579 }
10580
10581 return align;
10582 }
10583
10584 /* Compute the alignment for a local variable.
10585 TYPE is the data type, and ALIGN is the alignment that
10586 the object would ordinarily have. The value of this macro is used
10587 instead of that alignment to align the object. */
10588
10589 int
10590 ix86_local_alignment (type, align)
10591 tree type;
10592 int align;
10593 {
10594 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
10595 to a 16-byte boundary. */
10596 if (TARGET_64BIT)
10597 {
10598 if (AGGREGATE_TYPE_P (type)
10599 && TYPE_SIZE (type)
10600 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10601 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
10602 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10603 return 128;
10604 }
10605 if (TREE_CODE (type) == ARRAY_TYPE)
10606 {
10607 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10608 return 64;
10609 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10610 return 128;
10611 }
10612 else if (TREE_CODE (type) == COMPLEX_TYPE)
10613 {
10614 if (TYPE_MODE (type) == DCmode && align < 64)
10615 return 64;
10616 if (TYPE_MODE (type) == XCmode && align < 128)
10617 return 128;
10618 }
10619 else if ((TREE_CODE (type) == RECORD_TYPE
10620 || TREE_CODE (type) == UNION_TYPE
10621 || TREE_CODE (type) == QUAL_UNION_TYPE)
10622 && TYPE_FIELDS (type))
10623 {
10624 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10625 return 64;
10626 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10627 return 128;
10628 }
10629 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10630 || TREE_CODE (type) == INTEGER_TYPE)
10631 {
10632
10633 if (TYPE_MODE (type) == DFmode && align < 64)
10634 return 64;
10635 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10636 return 128;
10637 }
10638 return align;
10639 }
10640 \f
10641 /* Emit RTL insns to initialize the variable parts of a trampoline.
10642 FNADDR is an RTX for the address of the function's pure code.
10643 CXT is an RTX for the static chain value for the function. */
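/* The 32-bit trampoline emitted below is
       b9 <cxt:4>          movl  $CXT, %ecx
       e9 <disp:4>         jmp   FNADDR
   and the 64-bit one is
       41 bb <imm:4>       movl  $FNADDR, %r11d   (or 49 bb <imm:8>, movabs)
       49 ba <imm:8>       movabs $CXT, %r10
       49 ff e3            jmp   *%r11  */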
10644 void
10645 x86_initialize_trampoline (tramp, fnaddr, cxt)
10646 rtx tramp, fnaddr, cxt;
10647 {
10648 if (!TARGET_64BIT)
10649 {
10650 /* Compute offset from the end of the jmp to the target function. */
10651 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
10652 plus_constant (tramp, 10),
10653 NULL_RTX, 1, OPTAB_DIRECT);
10654 emit_move_insn (gen_rtx_MEM (QImode, tramp),
10655 GEN_INT (trunc_int_for_mode (0xb9, QImode)));
10656 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
10657 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
10658 GEN_INT (trunc_int_for_mode (0xe9, QImode)));
10659 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
10660 }
10661 else
10662 {
10663 int offset = 0;
10664 /* Try to load the address using the shorter movl instead of movabs.
10665 We may want to support movq for kernel mode, but the kernel does not use
10666 trampolines at the moment. */
10667 if (x86_64_zero_extended_value (fnaddr))
10668 {
10669 fnaddr = copy_to_mode_reg (DImode, fnaddr);
10670 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10671 GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
10672 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
10673 gen_lowpart (SImode, fnaddr));
10674 offset += 6;
10675 }
10676 else
10677 {
10678 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10679 GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
10680 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10681 fnaddr);
10682 offset += 10;
10683 }
10684 /* Load static chain using movabs to r10. */
10685 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10686 GEN_INT (trunc_int_for_mode (0xba49, HImode)));
10687 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10688 cxt);
10689 offset += 10;
10690 /* Jump to r11. */
10691 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10692 GEN_INT (trunc_int_for_mode (0xff49, HImode)));
10693 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
10694 GEN_INT (trunc_int_for_mode (0xe3, QImode)));
10695 offset += 3;
10696 if (offset > TRAMPOLINE_SIZE)
10697 abort ();
10698 }
10699 }
10700 \f
10701 #define def_builtin(MASK, NAME, TYPE, CODE) \
10702 do { \
10703 if ((MASK) & target_flags) \
10704 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
10705 } while (0)
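/* I.e. each builtin is registered only when the corresponding -m option
   (MASK_MMX, MASK_SSE, ...) is present in target_flags. */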
10706
10707 struct builtin_description
10708 {
10709 unsigned int mask;
10710 enum insn_code icode;
10711 const char * name;
10712 enum ix86_builtins code;
10713 enum rtx_code comparison;
10714 unsigned int flag;
10715 };
10716
10717 static struct builtin_description bdesc_comi[] =
10718 {
10719 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
10720 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
10721 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
10722 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
10723 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
10724 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
10725 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
10726 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
10727 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
10728 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
10729 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
10730 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
10731 };
10732
10733 static struct builtin_description bdesc_2arg[] =
10734 {
10735 /* SSE */
10736 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
10737 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
10738 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
10739 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
10740 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
10741 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
10742 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
10743 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
10744
10745 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
10746 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
10747 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
10748 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
10749 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
10750 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
10751 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
10752 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
10753 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
10754 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
10755 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
10756 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
10757 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
10758 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
10759 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
10760 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
10761 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
10762 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
10763 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
10764 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
10765 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
10766 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
10767 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
10768 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
10769
10770 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
10771 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
10772 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
10773 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
10774
10775 { MASK_SSE, CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
10776 { MASK_SSE, CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
10777 { MASK_SSE, CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
10778 { MASK_SSE, CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
10779
10780 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
10781 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
10782 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
10783 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
10784 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
10785
10786 /* MMX */
10787 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
10788 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
10789 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
10790 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
10791 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
10792 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
10793
10794 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
10795 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
10796 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
10797 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
10798 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
10799 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
10800 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
10801 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
10802
10803 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
10804 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
10805 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
10806
10807 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
10808 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
10809 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
10810 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
10811
10812 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
10813 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
10814
10815 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
10816 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
10817 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
10818 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
10819 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
10820 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
10821
10822 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
10823 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
10824 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
10825 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
10826
10827 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
10828 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
10829 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
10830 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
10831 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
10832 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
10833
10834 /* Special. */
10835 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
10836 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
10837 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
10838
10839 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
10840 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
10841
10842 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
10843 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
10844 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
10845 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
10846 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
10847 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
10848
10849 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
10850 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
10851 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
10852 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
10853 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
10854 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
10855
10856 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
10857 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
10858 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
10859 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
10860
10861 { MASK_SSE, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
10862 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
10863
10864 };
10865
10866 static struct builtin_description bdesc_1arg[] =
10867 {
10868 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
10869 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
10870
10871 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
10872 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
10873 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
10874
10875 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
10876 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
10877 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
10878 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
10879
10880 };
10881
10882 void
10883 ix86_init_builtins ()
10884 {
10885 if (TARGET_MMX)
10886 ix86_init_mmx_sse_builtins ();
10887 }
10888
10889 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
10890 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
10891 builtins. */
10892 void
10893 ix86_init_mmx_sse_builtins ()
10894 {
10895 struct builtin_description * d;
10896 size_t i;
10897 tree endlink = void_list_node;
10898
10899 tree pchar_type_node = build_pointer_type (char_type_node);
10900 tree pfloat_type_node = build_pointer_type (float_type_node);
10901 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
10902 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
10903
10904 /* Comparisons. */
10905 tree int_ftype_v4sf_v4sf
10906 = build_function_type (integer_type_node,
10907 tree_cons (NULL_TREE, V4SF_type_node,
10908 tree_cons (NULL_TREE,
10909 V4SF_type_node,
10910 endlink)));
10911 tree v4si_ftype_v4sf_v4sf
10912 = build_function_type (V4SI_type_node,
10913 tree_cons (NULL_TREE, V4SF_type_node,
10914 tree_cons (NULL_TREE,
10915 V4SF_type_node,
10916 endlink)));
10917 /* MMX/SSE/integer conversions. */
10918 tree int_ftype_v4sf
10919 = build_function_type (integer_type_node,
10920 tree_cons (NULL_TREE, V4SF_type_node,
10921 endlink));
10922 tree int_ftype_v8qi
10923 = build_function_type (integer_type_node,
10924 tree_cons (NULL_TREE, V8QI_type_node,
10925 endlink));
10926 tree int_ftype_v2si
10927 = build_function_type (integer_type_node,
10928 tree_cons (NULL_TREE, V2SI_type_node,
10929 endlink));
10930 tree v2si_ftype_int
10931 = build_function_type (V2SI_type_node,
10932 tree_cons (NULL_TREE, integer_type_node,
10933 endlink));
10934 tree v4sf_ftype_v4sf_int
10935 = build_function_type (V4SF_type_node,
10936 tree_cons (NULL_TREE, V4SF_type_node,
10937 tree_cons (NULL_TREE, integer_type_node,
10938 endlink)));
10939 tree v4sf_ftype_v4sf_v2si
10940 = build_function_type (V4SF_type_node,
10941 tree_cons (NULL_TREE, V4SF_type_node,
10942 tree_cons (NULL_TREE, V2SI_type_node,
10943 endlink)));
10944 tree int_ftype_v4hi_int
10945 = build_function_type (integer_type_node,
10946 tree_cons (NULL_TREE, V4HI_type_node,
10947 tree_cons (NULL_TREE, integer_type_node,
10948 endlink)));
10949 tree v4hi_ftype_v4hi_int_int
10950 = build_function_type (V4HI_type_node,
10951 tree_cons (NULL_TREE, V4HI_type_node,
10952 tree_cons (NULL_TREE, integer_type_node,
10953 tree_cons (NULL_TREE,
10954 integer_type_node,
10955 endlink))));
10956 /* Miscellaneous. */
10957 tree v8qi_ftype_v4hi_v4hi
10958 = build_function_type (V8QI_type_node,
10959 tree_cons (NULL_TREE, V4HI_type_node,
10960 tree_cons (NULL_TREE, V4HI_type_node,
10961 endlink)));
10962 tree v4hi_ftype_v2si_v2si
10963 = build_function_type (V4HI_type_node,
10964 tree_cons (NULL_TREE, V2SI_type_node,
10965 tree_cons (NULL_TREE, V2SI_type_node,
10966 endlink)));
10967 tree v4sf_ftype_v4sf_v4sf_int
10968 = build_function_type (V4SF_type_node,
10969 tree_cons (NULL_TREE, V4SF_type_node,
10970 tree_cons (NULL_TREE, V4SF_type_node,
10971 tree_cons (NULL_TREE,
10972 integer_type_node,
10973 endlink))));
10974 tree v4hi_ftype_v8qi_v8qi
10975 = build_function_type (V4HI_type_node,
10976 tree_cons (NULL_TREE, V8QI_type_node,
10977 tree_cons (NULL_TREE, V8QI_type_node,
10978 endlink)));
10979 tree v2si_ftype_v4hi_v4hi
10980 = build_function_type (V2SI_type_node,
10981 tree_cons (NULL_TREE, V4HI_type_node,
10982 tree_cons (NULL_TREE, V4HI_type_node,
10983 endlink)));
10984 tree v4hi_ftype_v4hi_int
10985 = build_function_type (V4HI_type_node,
10986 tree_cons (NULL_TREE, V4HI_type_node,
10987 tree_cons (NULL_TREE, integer_type_node,
10988 endlink)));
10989 tree v4hi_ftype_v4hi_di
10990 = build_function_type (V4HI_type_node,
10991 tree_cons (NULL_TREE, V4HI_type_node,
10992 tree_cons (NULL_TREE,
10993 long_long_integer_type_node,
10994 endlink)));
10995 tree v2si_ftype_v2si_di
10996 = build_function_type (V2SI_type_node,
10997 tree_cons (NULL_TREE, V2SI_type_node,
10998 tree_cons (NULL_TREE,
10999 long_long_integer_type_node,
11000 endlink)));
11001 tree void_ftype_void
11002 = build_function_type (void_type_node, endlink);
11003 tree void_ftype_pchar_int
11004 = build_function_type (void_type_node,
11005 tree_cons (NULL_TREE, pchar_type_node,
11006 tree_cons (NULL_TREE, integer_type_node,
11007 endlink)));
11008 tree void_ftype_unsigned
11009 = build_function_type (void_type_node,
11010 tree_cons (NULL_TREE, unsigned_type_node,
11011 endlink));
11012 tree unsigned_ftype_void
11013 = build_function_type (unsigned_type_node, endlink);
11014 tree di_ftype_void
11015 = build_function_type (long_long_unsigned_type_node, endlink);
11016 tree ti_ftype_void
11017 = build_function_type (intTI_type_node, endlink);
11018 tree v2si_ftype_v4sf
11019 = build_function_type (V2SI_type_node,
11020 tree_cons (NULL_TREE, V4SF_type_node,
11021 endlink));
11022 /* Loads/stores. */
11023 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
11024 tree_cons (NULL_TREE, V8QI_type_node,
11025 tree_cons (NULL_TREE,
11026 pchar_type_node,
11027 endlink)));
11028 tree void_ftype_v8qi_v8qi_pchar
11029 = build_function_type (void_type_node, maskmovq_args);
11030 tree v4sf_ftype_pfloat
11031 = build_function_type (V4SF_type_node,
11032 tree_cons (NULL_TREE, pfloat_type_node,
11033 endlink));
11034 tree v4sf_ftype_float
11035 = build_function_type (V4SF_type_node,
11036 tree_cons (NULL_TREE, float_type_node,
11037 endlink));
11038 tree v4sf_ftype_float_float_float_float
11039 = build_function_type (V4SF_type_node,
11040 tree_cons (NULL_TREE, float_type_node,
11041 tree_cons (NULL_TREE, float_type_node,
11042 tree_cons (NULL_TREE,
11043 float_type_node,
11044 tree_cons (NULL_TREE,
11045 float_type_node,
11046 endlink)))));
11047 /* @@@ the type is bogus */
11048 tree v4sf_ftype_v4sf_pv2si
11049 = build_function_type (V4SF_type_node,
11050 tree_cons (NULL_TREE, V4SF_type_node,
11051 tree_cons (NULL_TREE, pv2si_type_node,
11052 endlink)));
11053 tree void_ftype_pv2si_v4sf
11054 = build_function_type (void_type_node,
11055 tree_cons (NULL_TREE, pv2si_type_node,
11056 tree_cons (NULL_TREE, V4SF_type_node,
11057 endlink)));
11058 tree void_ftype_pfloat_v4sf
11059 = build_function_type (void_type_node,
11060 tree_cons (NULL_TREE, pfloat_type_node,
11061 tree_cons (NULL_TREE, V4SF_type_node,
11062 endlink)));
11063 tree void_ftype_pdi_di
11064 = build_function_type (void_type_node,
11065 tree_cons (NULL_TREE, pdi_type_node,
11066 tree_cons (NULL_TREE,
11067 long_long_unsigned_type_node,
11068 endlink)));
11069 /* Normal vector unops. */
11070 tree v4sf_ftype_v4sf
11071 = build_function_type (V4SF_type_node,
11072 tree_cons (NULL_TREE, V4SF_type_node,
11073 endlink));
11074
11075 /* Normal vector binops. */
11076 tree v4sf_ftype_v4sf_v4sf
11077 = build_function_type (V4SF_type_node,
11078 tree_cons (NULL_TREE, V4SF_type_node,
11079 tree_cons (NULL_TREE, V4SF_type_node,
11080 endlink)));
11081 tree v8qi_ftype_v8qi_v8qi
11082 = build_function_type (V8QI_type_node,
11083 tree_cons (NULL_TREE, V8QI_type_node,
11084 tree_cons (NULL_TREE, V8QI_type_node,
11085 endlink)));
11086 tree v4hi_ftype_v4hi_v4hi
11087 = build_function_type (V4HI_type_node,
11088 tree_cons (NULL_TREE, V4HI_type_node,
11089 tree_cons (NULL_TREE, V4HI_type_node,
11090 endlink)));
11091 tree v2si_ftype_v2si_v2si
11092 = build_function_type (V2SI_type_node,
11093 tree_cons (NULL_TREE, V2SI_type_node,
11094 tree_cons (NULL_TREE, V2SI_type_node,
11095 endlink)));
11096 tree ti_ftype_ti_ti
11097 = build_function_type (intTI_type_node,
11098 tree_cons (NULL_TREE, intTI_type_node,
11099 tree_cons (NULL_TREE, intTI_type_node,
11100 endlink)));
11101 tree di_ftype_di_di
11102 = build_function_type (long_long_unsigned_type_node,
11103 tree_cons (NULL_TREE, long_long_unsigned_type_node,
11104 tree_cons (NULL_TREE,
11105 long_long_unsigned_type_node,
11106 endlink)));
11107
11108 tree v2si_ftype_v2sf
11109 = build_function_type (V2SI_type_node,
11110 tree_cons (NULL_TREE, V2SF_type_node,
11111 endlink));
11112 tree v2sf_ftype_v2si
11113 = build_function_type (V2SF_type_node,
11114 tree_cons (NULL_TREE, V2SI_type_node,
11115 endlink));
11116 tree v2si_ftype_v2si
11117 = build_function_type (V2SI_type_node,
11118 tree_cons (NULL_TREE, V2SI_type_node,
11119 endlink));
11120 tree v2sf_ftype_v2sf
11121 = build_function_type (V2SF_type_node,
11122 tree_cons (NULL_TREE, V2SF_type_node,
11123 endlink));
11124 tree v2sf_ftype_v2sf_v2sf
11125 = build_function_type (V2SF_type_node,
11126 tree_cons (NULL_TREE, V2SF_type_node,
11127 tree_cons (NULL_TREE,
11128 V2SF_type_node,
11129 endlink)));
11130 tree v2si_ftype_v2sf_v2sf
11131 = build_function_type (V2SI_type_node,
11132 tree_cons (NULL_TREE, V2SF_type_node,
11133 tree_cons (NULL_TREE,
11134 V2SF_type_node,
11135 endlink)));
11136
11137 tree void_ftype_pchar
11138 = build_function_type (void_type_node,
11139 tree_cons (NULL_TREE, pchar_type_node,
11140 endlink));
11141
11142 /* Add all builtins that are more or less simple operations on two
11143 operands. */
11144 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
11145 {
11146 /* Use one of the operands; the target can have a different mode for
11147 mask-generating compares. */
11148 enum machine_mode mode;
11149 tree type;
11150
11151 if (d->name == 0)
11152 continue;
11153 mode = insn_data[d->icode].operand[1].mode;
11154
11155 switch (mode)
11156 {
11157 case V4SFmode:
11158 type = v4sf_ftype_v4sf_v4sf;
11159 break;
11160 case V8QImode:
11161 type = v8qi_ftype_v8qi_v8qi;
11162 break;
11163 case V4HImode:
11164 type = v4hi_ftype_v4hi_v4hi;
11165 break;
11166 case V2SImode:
11167 type = v2si_ftype_v2si_v2si;
11168 break;
11169 case TImode:
11170 type = ti_ftype_ti_ti;
11171 break;
11172 case DImode:
11173 type = di_ftype_di_di;
11174 break;
11175
11176 default:
11177 abort ();
11178 }
11179
11180 /* Override for comparisons. */
11181 if (d->icode == CODE_FOR_maskcmpv4sf3
11182 || d->icode == CODE_FOR_maskncmpv4sf3
11183 || d->icode == CODE_FOR_vmmaskcmpv4sf3
11184 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11185 type = v4si_ftype_v4sf_v4sf;
11186
11187 def_builtin (d->mask, d->name, type, d->code);
11188 }
11189
11190 /* Add the remaining MMX insns with somewhat more complicated types. */
11191 def_builtin (MASK_MMX, "__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
11192 def_builtin (MASK_MMX, "__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
11193 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
11194 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
11195 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
11196 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
11197 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
11198 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
11199 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
11200
11201 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
11202 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
11203 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
11204
11205 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
11206 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
11207
11208 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
11209 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
11210
11211 /* comi/ucomi insns. */
11212 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
11213 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
11214
11215 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
11216 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
11217 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
11218
11219 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
11220 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
11221 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
11222 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
11223 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
11224 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
11225
11226 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
11227 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
11228
11229 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
11230
11231 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
11232 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
11233 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
11234 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
11235 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
11236 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
11237
11238 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
11239 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
11240 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
11241 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
11242
11243 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
11244 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
11245 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
11246 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
11247
11248 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
11249 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
11250
11251 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
11252
11253 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
11254 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
11255 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
11256 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
11257 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
11258 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
11259
11260 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
11261
11262 /* Original 3DNow! */
11263 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
11264 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
11265 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
11266 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
11267 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
11268 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
11269 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
11270 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
11271 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
11272 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
11273 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
11274 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
11275 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
11276 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
11277 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
11278 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
11279 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
11280 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
11281 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
11282 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
11283 def_builtin (MASK_3DNOW, "__builtin_ia32_prefetch_3dnow", void_ftype_pchar, IX86_BUILTIN_PREFETCH_3DNOW);
11284 def_builtin (MASK_3DNOW, "__builtin_ia32_prefetchw", void_ftype_pchar, IX86_BUILTIN_PREFETCHW);
11285
11286 /* 3DNow! extension as used in the Athlon CPU. */
11287 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
11288 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
11289 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
11290 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
11291 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
11292 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
11293
11294 /* Composite intrinsics. */
11295 def_builtin (MASK_SSE, "__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
11296 def_builtin (MASK_SSE, "__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
11297 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
11298 def_builtin (MASK_SSE, "__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
11299 def_builtin (MASK_SSE, "__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
11300 def_builtin (MASK_SSE, "__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
11301 def_builtin (MASK_SSE, "__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
11302 }
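/* As a rough illustration of how the builtins registered above are used
   (this sketch is not part of the compiler itself; the __v4sf typedef
   follows the style of the contemporary <xmmintrin.h> wrappers, and
   __builtin_ia32_mulps is assumed to be one of the bdesc_2arg entries),
   code compiled with -msse could square four floats at a time like this:

     typedef int __v4sf __attribute__ ((mode (V4SF)));

     void
     square4 (float *dst, float *src)
     {
       __v4sf x = __builtin_ia32_loadups (src);
       x = __builtin_ia32_mulps (x, x);
       __builtin_ia32_storeups (dst, x);
     }

   __builtin_ia32_loadups and __builtin_ia32_storeups are the unaligned
   load/store builtins defined just above.  */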
11303
11304 /* Errors in the source file can cause expand_expr to return const0_rtx
11305 where we expect a vector. To avoid crashing, use one of the vector
11306 clear instructions. */
11307 static rtx
11308 safe_vector_operand (x, mode)
11309 rtx x;
11310 enum machine_mode mode;
11311 {
11312 if (x != const0_rtx)
11313 return x;
11314 x = gen_reg_rtx (mode);
11315
11316 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
11317 emit_insn (gen_mmx_clrdi (mode == DImode ? x
11318 : gen_rtx_SUBREG (DImode, x, 0)));
11319 else
11320 emit_insn (gen_sse_clrti (mode == TImode ? x
11321 : gen_rtx_SUBREG (TImode, x, 0)));
11322 return x;
11323 }
11324
11325 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
11326
11327 static rtx
11328 ix86_expand_binop_builtin (icode, arglist, target)
11329 enum insn_code icode;
11330 tree arglist;
11331 rtx target;
11332 {
11333 rtx pat;
11334 tree arg0 = TREE_VALUE (arglist);
11335 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11336 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11337 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11338 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11339 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11340 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
11341
11342 if (VECTOR_MODE_P (mode0))
11343 op0 = safe_vector_operand (op0, mode0);
11344 if (VECTOR_MODE_P (mode1))
11345 op1 = safe_vector_operand (op1, mode1);
11346
11347 if (! target
11348 || GET_MODE (target) != tmode
11349 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11350 target = gen_reg_rtx (tmode);
11351
11352 /* In case the insn wants input operands in modes different from
11353 the result, abort. */
11354 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
11355 abort ();
11356
11357 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11358 op0 = copy_to_mode_reg (mode0, op0);
11359 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11360 op1 = copy_to_mode_reg (mode1, op1);
11361
11362 pat = GEN_FCN (icode) (target, op0, op1);
11363 if (! pat)
11364 return 0;
11365 emit_insn (pat);
11366 return target;
11367 }
11368
11369 /* Subroutine of ix86_expand_builtin to take care of stores. */
11370
11371 static rtx
11372 ix86_expand_store_builtin (icode, arglist, shuffle)
11373 enum insn_code icode;
11374 tree arglist;
11375 int shuffle;
11376 {
11377 rtx pat;
11378 tree arg0 = TREE_VALUE (arglist);
11379 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11380 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11381 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11382 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
11383 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
11384
11385 if (VECTOR_MODE_P (mode1))
11386 op1 = safe_vector_operand (op1, mode1);
11387
11388 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11389 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11390 op1 = copy_to_mode_reg (mode1, op1);
11391 if (shuffle >= 0)
11392 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
11393 pat = GEN_FCN (icode) (op0, op1);
11394 if (pat)
11395 emit_insn (pat);
11396 return 0;
11397 }
11398
11399 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
11400
11401 static rtx
11402 ix86_expand_unop_builtin (icode, arglist, target, do_load)
11403 enum insn_code icode;
11404 tree arglist;
11405 rtx target;
11406 int do_load;
11407 {
11408 rtx pat;
11409 tree arg0 = TREE_VALUE (arglist);
11410 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11411 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11412 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11413
11414 if (! target
11415 || GET_MODE (target) != tmode
11416 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11417 target = gen_reg_rtx (tmode);
11418 if (do_load)
11419 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11420 else
11421 {
11422 if (VECTOR_MODE_P (mode0))
11423 op0 = safe_vector_operand (op0, mode0);
11424
11425 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11426 op0 = copy_to_mode_reg (mode0, op0);
11427 }
11428
11429 pat = GEN_FCN (icode) (target, op0);
11430 if (! pat)
11431 return 0;
11432 emit_insn (pat);
11433 return target;
11434 }
11435
11436 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
11437 sqrtss, rsqrtss, rcpss. */
11438
11439 static rtx
11440 ix86_expand_unop1_builtin (icode, arglist, target)
11441 enum insn_code icode;
11442 tree arglist;
11443 rtx target;
11444 {
11445 rtx pat;
11446 tree arg0 = TREE_VALUE (arglist);
11447 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11448 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11449 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11450
11451 if (! target
11452 || GET_MODE (target) != tmode
11453 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11454 target = gen_reg_rtx (tmode);
11455
11456 if (VECTOR_MODE_P (mode0))
11457 op0 = safe_vector_operand (op0, mode0);
11458
11459 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11460 op0 = copy_to_mode_reg (mode0, op0);
11461
11462 pat = GEN_FCN (icode) (target, op0, op0);
11463 if (! pat)
11464 return 0;
11465 emit_insn (pat);
11466 return target;
11467 }
11468
11469 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
11470
11471 static rtx
11472 ix86_expand_sse_compare (d, arglist, target)
11473 struct builtin_description *d;
11474 tree arglist;
11475 rtx target;
11476 {
11477 rtx pat;
11478 tree arg0 = TREE_VALUE (arglist);
11479 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11480 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11481 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11482 rtx op2;
11483 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
11484 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
11485 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
11486 enum rtx_code comparison = d->comparison;
11487
11488 if (VECTOR_MODE_P (mode0))
11489 op0 = safe_vector_operand (op0, mode0);
11490 if (VECTOR_MODE_P (mode1))
11491 op1 = safe_vector_operand (op1, mode1);
11492
11493 /* Swap operands if we have a comparison that isn't available in
11494 hardware. */
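      /* For instance, SSE provides a cmplt encoding but no direct cmpgt,
         so a greater-than compare is emitted as less-than with the
         operands exchanged; the d->flag field in bdesc_2arg marks which
         builtins need this treatment.  */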
11495 if (d->flag)
11496 {
11497 rtx tmp = gen_reg_rtx (mode1);
11498 emit_move_insn (tmp, op1);
11499 op1 = op0;
11500 op0 = tmp;
11501 }
11502
11503 if (! target
11504 || GET_MODE (target) != tmode
11505 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
11506 target = gen_reg_rtx (tmode);
11507
11508 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
11509 op0 = copy_to_mode_reg (mode0, op0);
11510 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
11511 op1 = copy_to_mode_reg (mode1, op1);
11512
11513 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11514 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
11515 if (! pat)
11516 return 0;
11517 emit_insn (pat);
11518 return target;
11519 }
11520
11521 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
11522
11523 static rtx
11524 ix86_expand_sse_comi (d, arglist, target)
11525 struct builtin_description *d;
11526 tree arglist;
11527 rtx target;
11528 {
11529 rtx pat;
11530 tree arg0 = TREE_VALUE (arglist);
11531 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11532 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11533 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11534 rtx op2;
11535 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
11536 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
11537 enum rtx_code comparison = d->comparison;
11538
11539 if (VECTOR_MODE_P (mode0))
11540 op0 = safe_vector_operand (op0, mode0);
11541 if (VECTOR_MODE_P (mode1))
11542 op1 = safe_vector_operand (op1, mode1);
11543
11544 /* Swap operands if we have a comparison that isn't available in
11545 hardware. */
11546 if (d->flag)
11547 {
11548 rtx tmp = op1;
11549 op1 = op0;
11550 op0 = tmp;
11551 }
11552
11553 target = gen_reg_rtx (SImode);
11554 emit_move_insn (target, const0_rtx);
11555 target = gen_rtx_SUBREG (QImode, target, 0);
11556
11557 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
11558 op0 = copy_to_mode_reg (mode0, op0);
11559 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
11560 op1 = copy_to_mode_reg (mode1, op1);
11561
11562 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11563 pat = GEN_FCN (d->icode) (op0, op1, op2);
11564 if (! pat)
11565 return 0;
11566 emit_insn (pat);
11567 emit_insn (gen_setcc_2 (target, op2));
11568
11569 return target;
11570 }
11571
11572 /* Expand an expression EXP that calls a built-in function,
11573 with result going to TARGET if that's convenient
11574 (and in mode MODE if that's convenient).
11575 SUBTARGET may be used as the target for computing one of EXP's operands.
11576 IGNORE is nonzero if the value is to be ignored. */
11577
11578 rtx
11579 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
11580 tree exp;
11581 rtx target;
11582 rtx subtarget ATTRIBUTE_UNUSED;
11583 enum machine_mode mode ATTRIBUTE_UNUSED;
11584 int ignore ATTRIBUTE_UNUSED;
11585 {
11586 struct builtin_description *d;
11587 size_t i;
11588 enum insn_code icode;
11589 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
11590 tree arglist = TREE_OPERAND (exp, 1);
11591 tree arg0, arg1, arg2, arg3;
11592 rtx op0, op1, op2, pat;
11593 enum machine_mode tmode, mode0, mode1, mode2;
11594 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11595
11596 switch (fcode)
11597 {
11598 case IX86_BUILTIN_EMMS:
11599 emit_insn (gen_emms ());
11600 return 0;
11601
11602 case IX86_BUILTIN_SFENCE:
11603 emit_insn (gen_sfence ());
11604 return 0;
11605
11606 case IX86_BUILTIN_M_FROM_INT:
11607 target = gen_reg_rtx (DImode);
11608 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11609 emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
11610 return target;
11611
11612 case IX86_BUILTIN_M_TO_INT:
11613 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11614 op0 = copy_to_mode_reg (DImode, op0);
11615 target = gen_reg_rtx (SImode);
11616 emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
11617 return target;
11618
11619 case IX86_BUILTIN_PEXTRW:
11620 icode = CODE_FOR_mmx_pextrw;
11621 arg0 = TREE_VALUE (arglist);
11622 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11623 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11624 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11625 tmode = insn_data[icode].operand[0].mode;
11626 mode0 = insn_data[icode].operand[1].mode;
11627 mode1 = insn_data[icode].operand[2].mode;
11628
11629 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11630 op0 = copy_to_mode_reg (mode0, op0);
11631 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11632 {
11633 /* @@@ better error message */
11634 error ("selector must be an immediate");
11635 return const0_rtx;
11636 }
11637 if (target == 0
11638 || GET_MODE (target) != tmode
11639 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11640 target = gen_reg_rtx (tmode);
11641 pat = GEN_FCN (icode) (target, op0, op1);
11642 if (! pat)
11643 return 0;
11644 emit_insn (pat);
11645 return target;
11646
11647 case IX86_BUILTIN_PINSRW:
11648 icode = CODE_FOR_mmx_pinsrw;
11649 arg0 = TREE_VALUE (arglist);
11650 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11651 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11652 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11653 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11654 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11655 tmode = insn_data[icode].operand[0].mode;
11656 mode0 = insn_data[icode].operand[1].mode;
11657 mode1 = insn_data[icode].operand[2].mode;
11658 mode2 = insn_data[icode].operand[3].mode;
11659
11660 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11661 op0 = copy_to_mode_reg (mode0, op0);
11662 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11663 op1 = copy_to_mode_reg (mode1, op1);
11664 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11665 {
11666 /* @@@ better error message */
11667 error ("selector must be an immediate");
11668 return const0_rtx;
11669 }
11670 if (target == 0
11671 || GET_MODE (target) != tmode
11672 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11673 target = gen_reg_rtx (tmode);
11674 pat = GEN_FCN (icode) (target, op0, op1, op2);
11675 if (! pat)
11676 return 0;
11677 emit_insn (pat);
11678 return target;
11679
11680 case IX86_BUILTIN_MASKMOVQ:
11681 icode = CODE_FOR_mmx_maskmovq;
11682 /* Note the arg order is different from the operand order. */
11683 arg1 = TREE_VALUE (arglist);
11684 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
11685 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11686 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11687 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11688 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11689 mode0 = insn_data[icode].operand[0].mode;
11690 mode1 = insn_data[icode].operand[1].mode;
11691 mode2 = insn_data[icode].operand[2].mode;
11692
11693       if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
11694 op0 = copy_to_mode_reg (mode0, op0);
11695 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11696 op1 = copy_to_mode_reg (mode1, op1);
11697 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
11698 op2 = copy_to_mode_reg (mode2, op2);
11699 pat = GEN_FCN (icode) (op0, op1, op2);
11700 if (! pat)
11701 return 0;
11702 emit_insn (pat);
11703 return 0;
11704
11705 case IX86_BUILTIN_SQRTSS:
11706 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
11707 case IX86_BUILTIN_RSQRTSS:
11708 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
11709 case IX86_BUILTIN_RCPSS:
11710 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
11711
11712 case IX86_BUILTIN_LOADAPS:
11713 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
11714
11715 case IX86_BUILTIN_LOADUPS:
11716 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
11717
11718 case IX86_BUILTIN_STOREAPS:
11719 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
11720 case IX86_BUILTIN_STOREUPS:
11721 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
11722
11723 case IX86_BUILTIN_LOADSS:
11724 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
11725
11726 case IX86_BUILTIN_STORESS:
11727 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
11728
11729 case IX86_BUILTIN_LOADHPS:
11730 case IX86_BUILTIN_LOADLPS:
11731 icode = (fcode == IX86_BUILTIN_LOADHPS
11732 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11733 arg0 = TREE_VALUE (arglist);
11734 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11735 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11736 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11737 tmode = insn_data[icode].operand[0].mode;
11738 mode0 = insn_data[icode].operand[1].mode;
11739 mode1 = insn_data[icode].operand[2].mode;
11740
11741 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11742 op0 = copy_to_mode_reg (mode0, op0);
11743 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
11744 if (target == 0
11745 || GET_MODE (target) != tmode
11746 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11747 target = gen_reg_rtx (tmode);
11748 pat = GEN_FCN (icode) (target, op0, op1);
11749 if (! pat)
11750 return 0;
11751 emit_insn (pat);
11752 return target;
11753
11754 case IX86_BUILTIN_STOREHPS:
11755 case IX86_BUILTIN_STORELPS:
11756 icode = (fcode == IX86_BUILTIN_STOREHPS
11757 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11758 arg0 = TREE_VALUE (arglist);
11759 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11760 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11761 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11762 mode0 = insn_data[icode].operand[1].mode;
11763 mode1 = insn_data[icode].operand[2].mode;
11764
11765 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11766 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11767 op1 = copy_to_mode_reg (mode1, op1);
11768
11769 pat = GEN_FCN (icode) (op0, op0, op1);
11770 if (! pat)
11771 return 0;
11772 emit_insn (pat);
11773 return 0;
11774
11775 case IX86_BUILTIN_MOVNTPS:
11776 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
11777 case IX86_BUILTIN_MOVNTQ:
11778 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
11779
11780 case IX86_BUILTIN_LDMXCSR:
11781 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11782 target = assign_386_stack_local (SImode, 0);
11783 emit_move_insn (target, op0);
11784 emit_insn (gen_ldmxcsr (target));
11785 return 0;
11786
11787 case IX86_BUILTIN_STMXCSR:
11788 target = assign_386_stack_local (SImode, 0);
11789 emit_insn (gen_stmxcsr (target));
11790 return copy_to_mode_reg (SImode, target);
11791
11792 case IX86_BUILTIN_PREFETCH:
11793 icode = CODE_FOR_prefetch;
11794 arg0 = TREE_VALUE (arglist);
11795 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11796 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11797 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11798 mode0 = insn_data[icode].operand[0].mode;
11799 mode1 = insn_data[icode].operand[1].mode;
11800
11801 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11802 {
11803 /* @@@ better error message */
11804 error ("selector must be an immediate");
11805 return const0_rtx;
11806 }
11807
11808 op0 = copy_to_mode_reg (Pmode, op0);
11809 pat = GEN_FCN (icode) (op0, op1);
11810 if (! pat)
11811 return 0;
11812 emit_insn (pat);
11813 return target;
11814
11815 case IX86_BUILTIN_SHUFPS:
11816 icode = CODE_FOR_sse_shufps;
11817 arg0 = TREE_VALUE (arglist);
11818 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11819 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11820 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11821 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11822 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11823 tmode = insn_data[icode].operand[0].mode;
11824 mode0 = insn_data[icode].operand[1].mode;
11825 mode1 = insn_data[icode].operand[2].mode;
11826 mode2 = insn_data[icode].operand[3].mode;
11827
11828 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11829 op0 = copy_to_mode_reg (mode0, op0);
11830 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11831 op1 = copy_to_mode_reg (mode1, op1);
11832 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11833 {
11834 /* @@@ better error message */
11835 error ("mask must be an immediate");
11836 return const0_rtx;
11837 }
11838 if (target == 0
11839 || GET_MODE (target) != tmode
11840 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11841 target = gen_reg_rtx (tmode);
11842 pat = GEN_FCN (icode) (target, op0, op1, op2);
11843 if (! pat)
11844 return 0;
11845 emit_insn (pat);
11846 return target;
11847
11848 case IX86_BUILTIN_PSHUFW:
11849 icode = CODE_FOR_mmx_pshufw;
11850 arg0 = TREE_VALUE (arglist);
11851 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11852 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11853 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11854 tmode = insn_data[icode].operand[0].mode;
11855 mode0 = insn_data[icode].operand[2].mode;
11856 mode1 = insn_data[icode].operand[3].mode;
11857
11858 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11859 op0 = copy_to_mode_reg (mode0, op0);
11860 if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
11861 {
11862 /* @@@ better error message */
11863 error ("mask must be an immediate");
11864 return const0_rtx;
11865 }
11866 if (target == 0
11867 || GET_MODE (target) != tmode
11868 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11869 target = gen_reg_rtx (tmode);
11870 pat = GEN_FCN (icode) (target, target, op0, op1);
11871 if (! pat)
11872 return 0;
11873 emit_insn (pat);
11874 return target;
11875
11876 case IX86_BUILTIN_FEMMS:
11877 emit_insn (gen_femms ());
11878 return NULL_RTX;
11879
11880 case IX86_BUILTIN_PAVGUSB:
11881 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
11882
11883 case IX86_BUILTIN_PF2ID:
11884 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
11885
11886 case IX86_BUILTIN_PFACC:
11887 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
11888
11889 case IX86_BUILTIN_PFADD:
11890 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
11891
11892 case IX86_BUILTIN_PFCMPEQ:
11893 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
11894
11895 case IX86_BUILTIN_PFCMPGE:
11896 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
11897
11898 case IX86_BUILTIN_PFCMPGT:
11899 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
11900
11901 case IX86_BUILTIN_PFMAX:
11902 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
11903
11904 case IX86_BUILTIN_PFMIN:
11905 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
11906
11907 case IX86_BUILTIN_PFMUL:
11908 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
11909
11910 case IX86_BUILTIN_PFRCP:
11911 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
11912
11913 case IX86_BUILTIN_PFRCPIT1:
11914 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
11915
11916 case IX86_BUILTIN_PFRCPIT2:
11917 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
11918
11919 case IX86_BUILTIN_PFRSQIT1:
11920 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
11921
11922 case IX86_BUILTIN_PFRSQRT:
11923 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
11924
11925 case IX86_BUILTIN_PFSUB:
11926 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
11927
11928 case IX86_BUILTIN_PFSUBR:
11929 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
11930
11931 case IX86_BUILTIN_PI2FD:
11932 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
11933
11934 case IX86_BUILTIN_PMULHRW:
11935 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
11936
11937 case IX86_BUILTIN_PREFETCH_3DNOW:
11938 icode = CODE_FOR_prefetch_3dnow;
11939 arg0 = TREE_VALUE (arglist);
11940 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11941 mode0 = insn_data[icode].operand[0].mode;
11942 pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0));
11943 if (! pat)
11944 return NULL_RTX;
11945 emit_insn (pat);
11946 return NULL_RTX;
11947
11948 case IX86_BUILTIN_PREFETCHW:
11949 icode = CODE_FOR_prefetchw;
11950 arg0 = TREE_VALUE (arglist);
11951 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11952 mode0 = insn_data[icode].operand[0].mode;
11953 pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0));
11954 if (! pat)
11955 return NULL_RTX;
11956 emit_insn (pat);
11957 return NULL_RTX;
11958
11959 case IX86_BUILTIN_PF2IW:
11960 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
11961
11962 case IX86_BUILTIN_PFNACC:
11963 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
11964
11965 case IX86_BUILTIN_PFPNACC:
11966 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
11967
11968 case IX86_BUILTIN_PI2FW:
11969 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
11970
11971 case IX86_BUILTIN_PSWAPDSI:
11972 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
11973
11974 case IX86_BUILTIN_PSWAPDSF:
11975 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
11976
11977 /* Composite intrinsics. */
11978 case IX86_BUILTIN_SETPS1:
11979 target = assign_386_stack_local (SFmode, 0);
11980 arg0 = TREE_VALUE (arglist);
11981 emit_move_insn (adjust_address (target, SFmode, 0),
11982 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
11983 op0 = gen_reg_rtx (V4SFmode);
11984 emit_insn (gen_sse_loadss (op0, adjust_address (target, V4SFmode, 0)));
11985 emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
11986 return op0;
11987
11988 case IX86_BUILTIN_SETPS:
11989 target = assign_386_stack_local (V4SFmode, 0);
11990 arg0 = TREE_VALUE (arglist);
11991 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11992 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11993 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
11994 emit_move_insn (adjust_address (target, SFmode, 0),
11995 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
11996 emit_move_insn (adjust_address (target, SFmode, 4),
11997 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
11998 emit_move_insn (adjust_address (target, SFmode, 8),
11999 expand_expr (arg2, NULL_RTX, VOIDmode, 0));
12000 emit_move_insn (adjust_address (target, SFmode, 12),
12001 expand_expr (arg3, NULL_RTX, VOIDmode, 0));
12002 op0 = gen_reg_rtx (V4SFmode);
12003 emit_insn (gen_sse_movaps (op0, target));
12004 return op0;
12005
12006 case IX86_BUILTIN_CLRPS:
12007 target = gen_reg_rtx (TImode);
12008 emit_insn (gen_sse_clrti (target));
12009 return target;
12010
12011 case IX86_BUILTIN_LOADRPS:
12012 target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
12013 gen_reg_rtx (V4SFmode), 1);
12014 emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
12015 return target;
12016
12017 case IX86_BUILTIN_LOADPS1:
12018 target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
12019 gen_reg_rtx (V4SFmode), 1);
12020 emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
12021 return target;
12022
12023 case IX86_BUILTIN_STOREPS1:
12024 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
12025 case IX86_BUILTIN_STORERPS:
12026 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
12027
12028 case IX86_BUILTIN_MMX_ZERO:
12029 target = gen_reg_rtx (DImode);
12030 emit_insn (gen_mmx_clrdi (target));
12031 return target;
12032
12033 default:
12034 break;
12035 }
12036
12037 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
12038 if (d->code == fcode)
12039 {
12040 /* Compares are treated specially. */
12041 if (d->icode == CODE_FOR_maskcmpv4sf3
12042 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12043 || d->icode == CODE_FOR_maskncmpv4sf3
12044 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12045 return ix86_expand_sse_compare (d, arglist, target);
12046
12047 return ix86_expand_binop_builtin (d->icode, arglist, target);
12048 }
12049
12050 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
12051 if (d->code == fcode)
12052 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
12053
12054 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
12055 if (d->code == fcode)
12056 return ix86_expand_sse_comi (d, arglist, target);
12057
12058 /* @@@ Should really do something sensible here. */
12059 return 0;
12060 }
12061
12062 /* Store OPERAND to memory after reload is completed.  This means
12063    that we cannot easily use assign_stack_local.  */
12064 rtx
12065 ix86_force_to_memory (mode, operand)
12066 enum machine_mode mode;
12067 rtx operand;
12068 {
12069 rtx result;
12070 if (!reload_completed)
12071 abort ();
12072 if (TARGET_64BIT && TARGET_RED_ZONE)
12073 {
12074 result = gen_rtx_MEM (mode,
12075 gen_rtx_PLUS (Pmode,
12076 stack_pointer_rtx,
12077 GEN_INT (-RED_ZONE_SIZE)));
12078 emit_move_insn (result, operand);
12079 }
12080 else if (TARGET_64BIT && !TARGET_RED_ZONE)
12081 {
12082 switch (mode)
12083 {
12084 case HImode:
12085 case SImode:
12086 operand = gen_lowpart (DImode, operand);
12087 /* FALLTHRU */
12088 case DImode:
12089 emit_insn (
12090 gen_rtx_SET (VOIDmode,
12091 gen_rtx_MEM (DImode,
12092 gen_rtx_PRE_DEC (DImode,
12093 stack_pointer_rtx)),
12094 operand));
12095 break;
12096 default:
12097 abort ();
12098 }
12099 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12100 }
12101 else
12102 {
12103 switch (mode)
12104 {
12105 case DImode:
12106 {
12107 rtx operands[2];
12108 split_di (&operand, 1, operands, operands + 1);
12109 emit_insn (
12110 gen_rtx_SET (VOIDmode,
12111 gen_rtx_MEM (SImode,
12112 gen_rtx_PRE_DEC (Pmode,
12113 stack_pointer_rtx)),
12114 operands[1]));
12115 emit_insn (
12116 gen_rtx_SET (VOIDmode,
12117 gen_rtx_MEM (SImode,
12118 gen_rtx_PRE_DEC (Pmode,
12119 stack_pointer_rtx)),
12120 operands[0]));
12121 }
12122 break;
12123 case HImode:
12124 /* It is better to store HImodes as SImodes. */
12125 if (!TARGET_PARTIAL_REG_STALL)
12126 operand = gen_lowpart (SImode, operand);
12127 /* FALLTHRU */
12128 case SImode:
12129 emit_insn (
12130 gen_rtx_SET (VOIDmode,
12131 gen_rtx_MEM (GET_MODE (operand),
12132 gen_rtx_PRE_DEC (SImode,
12133 stack_pointer_rtx)),
12134 operand));
12135 break;
12136 default:
12137 abort ();
12138 }
12139 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12140 }
12141 return result;
12142 }
12143
12144 /* Free the operand from memory.  */
12145 void
12146 ix86_free_from_memory (mode)
12147 enum machine_mode mode;
12148 {
12149 if (!TARGET_64BIT || !TARGET_RED_ZONE)
12150 {
12151 int size;
12152
12153 if (mode == DImode || TARGET_64BIT)
12154 size = 8;
12155 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
12156 size = 2;
12157 else
12158 size = 4;
12159       /* Use LEA to deallocate stack space.  In peephole2 it will be converted
12160          to a pop or add instruction if registers are available.  */
12161 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12162 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12163 GEN_INT (size))));
12164 }
12165 }
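/* As an illustrative sketch (not emitted literally by this file), forcing an
   SImode value that lives in %eax to memory and later freeing the slot on a
   32-bit target typically ends up as:

       pushl  %eax              ; ix86_force_to_memory: store via pre-decrement
       ...                      ; use 0(%esp) as the memory operand
       leal   4(%esp), %esp     ; ix86_free_from_memory: release the slot

   where peephole2 may later turn the lea back into a pop or add when a
   register is available.  */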
12166
12167 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
12168 QImode must go into class Q_REGS.
12169    Narrow ALL_REGS to GENERAL_REGS.  This allows movsf and movdf to do
12170    mem-to-mem moves through integer regs.  */
12171 enum reg_class
12172 ix86_preferred_reload_class (x, class)
12173 rtx x;
12174 enum reg_class class;
12175 {
12176 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
12177 {
12178 /* SSE can't load any constant directly yet. */
12179 if (SSE_CLASS_P (class))
12180 return NO_REGS;
12181 /* Floats can load 0 and 1. */
12182 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
12183 {
12184 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
12185 if (MAYBE_SSE_CLASS_P (class))
12186 return (reg_class_subset_p (class, GENERAL_REGS)
12187 ? GENERAL_REGS : FLOAT_REGS);
12188 else
12189 return class;
12190 }
12191 /* General regs can load everything. */
12192 if (reg_class_subset_p (class, GENERAL_REGS))
12193 return GENERAL_REGS;
12194 /* In case we haven't resolved FLOAT or SSE yet, give up. */
12195 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
12196 return NO_REGS;
12197 }
12198 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
12199 return NO_REGS;
12200 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
12201 return Q_REGS;
12202 return class;
12203 }
12204
12205 /* If we are copying between general and FP registers, we need a memory
12206 location. The same is true for SSE and MMX registers.
12207
12208    The macro can't work reliably when one of the CLASSES is a class containing
12209    registers from multiple units (SSE, MMX, integer).  We avoid this by never
12210    combining those units in a single alternative in the machine description.
12211 Ensure that this constraint holds to avoid unexpected surprises.
12212
12213 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
12214 enforce these sanity checks. */
12215 int
12216 ix86_secondary_memory_needed (class1, class2, mode, strict)
12217 enum reg_class class1, class2;
12218 enum machine_mode mode;
12219 int strict;
12220 {
12221 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
12222 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
12223 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
12224 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
12225 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
12226 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
12227 {
12228 if (strict)
12229 abort ();
12230 else
12231 return 1;
12232 }
12233 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
12234 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
12235 && (mode) != SImode)
12236 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12237 && (mode) != SImode));
12238 }
12239 /* Return the cost of moving data from a register in class CLASS1 to
12240 one in class CLASS2.
12241
12242 It is not required that the cost always equal 2 when FROM is the same as TO;
12243 on some machines it is expensive to move between registers if they are not
12244 general registers. */
12245 int
12246 ix86_register_move_cost (mode, class1, class2)
12247 enum machine_mode mode;
12248 enum reg_class class1, class2;
12249 {
12250   /* In case we require secondary memory, compute the cost of the store followed
12251      by the load.  When copying from a general purpose register we may emit
12252      multiple stores followed by a single load, causing a memory size mismatch
12253      stall.  Count this as an arbitrarily high cost of 20.  */
12254 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
12255 {
12256 int add_cost = 0;
12257 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
12258 add_cost = 20;
12259 return (MEMORY_MOVE_COST (mode, class1, 0)
12260 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
12261 }
12262 /* Moves between SSE/MMX and integer unit are expensive. */
12263 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12264 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
12265 return ix86_cost->mmxsse_to_integer;
12266 if (MAYBE_FLOAT_CLASS_P (class1))
12267 return ix86_cost->fp_move;
12268 if (MAYBE_SSE_CLASS_P (class1))
12269 return ix86_cost->sse_move;
12270 if (MAYBE_MMX_CLASS_P (class1))
12271 return ix86_cost->mmx_move;
12272 return 2;
12273 }
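/* For example, copying an SFmode value between FLOAT_REGS and GENERAL_REGS
   always needs secondary memory, so it is charged the cost of a store plus a
   load; an SImode copy between MMX_REGS and the integer registers avoids
   memory and is charged ix86_cost->mmxsse_to_integer instead.  */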
12274
12275 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
12276 int
12277 ix86_hard_regno_mode_ok (regno, mode)
12278 int regno;
12279 enum machine_mode mode;
12280 {
12281   /* Flags, and only flags, can hold CCmode values.  */
12282 if (CC_REGNO_P (regno))
12283 return GET_MODE_CLASS (mode) == MODE_CC;
12284 if (GET_MODE_CLASS (mode) == MODE_CC
12285 || GET_MODE_CLASS (mode) == MODE_RANDOM
12286 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
12287 return 0;
12288 if (FP_REGNO_P (regno))
12289 return VALID_FP_MODE_P (mode);
12290 if (SSE_REGNO_P (regno))
12291 return VALID_SSE_REG_MODE (mode);
12292 if (MMX_REGNO_P (regno))
12293 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
12294   /* We handle both integers and floats in the general purpose registers.
12295      In the future we should be able to handle vector modes as well.  */
12296 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
12297 return 0;
12298   /* Take care with QImode values: they can be in non-QI regs, but then
12299      they cause partial register stalls.  */
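  /* In 32-bit mode only %eax, %edx, %ecx and %ebx (regno < 4) have QImode
     subregisters; the other registers are admitted only during or after
     reload, or when partial register stalls are not being avoided.  In
     64-bit mode every register is byte-addressable via REX prefixes.  */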
12300 if (regno < 4 || mode != QImode || TARGET_64BIT)
12301 return 1;
12302 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
12303 }
12304
12305 /* Return the cost of moving data of mode M between a
12306 register and memory. A value of 2 is the default; this cost is
12307 relative to those in `REGISTER_MOVE_COST'.
12308
12309 If moving between registers and memory is more expensive than
12310 between two registers, you should define this macro to express the
12311 relative cost.
12312
12313    Also model the increased cost of moving QImode registers in non-Q_REGS
12314    classes.
12315 */
12316 int
12317 ix86_memory_move_cost (mode, class, in)
12318 enum machine_mode mode;
12319 enum reg_class class;
12320 int in;
12321 {
12322 if (FLOAT_CLASS_P (class))
12323 {
12324 int index;
12325 switch (mode)
12326 {
12327 case SFmode:
12328 index = 0;
12329 break;
12330 case DFmode:
12331 index = 1;
12332 break;
12333 case XFmode:
12334 case TFmode:
12335 index = 2;
12336 break;
12337 default:
12338 return 100;
12339 }
12340 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
12341 }
12342 if (SSE_CLASS_P (class))
12343 {
12344 int index;
12345 switch (GET_MODE_SIZE (mode))
12346 {
12347 case 4:
12348 index = 0;
12349 break;
12350 case 8:
12351 index = 1;
12352 break;
12353 case 16:
12354 index = 2;
12355 break;
12356 default:
12357 return 100;
12358 }
12359 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
12360 }
12361 if (MMX_CLASS_P (class))
12362 {
12363 int index;
12364 switch (GET_MODE_SIZE (mode))
12365 {
12366 case 4:
12367 index = 0;
12368 break;
12369 case 8:
12370 index = 1;
12371 break;
12372 default:
12373 return 100;
12374 }
12375 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
12376 }
12377 switch (GET_MODE_SIZE (mode))
12378 {
12379 case 1:
12380 if (in)
12381 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
12382 : ix86_cost->movzbl_load);
12383 else
12384 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
12385 : ix86_cost->int_store[0] + 4);
12386 break;
12387 case 2:
12388 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
12389 default:
12390       /* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
12391 if (mode == TFmode)
12392 mode = XFmode;
12393 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
12394 * (int) GET_MODE_SIZE (mode) / 4);
12395 }
12396 }
12397
12398 #ifdef DO_GLOBAL_CTORS_BODY
12399 static void
12400 ix86_svr3_asm_out_constructor (symbol, priority)
12401 rtx symbol;
12402 int priority ATTRIBUTE_UNUSED;
12403 {
12404 init_section ();
12405 fputs ("\tpushl $", asm_out_file);
12406 assemble_name (asm_out_file, XSTR (symbol, 0));
12407 fputc ('\n', asm_out_file);
12408 }
12409 #endif
12410
12411 #if defined(TARGET_ELF) && defined(TARGET_COFF)
12412 static void
12413 sco_asm_named_section (name, flags)
12414 const char *name;
12415 unsigned int flags;
12416 {
12417 if (TARGET_ELF)
12418 default_elf_asm_named_section (name, flags);
12419 else
12420 default_coff_asm_named_section (name, flags);
12421 }
12422
12423 static void
12424 sco_asm_out_constructor (symbol, priority)
12425 rtx symbol;
12426 int priority;
12427 {
12428 if (TARGET_ELF)
12429     default_named_section_asm_out_constructor (symbol, priority);
12430 else
12431 ix86_svr3_asm_out_constructor (symbol, priority);
12432 }
12433 #endif