1 /* Subroutines used for code generation on the DEC Alpha.
2 Copyright (C) 1992-2013 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "stor-layout.h"
29 #include "calls.h"
30 #include "varasm.h"
31 #include "regs.h"
32 #include "hard-reg-set.h"
33 #include "insn-config.h"
34 #include "conditions.h"
35 #include "output.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "recog.h"
39 #include "expr.h"
40 #include "optabs.h"
41 #include "reload.h"
42 #include "obstack.h"
43 #include "except.h"
44 #include "function.h"
45 #include "diagnostic-core.h"
46 #include "ggc.h"
47 #include "tm_p.h"
48 #include "target.h"
49 #include "target-def.h"
50 #include "common/common-target.h"
51 #include "debug.h"
52 #include "langhooks.h"
53 #include "splay-tree.h"
54 #include "gimple.h"
55 #include "gimplify.h"
56 #include "gimple-ssa.h"
57 #include "stringpool.h"
58 #include "tree-ssanames.h"
59 #include "tree-stdarg.h"
60 #include "tm-constrs.h"
61 #include "df.h"
62 #include "libfuncs.h"
63 #include "opts.h"
64 #include "params.h"
65
66 /* Specify which cpu to schedule for. */
67 enum processor_type alpha_tune;
68
69 /* Which cpu we're generating code for. */
70 enum processor_type alpha_cpu;
71
72 static const char * const alpha_cpu_name[] =
73 {
74 "ev4", "ev5", "ev6"
75 };
76
77 /* Specify how accurate floating-point traps need to be. */
78
79 enum alpha_trap_precision alpha_tp;
80
81 /* Specify the floating-point rounding mode. */
82
83 enum alpha_fp_rounding_mode alpha_fprm;
84
85 /* Specify which things cause traps. */
86
87 enum alpha_fp_trap_mode alpha_fptm;
88
89 /* Nonzero if inside of a function, because the Alpha asm can't
90 handle .files inside of functions. */
91
92 static int inside_function = FALSE;
93
94 /* The number of cycles of latency we should assume on memory reads. */
95
96 int alpha_memory_latency = 3;
97
98 /* Whether the function needs the GP. */
99
100 static int alpha_function_needs_gp;
101
102 /* The assembler name of the current function. */
103
104 static const char *alpha_fnname;
105
106 /* The next explicit relocation sequence number. */
107 extern GTY(()) int alpha_next_sequence_number;
108 int alpha_next_sequence_number = 1;
109
110 /* The literal and gpdisp sequence numbers for this insn, as printed
111 by %# and %* respectively. */
112 extern GTY(()) int alpha_this_literal_sequence_number;
113 extern GTY(()) int alpha_this_gpdisp_sequence_number;
114 int alpha_this_literal_sequence_number;
115 int alpha_this_gpdisp_sequence_number;
116
117 /* Costs of various operations on the different architectures. */
118
119 struct alpha_rtx_cost_data
120 {
121 unsigned char fp_add;
122 unsigned char fp_mult;
123 unsigned char fp_div_sf;
124 unsigned char fp_div_df;
125 unsigned char int_mult_si;
126 unsigned char int_mult_di;
127 unsigned char int_shift;
128 unsigned char int_cmov;
129 unsigned short int_div;
130 };
131
132 static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
133 {
134 { /* EV4 */
135 COSTS_N_INSNS (6), /* fp_add */
136 COSTS_N_INSNS (6), /* fp_mult */
137 COSTS_N_INSNS (34), /* fp_div_sf */
138 COSTS_N_INSNS (63), /* fp_div_df */
139 COSTS_N_INSNS (23), /* int_mult_si */
140 COSTS_N_INSNS (23), /* int_mult_di */
141 COSTS_N_INSNS (2), /* int_shift */
142 COSTS_N_INSNS (2), /* int_cmov */
143 COSTS_N_INSNS (97), /* int_div */
144 },
145 { /* EV5 */
146 COSTS_N_INSNS (4), /* fp_add */
147 COSTS_N_INSNS (4), /* fp_mult */
148 COSTS_N_INSNS (15), /* fp_div_sf */
149 COSTS_N_INSNS (22), /* fp_div_df */
150 COSTS_N_INSNS (8), /* int_mult_si */
151 COSTS_N_INSNS (12), /* int_mult_di */
152 COSTS_N_INSNS (1) + 1, /* int_shift */
153 COSTS_N_INSNS (1), /* int_cmov */
154 COSTS_N_INSNS (83), /* int_div */
155 },
156 { /* EV6 */
157 COSTS_N_INSNS (4), /* fp_add */
158 COSTS_N_INSNS (4), /* fp_mult */
159 COSTS_N_INSNS (12), /* fp_div_sf */
160 COSTS_N_INSNS (15), /* fp_div_df */
161 COSTS_N_INSNS (7), /* int_mult_si */
162 COSTS_N_INSNS (7), /* int_mult_di */
163 COSTS_N_INSNS (1), /* int_shift */
164 COSTS_N_INSNS (2), /* int_cmov */
165 COSTS_N_INSNS (86), /* int_div */
166 },
167 };
168
169 /* Similar but tuned for code size instead of execution latency. The
170 extra +N is fractional cost tuning based on latency. It's used to
171 encourage use of cheaper insns like shift, but only if there's just
172 one of them. */
173
174 static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
175 {
176 COSTS_N_INSNS (1), /* fp_add */
177 COSTS_N_INSNS (1), /* fp_mult */
178 COSTS_N_INSNS (1), /* fp_div_sf */
179 COSTS_N_INSNS (1) + 1, /* fp_div_df */
180 COSTS_N_INSNS (1) + 1, /* int_mult_si */
181 COSTS_N_INSNS (1) + 2, /* int_mult_di */
182 COSTS_N_INSNS (1), /* int_shift */
183 COSTS_N_INSNS (1), /* int_cmov */
184 COSTS_N_INSNS (6), /* int_div */
185 };
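/* As a worked illustration of the fractional tuning above, assuming the
   usual definition COSTS_N_INSNS (N) == (N) * 4 so that COSTS_N_INSNS (1)
   is 4:

     one shift                int_shift       = 4
     one SImode multiply      int_mult_si     = 4 + 1 = 5
     two shifts               2 * int_shift   = 8

   A single shift is still preferred over a multiply, but replacing the
   multiply with two or more shift/add insns is not, which is the
   "only if there's just one of them" behavior described above.  */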
186
187 /* Get the number of args of a function in one of two ways. */
188 #if TARGET_ABI_OPEN_VMS
189 #define NUM_ARGS crtl->args.info.num_args
190 #else
191 #define NUM_ARGS crtl->args.info
192 #endif
193
194 #define REG_PV 27
195 #define REG_RA 26
196
197 /* Declarations of static functions. */
198 static struct machine_function *alpha_init_machine_status (void);
199 static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
200
201 #if TARGET_ABI_OPEN_VMS
202 static void alpha_write_linkage (FILE *, const char *);
203 static bool vms_valid_pointer_mode (enum machine_mode);
204 #else
205 #define vms_patch_builtins() gcc_unreachable()
206 #endif
207 \f
208 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
209 /* Implement TARGET_MANGLE_TYPE. */
210
211 static const char *
212 alpha_mangle_type (const_tree type)
213 {
214 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
215 && TARGET_LONG_DOUBLE_128)
216 return "g";
217
218 /* For all other types, use normal C++ mangling. */
219 return NULL;
220 }
221 #endif
222
223 /* Parse target option strings. */
224
225 static void
226 alpha_option_override (void)
227 {
228 static const struct cpu_table {
229 const char *const name;
230 const enum processor_type processor;
231 const int flags;
232 const unsigned short line_size; /* in bytes */
233 const unsigned short l1_size; /* in kb. */
234 const unsigned short l2_size; /* in kb. */
235 } cpu_table[] = {
236 /* EV4/LCA45 had 8k L1 caches; EV45 had 16k L1 caches.
237 EV4/EV45 had 128k to 16M 32-byte direct Bcache. LCA45
238 had 64k to 8M 8-byte direct Bcache. */
239 { "ev4", PROCESSOR_EV4, 0, 32, 8, 8*1024 },
240 { "21064", PROCESSOR_EV4, 0, 32, 8, 8*1024 },
241 { "ev45", PROCESSOR_EV4, 0, 32, 16, 16*1024 },
242
243 /* EV5 or EV56 had 8k 32 byte L1, 96k 32 or 64 byte L2,
244 and 1M to 16M 64 byte L3 (not modeled).
245 PCA56 had 16k 64-byte cache; PCA57 had 32k Icache.
246 PCA56 had 8k 64-byte cache; PCA57 had 16k Dcache. */
247 { "ev5", PROCESSOR_EV5, 0, 32, 8, 96 },
248 { "21164", PROCESSOR_EV5, 0, 32, 8, 96 },
249 { "ev56", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
250 { "21164a", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
251 { "pca56", PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
252 { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
253 { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
254
255 /* EV6 had 64k 64 byte L1, 1M to 16M Bcache. */
256 { "ev6", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
257 { "21264", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
258 { "ev67", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
259 64, 64, 16*1024 },
260 { "21264a", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
261 64, 64, 16*1024 }
262 };
263
264 int const ct_size = ARRAY_SIZE (cpu_table);
265 int line_size = 0, l1_size = 0, l2_size = 0;
266 int i;
267
268 #ifdef SUBTARGET_OVERRIDE_OPTIONS
269 SUBTARGET_OVERRIDE_OPTIONS;
270 #endif
271
272 /* Default to full IEEE compliance mode for Go language. */
273 if (strcmp (lang_hooks.name, "GNU Go") == 0
274 && !(target_flags_explicit & MASK_IEEE))
275 target_flags |= MASK_IEEE;
276
277 alpha_fprm = ALPHA_FPRM_NORM;
278 alpha_tp = ALPHA_TP_PROG;
279 alpha_fptm = ALPHA_FPTM_N;
280
281 if (TARGET_IEEE)
282 {
283 alpha_tp = ALPHA_TP_INSN;
284 alpha_fptm = ALPHA_FPTM_SU;
285 }
286 if (TARGET_IEEE_WITH_INEXACT)
287 {
288 alpha_tp = ALPHA_TP_INSN;
289 alpha_fptm = ALPHA_FPTM_SUI;
290 }
291
292 if (alpha_tp_string)
293 {
294 if (! strcmp (alpha_tp_string, "p"))
295 alpha_tp = ALPHA_TP_PROG;
296 else if (! strcmp (alpha_tp_string, "f"))
297 alpha_tp = ALPHA_TP_FUNC;
298 else if (! strcmp (alpha_tp_string, "i"))
299 alpha_tp = ALPHA_TP_INSN;
300 else
301 error ("bad value %qs for -mtrap-precision switch", alpha_tp_string);
302 }
303
304 if (alpha_fprm_string)
305 {
306 if (! strcmp (alpha_fprm_string, "n"))
307 alpha_fprm = ALPHA_FPRM_NORM;
308 else if (! strcmp (alpha_fprm_string, "m"))
309 alpha_fprm = ALPHA_FPRM_MINF;
310 else if (! strcmp (alpha_fprm_string, "c"))
311 alpha_fprm = ALPHA_FPRM_CHOP;
312 else if (! strcmp (alpha_fprm_string,"d"))
313 alpha_fprm = ALPHA_FPRM_DYN;
314 else
315 error ("bad value %qs for -mfp-rounding-mode switch",
316 alpha_fprm_string);
317 }
318
319 if (alpha_fptm_string)
320 {
321 if (strcmp (alpha_fptm_string, "n") == 0)
322 alpha_fptm = ALPHA_FPTM_N;
323 else if (strcmp (alpha_fptm_string, "u") == 0)
324 alpha_fptm = ALPHA_FPTM_U;
325 else if (strcmp (alpha_fptm_string, "su") == 0)
326 alpha_fptm = ALPHA_FPTM_SU;
327 else if (strcmp (alpha_fptm_string, "sui") == 0)
328 alpha_fptm = ALPHA_FPTM_SUI;
329 else
330 error ("bad value %qs for -mfp-trap-mode switch", alpha_fptm_string);
331 }
332
333 if (alpha_cpu_string)
334 {
335 for (i = 0; i < ct_size; i++)
336 if (! strcmp (alpha_cpu_string, cpu_table [i].name))
337 {
338 alpha_tune = alpha_cpu = cpu_table[i].processor;
339 line_size = cpu_table[i].line_size;
340 l1_size = cpu_table[i].l1_size;
341 l2_size = cpu_table[i].l2_size;
342 target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
343 target_flags |= cpu_table[i].flags;
344 break;
345 }
346 if (i == ct_size)
347 error ("bad value %qs for -mcpu switch", alpha_cpu_string);
348 }
349
350 if (alpha_tune_string)
351 {
352 for (i = 0; i < ct_size; i++)
353 if (! strcmp (alpha_tune_string, cpu_table [i].name))
354 {
355 alpha_tune = cpu_table[i].processor;
356 line_size = cpu_table[i].line_size;
357 l1_size = cpu_table[i].l1_size;
358 l2_size = cpu_table[i].l2_size;
359 break;
360 }
361 if (i == ct_size)
362 error ("bad value %qs for -mtune switch", alpha_tune_string);
363 }
364
365 if (line_size)
366 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, line_size,
367 global_options.x_param_values,
368 global_options_set.x_param_values);
369 if (l1_size)
370 maybe_set_param_value (PARAM_L1_CACHE_SIZE, l1_size,
371 global_options.x_param_values,
372 global_options_set.x_param_values);
373 if (l2_size)
374 maybe_set_param_value (PARAM_L2_CACHE_SIZE, l2_size,
375 global_options.x_param_values,
376 global_options_set.x_param_values);
377
378 /* Do some sanity checks on the above options. */
379
380 if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
381 && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
382 {
383 warning (0, "fp software completion requires -mtrap-precision=i");
384 alpha_tp = ALPHA_TP_INSN;
385 }
386
387 if (alpha_cpu == PROCESSOR_EV6)
388 {
389 /* Except for EV6 pass 1 (not released), we always have precise
390 arithmetic traps. Which means we can do software completion
391 without minding trap shadows. */
392 alpha_tp = ALPHA_TP_PROG;
393 }
394
395 if (TARGET_FLOAT_VAX)
396 {
397 if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
398 {
399 warning (0, "rounding mode not supported for VAX floats");
400 alpha_fprm = ALPHA_FPRM_NORM;
401 }
402 if (alpha_fptm == ALPHA_FPTM_SUI)
403 {
404 warning (0, "trap mode not supported for VAX floats");
405 alpha_fptm = ALPHA_FPTM_SU;
406 }
407 if (target_flags_explicit & MASK_LONG_DOUBLE_128)
408 warning (0, "128-bit long double not supported for VAX floats");
409 target_flags &= ~MASK_LONG_DOUBLE_128;
410 }
411
412 {
413 char *end;
414 int lat;
415
416 if (!alpha_mlat_string)
417 alpha_mlat_string = "L1";
418
419 if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
420 && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
421 ;
422 else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
423 && ISDIGIT ((unsigned char)alpha_mlat_string[1])
424 && alpha_mlat_string[2] == '\0')
425 {
426 static int const cache_latency[][4] =
427 {
428 { 3, 30, -1 }, /* ev4 -- Bcache is a guess */
429 { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */
430 { 3, 12, 30 }, /* ev6 -- Bcache from DS20 LMbench. */
431 };
432
433 lat = alpha_mlat_string[1] - '0';
434 if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
435 {
436 warning (0, "L%d cache latency unknown for %s",
437 lat, alpha_cpu_name[alpha_tune]);
438 lat = 3;
439 }
440 else
441 lat = cache_latency[alpha_tune][lat-1];
442 }
443 else if (! strcmp (alpha_mlat_string, "main"))
444 {
445 /* Most current memories have about 370ns latency. This is
446 a reasonable guess for a fast cpu. */
447 lat = 150;
448 }
449 else
450 {
451 warning (0, "bad value %qs for -mmemory-latency", alpha_mlat_string);
452 lat = 3;
453 }
454
455 alpha_memory_latency = lat;
456 }
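/* Reading off the cache_latency table above, for example: with -mtune=ev6,
   -mmemory-latency=L2 selects cache_latency[EV6][1] == 12 cycles,
   -mmemory-latency=main uses the fixed guess of 150, and a plain numeric
   argument such as -mmemory-latency=5 is used as given.  */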
457
458 /* Default the definition of "small data" to 8 bytes. */
459 if (!global_options_set.x_g_switch_value)
460 g_switch_value = 8;
461
462 /* Infer TARGET_SMALL_DATA from -fpic/-fPIC. */
463 if (flag_pic == 1)
464 target_flags |= MASK_SMALL_DATA;
465 else if (flag_pic == 2)
466 target_flags &= ~MASK_SMALL_DATA;
467
468 /* Align labels and loops for optimal branching. */
469 /* ??? Kludge these by not doing anything if we don't optimize. */
470 if (optimize > 0)
471 {
472 if (align_loops <= 0)
473 align_loops = 16;
474 if (align_jumps <= 0)
475 align_jumps = 16;
476 }
477 if (align_functions <= 0)
478 align_functions = 16;
479
480 /* Register variables and functions with the garbage collector. */
481
482 /* Set up function hooks. */
483 init_machine_status = alpha_init_machine_status;
484
485 /* Tell the compiler when we're using VAX floating point. */
486 if (TARGET_FLOAT_VAX)
487 {
488 REAL_MODE_FORMAT (SFmode) = &vax_f_format;
489 REAL_MODE_FORMAT (DFmode) = &vax_g_format;
490 REAL_MODE_FORMAT (TFmode) = NULL;
491 }
492
493 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
494 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
495 target_flags |= MASK_LONG_DOUBLE_128;
496 #endif
497 }
498 \f
499 /* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */
500
501 int
502 zap_mask (HOST_WIDE_INT value)
503 {
504 int i;
505
506 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
507 i++, value >>= 8)
508 if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
509 return 0;
510
511 return 1;
512 }
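/* An illustrative check on sample values (chosen arbitrarily): every byte
   of VALUE must be either 0x00 or 0xff, which is the property a ZAP/ZAPNOT
   byte mask needs.

     zap_mask (0x00000000ffffff00)   returns 1
     zap_mask (0x000000000000ff80)   returns 0   (low byte 0x80 is mixed)  */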
513
514 /* Return true if OP is valid for a particular TLS relocation.
515 We are already guaranteed that OP is a CONST. */
516
517 int
518 tls_symbolic_operand_1 (rtx op, int size, int unspec)
519 {
520 op = XEXP (op, 0);
521
522 if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
523 return 0;
524 op = XVECEXP (op, 0, 0);
525
526 if (GET_CODE (op) != SYMBOL_REF)
527 return 0;
528
529 switch (SYMBOL_REF_TLS_MODEL (op))
530 {
531 case TLS_MODEL_LOCAL_DYNAMIC:
532 return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
533 case TLS_MODEL_INITIAL_EXEC:
534 return unspec == UNSPEC_TPREL && size == 64;
535 case TLS_MODEL_LOCAL_EXEC:
536 return unspec == UNSPEC_TPREL && size == alpha_tls_size;
537 default:
538 gcc_unreachable ();
539 }
540 }
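/* For illustration, a local-exec operand accepted here (with "foo" standing
   in for an arbitrary symbol name and SIZE matching the configured
   alpha_tls_size) has the form

     (const (unspec [(symbol_ref "foo")] UNSPEC_TPREL))

   which the switch above then checks against the symbol's TLS model.  */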
541
542 /* Used by aligned_memory_operand and unaligned_memory_operand to
543 resolve what reload is going to do with OP if it's a register. */
544
545 rtx
546 resolve_reload_operand (rtx op)
547 {
548 if (reload_in_progress)
549 {
550 rtx tmp = op;
551 if (GET_CODE (tmp) == SUBREG)
552 tmp = SUBREG_REG (tmp);
553 if (REG_P (tmp)
554 && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
555 {
556 op = reg_equiv_memory_loc (REGNO (tmp));
557 if (op == 0)
558 return 0;
559 }
560 }
561 return op;
562 }
563
564 /* The scalar modes supported differ from the default check-what-c-supports
565 version in that sometimes TFmode is available even when long double
566 indicates only DFmode. */
567
568 static bool
569 alpha_scalar_mode_supported_p (enum machine_mode mode)
570 {
571 switch (mode)
572 {
573 case QImode:
574 case HImode:
575 case SImode:
576 case DImode:
577 case TImode: /* via optabs.c */
578 return true;
579
580 case SFmode:
581 case DFmode:
582 return true;
583
584 case TFmode:
585 return TARGET_HAS_XFLOATING_LIBS;
586
587 default:
588 return false;
589 }
590 }
591
592 /* Alpha implements a couple of integer vector mode operations when
593 TARGET_MAX is enabled. We do not check TARGET_MAX here, however,
594 which allows the vectorizer to operate on e.g. move instructions,
595 or when expand_vector_operations can do something useful. */
596
597 static bool
598 alpha_vector_mode_supported_p (enum machine_mode mode)
599 {
600 return mode == V8QImode || mode == V4HImode || mode == V2SImode;
601 }
602
603 /* Return 1 if this function can directly return via $26. */
604
605 int
606 direct_return (void)
607 {
608 return (TARGET_ABI_OSF
609 && reload_completed
610 && alpha_sa_size () == 0
611 && get_frame_size () == 0
612 && crtl->outgoing_args_size == 0
613 && crtl->args.pretend_args_size == 0);
614 }
615
616 /* Return the TLS model to use for SYMBOL. */
617
618 static enum tls_model
619 tls_symbolic_operand_type (rtx symbol)
620 {
621 enum tls_model model;
622
623 if (GET_CODE (symbol) != SYMBOL_REF)
624 return TLS_MODEL_NONE;
625 model = SYMBOL_REF_TLS_MODEL (symbol);
626
627 /* Local-exec with a 64-bit size is the same code as initial-exec. */
628 if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
629 model = TLS_MODEL_INITIAL_EXEC;
630
631 return model;
632 }
633 \f
634 /* Return true if the function DECL will share the same GP as any
635 function in the current unit of translation. */
636
637 static bool
638 decl_has_samegp (const_tree decl)
639 {
640 /* Functions that are not local can be overridden, and thus may
641 not share the same gp. */
642 if (!(*targetm.binds_local_p) (decl))
643 return false;
644
645 /* If -msmall-data is in effect, assume that there is only one GP
646 for the module, and so any local symbol has this property. We
647 need explicit relocations to be able to enforce this for symbols
648 not defined in this unit of translation, however. */
649 if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
650 return true;
651
652 /* Functions that are not external are defined in this UoT. */
653 /* ??? Irritatingly, static functions not yet emitted are still
654 marked "external". Apply this to non-static functions only. */
655 return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
656 }
657
658 /* Return true if EXP should be placed in the small data section. */
659
660 static bool
661 alpha_in_small_data_p (const_tree exp)
662 {
663 /* We want to merge strings, so we never consider them small data. */
664 if (TREE_CODE (exp) == STRING_CST)
665 return false;
666
667 /* Functions are never in the small data area. Duh. */
668 if (TREE_CODE (exp) == FUNCTION_DECL)
669 return false;
670
671 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
672 {
673 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
674 if (strcmp (section, ".sdata") == 0
675 || strcmp (section, ".sbss") == 0)
676 return true;
677 }
678 else
679 {
680 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
681
682 /* If this is an incomplete type with size 0, then we can't put it
683 in sdata because it might be too big when completed. */
684 if (size > 0 && size <= g_switch_value)
685 return true;
686 }
687
688 return false;
689 }
690
691 #if TARGET_ABI_OPEN_VMS
692 static bool
693 vms_valid_pointer_mode (enum machine_mode mode)
694 {
695 return (mode == SImode || mode == DImode);
696 }
697
698 static bool
699 alpha_linkage_symbol_p (const char *symname)
700 {
701 int symlen = strlen (symname);
702
703 if (symlen > 4)
704 return strcmp (&symname [symlen - 4], "..lk") == 0;
705
706 return false;
707 }
708
709 #define LINKAGE_SYMBOL_REF_P(X) \
710 ((GET_CODE (X) == SYMBOL_REF \
711 && alpha_linkage_symbol_p (XSTR (X, 0))) \
712 || (GET_CODE (X) == CONST \
713 && GET_CODE (XEXP (X, 0)) == PLUS \
714 && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
715 && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
716 #endif
717
718 /* legitimate_address_p recognizes an RTL expression that is a valid
719 memory address for an instruction. The MODE argument is the
720 machine mode for the MEM expression that wants to use this address.
721
722 For Alpha, we have either a constant address or the sum of a
723 register and a constant address, or just a register. For DImode,
724 any of those forms can be surrounded with an AND that clears the
725 low-order three bits; this is an "unaligned" access. */
726
727 static bool
728 alpha_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
729 {
730 /* If this is an ldq_u type address, discard the outer AND. */
731 if (mode == DImode
732 && GET_CODE (x) == AND
733 && CONST_INT_P (XEXP (x, 1))
734 && INTVAL (XEXP (x, 1)) == -8)
735 x = XEXP (x, 0);
736
737 /* Discard non-paradoxical subregs. */
738 if (GET_CODE (x) == SUBREG
739 && (GET_MODE_SIZE (GET_MODE (x))
740 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
741 x = SUBREG_REG (x);
742
743 /* Unadorned general registers are valid. */
744 if (REG_P (x)
745 && (strict
746 ? STRICT_REG_OK_FOR_BASE_P (x)
747 : NONSTRICT_REG_OK_FOR_BASE_P (x)))
748 return true;
749
750 /* Constant addresses (i.e. +/- 32k) are valid. */
751 if (CONSTANT_ADDRESS_P (x))
752 return true;
753
754 #if TARGET_ABI_OPEN_VMS
755 if (LINKAGE_SYMBOL_REF_P (x))
756 return true;
757 #endif
758
759 /* Register plus a small constant offset is valid. */
760 if (GET_CODE (x) == PLUS)
761 {
762 rtx ofs = XEXP (x, 1);
763 x = XEXP (x, 0);
764
765 /* Discard non-paradoxical subregs. */
766 if (GET_CODE (x) == SUBREG
767 && (GET_MODE_SIZE (GET_MODE (x))
768 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
769 x = SUBREG_REG (x);
770
771 if (REG_P (x))
772 {
773 if (! strict
774 && NONSTRICT_REG_OK_FP_BASE_P (x)
775 && CONST_INT_P (ofs))
776 return true;
777 if ((strict
778 ? STRICT_REG_OK_FOR_BASE_P (x)
779 : NONSTRICT_REG_OK_FOR_BASE_P (x))
780 && CONSTANT_ADDRESS_P (ofs))
781 return true;
782 }
783 }
784
785 /* If we're managing explicit relocations, LO_SUM is valid, as are small
786 data symbols. Avoid explicit relocations of modes larger than word
787 mode since e.g. $LC0+8($1) can fold around +/- 32k offset. */
788 else if (TARGET_EXPLICIT_RELOCS
789 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
790 {
791 if (small_symbolic_operand (x, Pmode))
792 return true;
793
794 if (GET_CODE (x) == LO_SUM)
795 {
796 rtx ofs = XEXP (x, 1);
797 x = XEXP (x, 0);
798
799 /* Discard non-paradoxical subregs. */
800 if (GET_CODE (x) == SUBREG
801 && (GET_MODE_SIZE (GET_MODE (x))
802 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
803 x = SUBREG_REG (x);
804
805 /* Must have a valid base register. */
806 if (! (REG_P (x)
807 && (strict
808 ? STRICT_REG_OK_FOR_BASE_P (x)
809 : NONSTRICT_REG_OK_FOR_BASE_P (x))))
810 return false;
811
812 /* The symbol must be local. */
813 if (local_symbolic_operand (ofs, Pmode)
814 || dtp32_symbolic_operand (ofs, Pmode)
815 || tp32_symbolic_operand (ofs, Pmode))
816 return true;
817 }
818 }
819
820 return false;
821 }
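/* A few illustrative address forms and how the test above classifies them
   ($16 and "bar" are placeholder register and symbol names):

     (reg $16)                                valid: plain base register
     (plus (reg $16) (const_int 64))          valid: base plus 16-bit offset
     (and (plus (reg $16) (const_int 5))
          (const_int -8))                     valid for DImode: ldq_u access
     (lo_sum (reg $16) (symbol_ref "bar"))    valid with explicit relocs when
                                              "bar" is a local symbol  */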
822
823 /* Build the SYMBOL_REF for __tls_get_addr. */
824
825 static GTY(()) rtx tls_get_addr_libfunc;
826
827 static rtx
828 get_tls_get_addr (void)
829 {
830 if (!tls_get_addr_libfunc)
831 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
832 return tls_get_addr_libfunc;
833 }
834
835 /* Try machine-dependent ways of modifying an illegitimate address
836 to be legitimate. If we find one, return the new, valid address. */
837
838 static rtx
839 alpha_legitimize_address_1 (rtx x, rtx scratch, enum machine_mode mode)
840 {
841 HOST_WIDE_INT addend;
842
843 /* If the address is (plus reg const_int) and the CONST_INT is not a
844 valid offset, compute the high part of the constant and add it to
845 the register. Then our address is (plus temp low-part-const). */
846 if (GET_CODE (x) == PLUS
847 && REG_P (XEXP (x, 0))
848 && CONST_INT_P (XEXP (x, 1))
849 && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
850 {
851 addend = INTVAL (XEXP (x, 1));
852 x = XEXP (x, 0);
853 goto split_addend;
854 }
855
856 /* If the address is (const (plus FOO const_int)), find the low-order
857 part of the CONST_INT. Then load FOO plus any high-order part of the
858 CONST_INT into a register. Our address is (plus reg low-part-const).
859 This is done to reduce the number of GOT entries. */
860 if (can_create_pseudo_p ()
861 && GET_CODE (x) == CONST
862 && GET_CODE (XEXP (x, 0)) == PLUS
863 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
864 {
865 addend = INTVAL (XEXP (XEXP (x, 0), 1));
866 x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
867 goto split_addend;
868 }
869
870 /* If we have a (plus reg const), emit the load as in (2), then add
871 the two registers, and finally generate (plus reg low-part-const) as
872 our address. */
873 if (can_create_pseudo_p ()
874 && GET_CODE (x) == PLUS
875 && REG_P (XEXP (x, 0))
876 && GET_CODE (XEXP (x, 1)) == CONST
877 && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
878 && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1)))
879 {
880 addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
881 x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
882 XEXP (XEXP (XEXP (x, 1), 0), 0),
883 NULL_RTX, 1, OPTAB_LIB_WIDEN);
884 goto split_addend;
885 }
886
887 /* If this is a local symbol, split the address into HIGH/LO_SUM parts.
888 Avoid modes larger than word mode since e.g. $LC0+8($1) can fold
889 around +/- 32k offset. */
890 if (TARGET_EXPLICIT_RELOCS
891 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
892 && symbolic_operand (x, Pmode))
893 {
894 rtx r0, r16, eqv, tga, tp, insn, dest, seq;
895
896 switch (tls_symbolic_operand_type (x))
897 {
898 case TLS_MODEL_NONE:
899 break;
900
901 case TLS_MODEL_GLOBAL_DYNAMIC:
902 start_sequence ();
903
904 r0 = gen_rtx_REG (Pmode, 0);
905 r16 = gen_rtx_REG (Pmode, 16);
906 tga = get_tls_get_addr ();
907 dest = gen_reg_rtx (Pmode);
908 seq = GEN_INT (alpha_next_sequence_number++);
909
910 emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
911 insn = gen_call_value_osf_tlsgd (r0, tga, seq);
912 insn = emit_call_insn (insn);
913 RTL_CONST_CALL_P (insn) = 1;
914 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
915
916 insn = get_insns ();
917 end_sequence ();
918
919 emit_libcall_block (insn, dest, r0, x);
920 return dest;
921
922 case TLS_MODEL_LOCAL_DYNAMIC:
923 start_sequence ();
924
925 r0 = gen_rtx_REG (Pmode, 0);
926 r16 = gen_rtx_REG (Pmode, 16);
927 tga = get_tls_get_addr ();
928 scratch = gen_reg_rtx (Pmode);
929 seq = GEN_INT (alpha_next_sequence_number++);
930
931 emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
932 insn = gen_call_value_osf_tlsldm (r0, tga, seq);
933 insn = emit_call_insn (insn);
934 RTL_CONST_CALL_P (insn) = 1;
935 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
936
937 insn = get_insns ();
938 end_sequence ();
939
940 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
941 UNSPEC_TLSLDM_CALL);
942 emit_libcall_block (insn, scratch, r0, eqv);
943
944 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
945 eqv = gen_rtx_CONST (Pmode, eqv);
946
947 if (alpha_tls_size == 64)
948 {
949 dest = gen_reg_rtx (Pmode);
950 emit_insn (gen_rtx_SET (VOIDmode, dest, eqv));
951 emit_insn (gen_adddi3 (dest, dest, scratch));
952 return dest;
953 }
954 if (alpha_tls_size == 32)
955 {
956 insn = gen_rtx_HIGH (Pmode, eqv);
957 insn = gen_rtx_PLUS (Pmode, scratch, insn);
958 scratch = gen_reg_rtx (Pmode);
959 emit_insn (gen_rtx_SET (VOIDmode, scratch, insn));
960 }
961 return gen_rtx_LO_SUM (Pmode, scratch, eqv);
962
963 case TLS_MODEL_INITIAL_EXEC:
964 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
965 eqv = gen_rtx_CONST (Pmode, eqv);
966 tp = gen_reg_rtx (Pmode);
967 scratch = gen_reg_rtx (Pmode);
968 dest = gen_reg_rtx (Pmode);
969
970 emit_insn (gen_get_thread_pointerdi (tp));
971 emit_insn (gen_rtx_SET (VOIDmode, scratch, eqv));
972 emit_insn (gen_adddi3 (dest, tp, scratch));
973 return dest;
974
975 case TLS_MODEL_LOCAL_EXEC:
976 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
977 eqv = gen_rtx_CONST (Pmode, eqv);
978 tp = gen_reg_rtx (Pmode);
979
980 emit_insn (gen_get_thread_pointerdi (tp));
981 if (alpha_tls_size == 32)
982 {
983 insn = gen_rtx_HIGH (Pmode, eqv);
984 insn = gen_rtx_PLUS (Pmode, tp, insn);
985 tp = gen_reg_rtx (Pmode);
986 emit_insn (gen_rtx_SET (VOIDmode, tp, insn));
987 }
988 return gen_rtx_LO_SUM (Pmode, tp, eqv);
989
990 default:
991 gcc_unreachable ();
992 }
993
994 if (local_symbolic_operand (x, Pmode))
995 {
996 if (small_symbolic_operand (x, Pmode))
997 return x;
998 else
999 {
1000 if (can_create_pseudo_p ())
1001 scratch = gen_reg_rtx (Pmode);
1002 emit_insn (gen_rtx_SET (VOIDmode, scratch,
1003 gen_rtx_HIGH (Pmode, x)));
1004 return gen_rtx_LO_SUM (Pmode, scratch, x);
1005 }
1006 }
1007 }
1008
1009 return NULL;
1010
1011 split_addend:
1012 {
1013 HOST_WIDE_INT low, high;
1014
1015 low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
1016 addend -= low;
1017 high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
1018 addend -= high;
1019
1020 if (addend)
1021 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
1022 (!can_create_pseudo_p () ? scratch : NULL_RTX),
1023 1, OPTAB_LIB_WIDEN);
1024 if (high)
1025 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
1026 (!can_create_pseudo_p () ? scratch : NULL_RTX),
1027 1, OPTAB_LIB_WIDEN);
1028
1029 return plus_constant (Pmode, x, low);
1030 }
1031 }
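/* A worked example of the split_addend arithmetic above, for an addend of
   0x1234abcd chosen purely for illustration:

     low  = ((0x1234abcd & 0xffff) ^ 0x8000) - 0x8000  =  -0x5433
     high = 0x1234abcd - low                           =   0x12350000

   The high part is added to the base register with an ldah of 0x1235
   (0x1235 << 16 == 0x12350000) and the mem keeps a displacement of
   -0x5433, and 0x12350000 - 0x5433 == 0x1234abcd as required.  */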
1032
1033
1034 /* Try machine-dependent ways of modifying an illegitimate address
1035 to be legitimate. Return X or the new, valid address. */
1036
1037 static rtx
1038 alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1039 enum machine_mode mode)
1040 {
1041 rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode);
1042 return new_x ? new_x : x;
1043 }
1044
1045 /* Return true if ADDR has an effect that depends on the machine mode it
1046 is used for. On the Alpha this is true only for the unaligned modes.
1047 We can simplify the test since we know that the address must be valid. */
1048
1049 static bool
1050 alpha_mode_dependent_address_p (const_rtx addr,
1051 addr_space_t as ATTRIBUTE_UNUSED)
1052 {
1053 return GET_CODE (addr) == AND;
1054 }
1055
1056 /* Primarily this is required for TLS symbols, but given that our move
1057 patterns *ought* to be able to handle any symbol at any time, we
1058 should never be spilling symbolic operands to the constant pool, ever. */
1059
1060 static bool
1061 alpha_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1062 {
1063 enum rtx_code code = GET_CODE (x);
1064 return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
1065 }
1066
1067 /* We do not allow indirect calls to be optimized into sibling calls, nor
1068 can we allow a call to a function with a different GP to be optimized
1069 into a sibcall. */
1070
1071 static bool
1072 alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1073 {
1074 /* Can't do indirect tail calls, since we don't know if the target
1075 uses the same GP. */
1076 if (!decl)
1077 return false;
1078
1079 /* Otherwise, we can make a tail call if the target function shares
1080 the same GP. */
1081 return decl_has_samegp (decl);
1082 }
1083
1084 int
1085 some_small_symbolic_operand_int (rtx *px, void *data ATTRIBUTE_UNUSED)
1086 {
1087 rtx x = *px;
1088
1089 /* Don't re-split. */
1090 if (GET_CODE (x) == LO_SUM)
1091 return -1;
1092
1093 return small_symbolic_operand (x, Pmode) != 0;
1094 }
1095
1096 static int
1097 split_small_symbolic_operand_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
1098 {
1099 rtx x = *px;
1100
1101 /* Don't re-split. */
1102 if (GET_CODE (x) == LO_SUM)
1103 return -1;
1104
1105 if (small_symbolic_operand (x, Pmode))
1106 {
1107 x = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
1108 *px = x;
1109 return -1;
1110 }
1111
1112 return 0;
1113 }
1114
1115 rtx
1116 split_small_symbolic_operand (rtx x)
1117 {
1118 x = copy_insn (x);
1119 for_each_rtx (&x, split_small_symbolic_operand_1, NULL);
1120 return x;
1121 }
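/* For illustration, with "bar" standing in for a symbol in the small data
   area and $29 being the global pointer (pic_offset_table_rtx), the walk
   above rewrites

     (mem (symbol_ref "bar"))
   into
     (mem (lo_sum (reg $29) (symbol_ref "bar")))

   and leaves anything already underneath a LO_SUM untouched.  */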
1122
1123 /* Indicate that INSN cannot be duplicated. This is true for any insn
1124 that we've marked with gpdisp relocs, since those have to stay in
1125 1-1 correspondence with one another.
1126
1127 Technically we could copy them if we could set up a mapping from one
1128 sequence number to another, across the set of insns to be duplicated.
1129 This seems overly complicated and error-prone since interblock motion
1130 from sched-ebb could move one of the pair of insns to a different block.
1131
1132 Also cannot allow jsr insns to be duplicated. If they throw exceptions,
1133 then they'll be in a different block from their ldgp. Which could lead
1134 the bb reorder code to think that it would be ok to copy just the block
1135 containing the call and branch to the block containing the ldgp. */
1136
1137 static bool
1138 alpha_cannot_copy_insn_p (rtx insn)
1139 {
1140 if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
1141 return false;
1142 if (recog_memoized (insn) >= 0)
1143 return get_attr_cannot_copy (insn);
1144 else
1145 return false;
1146 }
1147
1148
1149 /* Try a machine-dependent way of reloading an illegitimate address
1150 operand. If we find one, push the reload and return the new rtx. */
1151
1152 rtx
1153 alpha_legitimize_reload_address (rtx x,
1154 enum machine_mode mode ATTRIBUTE_UNUSED,
1155 int opnum, int type,
1156 int ind_levels ATTRIBUTE_UNUSED)
1157 {
1158 /* We must recognize output that we have already generated ourselves. */
1159 if (GET_CODE (x) == PLUS
1160 && GET_CODE (XEXP (x, 0)) == PLUS
1161 && REG_P (XEXP (XEXP (x, 0), 0))
1162 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
1163 && CONST_INT_P (XEXP (x, 1)))
1164 {
1165 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1166 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1167 opnum, (enum reload_type) type);
1168 return x;
1169 }
1170
1171 /* We wish to handle large displacements off a base register by
1172 splitting the addend across an ldah and the mem insn. This
1173 cuts the number of extra insns needed from 3 to 1. */
1174 if (GET_CODE (x) == PLUS
1175 && REG_P (XEXP (x, 0))
1176 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
1177 && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
1178 && GET_CODE (XEXP (x, 1)) == CONST_INT)
1179 {
1180 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
1181 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
1182 HOST_WIDE_INT high
1183 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
1184
1185 /* Check for 32-bit overflow. */
1186 if (high + low != val)
1187 return NULL_RTX;
1188
1189 /* Reload the high part into a base reg; leave the low part
1190 in the mem directly. */
1191 x = gen_rtx_PLUS (GET_MODE (x),
1192 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
1193 GEN_INT (high)),
1194 GEN_INT (low));
1195
1196 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1197 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1198 opnum, (enum reload_type) type);
1199 return x;
1200 }
1201
1202 return NULL_RTX;
1203 }
1204 \f
1205 /* Compute a (partial) cost for rtx X. Return true if the complete
1206 cost has been computed, and false if subexpressions should be
1207 scanned. In either case, *TOTAL contains the cost result. */
1208
1209 static bool
1210 alpha_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
1211 bool speed)
1212 {
1213 enum machine_mode mode = GET_MODE (x);
1214 bool float_mode_p = FLOAT_MODE_P (mode);
1215 const struct alpha_rtx_cost_data *cost_data;
1216
1217 if (!speed)
1218 cost_data = &alpha_rtx_cost_size;
1219 else
1220 cost_data = &alpha_rtx_cost_data[alpha_tune];
1221
1222 switch (code)
1223 {
1224 case CONST_INT:
1225 /* If this is an 8-bit constant, return zero since it can be used
1226 nearly anywhere with no cost. If it is a valid operand for an
1227 ADD or AND, likewise return 0 if we know it will be used in that
1228 context. Otherwise, return 2 since it might be used there later.
1229 All other constants take at least two insns. */
1230 if (INTVAL (x) >= 0 && INTVAL (x) < 256)
1231 {
1232 *total = 0;
1233 return true;
1234 }
1235 /* FALLTHRU */
1236
1237 case CONST_DOUBLE:
1238 if (x == CONST0_RTX (mode))
1239 *total = 0;
1240 else if ((outer_code == PLUS && add_operand (x, VOIDmode))
1241 || (outer_code == AND && and_operand (x, VOIDmode)))
1242 *total = 0;
1243 else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
1244 *total = 2;
1245 else
1246 *total = COSTS_N_INSNS (2);
1247 return true;
1248
1249 case CONST:
1250 case SYMBOL_REF:
1251 case LABEL_REF:
1252 if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
1253 *total = COSTS_N_INSNS (outer_code != MEM);
1254 else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
1255 *total = COSTS_N_INSNS (1 + (outer_code != MEM));
1256 else if (tls_symbolic_operand_type (x))
1257 /* Estimate of cost for call_pal rduniq. */
1258 /* ??? How many insns do we emit here? More than one... */
1259 *total = COSTS_N_INSNS (15);
1260 else
1261 /* Otherwise we do a load from the GOT. */
1262 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1263 return true;
1264
1265 case HIGH:
1266 /* This is effectively an add_operand. */
1267 *total = 2;
1268 return true;
1269
1270 case PLUS:
1271 case MINUS:
1272 if (float_mode_p)
1273 *total = cost_data->fp_add;
1274 else if (GET_CODE (XEXP (x, 0)) == MULT
1275 && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
1276 {
1277 *total = (rtx_cost (XEXP (XEXP (x, 0), 0),
1278 (enum rtx_code) outer_code, opno, speed)
1279 + rtx_cost (XEXP (x, 1),
1280 (enum rtx_code) outer_code, opno, speed)
1281 + COSTS_N_INSNS (1));
1282 return true;
1283 }
1284 return false;
1285
1286 case MULT:
1287 if (float_mode_p)
1288 *total = cost_data->fp_mult;
1289 else if (mode == DImode)
1290 *total = cost_data->int_mult_di;
1291 else
1292 *total = cost_data->int_mult_si;
1293 return false;
1294
1295 case ASHIFT:
1296 if (CONST_INT_P (XEXP (x, 1))
1297 && INTVAL (XEXP (x, 1)) <= 3)
1298 {
1299 *total = COSTS_N_INSNS (1);
1300 return false;
1301 }
1302 /* FALLTHRU */
1303
1304 case ASHIFTRT:
1305 case LSHIFTRT:
1306 *total = cost_data->int_shift;
1307 return false;
1308
1309 case IF_THEN_ELSE:
1310 if (float_mode_p)
1311 *total = cost_data->fp_add;
1312 else
1313 *total = cost_data->int_cmov;
1314 return false;
1315
1316 case DIV:
1317 case UDIV:
1318 case MOD:
1319 case UMOD:
1320 if (!float_mode_p)
1321 *total = cost_data->int_div;
1322 else if (mode == SFmode)
1323 *total = cost_data->fp_div_sf;
1324 else
1325 *total = cost_data->fp_div_df;
1326 return false;
1327
1328 case MEM:
1329 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1330 return true;
1331
1332 case NEG:
1333 if (! float_mode_p)
1334 {
1335 *total = COSTS_N_INSNS (1);
1336 return false;
1337 }
1338 /* FALLTHRU */
1339
1340 case ABS:
1341 if (! float_mode_p)
1342 {
1343 *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
1344 return false;
1345 }
1346 /* FALLTHRU */
1347
1348 case FLOAT:
1349 case UNSIGNED_FLOAT:
1350 case FIX:
1351 case UNSIGNED_FIX:
1352 case FLOAT_TRUNCATE:
1353 *total = cost_data->fp_add;
1354 return false;
1355
1356 case FLOAT_EXTEND:
1357 if (MEM_P (XEXP (x, 0)))
1358 *total = 0;
1359 else
1360 *total = cost_data->fp_add;
1361 return false;
1362
1363 default:
1364 return false;
1365 }
1366 }
1367 \f
1368 /* REF is an alignable memory location. Place an aligned SImode
1369 reference into *PALIGNED_MEM and the number of bits to shift into
1370 *PBITNUM. SCRATCH is a free register for use in reloading out
1371 of range stack slots. */
1372
1373 void
1374 get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
1375 {
1376 rtx base;
1377 HOST_WIDE_INT disp, offset;
1378
1379 gcc_assert (MEM_P (ref));
1380
1381 if (reload_in_progress
1382 && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1383 {
1384 base = find_replacement (&XEXP (ref, 0));
1385 gcc_assert (memory_address_p (GET_MODE (ref), base));
1386 }
1387 else
1388 base = XEXP (ref, 0);
1389
1390 if (GET_CODE (base) == PLUS)
1391 disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1392 else
1393 disp = 0;
1394
1395 /* Find the byte offset within an aligned word. If the memory itself is
1396 claimed to be aligned, believe it. Otherwise, aligned_memory_operand
1397 will have examined the base register and determined it is aligned, and
1398 thus displacements from it are naturally alignable. */
1399 if (MEM_ALIGN (ref) >= 32)
1400 offset = 0;
1401 else
1402 offset = disp & 3;
1403
1404 /* The location should not cross an aligned word boundary. */
1405 gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref))
1406 <= GET_MODE_SIZE (SImode));
1407
1408 /* Access the entire aligned word. */
1409 *paligned_mem = widen_memory_access (ref, SImode, -offset);
1410
1411 /* Convert the byte offset within the word to a bit offset. */
1412 offset *= BITS_PER_UNIT;
1413 *pbitnum = GEN_INT (offset);
1414 }
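/* A worked example, for a hypothetical QImode REF at (plus base (const_int 6))
   where only the base register is known to be aligned:

     disp = 6, offset = disp & 3 = 2
     *PALIGNED_MEM   the SImode word at base + 4
     *PBITNUM        (const_int 16), i.e. byte 2 of that word in bits

   so the byte is extracted from bit 16 of the containing aligned longword.  */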
1415
1416 /* Similar to get_aligned_mem, but just return the address.  Handle the
1417    two reload cases. */
1418
1419 rtx
1420 get_unaligned_address (rtx ref)
1421 {
1422 rtx base;
1423 HOST_WIDE_INT offset = 0;
1424
1425 gcc_assert (MEM_P (ref));
1426
1427 if (reload_in_progress
1428 && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1429 {
1430 base = find_replacement (&XEXP (ref, 0));
1431
1432 gcc_assert (memory_address_p (GET_MODE (ref), base));
1433 }
1434 else
1435 base = XEXP (ref, 0);
1436
1437 if (GET_CODE (base) == PLUS)
1438 offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1439
1440 return plus_constant (Pmode, base, offset);
1441 }
1442
1443 /* Compute a value X, such that X & 7 == (ADDR + OFS) & 7.
1444 X is always returned in a register. */
1445
1446 rtx
1447 get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
1448 {
1449 if (GET_CODE (addr) == PLUS)
1450 {
1451 ofs += INTVAL (XEXP (addr, 1));
1452 addr = XEXP (addr, 0);
1453 }
1454
1455 return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7),
1456 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1457 }
1458
1459 /* On the Alpha, all (non-symbolic) constants except zero go into
1460 a floating-point register via memory. Note that we cannot
1461 return anything that is not a subset of RCLASS, and that some
1462 symbolic constants cannot be dropped to memory. */
1463
1464 enum reg_class
1465 alpha_preferred_reload_class(rtx x, enum reg_class rclass)
1466 {
1467 /* Zero is present in any register class. */
1468 if (x == CONST0_RTX (GET_MODE (x)))
1469 return rclass;
1470
1471 /* These sorts of constants we can easily drop to memory. */
1472 if (CONST_INT_P (x)
1473 || GET_CODE (x) == CONST_DOUBLE
1474 || GET_CODE (x) == CONST_VECTOR)
1475 {
1476 if (rclass == FLOAT_REGS)
1477 return NO_REGS;
1478 if (rclass == ALL_REGS)
1479 return GENERAL_REGS;
1480 return rclass;
1481 }
1482
1483 /* All other kinds of constants should not (and in the case of HIGH
1484 cannot) be dropped to memory -- instead we use a GENERAL_REGS
1485 secondary reload. */
1486 if (CONSTANT_P (x))
1487 return (rclass == ALL_REGS ? GENERAL_REGS : rclass);
1488
1489 return rclass;
1490 }
1491
1492 /* Inform reload about cases where moving X with a mode MODE to a register in
1493 RCLASS requires an extra scratch or immediate register. Return the class
1494 needed for the immediate register. */
1495
1496 static reg_class_t
1497 alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
1498 enum machine_mode mode, secondary_reload_info *sri)
1499 {
1500 enum reg_class rclass = (enum reg_class) rclass_i;
1501
1502 /* Loading and storing HImode or QImode values to and from memory
1503 usually requires a scratch register. */
1504 if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode))
1505 {
1506 if (any_memory_operand (x, mode))
1507 {
1508 if (in_p)
1509 {
1510 if (!aligned_memory_operand (x, mode))
1511 sri->icode = direct_optab_handler (reload_in_optab, mode);
1512 }
1513 else
1514 sri->icode = direct_optab_handler (reload_out_optab, mode);
1515 return NO_REGS;
1516 }
1517 }
1518
1519 /* We also cannot do integral arithmetic into FP regs, as might result
1520 from register elimination into a DImode fp register. */
1521 if (rclass == FLOAT_REGS)
1522 {
1523 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
1524 return GENERAL_REGS;
1525 if (in_p && INTEGRAL_MODE_P (mode)
1526 && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x))
1527 return GENERAL_REGS;
1528 }
1529
1530 return NO_REGS;
1531 }
1532 \f
1533 /* Subfunction of the following function. Update the flags of any MEM
1534 found in part of X. */
1535
1536 static int
1537 alpha_set_memflags_1 (rtx *xp, void *data)
1538 {
1539 rtx x = *xp, orig = (rtx) data;
1540
1541 if (!MEM_P (x))
1542 return 0;
1543
1544 MEM_VOLATILE_P (x) = MEM_VOLATILE_P (orig);
1545 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (orig);
1546 MEM_READONLY_P (x) = MEM_READONLY_P (orig);
1547
1548 /* Sadly, we cannot use alias sets because the extra aliasing
1549 produced by the AND interferes. Given that two-byte quantities
1550 are the only thing we would be able to differentiate anyway,
1551 there does not seem to be any point in convoluting the early
1552 out of the alias check. */
1553
1554 return -1;
1555 }
1556
1557 /* Given SEQ, which is an INSN list, look for any MEMs in either
1558 a SET_DEST or a SET_SRC and copy the volatile, no-trap, and
1559 read-only flags from REF into each of the MEMs found. If REF is not
1560 a MEM, don't do anything. */
1561
1562 void
1563 alpha_set_memflags (rtx seq, rtx ref)
1564 {
1565 rtx insn;
1566
1567 if (!MEM_P (ref))
1568 return;
1569
1570 /* This is only called from alpha.md, after having had something
1571 generated from one of the insn patterns. So if everything is
1572 zero, the pattern is already up-to-date. */
1573 if (!MEM_VOLATILE_P (ref)
1574 && !MEM_NOTRAP_P (ref)
1575 && !MEM_READONLY_P (ref))
1576 return;
1577
1578 for (insn = seq; insn; insn = NEXT_INSN (insn))
1579 if (INSN_P (insn))
1580 for_each_rtx (&PATTERN (insn), alpha_set_memflags_1, (void *) ref);
1581 else
1582 gcc_unreachable ();
1583 }
1584 \f
1585 static rtx alpha_emit_set_const (rtx, enum machine_mode, HOST_WIDE_INT,
1586 int, bool);
1587
1588 /* Internal routine for alpha_emit_set_const to check for N or below insns.
1589 If NO_OUTPUT is true, then we only check to see if N insns are possible,
1590 and return pc_rtx if successful. */
1591
1592 static rtx
1593 alpha_emit_set_const_1 (rtx target, enum machine_mode mode,
1594 HOST_WIDE_INT c, int n, bool no_output)
1595 {
1596 HOST_WIDE_INT new_const;
1597 int i, bits;
1598 /* Use a pseudo if highly optimizing and still generating RTL. */
1599 rtx subtarget
1600 = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target);
1601 rtx temp, insn;
1602
1603 /* If this is a sign-extended 32-bit constant, we can do this in at most
1604 three insns, so do it if we have enough insns left. We always have
1605 a sign-extended 32-bit constant when compiling on a narrow machine. */
1606
1607 if (HOST_BITS_PER_WIDE_INT != 64
1608 || c >> 31 == -1 || c >> 31 == 0)
1609 {
1610 HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
1611 HOST_WIDE_INT tmp1 = c - low;
1612 HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
1613 HOST_WIDE_INT extra = 0;
1614
1615 /* If HIGH will be interpreted as negative but the constant is
1616 positive, we must adjust it to do two ldah insns. */
1617
1618 if ((high & 0x8000) != 0 && c >= 0)
1619 {
1620 extra = 0x4000;
1621 tmp1 -= 0x40000000;
1622 high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
1623 }
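/* Illustration of the adjustment above, for c == 0x7fff8000 (a value picked
   purely as an example): low == -0x8000 makes tmp1 == 0x80000000, whose
   high half 0x8000 would be sign-extended to a negative ldah operand.
   With extra == 0x4000 the constant is built instead as
   0x40000000 + 0x40000000 - 0x8000, i.e. two ldah insns plus one lda.  */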
1624
1625 if (c == low || (low == 0 && extra == 0))
1626 {
1627 /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
1628 but that meant that we can't handle INT_MIN on 32-bit machines
1629 (like NT/Alpha), because we recurse indefinitely through
1630 emit_move_insn to gen_movdi. So instead, since we know exactly
1631 what we want, create it explicitly. */
1632
1633 if (no_output)
1634 return pc_rtx;
1635 if (target == NULL)
1636 target = gen_reg_rtx (mode);
1637 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (c)));
1638 return target;
1639 }
1640 else if (n >= 2 + (extra != 0))
1641 {
1642 if (no_output)
1643 return pc_rtx;
1644 if (!can_create_pseudo_p ())
1645 {
1646 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (high << 16)));
1647 temp = target;
1648 }
1649 else
1650 temp = copy_to_suggested_reg (GEN_INT (high << 16),
1651 subtarget, mode);
1652
1653 /* As of 2002-02-23, addsi3 is only available when not optimizing.
1654 This means that if we go through expand_binop, we'll try to
1655 generate extensions, etc, which will require new pseudos, which
1656 will fail during some split phases. The SImode add patterns
1657 still exist, but are not named. So build the insns by hand. */
1658
1659 if (extra != 0)
1660 {
1661 if (! subtarget)
1662 subtarget = gen_reg_rtx (mode);
1663 insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
1664 insn = gen_rtx_SET (VOIDmode, subtarget, insn);
1665 emit_insn (insn);
1666 temp = subtarget;
1667 }
1668
1669 if (target == NULL)
1670 target = gen_reg_rtx (mode);
1671 insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1672 insn = gen_rtx_SET (VOIDmode, target, insn);
1673 emit_insn (insn);
1674 return target;
1675 }
1676 }
1677
1678 /* If we couldn't do it that way, try some other methods. But if we have
1679 no instructions left, don't bother. Likewise, if this is SImode and
1680 we can't make pseudos, we can't do anything since the expand_binop
1681 and expand_unop calls will widen and try to make pseudos. */
1682
1683 if (n == 1 || (mode == SImode && !can_create_pseudo_p ()))
1684 return 0;
1685
1686 /* Next, see if we can load a related constant and then shift and possibly
1687 negate it to get the constant we want. Try this once each increasing
1688 numbers of insns. */
1689
1690 for (i = 1; i < n; i++)
1691 {
1692 /* First, see if, minus some low bits, we have an easy load of the
1693 high bits. */
1694
1695 new_const = ((c & 0xffff) ^ 0x8000) - 0x8000;
1696 if (new_const != 0)
1697 {
1698 temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output);
1699 if (temp)
1700 {
1701 if (no_output)
1702 return temp;
1703 return expand_binop (mode, add_optab, temp, GEN_INT (new_const),
1704 target, 0, OPTAB_WIDEN);
1705 }
1706 }
1707
1708 /* Next try complementing. */
1709 temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
1710 if (temp)
1711 {
1712 if (no_output)
1713 return temp;
1714 return expand_unop (mode, one_cmpl_optab, temp, target, 0);
1715 }
1716
1717 /* Next try to form a constant and do a left shift. We can do this
1718 if some low-order bits are zero; the exact_log2 call below tells
1719 us that information. The bits we are shifting out could be any
1720 value, but here we'll just try the 0- and sign-extended forms of
1721 the constant. To try to increase the chance of having the same
1722 constant in more than one insn, start at the highest number of
1723 bits to shift, but try all possibilities in case a ZAPNOT will
1724 be useful. */
1725
1726 bits = exact_log2 (c & -c);
1727 if (bits > 0)
1728 for (; bits > 0; bits--)
1729 {
1730 new_const = c >> bits;
1731 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1732 if (!temp && c < 0)
1733 {
1734 new_const = (unsigned HOST_WIDE_INT)c >> bits;
1735 temp = alpha_emit_set_const (subtarget, mode, new_const,
1736 i, no_output);
1737 }
1738 if (temp)
1739 {
1740 if (no_output)
1741 return temp;
1742 return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
1743 target, 0, OPTAB_WIDEN);
1744 }
1745 }
1746
1747 /* Now try high-order zero bits. Here we try the shifted-in bits as
1748 all zero and all ones. Be careful to avoid shifting outside the
1749 mode and to avoid shifting outside the host wide int size. */
1750 /* On narrow hosts, don't shift a 1 into the high bit, since we'll
1751 confuse the recursive call and set all of the high 32 bits. */
1752
1753 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1754 - floor_log2 (c) - 1 - (HOST_BITS_PER_WIDE_INT < 64));
1755 if (bits > 0)
1756 for (; bits > 0; bits--)
1757 {
1758 new_const = c << bits;
1759 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1760 if (!temp)
1761 {
1762 new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
1763 temp = alpha_emit_set_const (subtarget, mode, new_const,
1764 i, no_output);
1765 }
1766 if (temp)
1767 {
1768 if (no_output)
1769 return temp;
1770 return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
1771 target, 1, OPTAB_WIDEN);
1772 }
1773 }
1774
1775 /* Now try high-order 1 bits. We get that with a sign-extension.
1776 But one bit isn't enough here. Be careful to avoid shifting outside
1777 the mode and to avoid shifting outside the host wide int size. */
1778
1779 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1780 - floor_log2 (~ c) - 2);
1781 if (bits > 0)
1782 for (; bits > 0; bits--)
1783 {
1784 new_const = c << bits;
1785 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1786 if (!temp)
1787 {
1788 new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
1789 temp = alpha_emit_set_const (subtarget, mode, new_const,
1790 i, no_output);
1791 }
1792 if (temp)
1793 {
1794 if (no_output)
1795 return temp;
1796 return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
1797 target, 0, OPTAB_WIDEN);
1798 }
1799 }
1800 }
1801
1802 #if HOST_BITS_PER_WIDE_INT == 64
1803 /* Finally, see if we can load a value into the target that is the same as the
1804 constant except that all bytes that are 0 are changed to be 0xff. If we
1805 can, then we can do a ZAPNOT to obtain the desired constant. */
1806
1807 new_const = c;
1808 for (i = 0; i < 64; i += 8)
1809 if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0)
1810 new_const |= (HOST_WIDE_INT) 0xff << i;
1811
1812 /* We are only called for SImode and DImode. If this is SImode, ensure that
1813 we are sign extended to a full word. */
1814
1815 if (mode == SImode)
1816 new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000;
1817
1818 if (new_const != c)
1819 {
1820 temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output);
1821 if (temp)
1822 {
1823 if (no_output)
1824 return temp;
1825 return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const),
1826 target, 0, OPTAB_WIDEN);
1827 }
1828 }
1829 #endif
1830
1831 return 0;
1832 }
1833
1834 /* Try to output insns to set TARGET equal to the constant C if it can be
1835 done in less than N insns. Do all computations in MODE. Returns the place
1836 where the output has been placed if it can be done and the insns have been
1837 emitted. If it would take more than N insns, zero is returned and no
1838 insns are emitted. */
1839
1840 static rtx
1841 alpha_emit_set_const (rtx target, enum machine_mode mode,
1842 HOST_WIDE_INT c, int n, bool no_output)
1843 {
1844 enum machine_mode orig_mode = mode;
1845 rtx orig_target = target;
1846 rtx result = 0;
1847 int i;
1848
1849 /* If we can't make any pseudos, TARGET is an SImode hard register, and we
1850 can't load this constant in one insn, do this in DImode. */
1851 if (!can_create_pseudo_p () && mode == SImode
1852 && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER)
1853 {
1854 result = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
1855 if (result)
1856 return result;
1857
1858 target = no_output ? NULL : gen_lowpart (DImode, target);
1859 mode = DImode;
1860 }
1861 else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
1862 {
1863 target = no_output ? NULL : gen_lowpart (DImode, target);
1864 mode = DImode;
1865 }
1866
1867 /* Try 1 insn, then 2, then up to N. */
1868 for (i = 1; i <= n; i++)
1869 {
1870 result = alpha_emit_set_const_1 (target, mode, c, i, no_output);
1871 if (result)
1872 {
1873 rtx insn, set;
1874
1875 if (no_output)
1876 return result;
1877
1878 insn = get_last_insn ();
1879 set = single_set (insn);
1880 if (! CONSTANT_P (SET_SRC (set)))
1881 set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c));
1882 break;
1883 }
1884 }
1885
1886 /* Allow for the case where we changed the mode of TARGET. */
1887 if (result)
1888 {
1889 if (result == target)
1890 result = orig_target;
1891 else if (mode != orig_mode)
1892 result = gen_lowpart (orig_mode, result);
1893 }
1894
1895 return result;
1896 }
1897
1898 /* Having failed to find a 3 insn sequence in alpha_emit_set_const,
1899 fall back to a straightforward decomposition. We do this to avoid
1900 exponential run times encountered when looking for longer sequences
1901 with alpha_emit_set_const. */
1902
1903 static rtx
1904 alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1, HOST_WIDE_INT c2)
1905 {
1906 HOST_WIDE_INT d1, d2, d3, d4;
1907
1908 /* Decompose the entire word */
1909 #if HOST_BITS_PER_WIDE_INT >= 64
1910 gcc_assert (c2 == -(c1 < 0));
1911 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1912 c1 -= d1;
1913 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1914 c1 = (c1 - d2) >> 32;
1915 d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1916 c1 -= d3;
1917 d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1918 gcc_assert (c1 == d4);
1919 #else
1920 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1921 c1 -= d1;
1922 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1923 gcc_assert (c1 == d2);
1924 c2 += (d2 < 0);
1925 d3 = ((c2 & 0xffff) ^ 0x8000) - 0x8000;
1926 c2 -= d3;
1927 d4 = ((c2 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1928 gcc_assert (c2 == d4);
1929 #endif
1930
1931 /* Construct the high word */
1932 if (d4)
1933 {
1934 emit_move_insn (target, GEN_INT (d4));
1935 if (d3)
1936 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
1937 }
1938 else
1939 emit_move_insn (target, GEN_INT (d3));
1940
1941 /* Shift it into place */
1942 emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
1943
1944 /* Add in the low bits. */
1945 if (d2)
1946 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
1947 if (d1)
1948 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
1949
1950 return target;
1951 }
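/* A worked example of the decomposition above, for
   c1 = 0x123456789abcdef0 (c2 = 0 on a 64-bit host):

	d1 = -0x2110        (sign-extended low 16 bits)
	d2 = -0x65430000    (sign-extended low 32 bits after d1 is removed)
	d3 =  0x5679
	d4 =  0x12340000

   and the construction proceeds as

	0x12340000                      (move d4)
	0x12345679                      (+ d3)
	0x1234567900000000              (<< 32)
	0x123456789abd0000              (+ d2)
	0x123456789abcdef0              (+ d1)

   Each step maps onto an ldah/lda-class insn or a single shift, so
   this value takes five insns.  */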
1952
1953 /* Given an integral CONST_INT, CONST_DOUBLE, or CONST_VECTOR, return
1954 the low 64 bits. */
1955
1956 static void
1957 alpha_extract_integer (rtx x, HOST_WIDE_INT *p0, HOST_WIDE_INT *p1)
1958 {
1959 HOST_WIDE_INT i0, i1;
1960
1961 if (GET_CODE (x) == CONST_VECTOR)
1962 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
1963
1965 if (CONST_INT_P (x))
1966 {
1967 i0 = INTVAL (x);
1968 i1 = -(i0 < 0);
1969 }
1970 else if (HOST_BITS_PER_WIDE_INT >= 64)
1971 {
1972 i0 = CONST_DOUBLE_LOW (x);
1973 i1 = -(i0 < 0);
1974 }
1975 else
1976 {
1977 i0 = CONST_DOUBLE_LOW (x);
1978 i1 = CONST_DOUBLE_HIGH (x);
1979 }
1980
1981 *p0 = i0;
1982 *p1 = i1;
1983 }
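/* For example, a CONST_INT of -5 yields (*p0, *p1) = (-5, -1), while on
   a 32-bit host a CONST_DOUBLE holding 0x123456789abcdef0 yields
   *p0 = 0x9abcdef0 (negative as a 32-bit HOST_WIDE_INT) and
   *p1 = 0x12345678.  */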
1984
1985 /* Implement TARGET_LEGITIMATE_CONSTANT_P. This is all constants for which
1986 we are willing to load the value into a register via a move pattern.
1987 Normally this is all symbolic constants, integral constants that
1988 take three or fewer instructions, and floating-point zero. */
1989
1990 bool
1991 alpha_legitimate_constant_p (enum machine_mode mode, rtx x)
1992 {
1993 HOST_WIDE_INT i0, i1;
1994
1995 switch (GET_CODE (x))
1996 {
1997 case LABEL_REF:
1998 case HIGH:
1999 return true;
2000
2001 case CONST:
2002 if (GET_CODE (XEXP (x, 0)) == PLUS
2003 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
2004 x = XEXP (XEXP (x, 0), 0);
2005 else
2006 return true;
2007
2008 if (GET_CODE (x) != SYMBOL_REF)
2009 return true;
2010
2011 /* FALLTHRU */
2012
2013 case SYMBOL_REF:
2014 /* TLS symbols are never valid. */
2015 return SYMBOL_REF_TLS_MODEL (x) == 0;
2016
2017 case CONST_DOUBLE:
2018 if (x == CONST0_RTX (mode))
2019 return true;
2020 if (FLOAT_MODE_P (mode))
2021 return false;
2022 goto do_integer;
2023
2024 case CONST_VECTOR:
2025 if (x == CONST0_RTX (mode))
2026 return true;
2027 if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
2028 return false;
2029 if (GET_MODE_SIZE (mode) != 8)
2030 return false;
2031 goto do_integer;
2032
2033 case CONST_INT:
2034 do_integer:
2035 if (TARGET_BUILD_CONSTANTS)
2036 return true;
2037 alpha_extract_integer (x, &i0, &i1);
2038 if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0))
2039 return alpha_emit_set_const_1 (x, mode, i0, 3, true) != NULL;
2040 return false;
2041
2042 default:
2043 return false;
2044 }
2045 }
2046
2047 /* Operand 1 is known to be a constant, and should require more than one
2048 instruction to load. Emit that multi-part load. */
2049
2050 bool
2051 alpha_split_const_mov (enum machine_mode mode, rtx *operands)
2052 {
2053 HOST_WIDE_INT i0, i1;
2054 rtx temp = NULL_RTX;
2055
2056 alpha_extract_integer (operands[1], &i0, &i1);
2057
2058 if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0))
2059 temp = alpha_emit_set_const (operands[0], mode, i0, 3, false);
2060
2061 if (!temp && TARGET_BUILD_CONSTANTS)
2062 temp = alpha_emit_set_long_const (operands[0], i0, i1);
2063
2064 if (temp)
2065 {
2066 if (!rtx_equal_p (operands[0], temp))
2067 emit_move_insn (operands[0], temp);
2068 return true;
2069 }
2070
2071 return false;
2072 }
2073
2074 /* Expand a move instruction; return true if all work is done.
2075 We don't handle non-bwx subword loads here. */
2076
2077 bool
2078 alpha_expand_mov (enum machine_mode mode, rtx *operands)
2079 {
2080 rtx tmp;
2081
2082 /* If the output is not a register, the input must be. */
2083 if (MEM_P (operands[0])
2084 && ! reg_or_0_operand (operands[1], mode))
2085 operands[1] = force_reg (mode, operands[1]);
2086
2087 /* Allow legitimize_address to perform some simplifications. */
2088 if (mode == Pmode && symbolic_operand (operands[1], mode))
2089 {
2090 tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode);
2091 if (tmp)
2092 {
2093 if (tmp == operands[0])
2094 return true;
2095 operands[1] = tmp;
2096 return false;
2097 }
2098 }
2099
2100 /* Early out for non-constants and valid constants. */
2101 if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode))
2102 return false;
2103
2104 /* Split large integers. */
2105 if (CONST_INT_P (operands[1])
2106 || GET_CODE (operands[1]) == CONST_DOUBLE
2107 || GET_CODE (operands[1]) == CONST_VECTOR)
2108 {
2109 if (alpha_split_const_mov (mode, operands))
2110 return true;
2111 }
2112
2113 /* Otherwise we've nothing left but to drop the thing to memory. */
2114 tmp = force_const_mem (mode, operands[1]);
2115
2116 if (tmp == NULL_RTX)
2117 return false;
2118
2119 if (reload_in_progress)
2120 {
2121 emit_move_insn (operands[0], XEXP (tmp, 0));
2122 operands[1] = replace_equiv_address (tmp, operands[0]);
2123 }
2124 else
2125 operands[1] = validize_mem (tmp);
2126 return false;
2127 }
2128
2129 /* Expand a non-bwx QImode or HImode move instruction;
2130 return true if all work is done. */
2131
2132 bool
2133 alpha_expand_mov_nobwx (enum machine_mode mode, rtx *operands)
2134 {
2135 rtx seq;
2136
2137 /* If the output is not a register, the input must be. */
2138 if (MEM_P (operands[0]))
2139 operands[1] = force_reg (mode, operands[1]);
2140
2141 /* Handle four memory cases, unaligned and aligned for either the input
2142 or the output. The only case where we can be called during reload is
2143 for aligned loads; all other cases require temporaries. */
2144
2145 if (any_memory_operand (operands[1], mode))
2146 {
2147 if (aligned_memory_operand (operands[1], mode))
2148 {
2149 if (reload_in_progress)
2150 {
2151 if (mode == QImode)
2152 seq = gen_reload_inqi_aligned (operands[0], operands[1]);
2153 else
2154 seq = gen_reload_inhi_aligned (operands[0], operands[1]);
2155 emit_insn (seq);
2156 }
2157 else
2158 {
2159 rtx aligned_mem, bitnum;
2160 rtx scratch = gen_reg_rtx (SImode);
2161 rtx subtarget;
2162 bool copyout;
2163
2164 get_aligned_mem (operands[1], &aligned_mem, &bitnum);
2165
2166 subtarget = operands[0];
2167 if (REG_P (subtarget))
2168 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2169 else
2170 subtarget = gen_reg_rtx (DImode), copyout = true;
2171
2172 if (mode == QImode)
2173 seq = gen_aligned_loadqi (subtarget, aligned_mem,
2174 bitnum, scratch);
2175 else
2176 seq = gen_aligned_loadhi (subtarget, aligned_mem,
2177 bitnum, scratch);
2178 emit_insn (seq);
2179
2180 if (copyout)
2181 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2182 }
2183 }
2184 else
2185 {
2186 /* Don't pass these as parameters since that makes the generated
2187 code depend on parameter evaluation order which will cause
2188 bootstrap failures. */
2189
2190 rtx temp1, temp2, subtarget, ua;
2191 bool copyout;
2192
2193 temp1 = gen_reg_rtx (DImode);
2194 temp2 = gen_reg_rtx (DImode);
2195
2196 subtarget = operands[0];
2197 if (REG_P (subtarget))
2198 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2199 else
2200 subtarget = gen_reg_rtx (DImode), copyout = true;
2201
2202 ua = get_unaligned_address (operands[1]);
2203 if (mode == QImode)
2204 seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2);
2205 else
2206 seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2);
2207
2208 alpha_set_memflags (seq, operands[1]);
2209 emit_insn (seq);
2210
2211 if (copyout)
2212 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2213 }
2214 return true;
2215 }
2216
2217 if (any_memory_operand (operands[0], mode))
2218 {
2219 if (aligned_memory_operand (operands[0], mode))
2220 {
2221 rtx aligned_mem, bitnum;
2222 rtx temp1 = gen_reg_rtx (SImode);
2223 rtx temp2 = gen_reg_rtx (SImode);
2224
2225 get_aligned_mem (operands[0], &aligned_mem, &bitnum);
2226
2227 emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
2228 temp1, temp2));
2229 }
2230 else
2231 {
2232 rtx temp1 = gen_reg_rtx (DImode);
2233 rtx temp2 = gen_reg_rtx (DImode);
2234 rtx temp3 = gen_reg_rtx (DImode);
2235 rtx ua = get_unaligned_address (operands[0]);
2236
2237 if (mode == QImode)
2238 seq = gen_unaligned_storeqi (ua, operands[1], temp1, temp2, temp3);
2239 else
2240 seq = gen_unaligned_storehi (ua, operands[1], temp1, temp2, temp3);
2241
2242 alpha_set_memflags (seq, operands[0]);
2243 emit_insn (seq);
2244 }
2245 return true;
2246 }
2247
2248 return false;
2249 }
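/* For instance, a QImode load whose address is known to lie within an
   aligned longword (aligned_memory_operand) is handled above by
   loading that SImode word and extracting the byte at BITNUM, while
   the unaligned cases fall back to ldq_u/ext*-style sequences built
   around DImode temporaries.  */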
2250
2251 /* Implement the movmisalign patterns. One of the operands is a memory
2252 that is not naturally aligned. Emit instructions to load it. */
2253
2254 void
2255 alpha_expand_movmisalign (enum machine_mode mode, rtx *operands)
2256 {
2257 /* Honor misaligned loads, as we promised to do. */
2258 if (MEM_P (operands[1]))
2259 {
2260 rtx tmp;
2261
2262 if (register_operand (operands[0], mode))
2263 tmp = operands[0];
2264 else
2265 tmp = gen_reg_rtx (mode);
2266
2267 alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0);
2268 if (tmp != operands[0])
2269 emit_move_insn (operands[0], tmp);
2270 }
2271 else if (MEM_P (operands[0]))
2272 {
2273 if (!reg_or_0_operand (operands[1], mode))
2274 operands[1] = force_reg (mode, operands[1]);
2275 alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
2276 }
2277 else
2278 gcc_unreachable ();
2279 }
2280
2281 /* Generate an unsigned DImode to FP conversion. This is the same code
2282 optabs would emit if we didn't have TFmode patterns.
2283
2284 For SFmode, this is the only construction I've found that can pass
2285 gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode
2286 intermediates will work, because you'll get intermediate rounding
2287 that ruins the end result. Some of this could be fixed by turning
2288 on round-to-positive-infinity, but that requires diddling the fpsr,
2289 which kills performance. I tried turning this around and converting
2290 to a negative number, so that I could turn on /m, but either I did
2291 it wrong or there's something else cause I wound up with the exact
2292 same single-bit error. There is a branch-less form of this same code:
2293
2294 srl $16,1,$1
2295 and $16,1,$2
2296 cmplt $16,0,$3
2297 or $1,$2,$2
2298 cmovge $16,$16,$2
2299 itoft $3,$f10
2300 itoft $2,$f11
2301 cvtqs $f11,$f11
2302 adds $f11,$f11,$f0
2303 fcmoveq $f10,$f11,$f0
2304
2305 I'm not using it because it's the same number of instructions as
2306 this branch-full form, and it has more serialized long latency
2307 instructions on the critical path.
2308
2309 For DFmode, we can avoid rounding errors by breaking up the word
2310 into two pieces, converting them separately, and adding them back:
2311
2312 LC0: .long 0,0x5f800000
2313
2314 itoft $16,$f11
2315 lda $2,LC0
2316 cmplt $16,0,$1
2317 cpyse $f11,$f31,$f10
2318 cpyse $f31,$f11,$f11
2319 s4addq $1,$2,$1
2320 lds $f12,0($1)
2321 cvtqt $f10,$f10
2322 cvtqt $f11,$f11
2323 addt $f12,$f10,$f0
2324 addt $f0,$f11,$f0
2325
2326 This doesn't seem to be a clear-cut win over the optabs form.
2327 It probably all depends on the distribution of numbers being
2328 converted -- in the optabs form, everything but the high-bit-set
2329 case has a much lower minimum execution time. */
2330
2331 void
2332 alpha_emit_floatuns (rtx operands[2])
2333 {
2334 rtx neglab, donelab, i0, i1, f0, in, out;
2335 enum machine_mode mode;
2336
2337 out = operands[0];
2338 in = force_reg (DImode, operands[1]);
2339 mode = GET_MODE (out);
2340 neglab = gen_label_rtx ();
2341 donelab = gen_label_rtx ();
2342 i0 = gen_reg_rtx (DImode);
2343 i1 = gen_reg_rtx (DImode);
2344 f0 = gen_reg_rtx (mode);
2345
2346 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
2347
2348 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
2349 emit_jump_insn (gen_jump (donelab));
2350 emit_barrier ();
2351
2352 emit_label (neglab);
2353
2354 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
2355 emit_insn (gen_anddi3 (i1, in, const1_rtx));
2356 emit_insn (gen_iordi3 (i0, i0, i1));
2357 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
2358 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
2359
2360 emit_label (donelab);
2361 }
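/* The negative path above uses the classic halve/convert/double trick:
   for an input with the high bit set we convert (in >> 1) | (in & 1)
   and then add the result to itself.  OR-ing the discarded low bit
   back in keeps it "sticky", so the single rounding performed by the
   conversion agrees with the correctly rounded result for the full
   64-bit value; a plain shift could lose that bit and round the
   wrong way.  */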
2362
2363 /* Generate the comparison for a conditional branch. */
2364
2365 void
2366 alpha_emit_conditional_branch (rtx operands[], enum machine_mode cmp_mode)
2367 {
2368 enum rtx_code cmp_code, branch_code;
2369 enum machine_mode branch_mode = VOIDmode;
2370 enum rtx_code code = GET_CODE (operands[0]);
2371 rtx op0 = operands[1], op1 = operands[2];
2372 rtx tem;
2373
2374 if (cmp_mode == TFmode)
2375 {
2376 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2377 op1 = const0_rtx;
2378 cmp_mode = DImode;
2379 }
2380
2381 /* The general case: fold the comparison code to the types of compares
2382 that we have, choosing the branch as necessary. */
2383 switch (code)
2384 {
2385 case EQ: case LE: case LT: case LEU: case LTU:
2386 case UNORDERED:
2387 /* We have these compares. */
2388 cmp_code = code, branch_code = NE;
2389 break;
2390
2391 case NE:
2392 case ORDERED:
2393 /* These must be reversed. */
2394 cmp_code = reverse_condition (code), branch_code = EQ;
2395 break;
2396
2397 case GE: case GT: case GEU: case GTU:
2398 /* For FP, we swap them, for INT, we reverse them. */
2399 if (cmp_mode == DFmode)
2400 {
2401 cmp_code = swap_condition (code);
2402 branch_code = NE;
2403 tem = op0, op0 = op1, op1 = tem;
2404 }
2405 else
2406 {
2407 cmp_code = reverse_condition (code);
2408 branch_code = EQ;
2409 }
2410 break;
2411
2412 default:
2413 gcc_unreachable ();
2414 }
2415
2416 if (cmp_mode == DFmode)
2417 {
2418 if (flag_unsafe_math_optimizations && cmp_code != UNORDERED)
2419 {
2420 /* When we are not as concerned about non-finite values, and we
2421 are comparing against zero, we can branch directly. */
2422 if (op1 == CONST0_RTX (DFmode))
2423 cmp_code = UNKNOWN, branch_code = code;
2424 else if (op0 == CONST0_RTX (DFmode))
2425 {
2426 /* Undo the swap we probably did just above. */
2427 tem = op0, op0 = op1, op1 = tem;
2428 branch_code = swap_condition (cmp_code);
2429 cmp_code = UNKNOWN;
2430 }
2431 }
2432 else
2433 {
2434 /* ??? We mark the branch mode to be CCmode to prevent the
2435 compare and branch from being combined, since the compare
2436 insn follows IEEE rules that the branch does not. */
2437 branch_mode = CCmode;
2438 }
2439 }
2440 else
2441 {
2442 /* The following optimizations are only for signed compares. */
2443 if (code != LEU && code != LTU && code != GEU && code != GTU)
2444 {
2445 /* Whee. Compare and branch against 0 directly. */
2446 if (op1 == const0_rtx)
2447 cmp_code = UNKNOWN, branch_code = code;
2448
2449 /* If the constant doesn't fit into an immediate, but can
2450 be generated by lda/ldah, we adjust the argument and
2451 compare against zero, so we can use beq/bne directly. */
2452 /* ??? Don't do this when comparing against symbols, otherwise
2453 we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will
2454 be declared false out of hand (at least for non-weak). */
2455 else if (CONST_INT_P (op1)
2456 && (code == EQ || code == NE)
2457 && !(symbolic_operand (op0, VOIDmode)
2458 || (REG_P (op0) && REG_POINTER (op0))))
2459 {
2460 rtx n_op1 = GEN_INT (-INTVAL (op1));
2461
2462 if (! satisfies_constraint_I (op1)
2463 && (satisfies_constraint_K (n_op1)
2464 || satisfies_constraint_L (n_op1)))
2465 cmp_code = PLUS, branch_code = code, op1 = n_op1;
2466 }
2467 }
2468
2469 if (!reg_or_0_operand (op0, DImode))
2470 op0 = force_reg (DImode, op0);
2471 if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
2472 op1 = force_reg (DImode, op1);
2473 }
2474
2475 /* Emit an initial compare instruction, if necessary. */
2476 tem = op0;
2477 if (cmp_code != UNKNOWN)
2478 {
2479 tem = gen_reg_rtx (cmp_mode);
2480 emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
2481 }
2482
2483 /* Emit the branch instruction. */
2484 tem = gen_rtx_SET (VOIDmode, pc_rtx,
2485 gen_rtx_IF_THEN_ELSE (VOIDmode,
2486 gen_rtx_fmt_ee (branch_code,
2487 branch_mode, tem,
2488 CONST0_RTX (cmp_mode)),
2489 gen_rtx_LABEL_REF (VOIDmode,
2490 operands[3]),
2491 pc_rtx));
2492 emit_jump_insn (tem);
2493 }
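/* As an example of the lda/ldah adjustment above: for "x == 0x12340000"
   the constant does not fit the 8-bit literal field, but its negation
   satisfies constraint L (an ldah-style constant), so the test is
   rewritten as "(x + -0x12340000) == 0", i.e. a single ldah-style add
   followed by a beq on the temporary.  */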
2494
2495 /* Certain simplifications can be done to make invalid setcc operations
2496 valid. Return true if the setcc has been emitted, or false if we can't work. */
2497
2498 bool
2499 alpha_emit_setcc (rtx operands[], enum machine_mode cmp_mode)
2500 {
2501 enum rtx_code cmp_code;
2502 enum rtx_code code = GET_CODE (operands[1]);
2503 rtx op0 = operands[2], op1 = operands[3];
2504 rtx tmp;
2505
2506 if (cmp_mode == TFmode)
2507 {
2508 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2509 op1 = const0_rtx;
2510 cmp_mode = DImode;
2511 }
2512
2513 if (cmp_mode == DFmode && !TARGET_FIX)
2514 return 0;
2515
2516 /* The general case: fold the comparison code to the types of compares
2517 that we have, choosing the branch as necessary. */
2518
2519 cmp_code = UNKNOWN;
2520 switch (code)
2521 {
2522 case EQ: case LE: case LT: case LEU: case LTU:
2523 case UNORDERED:
2524 /* We have these compares. */
2525 if (cmp_mode == DFmode)
2526 cmp_code = code, code = NE;
2527 break;
2528
2529 case NE:
2530 if (cmp_mode == DImode && op1 == const0_rtx)
2531 break;
2532 /* FALLTHRU */
2533
2534 case ORDERED:
2535 cmp_code = reverse_condition (code);
2536 code = EQ;
2537 break;
2538
2539 case GE: case GT: case GEU: case GTU:
2540 /* These normally need swapping, but for integer zero we have
2541 special patterns that recognize swapped operands. */
2542 if (cmp_mode == DImode && op1 == const0_rtx)
2543 break;
2544 code = swap_condition (code);
2545 if (cmp_mode == DFmode)
2546 cmp_code = code, code = NE;
2547 tmp = op0, op0 = op1, op1 = tmp;
2548 break;
2549
2550 default:
2551 gcc_unreachable ();
2552 }
2553
2554 if (cmp_mode == DImode)
2555 {
2556 if (!register_operand (op0, DImode))
2557 op0 = force_reg (DImode, op0);
2558 if (!reg_or_8bit_operand (op1, DImode))
2559 op1 = force_reg (DImode, op1);
2560 }
2561
2562 /* Emit an initial compare instruction, if necessary. */
2563 if (cmp_code != UNKNOWN)
2564 {
2565 tmp = gen_reg_rtx (cmp_mode);
2566 emit_insn (gen_rtx_SET (VOIDmode, tmp,
2567 gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1)));
2568
2569 op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp;
2570 op1 = const0_rtx;
2571 }
2572
2573 /* Emit the setcc instruction. */
2574 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2575 gen_rtx_fmt_ee (code, DImode, op0, op1)));
2576 return true;
2577 }
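/* For example, a DImode setcc of "x > y" with y nonzero is handled
   above by swapping it to "y < x", so the result comes from a single
   cmplt; DFmode comparisons, and DImode NE against a nonzero value,
   instead emit the compare separately and then test its result
   against zero.  */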
2578
2579
2580 /* Rewrite a comparison against zero CMP of the form
2581 (CODE (cc0) (const_int 0)) so it can be written validly in
2582 a conditional move (if_then_else CMP ...).
2583 If both of the operands that set cc0 are nonzero we must emit
2584 an insn to perform the compare (it can't be done within
2585 the conditional move). */
2586
2587 rtx
2588 alpha_emit_conditional_move (rtx cmp, enum machine_mode mode)
2589 {
2590 enum rtx_code code = GET_CODE (cmp);
2591 enum rtx_code cmov_code = NE;
2592 rtx op0 = XEXP (cmp, 0);
2593 rtx op1 = XEXP (cmp, 1);
2594 enum machine_mode cmp_mode
2595 = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0));
2596 enum machine_mode cmov_mode = VOIDmode;
2597 int local_fast_math = flag_unsafe_math_optimizations;
2598 rtx tem;
2599
2600 if (cmp_mode == TFmode)
2601 {
2602 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2603 op1 = const0_rtx;
2604 cmp_mode = DImode;
2605 }
2606
2607 gcc_assert (cmp_mode == DFmode || cmp_mode == DImode);
2608
2609 if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode))
2610 {
2611 enum rtx_code cmp_code;
2612
2613 if (! TARGET_FIX)
2614 return 0;
2615
2616 /* If we have fp<->int register move instructions, do a cmov by
2617 performing the comparison in fp registers, and move the
2618 zero/nonzero value to integer registers, where we can then
2619 use a normal cmov, or vice-versa. */
2620
2621 switch (code)
2622 {
2623 case EQ: case LE: case LT: case LEU: case LTU:
2624 case UNORDERED:
2625 /* We have these compares. */
2626 cmp_code = code, code = NE;
2627 break;
2628
2629 case NE:
2630 case ORDERED:
2631 /* These must be reversed. */
2632 cmp_code = reverse_condition (code), code = EQ;
2633 break;
2634
2635 case GE: case GT: case GEU: case GTU:
2636 /* These normally need swapping, but for integer zero we have
2637 special patterns that recognize swapped operands. */
2638 if (cmp_mode == DImode && op1 == const0_rtx)
2639 cmp_code = code, code = NE;
2640 else
2641 {
2642 cmp_code = swap_condition (code);
2643 code = NE;
2644 tem = op0, op0 = op1, op1 = tem;
2645 }
2646 break;
2647
2648 default:
2649 gcc_unreachable ();
2650 }
2651
2652 if (cmp_mode == DImode)
2653 {
2654 if (!reg_or_0_operand (op0, DImode))
2655 op0 = force_reg (DImode, op0);
2656 if (!reg_or_8bit_operand (op1, DImode))
2657 op1 = force_reg (DImode, op1);
2658 }
2659
2660 tem = gen_reg_rtx (cmp_mode);
2661 emit_insn (gen_rtx_SET (VOIDmode, tem,
2662 gen_rtx_fmt_ee (cmp_code, cmp_mode,
2663 op0, op1)));
2664
2665 cmp_mode = cmp_mode == DImode ? DFmode : DImode;
2666 op0 = gen_lowpart (cmp_mode, tem);
2667 op1 = CONST0_RTX (cmp_mode);
2668 cmp = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2669 local_fast_math = 1;
2670 }
2671
2672 if (cmp_mode == DImode)
2673 {
2674 if (!reg_or_0_operand (op0, DImode))
2675 op0 = force_reg (DImode, op0);
2676 if (!reg_or_8bit_operand (op1, DImode))
2677 op1 = force_reg (DImode, op1);
2678 }
2679
2680 /* We may be able to use a conditional move directly.
2681 This avoids emitting spurious compares. */
2682 if (signed_comparison_operator (cmp, VOIDmode)
2683 && (cmp_mode == DImode || local_fast_math)
2684 && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode)))
2685 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2686
2687 /* We can't put the comparison inside the conditional move;
2688 emit a compare instruction and put that inside the
2689 conditional move. Make sure we emit only comparisons we have;
2690 swap or reverse as necessary. */
2691
2692 if (!can_create_pseudo_p ())
2693 return NULL_RTX;
2694
2695 switch (code)
2696 {
2697 case EQ: case LE: case LT: case LEU: case LTU:
2698 case UNORDERED:
2699 /* We have these compares: */
2700 break;
2701
2702 case NE:
2703 case ORDERED:
2704 /* These must be reversed. */
2705 code = reverse_condition (code);
2706 cmov_code = EQ;
2707 break;
2708
2709 case GE: case GT: case GEU: case GTU:
2710 /* These normally need swapping, but for integer zero we have
2711 special patterns that recognize swapped operands. */
2712 if (cmp_mode == DImode && op1 == const0_rtx)
2713 break;
2714 code = swap_condition (code);
2715 tem = op0, op0 = op1, op1 = tem;
2716 break;
2717
2718 default:
2719 gcc_unreachable ();
2720 }
2721
2722 if (cmp_mode == DImode)
2723 {
2724 if (!reg_or_0_operand (op0, DImode))
2725 op0 = force_reg (DImode, op0);
2726 if (!reg_or_8bit_operand (op1, DImode))
2727 op1 = force_reg (DImode, op1);
2728 }
2729
2730 /* ??? We mark the branch mode to be CCmode to prevent the compare
2731 and cmov from being combined, since the compare insn follows IEEE
2732 rules that the cmov does not. */
2733 if (cmp_mode == DFmode && !local_fast_math)
2734 cmov_mode = CCmode;
2735
2736 tem = gen_reg_rtx (cmp_mode);
2737 emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1));
2738 return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode));
2739 }
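/* In the mixed FP/integer case above (TARGET_FIX), e.g. selecting
   between two DImode values based on a DFmode comparison, the compare
   itself is done in the floating registers and its zero/nonzero
   result is then viewed in the other register file via gen_lowpart,
   which the move patterns can implement with the FIX ftoit/itoft
   transfer instructions; the caller then issues an ordinary cmov on
   that value.  */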
2740
2741 /* Simplify a conditional move of two constants into a setcc with
2742 arithmetic. This is done with a splitter since combine would
2743 just undo the work if done during code generation. It also catches
2744 cases we wouldn't have before cse. */
2745
2746 int
2747 alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond,
2748 rtx t_rtx, rtx f_rtx)
2749 {
2750 HOST_WIDE_INT t, f, diff;
2751 enum machine_mode mode;
2752 rtx target, subtarget, tmp;
2753
2754 mode = GET_MODE (dest);
2755 t = INTVAL (t_rtx);
2756 f = INTVAL (f_rtx);
2757 diff = t - f;
2758
2759 if (((code == NE || code == EQ) && diff < 0)
2760 || (code == GE || code == GT))
2761 {
2762 code = reverse_condition (code);
2763 diff = t, t = f, f = diff;
2764 diff = t - f;
2765 }
2766
2767 subtarget = target = dest;
2768 if (mode != DImode)
2769 {
2770 target = gen_lowpart (DImode, dest);
2771 if (can_create_pseudo_p ())
2772 subtarget = gen_reg_rtx (DImode);
2773 else
2774 subtarget = target;
2775 }
2776 /* Below, we must be careful to use copy_rtx on target and subtarget
2777 in intermediate insns, as they may be a subreg rtx, which may not
2778 be shared. */
2779
2780 if (f == 0 && exact_log2 (diff) > 0
2781 /* On EV6, we've got enough shifters to make non-arithmetic shifts
2782 viable over a longer latency cmove. On EV5, the E0 slot is a
2783 scarce resource, and on EV4 shift has the same latency as a cmove. */
2784 && (diff <= 8 || alpha_tune == PROCESSOR_EV6))
2785 {
2786 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2787 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2788
2789 tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2790 GEN_INT (exact_log2 (t)));
2791 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2792 }
2793 else if (f == 0 && t == -1)
2794 {
2795 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2796 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2797
2798 emit_insn (gen_negdi2 (target, copy_rtx (subtarget)));
2799 }
2800 else if (diff == 1 || diff == 4 || diff == 8)
2801 {
2802 rtx add_op;
2803
2804 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2805 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2806
2807 if (diff == 1)
2808 emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f)));
2809 else
2810 {
2811 add_op = GEN_INT (f);
2812 if (sext_add_operand (add_op, mode))
2813 {
2814 tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget),
2815 GEN_INT (diff));
2816 tmp = gen_rtx_PLUS (DImode, tmp, add_op);
2817 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2818 }
2819 else
2820 return 0;
2821 }
2822 }
2823 else
2824 return 0;
2825
2826 return 1;
2827 }
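/* Two examples of the arithmetic above: for (code, t, f) = (EQ, 8, 0)
   we emit a cmpeq followed by a shift left by 3, yielding 8 or 0
   directly; for (EQ, 5, 1) the diff == 4 case emits the cmpeq and
   then r * 4 + 1 (an s4addq-style pattern), yielding 5 when the
   condition holds and 1 otherwise.  */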
2828 \f
2829 /* Look up the function X_floating library function name for the
2830 given operation. */
2831
2832 struct GTY(()) xfloating_op
2833 {
2834 const enum rtx_code code;
2835 const char *const GTY((skip)) osf_func;
2836 const char *const GTY((skip)) vms_func;
2837 rtx libcall;
2838 };
2839
2840 static GTY(()) struct xfloating_op xfloating_ops[] =
2841 {
2842 { PLUS, "_OtsAddX", "OTS$ADD_X", 0 },
2843 { MINUS, "_OtsSubX", "OTS$SUB_X", 0 },
2844 { MULT, "_OtsMulX", "OTS$MUL_X", 0 },
2845 { DIV, "_OtsDivX", "OTS$DIV_X", 0 },
2846 { EQ, "_OtsEqlX", "OTS$EQL_X", 0 },
2847 { NE, "_OtsNeqX", "OTS$NEQ_X", 0 },
2848 { LT, "_OtsLssX", "OTS$LSS_X", 0 },
2849 { LE, "_OtsLeqX", "OTS$LEQ_X", 0 },
2850 { GT, "_OtsGtrX", "OTS$GTR_X", 0 },
2851 { GE, "_OtsGeqX", "OTS$GEQ_X", 0 },
2852 { FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 },
2853 { FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 },
2854 { UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 },
2855 { FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 },
2856 { FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 }
2857 };
2858
2859 static GTY(()) struct xfloating_op vax_cvt_ops[] =
2860 {
2861 { FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 },
2862 { FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 }
2863 };
2864
2865 static rtx
2866 alpha_lookup_xfloating_lib_func (enum rtx_code code)
2867 {
2868 struct xfloating_op *ops = xfloating_ops;
2869 long n = ARRAY_SIZE (xfloating_ops);
2870 long i;
2871
2872 gcc_assert (TARGET_HAS_XFLOATING_LIBS);
2873
2874 /* How irritating. Nothing to key off for the main table. */
2875 if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE))
2876 {
2877 ops = vax_cvt_ops;
2878 n = ARRAY_SIZE (vax_cvt_ops);
2879 }
2880
2881 for (i = 0; i < n; ++i, ++ops)
2882 if (ops->code == code)
2883 {
2884 rtx func = ops->libcall;
2885 if (!func)
2886 {
2887 func = init_one_libfunc (TARGET_ABI_OPEN_VMS
2888 ? ops->vms_func : ops->osf_func);
2889 ops->libcall = func;
2890 }
2891 return func;
2892 }
2893
2894 gcc_unreachable ();
2895 }
2896
2897 /* Most X_floating operations take the rounding mode as an argument.
2898 Compute that here. */
2899
2900 static int
2901 alpha_compute_xfloating_mode_arg (enum rtx_code code,
2902 enum alpha_fp_rounding_mode round)
2903 {
2904 int mode;
2905
2906 switch (round)
2907 {
2908 case ALPHA_FPRM_NORM:
2909 mode = 2;
2910 break;
2911 case ALPHA_FPRM_MINF:
2912 mode = 1;
2913 break;
2914 case ALPHA_FPRM_CHOP:
2915 mode = 0;
2916 break;
2917 case ALPHA_FPRM_DYN:
2918 mode = 4;
2919 break;
2920 default:
2921 gcc_unreachable ();
2922
2923 /* XXX For reference, round to +inf is mode = 3. */
2924 }
2925
2926 if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N)
2927 mode |= 0x10000;
2928
2929 return mode;
2930 }
2931
2932 /* Emit an X_floating library function call.
2933
2934 Note that these functions do not follow normal calling conventions:
2935 TFmode arguments are passed in two integer registers (as opposed to
2936 indirect); TFmode return values appear in R16+R17.
2937
2938 FUNC is the function to call.
2939 TARGET is where the output belongs.
2940 OPERANDS are the inputs.
2941 NOPERANDS is the count of inputs.
2942 EQUIV is the expression equivalent for the function.
2943 */
2944
2945 static void
2946 alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
2947 int noperands, rtx equiv)
2948 {
2949 rtx usage = NULL_RTX, tmp, reg;
2950 int regno = 16, i;
2951
2952 start_sequence ();
2953
2954 for (i = 0; i < noperands; ++i)
2955 {
2956 switch (GET_MODE (operands[i]))
2957 {
2958 case TFmode:
2959 reg = gen_rtx_REG (TFmode, regno);
2960 regno += 2;
2961 break;
2962
2963 case DFmode:
2964 reg = gen_rtx_REG (DFmode, regno + 32);
2965 regno += 1;
2966 break;
2967
2968 case VOIDmode:
2969 gcc_assert (CONST_INT_P (operands[i]));
2970 /* FALLTHRU */
2971 case DImode:
2972 reg = gen_rtx_REG (DImode, regno);
2973 regno += 1;
2974 break;
2975
2976 default:
2977 gcc_unreachable ();
2978 }
2979
2980 emit_move_insn (reg, operands[i]);
2981 use_reg (&usage, reg);
2982 }
2983
2984 switch (GET_MODE (target))
2985 {
2986 case TFmode:
2987 reg = gen_rtx_REG (TFmode, 16);
2988 break;
2989 case DFmode:
2990 reg = gen_rtx_REG (DFmode, 32);
2991 break;
2992 case DImode:
2993 reg = gen_rtx_REG (DImode, 0);
2994 break;
2995 default:
2996 gcc_unreachable ();
2997 }
2998
2999 tmp = gen_rtx_MEM (QImode, func);
3000 tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx,
3001 const0_rtx, const0_rtx));
3002 CALL_INSN_FUNCTION_USAGE (tmp) = usage;
3003 RTL_CONST_CALL_P (tmp) = 1;
3004
3005 tmp = get_insns ();
3006 end_sequence ();
3007
3008 emit_libcall_block (tmp, target, reg, equiv);
3009 }
3010
3011 /* Emit an X_floating library function call for arithmetic (+,-,*,/). */
3012
3013 void
3014 alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[])
3015 {
3016 rtx func;
3017 int mode;
3018 rtx out_operands[3];
3019
3020 func = alpha_lookup_xfloating_lib_func (code);
3021 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3022
3023 out_operands[0] = operands[1];
3024 out_operands[1] = operands[2];
3025 out_operands[2] = GEN_INT (mode);
3026 alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3,
3027 gen_rtx_fmt_ee (code, TFmode, operands[1],
3028 operands[2]));
3029 }
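/* Concretely, a TFmode add becomes a call to _OtsAddX (OTS$ADD_X on
   VMS) with the first operand in $16/$17, the second in $18/$19, the
   rounding-mode argument in $20, and the TFmode result returned in
   $16/$17, following the conventions described above
   alpha_emit_xfloating_libcall.  */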
3030
3031 /* Emit an X_floating library function call for a comparison. */
3032
3033 static rtx
3034 alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1)
3035 {
3036 enum rtx_code cmp_code, res_code;
3037 rtx func, out, operands[2], note;
3038
3039 /* X_floating library comparison functions return
3040 -1 unordered
3041 0 false
3042 1 true
3043 Convert the compare against the raw return value. */
3044
3045 cmp_code = *pcode;
3046 switch (cmp_code)
3047 {
3048 case UNORDERED:
3049 cmp_code = EQ;
3050 res_code = LT;
3051 break;
3052 case ORDERED:
3053 cmp_code = EQ;
3054 res_code = GE;
3055 break;
3056 case NE:
3057 res_code = NE;
3058 break;
3059 case EQ:
3060 case LT:
3061 case GT:
3062 case LE:
3063 case GE:
3064 res_code = GT;
3065 break;
3066 default:
3067 gcc_unreachable ();
3068 }
3069 *pcode = res_code;
3070
3071 func = alpha_lookup_xfloating_lib_func (cmp_code);
3072
3073 operands[0] = op0;
3074 operands[1] = op1;
3075 out = gen_reg_rtx (DImode);
3076
3077 /* What's actually returned is -1,0,1, not a proper boolean value. */
3078 note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1);
3079 note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE);
3080 alpha_emit_xfloating_libcall (func, out, operands, 2, note);
3081
3082 return out;
3083 }
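/* For example, an UNORDERED test becomes a call to the EQ routine
   (whose raw result is -1 for unordered inputs) followed by a
   "result < 0" test, and ORDERED uses the same call with
   "result >= 0"; the ordinary comparisons test "result > 0"
   (or "result != 0" for NE, so that unordered inputs count as
   not-equal).  */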
3084
3085 /* Emit an X_floating library function call for a conversion. */
3086
3087 void
3088 alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[])
3089 {
3090 int noperands = 1, mode;
3091 rtx out_operands[2];
3092 rtx func;
3093 enum rtx_code code = orig_code;
3094
3095 if (code == UNSIGNED_FIX)
3096 code = FIX;
3097
3098 func = alpha_lookup_xfloating_lib_func (code);
3099
3100 out_operands[0] = operands[1];
3101
3102 switch (code)
3103 {
3104 case FIX:
3105 mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP);
3106 out_operands[1] = GEN_INT (mode);
3107 noperands = 2;
3108 break;
3109 case FLOAT_TRUNCATE:
3110 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3111 out_operands[1] = GEN_INT (mode);
3112 noperands = 2;
3113 break;
3114 default:
3115 break;
3116 }
3117
3118 alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands,
3119 gen_rtx_fmt_e (orig_code,
3120 GET_MODE (operands[0]),
3121 operands[1]));
3122 }
3123
3124 /* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of
3125 DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true,
3126 guarantee that the sequence
3127 set (OP[0] OP[2])
3128 set (OP[1] OP[3])
3129 is valid. Naturally, output operand ordering is little-endian.
3130 This is used by *movtf_internal and *movti_internal. */
3131
3132 void
3133 alpha_split_tmode_pair (rtx operands[4], enum machine_mode mode,
3134 bool fixup_overlap)
3135 {
3136 switch (GET_CODE (operands[1]))
3137 {
3138 case REG:
3139 operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
3140 operands[2] = gen_rtx_REG (DImode, REGNO (operands[1]));
3141 break;
3142
3143 case MEM:
3144 operands[3] = adjust_address (operands[1], DImode, 8);
3145 operands[2] = adjust_address (operands[1], DImode, 0);
3146 break;
3147
3148 case CONST_INT:
3149 case CONST_DOUBLE:
3150 gcc_assert (operands[1] == CONST0_RTX (mode));
3151 operands[2] = operands[3] = const0_rtx;
3152 break;
3153
3154 default:
3155 gcc_unreachable ();
3156 }
3157
3158 switch (GET_CODE (operands[0]))
3159 {
3160 case REG:
3161 operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
3162 operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
3163 break;
3164
3165 case MEM:
3166 operands[1] = adjust_address (operands[0], DImode, 8);
3167 operands[0] = adjust_address (operands[0], DImode, 0);
3168 break;
3169
3170 default:
3171 gcc_unreachable ();
3172 }
3173
3174 if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3]))
3175 {
3176 rtx tmp;
3177 tmp = operands[0], operands[0] = operands[1], operands[1] = tmp;
3178 tmp = operands[2], operands[2] = operands[3], operands[3] = tmp;
3179 }
3180 }
3181
3182 /* Implement negtf2 or abstf2. Op0 is destination, op1 is source,
3183 op2 is a register containing the sign bit, operation is the
3184 logical operation to be performed. */
3185
3186 void
3187 alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx))
3188 {
3189 rtx high_bit = operands[2];
3190 rtx scratch;
3191 int move;
3192
3193 alpha_split_tmode_pair (operands, TFmode, false);
3194
3195 /* Detect three flavors of operand overlap. */
3196 move = 1;
3197 if (rtx_equal_p (operands[0], operands[2]))
3198 move = 0;
3199 else if (rtx_equal_p (operands[1], operands[2]))
3200 {
3201 if (rtx_equal_p (operands[0], high_bit))
3202 move = 2;
3203 else
3204 move = -1;
3205 }
3206
3207 if (move < 0)
3208 emit_move_insn (operands[0], operands[2]);
3209
3210 /* ??? If the destination overlaps both source tf and high_bit, then
3211 assume source tf is dead in its entirety and use the other half
3212 for a scratch register. Otherwise "scratch" is just the proper
3213 destination register. */
3214 scratch = operands[move < 2 ? 1 : 3];
3215
3216 emit_insn ((*operation) (scratch, high_bit, operands[3]));
3217
3218 if (move > 0)
3219 {
3220 emit_move_insn (operands[0], operands[2]);
3221 if (move > 1)
3222 emit_move_insn (operands[1], scratch);
3223 }
3224 }
3225 \f
3226 /* Use ext[wlq][lh] as the Architecture Handbook describes for extracting
3227 unaligned data:
3228
3229 unsigned: signed:
3230 word: ldq_u r1,X(r11) ldq_u r1,X(r11)
3231 ldq_u r2,X+1(r11) ldq_u r2,X+1(r11)
3232 lda r3,X(r11) lda r3,X+2(r11)
3233 extwl r1,r3,r1 extql r1,r3,r1
3234 extwh r2,r3,r2 extqh r2,r3,r2
3235 or r1,r2,r1 or r1,r2,r1
3236 sra r1,48,r1
3237
3238 long: ldq_u r1,X(r11) ldq_u r1,X(r11)
3239 ldq_u r2,X+3(r11) ldq_u r2,X+3(r11)
3240 lda r3,X(r11) lda r3,X(r11)
3241 extll r1,r3,r1 extll r1,r3,r1
3242 extlh r2,r3,r2 extlh r2,r3,r2
3243 or r1,r2,r1 addl r1,r2,r1
3244
3245 quad: ldq_u r1,X(r11)
3246 ldq_u r2,X+7(r11)
3247 lda r3,X(r11)
3248 extql r1,r3,r1
3249 extqh r2,r3,r2
3250 or r1,r2,r1
3251 */
3252
3253 void
3254 alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size,
3255 HOST_WIDE_INT ofs, int sign)
3256 {
3257 rtx meml, memh, addr, extl, exth, tmp, mema;
3258 enum machine_mode mode;
3259
3260 if (TARGET_BWX && size == 2)
3261 {
3262 meml = adjust_address (mem, QImode, ofs);
3263 memh = adjust_address (mem, QImode, ofs+1);
3264 extl = gen_reg_rtx (DImode);
3265 exth = gen_reg_rtx (DImode);
3266 emit_insn (gen_zero_extendqidi2 (extl, meml));
3267 emit_insn (gen_zero_extendqidi2 (exth, memh));
3268 exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8),
3269 NULL, 1, OPTAB_LIB_WIDEN);
3270 addr = expand_simple_binop (DImode, IOR, extl, exth,
3271 NULL, 1, OPTAB_LIB_WIDEN);
3272
3273 if (sign && GET_MODE (tgt) != HImode)
3274 {
3275 addr = gen_lowpart (HImode, addr);
3276 emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0));
3277 }
3278 else
3279 {
3280 if (GET_MODE (tgt) != DImode)
3281 addr = gen_lowpart (GET_MODE (tgt), addr);
3282 emit_move_insn (tgt, addr);
3283 }
3284 return;
3285 }
3286
3287 meml = gen_reg_rtx (DImode);
3288 memh = gen_reg_rtx (DImode);
3289 addr = gen_reg_rtx (DImode);
3290 extl = gen_reg_rtx (DImode);
3291 exth = gen_reg_rtx (DImode);
3292
3293 mema = XEXP (mem, 0);
3294 if (GET_CODE (mema) == LO_SUM)
3295 mema = force_reg (Pmode, mema);
3296
3297 /* AND addresses cannot be in any alias set, since they may implicitly
3298 alias surrounding code. Ideally we'd have some alias set that
3299 covered all types except those with alignment 8 or higher. */
3300
3301 tmp = change_address (mem, DImode,
3302 gen_rtx_AND (DImode,
3303 plus_constant (DImode, mema, ofs),
3304 GEN_INT (-8)));
3305 set_mem_alias_set (tmp, 0);
3306 emit_move_insn (meml, tmp);
3307
3308 tmp = change_address (mem, DImode,
3309 gen_rtx_AND (DImode,
3310 plus_constant (DImode, mema,
3311 ofs + size - 1),
3312 GEN_INT (-8)));
3313 set_mem_alias_set (tmp, 0);
3314 emit_move_insn (memh, tmp);
3315
3316 if (sign && size == 2)
3317 {
3318 emit_move_insn (addr, plus_constant (Pmode, mema, ofs+2));
3319
3320 emit_insn (gen_extql (extl, meml, addr));
3321 emit_insn (gen_extqh (exth, memh, addr));
3322
3323 /* We must use tgt here for the target. The alpha-vms port fails if we use
3324 addr for the target, because addr is marked as a pointer and combine
3325 knows that pointers are always sign-extended 32-bit values. */
3326 addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
3327 addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48),
3328 addr, 1, OPTAB_WIDEN);
3329 }
3330 else
3331 {
3332 emit_move_insn (addr, plus_constant (Pmode, mema, ofs));
3333 emit_insn (gen_extxl (extl, meml, GEN_INT (size*8), addr));
3334 switch ((int) size)
3335 {
3336 case 2:
3337 emit_insn (gen_extwh (exth, memh, addr));
3338 mode = HImode;
3339 break;
3340 case 4:
3341 emit_insn (gen_extlh (exth, memh, addr));
3342 mode = SImode;
3343 break;
3344 case 8:
3345 emit_insn (gen_extqh (exth, memh, addr));
3346 mode = DImode;
3347 break;
3348 default:
3349 gcc_unreachable ();
3350 }
3351
3352 addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl),
3353 gen_lowpart (mode, exth), gen_lowpart (mode, tgt),
3354 sign, OPTAB_WIDEN);
3355 }
3356
3357 if (addr != tgt)
3358 emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr));
3359 }
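/* As a concrete case of the quadword sequence in the comment above:
   for an address whose low three bits are 3, the first ldq_u fetches
   the aligned quad containing bytes 0-4 of the datum, extql shifts
   them right by 3 bytes to the low end of the register, the second
   ldq_u fetches the quad containing bytes 5-7, extqh shifts those
   left by 5 bytes to the high end, and the final or merges the two
   halves.  */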
3360
3361 /* Similarly, use ins and msk instructions to perform unaligned stores. */
3362
3363 void
3364 alpha_expand_unaligned_store (rtx dst, rtx src,
3365 HOST_WIDE_INT size, HOST_WIDE_INT ofs)
3366 {
3367 rtx dstl, dsth, addr, insl, insh, meml, memh, dsta;
3368
3369 if (TARGET_BWX && size == 2)
3370 {
3371 if (src != const0_rtx)
3372 {
3373 dstl = gen_lowpart (QImode, src);
3374 dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8),
3375 NULL, 1, OPTAB_LIB_WIDEN);
3376 dsth = gen_lowpart (QImode, dsth);
3377 }
3378 else
3379 dstl = dsth = const0_rtx;
3380
3381 meml = adjust_address (dst, QImode, ofs);
3382 memh = adjust_address (dst, QImode, ofs+1);
3383
3384 emit_move_insn (meml, dstl);
3385 emit_move_insn (memh, dsth);
3386 return;
3387 }
3388
3389 dstl = gen_reg_rtx (DImode);
3390 dsth = gen_reg_rtx (DImode);
3391 insl = gen_reg_rtx (DImode);
3392 insh = gen_reg_rtx (DImode);
3393
3394 dsta = XEXP (dst, 0);
3395 if (GET_CODE (dsta) == LO_SUM)
3396 dsta = force_reg (Pmode, dsta);
3397
3398 /* AND addresses cannot be in any alias set, since they may implicitly
3399 alias surrounding code. Ideally we'd have some alias set that
3400 covered all types except those with alignment 8 or higher. */
3401
3402 meml = change_address (dst, DImode,
3403 gen_rtx_AND (DImode,
3404 plus_constant (DImode, dsta, ofs),
3405 GEN_INT (-8)));
3406 set_mem_alias_set (meml, 0);
3407
3408 memh = change_address (dst, DImode,
3409 gen_rtx_AND (DImode,
3410 plus_constant (DImode, dsta,
3411 ofs + size - 1),
3412 GEN_INT (-8)));
3413 set_mem_alias_set (memh, 0);
3414
3415 emit_move_insn (dsth, memh);
3416 emit_move_insn (dstl, meml);
3417
3418 addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs));
3419
3420 if (src != CONST0_RTX (GET_MODE (src)))
3421 {
3422 emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
3423 GEN_INT (size*8), addr));
3424
3425 switch ((int) size)
3426 {
3427 case 2:
3428 emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr));
3429 break;
3430 case 4:
3431 emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr));
3432 break;
3433 case 8:
3434 emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr));
3435 break;
3436 default:
3437 gcc_unreachable ();
3438 }
3439 }
3440
3441 emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr));
3442
3443 switch ((int) size)
3444 {
3445 case 2:
3446 emit_insn (gen_mskwl (dstl, dstl, addr));
3447 break;
3448 case 4:
3449 emit_insn (gen_mskll (dstl, dstl, addr));
3450 break;
3451 case 8:
3452 emit_insn (gen_mskql (dstl, dstl, addr));
3453 break;
3454 default:
3455 gcc_unreachable ();
3456 }
3457
3458 if (src != CONST0_RTX (GET_MODE (src)))
3459 {
3460 dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN);
3461 dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN);
3462 }
3463
3464 /* Must store high before low for the degenerate case of an aligned address. */
3465 emit_move_insn (memh, dsth);
3466 emit_move_insn (meml, dstl);
3467 }
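/* The store side mirrors the load side: the two quads covering the
   destination are fetched, msk* clears the bytes the new data will
   occupy, ins*l/ins*h position the source bytes for the low and high
   quads, and the merged results are written back.  When the address
   happens to be aligned, both halves refer to the same quadword and
   the low half holds the complete value, which is why the high half
   is stored first so that the low store lands last.  */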
3468
3469 /* The block move code tries to maximize speed by separating loads and
3470 stores at the expense of register pressure: we load all of the data
3471 before we store it back out. Two secondary effects are worth
3472 mentioning: this speeds copying to/from aligned and unaligned
3473 buffers alike, and it makes the code significantly easier to write. */
3474
3475 #define MAX_MOVE_WORDS 8
3476
3477 /* Load an integral number of consecutive unaligned quadwords. */
3478
3479 static void
3480 alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem,
3481 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3482 {
3483 rtx const im8 = GEN_INT (-8);
3484 rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1];
3485 rtx sreg, areg, tmp, smema;
3486 HOST_WIDE_INT i;
3487
3488 smema = XEXP (smem, 0);
3489 if (GET_CODE (smema) == LO_SUM)
3490 smema = force_reg (Pmode, smema);
3491
3492 /* Generate all the tmp registers we need. */
3493 for (i = 0; i < words; ++i)
3494 {
3495 data_regs[i] = out_regs[i];
3496 ext_tmps[i] = gen_reg_rtx (DImode);
3497 }
3498 data_regs[words] = gen_reg_rtx (DImode);
3499
3500 if (ofs != 0)
3501 smem = adjust_address (smem, GET_MODE (smem), ofs);
3502
3503 /* Load up all of the source data. */
3504 for (i = 0; i < words; ++i)
3505 {
3506 tmp = change_address (smem, DImode,
3507 gen_rtx_AND (DImode,
3508 plus_constant (DImode, smema, 8*i),
3509 im8));
3510 set_mem_alias_set (tmp, 0);
3511 emit_move_insn (data_regs[i], tmp);
3512 }
3513
3514 tmp = change_address (smem, DImode,
3515 gen_rtx_AND (DImode,
3516 plus_constant (DImode, smema,
3517 8*words - 1),
3518 im8));
3519 set_mem_alias_set (tmp, 0);
3520 emit_move_insn (data_regs[words], tmp);
3521
3522 /* Extract the half-word fragments. Unfortunately DEC decided to make
3523 extxh with offset zero a noop instead of zeroing the register, so
3524 we must take care of that edge condition ourselves with cmov. */
3525
3526 sreg = copy_addr_to_reg (smema);
3527 areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL,
3528 1, OPTAB_WIDEN);
3529 for (i = 0; i < words; ++i)
3530 {
3531 emit_insn (gen_extql (data_regs[i], data_regs[i], sreg));
3532 emit_insn (gen_extqh (ext_tmps[i], data_regs[i+1], sreg));
3533 emit_insn (gen_rtx_SET (VOIDmode, ext_tmps[i],
3534 gen_rtx_IF_THEN_ELSE (DImode,
3535 gen_rtx_EQ (DImode, areg,
3536 const0_rtx),
3537 const0_rtx, ext_tmps[i])));
3538 }
3539
3540 /* Merge the half-words into whole words. */
3541 for (i = 0; i < words; ++i)
3542 {
3543 out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i],
3544 ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN);
3545 }
3546 }
3547
3548 /* Store an integral number of consecutive unaligned quadwords. DATA_REGS
3549 may be NULL to store zeros. */
3550
3551 static void
3552 alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem,
3553 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3554 {
3555 rtx const im8 = GEN_INT (-8);
3556 rtx ins_tmps[MAX_MOVE_WORDS];
3557 rtx st_tmp_1, st_tmp_2, dreg;
3558 rtx st_addr_1, st_addr_2, dmema;
3559 HOST_WIDE_INT i;
3560
3561 dmema = XEXP (dmem, 0);
3562 if (GET_CODE (dmema) == LO_SUM)
3563 dmema = force_reg (Pmode, dmema);
3564
3565 /* Generate all the tmp registers we need. */
3566 if (data_regs != NULL)
3567 for (i = 0; i < words; ++i)
3568 ins_tmps[i] = gen_reg_rtx(DImode);
3569 st_tmp_1 = gen_reg_rtx(DImode);
3570 st_tmp_2 = gen_reg_rtx(DImode);
3571
3572 if (ofs != 0)
3573 dmem = adjust_address (dmem, GET_MODE (dmem), ofs);
3574
3575 st_addr_2 = change_address (dmem, DImode,
3576 gen_rtx_AND (DImode,
3577 plus_constant (DImode, dmema,
3578 words*8 - 1),
3579 im8));
3580 set_mem_alias_set (st_addr_2, 0);
3581
3582 st_addr_1 = change_address (dmem, DImode,
3583 gen_rtx_AND (DImode, dmema, im8));
3584 set_mem_alias_set (st_addr_1, 0);
3585
3586 /* Load up the destination end bits. */
3587 emit_move_insn (st_tmp_2, st_addr_2);
3588 emit_move_insn (st_tmp_1, st_addr_1);
3589
3590 /* Shift the input data into place. */
3591 dreg = copy_addr_to_reg (dmema);
3592 if (data_regs != NULL)
3593 {
3594 for (i = words-1; i >= 0; --i)
3595 {
3596 emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg));
3597 emit_insn (gen_insql (data_regs[i], data_regs[i], dreg));
3598 }
3599 for (i = words-1; i > 0; --i)
3600 {
3601 ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i],
3602 ins_tmps[i-1], ins_tmps[i-1], 1,
3603 OPTAB_WIDEN);
3604 }
3605 }
3606
3607 /* Split and merge the ends with the destination data. */
3608 emit_insn (gen_mskqh (st_tmp_2, st_tmp_2, dreg));
3609 emit_insn (gen_mskql (st_tmp_1, st_tmp_1, dreg));
3610
3611 if (data_regs != NULL)
3612 {
3613 st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1],
3614 st_tmp_2, 1, OPTAB_WIDEN);
3615 st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0],
3616 st_tmp_1, 1, OPTAB_WIDEN);
3617 }
3618
3619 /* Store it all. */
3620 emit_move_insn (st_addr_2, st_tmp_2);
3621 for (i = words-1; i > 0; --i)
3622 {
3623 rtx tmp = change_address (dmem, DImode,
3624 gen_rtx_AND (DImode,
3625 plus_constant (DImode,
3626 dmema, i*8),
3627 im8));
3628 set_mem_alias_set (tmp, 0);
3629 emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx);
3630 }
3631 emit_move_insn (st_addr_1, st_tmp_1);
3632 }
3633
3634
3635 /* Expand string/block move operations.
3636
3637 operands[0] is the pointer to the destination.
3638 operands[1] is the pointer to the source.
3639 operands[2] is the number of bytes to move.
3640 operands[3] is the alignment. */
3641
3642 int
3643 alpha_expand_block_move (rtx operands[])
3644 {
3645 rtx bytes_rtx = operands[2];
3646 rtx align_rtx = operands[3];
3647 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
3648 HOST_WIDE_INT bytes = orig_bytes;
3649 HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT;
3650 HOST_WIDE_INT dst_align = src_align;
3651 rtx orig_src = operands[1];
3652 rtx orig_dst = operands[0];
3653 rtx data_regs[2 * MAX_MOVE_WORDS + 16];
3654 rtx tmp;
3655 unsigned int i, words, ofs, nregs = 0;
3656
3657 if (orig_bytes <= 0)
3658 return 1;
3659 else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
3660 return 0;
3661
3662 /* Look for additional alignment information from recorded register info. */
3663
3664 tmp = XEXP (orig_src, 0);
3665 if (REG_P (tmp))
3666 src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3667 else if (GET_CODE (tmp) == PLUS
3668 && REG_P (XEXP (tmp, 0))
3669 && CONST_INT_P (XEXP (tmp, 1)))
3670 {
3671 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3672 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3673
3674 if (a > src_align)
3675 {
3676 if (a >= 64 && c % 8 == 0)
3677 src_align = 64;
3678 else if (a >= 32 && c % 4 == 0)
3679 src_align = 32;
3680 else if (a >= 16 && c % 2 == 0)
3681 src_align = 16;
3682 }
3683 }
3684
3685 tmp = XEXP (orig_dst, 0);
3686 if (REG_P (tmp))
3687 dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3688 else if (GET_CODE (tmp) == PLUS
3689 && REG_P (XEXP (tmp, 0))
3690 && CONST_INT_P (XEXP (tmp, 1)))
3691 {
3692 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3693 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3694
3695 if (a > dst_align)
3696 {
3697 if (a >= 64 && c % 8 == 0)
3698 dst_align = 64;
3699 else if (a >= 32 && c % 4 == 0)
3700 dst_align = 32;
3701 else if (a >= 16 && c % 2 == 0)
3702 dst_align = 16;
3703 }
3704 }
3705
3706 ofs = 0;
3707 if (src_align >= 64 && bytes >= 8)
3708 {
3709 words = bytes / 8;
3710
3711 for (i = 0; i < words; ++i)
3712 data_regs[nregs + i] = gen_reg_rtx (DImode);
3713
3714 for (i = 0; i < words; ++i)
3715 emit_move_insn (data_regs[nregs + i],
3716 adjust_address (orig_src, DImode, ofs + i * 8));
3717
3718 nregs += words;
3719 bytes -= words * 8;
3720 ofs += words * 8;
3721 }
3722
3723 if (src_align >= 32 && bytes >= 4)
3724 {
3725 words = bytes / 4;
3726
3727 for (i = 0; i < words; ++i)
3728 data_regs[nregs + i] = gen_reg_rtx (SImode);
3729
3730 for (i = 0; i < words; ++i)
3731 emit_move_insn (data_regs[nregs + i],
3732 adjust_address (orig_src, SImode, ofs + i * 4));
3733
3734 nregs += words;
3735 bytes -= words * 4;
3736 ofs += words * 4;
3737 }
3738
3739 if (bytes >= 8)
3740 {
3741 words = bytes / 8;
3742
3743 for (i = 0; i < words+1; ++i)
3744 data_regs[nregs + i] = gen_reg_rtx (DImode);
3745
3746 alpha_expand_unaligned_load_words (data_regs + nregs, orig_src,
3747 words, ofs);
3748
3749 nregs += words;
3750 bytes -= words * 8;
3751 ofs += words * 8;
3752 }
3753
3754 if (! TARGET_BWX && bytes >= 4)
3755 {
3756 data_regs[nregs++] = tmp = gen_reg_rtx (SImode);
3757 alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0);
3758 bytes -= 4;
3759 ofs += 4;
3760 }
3761
3762 if (bytes >= 2)
3763 {
3764 if (src_align >= 16)
3765 {
3766 do {
3767 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3768 emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs));
3769 bytes -= 2;
3770 ofs += 2;
3771 } while (bytes >= 2);
3772 }
3773 else if (! TARGET_BWX)
3774 {
3775 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3776 alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0);
3777 bytes -= 2;
3778 ofs += 2;
3779 }
3780 }
3781
3782 while (bytes > 0)
3783 {
3784 data_regs[nregs++] = tmp = gen_reg_rtx (QImode);
3785 emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs));
3786 bytes -= 1;
3787 ofs += 1;
3788 }
3789
3790 gcc_assert (nregs <= ARRAY_SIZE (data_regs));
3791
3792 /* Now save it back out again. */
3793
3794 i = 0, ofs = 0;
3795
3796 /* Write out the data in whatever chunks reading the source allowed. */
3797 if (dst_align >= 64)
3798 {
3799 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3800 {
3801 emit_move_insn (adjust_address (orig_dst, DImode, ofs),
3802 data_regs[i]);
3803 ofs += 8;
3804 i++;
3805 }
3806 }
3807
3808 if (dst_align >= 32)
3809 {
3810 /* If the source has remaining DImode regs, write them out in
3811 two pieces. */
3812 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3813 {
3814 tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32),
3815 NULL_RTX, 1, OPTAB_WIDEN);
3816
3817 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3818 gen_lowpart (SImode, data_regs[i]));
3819 emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4),
3820 gen_lowpart (SImode, tmp));
3821 ofs += 8;
3822 i++;
3823 }
3824
3825 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3826 {
3827 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3828 data_regs[i]);
3829 ofs += 4;
3830 i++;
3831 }
3832 }
3833
3834 if (i < nregs && GET_MODE (data_regs[i]) == DImode)
3835 {
3836 /* Write out a remaining block of words using unaligned methods. */
3837
3838 for (words = 1; i + words < nregs; words++)
3839 if (GET_MODE (data_regs[i + words]) != DImode)
3840 break;
3841
3842 if (words == 1)
3843 alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs);
3844 else
3845 alpha_expand_unaligned_store_words (data_regs + i, orig_dst,
3846 words, ofs);
3847
3848 i += words;
3849 ofs += words * 8;
3850 }
3851
3852 /* Due to the above, this won't be aligned. */
3853 /* ??? If we have more than one of these, consider constructing full
3854 words in registers and using alpha_expand_unaligned_store_words. */
3855 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3856 {
3857 alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
3858 ofs += 4;
3859 i++;
3860 }
3861
3862 if (dst_align >= 16)
3863 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
3864 {
3865 emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]);
3866 i++;
3867 ofs += 2;
3868 }
3869 else
3870 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
3871 {
3872 alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs);
3873 i++;
3874 ofs += 2;
3875 }
3876
3877 /* The remainder must be byte copies. */
3878 while (i < nregs)
3879 {
3880 gcc_assert (GET_MODE (data_regs[i]) == QImode);
3881 emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]);
3882 i++;
3883 ofs += 1;
3884 }
3885
3886 return 1;
3887 }
3888
3889 int
3890 alpha_expand_block_clear (rtx operands[])
3891 {
3892 rtx bytes_rtx = operands[1];
3893 rtx align_rtx = operands[3];
3894 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
3895 HOST_WIDE_INT bytes = orig_bytes;
3896 HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
3897 HOST_WIDE_INT alignofs = 0;
3898 rtx orig_dst = operands[0];
3899 rtx tmp;
3900 int i, words, ofs = 0;
3901
3902 if (orig_bytes <= 0)
3903 return 1;
3904 if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
3905 return 0;
3906
3907 /* Look for stricter alignment. */
3908 tmp = XEXP (orig_dst, 0);
3909 if (REG_P (tmp))
3910 align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3911 else if (GET_CODE (tmp) == PLUS
3912 && REG_P (XEXP (tmp, 0))
3913 && CONST_INT_P (XEXP (tmp, 1)))
3914 {
3915 HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3916 int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3917
3918 if (a > align)
3919 {
3920 if (a >= 64)
3921 align = a, alignofs = 8 - c % 8;
3922 else if (a >= 32)
3923 align = a, alignofs = 4 - c % 4;
3924 else if (a >= 16)
3925 align = a, alignofs = 2 - c % 2;
3926 }
3927 }
3928
3929 /* Handle an unaligned prefix first. */
3930
3931 if (alignofs > 0)
3932 {
3933 #if HOST_BITS_PER_WIDE_INT >= 64
3934 /* Given that alignofs is bounded by align, the only time BWX could
3935 generate three stores is for a 7 byte fill. Prefer two individual
3936 stores over a load/mask/store sequence. */
3937 if ((!TARGET_BWX || alignofs == 7)
3938 && align >= 32
3939 && !(alignofs == 4 && bytes >= 4))
3940 {
3941 enum machine_mode mode = (align >= 64 ? DImode : SImode);
3942 int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs;
3943 rtx mem, tmp;
3944 HOST_WIDE_INT mask;
3945
3946 mem = adjust_address (orig_dst, mode, ofs - inv_alignofs);
3947 set_mem_alias_set (mem, 0);
3948
3949 mask = ~(~(HOST_WIDE_INT)0 << (inv_alignofs * 8));
3950 if (bytes < alignofs)
3951 {
3952 mask |= ~(HOST_WIDE_INT)0 << ((inv_alignofs + bytes) * 8);
3953 ofs += bytes;
3954 bytes = 0;
3955 }
3956 else
3957 {
3958 bytes -= alignofs;
3959 ofs += alignofs;
3960 }
3961 alignofs = 0;
3962
3963 tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
3964 NULL_RTX, 1, OPTAB_WIDEN);
3965
3966 emit_move_insn (mem, tmp);
3967 }
3968 #endif
3969
3970 if (TARGET_BWX && (alignofs & 1) && bytes >= 1)
3971 {
3972 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
3973 bytes -= 1;
3974 ofs += 1;
3975 alignofs -= 1;
3976 }
3977 if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2)
3978 {
3979 emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx);
3980 bytes -= 2;
3981 ofs += 2;
3982 alignofs -= 2;
3983 }
3984 if (alignofs == 4 && bytes >= 4)
3985 {
3986 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
3987 bytes -= 4;
3988 ofs += 4;
3989 alignofs = 0;
3990 }
3991
3992 /* If we've not used the extra lead alignment information by now,
3993 we won't be able to. Downgrade align to match what's left over. */
3994 if (alignofs > 0)
3995 {
3996 alignofs = alignofs & -alignofs;
3997 align = MIN (align, alignofs * BITS_PER_UNIT);
3998 }
3999 }
4000
4001 /* Handle a block of contiguous long-words. */
4002
4003 if (align >= 64 && bytes >= 8)
4004 {
4005 words = bytes / 8;
4006
4007 for (i = 0; i < words; ++i)
4008 emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8),
4009 const0_rtx);
4010
4011 bytes -= words * 8;
4012 ofs += words * 8;
4013 }
4014
4015 /* If the block is large and appropriately aligned, emit a single
4016 store followed by a sequence of stq_u insns. */
4017
4018 if (align >= 32 && bytes > 16)
4019 {
4020 rtx orig_dsta;
4021
4022 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4023 bytes -= 4;
4024 ofs += 4;
4025
4026 orig_dsta = XEXP (orig_dst, 0);
4027 if (GET_CODE (orig_dsta) == LO_SUM)
4028 orig_dsta = force_reg (Pmode, orig_dsta);
4029
4030 words = bytes / 8;
4031 for (i = 0; i < words; ++i)
4032 {
4033 rtx mem
4034 = change_address (orig_dst, DImode,
4035 gen_rtx_AND (DImode,
4036 plus_constant (DImode, orig_dsta,
4037 ofs + i*8),
4038 GEN_INT (-8)));
4039 set_mem_alias_set (mem, 0);
4040 emit_move_insn (mem, const0_rtx);
4041 }
4042
4043 /* Depending on the alignment, the first stq_u may have overlapped
4044 with the initial stl, which means that the last stq_u didn't
4045 write as much as it would appear. Leave those questionable bytes
4046 unaccounted for. */
4047 bytes -= words * 8 - 4;
4048 ofs += words * 8 - 4;
4049 }
4050
4051 /* Handle a smaller block of aligned words. */
4052
4053 if ((align >= 64 && bytes == 4)
4054 || (align == 32 && bytes >= 4))
4055 {
4056 words = bytes / 4;
4057
4058 for (i = 0; i < words; ++i)
4059 emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4),
4060 const0_rtx);
4061
4062 bytes -= words * 4;
4063 ofs += words * 4;
4064 }
4065
4066 /* An unaligned block uses stq_u stores for as many as possible. */
4067
4068 if (bytes >= 8)
4069 {
4070 words = bytes / 8;
4071
4072 alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs);
4073
4074 bytes -= words * 8;
4075 ofs += words * 8;
4076 }
4077
4078 /* Next clean up any trailing pieces. */
4079
4080 #if HOST_BITS_PER_WIDE_INT >= 64
4081 /* Count the number of bits in BYTES for which aligned stores could
4082 be emitted. */
4083 words = 0;
4084 for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1)
4085 if (bytes & i)
4086 words += 1;
4087
4088 /* If we have appropriate alignment (and it wouldn't take too many
4089 instructions otherwise), mask out the bytes we need. */
4090 if (TARGET_BWX ? words > 2 : bytes > 0)
4091 {
4092 if (align >= 64)
4093 {
4094 rtx mem, tmp;
4095 HOST_WIDE_INT mask;
4096
4097 mem = adjust_address (orig_dst, DImode, ofs);
4098 set_mem_alias_set (mem, 0);
4099
4100 mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
4101
4102 tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask),
4103 NULL_RTX, 1, OPTAB_WIDEN);
4104
4105 emit_move_insn (mem, tmp);
4106 return 1;
4107 }
4108 else if (align >= 32 && bytes < 4)
4109 {
4110 rtx mem, tmp;
4111 HOST_WIDE_INT mask;
4112
4113 mem = adjust_address (orig_dst, SImode, ofs);
4114 set_mem_alias_set (mem, 0);
4115
4116 mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
4117
4118 tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask),
4119 NULL_RTX, 1, OPTAB_WIDEN);
4120
4121 emit_move_insn (mem, tmp);
4122 return 1;
4123 }
4124 }
4125 #endif
4126
4127 if (!TARGET_BWX && bytes >= 4)
4128 {
4129 alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs);
4130 bytes -= 4;
4131 ofs += 4;
4132 }
4133
4134 if (bytes >= 2)
4135 {
4136 if (align >= 16)
4137 {
4138 do {
4139 emit_move_insn (adjust_address (orig_dst, HImode, ofs),
4140 const0_rtx);
4141 bytes -= 2;
4142 ofs += 2;
4143 } while (bytes >= 2);
4144 }
4145 else if (! TARGET_BWX)
4146 {
4147 alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
4148 bytes -= 2;
4149 ofs += 2;
4150 }
4151 }
4152
4153 while (bytes > 0)
4154 {
4155 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4156 bytes -= 1;
4157 ofs += 1;
4158 }
4159
4160 return 1;
4161 }
4162
4163 /* Returns a mask so that zap(x, value) == x & mask. */
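/* Worked example (illustrative): with VALUE == 0x0f, byte-select bits 0-3
   are set, so bytes 0-3 of X are zapped and the function returns the mask
   0xffffffff00000000; that is, zap (x, 0x0f) == x & 0xffffffff00000000.  */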
4164
4165 rtx
4166 alpha_expand_zap_mask (HOST_WIDE_INT value)
4167 {
4168 rtx result;
4169 int i;
4170
4171 if (HOST_BITS_PER_WIDE_INT >= 64)
4172 {
4173 HOST_WIDE_INT mask = 0;
4174
4175 for (i = 7; i >= 0; --i)
4176 {
4177 mask <<= 8;
4178 if (!((value >> i) & 1))
4179 mask |= 0xff;
4180 }
4181
4182 result = gen_int_mode (mask, DImode);
4183 }
4184 else
4185 {
4186 HOST_WIDE_INT mask_lo = 0, mask_hi = 0;
4187
4188 gcc_assert (HOST_BITS_PER_WIDE_INT == 32);
4189
4190 for (i = 7; i >= 4; --i)
4191 {
4192 mask_hi <<= 8;
4193 if (!((value >> i) & 1))
4194 mask_hi |= 0xff;
4195 }
4196
4197 for (i = 3; i >= 0; --i)
4198 {
4199 mask_lo <<= 8;
4200 if (!((value >> i) & 1))
4201 mask_lo |= 0xff;
4202 }
4203
4204 result = immed_double_const (mask_lo, mask_hi, DImode);
4205 }
4206
4207 return result;
4208 }
4209
4210 void
4211 alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx),
4212 enum machine_mode mode,
4213 rtx op0, rtx op1, rtx op2)
4214 {
4215 op0 = gen_lowpart (mode, op0);
4216
4217 if (op1 == const0_rtx)
4218 op1 = CONST0_RTX (mode);
4219 else
4220 op1 = gen_lowpart (mode, op1);
4221
4222 if (op2 == const0_rtx)
4223 op2 = CONST0_RTX (mode);
4224 else
4225 op2 = gen_lowpart (mode, op2);
4226
4227 emit_insn ((*gen) (op0, op1, op2));
4228 }
4229
4230 /* A subroutine of the atomic operation splitters. Jump to LABEL if
4231 COND is true. Mark the jump as unlikely to be taken. */
4232
4233 static void
4234 emit_unlikely_jump (rtx cond, rtx label)
4235 {
4236 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
4237 rtx x;
4238
4239 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
4240 x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x));
4241 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
4242 }
4243
4244 /* A subroutine of the atomic operation splitters. Emit a load-locked
4245 instruction in MODE. */
4246
4247 static void
4248 emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
4249 {
4250 rtx (*fn) (rtx, rtx) = NULL;
4251 if (mode == SImode)
4252 fn = gen_load_locked_si;
4253 else if (mode == DImode)
4254 fn = gen_load_locked_di;
4255 emit_insn (fn (reg, mem));
4256 }
4257
4258 /* A subroutine of the atomic operation splitters. Emit a store-conditional
4259 instruction in MODE. */
4260
4261 static void
4262 emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val)
4263 {
4264 rtx (*fn) (rtx, rtx, rtx) = NULL;
4265 if (mode == SImode)
4266 fn = gen_store_conditional_si;
4267 else if (mode == DImode)
4268 fn = gen_store_conditional_di;
4269 emit_insn (fn (res, mem, val));
4270 }
4271
4272 /* Subroutines of the atomic operation splitters. Emit barriers
4273 as needed for the memory MODEL. */
4274
4275 static void
4276 alpha_pre_atomic_barrier (enum memmodel model)
4277 {
4278 if (need_atomic_barrier_p (model, true))
4279 emit_insn (gen_memory_barrier ());
4280 }
4281
4282 static void
4283 alpha_post_atomic_barrier (enum memmodel model)
4284 {
4285 if (need_atomic_barrier_p (model, false))
4286 emit_insn (gen_memory_barrier ());
4287 }
4288
4289 /* A subroutine of the atomic operation splitters. Emit an insxl
4290 instruction in MODE. */
4291
4292 static rtx
4293 emit_insxl (enum machine_mode mode, rtx op1, rtx op2)
4294 {
4295 rtx ret = gen_reg_rtx (DImode);
4296 rtx (*fn) (rtx, rtx, rtx);
4297
4298 switch (mode)
4299 {
4300 case QImode:
4301 fn = gen_insbl;
4302 break;
4303 case HImode:
4304 fn = gen_inswl;
4305 break;
4306 case SImode:
4307 fn = gen_insll;
4308 break;
4309 case DImode:
4310 fn = gen_insql;
4311 break;
4312 default:
4313 gcc_unreachable ();
4314 }
4315
4316 op1 = force_reg (mode, op1);
4317 emit_insn (fn (ret, op1, op2));
4318
4319 return ret;
4320 }
4321
4322 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
4323 to perform. MEM is the memory on which to operate. VAL is the second
4324 operand of the binary operator. BEFORE and AFTER are optional locations to
4325 return the value of MEM either before or after the operation. SCRATCH is
4326 a scratch register. */
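/* As a rough sketch (illustrative only, not a literal dump of the emitted
   RTL), a DImode fetch-and-add splits into a loop of the form

	mb			# only if MODEL needs a leading barrier
   1:	ldq_l	before,0(mem)
	addq	before,val,scratch
	stq_c	scratch,0(mem)
	beq	scratch,1b	# retry if the store-conditional failed
	mb			# only if MODEL needs a trailing barrier

   with the backward branch marked as unlikely taken.  */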
4327
4328 void
4329 alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before,
4330 rtx after, rtx scratch, enum memmodel model)
4331 {
4332 enum machine_mode mode = GET_MODE (mem);
4333 rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch));
4334
4335 alpha_pre_atomic_barrier (model);
4336
4337 label = gen_label_rtx ();
4338 emit_label (label);
4339 label = gen_rtx_LABEL_REF (DImode, label);
4340
4341 if (before == NULL)
4342 before = scratch;
4343 emit_load_locked (mode, before, mem);
4344
4345 if (code == NOT)
4346 {
4347 x = gen_rtx_AND (mode, before, val);
4348 emit_insn (gen_rtx_SET (VOIDmode, val, x));
4349
4350 x = gen_rtx_NOT (mode, val);
4351 }
4352 else
4353 x = gen_rtx_fmt_ee (code, mode, before, val);
4354 if (after)
4355 emit_insn (gen_rtx_SET (VOIDmode, after, copy_rtx (x)));
4356 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
4357
4358 emit_store_conditional (mode, cond, mem, scratch);
4359
4360 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4361 emit_unlikely_jump (x, label);
4362
4363 alpha_post_atomic_barrier (model);
4364 }
4365
4366 /* Expand a compare and swap operation. */
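/* Schematically (an illustrative sketch, not the literal expansion), the
   DImode strong compare-and-swap becomes

   1:	ldq_l	retval,0(mem)
	cmpeq	retval,oldval,cond
	beq	cond,2f		# values differ: fail
	mov	newval,cond
	stq_c	cond,0(mem)
	beq	cond,1b		# reservation lost: retry
   2:

   bracketed by memory barriers as dictated by the success and failure
   memory models.  */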
4367
4368 void
4369 alpha_split_compare_and_swap (rtx operands[])
4370 {
4371 rtx cond, retval, mem, oldval, newval;
4372 bool is_weak;
4373 enum memmodel mod_s, mod_f;
4374 enum machine_mode mode;
4375 rtx label1, label2, x;
4376
4377 cond = operands[0];
4378 retval = operands[1];
4379 mem = operands[2];
4380 oldval = operands[3];
4381 newval = operands[4];
4382 is_weak = (operands[5] != const0_rtx);
4383 mod_s = (enum memmodel) INTVAL (operands[6]);
4384 mod_f = (enum memmodel) INTVAL (operands[7]);
4385 mode = GET_MODE (mem);
4386
4387 alpha_pre_atomic_barrier (mod_s);
4388
4389 label1 = NULL_RTX;
4390 if (!is_weak)
4391 {
4392 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4393 emit_label (XEXP (label1, 0));
4394 }
4395 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4396
4397 emit_load_locked (mode, retval, mem);
4398
4399 x = gen_lowpart (DImode, retval);
4400 if (oldval == const0_rtx)
4401 {
4402 emit_move_insn (cond, const0_rtx);
4403 x = gen_rtx_NE (DImode, x, const0_rtx);
4404 }
4405 else
4406 {
4407 x = gen_rtx_EQ (DImode, x, oldval);
4408 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
4409 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4410 }
4411 emit_unlikely_jump (x, label2);
4412
4413 emit_move_insn (cond, newval);
4414 emit_store_conditional (mode, cond, mem, gen_lowpart (mode, cond));
4415
4416 if (!is_weak)
4417 {
4418 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4419 emit_unlikely_jump (x, label1);
4420 }
4421
4422 if (mod_f != MEMMODEL_RELAXED)
4423 emit_label (XEXP (label2, 0));
4424
4425 alpha_post_atomic_barrier (mod_s);
4426
4427 if (mod_f == MEMMODEL_RELAXED)
4428 emit_label (XEXP (label2, 0));
4429 }
4430
4431 void
4432 alpha_expand_compare_and_swap_12 (rtx operands[])
4433 {
4434 rtx cond, dst, mem, oldval, newval, is_weak, mod_s, mod_f;
4435 enum machine_mode mode;
4436 rtx addr, align, wdst;
4437 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
4438
4439 cond = operands[0];
4440 dst = operands[1];
4441 mem = operands[2];
4442 oldval = operands[3];
4443 newval = operands[4];
4444 is_weak = operands[5];
4445 mod_s = operands[6];
4446 mod_f = operands[7];
4447 mode = GET_MODE (mem);
4448
4449 /* We forced the address into a register via mem_noofs_operand. */
4450 addr = XEXP (mem, 0);
4451 gcc_assert (register_operand (addr, DImode));
4452
4453 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4454 NULL_RTX, 1, OPTAB_DIRECT);
4455
4456 oldval = convert_modes (DImode, mode, oldval, 1);
4457
4458 if (newval != const0_rtx)
4459 newval = emit_insxl (mode, newval, addr);
4460
4461 wdst = gen_reg_rtx (DImode);
4462 if (mode == QImode)
4463 gen = gen_atomic_compare_and_swapqi_1;
4464 else
4465 gen = gen_atomic_compare_and_swaphi_1;
4466 emit_insn (gen (cond, wdst, mem, oldval, newval, align,
4467 is_weak, mod_s, mod_f));
4468
4469 emit_move_insn (dst, gen_lowpart (mode, wdst));
4470 }
4471
4472 void
4473 alpha_split_compare_and_swap_12 (rtx operands[])
4474 {
4475 rtx cond, dest, orig_mem, oldval, newval, align, scratch;
4476 enum machine_mode mode;
4477 bool is_weak;
4478 enum memmodel mod_s, mod_f;
4479 rtx label1, label2, mem, addr, width, mask, x;
4480
4481 cond = operands[0];
4482 dest = operands[1];
4483 orig_mem = operands[2];
4484 oldval = operands[3];
4485 newval = operands[4];
4486 align = operands[5];
4487 is_weak = (operands[6] != const0_rtx);
4488 mod_s = (enum memmodel) INTVAL (operands[7]);
4489 mod_f = (enum memmodel) INTVAL (operands[8]);
4490 scratch = operands[9];
4491 mode = GET_MODE (orig_mem);
4492 addr = XEXP (orig_mem, 0);
4493
4494 mem = gen_rtx_MEM (DImode, align);
4495 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4496 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4497 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4498
4499 alpha_pre_atomic_barrier (mod_s);
4500
4501 label1 = NULL_RTX;
4502 if (!is_weak)
4503 {
4504 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4505 emit_label (XEXP (label1, 0));
4506 }
4507 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4508
4509 emit_load_locked (DImode, scratch, mem);
4510
4511 width = GEN_INT (GET_MODE_BITSIZE (mode));
4512 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4513 emit_insn (gen_extxl (dest, scratch, width, addr));
4514
4515 if (oldval == const0_rtx)
4516 {
4517 emit_move_insn (cond, const0_rtx);
4518 x = gen_rtx_NE (DImode, dest, const0_rtx);
4519 }
4520 else
4521 {
4522 x = gen_rtx_EQ (DImode, dest, oldval);
4523 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
4524 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4525 }
4526 emit_unlikely_jump (x, label2);
4527
4528 emit_insn (gen_mskxl (cond, scratch, mask, addr));
4529
4530 if (newval != const0_rtx)
4531 emit_insn (gen_iordi3 (cond, cond, newval));
4532
4533 emit_store_conditional (DImode, cond, mem, cond);
4534
4535 if (!is_weak)
4536 {
4537 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4538 emit_unlikely_jump (x, label1);
4539 }
4540
4541 if (mod_f != MEMMODEL_RELAXED)
4542 emit_label (XEXP (label2, 0));
4543
4544 alpha_post_atomic_barrier (mod_s);
4545
4546 if (mod_f == MEMMODEL_RELAXED)
4547 emit_label (XEXP (label2, 0));
4548 }
4549
4550 /* Expand an atomic exchange operation. */
4551
4552 void
4553 alpha_split_atomic_exchange (rtx operands[])
4554 {
4555 rtx retval, mem, val, scratch;
4556 enum memmodel model;
4557 enum machine_mode mode;
4558 rtx label, x, cond;
4559
4560 retval = operands[0];
4561 mem = operands[1];
4562 val = operands[2];
4563 model = (enum memmodel) INTVAL (operands[3]);
4564 scratch = operands[4];
4565 mode = GET_MODE (mem);
4566 cond = gen_lowpart (DImode, scratch);
4567
4568 alpha_pre_atomic_barrier (model);
4569
4570 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4571 emit_label (XEXP (label, 0));
4572
4573 emit_load_locked (mode, retval, mem);
4574 emit_move_insn (scratch, val);
4575 emit_store_conditional (mode, cond, mem, scratch);
4576
4577 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4578 emit_unlikely_jump (x, label);
4579
4580 alpha_post_atomic_barrier (model);
4581 }
4582
4583 void
4584 alpha_expand_atomic_exchange_12 (rtx operands[])
4585 {
4586 rtx dst, mem, val, model;
4587 enum machine_mode mode;
4588 rtx addr, align, wdst;
4589 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
4590
4591 dst = operands[0];
4592 mem = operands[1];
4593 val = operands[2];
4594 model = operands[3];
4595 mode = GET_MODE (mem);
4596
4597 /* We forced the address into a register via mem_noofs_operand. */
4598 addr = XEXP (mem, 0);
4599 gcc_assert (register_operand (addr, DImode));
4600
4601 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4602 NULL_RTX, 1, OPTAB_DIRECT);
4603
4604 /* Insert val into the correct byte location within the word. */
4605 if (val != const0_rtx)
4606 val = emit_insxl (mode, val, addr);
4607
4608 wdst = gen_reg_rtx (DImode);
4609 if (mode == QImode)
4610 gen = gen_atomic_exchangeqi_1;
4611 else
4612 gen = gen_atomic_exchangehi_1;
4613 emit_insn (gen (wdst, mem, val, align, model));
4614
4615 emit_move_insn (dst, gen_lowpart (mode, wdst));
4616 }
4617
4618 void
4619 alpha_split_atomic_exchange_12 (rtx operands[])
4620 {
4621 rtx dest, orig_mem, addr, val, align, scratch;
4622 rtx label, mem, width, mask, x;
4623 enum machine_mode mode;
4624 enum memmodel model;
4625
4626 dest = operands[0];
4627 orig_mem = operands[1];
4628 val = operands[2];
4629 align = operands[3];
4630 model = (enum memmodel) INTVAL (operands[4]);
4631 scratch = operands[5];
4632 mode = GET_MODE (orig_mem);
4633 addr = XEXP (orig_mem, 0);
4634
4635 mem = gen_rtx_MEM (DImode, align);
4636 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4637 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4638 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4639
4640 alpha_pre_atomic_barrier (model);
4641
4642 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4643 emit_label (XEXP (label, 0));
4644
4645 emit_load_locked (DImode, scratch, mem);
4646
4647 width = GEN_INT (GET_MODE_BITSIZE (mode));
4648 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4649 emit_insn (gen_extxl (dest, scratch, width, addr));
4650 emit_insn (gen_mskxl (scratch, scratch, mask, addr));
4651 if (val != const0_rtx)
4652 emit_insn (gen_iordi3 (scratch, scratch, val));
4653
4654 emit_store_conditional (DImode, scratch, mem, scratch);
4655
4656 x = gen_rtx_EQ (DImode, scratch, const0_rtx);
4657 emit_unlikely_jump (x, label);
4658
4659 alpha_post_atomic_barrier (model);
4660 }
4661 \f
4662 /* Adjust the cost of a scheduling dependency. Return the new cost of
4663 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4664
4665 static int
4666 alpha_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4667 {
4668 enum attr_type dep_insn_type;
4669
4670 /* If the dependence is an anti-dependence, there is no cost. For an
4671 output dependence, there is sometimes a cost, but it doesn't seem
4672 worth handling those few cases. */
4673 if (REG_NOTE_KIND (link) != 0)
4674 return cost;
4675
4676 /* If we can't recognize the insns, we can't really do anything. */
4677 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
4678 return cost;
4679
4680 dep_insn_type = get_attr_type (dep_insn);
4681
4682 /* Bring in the user-defined memory latency. */
4683 if (dep_insn_type == TYPE_ILD
4684 || dep_insn_type == TYPE_FLD
4685 || dep_insn_type == TYPE_LDSYM)
4686 cost += alpha_memory_latency-1;
4687
4688 /* Everything else handled in DFA bypasses now. */
4689
4690 return cost;
4691 }
4692
4693 /* The number of instructions that can be issued per cycle. */
4694
4695 static int
4696 alpha_issue_rate (void)
4697 {
4698 return (alpha_tune == PROCESSOR_EV4 ? 2 : 4);
4699 }
4700
4701 /* How many alternative schedules to try. This should be as wide as the
4702 scheduling freedom in the DFA, but no wider. Making this value too
4703 large results in extra work for the scheduler.
4704
4705 For EV4, loads can be issued to either IB0 or IB1, thus we have 2
4706 alternative schedules. For EV5, we can choose between E0/E1 and
4707 FA/FM. For EV6, an arithmetic insn can be issued to U0/U1/L0/L1. */
4708
4709 static int
4710 alpha_multipass_dfa_lookahead (void)
4711 {
4712 return (alpha_tune == PROCESSOR_EV6 ? 4 : 2);
4713 }
4714 \f
4715 /* Machine-specific function data. */
4716
4717 struct GTY(()) alpha_links;
4718
4719 struct GTY(()) machine_function
4720 {
4721 /* For OSF. */
4722 const char *some_ld_name;
4723
4724 /* For flag_reorder_blocks_and_partition. */
4725 rtx gp_save_rtx;
4726
4727 /* For VMS condition handlers. */
4728 bool uses_condition_handler;
4729
4730 /* Linkage entries. */
4731 splay_tree GTY ((param1_is (char *), param2_is (struct alpha_links *)))
4732 links;
4733 };
4734
4735 /* How to allocate a 'struct machine_function'. */
4736
4737 static struct machine_function *
4738 alpha_init_machine_status (void)
4739 {
4740 return ggc_alloc_cleared_machine_function ();
4741 }
4742
4743 /* Support for frame based VMS condition handlers. */
4744
4745 /* A VMS condition handler may be established for a function with a call to
4746 __builtin_establish_vms_condition_handler, and cancelled with a call to
4747 __builtin_revert_vms_condition_handler.
4748
4749 The VMS Condition Handling Facility knows about the existence of a handler
4750 from the procedure descriptor .handler field. As with the VMS native compilers,
4751 we store the user-specified handler's address at a fixed location in the
4752 stack frame and point the procedure descriptor at a common wrapper which
4753 fetches the real handler's address and issues an indirect call.
4754
4755 The indirection wrapper is "__gcc_shell_handler", provided by libgcc.
4756
4757 We force the procedure kind to PT_STACK, and the fixed frame location is
4758 fp+8, just before the register save area. We use the handler_data field in
4759 the procedure descriptor to state the fp offset at which the installed
4760 handler address can be found. */
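/* Illustrative use from C (hypothetical user code; the builtin signatures
   are assumed here rather than quoted from documentation):

     void *prev = __builtin_establish_vms_condition_handler (my_handler);
     ...
     __builtin_revert_vms_condition_handler ();

   Establishing a handler returns the previously installed one; reverting
   installs a null handler through the same mechanism.  */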
4761
4762 #define VMS_COND_HANDLER_FP_OFFSET 8
4763
4764 /* Expand code to store the currently installed user VMS condition handler
4765 into TARGET and install HANDLER as the new condition handler. */
4766
4767 void
4768 alpha_expand_builtin_establish_vms_condition_handler (rtx target, rtx handler)
4769 {
4770 rtx handler_slot_address = plus_constant (Pmode, hard_frame_pointer_rtx,
4771 VMS_COND_HANDLER_FP_OFFSET);
4772
4773 rtx handler_slot
4774 = gen_rtx_MEM (DImode, handler_slot_address);
4775
4776 emit_move_insn (target, handler_slot);
4777 emit_move_insn (handler_slot, handler);
4778
4779 /* Notify the start/prologue/epilogue emitters that the condition handler
4780 slot is needed. In addition to reserving the slot space, this will force
4781 the procedure kind to PT_STACK, ensuring that the hard_frame_pointer_rtx
4782 use above is correct. */
4783 cfun->machine->uses_condition_handler = true;
4784 }
4785
4786 /* Expand code to store the current VMS condition handler into TARGET and
4787 nullify it. */
4788
4789 void
4790 alpha_expand_builtin_revert_vms_condition_handler (rtx target)
4791 {
4792 /* We implement this by establishing a null condition handler, with the tiny
4793 side effect of setting uses_condition_handler. This is a little bit
4794 pessimistic if no actual builtin_establish call is ever issued, which is
4795 not a real problem and is expected never to happen anyway. */
4796
4797 alpha_expand_builtin_establish_vms_condition_handler (target, const0_rtx);
4798 }
4799
4800 /* Functions to save and restore alpha_return_addr_rtx. */
4801
4802 /* Start the ball rolling with RETURN_ADDR_RTX. */
4803
4804 rtx
4805 alpha_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4806 {
4807 if (count != 0)
4808 return const0_rtx;
4809
4810 return get_hard_reg_initial_val (Pmode, REG_RA);
4811 }
4812
4813 /* Return or create a memory slot containing the gp value for the current
4814 function. Needed only if TARGET_LD_BUGGY_LDGP. */
4815
4816 rtx
4817 alpha_gp_save_rtx (void)
4818 {
4819 rtx seq, m = cfun->machine->gp_save_rtx;
4820
4821 if (m == NULL)
4822 {
4823 start_sequence ();
4824
4825 m = assign_stack_local (DImode, UNITS_PER_WORD, BITS_PER_WORD);
4826 m = validize_mem (m);
4827 emit_move_insn (m, pic_offset_table_rtx);
4828
4829 seq = get_insns ();
4830 end_sequence ();
4831
4832 /* We used to simply emit the sequence after entry_of_function.
4833 However this breaks the CFG if the first instruction in the
4834 first block is not the NOTE_INSN_BASIC_BLOCK, for example a
4835 label. Emit the sequence properly on the edge. We are only
4836 invoked from dw2_build_landing_pads and finish_eh_generation
4837 will call commit_edge_insertions thanks to a kludge. */
4838 insert_insn_on_edge (seq,
4839 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
4840
4841 cfun->machine->gp_save_rtx = m;
4842 }
4843
4844 return m;
4845 }
4846
4847 static void
4848 alpha_instantiate_decls (void)
4849 {
4850 if (cfun->machine->gp_save_rtx != NULL_RTX)
4851 instantiate_decl_rtl (cfun->machine->gp_save_rtx);
4852 }
4853
4854 static int
4855 alpha_ra_ever_killed (void)
4856 {
4857 rtx top;
4858
4859 if (!has_hard_reg_initial_val (Pmode, REG_RA))
4860 return (int)df_regs_ever_live_p (REG_RA);
4861
4862 push_topmost_sequence ();
4863 top = get_insns ();
4864 pop_topmost_sequence ();
4865
4866 return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL_RTX);
4867 }
4868
4869 \f
4870 /* Return the trap mode suffix applicable to the current
4871 instruction, or NULL. */
4872
4873 static const char *
4874 get_trap_mode_suffix (void)
4875 {
4876 enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn);
4877
4878 switch (s)
4879 {
4880 case TRAP_SUFFIX_NONE:
4881 return NULL;
4882
4883 case TRAP_SUFFIX_SU:
4884 if (alpha_fptm >= ALPHA_FPTM_SU)
4885 return "su";
4886 return NULL;
4887
4888 case TRAP_SUFFIX_SUI:
4889 if (alpha_fptm >= ALPHA_FPTM_SUI)
4890 return "sui";
4891 return NULL;
4892
4893 case TRAP_SUFFIX_V_SV:
4894 switch (alpha_fptm)
4895 {
4896 case ALPHA_FPTM_N:
4897 return NULL;
4898 case ALPHA_FPTM_U:
4899 return "v";
4900 case ALPHA_FPTM_SU:
4901 case ALPHA_FPTM_SUI:
4902 return "sv";
4903 default:
4904 gcc_unreachable ();
4905 }
4906
4907 case TRAP_SUFFIX_V_SV_SVI:
4908 switch (alpha_fptm)
4909 {
4910 case ALPHA_FPTM_N:
4911 return NULL;
4912 case ALPHA_FPTM_U:
4913 return "v";
4914 case ALPHA_FPTM_SU:
4915 return "sv";
4916 case ALPHA_FPTM_SUI:
4917 return "svi";
4918 default:
4919 gcc_unreachable ();
4920 }
4921 break;
4922
4923 case TRAP_SUFFIX_U_SU_SUI:
4924 switch (alpha_fptm)
4925 {
4926 case ALPHA_FPTM_N:
4927 return NULL;
4928 case ALPHA_FPTM_U:
4929 return "u";
4930 case ALPHA_FPTM_SU:
4931 return "su";
4932 case ALPHA_FPTM_SUI:
4933 return "sui";
4934 default:
4935 gcc_unreachable ();
4936 }
4937 break;
4938
4939 default:
4940 gcc_unreachable ();
4941 }
4942 gcc_unreachable ();
4943 }
4944
4945 /* Return the rounding mode suffix applicable to the current
4946 instruction, or NULL. */
4947
4948 static const char *
4949 get_round_mode_suffix (void)
4950 {
4951 enum attr_round_suffix s = get_attr_round_suffix (current_output_insn);
4952
4953 switch (s)
4954 {
4955 case ROUND_SUFFIX_NONE:
4956 return NULL;
4957 case ROUND_SUFFIX_NORMAL:
4958 switch (alpha_fprm)
4959 {
4960 case ALPHA_FPRM_NORM:
4961 return NULL;
4962 case ALPHA_FPRM_MINF:
4963 return "m";
4964 case ALPHA_FPRM_CHOP:
4965 return "c";
4966 case ALPHA_FPRM_DYN:
4967 return "d";
4968 default:
4969 gcc_unreachable ();
4970 }
4971 break;
4972
4973 case ROUND_SUFFIX_C:
4974 return "c";
4975
4976 default:
4977 gcc_unreachable ();
4978 }
4979 gcc_unreachable ();
4980 }
4981
4982 /* Locate some local-dynamic symbol still in use by this function
4983 so that we can print its name in some movdi_er_tlsldm pattern. */
4984
4985 static int
4986 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
4987 {
4988 rtx x = *px;
4989
4990 if (GET_CODE (x) == SYMBOL_REF
4991 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
4992 {
4993 cfun->machine->some_ld_name = XSTR (x, 0);
4994 return 1;
4995 }
4996
4997 return 0;
4998 }
4999
5000 static const char *
5001 get_some_local_dynamic_name (void)
5002 {
5003 rtx insn;
5004
5005 if (cfun->machine->some_ld_name)
5006 return cfun->machine->some_ld_name;
5007
5008 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
5009 if (INSN_P (insn)
5010 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
5011 return cfun->machine->some_ld_name;
5012
5013 gcc_unreachable ();
5014 }
5015
5016 /* Print an operand. Recognize special options, documented below. */
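/* For instance (an illustrative template, not quoted from alpha.md), an
   output pattern such as

     "adds%/ %R1,%R2,%0"

   prints operands 1 and 2 as floating-point registers (with $f31 for a
   zero constant, courtesy of %R), appends the trap/rounding-mode suffix
   chosen by get_trap_mode_suffix and get_round_mode_suffix via %/, and
   prints operand 0 normally.  */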
5017
5018 void
5019 print_operand (FILE *file, rtx x, int code)
5020 {
5021 int i;
5022
5023 switch (code)
5024 {
5025 case '~':
5026 /* Print the assembler name of the current function. */
5027 assemble_name (file, alpha_fnname);
5028 break;
5029
5030 case '&':
5031 assemble_name (file, get_some_local_dynamic_name ());
5032 break;
5033
5034 case '/':
5035 {
5036 const char *trap = get_trap_mode_suffix ();
5037 const char *round = get_round_mode_suffix ();
5038
5039 if (trap || round)
5040 fprintf (file, "/%s%s", (trap ? trap : ""), (round ? round : ""));
5041 break;
5042 }
5043
5044 case ',':
5045 /* Generates single precision instruction suffix. */
5046 fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file);
5047 break;
5048
5049 case '-':
5050 /* Generates double precision instruction suffix. */
5051 fputc ((TARGET_FLOAT_VAX ? 'g' : 't'), file);
5052 break;
5053
5054 case '#':
5055 if (alpha_this_literal_sequence_number == 0)
5056 alpha_this_literal_sequence_number = alpha_next_sequence_number++;
5057 fprintf (file, "%d", alpha_this_literal_sequence_number);
5058 break;
5059
5060 case '*':
5061 if (alpha_this_gpdisp_sequence_number == 0)
5062 alpha_this_gpdisp_sequence_number = alpha_next_sequence_number++;
5063 fprintf (file, "%d", alpha_this_gpdisp_sequence_number);
5064 break;
5065
5066 case 'H':
5067 if (GET_CODE (x) == HIGH)
5068 output_addr_const (file, XEXP (x, 0));
5069 else
5070 output_operand_lossage ("invalid %%H value");
5071 break;
5072
5073 case 'J':
5074 {
5075 const char *lituse;
5076
5077 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL)
5078 {
5079 x = XVECEXP (x, 0, 0);
5080 lituse = "lituse_tlsgd";
5081 }
5082 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL)
5083 {
5084 x = XVECEXP (x, 0, 0);
5085 lituse = "lituse_tlsldm";
5086 }
5087 else if (CONST_INT_P (x))
5088 lituse = "lituse_jsr";
5089 else
5090 {
5091 output_operand_lossage ("invalid %%J value");
5092 break;
5093 }
5094
5095 if (x != const0_rtx)
5096 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5097 }
5098 break;
5099
5100 case 'j':
5101 {
5102 const char *lituse;
5103
5104 #ifdef HAVE_AS_JSRDIRECT_RELOCS
5105 lituse = "lituse_jsrdirect";
5106 #else
5107 lituse = "lituse_jsr";
5108 #endif
5109
5110 gcc_assert (INTVAL (x) != 0);
5111 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5112 }
5113 break;
5114 case 'r':
5115 /* If this operand is the constant zero, write it as "$31". */
5116 if (REG_P (x))
5117 fprintf (file, "%s", reg_names[REGNO (x)]);
5118 else if (x == CONST0_RTX (GET_MODE (x)))
5119 fprintf (file, "$31");
5120 else
5121 output_operand_lossage ("invalid %%r value");
5122 break;
5123
5124 case 'R':
5125 /* Similar, but for floating-point. */
5126 if (REG_P (x))
5127 fprintf (file, "%s", reg_names[REGNO (x)]);
5128 else if (x == CONST0_RTX (GET_MODE (x)))
5129 fprintf (file, "$f31");
5130 else
5131 output_operand_lossage ("invalid %%R value");
5132 break;
5133
5134 case 'N':
5135 /* Write the 1's complement of a constant. */
5136 if (!CONST_INT_P (x))
5137 output_operand_lossage ("invalid %%N value");
5138
5139 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
5140 break;
5141
5142 case 'P':
5143 /* Write 1 << C, for a constant C. */
5144 if (!CONST_INT_P (x))
5145 output_operand_lossage ("invalid %%P value");
5146
5147 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) 1 << INTVAL (x));
5148 break;
5149
5150 case 'h':
5151 /* Write the high-order 16 bits of a constant, sign-extended. */
5152 if (!CONST_INT_P (x))
5153 output_operand_lossage ("invalid %%h value");
5154
5155 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16);
5156 break;
5157
5158 case 'L':
5159 /* Write the low-order 16 bits of a constant, sign-extended. */
5160 if (!CONST_INT_P (x))
5161 output_operand_lossage ("invalid %%L value");
5162
5163 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5164 (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000));
5165 break;
5166
5167 case 'm':
5168 /* Write mask for ZAP insn. */
5169 if (GET_CODE (x) == CONST_DOUBLE)
5170 {
5171 HOST_WIDE_INT mask = 0;
5172 HOST_WIDE_INT value;
5173
5174 value = CONST_DOUBLE_LOW (x);
5175 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
5176 i++, value >>= 8)
5177 if (value & 0xff)
5178 mask |= (1 << i);
5179
5180 value = CONST_DOUBLE_HIGH (x);
5181 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
5182 i++, value >>= 8)
5183 if (value & 0xff)
5184 mask |= (1 << (i + sizeof (int)));
5185
5186 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask & 0xff);
5187 }
5188
5189 else if (CONST_INT_P (x))
5190 {
5191 HOST_WIDE_INT mask = 0, value = INTVAL (x);
5192
5193 for (i = 0; i < 8; i++, value >>= 8)
5194 if (value & 0xff)
5195 mask |= (1 << i);
5196
5197 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask);
5198 }
5199 else
5200 output_operand_lossage ("invalid %%m value");
5201 break;
5202
5203 case 'M':
5204 /* 'b', 'w', 'l', or 'q' as the value of the constant. */
5205 if (!CONST_INT_P (x)
5206 || (INTVAL (x) != 8 && INTVAL (x) != 16
5207 && INTVAL (x) != 32 && INTVAL (x) != 64))
5208 output_operand_lossage ("invalid %%M value");
5209
5210 fprintf (file, "%s",
5211 (INTVAL (x) == 8 ? "b"
5212 : INTVAL (x) == 16 ? "w"
5213 : INTVAL (x) == 32 ? "l"
5214 : "q"));
5215 break;
5216
5217 case 'U':
5218 /* Similar, except do it from the mask. */
5219 if (CONST_INT_P (x))
5220 {
5221 HOST_WIDE_INT value = INTVAL (x);
5222
5223 if (value == 0xff)
5224 {
5225 fputc ('b', file);
5226 break;
5227 }
5228 if (value == 0xffff)
5229 {
5230 fputc ('w', file);
5231 break;
5232 }
5233 if (value == 0xffffffff)
5234 {
5235 fputc ('l', file);
5236 break;
5237 }
5238 if (value == -1)
5239 {
5240 fputc ('q', file);
5241 break;
5242 }
5243 }
5244 else if (HOST_BITS_PER_WIDE_INT == 32
5245 && GET_CODE (x) == CONST_DOUBLE
5246 && CONST_DOUBLE_LOW (x) == 0xffffffff
5247 && CONST_DOUBLE_HIGH (x) == 0)
5248 {
5249 fputc ('l', file);
5250 break;
5251 }
5252 output_operand_lossage ("invalid %%U value");
5253 break;
5254
5255 case 's':
5256 /* Write the constant value divided by 8. */
5257 if (!CONST_INT_P (x)
5258 || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5259 || (INTVAL (x) & 7) != 0)
5260 output_operand_lossage ("invalid %%s value");
5261
5262 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8);
5263 break;
5264
5265 case 'S':
5266 /* Same, except compute (64 - c) / 8 */
5267
5268 if (!CONST_INT_P (x)
5269 && (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5270 || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5271 || (INTVAL (x) & 7) != 0)
5272 output_operand_lossage ("invalid %%S value");
5273 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (64 - INTVAL (x)) / 8);
5274 break;
5275
5276 case 'C': case 'D': case 'c': case 'd':
5277 /* Write out comparison name. */
5278 {
5279 enum rtx_code c = GET_CODE (x);
5280
5281 if (!COMPARISON_P (x))
5282 output_operand_lossage ("invalid %%C value");
5283
5284 else if (code == 'D')
5285 c = reverse_condition (c);
5286 else if (code == 'c')
5287 c = swap_condition (c);
5288 else if (code == 'd')
5289 c = swap_condition (reverse_condition (c));
5290
5291 if (c == LEU)
5292 fprintf (file, "ule");
5293 else if (c == LTU)
5294 fprintf (file, "ult");
5295 else if (c == UNORDERED)
5296 fprintf (file, "un");
5297 else
5298 fprintf (file, "%s", GET_RTX_NAME (c));
5299 }
5300 break;
5301
5302 case 'E':
5303 /* Write the divide or modulus operator. */
5304 switch (GET_CODE (x))
5305 {
5306 case DIV:
5307 fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q");
5308 break;
5309 case UDIV:
5310 fprintf (file, "div%su", GET_MODE (x) == SImode ? "l" : "q");
5311 break;
5312 case MOD:
5313 fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q");
5314 break;
5315 case UMOD:
5316 fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q");
5317 break;
5318 default:
5319 output_operand_lossage ("invalid %%E value");
5320 break;
5321 }
5322 break;
5323
5324 case 'A':
5325 /* Write "_u" for unaligned access. */
5326 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
5327 fprintf (file, "_u");
5328 break;
5329
5330 case 0:
5331 if (REG_P (x))
5332 fprintf (file, "%s", reg_names[REGNO (x)]);
5333 else if (MEM_P (x))
5334 output_address (XEXP (x, 0));
5335 else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC)
5336 {
5337 switch (XINT (XEXP (x, 0), 1))
5338 {
5339 case UNSPEC_DTPREL:
5340 case UNSPEC_TPREL:
5341 output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0));
5342 break;
5343 default:
5344 output_operand_lossage ("unknown relocation unspec");
5345 break;
5346 }
5347 }
5348 else
5349 output_addr_const (file, x);
5350 break;
5351
5352 default:
5353 output_operand_lossage ("invalid %%xn code");
5354 }
5355 }
5356
5357 void
5358 print_operand_address (FILE *file, rtx addr)
5359 {
5360 int basereg = 31;
5361 HOST_WIDE_INT offset = 0;
5362
5363 if (GET_CODE (addr) == AND)
5364 addr = XEXP (addr, 0);
5365
5366 if (GET_CODE (addr) == PLUS
5367 && CONST_INT_P (XEXP (addr, 1)))
5368 {
5369 offset = INTVAL (XEXP (addr, 1));
5370 addr = XEXP (addr, 0);
5371 }
5372
5373 if (GET_CODE (addr) == LO_SUM)
5374 {
5375 const char *reloc16, *reloclo;
5376 rtx op1 = XEXP (addr, 1);
5377
5378 if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC)
5379 {
5380 op1 = XEXP (op1, 0);
5381 switch (XINT (op1, 1))
5382 {
5383 case UNSPEC_DTPREL:
5384 reloc16 = NULL;
5385 reloclo = (alpha_tls_size == 16 ? "dtprel" : "dtprello");
5386 break;
5387 case UNSPEC_TPREL:
5388 reloc16 = NULL;
5389 reloclo = (alpha_tls_size == 16 ? "tprel" : "tprello");
5390 break;
5391 default:
5392 output_operand_lossage ("unknown relocation unspec");
5393 return;
5394 }
5395
5396 output_addr_const (file, XVECEXP (op1, 0, 0));
5397 }
5398 else
5399 {
5400 reloc16 = "gprel";
5401 reloclo = "gprellow";
5402 output_addr_const (file, op1);
5403 }
5404
5405 if (offset)
5406 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
5407
5408 addr = XEXP (addr, 0);
5409 switch (GET_CODE (addr))
5410 {
5411 case REG:
5412 basereg = REGNO (addr);
5413 break;
5414
5415 case SUBREG:
5416 basereg = subreg_regno (addr);
5417 break;
5418
5419 default:
5420 gcc_unreachable ();
5421 }
5422
5423 fprintf (file, "($%d)\t\t!%s", basereg,
5424 (basereg == 29 ? reloc16 : reloclo));
5425 return;
5426 }
5427
5428 switch (GET_CODE (addr))
5429 {
5430 case REG:
5431 basereg = REGNO (addr);
5432 break;
5433
5434 case SUBREG:
5435 basereg = subreg_regno (addr);
5436 break;
5437
5438 case CONST_INT:
5439 offset = INTVAL (addr);
5440 break;
5441
5442 #if TARGET_ABI_OPEN_VMS
5443 case SYMBOL_REF:
5444 fprintf (file, "%s", XSTR (addr, 0));
5445 return;
5446
5447 case CONST:
5448 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS
5449 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF);
5450 fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC,
5451 XSTR (XEXP (XEXP (addr, 0), 0), 0),
5452 INTVAL (XEXP (XEXP (addr, 0), 1)));
5453 return;
5454
5455 #endif
5456 default:
5457 gcc_unreachable ();
5458 }
5459
5460 fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg);
5461 }
5462 \f
5463 /* Emit RTL insns to initialize the variable parts of a trampoline at
5464 M_TRAMP. FNDECL is target function's decl. CHAIN_VALUE is an rtx
5465 for the static chain value for the function. */
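/* For illustration (a layout sketch derived from the code below, not a
   normative description): on OSF the 32-byte trampoline holds the four
   instructions listed further down in its first two quadwords, the target
   function's address at offset 16, and the static chain value at
   offset 24.  */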
5466
5467 static void
5468 alpha_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5469 {
5470 rtx fnaddr, mem, word1, word2;
5471
5472 fnaddr = XEXP (DECL_RTL (fndecl), 0);
5473
5474 #ifdef POINTERS_EXTEND_UNSIGNED
5475 fnaddr = convert_memory_address (Pmode, fnaddr);
5476 chain_value = convert_memory_address (Pmode, chain_value);
5477 #endif
5478
5479 if (TARGET_ABI_OPEN_VMS)
5480 {
5481 const char *fnname;
5482 char *trname;
5483
5484 /* Construct the name of the trampoline entry point. */
5485 fnname = XSTR (fnaddr, 0);
5486 trname = (char *) alloca (strlen (fnname) + 5);
5487 strcpy (trname, fnname);
5488 strcat (trname, "..tr");
5489 fnname = ggc_alloc_string (trname, strlen (trname) + 1);
5490 word2 = gen_rtx_SYMBOL_REF (Pmode, fnname);
5491
5492 /* Trampoline (or "bounded") procedure descriptor is constructed from
5493 the function's procedure descriptor with certain fields zeroed IAW
5494 the VMS calling standard. This is stored in the first quadword. */
5495 word1 = force_reg (DImode, gen_const_mem (DImode, fnaddr));
5496 word1 = expand_and (DImode, word1,
5497 GEN_INT (HOST_WIDE_INT_C (0xffff0fff0000fff0)),
5498 NULL);
5499 }
5500 else
5501 {
5502 /* These 4 instructions are:
5503 ldq $1,24($27)
5504 ldq $27,16($27)
5505 jmp $31,($27),0
5506 nop
5507 We don't bother setting the HINT field of the jump; the nop
5508 is merely there for padding. */
5509 word1 = GEN_INT (HOST_WIDE_INT_C (0xa77b0010a43b0018));
5510 word2 = GEN_INT (HOST_WIDE_INT_C (0x47ff041f6bfb0000));
5511 }
5512
5513 /* Store the first two words, as computed above. */
5514 mem = adjust_address (m_tramp, DImode, 0);
5515 emit_move_insn (mem, word1);
5516 mem = adjust_address (m_tramp, DImode, 8);
5517 emit_move_insn (mem, word2);
5518
5519 /* Store function address and static chain value. */
5520 mem = adjust_address (m_tramp, Pmode, 16);
5521 emit_move_insn (mem, fnaddr);
5522 mem = adjust_address (m_tramp, Pmode, 24);
5523 emit_move_insn (mem, chain_value);
5524
5525 if (TARGET_ABI_OSF)
5526 {
5527 emit_insn (gen_imb ());
5528 #ifdef HAVE_ENABLE_EXECUTE_STACK
5529 emit_library_call (init_one_libfunc ("__enable_execute_stack"),
5530 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
5531 #endif
5532 }
5533 }
5534 \f
5535 /* Determine where to put an argument to a function.
5536 Value is zero to push the argument on the stack,
5537 or a hard register in which to store the argument.
5538
5539 MODE is the argument's machine mode.
5540 TYPE is the data type of the argument (as a tree).
5541 This is null for libcalls where that information may
5542 not be available.
5543 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5544 the preceding args and about the function being called.
5545 NAMED is nonzero if this argument is a named parameter
5546 (otherwise it is an extra parameter matching an ellipsis).
5547
5548 On Alpha the first 6 words of args are normally in registers
5549 and the rest are pushed. */
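/* As a concrete illustration (a sketch assuming the OSF conventions
   implemented below, not quoted from the calling standard): for a call
   f (int a, double b, int c), A is passed in $16, B in $f17 and C in $18;
   each argument consumes one of the six slots, with floating-point values
   using the $fN register of the corresponding slot.  */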
5550
5551 static rtx
5552 alpha_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
5553 const_tree type, bool named ATTRIBUTE_UNUSED)
5554 {
5555 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5556 int basereg;
5557 int num_args;
5558
5559 /* Don't get confused and pass small structures in FP registers. */
5560 if (type && AGGREGATE_TYPE_P (type))
5561 basereg = 16;
5562 else
5563 {
5564 #ifdef ENABLE_CHECKING
5565 /* With alpha_split_complex_arg, we shouldn't see any raw complex
5566 values here. */
5567 gcc_assert (!COMPLEX_MODE_P (mode));
5568 #endif
5569
5570 /* Set up defaults for FP operands passed in FP registers, and
5571 integral operands passed in integer registers. */
5572 if (TARGET_FPREGS && GET_MODE_CLASS (mode) == MODE_FLOAT)
5573 basereg = 32 + 16;
5574 else
5575 basereg = 16;
5576 }
5577
5578 /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for
5579 the two platforms, so we can't avoid conditional compilation. */
5580 #if TARGET_ABI_OPEN_VMS
5581 {
5582 if (mode == VOIDmode)
5583 return alpha_arg_info_reg_val (*cum);
5584
5585 num_args = cum->num_args;
5586 if (num_args >= 6
5587 || targetm.calls.must_pass_in_stack (mode, type))
5588 return NULL_RTX;
5589 }
5590 #elif TARGET_ABI_OSF
5591 {
5592 if (*cum >= 6)
5593 return NULL_RTX;
5594 num_args = *cum;
5595
5596 /* VOID is passed as a special flag for "last argument". */
5597 if (type == void_type_node)
5598 basereg = 16;
5599 else if (targetm.calls.must_pass_in_stack (mode, type))
5600 return NULL_RTX;
5601 }
5602 #else
5603 #error Unhandled ABI
5604 #endif
5605
5606 return gen_rtx_REG (mode, num_args + basereg);
5607 }
5608
5609 /* Update the data in CUM to advance over an argument
5610 of mode MODE and data type TYPE.
5611 (TYPE is null for libcalls where that information may not be available.) */
5612
5613 static void
5614 alpha_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
5615 const_tree type, bool named ATTRIBUTE_UNUSED)
5616 {
5617 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5618 bool onstack = targetm.calls.must_pass_in_stack (mode, type);
5619 int increment = onstack ? 6 : ALPHA_ARG_SIZE (mode, type, named);
5620
5621 #if TARGET_ABI_OSF
5622 *cum += increment;
5623 #else
5624 if (!onstack && cum->num_args < 6)
5625 cum->atypes[cum->num_args] = alpha_arg_type (mode);
5626 cum->num_args += increment;
5627 #endif
5628 }
5629
5630 static int
5631 alpha_arg_partial_bytes (cumulative_args_t cum_v,
5632 enum machine_mode mode ATTRIBUTE_UNUSED,
5633 tree type ATTRIBUTE_UNUSED,
5634 bool named ATTRIBUTE_UNUSED)
5635 {
5636 int words = 0;
5637 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED = get_cumulative_args (cum_v);
5638
5639 #if TARGET_ABI_OPEN_VMS
5640 if (cum->num_args < 6
5641 && 6 < cum->num_args + ALPHA_ARG_SIZE (mode, type, named))
5642 words = 6 - cum->num_args;
5643 #elif TARGET_ABI_OSF
5644 if (*cum < 6 && 6 < *cum + ALPHA_ARG_SIZE (mode, type, named))
5645 words = 6 - *cum;
5646 #else
5647 #error Unhandled ABI
5648 #endif
5649
5650 return words * UNITS_PER_WORD;
5651 }
5652
5653
5654 /* Return true if TYPE must be returned in memory, instead of in registers. */
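/* For example (derived from the logic below, for illustration only):
   a struct { int x, y; } is returned in memory on OSF but by value on
   OpenVMS, since it fits in 64 bits; a double _Complex comes back in
   registers because only its 8-byte element size is considered.  */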
5655
5656 static bool
5657 alpha_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
5658 {
5659 enum machine_mode mode = VOIDmode;
5660 int size;
5661
5662 if (type)
5663 {
5664 mode = TYPE_MODE (type);
5665
5666 /* All aggregates are returned in memory, except on OpenVMS where
5667 records that fit in 64 bits should be returned by immediate value
5668 as required by section 3.8.7.1 of the OpenVMS Calling Standard. */
5669 if (TARGET_ABI_OPEN_VMS
5670 && TREE_CODE (type) != ARRAY_TYPE
5671 && (unsigned HOST_WIDE_INT) int_size_in_bytes(type) <= 8)
5672 return false;
5673
5674 if (AGGREGATE_TYPE_P (type))
5675 return true;
5676 }
5677
5678 size = GET_MODE_SIZE (mode);
5679 switch (GET_MODE_CLASS (mode))
5680 {
5681 case MODE_VECTOR_FLOAT:
5682 /* Pass all float vectors in memory, like an aggregate. */
5683 return true;
5684
5685 case MODE_COMPLEX_FLOAT:
5686 /* We judge complex floats on the size of their element,
5687 not the size of the whole type. */
5688 size = GET_MODE_UNIT_SIZE (mode);
5689 break;
5690
5691 case MODE_INT:
5692 case MODE_FLOAT:
5693 case MODE_COMPLEX_INT:
5694 case MODE_VECTOR_INT:
5695 break;
5696
5697 default:
5698 /* ??? We get called on all sorts of random stuff from
5699 aggregate_value_p. We must return something, but it's not
5700 clear what's safe to return. Pretend it's a struct I
5701 guess. */
5702 return true;
5703 }
5704
5705 /* Otherwise types must fit in one register. */
5706 return size > UNITS_PER_WORD;
5707 }
5708
5709 /* Return true if TYPE should be passed by invisible reference. */
5710
5711 static bool
5712 alpha_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
5713 enum machine_mode mode,
5714 const_tree type ATTRIBUTE_UNUSED,
5715 bool named ATTRIBUTE_UNUSED)
5716 {
5717 return mode == TFmode || mode == TCmode;
5718 }
5719
5720 /* Define how to find the value returned by a function. VALTYPE is the
5721 data type of the value (as a tree). If the precise function being
5722 called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0.
5723 MODE is set instead of VALTYPE for libcalls.
5724
5725 On Alpha the value is found in $0 for integer functions and
5726 $f0 for floating-point functions. */
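/* A complex floating-point value is described by a two-element PARALLEL,
   so that, for example, a double _Complex result is returned with its
   real part in $f0 and its imaginary part in $f1 (an illustration of the
   MODE_COMPLEX_FLOAT case below).  */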
5727
5728 rtx
5729 function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED,
5730 enum machine_mode mode)
5731 {
5732 unsigned int regnum, dummy ATTRIBUTE_UNUSED;
5733 enum mode_class mclass;
5734
5735 gcc_assert (!valtype || !alpha_return_in_memory (valtype, func));
5736
5737 if (valtype)
5738 mode = TYPE_MODE (valtype);
5739
5740 mclass = GET_MODE_CLASS (mode);
5741 switch (mclass)
5742 {
5743 case MODE_INT:
5744 /* Do the same thing as PROMOTE_MODE except for libcalls on VMS,
5745 where we have them returning both SImode and DImode. */
5746 if (!(TARGET_ABI_OPEN_VMS && valtype && AGGREGATE_TYPE_P (valtype)))
5747 PROMOTE_MODE (mode, dummy, valtype);
5748 /* FALLTHRU */
5749
5750 case MODE_COMPLEX_INT:
5751 case MODE_VECTOR_INT:
5752 regnum = 0;
5753 break;
5754
5755 case MODE_FLOAT:
5756 regnum = 32;
5757 break;
5758
5759 case MODE_COMPLEX_FLOAT:
5760 {
5761 enum machine_mode cmode = GET_MODE_INNER (mode);
5762
5763 return gen_rtx_PARALLEL
5764 (VOIDmode,
5765 gen_rtvec (2,
5766 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32),
5767 const0_rtx),
5768 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33),
5769 GEN_INT (GET_MODE_SIZE (cmode)))));
5770 }
5771
5772 case MODE_RANDOM:
5773 /* We should only reach here for BLKmode on VMS. */
5774 gcc_assert (TARGET_ABI_OPEN_VMS && mode == BLKmode);
5775 regnum = 0;
5776 break;
5777
5778 default:
5779 gcc_unreachable ();
5780 }
5781
5782 return gen_rtx_REG (mode, regnum);
5783 }
5784
5785 /* TCmode complex values are passed by invisible reference. We
5786 should not split these values. */
5787
5788 static bool
5789 alpha_split_complex_arg (const_tree type)
5790 {
5791 return TYPE_MODE (type) != TCmode;
5792 }
5793
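/* Build the va_list type used on OSF.  Roughly equivalent to the
   following C record (an illustrative sketch; field order and types
   mirror what is constructed below):

     struct __va_list_tag
     {
       char *__base;      (start of the argument save area)
       int __offset;      (byte offset of the next argument)
       int <anonymous>;   (dummy field, avoids alignment warnings)
     };  */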
5794 static tree
5795 alpha_build_builtin_va_list (void)
5796 {
5797 tree base, ofs, space, record, type_decl;
5798
5799 if (TARGET_ABI_OPEN_VMS)
5800 return ptr_type_node;
5801
5802 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5803 type_decl = build_decl (BUILTINS_LOCATION,
5804 TYPE_DECL, get_identifier ("__va_list_tag"), record);
5805 TYPE_STUB_DECL (record) = type_decl;
5806 TYPE_NAME (record) = type_decl;
5807
5808 /* C++? SET_IS_AGGR_TYPE (record, 1); */
5809
5810 /* Dummy field to prevent alignment warnings. */
5811 space = build_decl (BUILTINS_LOCATION,
5812 FIELD_DECL, NULL_TREE, integer_type_node);
5813 DECL_FIELD_CONTEXT (space) = record;
5814 DECL_ARTIFICIAL (space) = 1;
5815 DECL_IGNORED_P (space) = 1;
5816
5817 ofs = build_decl (BUILTINS_LOCATION,
5818 FIELD_DECL, get_identifier ("__offset"),
5819 integer_type_node);
5820 DECL_FIELD_CONTEXT (ofs) = record;
5821 DECL_CHAIN (ofs) = space;
5822 /* ??? This is a hack, __offset is marked volatile to prevent
5823 DCE that confuses stdarg optimization and results in
5824 gcc.c-torture/execute/stdarg-1.c failure. See PR 41089. */
5825 TREE_THIS_VOLATILE (ofs) = 1;
5826
5827 base = build_decl (BUILTINS_LOCATION,
5828 FIELD_DECL, get_identifier ("__base"),
5829 ptr_type_node);
5830 DECL_FIELD_CONTEXT (base) = record;
5831 DECL_CHAIN (base) = ofs;
5832
5833 TYPE_FIELDS (record) = base;
5834 layout_type (record);
5835
5836 va_list_gpr_counter_field = ofs;
5837 return record;
5838 }
5839
5840 #if TARGET_ABI_OSF
5841 /* Helper function for alpha_stdarg_optimize_hook. Skip over casts
5842 and constant additions. */
5843
5844 static gimple
5845 va_list_skip_additions (tree lhs)
5846 {
5847 gimple stmt;
5848
5849 for (;;)
5850 {
5851 enum tree_code code;
5852
5853 stmt = SSA_NAME_DEF_STMT (lhs);
5854
5855 if (gimple_code (stmt) == GIMPLE_PHI)
5856 return stmt;
5857
5858 if (!is_gimple_assign (stmt)
5859 || gimple_assign_lhs (stmt) != lhs)
5860 return NULL;
5861
5862 if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME)
5863 return stmt;
5864 code = gimple_assign_rhs_code (stmt);
5865 if (!CONVERT_EXPR_CODE_P (code)
5866 && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR)
5867 || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
5868 || !tree_fits_uhwi_p (gimple_assign_rhs2 (stmt))))
5869 return stmt;
5870
5871 lhs = gimple_assign_rhs1 (stmt);
5872 }
5873 }
5874
5875 /* Check if LHS = RHS statement is
5876 LHS = *(ap.__base + ap.__offset + cst)
5877 or
5878 LHS = *(ap.__base
5879 + ((ap.__offset + cst <= 47)
5880 ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2).
5881 If the former, indicate that GPR registers are needed,
5882 if the latter, indicate that FPR registers are needed.
5883
5884 Also look for LHS = (*ptr).field, where ptr is one of the forms
5885 listed above.
5886
5887 On alpha, cfun->va_list_gpr_size is used as the size of the needed
5888 regs and cfun->va_list_fpr_size is a bitmask, bit 0 set if GPR
5889 registers are needed and bit 1 set if FPR registers are needed.
5890 Return true if va_list references should not be scanned for the
5891 current statement. */
5892
5893 static bool
5894 alpha_stdarg_optimize_hook (struct stdarg_info *si, const_gimple stmt)
5895 {
5896 tree base, offset, rhs;
5897 int offset_arg = 1;
5898 gimple base_stmt;
5899
5900 if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
5901 != GIMPLE_SINGLE_RHS)
5902 return false;
5903
5904 rhs = gimple_assign_rhs1 (stmt);
5905 while (handled_component_p (rhs))
5906 rhs = TREE_OPERAND (rhs, 0);
5907 if (TREE_CODE (rhs) != MEM_REF
5908 || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME)
5909 return false;
5910
5911 stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0));
5912 if (stmt == NULL
5913 || !is_gimple_assign (stmt)
5914 || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR)
5915 return false;
5916
5917 base = gimple_assign_rhs1 (stmt);
5918 if (TREE_CODE (base) == SSA_NAME)
5919 {
5920 base_stmt = va_list_skip_additions (base);
5921 if (base_stmt
5922 && is_gimple_assign (base_stmt)
5923 && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
5924 base = gimple_assign_rhs1 (base_stmt);
5925 }
5926
5927 if (TREE_CODE (base) != COMPONENT_REF
5928 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
5929 {
5930 base = gimple_assign_rhs2 (stmt);
5931 if (TREE_CODE (base) == SSA_NAME)
5932 {
5933 base_stmt = va_list_skip_additions (base);
5934 if (base_stmt
5935 && is_gimple_assign (base_stmt)
5936 && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
5937 base = gimple_assign_rhs1 (base_stmt);
5938 }
5939
5940 if (TREE_CODE (base) != COMPONENT_REF
5941 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
5942 return false;
5943
5944 offset_arg = 0;
5945 }
5946
5947 base = get_base_address (base);
5948 if (TREE_CODE (base) != VAR_DECL
5949 || !bitmap_bit_p (si->va_list_vars, DECL_UID (base) + num_ssa_names))
5950 return false;
5951
5952 offset = gimple_op (stmt, 1 + offset_arg);
5953 if (TREE_CODE (offset) == SSA_NAME)
5954 {
5955 gimple offset_stmt = va_list_skip_additions (offset);
5956
5957 if (offset_stmt
5958 && gimple_code (offset_stmt) == GIMPLE_PHI)
5959 {
5960 HOST_WIDE_INT sub;
5961 gimple arg1_stmt, arg2_stmt;
5962 tree arg1, arg2;
5963 enum tree_code code1, code2;
5964
5965 if (gimple_phi_num_args (offset_stmt) != 2)
5966 goto escapes;
5967
5968 arg1_stmt
5969 = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0));
5970 arg2_stmt
5971 = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1));
5972 if (arg1_stmt == NULL
5973 || !is_gimple_assign (arg1_stmt)
5974 || arg2_stmt == NULL
5975 || !is_gimple_assign (arg2_stmt))
5976 goto escapes;
5977
5978 code1 = gimple_assign_rhs_code (arg1_stmt);
5979 code2 = gimple_assign_rhs_code (arg2_stmt);
5980 if (code1 == COMPONENT_REF
5981 && (code2 == MINUS_EXPR || code2 == PLUS_EXPR))
5982 /* Do nothing. */;
5983 else if (code2 == COMPONENT_REF
5984 && (code1 == MINUS_EXPR || code1 == PLUS_EXPR))
5985 {
5986 gimple tem = arg1_stmt;
5987 code2 = code1;
5988 arg1_stmt = arg2_stmt;
5989 arg2_stmt = tem;
5990 }
5991 else
5992 goto escapes;
5993
5994 if (!tree_fits_shwi_p (gimple_assign_rhs2 (arg2_stmt)))
5995 goto escapes;
5996
5997 sub = tree_to_shwi (gimple_assign_rhs2 (arg2_stmt));
5998 if (code2 == MINUS_EXPR)
5999 sub = -sub;
6000 if (sub < -48 || sub > -32)
6001 goto escapes;
6002
6003 arg1 = gimple_assign_rhs1 (arg1_stmt);
6004 arg2 = gimple_assign_rhs1 (arg2_stmt);
6005 if (TREE_CODE (arg2) == SSA_NAME)
6006 {
6007 arg2_stmt = va_list_skip_additions (arg2);
6008 if (arg2_stmt == NULL
6009 || !is_gimple_assign (arg2_stmt)
6010 || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF)
6011 goto escapes;
6012 arg2 = gimple_assign_rhs1 (arg2_stmt);
6013 }
6014 if (arg1 != arg2)
6015 goto escapes;
6016
6017 if (TREE_CODE (arg1) != COMPONENT_REF
6018 || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field
6019 || get_base_address (arg1) != base)
6020 goto escapes;
6021
6022 /* Need floating point regs. */
6023 cfun->va_list_fpr_size |= 2;
6024 return false;
6025 }
6026 if (offset_stmt
6027 && is_gimple_assign (offset_stmt)
6028 && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF)
6029 offset = gimple_assign_rhs1 (offset_stmt);
6030 }
6031 if (TREE_CODE (offset) != COMPONENT_REF
6032 || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field
6033 || get_base_address (offset) != base)
6034 goto escapes;
6035 else
6036 /* Need general regs. */
6037 cfun->va_list_fpr_size |= 1;
6038 return false;
6039
6040 escapes:
6041 si->va_list_escapes = true;
6042 return false;
6043 }
6044 #endif
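/* For illustration, on OSF the gimplified form of va_arg (see
   alpha_gimplify_va_arg_1 below) fetches an integer argument roughly as

       x = *(int *) (ap.__base + ap.__offset);
       ap.__offset += 8;

   while a floating-point argument uses the biased offset

       t = ap.__offset < 48 ? ap.__offset - 48 : ap.__offset;
       d = *(double *) (ap.__base + t);
       ap.__offset += 8;

   These are the two address forms that alpha_stdarg_optimize_hook
   above looks for in order to tell GPR uses apart from FPR uses.  */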
6045
6046 /* Perform any actions needed for a function that is receiving a
6047 variable number of arguments. */
6048
6049 static void
6050 alpha_setup_incoming_varargs (cumulative_args_t pcum, enum machine_mode mode,
6051 tree type, int *pretend_size, int no_rtl)
6052 {
6053 CUMULATIVE_ARGS cum = *get_cumulative_args (pcum);
6054
6055 /* Skip the current argument. */
6056 targetm.calls.function_arg_advance (pack_cumulative_args (&cum), mode, type,
6057 true);
6058
6059 #if TARGET_ABI_OPEN_VMS
6060 /* For VMS, we allocate space for all 6 arg registers plus a count.
6061
6062 However, if NO registers need to be saved, don't allocate any space.
6063 This is not only because we won't need the space, but because AP
6064 includes the current_pretend_args_size and we don't want to mess up
6065 any ap-relative addresses already made. */
6066 if (cum.num_args < 6)
6067 {
6068 if (!no_rtl)
6069 {
6070 emit_move_insn (gen_rtx_REG (DImode, 1), virtual_incoming_args_rtx);
6071 emit_insn (gen_arg_home ());
6072 }
6073 *pretend_size = 7 * UNITS_PER_WORD;
6074 }
6075 #else
6076 /* On OSF/1 and friends, we allocate space for all 12 arg registers, but
6077 only push those that are remaining. However, if NO registers need to
6078 be saved, don't allocate any space. This is not only because we won't
6079 need the space, but because AP includes the current_pretend_args_size
6080 and we don't want to mess up any ap-relative addresses already made.
6081
6082 If we are not to use the floating-point registers, save the integer
6083 registers where we would put the floating-point registers. This is
6084 not the most efficient way to implement varargs with just one register
6085 class, but it isn't worth doing anything more efficient in this rare
6086 case. */
6087 if (cum >= 6)
6088 return;
6089
6090 if (!no_rtl)
6091 {
6092 int count;
6093 alias_set_type set = get_varargs_alias_set ();
6094 rtx tmp;
6095
6096 count = cfun->va_list_gpr_size / UNITS_PER_WORD;
6097 if (count > 6 - cum)
6098 count = 6 - cum;
6099
6100 /* Detect whether integer registers or floating-point registers
6101 are needed by the detected va_arg statements. See above for
6102 how these values are computed. Note that the "escape" value
6103 is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of
6104 these bits set. */
6105 gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3);
6106
6107 if (cfun->va_list_fpr_size & 1)
6108 {
6109 tmp = gen_rtx_MEM (BLKmode,
6110 plus_constant (Pmode, virtual_incoming_args_rtx,
6111 (cum + 6) * UNITS_PER_WORD));
6112 MEM_NOTRAP_P (tmp) = 1;
6113 set_mem_alias_set (tmp, set);
6114 move_block_from_reg (16 + cum, tmp, count);
6115 }
6116
6117 if (cfun->va_list_fpr_size & 2)
6118 {
6119 tmp = gen_rtx_MEM (BLKmode,
6120 plus_constant (Pmode, virtual_incoming_args_rtx,
6121 cum * UNITS_PER_WORD));
6122 MEM_NOTRAP_P (tmp) = 1;
6123 set_mem_alias_set (tmp, set);
6124 move_block_from_reg (16 + cum + TARGET_FPREGS*32, tmp, count);
6125 }
6126 }
6127 *pretend_size = 12 * UNITS_PER_WORD;
6128 #endif
6129 }
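/* For illustration, on OSF with a prototype such as int f (int a, ...)
   the code above runs with cum == 1, so up to five floating-point
   argument registers ($f17-$f21) are dumped at bytes 8..47 of the
   pretend-args area and the matching integer registers ($17-$21) at
   bytes 56..95, with *pretend_size set to 96.  Whether each block is
   actually stored depends on the cfun->va_list_fpr_size bits computed
   by alpha_stdarg_optimize_hook.  */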
6130
6131 static void
6132 alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6133 {
6134 HOST_WIDE_INT offset;
6135 tree t, offset_field, base_field;
6136
6137 if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK)
6138 return;
6139
6140 /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base
6141 up by 48, storing fp arg registers in the first 48 bytes, and the
6142 integer arg registers in the next 48 bytes. This is only done,
6143 however, if any integer registers need to be stored.
6144
6145 If no integer registers need be stored, then we must subtract 48
6146 in order to account for the integer arg registers which are counted
6147 in argsize above, but which are not actually stored on the stack.
6148 Must further be careful here about structures straddling the last
6149 integer argument register; that futzes with pretend_args_size,
6150 which changes the meaning of AP. */
6151
6152 if (NUM_ARGS < 6)
6153 offset = TARGET_ABI_OPEN_VMS ? UNITS_PER_WORD : 6 * UNITS_PER_WORD;
6154 else
6155 offset = -6 * UNITS_PER_WORD + crtl->args.pretend_args_size;
6156
6157 if (TARGET_ABI_OPEN_VMS)
6158 {
6159 t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6160 t = fold_build_pointer_plus_hwi (t, offset + NUM_ARGS * UNITS_PER_WORD);
6161 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
6162 TREE_SIDE_EFFECTS (t) = 1;
6163 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6164 }
6165 else
6166 {
6167 base_field = TYPE_FIELDS (TREE_TYPE (valist));
6168 offset_field = DECL_CHAIN (base_field);
6169
6170 base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6171 valist, base_field, NULL_TREE);
6172 offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6173 valist, offset_field, NULL_TREE);
6174
6175 t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6176 t = fold_build_pointer_plus_hwi (t, offset);
6177 t = build2 (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t);
6178 TREE_SIDE_EFFECTS (t) = 1;
6179 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6180
6181 t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD);
6182 t = build2 (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t);
6183 TREE_SIDE_EFFECTS (t) = 1;
6184 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6185 }
6186 }
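/* Continuing the illustration: for int f (int a, ...) on OSF we have
   NUM_ARGS == 1, which is less than 6, so va_start effectively does

       ap.__base   = <virtual incoming args pointer> + 6 * 8;
       ap.__offset = 1 * 8;

   With the save-area layout built by alpha_setup_incoming_varargs,
   ap.__base + i*8 then addresses integer (or stack) argument slot i,
   and ap.__base + i*8 - 48 the corresponding FP register slot, which
   is what alpha_gimplify_va_arg_1 below relies on.  */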
6187
6188 static tree
6189 alpha_gimplify_va_arg_1 (tree type, tree base, tree offset,
6190 gimple_seq *pre_p)
6191 {
6192 tree type_size, ptr_type, addend, t, addr;
6193 gimple_seq internal_post;
6194
6195 /* If the type could not be passed in registers, skip the block
6196 reserved for the registers. */
6197 if (targetm.calls.must_pass_in_stack (TYPE_MODE (type), type))
6198 {
6199 t = build_int_cst (TREE_TYPE (offset), 6*8);
6200 gimplify_assign (offset,
6201 build2 (MAX_EXPR, TREE_TYPE (offset), offset, t),
6202 pre_p);
6203 }
6204
6205 addend = offset;
6206 ptr_type = build_pointer_type_for_mode (type, ptr_mode, true);
6207
6208 if (TREE_CODE (type) == COMPLEX_TYPE)
6209 {
6210 tree real_part, imag_part, real_temp;
6211
6212 real_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6213 offset, pre_p);
6214
6215 /* Copy the value into a new temporary, lest the formal temporary
6216 be reused out from under us. */
6217 real_temp = get_initialized_tmp_var (real_part, pre_p, NULL);
6218
6219 imag_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6220 offset, pre_p);
6221
6222 return build2 (COMPLEX_EXPR, type, real_temp, imag_part);
6223 }
6224 else if (TREE_CODE (type) == REAL_TYPE)
6225 {
6226 tree fpaddend, cond, fourtyeight;
6227
6228 fourtyeight = build_int_cst (TREE_TYPE (addend), 6*8);
6229 fpaddend = fold_build2 (MINUS_EXPR, TREE_TYPE (addend),
6230 addend, fourtyeight);
6231 cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight);
6232 addend = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond,
6233 fpaddend, addend);
6234 }
6235
6236 /* Build the final address and force that value into a temporary. */
6237 addr = fold_build_pointer_plus (fold_convert (ptr_type, base), addend);
6238 internal_post = NULL;
6239 gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue);
6240 gimple_seq_add_seq (pre_p, internal_post);
6241
6242 /* Update the offset field. */
6243 type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type));
6244 if (type_size == NULL || TREE_OVERFLOW (type_size))
6245 t = size_zero_node;
6246 else
6247 {
6248 t = size_binop (PLUS_EXPR, type_size, size_int (7));
6249 t = size_binop (TRUNC_DIV_EXPR, t, size_int (8));
6250 t = size_binop (MULT_EXPR, t, size_int (8));
6251 }
6252 t = fold_convert (TREE_TYPE (offset), t);
6253 gimplify_assign (offset, build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t),
6254 pre_p);
6255
6256 return build_va_arg_indirect_ref (addr);
6257 }
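/* For example, a 12-byte aggregate fetched above advances __offset by
   ((12 + 7) / 8) * 8 == 16, keeping the offset a multiple of 8.  */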
6258
6259 static tree
6260 alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6261 gimple_seq *post_p)
6262 {
6263 tree offset_field, base_field, offset, base, t, r;
6264 bool indirect;
6265
6266 if (TARGET_ABI_OPEN_VMS)
6267 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6268
6269 base_field = TYPE_FIELDS (va_list_type_node);
6270 offset_field = DECL_CHAIN (base_field);
6271 base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6272 valist, base_field, NULL_TREE);
6273 offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6274 valist, offset_field, NULL_TREE);
6275
6276 /* Pull the fields of the structure out into temporaries. Since we never
6277 modify the base field, we can use a formal temporary. Sign-extend the
6278 offset field so that it's the proper width for pointer arithmetic. */
6279 base = get_formal_tmp_var (base_field, pre_p);
6280
6281 t = fold_convert (build_nonstandard_integer_type (64, 0), offset_field);
6282 offset = get_initialized_tmp_var (t, pre_p, NULL);
6283
6284 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6285 if (indirect)
6286 type = build_pointer_type_for_mode (type, ptr_mode, true);
6287
6288 /* Find the value. Note that this will be a stable indirection, or
6289 a composite of stable indirections in the case of complex. */
6290 r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p);
6291
6292 /* Stuff the offset temporary back into its field. */
6293 gimplify_assign (unshare_expr (offset_field),
6294 fold_convert (TREE_TYPE (offset_field), offset), pre_p);
6295
6296 if (indirect)
6297 r = build_va_arg_indirect_ref (r);
6298
6299 return r;
6300 }
6301 \f
6302 /* Builtins. */
6303
6304 enum alpha_builtin
6305 {
6306 ALPHA_BUILTIN_CMPBGE,
6307 ALPHA_BUILTIN_EXTBL,
6308 ALPHA_BUILTIN_EXTWL,
6309 ALPHA_BUILTIN_EXTLL,
6310 ALPHA_BUILTIN_EXTQL,
6311 ALPHA_BUILTIN_EXTWH,
6312 ALPHA_BUILTIN_EXTLH,
6313 ALPHA_BUILTIN_EXTQH,
6314 ALPHA_BUILTIN_INSBL,
6315 ALPHA_BUILTIN_INSWL,
6316 ALPHA_BUILTIN_INSLL,
6317 ALPHA_BUILTIN_INSQL,
6318 ALPHA_BUILTIN_INSWH,
6319 ALPHA_BUILTIN_INSLH,
6320 ALPHA_BUILTIN_INSQH,
6321 ALPHA_BUILTIN_MSKBL,
6322 ALPHA_BUILTIN_MSKWL,
6323 ALPHA_BUILTIN_MSKLL,
6324 ALPHA_BUILTIN_MSKQL,
6325 ALPHA_BUILTIN_MSKWH,
6326 ALPHA_BUILTIN_MSKLH,
6327 ALPHA_BUILTIN_MSKQH,
6328 ALPHA_BUILTIN_UMULH,
6329 ALPHA_BUILTIN_ZAP,
6330 ALPHA_BUILTIN_ZAPNOT,
6331 ALPHA_BUILTIN_AMASK,
6332 ALPHA_BUILTIN_IMPLVER,
6333 ALPHA_BUILTIN_RPCC,
6334 ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6335 ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER,
6336
6337 /* TARGET_MAX */
6338 ALPHA_BUILTIN_MINUB8,
6339 ALPHA_BUILTIN_MINSB8,
6340 ALPHA_BUILTIN_MINUW4,
6341 ALPHA_BUILTIN_MINSW4,
6342 ALPHA_BUILTIN_MAXUB8,
6343 ALPHA_BUILTIN_MAXSB8,
6344 ALPHA_BUILTIN_MAXUW4,
6345 ALPHA_BUILTIN_MAXSW4,
6346 ALPHA_BUILTIN_PERR,
6347 ALPHA_BUILTIN_PKLB,
6348 ALPHA_BUILTIN_PKWB,
6349 ALPHA_BUILTIN_UNPKBL,
6350 ALPHA_BUILTIN_UNPKBW,
6351
6352 /* TARGET_CIX */
6353 ALPHA_BUILTIN_CTTZ,
6354 ALPHA_BUILTIN_CTLZ,
6355 ALPHA_BUILTIN_CTPOP,
6356
6357 ALPHA_BUILTIN_max
6358 };
6359
6360 static enum insn_code const code_for_builtin[ALPHA_BUILTIN_max] = {
6361 CODE_FOR_builtin_cmpbge,
6362 CODE_FOR_extbl,
6363 CODE_FOR_extwl,
6364 CODE_FOR_extll,
6365 CODE_FOR_extql,
6366 CODE_FOR_extwh,
6367 CODE_FOR_extlh,
6368 CODE_FOR_extqh,
6369 CODE_FOR_builtin_insbl,
6370 CODE_FOR_builtin_inswl,
6371 CODE_FOR_builtin_insll,
6372 CODE_FOR_insql,
6373 CODE_FOR_inswh,
6374 CODE_FOR_inslh,
6375 CODE_FOR_insqh,
6376 CODE_FOR_mskbl,
6377 CODE_FOR_mskwl,
6378 CODE_FOR_mskll,
6379 CODE_FOR_mskql,
6380 CODE_FOR_mskwh,
6381 CODE_FOR_msklh,
6382 CODE_FOR_mskqh,
6383 CODE_FOR_umuldi3_highpart,
6384 CODE_FOR_builtin_zap,
6385 CODE_FOR_builtin_zapnot,
6386 CODE_FOR_builtin_amask,
6387 CODE_FOR_builtin_implver,
6388 CODE_FOR_builtin_rpcc,
6389 CODE_FOR_builtin_establish_vms_condition_handler,
6390 CODE_FOR_builtin_revert_vms_condition_handler,
6391
6392 /* TARGET_MAX */
6393 CODE_FOR_builtin_minub8,
6394 CODE_FOR_builtin_minsb8,
6395 CODE_FOR_builtin_minuw4,
6396 CODE_FOR_builtin_minsw4,
6397 CODE_FOR_builtin_maxub8,
6398 CODE_FOR_builtin_maxsb8,
6399 CODE_FOR_builtin_maxuw4,
6400 CODE_FOR_builtin_maxsw4,
6401 CODE_FOR_builtin_perr,
6402 CODE_FOR_builtin_pklb,
6403 CODE_FOR_builtin_pkwb,
6404 CODE_FOR_builtin_unpkbl,
6405 CODE_FOR_builtin_unpkbw,
6406
6407 /* TARGET_CIX */
6408 CODE_FOR_ctzdi2,
6409 CODE_FOR_clzdi2,
6410 CODE_FOR_popcountdi2
6411 };
6412
6413 struct alpha_builtin_def
6414 {
6415 const char *name;
6416 enum alpha_builtin code;
6417 unsigned int target_mask;
6418 bool is_const;
6419 };
6420
6421 static struct alpha_builtin_def const zero_arg_builtins[] = {
6422 { "__builtin_alpha_implver", ALPHA_BUILTIN_IMPLVER, 0, true },
6423 { "__builtin_alpha_rpcc", ALPHA_BUILTIN_RPCC, 0, false }
6424 };
6425
6426 static struct alpha_builtin_def const one_arg_builtins[] = {
6427 { "__builtin_alpha_amask", ALPHA_BUILTIN_AMASK, 0, true },
6428 { "__builtin_alpha_pklb", ALPHA_BUILTIN_PKLB, MASK_MAX, true },
6429 { "__builtin_alpha_pkwb", ALPHA_BUILTIN_PKWB, MASK_MAX, true },
6430 { "__builtin_alpha_unpkbl", ALPHA_BUILTIN_UNPKBL, MASK_MAX, true },
6431 { "__builtin_alpha_unpkbw", ALPHA_BUILTIN_UNPKBW, MASK_MAX, true },
6432 { "__builtin_alpha_cttz", ALPHA_BUILTIN_CTTZ, MASK_CIX, true },
6433 { "__builtin_alpha_ctlz", ALPHA_BUILTIN_CTLZ, MASK_CIX, true },
6434 { "__builtin_alpha_ctpop", ALPHA_BUILTIN_CTPOP, MASK_CIX, true }
6435 };
6436
6437 static struct alpha_builtin_def const two_arg_builtins[] = {
6438 { "__builtin_alpha_cmpbge", ALPHA_BUILTIN_CMPBGE, 0, true },
6439 { "__builtin_alpha_extbl", ALPHA_BUILTIN_EXTBL, 0, true },
6440 { "__builtin_alpha_extwl", ALPHA_BUILTIN_EXTWL, 0, true },
6441 { "__builtin_alpha_extll", ALPHA_BUILTIN_EXTLL, 0, true },
6442 { "__builtin_alpha_extql", ALPHA_BUILTIN_EXTQL, 0, true },
6443 { "__builtin_alpha_extwh", ALPHA_BUILTIN_EXTWH, 0, true },
6444 { "__builtin_alpha_extlh", ALPHA_BUILTIN_EXTLH, 0, true },
6445 { "__builtin_alpha_extqh", ALPHA_BUILTIN_EXTQH, 0, true },
6446 { "__builtin_alpha_insbl", ALPHA_BUILTIN_INSBL, 0, true },
6447 { "__builtin_alpha_inswl", ALPHA_BUILTIN_INSWL, 0, true },
6448 { "__builtin_alpha_insll", ALPHA_BUILTIN_INSLL, 0, true },
6449 { "__builtin_alpha_insql", ALPHA_BUILTIN_INSQL, 0, true },
6450 { "__builtin_alpha_inswh", ALPHA_BUILTIN_INSWH, 0, true },
6451 { "__builtin_alpha_inslh", ALPHA_BUILTIN_INSLH, 0, true },
6452 { "__builtin_alpha_insqh", ALPHA_BUILTIN_INSQH, 0, true },
6453 { "__builtin_alpha_mskbl", ALPHA_BUILTIN_MSKBL, 0, true },
6454 { "__builtin_alpha_mskwl", ALPHA_BUILTIN_MSKWL, 0, true },
6455 { "__builtin_alpha_mskll", ALPHA_BUILTIN_MSKLL, 0, true },
6456 { "__builtin_alpha_mskql", ALPHA_BUILTIN_MSKQL, 0, true },
6457 { "__builtin_alpha_mskwh", ALPHA_BUILTIN_MSKWH, 0, true },
6458 { "__builtin_alpha_msklh", ALPHA_BUILTIN_MSKLH, 0, true },
6459 { "__builtin_alpha_mskqh", ALPHA_BUILTIN_MSKQH, 0, true },
6460 { "__builtin_alpha_umulh", ALPHA_BUILTIN_UMULH, 0, true },
6461 { "__builtin_alpha_zap", ALPHA_BUILTIN_ZAP, 0, true },
6462 { "__builtin_alpha_zapnot", ALPHA_BUILTIN_ZAPNOT, 0, true },
6463 { "__builtin_alpha_minub8", ALPHA_BUILTIN_MINUB8, MASK_MAX, true },
6464 { "__builtin_alpha_minsb8", ALPHA_BUILTIN_MINSB8, MASK_MAX, true },
6465 { "__builtin_alpha_minuw4", ALPHA_BUILTIN_MINUW4, MASK_MAX, true },
6466 { "__builtin_alpha_minsw4", ALPHA_BUILTIN_MINSW4, MASK_MAX, true },
6467 { "__builtin_alpha_maxub8", ALPHA_BUILTIN_MAXUB8, MASK_MAX, true },
6468 { "__builtin_alpha_maxsb8", ALPHA_BUILTIN_MAXSB8, MASK_MAX, true },
6469 { "__builtin_alpha_maxuw4", ALPHA_BUILTIN_MAXUW4, MASK_MAX, true },
6470 { "__builtin_alpha_maxsw4", ALPHA_BUILTIN_MAXSW4, MASK_MAX, true },
6471 { "__builtin_alpha_perr", ALPHA_BUILTIN_PERR, MASK_MAX, true }
6472 };
6473
6474 static GTY(()) tree alpha_dimode_u;
6475 static GTY(()) tree alpha_v8qi_u;
6476 static GTY(()) tree alpha_v8qi_s;
6477 static GTY(()) tree alpha_v4hi_u;
6478 static GTY(()) tree alpha_v4hi_s;
6479
6480 static GTY(()) tree alpha_builtins[(int) ALPHA_BUILTIN_max];
6481
6482 /* Return the alpha builtin for CODE. */
6483
6484 static tree
6485 alpha_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
6486 {
6487 if (code >= ALPHA_BUILTIN_max)
6488 return error_mark_node;
6489 return alpha_builtins[code];
6490 }
6491
6492 /* Helper function of alpha_init_builtins. Add the built-in specified
6493 by NAME, TYPE, CODE, and ECF. */
6494
6495 static void
6496 alpha_builtin_function (const char *name, tree ftype,
6497 enum alpha_builtin code, unsigned ecf)
6498 {
6499 tree decl = add_builtin_function (name, ftype, (int) code,
6500 BUILT_IN_MD, NULL, NULL_TREE);
6501
6502 if (ecf & ECF_CONST)
6503 TREE_READONLY (decl) = 1;
6504 if (ecf & ECF_NOTHROW)
6505 TREE_NOTHROW (decl) = 1;
6506
6507 alpha_builtins [(int) code] = decl;
6508 }
6509
6510 /* Helper function of alpha_init_builtins. Add the COUNT built-in
6511 functions pointed to by P, with function type FTYPE. */
6512
6513 static void
6514 alpha_add_builtins (const struct alpha_builtin_def *p, size_t count,
6515 tree ftype)
6516 {
6517 size_t i;
6518
6519 for (i = 0; i < count; ++i, ++p)
6520 if ((target_flags & p->target_mask) == p->target_mask)
6521 alpha_builtin_function (p->name, ftype, p->code,
6522 (p->is_const ? ECF_CONST : 0) | ECF_NOTHROW);
6523 }
6524
6525 static void
6526 alpha_init_builtins (void)
6527 {
6528 tree ftype;
6529
6530 alpha_dimode_u = lang_hooks.types.type_for_mode (DImode, 1);
6531 alpha_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8);
6532 alpha_v8qi_s = build_vector_type (intQI_type_node, 8);
6533 alpha_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4);
6534 alpha_v4hi_s = build_vector_type (intHI_type_node, 4);
6535
6536 ftype = build_function_type_list (alpha_dimode_u, NULL_TREE);
6537 alpha_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins), ftype);
6538
6539 ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u, NULL_TREE);
6540 alpha_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins), ftype);
6541
6542 ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u,
6543 alpha_dimode_u, NULL_TREE);
6544 alpha_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins), ftype);
6545
6546 if (TARGET_ABI_OPEN_VMS)
6547 {
6548 ftype = build_function_type_list (ptr_type_node, ptr_type_node,
6549 NULL_TREE);
6550 alpha_builtin_function ("__builtin_establish_vms_condition_handler",
6551 ftype,
6552 ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6553 0);
6554
6555 ftype = build_function_type_list (ptr_type_node, void_type_node,
6556 NULL_TREE);
6557 alpha_builtin_function ("__builtin_revert_vms_condition_handler", ftype,
6558 ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, 0);
6559
6560 vms_patch_builtins ();
6561 }
6562 }
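/* Illustrative user-level use of the builtins registered above:

       unsigned long m = __builtin_alpha_cmpbge (x, y);
       unsigned long z = __builtin_alpha_zapnot (x, 0x0f);

   Each call either expands to the instruction selected through
   code_for_builtin[] or, when its arguments are constant, is folded
   at compile time by alpha_fold_builtin below.  */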
6563
6564 /* Expand an expression EXP that calls a built-in function,
6565 with result going to TARGET if that's convenient
6566 (and in mode MODE if that's convenient).
6567 SUBTARGET may be used as the target for computing one of EXP's operands.
6568 IGNORE is nonzero if the value is to be ignored. */
6569
6570 static rtx
6571 alpha_expand_builtin (tree exp, rtx target,
6572 rtx subtarget ATTRIBUTE_UNUSED,
6573 enum machine_mode mode ATTRIBUTE_UNUSED,
6574 int ignore ATTRIBUTE_UNUSED)
6575 {
6576 #define MAX_ARGS 2
6577
6578 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6579 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6580 tree arg;
6581 call_expr_arg_iterator iter;
6582 enum insn_code icode;
6583 rtx op[MAX_ARGS], pat;
6584 int arity;
6585 bool nonvoid;
6586
6587 if (fcode >= ALPHA_BUILTIN_max)
6588 internal_error ("bad builtin fcode");
6589 icode = code_for_builtin[fcode];
6590 if (icode == 0)
6591 internal_error ("bad builtin fcode");
6592
6593 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
6594
6595 arity = 0;
6596 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
6597 {
6598 const struct insn_operand_data *insn_op;
6599
6600 if (arg == error_mark_node)
6601 return NULL_RTX;
6602 if (arity >= MAX_ARGS)
6603 return NULL_RTX;
6604
6605 insn_op = &insn_data[icode].operand[arity + nonvoid];
6606
6607 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
6608
6609 if (!(*insn_op->predicate) (op[arity], insn_op->mode))
6610 op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
6611 arity++;
6612 }
6613
6614 if (nonvoid)
6615 {
6616 enum machine_mode tmode = insn_data[icode].operand[0].mode;
6617 if (!target
6618 || GET_MODE (target) != tmode
6619 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
6620 target = gen_reg_rtx (tmode);
6621 }
6622
6623 switch (arity)
6624 {
6625 case 0:
6626 pat = GEN_FCN (icode) (target);
6627 break;
6628 case 1:
6629 if (nonvoid)
6630 pat = GEN_FCN (icode) (target, op[0]);
6631 else
6632 pat = GEN_FCN (icode) (op[0]);
6633 break;
6634 case 2:
6635 pat = GEN_FCN (icode) (target, op[0], op[1]);
6636 break;
6637 default:
6638 gcc_unreachable ();
6639 }
6640 if (!pat)
6641 return NULL_RTX;
6642 emit_insn (pat);
6643
6644 if (nonvoid)
6645 return target;
6646 else
6647 return const0_rtx;
6648 }
6649
6650
6651 /* Several bits below assume HWI >= 64 bits. This should be enforced
6652 by config.gcc. */
6653 #if HOST_BITS_PER_WIDE_INT < 64
6654 # error "HOST_WIDE_INT too small"
6655 #endif
6656
6657 /* Fold the builtin for the CMPBGE instruction. This is a vector comparison
6658 with an 8-bit output vector. OPINT contains the integer operands; bit N
6659 of OP_CONST is set if OPINT[N] is valid. */
6660
6661 static tree
6662 alpha_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const)
6663 {
6664 if (op_const == 3)
6665 {
6666 int i, val;
6667 for (i = 0, val = 0; i < 8; ++i)
6668 {
6669 unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff;
6670 unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff;
6671 if (c0 >= c1)
6672 val |= 1 << i;
6673 }
6674 return build_int_cst (alpha_dimode_u, val);
6675 }
6676 else if (op_const == 2 && opint[1] == 0)
6677 return build_int_cst (alpha_dimode_u, 0xff);
6678 return NULL;
6679 }
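/* For example, with both operands constant,
   __builtin_alpha_cmpbge (0x1122334455667788, 0x1122334455667789)
   folds to 0xfe: byte 0 of the first operand (0x88) is below byte 0
   of the second (0x89), while every other byte pair compares >=.  */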
6680
6681 /* Fold the builtin for the ZAPNOT instruction. This is essentially a
6682 specialized form of an AND operation. Other byte manipulation instructions
6683 are defined in terms of this instruction, so this is also used as a
6684 subroutine for other builtins.
6685
6686 OP contains the tree operands; OPINT contains the extracted integer values.
6687 Bit N of OP_CONST is set if OPINT[N] is valid. OP may be null if only
6688 OPINT is to be considered. */
6689
6690 static tree
6691 alpha_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[],
6692 long op_const)
6693 {
6694 if (op_const & 2)
6695 {
6696 unsigned HOST_WIDE_INT mask = 0;
6697 int i;
6698
6699 for (i = 0; i < 8; ++i)
6700 if ((opint[1] >> i) & 1)
6701 mask |= (unsigned HOST_WIDE_INT)0xff << (i * 8);
6702
6703 if (op_const & 1)
6704 return build_int_cst (alpha_dimode_u, opint[0] & mask);
6705
6706 if (op)
6707 return fold_build2 (BIT_AND_EXPR, alpha_dimode_u, op[0],
6708 build_int_cst (alpha_dimode_u, mask));
6709 }
6710 else if ((op_const & 1) && opint[0] == 0)
6711 return build_int_cst (alpha_dimode_u, 0);
6712 return NULL;
6713 }
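/* For example, a constant selector of 0x0f expands to the byte mask
   0x00000000ffffffff, so __builtin_alpha_zapnot (x, 0x0f) folds to
   x & 0xffffffff.  __builtin_alpha_zap arrives here with the selector
   complemented (see alpha_fold_builtin), so the same selector there
   folds to x & 0xffffffff00000000.  */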
6714
6715 /* Fold the builtins for the EXT family of instructions. */
6716
6717 static tree
6718 alpha_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[],
6719 long op_const, unsigned HOST_WIDE_INT bytemask,
6720 bool is_high)
6721 {
6722 long zap_const = 2;
6723 tree *zap_op = NULL;
6724
6725 if (op_const & 2)
6726 {
6727 unsigned HOST_WIDE_INT loc;
6728
6729 loc = opint[1] & 7;
6730 loc *= BITS_PER_UNIT;
6731
6732 if (loc != 0)
6733 {
6734 if (op_const & 1)
6735 {
6736 unsigned HOST_WIDE_INT temp = opint[0];
6737 if (is_high)
6738 temp <<= loc;
6739 else
6740 temp >>= loc;
6741 opint[0] = temp;
6742 zap_const = 3;
6743 }
6744 }
6745 else
6746 zap_op = op;
6747 }
6748
6749 opint[1] = bytemask;
6750 return alpha_fold_builtin_zapnot (zap_op, opint, zap_const);
6751 }
6752
6753 /* Fold the builtins for the INS family of instructions. */
6754
6755 static tree
6756 alpha_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[],
6757 long op_const, unsigned HOST_WIDE_INT bytemask,
6758 bool is_high)
6759 {
6760 if ((op_const & 1) && opint[0] == 0)
6761 return build_int_cst (alpha_dimode_u, 0);
6762
6763 if (op_const & 2)
6764 {
6765 unsigned HOST_WIDE_INT temp, loc, byteloc;
6766 tree *zap_op = NULL;
6767
6768 loc = opint[1] & 7;
6769 bytemask <<= loc;
6770
6771 temp = opint[0];
6772 if (is_high)
6773 {
6774 byteloc = (64 - (loc * 8)) & 0x3f;
6775 if (byteloc == 0)
6776 zap_op = op;
6777 else
6778 temp >>= byteloc;
6779 bytemask >>= 8;
6780 }
6781 else
6782 {
6783 byteloc = loc * 8;
6784 if (byteloc == 0)
6785 zap_op = op;
6786 else
6787 temp <<= byteloc;
6788 }
6789
6790 opint[0] = temp;
6791 opint[1] = bytemask;
6792 return alpha_fold_builtin_zapnot (zap_op, opint, op_const);
6793 }
6794
6795 return NULL;
6796 }
6797
6798 static tree
6799 alpha_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[],
6800 long op_const, unsigned HOST_WIDE_INT bytemask,
6801 bool is_high)
6802 {
6803 if (op_const & 2)
6804 {
6805 unsigned HOST_WIDE_INT loc;
6806
6807 loc = opint[1] & 7;
6808 bytemask <<= loc;
6809
6810 if (is_high)
6811 bytemask >>= 8;
6812
6813 opint[1] = bytemask ^ 0xff;
6814 }
6815
6816 return alpha_fold_builtin_zapnot (op, opint, op_const);
6817 }
6818
6819 static tree
6820 alpha_fold_vector_minmax (enum tree_code code, tree op[], tree vtype)
6821 {
6822 tree op0 = fold_convert (vtype, op[0]);
6823 tree op1 = fold_convert (vtype, op[1]);
6824 tree val = fold_build2 (code, vtype, op0, op1);
6825 return fold_build1 (VIEW_CONVERT_EXPR, alpha_dimode_u, val);
6826 }
6827
6828 static tree
6829 alpha_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const)
6830 {
6831 unsigned HOST_WIDE_INT temp = 0;
6832 int i;
6833
6834 if (op_const != 3)
6835 return NULL;
6836
6837 for (i = 0; i < 8; ++i)
6838 {
6839 unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff;
6840 unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff;
6841 if (a >= b)
6842 temp += a - b;
6843 else
6844 temp += b - a;
6845 }
6846
6847 return build_int_cst (alpha_dimode_u, temp);
6848 }
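/* For example, __builtin_alpha_perr (0x0102030405060708,
   0x0808080808080808) folds to 0 + 1 + 2 + ... + 7 == 28, the sum of
   the absolute differences of the eight byte pairs.  */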
6849
6850 static tree
6851 alpha_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const)
6852 {
6853 unsigned HOST_WIDE_INT temp;
6854
6855 if (op_const == 0)
6856 return NULL;
6857
6858 temp = opint[0] & 0xff;
6859 temp |= (opint[0] >> 24) & 0xff00;
6860
6861 return build_int_cst (alpha_dimode_u, temp);
6862 }
6863
6864 static tree
6865 alpha_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const)
6866 {
6867 unsigned HOST_WIDE_INT temp;
6868
6869 if (op_const == 0)
6870 return NULL;
6871
6872 temp = opint[0] & 0xff;
6873 temp |= (opint[0] >> 8) & 0xff00;
6874 temp |= (opint[0] >> 16) & 0xff0000;
6875 temp |= (opint[0] >> 24) & 0xff000000;
6876
6877 return build_int_cst (alpha_dimode_u, temp);
6878 }
6879
6880 static tree
6881 alpha_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const)
6882 {
6883 unsigned HOST_WIDE_INT temp;
6884
6885 if (op_const == 0)
6886 return NULL;
6887
6888 temp = opint[0] & 0xff;
6889 temp |= (opint[0] & 0xff00) << 24;
6890
6891 return build_int_cst (alpha_dimode_u, temp);
6892 }
6893
6894 static tree
6895 alpha_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const)
6896 {
6897 unsigned HOST_WIDE_INT temp;
6898
6899 if (op_const == 0)
6900 return NULL;
6901
6902 temp = opint[0] & 0xff;
6903 temp |= (opint[0] & 0x0000ff00) << 8;
6904 temp |= (opint[0] & 0x00ff0000) << 16;
6905 temp |= (opint[0] & 0xff000000) << 24;
6906
6907 return build_int_cst (alpha_dimode_u, temp);
6908 }
6909
6910 static tree
6911 alpha_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const)
6912 {
6913 unsigned HOST_WIDE_INT temp;
6914
6915 if (op_const == 0)
6916 return NULL;
6917
6918 if (opint[0] == 0)
6919 temp = 64;
6920 else
6921 temp = exact_log2 (opint[0] & -opint[0]);
6922
6923 return build_int_cst (alpha_dimode_u, temp);
6924 }
6925
6926 static tree
6927 alpha_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const)
6928 {
6929 unsigned HOST_WIDE_INT temp;
6930
6931 if (op_const == 0)
6932 return NULL;
6933
6934 if (opint[0] == 0)
6935 temp = 64;
6936 else
6937 temp = 64 - floor_log2 (opint[0]) - 1;
6938
6939 return build_int_cst (alpha_dimode_u, temp);
6940 }
6941
6942 static tree
6943 alpha_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const)
6944 {
6945 unsigned HOST_WIDE_INT temp, op;
6946
6947 if (op_const == 0)
6948 return NULL;
6949
6950 op = opint[0];
6951 temp = 0;
6952 while (op)
6953 temp++, op &= op - 1;
6954
6955 return build_int_cst (alpha_dimode_u, temp);
6956 }
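/* The loop above relies on the usual trick that op &= op - 1 clears
   the lowest set bit, so e.g. a constant argument of 0xf0 folds to 4
   after four iterations.  */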
6957
6958 /* Fold one of our builtin functions. */
6959
6960 static tree
6961 alpha_fold_builtin (tree fndecl, int n_args, tree *op,
6962 bool ignore ATTRIBUTE_UNUSED)
6963 {
6964 unsigned HOST_WIDE_INT opint[MAX_ARGS];
6965 long op_const = 0;
6966 int i;
6967
6968 if (n_args > MAX_ARGS)
6969 return NULL;
6970
6971 for (i = 0; i < n_args; i++)
6972 {
6973 tree arg = op[i];
6974 if (arg == error_mark_node)
6975 return NULL;
6976
6977 opint[i] = 0;
6978 if (TREE_CODE (arg) == INTEGER_CST)
6979 {
6980 op_const |= 1L << i;
6981 opint[i] = int_cst_value (arg);
6982 }
6983 }
6984
6985 switch (DECL_FUNCTION_CODE (fndecl))
6986 {
6987 case ALPHA_BUILTIN_CMPBGE:
6988 return alpha_fold_builtin_cmpbge (opint, op_const);
6989
6990 case ALPHA_BUILTIN_EXTBL:
6991 return alpha_fold_builtin_extxx (op, opint, op_const, 0x01, false);
6992 case ALPHA_BUILTIN_EXTWL:
6993 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, false);
6994 case ALPHA_BUILTIN_EXTLL:
6995 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, false);
6996 case ALPHA_BUILTIN_EXTQL:
6997 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, false);
6998 case ALPHA_BUILTIN_EXTWH:
6999 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, true);
7000 case ALPHA_BUILTIN_EXTLH:
7001 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, true);
7002 case ALPHA_BUILTIN_EXTQH:
7003 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, true);
7004
7005 case ALPHA_BUILTIN_INSBL:
7006 return alpha_fold_builtin_insxx (op, opint, op_const, 0x01, false);
7007 case ALPHA_BUILTIN_INSWL:
7008 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, false);
7009 case ALPHA_BUILTIN_INSLL:
7010 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, false);
7011 case ALPHA_BUILTIN_INSQL:
7012 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, false);
7013 case ALPHA_BUILTIN_INSWH:
7014 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, true);
7015 case ALPHA_BUILTIN_INSLH:
7016 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, true);
7017 case ALPHA_BUILTIN_INSQH:
7018 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, true);
7019
7020 case ALPHA_BUILTIN_MSKBL:
7021 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x01, false);
7022 case ALPHA_BUILTIN_MSKWL:
7023 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, false);
7024 case ALPHA_BUILTIN_MSKLL:
7025 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, false);
7026 case ALPHA_BUILTIN_MSKQL:
7027 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, false);
7028 case ALPHA_BUILTIN_MSKWH:
7029 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, true);
7030 case ALPHA_BUILTIN_MSKLH:
7031 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, true);
7032 case ALPHA_BUILTIN_MSKQH:
7033 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, true);
7034
7035 case ALPHA_BUILTIN_UMULH:
7036 return fold_build2 (MULT_HIGHPART_EXPR, alpha_dimode_u, op[0], op[1]);
7037
7038 case ALPHA_BUILTIN_ZAP:
7039 opint[1] ^= 0xff;
7040 /* FALLTHRU */
7041 case ALPHA_BUILTIN_ZAPNOT:
7042 return alpha_fold_builtin_zapnot (op, opint, op_const);
7043
7044 case ALPHA_BUILTIN_MINUB8:
7045 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_u);
7046 case ALPHA_BUILTIN_MINSB8:
7047 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_s);
7048 case ALPHA_BUILTIN_MINUW4:
7049 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_u);
7050 case ALPHA_BUILTIN_MINSW4:
7051 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_s);
7052 case ALPHA_BUILTIN_MAXUB8:
7053 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_u);
7054 case ALPHA_BUILTIN_MAXSB8:
7055 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_s);
7056 case ALPHA_BUILTIN_MAXUW4:
7057 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_u);
7058 case ALPHA_BUILTIN_MAXSW4:
7059 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_s);
7060
7061 case ALPHA_BUILTIN_PERR:
7062 return alpha_fold_builtin_perr (opint, op_const);
7063 case ALPHA_BUILTIN_PKLB:
7064 return alpha_fold_builtin_pklb (opint, op_const);
7065 case ALPHA_BUILTIN_PKWB:
7066 return alpha_fold_builtin_pkwb (opint, op_const);
7067 case ALPHA_BUILTIN_UNPKBL:
7068 return alpha_fold_builtin_unpkbl (opint, op_const);
7069 case ALPHA_BUILTIN_UNPKBW:
7070 return alpha_fold_builtin_unpkbw (opint, op_const);
7071
7072 case ALPHA_BUILTIN_CTTZ:
7073 return alpha_fold_builtin_cttz (opint, op_const);
7074 case ALPHA_BUILTIN_CTLZ:
7075 return alpha_fold_builtin_ctlz (opint, op_const);
7076 case ALPHA_BUILTIN_CTPOP:
7077 return alpha_fold_builtin_ctpop (opint, op_const);
7078
7079 case ALPHA_BUILTIN_AMASK:
7080 case ALPHA_BUILTIN_IMPLVER:
7081 case ALPHA_BUILTIN_RPCC:
7082 /* None of these are foldable at compile-time. */
7083 default:
7084 return NULL;
7085 }
7086 }
7087 \f
7088 /* This page contains routines that are used to determine what the function
7089 prologue and epilogue code will do and write them out. */
7090
7091 /* Compute the size of the save area in the stack. */
7092
7093 /* These variables are used for communication between the following functions.
7094 They indicate various things about the current function being compiled
7095 that are used to tell what kind of prologue, epilogue and procedure
7096 descriptor to generate. */
7097
7098 /* Nonzero if we need a stack procedure. */
7099 enum alpha_procedure_types {PT_NULL = 0, PT_REGISTER = 1, PT_STACK = 2};
7100 static enum alpha_procedure_types alpha_procedure_type;
7101
7102 /* Register number (either FP or SP) that is used to unwind the frame. */
7103 static int vms_unwind_regno;
7104
7105 /* Register number used to save FP. We need not have one for RA since
7106 we don't modify it for register procedures. This is only defined
7107 for register frame procedures. */
7108 static int vms_save_fp_regno;
7109
7110 /* Register number used to reference objects off our PV. */
7111 static int vms_base_regno;
7112
7113 /* Compute register masks for saved registers. */
7114
7115 static void
7116 alpha_sa_mask (unsigned long *imaskP, unsigned long *fmaskP)
7117 {
7118 unsigned long imask = 0;
7119 unsigned long fmask = 0;
7120 unsigned int i;
7121
7122 /* When outputting a thunk, we don't have valid register life info,
7123 but assemble_start_function wants to output .frame and .mask
7124 directives. */
7125 if (cfun->is_thunk)
7126 {
7127 *imaskP = 0;
7128 *fmaskP = 0;
7129 return;
7130 }
7131
7132 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7133 imask |= (1UL << HARD_FRAME_POINTER_REGNUM);
7134
7135 /* One for every register we have to save. */
7136 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
7137 if (! fixed_regs[i] && ! call_used_regs[i]
7138 && df_regs_ever_live_p (i) && i != REG_RA)
7139 {
7140 if (i < 32)
7141 imask |= (1UL << i);
7142 else
7143 fmask |= (1UL << (i - 32));
7144 }
7145
7146 /* We need to restore these for the handler. */
7147 if (crtl->calls_eh_return)
7148 {
7149 for (i = 0; ; ++i)
7150 {
7151 unsigned regno = EH_RETURN_DATA_REGNO (i);
7152 if (regno == INVALID_REGNUM)
7153 break;
7154 imask |= 1UL << regno;
7155 }
7156 }
7157
7158 /* If any register spilled, then spill the return address also. */
7159 /* ??? This is required by the Digital stack unwind specification
7160 and isn't needed if we're doing Dwarf2 unwinding. */
7161 if (imask || fmask || alpha_ra_ever_killed ())
7162 imask |= (1UL << REG_RA);
7163
7164 *imaskP = imask;
7165 *fmaskP = fmask;
7166 }
7167
7168 int
7169 alpha_sa_size (void)
7170 {
7171 unsigned long mask[2];
7172 int sa_size = 0;
7173 int i, j;
7174
7175 alpha_sa_mask (&mask[0], &mask[1]);
7176
7177 for (j = 0; j < 2; ++j)
7178 for (i = 0; i < 32; ++i)
7179 if ((mask[j] >> i) & 1)
7180 sa_size++;
7181
7182 if (TARGET_ABI_OPEN_VMS)
7183 {
7184 /* Start with a stack procedure if we make any calls (REG_RA used), or
7185 need a frame pointer, with a register procedure if we otherwise need
7186 at least a slot, and with a null procedure in other cases. */
7187 if ((mask[0] >> REG_RA) & 1 || frame_pointer_needed)
7188 alpha_procedure_type = PT_STACK;
7189 else if (get_frame_size() != 0)
7190 alpha_procedure_type = PT_REGISTER;
7191 else
7192 alpha_procedure_type = PT_NULL;
7193
7194 /* Don't reserve space for saving FP & RA yet. Do that later after we've
7195 made the final decision on stack procedure vs register procedure. */
7196 if (alpha_procedure_type == PT_STACK)
7197 sa_size -= 2;
7198
7199 /* Decide whether to refer to objects off our PV via FP or PV.
7200 If we need FP for something else or if we receive a nonlocal
7201 goto (which expects PV to contain the value), we must use PV.
7202 Otherwise, start by assuming we can use FP. */
7203
7204 vms_base_regno
7205 = (frame_pointer_needed
7206 || cfun->has_nonlocal_label
7207 || alpha_procedure_type == PT_STACK
7208 || crtl->outgoing_args_size)
7209 ? REG_PV : HARD_FRAME_POINTER_REGNUM;
7210
7211 /* If we want to copy PV into FP, we need to find some register
7212 in which to save FP. */
7213
7214 vms_save_fp_regno = -1;
7215 if (vms_base_regno == HARD_FRAME_POINTER_REGNUM)
7216 for (i = 0; i < 32; i++)
7217 if (! fixed_regs[i] && call_used_regs[i] && ! df_regs_ever_live_p (i))
7218 vms_save_fp_regno = i;
7219
7220 /* A VMS condition handler requires a stack procedure in our
7221 implementation (this is not required by the calling standard). */
7222 if ((vms_save_fp_regno == -1 && alpha_procedure_type == PT_REGISTER)
7223 || cfun->machine->uses_condition_handler)
7224 vms_base_regno = REG_PV, alpha_procedure_type = PT_STACK;
7225 else if (alpha_procedure_type == PT_NULL)
7226 vms_base_regno = REG_PV;
7227
7228 /* Stack unwinding should be done via FP unless we use it for PV. */
7229 vms_unwind_regno = (vms_base_regno == REG_PV
7230 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
7231
7232 /* If this is a stack procedure, allow space for saving FP, RA and
7233 a condition handler slot if needed. */
7234 if (alpha_procedure_type == PT_STACK)
7235 sa_size += 2 + cfun->machine->uses_condition_handler;
7236 }
7237 else
7238 {
7239 /* The saved-register count must be even so the save area size is a multiple of 16 bytes. */
7240 if (sa_size & 1)
7241 sa_size++;
7242 }
7243
7244 return sa_size * 8;
7245 }
7246
7247 /* Define the offset between two registers, one to be eliminated,
7248 and the other its replacement, at the start of a routine. */
7249
7250 HOST_WIDE_INT
7251 alpha_initial_elimination_offset (unsigned int from,
7252 unsigned int to ATTRIBUTE_UNUSED)
7253 {
7254 HOST_WIDE_INT ret;
7255
7256 ret = alpha_sa_size ();
7257 ret += ALPHA_ROUND (crtl->outgoing_args_size);
7258
7259 switch (from)
7260 {
7261 case FRAME_POINTER_REGNUM:
7262 break;
7263
7264 case ARG_POINTER_REGNUM:
7265 ret += (ALPHA_ROUND (get_frame_size ()
7266 + crtl->args.pretend_args_size)
7267 - crtl->args.pretend_args_size);
7268 break;
7269
7270 default:
7271 gcc_unreachable ();
7272 }
7273
7274 return ret;
7275 }
7276
7277 #if TARGET_ABI_OPEN_VMS
7278
7279 /* Worker function for TARGET_CAN_ELIMINATE. */
7280
7281 static bool
7282 alpha_vms_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
7283 {
7284 /* We need the alpha_procedure_type to decide. Evaluate it now. */
7285 alpha_sa_size ();
7286
7287 switch (alpha_procedure_type)
7288 {
7289 case PT_NULL:
7290 /* NULL procedures have no frame of their own and we only
7291 know how to resolve from the current stack pointer. */
7292 return to == STACK_POINTER_REGNUM;
7293
7294 case PT_REGISTER:
7295 case PT_STACK:
7296 /* We always eliminate except to the stack pointer if there is no
7297 usable frame pointer at hand. */
7298 return (to != STACK_POINTER_REGNUM
7299 || vms_unwind_regno != HARD_FRAME_POINTER_REGNUM);
7300 }
7301
7302 gcc_unreachable ();
7303 }
7304
7305 /* FROM is to be eliminated for TO. Return the offset so that TO+offset
7306 designates the same location as FROM. */
7307
7308 HOST_WIDE_INT
7309 alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to)
7310 {
7311 /* The only possible attempts we ever expect are ARG or FRAME_PTR to
7312 HARD_FRAME or STACK_PTR. We need the alpha_procedure_type to decide
7313 on the proper computations and will need the register save area size
7314 in most cases. */
7315
7316 HOST_WIDE_INT sa_size = alpha_sa_size ();
7317
7318 /* PT_NULL procedures have no frame of their own and we only allow
7319 elimination to the stack pointer, which is also the argument pointer; we
7320 resolve the soft frame pointer to that as well. */
7321
7322 if (alpha_procedure_type == PT_NULL)
7323 return 0;
7324
7325 /* For a PT_STACK procedure the frame layout looks as follows
7326
7327 -----> decreasing addresses
7328
7329 < size rounded up to 16 | likewise >
7330 --------------#------------------------------+++--------------+++-------#
7331 incoming args # pretended args | "frame" | regs sa | PV | outgoing args #
7332 --------------#---------------------------------------------------------#
7333 ^ ^ ^ ^
7334 ARG_PTR FRAME_PTR HARD_FRAME_PTR STACK_PTR
7335
7336
7337 PT_REGISTER procedures are similar in that they may have a frame of their
7338 own. They have no regs-sa/pv/outgoing-args area.
7339
7340 We first compute offset to HARD_FRAME_PTR, then add what we need to get
7341 to STACK_PTR if need be. */
7342
7343 {
7344 HOST_WIDE_INT offset;
7345 HOST_WIDE_INT pv_save_size = alpha_procedure_type == PT_STACK ? 8 : 0;
7346
7347 switch (from)
7348 {
7349 case FRAME_POINTER_REGNUM:
7350 offset = ALPHA_ROUND (sa_size + pv_save_size);
7351 break;
7352 case ARG_POINTER_REGNUM:
7353 offset = (ALPHA_ROUND (sa_size + pv_save_size
7354 + get_frame_size ()
7355 + crtl->args.pretend_args_size)
7356 - crtl->args.pretend_args_size);
7357 break;
7358 default:
7359 gcc_unreachable ();
7360 }
7361
7362 if (to == STACK_POINTER_REGNUM)
7363 offset += ALPHA_ROUND (crtl->outgoing_args_size);
7364
7365 return offset;
7366 }
7367 }
7368
7369 #define COMMON_OBJECT "common_object"
7370
7371 static tree
7372 common_object_handler (tree *node, tree name ATTRIBUTE_UNUSED,
7373 tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED,
7374 bool *no_add_attrs ATTRIBUTE_UNUSED)
7375 {
7376 tree decl = *node;
7377 gcc_assert (DECL_P (decl));
7378
7379 DECL_COMMON (decl) = 1;
7380 return NULL_TREE;
7381 }
7382
7383 static const struct attribute_spec vms_attribute_table[] =
7384 {
7385 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7386 affects_type_identity } */
7387 { COMMON_OBJECT, 0, 1, true, false, false, common_object_handler, false },
7388 { NULL, 0, 0, false, false, false, NULL, false }
7389 };
7390
7391 void
7392 vms_output_aligned_decl_common(FILE *file, tree decl, const char *name,
7393 unsigned HOST_WIDE_INT size,
7394 unsigned int align)
7395 {
7396 tree attr = DECL_ATTRIBUTES (decl);
7397 fprintf (file, "%s", COMMON_ASM_OP);
7398 assemble_name (file, name);
7399 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED, size);
7400 /* ??? Unlike on OSF/1, the alignment factor is not in log units. */
7401 fprintf (file, ",%u", align / BITS_PER_UNIT);
7402 if (attr)
7403 {
7404 attr = lookup_attribute (COMMON_OBJECT, attr);
7405 if (attr)
7406 fprintf (file, ",%s",
7407 IDENTIFIER_POINTER (TREE_VALUE (TREE_VALUE (attr))));
7408 }
7409 fputc ('\n', file);
7410 }
7411
7412 #undef COMMON_OBJECT
7413
7414 #endif
7415
7416 static int
7417 find_lo_sum_using_gp (rtx *px, void *data ATTRIBUTE_UNUSED)
7418 {
7419 return GET_CODE (*px) == LO_SUM && XEXP (*px, 0) == pic_offset_table_rtx;
7420 }
7421
7422 int
7423 alpha_find_lo_sum_using_gp (rtx insn)
7424 {
7425 return for_each_rtx (&PATTERN (insn), find_lo_sum_using_gp, NULL) > 0;
7426 }
7427
7428 static int
7429 alpha_does_function_need_gp (void)
7430 {
7431 rtx insn;
7432
7433 /* The GP being variable is an OSF ABI thing. */
7434 if (! TARGET_ABI_OSF)
7435 return 0;
7436
7437 /* We need the gp to load the address of __mcount. */
7438 if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7439 return 1;
7440
7441 /* The code emitted by alpha_output_mi_thunk_osf uses the gp. */
7442 if (cfun->is_thunk)
7443 return 1;
7444
7445 /* The nonlocal receiver pattern assumes that the gp is valid for
7446 the nested function. Reasonable because it's almost always set
7447 correctly already. For the cases where that's wrong, make sure
7448 the nested function loads its gp on entry. */
7449 if (crtl->has_nonlocal_goto)
7450 return 1;
7451
7452 /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first.
7453 Even if we are a static function, we still need to do this in case
7454 our address is taken and passed to something like qsort. */
7455
7456 push_topmost_sequence ();
7457 insn = get_insns ();
7458 pop_topmost_sequence ();
7459
7460 for (; insn; insn = NEXT_INSN (insn))
7461 if (NONDEBUG_INSN_P (insn)
7462 && GET_CODE (PATTERN (insn)) != USE
7463 && GET_CODE (PATTERN (insn)) != CLOBBER
7464 && get_attr_usegp (insn))
7465 return 1;
7466
7467 return 0;
7468 }
7469
7470 \f
7471 /* Helper function to set RTX_FRAME_RELATED_P on instructions, including
7472 sequences. */
7473
7474 static rtx
7475 set_frame_related_p (void)
7476 {
7477 rtx seq = get_insns ();
7478 rtx insn;
7479
7480 end_sequence ();
7481
7482 if (!seq)
7483 return NULL_RTX;
7484
7485 if (INSN_P (seq))
7486 {
7487 insn = seq;
7488 while (insn != NULL_RTX)
7489 {
7490 RTX_FRAME_RELATED_P (insn) = 1;
7491 insn = NEXT_INSN (insn);
7492 }
7493 seq = emit_insn (seq);
7494 }
7495 else
7496 {
7497 seq = emit_insn (seq);
7498 RTX_FRAME_RELATED_P (seq) = 1;
7499 }
7500 return seq;
7501 }
7502
7503 #define FRP(exp) (start_sequence (), exp, set_frame_related_p ())
7504
7505 /* Generates a store with the proper unwind info attached. VALUE is
7506 stored at BASE_REG+BASE_OFS. If FRAME_BIAS is nonzero, then BASE_REG
7507 contains SP+FRAME_BIAS, and that is the unwind info that should be
7508 generated. If FRAME_REG != VALUE, then VALUE is being stored on
7509 behalf of FRAME_REG, and FRAME_REG should be present in the unwind. */
7510
7511 static void
7512 emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias,
7513 HOST_WIDE_INT base_ofs, rtx frame_reg)
7514 {
7515 rtx addr, mem, insn;
7516
7517 addr = plus_constant (Pmode, base_reg, base_ofs);
7518 mem = gen_frame_mem (DImode, addr);
7519
7520 insn = emit_move_insn (mem, value);
7521 RTX_FRAME_RELATED_P (insn) = 1;
7522
7523 if (frame_bias || value != frame_reg)
7524 {
7525 if (frame_bias)
7526 {
7527 addr = plus_constant (Pmode, stack_pointer_rtx,
7528 frame_bias + base_ofs);
7529 mem = gen_rtx_MEM (DImode, addr);
7530 }
7531
7532 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7533 gen_rtx_SET (VOIDmode, mem, frame_reg));
7534 }
7535 }
7536
7537 static void
7538 emit_frame_store (unsigned int regno, rtx base_reg,
7539 HOST_WIDE_INT frame_bias, HOST_WIDE_INT base_ofs)
7540 {
7541 rtx reg = gen_rtx_REG (DImode, regno);
7542 emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg);
7543 }
7544
7545 /* Compute the frame size. SIZE is the size of the "naked" frame
7546 and SA_SIZE is the size of the register save area. */
7547
7548 static HOST_WIDE_INT
7549 compute_frame_size (HOST_WIDE_INT size, HOST_WIDE_INT sa_size)
7550 {
7551 if (TARGET_ABI_OPEN_VMS)
7552 return ALPHA_ROUND (sa_size
7553 + (alpha_procedure_type == PT_STACK ? 8 : 0)
7554 + size
7555 + crtl->args.pretend_args_size);
7556 else
7557 return ALPHA_ROUND (crtl->outgoing_args_size)
7558 + sa_size
7559 + ALPHA_ROUND (size
7560 + crtl->args.pretend_args_size);
7561 }
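/* For example, on OSF with 40 bytes of outgoing arguments, a 32-byte
   register save area, a 100-byte frame and no pretend args this gives
   ALPHA_ROUND (40) + 32 + ALPHA_ROUND (100) == 48 + 32 + 112 == 192,
   assuming ALPHA_ROUND rounds up to the 16-byte stack alignment.  */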
7562
7563 /* Write function prologue. */
7564
7565 /* On VMS we have two kinds of functions:
7566
7567 - stack frame (PROC_STACK)
7568 these are 'normal' functions with local vars, which call
7569 other functions
7570 - register frame (PROC_REGISTER)
7571 keeps all data in registers, needs no stack
7572
7573 We must pass this to the assembler so it can generate the
7574 proper pdsc (procedure descriptor).
7575 This is done with the '.pdesc' command.
7576
7577 On non-VMS targets, we don't really differentiate between the two, as we can
7578 simply allocate stack without saving registers. */
7579
7580 void
7581 alpha_expand_prologue (void)
7582 {
7583 /* Registers to save. */
7584 unsigned long imask = 0;
7585 unsigned long fmask = 0;
7586 /* Stack space needed for pushing registers clobbered by us. */
7587 HOST_WIDE_INT sa_size, sa_bias;
7588 /* Complete stack size needed. */
7589 HOST_WIDE_INT frame_size;
7590 /* Probed stack size; it additionally includes the size of
7591 the "reserve region" if any. */
7592 HOST_WIDE_INT probed_size;
7593 /* Offset from base reg to register save area. */
7594 HOST_WIDE_INT reg_offset;
7595 rtx sa_reg;
7596 int i;
7597
7598 sa_size = alpha_sa_size ();
7599 frame_size = compute_frame_size (get_frame_size (), sa_size);
7600
7601 if (flag_stack_usage_info)
7602 current_function_static_stack_size = frame_size;
7603
7604 if (TARGET_ABI_OPEN_VMS)
7605 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
7606 else
7607 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
7608
7609 alpha_sa_mask (&imask, &fmask);
7610
7611 /* Emit an insn to reload GP, if needed. */
7612 if (TARGET_ABI_OSF)
7613 {
7614 alpha_function_needs_gp = alpha_does_function_need_gp ();
7615 if (alpha_function_needs_gp)
7616 emit_insn (gen_prologue_ldgp ());
7617 }
7618
7619 /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert
7620 the call to mcount ourselves, rather than having the linker do it
7621 magically in response to -pg. Since _mcount has special linkage,
7622 don't represent the call as a call. */
7623 if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7624 emit_insn (gen_prologue_mcount ());
7625
7626 /* Adjust the stack by the frame size. If the frame size is > 4096
7627 bytes, we need to be sure we probe somewhere in the first and last
7628 4096 bytes (we can probably get away without the latter test) and
7629 every 8192 bytes in between. If the frame size is > 32768, we
7630 do this in a loop. Otherwise, we generate the explicit probe
7631 instructions.
7632
7633 Note that we are only allowed to adjust sp once in the prologue. */
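/* For example, probed_size == 20000 takes the small-frame path below:
   probes are emitted at sp-4096 and sp-12288, and because more than
   4096 bytes remain beyond the last probe, a final probe at sp-20000
   is also emitted when no register saves would otherwise touch that
   region (or when -fstack-check is in effect).  */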
7634
7635 probed_size = frame_size;
7636 if (flag_stack_check)
7637 probed_size += STACK_CHECK_PROTECT;
7638
7639 if (probed_size <= 32768)
7640 {
7641 if (probed_size > 4096)
7642 {
7643 int probed;
7644
7645 for (probed = 4096; probed < probed_size; probed += 8192)
7646 emit_insn (gen_probe_stack (GEN_INT (-probed)));
7647
7648 /* We only have to do this probe if we aren't saving registers or
7649 if we are probing beyond the frame because of -fstack-check. */
7650 if ((sa_size == 0 && probed_size > probed - 4096)
7651 || flag_stack_check)
7652 emit_insn (gen_probe_stack (GEN_INT (-probed_size)));
7653 }
7654
7655 if (frame_size != 0)
7656 FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
7657 GEN_INT (-frame_size))));
7658 }
7659 else
7660 {
7661 /* Here we generate code to set R22 to SP + 4096 and set R23 to the
7662 number of 8192 byte blocks to probe. We then probe each block
7663 in the loop and then set SP to the proper location. If the
7664 amount remaining is > 4096, we have to do one more probe if we
7665 are not saving any registers or if we are probing beyond the
7666 frame because of -fstack-check. */
7667
7668 HOST_WIDE_INT blocks = (probed_size + 4096) / 8192;
7669 HOST_WIDE_INT leftover = probed_size + 4096 - blocks * 8192;
7670 rtx ptr = gen_rtx_REG (DImode, 22);
7671 rtx count = gen_rtx_REG (DImode, 23);
7672 rtx seq;
7673
7674 emit_move_insn (count, GEN_INT (blocks));
7675 emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096)));
7676
7677 /* Because of the difficulty in emitting a new basic block this
7678 late in the compilation, generate the loop as a single insn. */
7679 emit_insn (gen_prologue_stack_probe_loop (count, ptr));
7680
7681 if ((leftover > 4096 && sa_size == 0) || flag_stack_check)
7682 {
7683 rtx last = gen_rtx_MEM (DImode,
7684 plus_constant (Pmode, ptr, -leftover));
7685 MEM_VOLATILE_P (last) = 1;
7686 emit_move_insn (last, const0_rtx);
7687 }
7688
7689 if (flag_stack_check)
7690 {
7691 /* If -fstack-check is specified we have to load the entire
7692 constant into a register and subtract from the sp in one go,
7693 because the probed stack size is not equal to the frame size. */
7694 HOST_WIDE_INT lo, hi;
7695 lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
7696 hi = frame_size - lo;
7697
7698 emit_move_insn (ptr, GEN_INT (hi));
7699 emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo)));
7700 seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx,
7701 ptr));
7702 }
7703 else
7704 {
7705 seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr,
7706 GEN_INT (-leftover)));
7707 }
7708
7709 /* This alternative is special, because the DWARF code cannot
7710 possibly intuit through the loop above. So we invent this
7711 note for it to look at instead. */
7712 RTX_FRAME_RELATED_P (seq) = 1;
7713 add_reg_note (seq, REG_FRAME_RELATED_EXPR,
7714 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
7715 plus_constant (Pmode, stack_pointer_rtx,
7716 -frame_size)));
7717 }
7718
7719 /* Cope with very large offsets to the register save area. */
7720 sa_bias = 0;
7721 sa_reg = stack_pointer_rtx;
7722 if (reg_offset + sa_size > 0x8000)
7723 {
7724 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
7725 rtx sa_bias_rtx;
7726
7727 if (low + sa_size <= 0x8000)
7728 sa_bias = reg_offset - low, reg_offset = low;
7729 else
7730 sa_bias = reg_offset, reg_offset = 0;
7731
7732 sa_reg = gen_rtx_REG (DImode, 24);
7733 sa_bias_rtx = GEN_INT (sa_bias);
7734
7735 if (add_operand (sa_bias_rtx, DImode))
7736 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx));
7737 else
7738 {
7739 emit_move_insn (sa_reg, sa_bias_rtx);
7740 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg));
7741 }
7742 }
7743
7744 /* Save regs in stack order, beginning with the VMS PV. */
7745 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7746 emit_frame_store (REG_PV, stack_pointer_rtx, 0, 0);
7747
7748 /* Save register RA next. */
7749 if (imask & (1UL << REG_RA))
7750 {
7751 emit_frame_store (REG_RA, sa_reg, sa_bias, reg_offset);
7752 imask &= ~(1UL << REG_RA);
7753 reg_offset += 8;
7754 }
7755
7756 /* Now save any other registers required to be saved. */
7757 for (i = 0; i < 31; i++)
7758 if (imask & (1UL << i))
7759 {
7760 emit_frame_store (i, sa_reg, sa_bias, reg_offset);
7761 reg_offset += 8;
7762 }
7763
7764 for (i = 0; i < 31; i++)
7765 if (fmask & (1UL << i))
7766 {
7767 emit_frame_store (i+32, sa_reg, sa_bias, reg_offset);
7768 reg_offset += 8;
7769 }
7770
7771 if (TARGET_ABI_OPEN_VMS)
7772 {
7773 /* Register frame procedures save the fp. */
7774 if (alpha_procedure_type == PT_REGISTER)
7775 {
7776 rtx insn = emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno),
7777 hard_frame_pointer_rtx);
7778 add_reg_note (insn, REG_CFA_REGISTER, NULL);
7779 RTX_FRAME_RELATED_P (insn) = 1;
7780 }
7781
7782 if (alpha_procedure_type != PT_NULL && vms_base_regno != REG_PV)
7783 emit_insn (gen_force_movdi (gen_rtx_REG (DImode, vms_base_regno),
7784 gen_rtx_REG (DImode, REG_PV)));
7785
7786 if (alpha_procedure_type != PT_NULL
7787 && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
7788 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7789
7790 /* If we have to allocate space for outgoing args, do it now. */
7791 if (crtl->outgoing_args_size != 0)
7792 {
7793 rtx seq
7794 = emit_move_insn (stack_pointer_rtx,
7795 plus_constant
7796 (Pmode, hard_frame_pointer_rtx,
7797 - (ALPHA_ROUND
7798 (crtl->outgoing_args_size))));
7799
7800 /* Only set FRAME_RELATED_P on the stack adjustment we just emitted
7801 if ! frame_pointer_needed. Setting the bit will change the CFA
7802 computation rule to use sp again, which would be wrong if we had
7803 frame_pointer_needed, as this means sp might move unpredictably
7804 later on.
7805
7806 Also, note that
7807 frame_pointer_needed
7808 => vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
7809 and
7810 crtl->outgoing_args_size != 0
7811 => alpha_procedure_type != PT_NULL,
7812
7813 so when we are not setting the bit here, we are guaranteed to
7814 have emitted an FRP frame pointer update just before. */
7815 RTX_FRAME_RELATED_P (seq) = ! frame_pointer_needed;
7816 }
7817 }
7818 else
7819 {
7820 /* If we need a frame pointer, set it from the stack pointer. */
7821 if (frame_pointer_needed)
7822 {
7823 if (TARGET_CAN_FAULT_IN_PROLOGUE)
7824 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7825 else
7826 /* This must always be the last instruction in the
7827 prologue, thus we emit a special move + clobber. */
7828 FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx,
7829 stack_pointer_rtx, sa_reg)));
7830 }
7831 }
7832
7833 /* The ABIs for VMS and OSF/1 say that while we can schedule insns into
7834 the prologue, for exception handling reasons, we cannot do this for
7835 any insn that might fault. We could prevent this for mems with a
7836 (clobber:BLK (scratch)), but this doesn't work for fp insns. So we
7837 have to prevent all such scheduling with a blockage.
7838
7839 Linux, on the other hand, never bothered to implement OSF/1's
7840 exception handling, and so doesn't care about such things. Anyone
7841 planning to use dwarf2 frame-unwind info can also omit the blockage. */
7842
7843 if (! TARGET_CAN_FAULT_IN_PROLOGUE)
7844 emit_insn (gen_blockage ());
7845 }
7846
7847 /* Count the number of .file directives, so that .loc is up to date. */
7848 int num_source_filenames = 0;
7849
7850 /* Output the textual info surrounding the prologue. */
7851
7852 void
7853 alpha_start_function (FILE *file, const char *fnname,
7854 tree decl ATTRIBUTE_UNUSED)
7855 {
7856 unsigned long imask = 0;
7857 unsigned long fmask = 0;
7858 /* Stack space needed for pushing registers clobbered by us. */
7859 HOST_WIDE_INT sa_size;
7860 /* Complete stack size needed. */
7861 unsigned HOST_WIDE_INT frame_size;
7862 /* The maximum debuggable frame size. */
7863 unsigned HOST_WIDE_INT max_frame_size = 1UL << 31;
7864 /* Offset from base reg to register save area. */
7865 HOST_WIDE_INT reg_offset;
7866 char *entry_label = (char *) alloca (strlen (fnname) + 6);
7867 char *tramp_label = (char *) alloca (strlen (fnname) + 6);
7868 int i;
7869
7870 #if TARGET_ABI_OPEN_VMS
7871 vms_start_function (fnname);
7872 #endif
7873
7874 alpha_fnname = fnname;
7875 sa_size = alpha_sa_size ();
7876 frame_size = compute_frame_size (get_frame_size (), sa_size);
7877
7878 if (TARGET_ABI_OPEN_VMS)
7879 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
7880 else
7881 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
7882
7883 alpha_sa_mask (&imask, &fmask);
7884
7885 /* Issue function start and label. */
7886 if (TARGET_ABI_OPEN_VMS || !flag_inhibit_size_directive)
7887 {
7888 fputs ("\t.ent ", file);
7889 assemble_name (file, fnname);
7890 putc ('\n', file);
7891
7892 /* If the function needs GP, we'll write the "..ng" label there.
7893 Otherwise, do it here. */
7894 if (TARGET_ABI_OSF
7895 && ! alpha_function_needs_gp
7896 && ! cfun->is_thunk)
7897 {
7898 putc ('$', file);
7899 assemble_name (file, fnname);
7900 fputs ("..ng:\n", file);
7901 }
7902 }
7903 /* Nested functions on VMS that are potentially called via trampoline
7904 get a special transfer entry point that loads the called function's
7905 procedure descriptor and static chain. */
7906 if (TARGET_ABI_OPEN_VMS
7907 && !TREE_PUBLIC (decl)
7908 && DECL_CONTEXT (decl)
7909 && !TYPE_P (DECL_CONTEXT (decl))
7910 && TREE_CODE (DECL_CONTEXT (decl)) != TRANSLATION_UNIT_DECL)
7911 {
7912 strcpy (tramp_label, fnname);
7913 strcat (tramp_label, "..tr");
7914 ASM_OUTPUT_LABEL (file, tramp_label);
7915 fprintf (file, "\tldq $1,24($27)\n");
7916 fprintf (file, "\tldq $27,16($27)\n");
7917 }
7918
7919 strcpy (entry_label, fnname);
7920 if (TARGET_ABI_OPEN_VMS)
7921 strcat (entry_label, "..en");
7922
7923 ASM_OUTPUT_LABEL (file, entry_label);
7924 inside_function = TRUE;
7925
7926 if (TARGET_ABI_OPEN_VMS)
7927 fprintf (file, "\t.base $%d\n", vms_base_regno);
7928
7929 if (TARGET_ABI_OSF
7930 && TARGET_IEEE_CONFORMANT
7931 && !flag_inhibit_size_directive)
7932 {
7933 /* Set flags in procedure descriptor to request IEEE-conformant
7934 math-library routines. The value we set it to is PDSC_EXC_IEEE
7935 (/usr/include/pdsc.h). */
7936 fputs ("\t.eflag 48\n", file);
7937 }
7938
7939 /* Set up offsets to alpha virtual arg/local debugging pointer. */
7940 alpha_auto_offset = -frame_size + crtl->args.pretend_args_size;
7941 alpha_arg_offset = -frame_size + 48;
7942
7943 /* Describe our frame. If the frame size is too large for a signed 32-bit integer,
7944 print it as zero to avoid an assembler error. We won't be
7945 properly describing such a frame, but that's the best we can do. */
7946 if (TARGET_ABI_OPEN_VMS)
7947 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,"
7948 HOST_WIDE_INT_PRINT_DEC "\n",
7949 vms_unwind_regno,
7950 frame_size >= (1UL << 31) ? 0 : frame_size,
7951 reg_offset);
7952 else if (!flag_inhibit_size_directive)
7953 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n",
7954 (frame_pointer_needed
7955 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM),
7956 frame_size >= max_frame_size ? 0 : frame_size,
7957 crtl->args.pretend_args_size);
7958
7959 /* Describe which registers were spilled. */
7960 if (TARGET_ABI_OPEN_VMS)
7961 {
7962 if (imask)
7963 /* ??? Does VMS care if mask contains ra? The old code didn't
7964 set it, so I don't set it here either. */
7965 fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1UL << REG_RA));
7966 if (fmask)
7967 fprintf (file, "\t.fmask 0x%lx,0\n", fmask);
7968 if (alpha_procedure_type == PT_REGISTER)
7969 fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno);
7970 }
7971 else if (!flag_inhibit_size_directive)
7972 {
7973 if (imask)
7974 {
7975 fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask,
7976 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
7977
7978 for (i = 0; i < 32; ++i)
7979 if (imask & (1UL << i))
7980 reg_offset += 8;
7981 }
7982
7983 if (fmask)
7984 fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask,
7985 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
7986 }
7987
7988 #if TARGET_ABI_OPEN_VMS
7989 /* If a user condition handler has been installed at some point, emit
7990 the procedure descriptor bits to point the Condition Handling Facility
7991 at the indirection wrapper, and state the fp offset at which the user
7992 handler may be found. */
7993 if (cfun->machine->uses_condition_handler)
7994 {
7995 fprintf (file, "\t.handler __gcc_shell_handler\n");
7996 fprintf (file, "\t.handler_data %d\n", VMS_COND_HANDLER_FP_OFFSET);
7997 }
7998
7999 #ifdef TARGET_VMS_CRASH_DEBUG
8000 /* Support of minimal traceback info. */
8001 switch_to_section (readonly_data_section);
8002 fprintf (file, "\t.align 3\n");
8003 assemble_name (file, fnname); fputs ("..na:\n", file);
8004 fputs ("\t.ascii \"", file);
8005 assemble_name (file, fnname);
8006 fputs ("\\0\"\n", file);
8007 switch_to_section (text_section);
8008 #endif
8009 #endif /* TARGET_ABI_OPEN_VMS */
8010 }
8011
8012 /* Emit the .prologue note at the scheduled end of the prologue. */
8013
8014 static void
8015 alpha_output_function_end_prologue (FILE *file)
8016 {
8017 if (TARGET_ABI_OPEN_VMS)
8018 fputs ("\t.prologue\n", file);
8019 else if (!flag_inhibit_size_directive)
8020 fprintf (file, "\t.prologue %d\n",
8021 alpha_function_needs_gp || cfun->is_thunk);
8022 }
8023
8024 /* Write function epilogue. */
8025
8026 void
8027 alpha_expand_epilogue (void)
8028 {
8029 /* Registers to save. */
8030 unsigned long imask = 0;
8031 unsigned long fmask = 0;
8032 /* Stack space needed for pushing registers clobbered by us. */
8033 HOST_WIDE_INT sa_size;
8034 /* Complete stack size needed. */
8035 HOST_WIDE_INT frame_size;
8036 /* Offset from base reg to register save area. */
8037 HOST_WIDE_INT reg_offset;
8038 int fp_is_frame_pointer, fp_offset;
8039 rtx sa_reg, sa_reg_exp = NULL;
8040 rtx sp_adj1, sp_adj2, mem, reg, insn;
8041 rtx eh_ofs;
8042 rtx cfa_restores = NULL_RTX;
8043 int i;
8044
8045 sa_size = alpha_sa_size ();
8046 frame_size = compute_frame_size (get_frame_size (), sa_size);
8047
8048 if (TARGET_ABI_OPEN_VMS)
8049 {
8050 if (alpha_procedure_type == PT_STACK)
8051 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
8052 else
8053 reg_offset = 0;
8054 }
8055 else
8056 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
8057
8058 alpha_sa_mask (&imask, &fmask);
8059
8060 fp_is_frame_pointer
8061 = (TARGET_ABI_OPEN_VMS
8062 ? alpha_procedure_type == PT_STACK
8063 : frame_pointer_needed);
8064 fp_offset = 0;
8065 sa_reg = stack_pointer_rtx;
8066
8067 if (crtl->calls_eh_return)
8068 eh_ofs = EH_RETURN_STACKADJ_RTX;
8069 else
8070 eh_ofs = NULL_RTX;
8071
8072 if (sa_size)
8073 {
8074 /* If we have a frame pointer, restore SP from it. */
8075 if (TARGET_ABI_OPEN_VMS
8076 ? vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
8077 : frame_pointer_needed)
8078 emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
8079
8080 /* Cope with very large offsets to the register save area. */
8081 if (reg_offset + sa_size > 0x8000)
8082 {
8083 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
8084 HOST_WIDE_INT bias;
8085
8086 if (low + sa_size <= 0x8000)
8087 bias = reg_offset - low, reg_offset = low;
8088 else
8089 bias = reg_offset, reg_offset = 0;
8090
8091 sa_reg = gen_rtx_REG (DImode, 22);
8092 sa_reg_exp = plus_constant (Pmode, stack_pointer_rtx, bias);
8093
8094 emit_move_insn (sa_reg, sa_reg_exp);
8095 }
8096
8097 /* Restore registers in order, excepting a true frame pointer. */
8098
8099 mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg, reg_offset));
8100 reg = gen_rtx_REG (DImode, REG_RA);
8101 emit_move_insn (reg, mem);
8102 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
8103
8104 reg_offset += 8;
8105 imask &= ~(1UL << REG_RA);
8106
8107 for (i = 0; i < 31; ++i)
8108 if (imask & (1UL << i))
8109 {
8110 if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer)
8111 fp_offset = reg_offset;
8112 else
8113 {
8114 mem = gen_frame_mem (DImode,
8115 plus_constant (Pmode, sa_reg,
8116 reg_offset));
8117 reg = gen_rtx_REG (DImode, i);
8118 emit_move_insn (reg, mem);
8119 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
8120 cfa_restores);
8121 }
8122 reg_offset += 8;
8123 }
8124
8125 for (i = 0; i < 31; ++i)
8126 if (fmask & (1UL << i))
8127 {
8128 mem = gen_frame_mem (DFmode, plus_constant (Pmode, sa_reg,
8129 reg_offset));
8130 reg = gen_rtx_REG (DFmode, i+32);
8131 emit_move_insn (reg, mem);
8132 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
8133 reg_offset += 8;
8134 }
8135 }
8136
8137 if (frame_size || eh_ofs)
8138 {
8139 sp_adj1 = stack_pointer_rtx;
8140
8141 if (eh_ofs)
8142 {
8143 sp_adj1 = gen_rtx_REG (DImode, 23);
8144 emit_move_insn (sp_adj1,
8145 gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs));
8146 }
8147
8148 /* If the stack size is large, begin computation into a temporary
8149 register so as not to interfere with a potential fp restore,
8150 which must be consecutive with an SP restore. */
8151 if (frame_size < 32768 && !cfun->calls_alloca)
8152 sp_adj2 = GEN_INT (frame_size);
8153 else if (frame_size < 0x40007fffL)
8154 {
8155 int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
8156
8157 sp_adj2 = plus_constant (Pmode, sp_adj1, frame_size - low);
8158 if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2))
8159 sp_adj1 = sa_reg;
8160 else
8161 {
8162 sp_adj1 = gen_rtx_REG (DImode, 23);
8163 emit_move_insn (sp_adj1, sp_adj2);
8164 }
8165 sp_adj2 = GEN_INT (low);
8166 }
8167 else
8168 {
8169 rtx tmp = gen_rtx_REG (DImode, 23);
8170 sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size, 3, false);
8171 if (!sp_adj2)
8172 {
8173 /* We can't drop new things to memory this late, afaik,
8174 so build it up by pieces. */
8175 sp_adj2 = alpha_emit_set_long_const (tmp, frame_size,
8176 -(frame_size < 0));
8177 gcc_assert (sp_adj2);
8178 }
8179 }
8180
8181 /* From now on, things must be in order. So emit blockages. */
8182
8183 /* Restore the frame pointer. */
8184 if (fp_is_frame_pointer)
8185 {
8186 emit_insn (gen_blockage ());
8187 mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg,
8188 fp_offset));
8189 emit_move_insn (hard_frame_pointer_rtx, mem);
8190 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8191 hard_frame_pointer_rtx, cfa_restores);
8192 }
8193 else if (TARGET_ABI_OPEN_VMS)
8194 {
8195 emit_insn (gen_blockage ());
8196 emit_move_insn (hard_frame_pointer_rtx,
8197 gen_rtx_REG (DImode, vms_save_fp_regno));
8198 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8199 hard_frame_pointer_rtx, cfa_restores);
8200 }
8201
8202 /* Restore the stack pointer. */
8203 emit_insn (gen_blockage ());
8204 if (sp_adj2 == const0_rtx)
8205 insn = emit_move_insn (stack_pointer_rtx, sp_adj1);
8206 else
8207 insn = emit_move_insn (stack_pointer_rtx,
8208 gen_rtx_PLUS (DImode, sp_adj1, sp_adj2));
8209 REG_NOTES (insn) = cfa_restores;
8210 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
8211 RTX_FRAME_RELATED_P (insn) = 1;
8212 }
8213 else
8214 {
8215 gcc_assert (cfa_restores == NULL);
8216
8217 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_REGISTER)
8218 {
8219 emit_insn (gen_blockage ());
8220 insn = emit_move_insn (hard_frame_pointer_rtx,
8221 gen_rtx_REG (DImode, vms_save_fp_regno));
8222 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8223 RTX_FRAME_RELATED_P (insn) = 1;
8224 }
8225 }
8226 }
8227 \f
8228 /* Output the rest of the textual info surrounding the epilogue. */
8229
8230 void
8231 alpha_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED)
8232 {
8233 rtx insn;
8234
8235 /* We output a nop after noreturn calls at the very end of the function to
8236 ensure that the return address always remains in the caller's code range,
8237 as not doing so might confuse unwinding engines. */
8238 insn = get_last_insn ();
8239 if (!INSN_P (insn))
8240 insn = prev_active_insn (insn);
8241 if (insn && CALL_P (insn))
8242 output_asm_insn (get_insn_template (CODE_FOR_nop, NULL), NULL);
8243
8244 #if TARGET_ABI_OPEN_VMS
8245 /* Write the linkage entries. */
8246 alpha_write_linkage (file, fnname);
8247 #endif
8248
8249 /* End the function. */
8250 if (TARGET_ABI_OPEN_VMS
8251 || !flag_inhibit_size_directive)
8252 {
8253 fputs ("\t.end ", file);
8254 assemble_name (file, fnname);
8255 putc ('\n', file);
8256 }
8257 inside_function = FALSE;
8258 }
8259
8260 #if TARGET_ABI_OSF
8261 /* Emit a tail call to FUNCTION after adjusting THIS by DELTA.
8262
8263 In order to avoid the hordes of differences between generated code
8264 with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating
8265 lots of code loading up large constants, generate rtl and emit it
8266 instead of going straight to text.
8267
8268 Not sure why this idea hasn't been explored before... */
8269
8270 static void
8271 alpha_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8272 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8273 tree function)
8274 {
8275 HOST_WIDE_INT hi, lo;
8276 rtx this_rtx, insn, funexp;
8277
8278 /* We always require a valid GP. */
8279 emit_insn (gen_prologue_ldgp ());
8280 emit_note (NOTE_INSN_PROLOGUE_END);
8281
8282 /* Find the "this" pointer. If the function returns a structure,
8283 the structure return pointer is in $16. */
8284 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8285 this_rtx = gen_rtx_REG (Pmode, 17);
8286 else
8287 this_rtx = gen_rtx_REG (Pmode, 16);
8288
8289 /* Add DELTA. When possible we use ldah+lda. Otherwise load the
8290 entire constant for the add. */
8291 lo = ((delta & 0xffff) ^ 0x8000) - 0x8000;
8292 hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
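  /* Illustration only: a delta such as 0x123456789 fails the hi + lo test
     below, since the ldah/lda pair can only reconstruct values that fit in
     a sign-extended 32-bit quantity; the full 64-bit constant is then built
     in a scratch register instead.  */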
8293 if (hi + lo == delta)
8294 {
8295 if (hi)
8296 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (hi)));
8297 if (lo)
8298 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (lo)));
8299 }
8300 else
8301 {
8302 rtx tmp = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 0),
8303 delta, -(delta < 0));
8304 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8305 }
8306
8307 /* Add a delta stored in the vtable at VCALL_OFFSET. */
8308 if (vcall_offset)
8309 {
8310 rtx tmp, tmp2;
8311
8312 tmp = gen_rtx_REG (Pmode, 0);
8313 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
8314
8315 lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000;
8316 hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8317 if (hi + lo == vcall_offset)
8318 {
8319 if (hi)
8320 emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi)));
8321 }
8322 else
8323 {
8324 tmp2 = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 1),
8325 vcall_offset, -(vcall_offset < 0));
8326 emit_insn (gen_adddi3 (tmp, tmp, tmp2));
8327 lo = 0;
8328 }
8329 if (lo)
8330 tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo));
8331 else
8332 tmp2 = tmp;
8333 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2));
8334
8335 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8336 }
8337
8338 /* Generate a tail call to the target function. */
8339 if (! TREE_USED (function))
8340 {
8341 assemble_external (function);
8342 TREE_USED (function) = 1;
8343 }
8344 funexp = XEXP (DECL_RTL (function), 0);
8345 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8346 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx));
8347 SIBLING_CALL_P (insn) = 1;
8348
8349 /* Run just enough of rest_of_compilation to get the insns emitted.
8350 There's not really enough bulk here to make other passes such as
8351 instruction scheduling worthwhile. Note that use_thunk calls
8352 assemble_start_function and assemble_end_function. */
8353 insn = get_insns ();
8354 shorten_branches (insn);
8355 final_start_function (insn, file, 1);
8356 final (insn, file, 1);
8357 final_end_function ();
8358 }
8359 #endif /* TARGET_ABI_OSF */
8360 \f
8361 /* Debugging support. */
8362
8363 #include "gstab.h"
8364
8365 /* Name of the file containing the current function. */
8366
8367 static const char *current_function_file = "";
8368
8369 /* Offsets to alpha virtual arg/local debugging pointers. */
8370
8371 long alpha_arg_offset;
8372 long alpha_auto_offset;
8373 \f
8374 /* Emit a new filename to a stream. */
8375
8376 void
8377 alpha_output_filename (FILE *stream, const char *name)
8378 {
8379 static int first_time = TRUE;
8380
8381 if (first_time)
8382 {
8383 first_time = FALSE;
8384 ++num_source_filenames;
8385 current_function_file = name;
8386 fprintf (stream, "\t.file\t%d ", num_source_filenames);
8387 output_quoted_string (stream, name);
8388 fprintf (stream, "\n");
8389 }
8390
8391 else if (name != current_function_file
8392 && strcmp (name, current_function_file) != 0)
8393 {
8394 ++num_source_filenames;
8395 current_function_file = name;
8396 fprintf (stream, "\t.file\t%d ", num_source_filenames);
8397
8398 output_quoted_string (stream, name);
8399 fprintf (stream, "\n");
8400 }
8401 }
8402 \f
8403 /* Structure to show the current status of registers and memory. */
8404
8405 struct shadow_summary
8406 {
8407 struct {
8408 unsigned int i : 31; /* Mask of int regs */
8409 unsigned int fp : 31; /* Mask of fp regs */
8410 unsigned int mem : 1; /* mem == imem | fpmem */
8411 } used, defd;
8412 };
8413
8414 /* Summarize the effects of expression X on the machine. Update SUM, a pointer
8415 to the summary structure. SET is nonzero if the insn is setting the
8416 object, otherwise zero. */
8417
8418 static void
8419 summarize_insn (rtx x, struct shadow_summary *sum, int set)
8420 {
8421 const char *format_ptr;
8422 int i, j;
8423
8424 if (x == 0)
8425 return;
8426
8427 switch (GET_CODE (x))
8428 {
8429 /* ??? Note that this case would be incorrect if the Alpha had a
8430 ZERO_EXTRACT in SET_DEST. */
8431 case SET:
8432 summarize_insn (SET_SRC (x), sum, 0);
8433 summarize_insn (SET_DEST (x), sum, 1);
8434 break;
8435
8436 case CLOBBER:
8437 summarize_insn (XEXP (x, 0), sum, 1);
8438 break;
8439
8440 case USE:
8441 summarize_insn (XEXP (x, 0), sum, 0);
8442 break;
8443
8444 case ASM_OPERANDS:
8445 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
8446 summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0);
8447 break;
8448
8449 case PARALLEL:
8450 for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
8451 summarize_insn (XVECEXP (x, 0, i), sum, 0);
8452 break;
8453
8454 case SUBREG:
8455 summarize_insn (SUBREG_REG (x), sum, 0);
8456 break;
8457
8458 case REG:
8459 {
8460 int regno = REGNO (x);
8461 unsigned long mask = ((unsigned long) 1) << (regno % 32);
8462
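	/* $31 and $f31 (regnos 31 and 63) always read as zero and discard
	   writes, so they never contribute to the shadow summary.  */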
8463 if (regno == 31 || regno == 63)
8464 break;
8465
8466 if (set)
8467 {
8468 if (regno < 32)
8469 sum->defd.i |= mask;
8470 else
8471 sum->defd.fp |= mask;
8472 }
8473 else
8474 {
8475 if (regno < 32)
8476 sum->used.i |= mask;
8477 else
8478 sum->used.fp |= mask;
8479 }
8480 }
8481 break;
8482
8483 case MEM:
8484 if (set)
8485 sum->defd.mem = 1;
8486 else
8487 sum->used.mem = 1;
8488
8489 /* Find the regs used in memory address computation: */
8490 summarize_insn (XEXP (x, 0), sum, 0);
8491 break;
8492
8493 case CONST_INT: case CONST_DOUBLE:
8494 case SYMBOL_REF: case LABEL_REF: case CONST:
8495 case SCRATCH: case ASM_INPUT:
8496 break;
8497
8498 /* Handle common unary and binary ops for efficiency. */
8499 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
8500 case MOD: case UDIV: case UMOD: case AND: case IOR:
8501 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
8502 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
8503 case NE: case EQ: case GE: case GT: case LE:
8504 case LT: case GEU: case GTU: case LEU: case LTU:
8505 summarize_insn (XEXP (x, 0), sum, 0);
8506 summarize_insn (XEXP (x, 1), sum, 0);
8507 break;
8508
8509 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
8510 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
8511 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
8512 case SQRT: case FFS:
8513 summarize_insn (XEXP (x, 0), sum, 0);
8514 break;
8515
8516 default:
8517 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
8518 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8519 switch (format_ptr[i])
8520 {
8521 case 'e':
8522 summarize_insn (XEXP (x, i), sum, 0);
8523 break;
8524
8525 case 'E':
8526 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8527 summarize_insn (XVECEXP (x, i, j), sum, 0);
8528 break;
8529
8530 case 'i':
8531 break;
8532
8533 default:
8534 gcc_unreachable ();
8535 }
8536 }
8537 }
8538
8539 /* Ensure a sufficient number of `trapb' insns are in the code when
8540 the user requests code with a trap precision of functions or
8541 instructions.
8542
8543 In naive mode, when the user requests a trap-precision of
8544 "instruction", a trapb is needed after every instruction that may
8545 generate a trap. This ensures that the code is resumption safe but
8546 it is also slow.
8547
8548 When optimizations are turned on, we delay issuing a trapb as long
8549 as possible. In this context, a trap shadow is the sequence of
8550 instructions that starts with a (potentially) trap generating
8551 instruction and extends to the next trapb or call_pal instruction
8552 (but GCC never generates call_pal by itself). We can delay (and
8553 therefore sometimes omit) a trapb subject to the following
8554 conditions:
8555
8556 (a) On entry to the trap shadow, if any Alpha register or memory
8557 location contains a value that is used as an operand value by some
8558 instruction in the trap shadow (live on entry), then no instruction
8559 in the trap shadow may modify the register or memory location.
8560
8561 (b) Within the trap shadow, the computation of the base register
8562 for a memory load or store instruction may not involve using the
8563 result of an instruction that might generate an UNPREDICTABLE
8564 result.
8565
8566 (c) Within the trap shadow, no register may be used more than once
8567 as a destination register. (This is to make life easier for the
8568 trap-handler.)
8569
8570 (d) The trap shadow may not include any branch instructions. */
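
/* A minimal illustration (register choices arbitrary) of how the checks
   below close a shadow:

	addt $f1,$f2,$f3	# may trap; opens the shadow
	ldq  $1,0($2)		# ok: no conflict with the shadow so far
	addt $f3,$f4,$f3	# redefines $f3, violating (c), so a trapb
				# is emitted before it.  */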
8571
8572 static void
8573 alpha_handle_trap_shadows (void)
8574 {
8575 struct shadow_summary shadow;
8576 int trap_pending, exception_nesting;
8577 rtx i, n;
8578
8579 trap_pending = 0;
8580 exception_nesting = 0;
8581 shadow.used.i = 0;
8582 shadow.used.fp = 0;
8583 shadow.used.mem = 0;
8584 shadow.defd = shadow.used;
8585
8586 for (i = get_insns (); i ; i = NEXT_INSN (i))
8587 {
8588 if (NOTE_P (i))
8589 {
8590 switch (NOTE_KIND (i))
8591 {
8592 case NOTE_INSN_EH_REGION_BEG:
8593 exception_nesting++;
8594 if (trap_pending)
8595 goto close_shadow;
8596 break;
8597
8598 case NOTE_INSN_EH_REGION_END:
8599 exception_nesting--;
8600 if (trap_pending)
8601 goto close_shadow;
8602 break;
8603
8604 case NOTE_INSN_EPILOGUE_BEG:
8605 if (trap_pending && alpha_tp >= ALPHA_TP_FUNC)
8606 goto close_shadow;
8607 break;
8608 }
8609 }
8610 else if (trap_pending)
8611 {
8612 if (alpha_tp == ALPHA_TP_FUNC)
8613 {
8614 if (JUMP_P (i)
8615 && GET_CODE (PATTERN (i)) == RETURN)
8616 goto close_shadow;
8617 }
8618 else if (alpha_tp == ALPHA_TP_INSN)
8619 {
8620 if (optimize > 0)
8621 {
8622 struct shadow_summary sum;
8623
8624 sum.used.i = 0;
8625 sum.used.fp = 0;
8626 sum.used.mem = 0;
8627 sum.defd = sum.used;
8628
8629 switch (GET_CODE (i))
8630 {
8631 case INSN:
8632 /* Annoyingly, get_attr_trap will die on these. */
8633 if (GET_CODE (PATTERN (i)) == USE
8634 || GET_CODE (PATTERN (i)) == CLOBBER)
8635 break;
8636
8637 summarize_insn (PATTERN (i), &sum, 0);
8638
8639 if ((sum.defd.i & shadow.defd.i)
8640 || (sum.defd.fp & shadow.defd.fp))
8641 {
8642 /* (c) would be violated */
8643 goto close_shadow;
8644 }
8645
8646 /* Combine shadow with summary of current insn: */
8647 shadow.used.i |= sum.used.i;
8648 shadow.used.fp |= sum.used.fp;
8649 shadow.used.mem |= sum.used.mem;
8650 shadow.defd.i |= sum.defd.i;
8651 shadow.defd.fp |= sum.defd.fp;
8652 shadow.defd.mem |= sum.defd.mem;
8653
8654 if ((sum.defd.i & shadow.used.i)
8655 || (sum.defd.fp & shadow.used.fp)
8656 || (sum.defd.mem & shadow.used.mem))
8657 {
8658 /* (a) would be violated (also takes care of (b)) */
8659 gcc_assert (get_attr_trap (i) != TRAP_YES
8660 || (!(sum.defd.i & sum.used.i)
8661 && !(sum.defd.fp & sum.used.fp)));
8662
8663 goto close_shadow;
8664 }
8665 break;
8666
8667 case JUMP_INSN:
8668 case CALL_INSN:
8669 case CODE_LABEL:
8670 goto close_shadow;
8671
8672 default:
8673 gcc_unreachable ();
8674 }
8675 }
8676 else
8677 {
8678 close_shadow:
8679 n = emit_insn_before (gen_trapb (), i);
8680 PUT_MODE (n, TImode);
8681 PUT_MODE (i, TImode);
8682 trap_pending = 0;
8683 shadow.used.i = 0;
8684 shadow.used.fp = 0;
8685 shadow.used.mem = 0;
8686 shadow.defd = shadow.used;
8687 }
8688 }
8689 }
8690
8691 if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC)
8692 && NONJUMP_INSN_P (i)
8693 && GET_CODE (PATTERN (i)) != USE
8694 && GET_CODE (PATTERN (i)) != CLOBBER
8695 && get_attr_trap (i) == TRAP_YES)
8696 {
8697 if (optimize && !trap_pending)
8698 summarize_insn (PATTERN (i), &shadow, 0);
8699 trap_pending = 1;
8700 }
8701 }
8702 }
8703 \f
8704 /* Alpha can only issue instruction groups simultaneously if they are
8705 suitably aligned. This is very processor-specific. */
8706 /* There are a number of entries in alphaev4_insn_pipe and alphaev5_insn_pipe
8707 that are marked "fake". These instructions do not exist on that target,
8708 but it is possible to see these insns with deranged combinations of
8709 command-line options, such as "-mtune=ev4 -mmax". Instead of aborting,
8710 choose a result at random. */
8711
8712 enum alphaev4_pipe {
8713 EV4_STOP = 0,
8714 EV4_IB0 = 1,
8715 EV4_IB1 = 2,
8716 EV4_IBX = 4
8717 };
8718
8719 enum alphaev5_pipe {
8720 EV5_STOP = 0,
8721 EV5_NONE = 1,
8722 EV5_E01 = 2,
8723 EV5_E0 = 4,
8724 EV5_E1 = 8,
8725 EV5_FAM = 16,
8726 EV5_FA = 32,
8727 EV5_FM = 64
8728 };
8729
8730 static enum alphaev4_pipe
8731 alphaev4_insn_pipe (rtx insn)
8732 {
8733 if (recog_memoized (insn) < 0)
8734 return EV4_STOP;
8735 if (get_attr_length (insn) != 4)
8736 return EV4_STOP;
8737
8738 switch (get_attr_type (insn))
8739 {
8740 case TYPE_ILD:
8741 case TYPE_LDSYM:
8742 case TYPE_FLD:
8743 case TYPE_LD_L:
8744 return EV4_IBX;
8745
8746 case TYPE_IADD:
8747 case TYPE_ILOG:
8748 case TYPE_ICMOV:
8749 case TYPE_ICMP:
8750 case TYPE_FST:
8751 case TYPE_SHIFT:
8752 case TYPE_IMUL:
8753 case TYPE_FBR:
8754 case TYPE_MVI: /* fake */
8755 return EV4_IB0;
8756
8757 case TYPE_IST:
8758 case TYPE_MISC:
8759 case TYPE_IBR:
8760 case TYPE_JSR:
8761 case TYPE_CALLPAL:
8762 case TYPE_FCPYS:
8763 case TYPE_FCMOV:
8764 case TYPE_FADD:
8765 case TYPE_FDIV:
8766 case TYPE_FMUL:
8767 case TYPE_ST_C:
8768 case TYPE_MB:
8769 case TYPE_FSQRT: /* fake */
8770 case TYPE_FTOI: /* fake */
8771 case TYPE_ITOF: /* fake */
8772 return EV4_IB1;
8773
8774 default:
8775 gcc_unreachable ();
8776 }
8777 }
8778
8779 static enum alphaev5_pipe
8780 alphaev5_insn_pipe (rtx insn)
8781 {
8782 if (recog_memoized (insn) < 0)
8783 return EV5_STOP;
8784 if (get_attr_length (insn) != 4)
8785 return EV5_STOP;
8786
8787 switch (get_attr_type (insn))
8788 {
8789 case TYPE_ILD:
8790 case TYPE_FLD:
8791 case TYPE_LDSYM:
8792 case TYPE_IADD:
8793 case TYPE_ILOG:
8794 case TYPE_ICMOV:
8795 case TYPE_ICMP:
8796 return EV5_E01;
8797
8798 case TYPE_IST:
8799 case TYPE_FST:
8800 case TYPE_SHIFT:
8801 case TYPE_IMUL:
8802 case TYPE_MISC:
8803 case TYPE_MVI:
8804 case TYPE_LD_L:
8805 case TYPE_ST_C:
8806 case TYPE_MB:
8807 case TYPE_FTOI: /* fake */
8808 case TYPE_ITOF: /* fake */
8809 return EV5_E0;
8810
8811 case TYPE_IBR:
8812 case TYPE_JSR:
8813 case TYPE_CALLPAL:
8814 return EV5_E1;
8815
8816 case TYPE_FCPYS:
8817 return EV5_FAM;
8818
8819 case TYPE_FBR:
8820 case TYPE_FCMOV:
8821 case TYPE_FADD:
8822 case TYPE_FDIV:
8823 case TYPE_FSQRT: /* fake */
8824 return EV5_FA;
8825
8826 case TYPE_FMUL:
8827 return EV5_FM;
8828
8829 default:
8830 gcc_unreachable ();
8831 }
8832 }
8833
8834 /* IN_USE is a mask of the slots currently filled within the insn group.
8835 The mask bits come from alphaev4_pipe above. If EV4_IBX is set, then
8836 the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1.
8837
8838 LEN is, of course, the length of the group in bytes. */
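
   For instance (illustration only): an integer load (EV4_IBX) followed by
   an add (EV4_IB0) can share a group, since the hardware may issue the load
   down IB1; two adds cannot, so the second add starts a new group.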
8839
8840 static rtx
8841 alphaev4_next_group (rtx insn, int *pin_use, int *plen)
8842 {
8843 int len, in_use;
8844
8845 len = in_use = 0;
8846
8847 if (! INSN_P (insn)
8848 || GET_CODE (PATTERN (insn)) == CLOBBER
8849 || GET_CODE (PATTERN (insn)) == USE)
8850 goto next_and_done;
8851
8852 while (1)
8853 {
8854 enum alphaev4_pipe pipe;
8855
8856 pipe = alphaev4_insn_pipe (insn);
8857 switch (pipe)
8858 {
8859 case EV4_STOP:
8860 /* Force complex instructions to start new groups. */
8861 if (in_use)
8862 goto done;
8863
8864 /* If this is a completely unrecognized insn, it's an asm.
8865 We don't know how long it is, so record length as -1 to
8866 signal a needed realignment. */
8867 if (recog_memoized (insn) < 0)
8868 len = -1;
8869 else
8870 len = get_attr_length (insn);
8871 goto next_and_done;
8872
8873 case EV4_IBX:
8874 if (in_use & EV4_IB0)
8875 {
8876 if (in_use & EV4_IB1)
8877 goto done;
8878 in_use |= EV4_IB1;
8879 }
8880 else
8881 in_use |= EV4_IB0 | EV4_IBX;
8882 break;
8883
8884 case EV4_IB0:
8885 if (in_use & EV4_IB0)
8886 {
8887 if (!(in_use & EV4_IBX) || (in_use & EV4_IB1))
8888 goto done;
8889 in_use |= EV4_IB1;
8890 }
8891 in_use |= EV4_IB0;
8892 break;
8893
8894 case EV4_IB1:
8895 if (in_use & EV4_IB1)
8896 goto done;
8897 in_use |= EV4_IB1;
8898 break;
8899
8900 default:
8901 gcc_unreachable ();
8902 }
8903 len += 4;
8904
8905 /* Haifa doesn't do well scheduling branches. */
8906 if (JUMP_P (insn))
8907 goto next_and_done;
8908
8909 next:
8910 insn = next_nonnote_insn (insn);
8911
8912 if (!insn || ! INSN_P (insn))
8913 goto done;
8914
8915 /* Let Haifa tell us where it thinks insn group boundaries are. */
8916 if (GET_MODE (insn) == TImode)
8917 goto done;
8918
8919 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
8920 goto next;
8921 }
8922
8923 next_and_done:
8924 insn = next_nonnote_insn (insn);
8925
8926 done:
8927 *plen = len;
8928 *pin_use = in_use;
8929 return insn;
8930 }
8931
8932 /* IN_USE is a mask of the slots currently filled within the insn group.
8933 The mask bits come from alphaev5_pipe above. If EV5_E01 is set, then
8934 the insn in EV5_E0 can be swapped by the hardware into EV5_E1.
8935
8936 LEN is, of course, the length of the group in bytes. */
8937
8938 static rtx
8939 alphaev5_next_group (rtx insn, int *pin_use, int *plen)
8940 {
8941 int len, in_use;
8942
8943 len = in_use = 0;
8944
8945 if (! INSN_P (insn)
8946 || GET_CODE (PATTERN (insn)) == CLOBBER
8947 || GET_CODE (PATTERN (insn)) == USE)
8948 goto next_and_done;
8949
8950 while (1)
8951 {
8952 enum alphaev5_pipe pipe;
8953
8954 pipe = alphaev5_insn_pipe (insn);
8955 switch (pipe)
8956 {
8957 case EV5_STOP:
8958 /* Force complex instructions to start new groups. */
8959 if (in_use)
8960 goto done;
8961
8962 /* If this is a completely unrecognized insn, it's an asm.
8963 We don't know how long it is, so record length as -1 to
8964 signal a needed realignment. */
8965 if (recog_memoized (insn) < 0)
8966 len = -1;
8967 else
8968 len = get_attr_length (insn);
8969 goto next_and_done;
8970
8971 /* ??? In most of the places below, we would like to assert that these
8972 cases never happen, as that would indicate an error either in Haifa
8973 or in the scheduling description. Unfortunately, Haifa never
8974 schedules the last instruction of the BB, so we don't have
8975 an accurate TI bit to go by. */
8976 case EV5_E01:
8977 if (in_use & EV5_E0)
8978 {
8979 if (in_use & EV5_E1)
8980 goto done;
8981 in_use |= EV5_E1;
8982 }
8983 else
8984 in_use |= EV5_E0 | EV5_E01;
8985 break;
8986
8987 case EV5_E0:
8988 if (in_use & EV5_E0)
8989 {
8990 if (!(in_use & EV5_E01) || (in_use & EV5_E1))
8991 goto done;
8992 in_use |= EV5_E1;
8993 }
8994 in_use |= EV5_E0;
8995 break;
8996
8997 case EV5_E1:
8998 if (in_use & EV5_E1)
8999 goto done;
9000 in_use |= EV5_E1;
9001 break;
9002
9003 case EV5_FAM:
9004 if (in_use & EV5_FA)
9005 {
9006 if (in_use & EV5_FM)
9007 goto done;
9008 in_use |= EV5_FM;
9009 }
9010 else
9011 in_use |= EV5_FA | EV5_FAM;
9012 break;
9013
9014 case EV5_FA:
9015 if (in_use & EV5_FA)
9016 goto done;
9017 in_use |= EV5_FA;
9018 break;
9019
9020 case EV5_FM:
9021 if (in_use & EV5_FM)
9022 goto done;
9023 in_use |= EV5_FM;
9024 break;
9025
9026 case EV5_NONE:
9027 break;
9028
9029 default:
9030 gcc_unreachable ();
9031 }
9032 len += 4;
9033
9034 /* Haifa doesn't do well scheduling branches. */
9035 /* ??? If this is predicted not-taken, slotting continues, except
9036 that no more IBR, FBR, or JSR insns may be slotted. */
9037 if (JUMP_P (insn))
9038 goto next_and_done;
9039
9040 next:
9041 insn = next_nonnote_insn (insn);
9042
9043 if (!insn || ! INSN_P (insn))
9044 goto done;
9045
9046 /* Let Haifa tell us where it thinks insn group boundaries are. */
9047 if (GET_MODE (insn) == TImode)
9048 goto done;
9049
9050 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9051 goto next;
9052 }
9053
9054 next_and_done:
9055 insn = next_nonnote_insn (insn);
9056
9057 done:
9058 *plen = len;
9059 *pin_use = in_use;
9060 return insn;
9061 }
9062
9063 static rtx
9064 alphaev4_next_nop (int *pin_use)
9065 {
9066 int in_use = *pin_use;
9067 rtx nop;
9068
9069 if (!(in_use & EV4_IB0))
9070 {
9071 in_use |= EV4_IB0;
9072 nop = gen_nop ();
9073 }
9074 else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX)
9075 {
9076 in_use |= EV4_IB1;
9077 nop = gen_nop ();
9078 }
9079 else if (TARGET_FP && !(in_use & EV4_IB1))
9080 {
9081 in_use |= EV4_IB1;
9082 nop = gen_fnop ();
9083 }
9084 else
9085 nop = gen_unop ();
9086
9087 *pin_use = in_use;
9088 return nop;
9089 }
9090
9091 static rtx
9092 alphaev5_next_nop (int *pin_use)
9093 {
9094 int in_use = *pin_use;
9095 rtx nop;
9096
9097 if (!(in_use & EV5_E1))
9098 {
9099 in_use |= EV5_E1;
9100 nop = gen_nop ();
9101 }
9102 else if (TARGET_FP && !(in_use & EV5_FA))
9103 {
9104 in_use |= EV5_FA;
9105 nop = gen_fnop ();
9106 }
9107 else if (TARGET_FP && !(in_use & EV5_FM))
9108 {
9109 in_use |= EV5_FM;
9110 nop = gen_fnop ();
9111 }
9112 else
9113 nop = gen_unop ();
9114
9115 *pin_use = in_use;
9116 return nop;
9117 }
9118
9119 /* The instruction group alignment main loop. */
9120
9121 static void
9122 alpha_align_insns (unsigned int max_align,
9123 rtx (*next_group) (rtx, int *, int *),
9124 rtx (*next_nop) (int *))
9125 {
9126 /* ALIGN is the known alignment for the insn group. */
9127 unsigned int align;
9128 /* OFS is the offset of the current insn in the insn group. */
9129 int ofs;
9130 int prev_in_use, in_use, len, ldgp;
9131 rtx i, next;
9132
9133 /* Let shorten_branches take care of assigning alignments to code labels. */
9134 shorten_branches (get_insns ());
9135
9136 if (align_functions < 4)
9137 align = 4;
9138 else if ((unsigned int) align_functions < max_align)
9139 align = align_functions;
9140 else
9141 align = max_align;
9142
9143 ofs = prev_in_use = 0;
9144 i = get_insns ();
9145 if (NOTE_P (i))
9146 i = next_nonnote_insn (i);
9147
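  /* The initial ldgp sequence is two 4-byte insns (an ldah/lda pair), hence
     the 8 bytes below that must not have padding inserted into them.  */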
9148 ldgp = alpha_function_needs_gp ? 8 : 0;
9149
9150 while (i)
9151 {
9152 next = (*next_group) (i, &in_use, &len);
9153
9154 /* When we see a label, resync alignment etc. */
9155 if (LABEL_P (i))
9156 {
9157 unsigned int new_align = 1 << label_to_alignment (i);
9158
9159 if (new_align >= align)
9160 {
9161 align = new_align < max_align ? new_align : max_align;
9162 ofs = 0;
9163 }
9164
9165 else if (ofs & (new_align-1))
9166 ofs = (ofs | (new_align-1)) + 1;
9167 gcc_assert (!len);
9168 }
9169
9170 /* Handle complex instructions specially. */
9171 else if (in_use == 0)
9172 {
9173 /* Asms will have length < 0. This is a signal that we have
9174 lost alignment knowledge. Assume, however, that the asm
9175 will not mis-align instructions. */
9176 if (len < 0)
9177 {
9178 ofs = 0;
9179 align = 4;
9180 len = 0;
9181 }
9182 }
9183
9184 /* If the known alignment is smaller than the recognized insn group,
9185 realign the output. */
9186 else if ((int) align < len)
9187 {
9188 unsigned int new_log_align = len > 8 ? 4 : 3;
9189 rtx prev, where;
9190
9191 where = prev = prev_nonnote_insn (i);
9192 if (!where || !LABEL_P (where))
9193 where = i;
9194
9195 /* Can't realign between a call and its gp reload. */
9196 if (! (TARGET_EXPLICIT_RELOCS
9197 && prev && CALL_P (prev)))
9198 {
9199 emit_insn_before (gen_realign (GEN_INT (new_log_align)), where);
9200 align = 1 << new_log_align;
9201 ofs = 0;
9202 }
9203 }
9204
9205 /* We may not insert padding inside the initial ldgp sequence. */
9206 else if (ldgp > 0)
9207 ldgp -= len;
9208
9209 /* If the group won't fit in the same INT16 as the previous,
9210 we need to add padding to keep the group together. Rather
9211 than simply leaving the insn filling to the assembler, we
9212 can make use of the knowledge of what sorts of instructions
9213 were issued in the previous group to make sure that all of
9214 the added nops are really free. */
9215 else if (ofs + len > (int) align)
9216 {
9217 int nop_count = (align - ofs) / 4;
9218 rtx where;
9219
9220 /* Insert nops before labels, branches, and calls to truly merge
9221 the execution of the nops with the previous instruction group. */
9222 where = prev_nonnote_insn (i);
9223 if (where)
9224 {
9225 if (LABEL_P (where))
9226 {
9227 rtx where2 = prev_nonnote_insn (where);
9228 if (where2 && JUMP_P (where2))
9229 where = where2;
9230 }
9231 else if (NONJUMP_INSN_P (where))
9232 where = i;
9233 }
9234 else
9235 where = i;
9236
9237 do
9238 emit_insn_before ((*next_nop)(&prev_in_use), where);
9239 while (--nop_count);
9240 ofs = 0;
9241 }
9242
9243 ofs = (ofs + len) & (align - 1);
9244 prev_in_use = in_use;
9245 i = next;
9246 }
9247 }
9248
9249 /* Insert an unop between a sibcall or noreturn function call and its GP load. */
9250
9251 static void
9252 alpha_pad_function_end (void)
9253 {
9254 rtx insn, next;
9255
9256 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9257 {
9258 if (!CALL_P (insn)
9259 || !(SIBLING_CALL_P (insn)
9260 || find_reg_note (insn, REG_NORETURN, NULL_RTX)))
9261 continue;
9262
9263 /* Make sure we do not split a call and its corresponding
9264 CALL_ARG_LOCATION note. */
9265 next = NEXT_INSN (insn);
9266 if (next == NULL)
9267 continue;
9268 if (BARRIER_P (next))
9269 {
9270 next = NEXT_INSN (next);
9271 if (next == NULL)
9272 continue;
9273 }
9274 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
9275 insn = next;
9276
9277 next = next_active_insn (insn);
9278 if (next)
9279 {
9280 rtx pat = PATTERN (next);
9281
9282 if (GET_CODE (pat) == SET
9283 && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
9284 && XINT (SET_SRC (pat), 1) == UNSPECV_LDGP1)
9285 emit_insn_after (gen_unop (), insn);
9286 }
9287 }
9288 }
9289 \f
9290 /* Machine dependent reorg pass. */
9291
9292 static void
9293 alpha_reorg (void)
9294 {
9295 /* Workaround for a linker error that triggers when an exception
9296 handler immediately follows a sibcall or a noreturn function.
9297
9298 In the sibcall case:
9299
9300 The instruction stream from an object file:
9301
9302 1d8: 00 00 fb 6b jmp (t12)
9303 1dc: 00 00 ba 27 ldah gp,0(ra)
9304 1e0: 00 00 bd 23 lda gp,0(gp)
9305 1e4: 00 00 7d a7 ldq t12,0(gp)
9306 1e8: 00 40 5b 6b jsr ra,(t12),1ec <__funcZ+0x1ec>
9307
9308 was converted in the final link pass to:
9309
9310 12003aa88: 67 fa ff c3 br 120039428 <...>
9311 12003aa8c: 00 00 fe 2f unop
9312 12003aa90: 00 00 fe 2f unop
9313 12003aa94: 48 83 7d a7 ldq t12,-31928(gp)
9314 12003aa98: 00 40 5b 6b jsr ra,(t12),12003aa9c <__func+0x1ec>
9315
9316 And in the noreturn case:
9317
9318 The instruction stream from an object file:
9319
9320 54: 00 40 5b 6b jsr ra,(t12),58 <__func+0x58>
9321 58: 00 00 ba 27 ldah gp,0(ra)
9322 5c: 00 00 bd 23 lda gp,0(gp)
9323 60: 00 00 7d a7 ldq t12,0(gp)
9324 64: 00 40 5b 6b jsr ra,(t12),68 <__func+0x68>
9325
9326 was converted in the final link pass to:
9327
9328 fdb24: a0 03 40 d3 bsr ra,fe9a8 <_called_func+0x8>
9329 fdb28: 00 00 fe 2f unop
9330 fdb2c: 00 00 fe 2f unop
9331 fdb30: 30 82 7d a7 ldq t12,-32208(gp)
9332 fdb34: 00 40 5b 6b jsr ra,(t12),fdb38 <__func+0x68>
9333
9334 GP load instructions were wrongly cleared by the linker relaxation
9335 pass. This workaround prevents removal of GP loads by inserting
9336 an unop instruction between a sibcall or noreturn function call and
9337 the exception handler prologue. */
9338
9339 if (current_function_has_exception_handlers ())
9340 alpha_pad_function_end ();
9341
9342 if (alpha_tp != ALPHA_TP_PROG || flag_exceptions)
9343 alpha_handle_trap_shadows ();
9344
9345 /* Due to the number of extra trapb insns, don't bother fixing up
9346 alignment when trap precision is instruction. Moreover, we can
9347 only do our job when sched2 is run. */
9348 if (optimize && !optimize_size
9349 && alpha_tp != ALPHA_TP_INSN
9350 && flag_schedule_insns_after_reload)
9351 {
9352 if (alpha_tune == PROCESSOR_EV4)
9353 alpha_align_insns (8, alphaev4_next_group, alphaev4_next_nop);
9354 else if (alpha_tune == PROCESSOR_EV5)
9355 alpha_align_insns (16, alphaev5_next_group, alphaev5_next_nop);
9356 }
9357 }
9358 \f
9359 static void
9360 alpha_file_start (void)
9361 {
9362 default_file_start ();
9363
9364 fputs ("\t.set noreorder\n", asm_out_file);
9365 fputs ("\t.set volatile\n", asm_out_file);
9366 if (TARGET_ABI_OSF)
9367 fputs ("\t.set noat\n", asm_out_file);
9368 if (TARGET_EXPLICIT_RELOCS)
9369 fputs ("\t.set nomacro\n", asm_out_file);
9370 if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX)
9371 {
9372 const char *arch;
9373
9374 if (alpha_cpu == PROCESSOR_EV6 || TARGET_FIX || TARGET_CIX)
9375 arch = "ev6";
9376 else if (TARGET_MAX)
9377 arch = "pca56";
9378 else if (TARGET_BWX)
9379 arch = "ev56";
9380 else if (alpha_cpu == PROCESSOR_EV5)
9381 arch = "ev5";
9382 else
9383 arch = "ev4";
9384
9385 fprintf (asm_out_file, "\t.arch %s\n", arch);
9386 }
9387 }
9388
9389 /* Since we don't have a .dynbss section, we should not allow global
9390 relocations in the .rodata section. */
9391
9392 static int
9393 alpha_elf_reloc_rw_mask (void)
9394 {
9395 return flag_pic ? 3 : 2;
9396 }
9397
9398 /* Return a section for X. The only special thing we do here is to
9399 honor small data. */
9400
9401 static section *
9402 alpha_elf_select_rtx_section (enum machine_mode mode, rtx x,
9403 unsigned HOST_WIDE_INT align)
9404 {
9405 if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value)
9406 /* ??? Consider using mergeable sdata sections. */
9407 return sdata_section;
9408 else
9409 return default_elf_select_rtx_section (mode, x, align);
9410 }
9411
9412 static unsigned int
9413 alpha_elf_section_type_flags (tree decl, const char *name, int reloc)
9414 {
9415 unsigned int flags = 0;
9416
9417 if (strcmp (name, ".sdata") == 0
9418 || strncmp (name, ".sdata.", 7) == 0
9419 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
9420 || strcmp (name, ".sbss") == 0
9421 || strncmp (name, ".sbss.", 6) == 0
9422 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
9423 flags = SECTION_SMALL;
9424
9425 flags |= default_section_type_flags (decl, name, reloc);
9426 return flags;
9427 }
9428 \f
9429 /* Structure to collect function names for final output in link section. */
9430 /* Note that items marked with GTY can't be ifdef'ed out. */
9431
9432 enum reloc_kind
9433 {
9434 KIND_LINKAGE,
9435 KIND_CODEADDR
9436 };
9437
9438 struct GTY(()) alpha_links
9439 {
9440 rtx func;
9441 rtx linkage;
9442 enum reloc_kind rkind;
9443 };
9444
9445 #if TARGET_ABI_OPEN_VMS
9446
9447 /* Return the VMS argument type corresponding to MODE. */
9448
9449 enum avms_arg_type
9450 alpha_arg_type (enum machine_mode mode)
9451 {
9452 switch (mode)
9453 {
9454 case SFmode:
9455 return TARGET_FLOAT_VAX ? FF : FS;
9456 case DFmode:
9457 return TARGET_FLOAT_VAX ? FD : FT;
9458 default:
9459 return I64;
9460 }
9461 }
9462
9463 /* Return an rtx for an integer representing the VMS Argument Information
9464 register value. */
9465
9466 rtx
9467 alpha_arg_info_reg_val (CUMULATIVE_ARGS cum)
9468 {
9469 unsigned HOST_WIDE_INT regval = cum.num_args;
9470 int i;
9471
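  /* A sketch of the layout implied by the loop below: the argument count
     sits in the low bits, and each of the first six arguments gets a 3-bit
     type code starting at bit 8.  */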
9472 for (i = 0; i < 6; i++)
9473 regval |= ((int) cum.atypes[i]) << (i * 3 + 8);
9474
9475 return GEN_INT (regval);
9476 }
9477 \f
9478
9479 /* Return a SYMBOL_REF representing the reference to the .linkage entry
9480 of function FUNC built for calls made from CFUNDECL. LFLAG is 1 if
9481 this is the reference to the linkage pointer value, 0 if this is the
9482 reference to the function entry value. RFLAG is 1 if this is a reduced
9483 reference (code address only), 0 if this is a full reference. */
9484
9485 rtx
9486 alpha_use_linkage (rtx func, bool lflag, bool rflag)
9487 {
9488 struct alpha_links *al = NULL;
9489 const char *name = XSTR (func, 0);
9490
9491 if (cfun->machine->links)
9492 {
9493 splay_tree_node lnode;
9494
9495 /* Is this name already defined? */
9496 lnode = splay_tree_lookup (cfun->machine->links, (splay_tree_key) name);
9497 if (lnode)
9498 al = (struct alpha_links *) lnode->value;
9499 }
9500 else
9501 cfun->machine->links = splay_tree_new_ggc
9502 ((splay_tree_compare_fn) strcmp,
9503 ggc_alloc_splay_tree_str_alpha_links_splay_tree_s,
9504 ggc_alloc_splay_tree_str_alpha_links_splay_tree_node_s);
9505
9506 if (al == NULL)
9507 {
9508 size_t buf_len;
9509 char *linksym;
9510 tree id;
9511
9512 if (name[0] == '*')
9513 name++;
9514
9515 /* Follow transparent alias, as this is used for CRTL translations. */
9516 id = maybe_get_identifier (name);
9517 if (id)
9518 {
9519 while (IDENTIFIER_TRANSPARENT_ALIAS (id))
9520 id = TREE_CHAIN (id);
9521 name = IDENTIFIER_POINTER (id);
9522 }
9523
9524 buf_len = strlen (name) + 8 + 9;
9525 linksym = (char *) alloca (buf_len);
9526 snprintf (linksym, buf_len, "$%d..%s..lk", cfun->funcdef_no, name);
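      /* E.g. for funcdef_no 5 and a function named "foo" this yields the
	 linkage symbol "$5..foo..lk".  */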
9527
9528 al = ggc_alloc_alpha_links ();
9529 al->func = func;
9530 al->linkage = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (linksym));
9531
9532 splay_tree_insert (cfun->machine->links,
9533 (splay_tree_key) ggc_strdup (name),
9534 (splay_tree_value) al);
9535 }
9536
9537 al->rkind = rflag ? KIND_CODEADDR : KIND_LINKAGE;
9538
9539 if (lflag)
9540 return gen_rtx_MEM (Pmode, plus_constant (Pmode, al->linkage, 8));
9541 else
9542 return al->linkage;
9543 }
9544
9545 static int
9546 alpha_write_one_linkage (splay_tree_node node, void *data)
9547 {
9548 const char *const name = (const char *) node->key;
9549 struct alpha_links *link = (struct alpha_links *) node->value;
9550 FILE *stream = (FILE *) data;
9551
9552 ASM_OUTPUT_INTERNAL_LABEL (stream, XSTR (link->linkage, 0));
9553 if (link->rkind == KIND_CODEADDR)
9554 {
9555 /* External and used, request code address. */
9556 fprintf (stream, "\t.code_address ");
9557 }
9558 else
9559 {
9560 if (!SYMBOL_REF_EXTERNAL_P (link->func)
9561 && SYMBOL_REF_LOCAL_P (link->func))
9562 {
9563 /* Locally defined, build linkage pair. */
9564 fprintf (stream, "\t.quad %s..en\n", name);
9565 fprintf (stream, "\t.quad ");
9566 }
9567 else
9568 {
9569 /* External, request linkage pair. */
9570 fprintf (stream, "\t.linkage ");
9571 }
9572 }
9573 assemble_name (stream, name);
9574 fputs ("\n", stream);
9575
9576 return 0;
9577 }
9578
9579 static void
9580 alpha_write_linkage (FILE *stream, const char *funname)
9581 {
9582 fprintf (stream, "\t.link\n");
9583 fprintf (stream, "\t.align 3\n");
9584 in_section = NULL;
9585
9586 #ifdef TARGET_VMS_CRASH_DEBUG
9587 fputs ("\t.name ", stream);
9588 assemble_name (stream, funname);
9589 fputs ("..na\n", stream);
9590 #endif
9591
9592 ASM_OUTPUT_LABEL (stream, funname);
9593 fprintf (stream, "\t.pdesc ");
9594 assemble_name (stream, funname);
9595 fprintf (stream, "..en,%s\n",
9596 alpha_procedure_type == PT_STACK ? "stack"
9597 : alpha_procedure_type == PT_REGISTER ? "reg" : "null");
9598
9599 if (cfun->machine->links)
9600 {
9601 splay_tree_foreach (cfun->machine->links, alpha_write_one_linkage, stream);
9602 /* splay_tree_delete (func->links); */
9603 }
9604 }
9605
9606 /* Switch to an arbitrary section NAME with attributes as specified
9607 by FLAGS. ALIGN specifies any known alignment requirements for
9608 the section; 0 if the default should be used. */
9609
9610 static void
9611 vms_asm_named_section (const char *name, unsigned int flags,
9612 tree decl ATTRIBUTE_UNUSED)
9613 {
9614 fputc ('\n', asm_out_file);
9615 fprintf (asm_out_file, ".section\t%s", name);
9616
9617 if (flags & SECTION_DEBUG)
9618 fprintf (asm_out_file, ",NOWRT");
9619
9620 fputc ('\n', asm_out_file);
9621 }
9622
9623 /* Record an element in the table of global constructors. SYMBOL is
9624 a SYMBOL_REF of the function to be called; PRIORITY is a number
9625 between 0 and MAX_INIT_PRIORITY.
9626
9627 Differs from default_ctors_section_asm_out_constructor in that the
9628 width of the .ctors entry is always 64 bits, rather than the 32 bits
9629 used by a normal pointer. */
9630
9631 static void
9632 vms_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9633 {
9634 switch_to_section (ctors_section);
9635 assemble_align (BITS_PER_WORD);
9636 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9637 }
9638
9639 static void
9640 vms_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9641 {
9642 switch_to_section (dtors_section);
9643 assemble_align (BITS_PER_WORD);
9644 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9645 }
9646 #else
9647 rtx
9648 alpha_use_linkage (rtx func ATTRIBUTE_UNUSED,
9649 bool lflag ATTRIBUTE_UNUSED,
9650 bool rflag ATTRIBUTE_UNUSED)
9651 {
9652 return NULL_RTX;
9653 }
9654
9655 #endif /* TARGET_ABI_OPEN_VMS */
9656 \f
9657 static void
9658 alpha_init_libfuncs (void)
9659 {
9660 if (TARGET_ABI_OPEN_VMS)
9661 {
9662 /* Use the VMS runtime library functions for division and
9663 remainder. */
9664 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
9665 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
9666 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
9667 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
9668 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
9669 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
9670 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
9671 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
9672 abort_libfunc = init_one_libfunc ("decc$abort");
9673 memcmp_libfunc = init_one_libfunc ("decc$memcmp");
9674 #ifdef MEM_LIBFUNCS_INIT
9675 MEM_LIBFUNCS_INIT;
9676 #endif
9677 }
9678 }
9679
9680 /* On the Alpha, we use this to disable the floating-point registers
9681 when they don't exist. */
9682
9683 static void
9684 alpha_conditional_register_usage (void)
9685 {
9686 int i;
9687 if (! TARGET_FPREGS)
9688 for (i = 32; i < 63; i++)
9689 fixed_regs[i] = call_used_regs[i] = 1;
9690 }
9691
9692 /* Canonicalize a comparison from one we don't have to one we do have. */
9693
9694 static void
9695 alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
9696 bool op0_preserve_value)
9697 {
9698 if (!op0_preserve_value
9699 && (*code == GE || *code == GT || *code == GEU || *code == GTU)
9700 && (REG_P (*op1) || *op1 == const0_rtx))
9701 {
9702 rtx tem = *op0;
9703 *op0 = *op1;
9704 *op1 = tem;
9705 *code = (int)swap_condition ((enum rtx_code)*code);
9706 }
9707
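  /* Alpha operate-format instructions take an 8-bit unsigned literal, so
     255 can be encoded directly where 256 cannot; hence x < 256 is
     rewritten below as x <= 255 (and likewise for the unsigned form).  */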
  if ((*code == LT || *code == LTU)
      && CONST_INT_P (*op1) && INTVAL (*op1) == 256)
    {
      *code = *code == LT ? LE : LEU;
      *op1 = GEN_INT (255);
    }
}
\f
/* Initialize the GCC target structure.  */
#if TARGET_ABI_OPEN_VMS
# undef TARGET_ATTRIBUTE_TABLE
# define TARGET_ATTRIBUTE_TABLE vms_attribute_table
# undef TARGET_CAN_ELIMINATE
# define TARGET_CAN_ELIMINATE alpha_vms_can_eliminate
#endif

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P alpha_in_small_data_p

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

/* Default unaligned ops are provided for ELF systems.  To get unaligned
   data for non-ELF systems, we have to turn off auto alignment.  */
#if TARGET_ABI_OPEN_VMS
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.align 0\n\t.word\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.align 0\n\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.align 0\n\t.quad\t"
#endif
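
/* Note (assumed VMS assembler behaviour): the ".align 0" prefix suppresses
   the automatic alignment the assembler would otherwise apply before
   .word/.long/.quad, so an unaligned 32-bit field is emitted as

	.align 0
	.long	value

   at whatever offset the surrounding data layout requires.  */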

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK alpha_elf_reloc_rw_mask
#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION alpha_elf_select_rtx_section
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS alpha_elf_section_type_flags

#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE alpha_output_function_end_prologue

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS alpha_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS alpha_legitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P alpha_mode_dependent_address_p

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START alpha_file_start

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST alpha_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE alpha_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  alpha_multipass_dfa_lookahead

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL alpha_builtin_decl
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS alpha_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN alpha_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN alpha_fold_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL alpha_function_ok_for_sibcall
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P alpha_cannot_copy_insn_p
#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P alpha_legitimate_constant_p
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM alpha_cannot_force_const_mem

#if TARGET_ABI_OSF
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK alpha_output_mi_thunk_osf
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
#undef TARGET_STDARG_OPTIMIZE_HOOK
#define TARGET_STDARG_OPTIMIZE_HOOK alpha_stdarg_optimize_hook
#endif

/* Use 16-bit section anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
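
/* Clarifying note (not from the original sources): with the limits above,
   several nearby objects placed in one block can share a single anchor
   symbol and be reached as "anchor address + offset", where the offset has
   to fit the signed 16-bit displacement of an Alpha load or store, hence
   the range [-0x8000, 0x7fff].  */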

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS alpha_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY alpha_return_in_memory
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE alpha_pass_by_reference
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS alpha_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG alpha_split_complex_arg
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR alpha_gimplify_va_arg
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES alpha_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG alpha_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE alpha_function_arg_advance
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT alpha_trampoline_init

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS alpha_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD alpha_secondary_reload

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P alpha_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P alpha_vector_mode_supported_p

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST alpha_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start

/* The Alpha architecture does not require sequential consistency.  See
   http://www.cs.umd.edu/~pugh/java/memoryModel/AlphaReordering.html
   for an example of how it can be violated in practice.  */
#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING true
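
/* Illustrative example (not taken from the sources): on Alpha even a
   data-dependent load may observe stale data, so a consumer such as

	while (p == NULL)
	  ;
	value = p->field;

   needs a read memory barrier (the MB instruction) between acquiring the
   pointer and dereferencing it; the middle end therefore cannot assume
   any ordering stronger than what the hook above advertises.  */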

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE alpha_option_override

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE alpha_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P alpha_legitimate_address_p

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE alpha_conditional_register_usage

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON alpha_canonicalize_comparison

struct gcc_target targetm = TARGET_INITIALIZER;

\f
#include "gt-alpha.h"