1 /* Subroutines used for code generation on the DEC Alpha.
2 Copyright (C) 1992-2017 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "stringpool.h"
30 #include "attribs.h"
31 #include "memmodel.h"
32 #include "gimple.h"
33 #include "df.h"
34 #include "predict.h"
35 #include "tm_p.h"
36 #include "ssa.h"
37 #include "expmed.h"
38 #include "optabs.h"
39 #include "regs.h"
40 #include "emit-rtl.h"
41 #include "recog.h"
42 #include "diagnostic-core.h"
43 #include "alias.h"
44 #include "fold-const.h"
45 #include "stor-layout.h"
46 #include "calls.h"
47 #include "varasm.h"
48 #include "output.h"
49 #include "insn-attr.h"
50 #include "explow.h"
51 #include "expr.h"
52 #include "reload.h"
53 #include "except.h"
54 #include "common/common-target.h"
55 #include "debug.h"
56 #include "langhooks.h"
57 #include "cfgrtl.h"
58 #include "tree-pass.h"
59 #include "context.h"
60 #include "gimple-iterator.h"
61 #include "gimplify.h"
62 #include "tree-stdarg.h"
63 #include "tm-constrs.h"
64 #include "libfuncs.h"
65 #include "params.h"
66 #include "builtins.h"
67 #include "rtl-iter.h"
68
69 /* This file should be included last. */
70 #include "target-def.h"
71
72 /* Specify which cpu to schedule for. */
73 enum processor_type alpha_tune;
74
75 /* Which cpu we're generating code for. */
76 enum processor_type alpha_cpu;
77
78 static const char * const alpha_cpu_name[] =
79 {
80 "ev4", "ev5", "ev6"
81 };
82
83 /* Specify how accurate floating-point traps need to be. */
84
85 enum alpha_trap_precision alpha_tp;
86
87 /* Specify the floating-point rounding mode. */
88
89 enum alpha_fp_rounding_mode alpha_fprm;
90
91 /* Specify which things cause traps. */
92
93 enum alpha_fp_trap_mode alpha_fptm;
94
95 /* Nonzero if inside of a function, because the Alpha asm can't
96 handle .files inside of functions. */
97
98 static int inside_function = FALSE;
99
100 /* The number of cycles of latency we should assume on memory reads. */
101
102 static int alpha_memory_latency = 3;
103
104 /* Whether the function needs the GP. */
105
106 static int alpha_function_needs_gp;
107
108 /* The assembler name of the current function. */
109
110 static const char *alpha_fnname;
111
112 /* The next explicit relocation sequence number. */
113 extern GTY(()) int alpha_next_sequence_number;
114 int alpha_next_sequence_number = 1;
115
116 /* The literal and gpdisp sequence numbers for this insn, as printed
117 by %# and %* respectively. */
118 extern GTY(()) int alpha_this_literal_sequence_number;
119 extern GTY(()) int alpha_this_gpdisp_sequence_number;
120 int alpha_this_literal_sequence_number;
121 int alpha_this_gpdisp_sequence_number;
122
123 /* Costs of various operations on the different architectures. */
124
125 struct alpha_rtx_cost_data
126 {
127 unsigned char fp_add;
128 unsigned char fp_mult;
129 unsigned char fp_div_sf;
130 unsigned char fp_div_df;
131 unsigned char int_mult_si;
132 unsigned char int_mult_di;
133 unsigned char int_shift;
134 unsigned char int_cmov;
135 unsigned short int_div;
136 };
137
138 static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
139 {
140 { /* EV4 */
141 COSTS_N_INSNS (6), /* fp_add */
142 COSTS_N_INSNS (6), /* fp_mult */
143 COSTS_N_INSNS (34), /* fp_div_sf */
144 COSTS_N_INSNS (63), /* fp_div_df */
145 COSTS_N_INSNS (23), /* int_mult_si */
146 COSTS_N_INSNS (23), /* int_mult_di */
147 COSTS_N_INSNS (2), /* int_shift */
148 COSTS_N_INSNS (2), /* int_cmov */
149 COSTS_N_INSNS (97), /* int_div */
150 },
151 { /* EV5 */
152 COSTS_N_INSNS (4), /* fp_add */
153 COSTS_N_INSNS (4), /* fp_mult */
154 COSTS_N_INSNS (15), /* fp_div_sf */
155 COSTS_N_INSNS (22), /* fp_div_df */
156 COSTS_N_INSNS (8), /* int_mult_si */
157 COSTS_N_INSNS (12), /* int_mult_di */
158 COSTS_N_INSNS (1) + 1, /* int_shift */
159 COSTS_N_INSNS (1), /* int_cmov */
160 COSTS_N_INSNS (83), /* int_div */
161 },
162 { /* EV6 */
163 COSTS_N_INSNS (4), /* fp_add */
164 COSTS_N_INSNS (4), /* fp_mult */
165 COSTS_N_INSNS (12), /* fp_div_sf */
166 COSTS_N_INSNS (15), /* fp_div_df */
167 COSTS_N_INSNS (7), /* int_mult_si */
168 COSTS_N_INSNS (7), /* int_mult_di */
169 COSTS_N_INSNS (1), /* int_shift */
170 COSTS_N_INSNS (2), /* int_cmov */
171 COSTS_N_INSNS (86), /* int_div */
172 },
173 };
174
175 /* Similar but tuned for code size instead of execution latency. The
176 extra +N is fractional cost tuning based on latency. It's used to
177 encourage use of cheaper insns like shift, but only if there's just
178 one of them. */
179
180 static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
181 {
182 COSTS_N_INSNS (1), /* fp_add */
183 COSTS_N_INSNS (1), /* fp_mult */
184 COSTS_N_INSNS (1), /* fp_div_sf */
185 COSTS_N_INSNS (1) + 1, /* fp_div_df */
186 COSTS_N_INSNS (1) + 1, /* int_mult_si */
187 COSTS_N_INSNS (1) + 2, /* int_mult_di */
188 COSTS_N_INSNS (1), /* int_shift */
189 COSTS_N_INSNS (1), /* int_cmov */
190 COSTS_N_INSNS (6), /* int_div */
191 };
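
/* Editorial sketch, not part of the original source: COSTS_N_INSNS (N) is
   defined elsewhere in GCC (rtl.h) as N * 4, so the "+ 1" and "+ 2" entries
   above charge a fraction of an insn.  That is enough to make one cheap insn
   such as a shift preferable without making two of them look cheap, as the
   comment above describes.  A quick check of that intent; the helper name is
   ours and the function is never called.  */

static ATTRIBUTE_UNUSED void
alpha_fractional_cost_example (void)
{
  /* One "insn and a quarter" beats two full insns...  */
  gcc_checking_assert (COSTS_N_INSNS (1) + 1 < COSTS_N_INSNS (2));
  /* ...but two of them do not.  */
  gcc_checking_assert (2 * (COSTS_N_INSNS (1) + 1) > COSTS_N_INSNS (2));
}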
192
193 /* Get the number of args of a function in one of two ways. */
194 #if TARGET_ABI_OPEN_VMS
195 #define NUM_ARGS crtl->args.info.num_args
196 #else
197 #define NUM_ARGS crtl->args.info
198 #endif
199
200 #define REG_PV 27
201 #define REG_RA 26
202
203 /* Declarations of static functions. */
204 static struct machine_function *alpha_init_machine_status (void);
205 static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
206 static void alpha_handle_trap_shadows (void);
207 static void alpha_align_insns (void);
208 static void alpha_override_options_after_change (void);
209
210 #if TARGET_ABI_OPEN_VMS
211 static void alpha_write_linkage (FILE *, const char *);
212 static bool vms_valid_pointer_mode (scalar_int_mode);
213 #else
214 #define vms_patch_builtins() gcc_unreachable()
215 #endif
216 \f
217 static unsigned int
218 rest_of_handle_trap_shadows (void)
219 {
220 alpha_handle_trap_shadows ();
221 return 0;
222 }
223
224 namespace {
225
226 const pass_data pass_data_handle_trap_shadows =
227 {
228 RTL_PASS,
229 "trap_shadows", /* name */
230 OPTGROUP_NONE, /* optinfo_flags */
231 TV_NONE, /* tv_id */
232 0, /* properties_required */
233 0, /* properties_provided */
234 0, /* properties_destroyed */
235 0, /* todo_flags_start */
236 TODO_df_finish, /* todo_flags_finish */
237 };
238
239 class pass_handle_trap_shadows : public rtl_opt_pass
240 {
241 public:
242 pass_handle_trap_shadows(gcc::context *ctxt)
243 : rtl_opt_pass(pass_data_handle_trap_shadows, ctxt)
244 {}
245
246 /* opt_pass methods: */
247 virtual bool gate (function *)
248 {
249 return alpha_tp != ALPHA_TP_PROG || flag_exceptions;
250 }
251
252 virtual unsigned int execute (function *)
253 {
254 return rest_of_handle_trap_shadows ();
255 }
256
257 }; // class pass_handle_trap_shadows
258
259 } // anon namespace
260
261 rtl_opt_pass *
262 make_pass_handle_trap_shadows (gcc::context *ctxt)
263 {
264 return new pass_handle_trap_shadows (ctxt);
265 }
266
267 static unsigned int
268 rest_of_align_insns (void)
269 {
270 alpha_align_insns ();
271 return 0;
272 }
273
274 namespace {
275
276 const pass_data pass_data_align_insns =
277 {
278 RTL_PASS,
279 "align_insns", /* name */
280 OPTGROUP_NONE, /* optinfo_flags */
281 TV_NONE, /* tv_id */
282 0, /* properties_required */
283 0, /* properties_provided */
284 0, /* properties_destroyed */
285 0, /* todo_flags_start */
286 TODO_df_finish, /* todo_flags_finish */
287 };
288
289 class pass_align_insns : public rtl_opt_pass
290 {
291 public:
292 pass_align_insns(gcc::context *ctxt)
293 : rtl_opt_pass(pass_data_align_insns, ctxt)
294 {}
295
296 /* opt_pass methods: */
297 virtual bool gate (function *)
298 {
299 /* Due to the number of extra trapb insns, don't bother fixing up
300 alignment when trap precision is instruction. Moreover, we can
301 only do our job when sched2 is run. */
302 return ((alpha_tune == PROCESSOR_EV4
303 || alpha_tune == PROCESSOR_EV5)
304 && optimize && !optimize_size
305 && alpha_tp != ALPHA_TP_INSN
306 && flag_schedule_insns_after_reload);
307 }
308
309 virtual unsigned int execute (function *)
310 {
311 return rest_of_align_insns ();
312 }
313
314 }; // class pass_align_insns
315
316 } // anon namespace
317
318 rtl_opt_pass *
319 make_pass_align_insns (gcc::context *ctxt)
320 {
321 return new pass_align_insns (ctxt);
322 }
323
324 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
325 /* Implement TARGET_MANGLE_TYPE. */
326
327 static const char *
328 alpha_mangle_type (const_tree type)
329 {
330 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
331 && TARGET_LONG_DOUBLE_128)
332 return "g";
333
334 /* For all other types, use normal C++ mangling. */
335 return NULL;
336 }
337 #endif
338
339 /* Parse target option strings. */
340
341 static void
342 alpha_option_override (void)
343 {
344 static const struct cpu_table {
345 const char *const name;
346 const enum processor_type processor;
347 const int flags;
348 const unsigned short line_size; /* in bytes */
349 const unsigned short l1_size; /* in kb. */
350 const unsigned short l2_size; /* in kb. */
351 } cpu_table[] = {
352 /* EV4/LCA45 had 8k L1 caches; EV45 had 16k L1 caches.
353 EV4/EV45 had 128k to 16M 32-byte direct Bcache. LCA45
354 had 64k to 8M 8-byte direct Bcache. */
355 { "ev4", PROCESSOR_EV4, 0, 32, 8, 8*1024 },
356 { "21064", PROCESSOR_EV4, 0, 32, 8, 8*1024 },
357 { "ev45", PROCESSOR_EV4, 0, 32, 16, 16*1024 },
358
359 /* EV5 or EV56 had 8k 32 byte L1, 96k 32 or 64 byte L2,
360 and 1M to 16M 64 byte L3 (not modeled).
361 PCA56 had 16k 64-byte cache; PCA57 had 32k Icache.
362 PCA56 had 8k 64-byte cache; PCA57 had 16k Dcache. */
363 { "ev5", PROCESSOR_EV5, 0, 32, 8, 96 },
364 { "21164", PROCESSOR_EV5, 0, 32, 8, 96 },
365 { "ev56", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
366 { "21164a", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
367 { "pca56", PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
368 { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
369 { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
370
371 /* EV6 had 64k 64 byte L1, 1M to 16M Bcache. */
372 { "ev6", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
373 { "21264", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
374 { "ev67", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
375 64, 64, 16*1024 },
376 { "21264a", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
377 64, 64, 16*1024 }
378 };
379
380 int const ct_size = ARRAY_SIZE (cpu_table);
381 int line_size = 0, l1_size = 0, l2_size = 0;
382 int i;
383
384 #ifdef SUBTARGET_OVERRIDE_OPTIONS
385 SUBTARGET_OVERRIDE_OPTIONS;
386 #endif
387
388 /* Default to full IEEE compliance mode for Go language. */
389 if (strcmp (lang_hooks.name, "GNU Go") == 0
390 && !(target_flags_explicit & MASK_IEEE))
391 target_flags |= MASK_IEEE;
392
393 alpha_fprm = ALPHA_FPRM_NORM;
394 alpha_tp = ALPHA_TP_PROG;
395 alpha_fptm = ALPHA_FPTM_N;
396
397 if (TARGET_IEEE)
398 {
399 alpha_tp = ALPHA_TP_INSN;
400 alpha_fptm = ALPHA_FPTM_SU;
401 }
402 if (TARGET_IEEE_WITH_INEXACT)
403 {
404 alpha_tp = ALPHA_TP_INSN;
405 alpha_fptm = ALPHA_FPTM_SUI;
406 }
407
408 if (alpha_tp_string)
409 {
410 if (! strcmp (alpha_tp_string, "p"))
411 alpha_tp = ALPHA_TP_PROG;
412 else if (! strcmp (alpha_tp_string, "f"))
413 alpha_tp = ALPHA_TP_FUNC;
414 else if (! strcmp (alpha_tp_string, "i"))
415 alpha_tp = ALPHA_TP_INSN;
416 else
417 error ("bad value %qs for -mtrap-precision switch", alpha_tp_string);
418 }
419
420 if (alpha_fprm_string)
421 {
422 if (! strcmp (alpha_fprm_string, "n"))
423 alpha_fprm = ALPHA_FPRM_NORM;
424 else if (! strcmp (alpha_fprm_string, "m"))
425 alpha_fprm = ALPHA_FPRM_MINF;
426 else if (! strcmp (alpha_fprm_string, "c"))
427 alpha_fprm = ALPHA_FPRM_CHOP;
428 else if (! strcmp (alpha_fprm_string,"d"))
429 alpha_fprm = ALPHA_FPRM_DYN;
430 else
431 error ("bad value %qs for -mfp-rounding-mode switch",
432 alpha_fprm_string);
433 }
434
435 if (alpha_fptm_string)
436 {
437 if (strcmp (alpha_fptm_string, "n") == 0)
438 alpha_fptm = ALPHA_FPTM_N;
439 else if (strcmp (alpha_fptm_string, "u") == 0)
440 alpha_fptm = ALPHA_FPTM_U;
441 else if (strcmp (alpha_fptm_string, "su") == 0)
442 alpha_fptm = ALPHA_FPTM_SU;
443 else if (strcmp (alpha_fptm_string, "sui") == 0)
444 alpha_fptm = ALPHA_FPTM_SUI;
445 else
446 error ("bad value %qs for -mfp-trap-mode switch", alpha_fptm_string);
447 }
448
449 if (alpha_cpu_string)
450 {
451 for (i = 0; i < ct_size; i++)
452 if (! strcmp (alpha_cpu_string, cpu_table [i].name))
453 {
454 alpha_tune = alpha_cpu = cpu_table[i].processor;
455 line_size = cpu_table[i].line_size;
456 l1_size = cpu_table[i].l1_size;
457 l2_size = cpu_table[i].l2_size;
458 target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
459 target_flags |= cpu_table[i].flags;
460 break;
461 }
462 if (i == ct_size)
463 error ("bad value %qs for -mcpu switch", alpha_cpu_string);
464 }
465
466 if (alpha_tune_string)
467 {
468 for (i = 0; i < ct_size; i++)
469 if (! strcmp (alpha_tune_string, cpu_table [i].name))
470 {
471 alpha_tune = cpu_table[i].processor;
472 line_size = cpu_table[i].line_size;
473 l1_size = cpu_table[i].l1_size;
474 l2_size = cpu_table[i].l2_size;
475 break;
476 }
477 if (i == ct_size)
478 error ("bad value %qs for -mtune switch", alpha_tune_string);
479 }
480
481 if (line_size)
482 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, line_size,
483 global_options.x_param_values,
484 global_options_set.x_param_values);
485 if (l1_size)
486 maybe_set_param_value (PARAM_L1_CACHE_SIZE, l1_size,
487 global_options.x_param_values,
488 global_options_set.x_param_values);
489 if (l2_size)
490 maybe_set_param_value (PARAM_L2_CACHE_SIZE, l2_size,
491 global_options.x_param_values,
492 global_options_set.x_param_values);
493
494 /* Do some sanity checks on the above options. */
495
496 if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
497 && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
498 {
499 warning (0, "fp software completion requires -mtrap-precision=i");
500 alpha_tp = ALPHA_TP_INSN;
501 }
502
503 if (alpha_cpu == PROCESSOR_EV6)
504 {
505 /* Except for EV6 pass 1 (not released), we always have precise
506 arithmetic traps. Which means we can do software completion
507 without minding trap shadows. */
508 alpha_tp = ALPHA_TP_PROG;
509 }
510
511 if (TARGET_FLOAT_VAX)
512 {
513 if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
514 {
515 warning (0, "rounding mode not supported for VAX floats");
516 alpha_fprm = ALPHA_FPRM_NORM;
517 }
518 if (alpha_fptm == ALPHA_FPTM_SUI)
519 {
520 warning (0, "trap mode not supported for VAX floats");
521 alpha_fptm = ALPHA_FPTM_SU;
522 }
523 if (target_flags_explicit & MASK_LONG_DOUBLE_128)
524 warning (0, "128-bit long double not supported for VAX floats");
525 target_flags &= ~MASK_LONG_DOUBLE_128;
526 }
527
528 {
529 char *end;
530 int lat;
531
532 if (!alpha_mlat_string)
533 alpha_mlat_string = "L1";
534
535 if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
536 && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
537 ;
538 else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
539 && ISDIGIT ((unsigned char)alpha_mlat_string[1])
540 && alpha_mlat_string[2] == '\0')
541 {
542 static int const cache_latency[][4] =
543 {
544 { 3, 30, -1 }, /* ev4 -- Bcache is a guess */
545 { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */
546 { 3, 12, 30 }, /* ev6 -- Bcache from DS20 LMbench. */
547 };
548
549 lat = alpha_mlat_string[1] - '0';
550 if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
551 {
552 warning (0, "L%d cache latency unknown for %s",
553 lat, alpha_cpu_name[alpha_tune]);
554 lat = 3;
555 }
556 else
557 lat = cache_latency[alpha_tune][lat-1];
558 }
559 else if (! strcmp (alpha_mlat_string, "main"))
560 {
561 /* Most current memories have about 370ns latency. This is
562 a reasonable guess for a fast cpu. */
563 lat = 150;
564 }
565 else
566 {
567 warning (0, "bad value %qs for -mmemory-latency", alpha_mlat_string);
568 lat = 3;
569 }
570
571 alpha_memory_latency = lat;
572 }
573
574 /* Default the definition of "small data" to 8 bytes. */
575 if (!global_options_set.x_g_switch_value)
576 g_switch_value = 8;
577
578 /* Infer TARGET_SMALL_DATA from -fpic/-fPIC. */
579 if (flag_pic == 1)
580 target_flags |= MASK_SMALL_DATA;
581 else if (flag_pic == 2)
582 target_flags &= ~MASK_SMALL_DATA;
583
584 alpha_override_options_after_change ();
585
586 /* Register variables and functions with the garbage collector. */
587
588 /* Set up function hooks. */
589 init_machine_status = alpha_init_machine_status;
590
591 /* Tell the compiler when we're using VAX floating point. */
592 if (TARGET_FLOAT_VAX)
593 {
594 REAL_MODE_FORMAT (SFmode) = &vax_f_format;
595 REAL_MODE_FORMAT (DFmode) = &vax_g_format;
596 REAL_MODE_FORMAT (TFmode) = NULL;
597 }
598
599 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
600 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
601 target_flags |= MASK_LONG_DOUBLE_128;
602 #endif
603
604 }
605
606 /* Implement targetm.override_options_after_change. */
607
608 static void
609 alpha_override_options_after_change (void)
610 {
611 /* Align labels and loops for optimal branching. */
612 /* ??? Kludge these by not doing anything if we don't optimize. */
613 if (optimize > 0)
614 {
615 if (align_loops <= 0)
616 align_loops = 16;
617 if (align_jumps <= 0)
618 align_jumps = 16;
619 }
620 if (align_functions <= 0)
621 align_functions = 16;
622 }
623 \f
624 /* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */
625
626 int
627 zap_mask (HOST_WIDE_INT value)
628 {
629 int i;
630
631 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
632 i++, value >>= 8)
633 if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
634 return 0;
635
636 return 1;
637 }
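
/* Editorial sketch, not part of the original source: zap_mask above accepts
   exactly those constants whose individual bytes are all-zero or all-one,
   i.e. the values a single ZAP/ZAPNOT byte mask can describe.  A few sample
   inputs, in an illustrative helper of our own that is never called.  */

static ATTRIBUTE_UNUSED void
alpha_zap_mask_examples (void)
{
  /* Whole bytes of ones are accepted...  */
  gcc_checking_assert (zap_mask ((HOST_WIDE_INT) 0xff));
  gcc_checking_assert (zap_mask ((HOST_WIDE_INT) 0xffff0000));
  gcc_checking_assert (zap_mask ((HOST_WIDE_INT) 0xff << 32));
  /* ...while a partially populated byte is rejected.  */
  gcc_checking_assert (! zap_mask ((HOST_WIDE_INT) 0x0f));
  gcc_checking_assert (! zap_mask ((HOST_WIDE_INT) 0x1234));
}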
638
639 /* Return true if OP is valid for a particular TLS relocation.
640 We are already guaranteed that OP is a CONST. */
641
642 int
643 tls_symbolic_operand_1 (rtx op, int size, int unspec)
644 {
645 op = XEXP (op, 0);
646
647 if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
648 return 0;
649 op = XVECEXP (op, 0, 0);
650
651 if (GET_CODE (op) != SYMBOL_REF)
652 return 0;
653
654 switch (SYMBOL_REF_TLS_MODEL (op))
655 {
656 case TLS_MODEL_LOCAL_DYNAMIC:
657 return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
658 case TLS_MODEL_INITIAL_EXEC:
659 return unspec == UNSPEC_TPREL && size == 64;
660 case TLS_MODEL_LOCAL_EXEC:
661 return unspec == UNSPEC_TPREL && size == alpha_tls_size;
662 default:
663 gcc_unreachable ();
664 }
665 }
666
667 /* Used by aligned_memory_operand and unaligned_memory_operand to
668 resolve what reload is going to do with OP if it's a register. */
669
670 rtx
671 resolve_reload_operand (rtx op)
672 {
673 if (reload_in_progress)
674 {
675 rtx tmp = op;
676 if (SUBREG_P (tmp))
677 tmp = SUBREG_REG (tmp);
678 if (REG_P (tmp)
679 && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
680 {
681 op = reg_equiv_memory_loc (REGNO (tmp));
682 if (op == 0)
683 return 0;
684 }
685 }
686 return op;
687 }
688
689 /* The scalar modes supported differ from the default check-what-c-supports
690 version in that sometimes TFmode is available even when long double
691 indicates only DFmode. */
692
693 static bool
694 alpha_scalar_mode_supported_p (machine_mode mode)
695 {
696 switch (mode)
697 {
698 case E_QImode:
699 case E_HImode:
700 case E_SImode:
701 case E_DImode:
702 case E_TImode: /* via optabs.c */
703 return true;
704
705 case E_SFmode:
706 case E_DFmode:
707 return true;
708
709 case E_TFmode:
710 return TARGET_HAS_XFLOATING_LIBS;
711
712 default:
713 return false;
714 }
715 }
716
717 /* Alpha implements a couple of integer vector mode operations when
718 TARGET_MAX is enabled. We do not check TARGET_MAX here, however,
719 which allows the vectorizer to operate on e.g. move instructions,
720 or when expand_vector_operations can do something useful. */
721
722 static bool
723 alpha_vector_mode_supported_p (machine_mode mode)
724 {
725 return mode == V8QImode || mode == V4HImode || mode == V2SImode;
726 }
727
728 /* Return 1 if this function can directly return via $26. */
729
730 int
731 direct_return (void)
732 {
733 return (TARGET_ABI_OSF
734 && reload_completed
735 && alpha_sa_size () == 0
736 && get_frame_size () == 0
737 && crtl->outgoing_args_size == 0
738 && crtl->args.pretend_args_size == 0);
739 }
740
741 /* Return the TLS model to use for SYMBOL. */
742
743 static enum tls_model
744 tls_symbolic_operand_type (rtx symbol)
745 {
746 enum tls_model model;
747
748 if (GET_CODE (symbol) != SYMBOL_REF)
749 return TLS_MODEL_NONE;
750 model = SYMBOL_REF_TLS_MODEL (symbol);
751
752 /* Local-exec with a 64-bit size is the same code as initial-exec. */
753 if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
754 model = TLS_MODEL_INITIAL_EXEC;
755
756 return model;
757 }
758 \f
759 /* Return true if the function DECL will share the same GP as any
760 function in the current unit of translation. */
761
762 static bool
763 decl_has_samegp (const_tree decl)
764 {
765 /* Functions that are not local can be overridden, and thus may
766 not share the same gp. */
767 if (!(*targetm.binds_local_p) (decl))
768 return false;
769
770 /* If -msmall-data is in effect, assume that there is only one GP
771 for the module, and so any local symbol has this property. We
772 need explicit relocations to be able to enforce this for symbols
773 not defined in this unit of translation, however. */
774 if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
775 return true;
776
777 /* Functions that are not external are defined in this UoT. */
778 /* ??? Irritatingly, static functions not yet emitted are still
779 marked "external". Apply this to non-static functions only. */
780 return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
781 }
782
783 /* Return true if EXP should be placed in the small data section. */
784
785 static bool
786 alpha_in_small_data_p (const_tree exp)
787 {
788 /* We want to merge strings, so we never consider them small data. */
789 if (TREE_CODE (exp) == STRING_CST)
790 return false;
791
792 /* Functions are never in the small data area. Duh. */
793 if (TREE_CODE (exp) == FUNCTION_DECL)
794 return false;
795
796 /* COMMON symbols are never small data. */
797 if (TREE_CODE (exp) == VAR_DECL && DECL_COMMON (exp))
798 return false;
799
800 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
801 {
802 const char *section = DECL_SECTION_NAME (exp);
803 if (strcmp (section, ".sdata") == 0
804 || strcmp (section, ".sbss") == 0)
805 return true;
806 }
807 else
808 {
809 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
810
811 /* If this is an incomplete type with size 0, then we can't put it
812 in sdata because it might be too big when completed. */
813 if (size > 0 && size <= g_switch_value)
814 return true;
815 }
816
817 return false;
818 }
819
820 #if TARGET_ABI_OPEN_VMS
821 static bool
822 vms_valid_pointer_mode (scalar_int_mode mode)
823 {
824 return (mode == SImode || mode == DImode);
825 }
826
827 static bool
828 alpha_linkage_symbol_p (const char *symname)
829 {
830 int symlen = strlen (symname);
831
832 if (symlen > 4)
833 return strcmp (&symname [symlen - 4], "..lk") == 0;
834
835 return false;
836 }
837
838 #define LINKAGE_SYMBOL_REF_P(X) \
839 ((GET_CODE (X) == SYMBOL_REF \
840 && alpha_linkage_symbol_p (XSTR (X, 0))) \
841 || (GET_CODE (X) == CONST \
842 && GET_CODE (XEXP (X, 0)) == PLUS \
843 && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
844 && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
845 #endif
846
847 /* legitimate_address_p recognizes an RTL expression that is a valid
848 memory address for an instruction. The MODE argument is the
849 machine mode for the MEM expression that wants to use this address.
850
851 For Alpha, we have either a constant address or the sum of a
852 register and a constant address, or just a register. For DImode,
853 any of those forms can be surrounded with an AND that clear the
854 low-order three bits; this is an "unaligned" access. */
855
856 static bool
857 alpha_legitimate_address_p (machine_mode mode, rtx x, bool strict)
858 {
859 /* If this is an ldq_u type address, discard the outer AND. */
860 if (mode == DImode
861 && GET_CODE (x) == AND
862 && CONST_INT_P (XEXP (x, 1))
863 && INTVAL (XEXP (x, 1)) == -8)
864 x = XEXP (x, 0);
865
866 /* Discard non-paradoxical subregs. */
867 if (SUBREG_P (x)
868 && (GET_MODE_SIZE (GET_MODE (x))
869 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
870 x = SUBREG_REG (x);
871
872 /* Unadorned general registers are valid. */
873 if (REG_P (x)
874 && (strict
875 ? STRICT_REG_OK_FOR_BASE_P (x)
876 : NONSTRICT_REG_OK_FOR_BASE_P (x)))
877 return true;
878
879 /* Constant addresses (i.e. +/- 32k) are valid. */
880 if (CONSTANT_ADDRESS_P (x))
881 return true;
882
883 #if TARGET_ABI_OPEN_VMS
884 if (LINKAGE_SYMBOL_REF_P (x))
885 return true;
886 #endif
887
888 /* Register plus a small constant offset is valid. */
889 if (GET_CODE (x) == PLUS)
890 {
891 rtx ofs = XEXP (x, 1);
892 x = XEXP (x, 0);
893
894 /* Discard non-paradoxical subregs. */
895 if (SUBREG_P (x)
896 && (GET_MODE_SIZE (GET_MODE (x))
897 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
898 x = SUBREG_REG (x);
899
900 if (REG_P (x))
901 {
902 if (! strict
903 && NONSTRICT_REG_OK_FP_BASE_P (x)
904 && CONST_INT_P (ofs))
905 return true;
906 if ((strict
907 ? STRICT_REG_OK_FOR_BASE_P (x)
908 : NONSTRICT_REG_OK_FOR_BASE_P (x))
909 && CONSTANT_ADDRESS_P (ofs))
910 return true;
911 }
912 }
913
914 /* If we're managing explicit relocations, LO_SUM is valid, as are small
915 data symbols. Avoid explicit relocations of modes larger than word
916    mode since e.g. $LC0+8($1) can fold around +/- 32k offset.  */
917 else if (TARGET_EXPLICIT_RELOCS
918 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
919 {
920 if (small_symbolic_operand (x, Pmode))
921 return true;
922
923 if (GET_CODE (x) == LO_SUM)
924 {
925 rtx ofs = XEXP (x, 1);
926 x = XEXP (x, 0);
927
928 /* Discard non-paradoxical subregs. */
929 if (SUBREG_P (x)
930 && (GET_MODE_SIZE (GET_MODE (x))
931 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
932 x = SUBREG_REG (x);
933
934 /* Must have a valid base register. */
935 if (! (REG_P (x)
936 && (strict
937 ? STRICT_REG_OK_FOR_BASE_P (x)
938 : NONSTRICT_REG_OK_FOR_BASE_P (x))))
939 return false;
940
941 /* The symbol must be local. */
942 if (local_symbolic_operand (ofs, Pmode)
943 || dtp32_symbolic_operand (ofs, Pmode)
944 || tp32_symbolic_operand (ofs, Pmode))
945 return true;
946 }
947 }
948
949 return false;
950 }
951
952 /* Build the SYMBOL_REF for __tls_get_addr. */
953
954 static GTY(()) rtx tls_get_addr_libfunc;
955
956 static rtx
957 get_tls_get_addr (void)
958 {
959 if (!tls_get_addr_libfunc)
960 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
961 return tls_get_addr_libfunc;
962 }
963
964 /* Try machine-dependent ways of modifying an illegitimate address
965 to be legitimate. If we find one, return the new, valid address. */
966
967 static rtx
968 alpha_legitimize_address_1 (rtx x, rtx scratch, machine_mode mode)
969 {
970 HOST_WIDE_INT addend;
971
972 /* If the address is (plus reg const_int) and the CONST_INT is not a
973 valid offset, compute the high part of the constant and add it to
974 the register. Then our address is (plus temp low-part-const). */
975 if (GET_CODE (x) == PLUS
976 && REG_P (XEXP (x, 0))
977 && CONST_INT_P (XEXP (x, 1))
978 && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
979 {
980 addend = INTVAL (XEXP (x, 1));
981 x = XEXP (x, 0);
982 goto split_addend;
983 }
984
985 /* If the address is (const (plus FOO const_int)), find the low-order
986 part of the CONST_INT. Then load FOO plus any high-order part of the
987 CONST_INT into a register. Our address is (plus reg low-part-const).
988 This is done to reduce the number of GOT entries. */
989 if (can_create_pseudo_p ()
990 && GET_CODE (x) == CONST
991 && GET_CODE (XEXP (x, 0)) == PLUS
992 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
993 {
994 addend = INTVAL (XEXP (XEXP (x, 0), 1));
995 x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
996 goto split_addend;
997 }
998
999 /* If we have a (plus reg const), emit the load as in (2), then add
1000 the two registers, and finally generate (plus reg low-part-const) as
1001 our address. */
1002 if (can_create_pseudo_p ()
1003 && GET_CODE (x) == PLUS
1004 && REG_P (XEXP (x, 0))
1005 && GET_CODE (XEXP (x, 1)) == CONST
1006 && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
1007 && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1)))
1008 {
1009 addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
1010 x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
1011 XEXP (XEXP (XEXP (x, 1), 0), 0),
1012 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1013 goto split_addend;
1014 }
1015
1016 /* If this is a local symbol, split the address into HIGH/LO_SUM parts.
1017    Avoid modes larger than word mode since e.g. $LC0+8($1) can fold
1018 around +/- 32k offset. */
1019 if (TARGET_EXPLICIT_RELOCS
1020 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
1021 && symbolic_operand (x, Pmode))
1022 {
1023 rtx r0, r16, eqv, tga, tp, dest, seq;
1024 rtx_insn *insn;
1025
1026 switch (tls_symbolic_operand_type (x))
1027 {
1028 case TLS_MODEL_NONE:
1029 break;
1030
1031 case TLS_MODEL_GLOBAL_DYNAMIC:
1032 {
1033 start_sequence ();
1034
1035 r0 = gen_rtx_REG (Pmode, 0);
1036 r16 = gen_rtx_REG (Pmode, 16);
1037 tga = get_tls_get_addr ();
1038 dest = gen_reg_rtx (Pmode);
1039 seq = GEN_INT (alpha_next_sequence_number++);
1040
1041 emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
1042 rtx val = gen_call_value_osf_tlsgd (r0, tga, seq);
1043 insn = emit_call_insn (val);
1044 RTL_CONST_CALL_P (insn) = 1;
1045 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1046
1047 insn = get_insns ();
1048 end_sequence ();
1049
1050 emit_libcall_block (insn, dest, r0, x);
1051 return dest;
1052 }
1053
1054 case TLS_MODEL_LOCAL_DYNAMIC:
1055 {
1056 start_sequence ();
1057
1058 r0 = gen_rtx_REG (Pmode, 0);
1059 r16 = gen_rtx_REG (Pmode, 16);
1060 tga = get_tls_get_addr ();
1061 scratch = gen_reg_rtx (Pmode);
1062 seq = GEN_INT (alpha_next_sequence_number++);
1063
1064 emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
1065 rtx val = gen_call_value_osf_tlsldm (r0, tga, seq);
1066 insn = emit_call_insn (val);
1067 RTL_CONST_CALL_P (insn) = 1;
1068 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1069
1070 insn = get_insns ();
1071 end_sequence ();
1072
1073 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1074 UNSPEC_TLSLDM_CALL);
1075 emit_libcall_block (insn, scratch, r0, eqv);
1076
1077 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
1078 eqv = gen_rtx_CONST (Pmode, eqv);
1079
1080 if (alpha_tls_size == 64)
1081 {
1082 dest = gen_reg_rtx (Pmode);
1083 emit_insn (gen_rtx_SET (dest, eqv));
1084 emit_insn (gen_adddi3 (dest, dest, scratch));
1085 return dest;
1086 }
1087 if (alpha_tls_size == 32)
1088 {
1089 rtx temp = gen_rtx_HIGH (Pmode, eqv);
1090 temp = gen_rtx_PLUS (Pmode, scratch, temp);
1091 scratch = gen_reg_rtx (Pmode);
1092 emit_insn (gen_rtx_SET (scratch, temp));
1093 }
1094 return gen_rtx_LO_SUM (Pmode, scratch, eqv);
1095 }
1096
1097 case TLS_MODEL_INITIAL_EXEC:
1098 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1099 eqv = gen_rtx_CONST (Pmode, eqv);
1100 tp = gen_reg_rtx (Pmode);
1101 scratch = gen_reg_rtx (Pmode);
1102 dest = gen_reg_rtx (Pmode);
1103
1104 emit_insn (gen_get_thread_pointerdi (tp));
1105 emit_insn (gen_rtx_SET (scratch, eqv));
1106 emit_insn (gen_adddi3 (dest, tp, scratch));
1107 return dest;
1108
1109 case TLS_MODEL_LOCAL_EXEC:
1110 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1111 eqv = gen_rtx_CONST (Pmode, eqv);
1112 tp = gen_reg_rtx (Pmode);
1113
1114 emit_insn (gen_get_thread_pointerdi (tp));
1115 if (alpha_tls_size == 32)
1116 {
1117 rtx temp = gen_rtx_HIGH (Pmode, eqv);
1118 temp = gen_rtx_PLUS (Pmode, tp, temp);
1119 tp = gen_reg_rtx (Pmode);
1120 emit_insn (gen_rtx_SET (tp, temp));
1121 }
1122 return gen_rtx_LO_SUM (Pmode, tp, eqv);
1123
1124 default:
1125 gcc_unreachable ();
1126 }
1127
1128 if (local_symbolic_operand (x, Pmode))
1129 {
1130 if (small_symbolic_operand (x, Pmode))
1131 return x;
1132 else
1133 {
1134 if (can_create_pseudo_p ())
1135 scratch = gen_reg_rtx (Pmode);
1136 emit_insn (gen_rtx_SET (scratch, gen_rtx_HIGH (Pmode, x)));
1137 return gen_rtx_LO_SUM (Pmode, scratch, x);
1138 }
1139 }
1140 }
1141
1142 return NULL;
1143
1144 split_addend:
1145 {
1146 HOST_WIDE_INT low, high;
1147
1148 low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
1149 addend -= low;
1150 high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
1151 addend -= high;
1152
1153 if (addend)
1154 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
1155 (!can_create_pseudo_p () ? scratch : NULL_RTX),
1156 1, OPTAB_LIB_WIDEN);
1157 if (high)
1158 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
1159 (!can_create_pseudo_p () ? scratch : NULL_RTX),
1160 1, OPTAB_LIB_WIDEN);
1161
1162 return plus_constant (Pmode, x, low);
1163 }
1164 }
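
/* Editorial sketch, not part of the original source: the split_addend code
   above breaks an out-of-range displacement into a sign-extended 16-bit LOW
   part, left for the final lda or memory insn, and a HIGH part suitable for
   an ldah.  A worked instance of that identity, in an illustrative helper of
   our own that is never called.  */

static ATTRIBUTE_UNUSED void
alpha_split_addend_example (void)
{
  HOST_WIDE_INT addend = 0x9000;	/* Too large for a signed 16-bit offset.  */
  HOST_WIDE_INT low = ((addend & 0xffff) ^ 0x8000) - 0x8000;	/* -0x7000 */
  HOST_WIDE_INT rest = addend - low;				/*  0x10000 */
  HOST_WIDE_INT high = ((rest & 0xffffffff) ^ 0x80000000) - 0x80000000;

  /* An ldah supplies HIGH (1 << 16 here) and the residual lda offset is LOW;
     together they reconstruct the original addend.  */
  gcc_checking_assert (high + low == addend);
}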
1165
1166
1167 /* Try machine-dependent ways of modifying an illegitimate address
1168 to be legitimate. Return X or the new, valid address. */
1169
1170 static rtx
1171 alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1172 machine_mode mode)
1173 {
1174 rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode);
1175 return new_x ? new_x : x;
1176 }
1177
1178 /* Return true if ADDR has an effect that depends on the machine mode it
1179 is used for. On the Alpha this is true only for the unaligned modes.
1180 We can simplify the test since we know that the address must be valid. */
1181
1182 static bool
1183 alpha_mode_dependent_address_p (const_rtx addr,
1184 addr_space_t as ATTRIBUTE_UNUSED)
1185 {
1186 return GET_CODE (addr) == AND;
1187 }
1188
1189 /* Primarily this is required for TLS symbols, but given that our move
1190 patterns *ought* to be able to handle any symbol at any time, we
1191 should never be spilling symbolic operands to the constant pool, ever. */
1192
1193 static bool
1194 alpha_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1195 {
1196 enum rtx_code code = GET_CODE (x);
1197 return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
1198 }
1199
1200 /* We do not allow indirect calls to be optimized into sibling calls, nor
1201 can we allow a call to a function with a different GP to be optimized
1202 into a sibcall. */
1203
1204 static bool
1205 alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1206 {
1207 /* Can't do indirect tail calls, since we don't know if the target
1208 uses the same GP. */
1209 if (!decl)
1210 return false;
1211
1212 /* Otherwise, we can make a tail call if the target function shares
1213 the same GP. */
1214 return decl_has_samegp (decl);
1215 }
1216
1217 bool
1218 some_small_symbolic_operand_int (rtx x)
1219 {
1220 subrtx_var_iterator::array_type array;
1221 FOR_EACH_SUBRTX_VAR (iter, array, x, ALL)
1222 {
1223 rtx x = *iter;
1224 /* Don't re-split. */
1225 if (GET_CODE (x) == LO_SUM)
1226 iter.skip_subrtxes ();
1227 else if (small_symbolic_operand (x, Pmode))
1228 return true;
1229 }
1230 return false;
1231 }
1232
1233 rtx
1234 split_small_symbolic_operand (rtx x)
1235 {
1236 x = copy_insn (x);
1237 subrtx_ptr_iterator::array_type array;
1238 FOR_EACH_SUBRTX_PTR (iter, array, &x, ALL)
1239 {
1240 rtx *ptr = *iter;
1241 rtx x = *ptr;
1242 /* Don't re-split. */
1243 if (GET_CODE (x) == LO_SUM)
1244 iter.skip_subrtxes ();
1245 else if (small_symbolic_operand (x, Pmode))
1246 {
1247 *ptr = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
1248 iter.skip_subrtxes ();
1249 }
1250 }
1251 return x;
1252 }
1253
1254 /* Indicate that INSN cannot be duplicated. This is true for any insn
1255 that we've marked with gpdisp relocs, since those have to stay in
1256 1-1 correspondence with one another.
1257
1258 Technically we could copy them if we could set up a mapping from one
1259 sequence number to another, across the set of insns to be duplicated.
1260 This seems overly complicated and error-prone since interblock motion
1261 from sched-ebb could move one of the pair of insns to a different block.
1262
1263 Also cannot allow jsr insns to be duplicated. If they throw exceptions,
1264 then they'll be in a different block from their ldgp. Which could lead
1265 the bb reorder code to think that it would be ok to copy just the block
1266 containing the call and branch to the block containing the ldgp. */
1267
1268 static bool
1269 alpha_cannot_copy_insn_p (rtx_insn *insn)
1270 {
1271 if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
1272 return false;
1273 if (recog_memoized (insn) >= 0)
1274 return get_attr_cannot_copy (insn);
1275 else
1276 return false;
1277 }
1278
1279
1280 /* Try a machine-dependent way of reloading an illegitimate address
1281 operand. If we find one, push the reload and return the new rtx. */
1282
1283 rtx
1284 alpha_legitimize_reload_address (rtx x,
1285 machine_mode mode ATTRIBUTE_UNUSED,
1286 int opnum, int type,
1287 int ind_levels ATTRIBUTE_UNUSED)
1288 {
1289 /* We must recognize output that we have already generated ourselves. */
1290 if (GET_CODE (x) == PLUS
1291 && GET_CODE (XEXP (x, 0)) == PLUS
1292 && REG_P (XEXP (XEXP (x, 0), 0))
1293 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
1294 && CONST_INT_P (XEXP (x, 1)))
1295 {
1296 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1297 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1298 opnum, (enum reload_type) type);
1299 return x;
1300 }
1301
1302 /* We wish to handle large displacements off a base register by
1303 splitting the addend across an ldah and the mem insn. This
1304    cuts the number of extra insns needed from 3 to 1.  */
1305 if (GET_CODE (x) == PLUS
1306 && REG_P (XEXP (x, 0))
1307 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
1308 && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
1309 && CONST_INT_P (XEXP (x, 1)))
1310 {
1311 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
1312 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
1313 HOST_WIDE_INT high
1314 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
1315
1316 /* Check for 32-bit overflow. */
1317 if (high + low != val)
1318 return NULL_RTX;
1319
1320 /* Reload the high part into a base reg; leave the low part
1321 in the mem directly. */
1322 x = gen_rtx_PLUS (GET_MODE (x),
1323 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
1324 GEN_INT (high)),
1325 GEN_INT (low));
1326
1327 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1328 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1329 opnum, (enum reload_type) type);
1330 return x;
1331 }
1332
1333 return NULL_RTX;
1334 }
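
/* Editorial sketch, not part of the original source: the "32-bit overflow"
   check above rejects displacements that an ldah/lda pair cannot express.
   For instance, 0x80000000 sign-extends its HIGH part to -0x80000000, so
   HIGH + LOW no longer equals the original value and the routine falls back
   by returning NULL_RTX.  Again an editorial, never-called helper.  */

static ATTRIBUTE_UNUSED void
alpha_reload_split_overflow_example (void)
{
  HOST_WIDE_INT val = (HOST_WIDE_INT) 1 << 31;			/* 0x80000000 */
  HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;	/* 0 */
  HOST_WIDE_INT high
    = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;	/* -0x80000000 */

  gcc_checking_assert (high + low != val);
}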
1335 \f
1336 /* Return the cost of moving between registers of various classes. Moving
1337 between FLOAT_REGS and anything else except float regs is expensive.
1338 In fact, we make it quite expensive because we really don't want to
1339 do these moves unless it is clearly worth it. Optimizations may
1340 reduce the impact of not being able to allocate a pseudo to a
1341 hard register. */
1342
1343 static int
1344 alpha_register_move_cost (machine_mode /*mode*/,
1345 reg_class_t from, reg_class_t to)
1346 {
1347 if ((from == FLOAT_REGS) == (to == FLOAT_REGS))
1348 return 2;
1349
1350 if (TARGET_FIX)
1351 return (from == FLOAT_REGS) ? 6 : 8;
1352
1353 return 4 + 2 * alpha_memory_latency;
1354 }
1355
1356 /* Return the cost of moving data of MODE from a register to
1357 or from memory. On the Alpha, bump this up a bit. */
1358
1359 static int
1360 alpha_memory_move_cost (machine_mode /*mode*/, reg_class_t /*regclass*/,
1361 bool /*in*/)
1362 {
1363 return 2 * alpha_memory_latency;
1364 }
1365
1366 /* Compute a (partial) cost for rtx X. Return true if the complete
1367 cost has been computed, and false if subexpressions should be
1368 scanned. In either case, *TOTAL contains the cost result. */
1369
1370 static bool
1371 alpha_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno, int *total,
1372 bool speed)
1373 {
1374 int code = GET_CODE (x);
1375 bool float_mode_p = FLOAT_MODE_P (mode);
1376 const struct alpha_rtx_cost_data *cost_data;
1377
1378 if (!speed)
1379 cost_data = &alpha_rtx_cost_size;
1380 else
1381 cost_data = &alpha_rtx_cost_data[alpha_tune];
1382
1383 switch (code)
1384 {
1385 case CONST_INT:
1386 /* If this is an 8-bit constant, return zero since it can be used
1387 nearly anywhere with no cost. If it is a valid operand for an
1388 ADD or AND, likewise return 0 if we know it will be used in that
1389 context. Otherwise, return 2 since it might be used there later.
1390 All other constants take at least two insns. */
1391 if (INTVAL (x) >= 0 && INTVAL (x) < 256)
1392 {
1393 *total = 0;
1394 return true;
1395 }
1396 /* FALLTHRU */
1397
1398 case CONST_DOUBLE:
1399 case CONST_WIDE_INT:
1400 if (x == CONST0_RTX (mode))
1401 *total = 0;
1402 else if ((outer_code == PLUS && add_operand (x, VOIDmode))
1403 || (outer_code == AND && and_operand (x, VOIDmode)))
1404 *total = 0;
1405 else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
1406 *total = 2;
1407 else
1408 *total = COSTS_N_INSNS (2);
1409 return true;
1410
1411 case CONST:
1412 case SYMBOL_REF:
1413 case LABEL_REF:
1414 if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
1415 *total = COSTS_N_INSNS (outer_code != MEM);
1416 else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
1417 *total = COSTS_N_INSNS (1 + (outer_code != MEM));
1418 else if (tls_symbolic_operand_type (x))
1419 /* Estimate of cost for call_pal rduniq. */
1420 /* ??? How many insns do we emit here? More than one... */
1421 *total = COSTS_N_INSNS (15);
1422 else
1423 /* Otherwise we do a load from the GOT. */
1424 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1425 return true;
1426
1427 case HIGH:
1428 /* This is effectively an add_operand. */
1429 *total = 2;
1430 return true;
1431
1432 case PLUS:
1433 case MINUS:
1434 if (float_mode_p)
1435 *total = cost_data->fp_add;
1436 else if (GET_CODE (XEXP (x, 0)) == MULT
1437 && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
1438 {
1439 *total = (rtx_cost (XEXP (XEXP (x, 0), 0), mode,
1440 (enum rtx_code) outer_code, opno, speed)
1441 + rtx_cost (XEXP (x, 1), mode,
1442 (enum rtx_code) outer_code, opno, speed)
1443 + COSTS_N_INSNS (1));
1444 return true;
1445 }
1446 return false;
1447
1448 case MULT:
1449 if (float_mode_p)
1450 *total = cost_data->fp_mult;
1451 else if (mode == DImode)
1452 *total = cost_data->int_mult_di;
1453 else
1454 *total = cost_data->int_mult_si;
1455 return false;
1456
1457 case ASHIFT:
1458 if (CONST_INT_P (XEXP (x, 1))
1459 && INTVAL (XEXP (x, 1)) <= 3)
1460 {
1461 *total = COSTS_N_INSNS (1);
1462 return false;
1463 }
1464 /* FALLTHRU */
1465
1466 case ASHIFTRT:
1467 case LSHIFTRT:
1468 *total = cost_data->int_shift;
1469 return false;
1470
1471 case IF_THEN_ELSE:
1472 if (float_mode_p)
1473 *total = cost_data->fp_add;
1474 else
1475 *total = cost_data->int_cmov;
1476 return false;
1477
1478 case DIV:
1479 case UDIV:
1480 case MOD:
1481 case UMOD:
1482 if (!float_mode_p)
1483 *total = cost_data->int_div;
1484 else if (mode == SFmode)
1485 *total = cost_data->fp_div_sf;
1486 else
1487 *total = cost_data->fp_div_df;
1488 return false;
1489
1490 case MEM:
1491 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1492 return true;
1493
1494 case NEG:
1495 if (! float_mode_p)
1496 {
1497 *total = COSTS_N_INSNS (1);
1498 return false;
1499 }
1500 /* FALLTHRU */
1501
1502 case ABS:
1503 if (! float_mode_p)
1504 {
1505 *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
1506 return false;
1507 }
1508 /* FALLTHRU */
1509
1510 case FLOAT:
1511 case UNSIGNED_FLOAT:
1512 case FIX:
1513 case UNSIGNED_FIX:
1514 case FLOAT_TRUNCATE:
1515 *total = cost_data->fp_add;
1516 return false;
1517
1518 case FLOAT_EXTEND:
1519 if (MEM_P (XEXP (x, 0)))
1520 *total = 0;
1521 else
1522 *total = cost_data->fp_add;
1523 return false;
1524
1525 default:
1526 return false;
1527 }
1528 }
1529 \f
1530 /* REF is an alignable memory location. Place an aligned SImode
1531 reference into *PALIGNED_MEM and the number of bits to shift into
1532 *PBITNUM. SCRATCH is a free register for use in reloading out
1533 of range stack slots. */
1534
1535 void
1536 get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
1537 {
1538 rtx base;
1539 HOST_WIDE_INT disp, offset;
1540
1541 gcc_assert (MEM_P (ref));
1542
1543 if (reload_in_progress)
1544 {
1545 base = find_replacement (&XEXP (ref, 0));
1546 gcc_assert (memory_address_p (GET_MODE (ref), base));
1547 }
1548 else
1549 base = XEXP (ref, 0);
1550
1551 if (GET_CODE (base) == PLUS)
1552 disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1553 else
1554 disp = 0;
1555
1556 /* Find the byte offset within an aligned word. If the memory itself is
1557 claimed to be aligned, believe it. Otherwise, aligned_memory_operand
1558 will have examined the base register and determined it is aligned, and
1559 thus displacements from it are naturally alignable. */
1560 if (MEM_ALIGN (ref) >= 32)
1561 offset = 0;
1562 else
1563 offset = disp & 3;
1564
1565 /* The location should not cross aligned word boundary. */
1566 gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref))
1567 <= GET_MODE_SIZE (SImode));
1568
1569 /* Access the entire aligned word. */
1570 *paligned_mem = widen_memory_access (ref, SImode, -offset);
1571
1572 /* Convert the byte offset within the word to a bit offset. */
1573 offset *= BITS_PER_UNIT;
1574 *pbitnum = GEN_INT (offset);
1575 }
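
/* Editorial sketch, not part of the original source: for a byte reference at
   displacement 5 from an aligned base, the enclosing aligned SImode word
   starts at displacement 4 and the byte sits at bit 8 of that word, which is
   the *PBITNUM the code above produces.  Illustrative helper only, never
   called.  */

static ATTRIBUTE_UNUSED void
alpha_aligned_mem_example (void)
{
  HOST_WIDE_INT disp = 5;
  HOST_WIDE_INT offset = disp & 3;		/* Byte 1 within the word.  */

  gcc_checking_assert (disp - offset == 4);	/* Start of the aligned word.  */
  gcc_checking_assert (offset * BITS_PER_UNIT == 8);
}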
1576
1577 /* Similar, but just get the address.  Handle the two reload cases.  */
1578
1579
1580 rtx
1581 get_unaligned_address (rtx ref)
1582 {
1583 rtx base;
1584 HOST_WIDE_INT offset = 0;
1585
1586 gcc_assert (MEM_P (ref));
1587
1588 if (reload_in_progress)
1589 {
1590 base = find_replacement (&XEXP (ref, 0));
1591 gcc_assert (memory_address_p (GET_MODE (ref), base));
1592 }
1593 else
1594 base = XEXP (ref, 0);
1595
1596 if (GET_CODE (base) == PLUS)
1597 offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1598
1599 return plus_constant (Pmode, base, offset);
1600 }
1601
1602 /* Compute a value X, such that X & 7 == (ADDR + OFS) & 7.
1603 X is always returned in a register. */
1604
1605 rtx
1606 get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
1607 {
1608 if (GET_CODE (addr) == PLUS)
1609 {
1610 ofs += INTVAL (XEXP (addr, 1));
1611 addr = XEXP (addr, 0);
1612 }
1613
1614 return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7),
1615 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1616 }
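
/* Editorial sketch, not part of the original source: only the low three bits
   of OFS affect the byte position within an aligned quadword, which is why
   the code above adds OFS & 7 rather than OFS itself.  Illustrative helper
   only, never called.  */

static ATTRIBUTE_UNUSED void
alpha_unaligned_offset_example (void)
{
  HOST_WIDE_INT addr = 0x1003, ofs = 13;

  gcc_checking_assert (((addr + (ofs & 7)) & 7) == ((addr + ofs) & 7));
}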
1617
1618 /* On the Alpha, all (non-symbolic) constants except zero go into
1619 a floating-point register via memory. Note that we cannot
1620 return anything that is not a subset of RCLASS, and that some
1621 symbolic constants cannot be dropped to memory. */
1622
1623 enum reg_class
1624 alpha_preferred_reload_class(rtx x, enum reg_class rclass)
1625 {
1626 /* Zero is present in any register class. */
1627 if (x == CONST0_RTX (GET_MODE (x)))
1628 return rclass;
1629
1630 /* These sorts of constants we can easily drop to memory. */
1631 if (CONST_SCALAR_INT_P (x)
1632 || CONST_DOUBLE_P (x)
1633 || GET_CODE (x) == CONST_VECTOR)
1634 {
1635 if (rclass == FLOAT_REGS)
1636 return NO_REGS;
1637 if (rclass == ALL_REGS)
1638 return GENERAL_REGS;
1639 return rclass;
1640 }
1641
1642 /* All other kinds of constants should not (and in the case of HIGH
1643 cannot) be dropped to memory -- instead we use a GENERAL_REGS
1644 secondary reload. */
1645 if (CONSTANT_P (x))
1646 return (rclass == ALL_REGS ? GENERAL_REGS : rclass);
1647
1648 return rclass;
1649 }
1650
1651 /* Inform reload about cases where moving X with a mode MODE to a register in
1652 RCLASS requires an extra scratch or immediate register. Return the class
1653 needed for the immediate register. */
1654
1655 static reg_class_t
1656 alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
1657 machine_mode mode, secondary_reload_info *sri)
1658 {
1659 enum reg_class rclass = (enum reg_class) rclass_i;
1660
1661 /* Loading and storing HImode or QImode values to and from memory
1662 usually requires a scratch register. */
1663 if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode))
1664 {
1665 if (any_memory_operand (x, mode))
1666 {
1667 if (in_p)
1668 {
1669 if (!aligned_memory_operand (x, mode))
1670 sri->icode = direct_optab_handler (reload_in_optab, mode);
1671 }
1672 else
1673 sri->icode = direct_optab_handler (reload_out_optab, mode);
1674 return NO_REGS;
1675 }
1676 }
1677
1678 /* We also cannot do integral arithmetic into FP regs, as might result
1679 from register elimination into a DImode fp register. */
1680 if (rclass == FLOAT_REGS)
1681 {
1682 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
1683 return GENERAL_REGS;
1684 if (in_p && INTEGRAL_MODE_P (mode)
1685 && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x))
1686 return GENERAL_REGS;
1687 }
1688
1689 return NO_REGS;
1690 }
1691 \f
1692 /* Given SEQ, which is an INSN list, look for any MEMs in either
1693 a SET_DEST or a SET_SRC and copy the in-struct, unchanging, and
1694 volatile flags from REF into each of the MEMs found. If REF is not
1695 a MEM, don't do anything. */
1696
1697 void
1698 alpha_set_memflags (rtx seq, rtx ref)
1699 {
1700 rtx_insn *insn;
1701
1702 if (!MEM_P (ref))
1703 return;
1704
1705 /* This is only called from alpha.md, after having had something
1706 generated from one of the insn patterns. So if everything is
1707 zero, the pattern is already up-to-date. */
1708 if (!MEM_VOLATILE_P (ref)
1709 && !MEM_NOTRAP_P (ref)
1710 && !MEM_READONLY_P (ref))
1711 return;
1712
1713 subrtx_var_iterator::array_type array;
1714 for (insn = as_a <rtx_insn *> (seq); insn; insn = NEXT_INSN (insn))
1715 if (INSN_P (insn))
1716 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST)
1717 {
1718 rtx x = *iter;
1719 if (MEM_P (x))
1720 {
1721 MEM_VOLATILE_P (x) = MEM_VOLATILE_P (ref);
1722 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (ref);
1723 MEM_READONLY_P (x) = MEM_READONLY_P (ref);
1724 /* Sadly, we cannot use alias sets because the extra
1725 aliasing produced by the AND interferes. Given that
1726 two-byte quantities are the only thing we would be
1727 able to differentiate anyway, there does not seem to
1728 be any point in convoluting the early out of the
1729 alias check. */
1730 iter.skip_subrtxes ();
1731 }
1732 }
1733 else
1734 gcc_unreachable ();
1735 }
1736 \f
1737 static rtx alpha_emit_set_const (rtx, machine_mode, HOST_WIDE_INT,
1738 int, bool);
1739
1740 /* Internal routine for alpha_emit_set_const to check for N or below insns.
1741 If NO_OUTPUT is true, then we only check to see if N insns are possible,
1742 and return pc_rtx if successful. */
1743
1744 static rtx
1745 alpha_emit_set_const_1 (rtx target, machine_mode mode,
1746 HOST_WIDE_INT c, int n, bool no_output)
1747 {
1748 HOST_WIDE_INT new_const;
1749 int i, bits;
1750 /* Use a pseudo if highly optimizing and still generating RTL. */
1751 rtx subtarget
1752 = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target);
1753 rtx temp, insn;
1754
1755 /* If this is a sign-extended 32-bit constant, we can do this in at most
1756 three insns, so do it if we have enough insns left. */
1757
1758 if (c >> 31 == -1 || c >> 31 == 0)
1759 {
1760 HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
1761 HOST_WIDE_INT tmp1 = c - low;
1762 HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
1763 HOST_WIDE_INT extra = 0;
1764
1765 /* If HIGH will be interpreted as negative but the constant is
1766 positive, we must adjust it to do two ldha insns. */
1767
1768 if ((high & 0x8000) != 0 && c >= 0)
1769 {
1770 extra = 0x4000;
1771 tmp1 -= 0x40000000;
1772 high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
1773 }
1774
1775 if (c == low || (low == 0 && extra == 0))
1776 {
1777 /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
1778 but that meant that we can't handle INT_MIN on 32-bit machines
1779 (like NT/Alpha), because we recurse indefinitely through
1780 emit_move_insn to gen_movdi. So instead, since we know exactly
1781 what we want, create it explicitly. */
1782
1783 if (no_output)
1784 return pc_rtx;
1785 if (target == NULL)
1786 target = gen_reg_rtx (mode);
1787 emit_insn (gen_rtx_SET (target, GEN_INT (c)));
1788 return target;
1789 }
1790 else if (n >= 2 + (extra != 0))
1791 {
1792 if (no_output)
1793 return pc_rtx;
1794 if (!can_create_pseudo_p ())
1795 {
1796 emit_insn (gen_rtx_SET (target, GEN_INT (high << 16)));
1797 temp = target;
1798 }
1799 else
1800 temp = copy_to_suggested_reg (GEN_INT (high << 16),
1801 subtarget, mode);
1802
1803 /* As of 2002-02-23, addsi3 is only available when not optimizing.
1804 This means that if we go through expand_binop, we'll try to
1805 generate extensions, etc, which will require new pseudos, which
1806 will fail during some split phases. The SImode add patterns
1807 still exist, but are not named. So build the insns by hand. */
1808
1809 if (extra != 0)
1810 {
1811 if (! subtarget)
1812 subtarget = gen_reg_rtx (mode);
1813 insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
1814 insn = gen_rtx_SET (subtarget, insn);
1815 emit_insn (insn);
1816 temp = subtarget;
1817 }
1818
1819 if (target == NULL)
1820 target = gen_reg_rtx (mode);
1821 insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1822 insn = gen_rtx_SET (target, insn);
1823 emit_insn (insn);
1824 return target;
1825 }
1826 }
1827
1828 /* If we couldn't do it that way, try some other methods. But if we have
1829 no instructions left, don't bother. Likewise, if this is SImode and
1830 we can't make pseudos, we can't do anything since the expand_binop
1831 and expand_unop calls will widen and try to make pseudos. */
1832
1833 if (n == 1 || (mode == SImode && !can_create_pseudo_p ()))
1834 return 0;
1835
1836 /* Next, see if we can load a related constant and then shift and possibly
1837 negate it to get the constant we want. Try this once each increasing
1838 numbers of insns. */
1839
1840 for (i = 1; i < n; i++)
1841 {
1842       /* First, see if, minus some low bits, we have an easy load of
1843          the high bits.  */
1844
1845 new_const = ((c & 0xffff) ^ 0x8000) - 0x8000;
1846 if (new_const != 0)
1847 {
1848 temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output);
1849 if (temp)
1850 {
1851 if (no_output)
1852 return temp;
1853 return expand_binop (mode, add_optab, temp, GEN_INT (new_const),
1854 target, 0, OPTAB_WIDEN);
1855 }
1856 }
1857
1858 /* Next try complementing. */
1859 temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
1860 if (temp)
1861 {
1862 if (no_output)
1863 return temp;
1864 return expand_unop (mode, one_cmpl_optab, temp, target, 0);
1865 }
1866
1867 /* Next try to form a constant and do a left shift. We can do this
1868 if some low-order bits are zero; the exact_log2 call below tells
1869 us that information. The bits we are shifting out could be any
1870 value, but here we'll just try the 0- and sign-extended forms of
1871 the constant. To try to increase the chance of having the same
1872 constant in more than one insn, start at the highest number of
1873 bits to shift, but try all possibilities in case a ZAPNOT will
1874 be useful. */
1875
1876 bits = exact_log2 (c & -c);
1877 if (bits > 0)
1878 for (; bits > 0; bits--)
1879 {
1880 new_const = c >> bits;
1881 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1882 if (!temp && c < 0)
1883 {
1884 new_const = (unsigned HOST_WIDE_INT)c >> bits;
1885 temp = alpha_emit_set_const (subtarget, mode, new_const,
1886 i, no_output);
1887 }
1888 if (temp)
1889 {
1890 if (no_output)
1891 return temp;
1892 return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
1893 target, 0, OPTAB_WIDEN);
1894 }
1895 }
1896
1897 /* Now try high-order zero bits. Here we try the shifted-in bits as
1898 all zero and all ones. Be careful to avoid shifting outside the
1899 mode and to avoid shifting outside the host wide int size. */
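/* For example (purely illustrative): c == 0x00007fffffffffff gives
   bits == 17; c << 17 == 0xfffffffffffe0000, which a single LDAH (-2)
   can produce, and the logical right shift by 17 emitted below then
   recovers c.  */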
1900
1901 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1902 - floor_log2 (c) - 1);
1903 if (bits > 0)
1904 for (; bits > 0; bits--)
1905 {
1906 new_const = c << bits;
1907 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1908 if (!temp)
1909 {
1910 new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1);
1911 temp = alpha_emit_set_const (subtarget, mode, new_const,
1912 i, no_output);
1913 }
1914 if (temp)
1915 {
1916 if (no_output)
1917 return temp;
1918 return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
1919 target, 1, OPTAB_WIDEN);
1920 }
1921 }
1922
1923 /* Now try high-order 1 bits. We get that with a sign-extension.
1924 But one bit isn't enough here. Be careful to avoid shifting outside
1925 the mode and to avoid shifting outside the host wide int size. */
1926
1927 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1928 - floor_log2 (~ c) - 2);
1929 if (bits > 0)
1930 for (; bits > 0; bits--)
1931 {
1932 new_const = c << bits;
1933 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1934 if (!temp)
1935 {
1936 new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1);
1937 temp = alpha_emit_set_const (subtarget, mode, new_const,
1938 i, no_output);
1939 }
1940 if (temp)
1941 {
1942 if (no_output)
1943 return temp;
1944 return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
1945 target, 0, OPTAB_WIDEN);
1946 }
1947 }
1948 }
1949
1950 /* Finally, see if we can load a value into the target that is the same as
1951 the constant except that all bytes that are 0 are changed to be 0xff.
1952 If we can, then we can do a ZAPNOT to obtain the desired constant. */
1953
1954 new_const = c;
1955 for (i = 0; i < 64; i += 8)
1956 if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0)
1957 new_const |= (HOST_WIDE_INT) 0xff << i;
1958
1959 /* We are only called for SImode and DImode. If this is SImode, ensure that
1960 we are sign extended to a full word. */
1961
1962 if (mode == SImode)
1963 new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000;
1964
1965 if (new_const != c)
1966 {
1967 temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output);
1968 if (temp)
1969 {
1970 if (no_output)
1971 return temp;
1972 return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const),
1973 target, 0, OPTAB_WIDEN);
1974 }
1975 }
1976
1977 return 0;
1978 }
1979
1980 /* Try to output insns to set TARGET equal to the constant C if it can be
1981 done in at most N insns. Do all computations in MODE. Returns the place
1982 where the output has been placed if it can be done and the insns have been
1983 emitted. If it would take more than N insns, zero is returned and no
1984 insns are emitted. */
1985
1986 static rtx
1987 alpha_emit_set_const (rtx target, machine_mode mode,
1988 HOST_WIDE_INT c, int n, bool no_output)
1989 {
1990 machine_mode orig_mode = mode;
1991 rtx orig_target = target;
1992 rtx result = 0;
1993 int i;
1994
1995 /* If we can't make any pseudos, TARGET is an SImode hard register, and we
1996 can't load this constant in one insn, do this in DImode. */
1997 if (!can_create_pseudo_p () && mode == SImode
1998 && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER)
1999 {
2000 result = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
2001 if (result)
2002 return result;
2003
2004 target = no_output ? NULL : gen_lowpart (DImode, target);
2005 mode = DImode;
2006 }
2007 else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
2008 {
2009 target = no_output ? NULL : gen_lowpart (DImode, target);
2010 mode = DImode;
2011 }
2012
2013 /* Try 1 insn, then 2, then up to N. */
2014 for (i = 1; i <= n; i++)
2015 {
2016 result = alpha_emit_set_const_1 (target, mode, c, i, no_output);
2017 if (result)
2018 {
2019 rtx_insn *insn;
2020 rtx set;
2021
2022 if (no_output)
2023 return result;
2024
2025 insn = get_last_insn ();
2026 set = single_set (insn);
2027 if (! CONSTANT_P (SET_SRC (set)))
2028 set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c));
2029 break;
2030 }
2031 }
2032
2033 /* Allow for the case where we changed the mode of TARGET. */
2034 if (result)
2035 {
2036 if (result == target)
2037 result = orig_target;
2038 else if (mode != orig_mode)
2039 result = gen_lowpart (orig_mode, result);
2040 }
2041
2042 return result;
2043 }
2044
2045 /* Having failed to find a 3 insn sequence in alpha_emit_set_const,
2046 fall back to a straightforward decomposition. We do this to avoid
2047 exponential run times encountered when looking for longer sequences
2048 with alpha_emit_set_const. */
2049
2050 static rtx
2051 alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1)
2052 {
2053 HOST_WIDE_INT d1, d2, d3, d4;
2054
2055 /* Decompose the entire word. */
2056
2057 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2058 c1 -= d1;
2059 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2060 c1 = (c1 - d2) >> 32;
2061 d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2062 c1 -= d3;
2063 d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2064 gcc_assert (c1 == d4);
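  /* As a purely illustrative example, c1 == 0x123456789abcdef0 decomposes
     into d1 == -0x2110, d2 == -0x65430000, d3 == 0x5679 and d4 == 0x12340000;
     the insns emitted below rebuild it as ((d4 + d3) << 32) + d2 + d1.  */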
2065
2066 /* Construct the high word. */
2067 if (d4)
2068 {
2069 emit_move_insn (target, GEN_INT (d4));
2070 if (d3)
2071 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
2072 }
2073 else
2074 emit_move_insn (target, GEN_INT (d3));
2075
2076 /* Shift it into place. */
2077 emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
2078
2079 /* Add in the low bits. */
2080 if (d2)
2081 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
2082 if (d1)
2083 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
2084
2085 return target;
2086 }
2087
2088 /* Given an integral CONST_INT or CONST_VECTOR, return the low 64 bits. */
2089
2090 static HOST_WIDE_INT
2091 alpha_extract_integer (rtx x)
2092 {
2093 if (GET_CODE (x) == CONST_VECTOR)
2094 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
2095
2096 gcc_assert (CONST_INT_P (x));
2097
2098 return INTVAL (x);
2099 }
2100
2101 /* Implement TARGET_LEGITIMATE_CONSTANT_P. This is all constants for which
2102 we are willing to load the value into a register via a move pattern.
2103 Normally this is all symbolic constants, integral constants that
2104 take three or fewer instructions, and floating-point zero. */
2105
2106 bool
2107 alpha_legitimate_constant_p (machine_mode mode, rtx x)
2108 {
2109 HOST_WIDE_INT i0;
2110
2111 switch (GET_CODE (x))
2112 {
2113 case LABEL_REF:
2114 case HIGH:
2115 return true;
2116
2117 case CONST:
2118 if (GET_CODE (XEXP (x, 0)) == PLUS
2119 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2120 x = XEXP (XEXP (x, 0), 0);
2121 else
2122 return true;
2123
2124 if (GET_CODE (x) != SYMBOL_REF)
2125 return true;
2126 /* FALLTHRU */
2127
2128 case SYMBOL_REF:
2129 /* TLS symbols are never valid. */
2130 return SYMBOL_REF_TLS_MODEL (x) == 0;
2131
2132 case CONST_WIDE_INT:
2133 if (TARGET_BUILD_CONSTANTS)
2134 return true;
2135 if (x == CONST0_RTX (mode))
2136 return true;
2137 mode = DImode;
2138 gcc_assert (CONST_WIDE_INT_NUNITS (x) == 2);
2139 i0 = CONST_WIDE_INT_ELT (x, 1);
2140 if (alpha_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) == NULL)
2141 return false;
2142 i0 = CONST_WIDE_INT_ELT (x, 0);
2143 goto do_integer;
2144
2145 case CONST_DOUBLE:
2146 if (x == CONST0_RTX (mode))
2147 return true;
2148 return false;
2149
2150 case CONST_VECTOR:
2151 if (x == CONST0_RTX (mode))
2152 return true;
2153 if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
2154 return false;
2155 if (GET_MODE_SIZE (mode) != 8)
2156 return false;
2157 /* FALLTHRU */
2158
2159 case CONST_INT:
2160 if (TARGET_BUILD_CONSTANTS)
2161 return true;
2162 i0 = alpha_extract_integer (x);
2163 do_integer:
2164 return alpha_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) != NULL;
2165
2166 default:
2167 return false;
2168 }
2169 }
2170
2171 /* Operand 1 is known to be a constant, and should require more than one
2172 instruction to load. Emit that multi-part load. */
2173
2174 bool
2175 alpha_split_const_mov (machine_mode mode, rtx *operands)
2176 {
2177 HOST_WIDE_INT i0;
2178 rtx temp = NULL_RTX;
2179
2180 i0 = alpha_extract_integer (operands[1]);
2181
2182 temp = alpha_emit_set_const (operands[0], mode, i0, 3, false);
2183
2184 if (!temp && TARGET_BUILD_CONSTANTS)
2185 temp = alpha_emit_set_long_const (operands[0], i0);
2186
2187 if (temp)
2188 {
2189 if (!rtx_equal_p (operands[0], temp))
2190 emit_move_insn (operands[0], temp);
2191 return true;
2192 }
2193
2194 return false;
2195 }
2196
2197 /* Expand a move instruction; return true if all work is done.
2198 We don't handle non-bwx subword loads here. */
2199
2200 bool
2201 alpha_expand_mov (machine_mode mode, rtx *operands)
2202 {
2203 rtx tmp;
2204
2205 /* If the output is not a register, the input must be. */
2206 if (MEM_P (operands[0])
2207 && ! reg_or_0_operand (operands[1], mode))
2208 operands[1] = force_reg (mode, operands[1]);
2209
2210 /* Allow legitimize_address to perform some simplifications. */
2211 if (mode == Pmode && symbolic_operand (operands[1], mode))
2212 {
2213 tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode);
2214 if (tmp)
2215 {
2216 if (tmp == operands[0])
2217 return true;
2218 operands[1] = tmp;
2219 return false;
2220 }
2221 }
2222
2223 /* Early out for non-constants and valid constants. */
2224 if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode))
2225 return false;
2226
2227 /* Split large integers. */
2228 if (CONST_INT_P (operands[1])
2229 || GET_CODE (operands[1]) == CONST_VECTOR)
2230 {
2231 if (alpha_split_const_mov (mode, operands))
2232 return true;
2233 }
2234
2235 /* Otherwise we've nothing left but to drop the thing to memory. */
2236 tmp = force_const_mem (mode, operands[1]);
2237
2238 if (tmp == NULL_RTX)
2239 return false;
2240
2241 if (reload_in_progress)
2242 {
2243 emit_move_insn (operands[0], XEXP (tmp, 0));
2244 operands[1] = replace_equiv_address (tmp, operands[0]);
2245 }
2246 else
2247 operands[1] = validize_mem (tmp);
2248 return false;
2249 }
2250
2251 /* Expand a non-bwx QImode or HImode move instruction;
2252 return true if all work is done. */
2253
2254 bool
2255 alpha_expand_mov_nobwx (machine_mode mode, rtx *operands)
2256 {
2257 rtx seq;
2258
2259 /* If the output is not a register, the input must be. */
2260 if (MEM_P (operands[0]))
2261 operands[1] = force_reg (mode, operands[1]);
2262
2263 /* Handle four memory cases, unaligned and aligned for either the input
2264 or the output. The only case where we can be called during reload is
2265 for aligned loads; all other cases require temporaries. */
2266
2267 if (any_memory_operand (operands[1], mode))
2268 {
2269 if (aligned_memory_operand (operands[1], mode))
2270 {
2271 if (reload_in_progress)
2272 {
2273 if (mode == QImode)
2274 seq = gen_reload_inqi_aligned (operands[0], operands[1]);
2275 else
2276 seq = gen_reload_inhi_aligned (operands[0], operands[1]);
2277 emit_insn (seq);
2278 }
2279 else
2280 {
2281 rtx aligned_mem, bitnum;
2282 rtx scratch = gen_reg_rtx (SImode);
2283 rtx subtarget;
2284 bool copyout;
2285
2286 get_aligned_mem (operands[1], &aligned_mem, &bitnum);
2287
2288 subtarget = operands[0];
2289 if (REG_P (subtarget))
2290 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2291 else
2292 subtarget = gen_reg_rtx (DImode), copyout = true;
2293
2294 if (mode == QImode)
2295 seq = gen_aligned_loadqi (subtarget, aligned_mem,
2296 bitnum, scratch);
2297 else
2298 seq = gen_aligned_loadhi (subtarget, aligned_mem,
2299 bitnum, scratch);
2300 emit_insn (seq);
2301
2302 if (copyout)
2303 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2304 }
2305 }
2306 else
2307 {
2308 /* Don't pass these as parameters since that makes the generated
2309 code depend on parameter evaluation order which will cause
2310 bootstrap failures. */
2311
2312 rtx temp1, temp2, subtarget, ua;
2313 bool copyout;
2314
2315 temp1 = gen_reg_rtx (DImode);
2316 temp2 = gen_reg_rtx (DImode);
2317
2318 subtarget = operands[0];
2319 if (REG_P (subtarget))
2320 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2321 else
2322 subtarget = gen_reg_rtx (DImode), copyout = true;
2323
2324 ua = get_unaligned_address (operands[1]);
2325 if (mode == QImode)
2326 seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2);
2327 else
2328 seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2);
2329
2330 alpha_set_memflags (seq, operands[1]);
2331 emit_insn (seq);
2332
2333 if (copyout)
2334 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2335 }
2336 return true;
2337 }
2338
2339 if (any_memory_operand (operands[0], mode))
2340 {
2341 if (aligned_memory_operand (operands[0], mode))
2342 {
2343 rtx aligned_mem, bitnum;
2344 rtx temp1 = gen_reg_rtx (SImode);
2345 rtx temp2 = gen_reg_rtx (SImode);
2346
2347 get_aligned_mem (operands[0], &aligned_mem, &bitnum);
2348
2349 emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
2350 temp1, temp2));
2351 }
2352 else
2353 {
2354 rtx temp1 = gen_reg_rtx (DImode);
2355 rtx temp2 = gen_reg_rtx (DImode);
2356 rtx temp3 = gen_reg_rtx (DImode);
2357 rtx ua = get_unaligned_address (operands[0]);
2358
2359 if (mode == QImode)
2360 seq = gen_unaligned_storeqi (ua, operands[1], temp1, temp2, temp3);
2361 else
2362 seq = gen_unaligned_storehi (ua, operands[1], temp1, temp2, temp3);
2363
2364 alpha_set_memflags (seq, operands[0]);
2365 emit_insn (seq);
2366 }
2367 return true;
2368 }
2369
2370 return false;
2371 }
2372
2373 /* Implement the movmisalign patterns. One of the operands is a memory
2374 that is not naturally aligned. Emit instructions to load it. */
2375
2376 void
2377 alpha_expand_movmisalign (machine_mode mode, rtx *operands)
2378 {
2379 /* Honor misaligned loads, for those we promised to do so. */
2380 if (MEM_P (operands[1]))
2381 {
2382 rtx tmp;
2383
2384 if (register_operand (operands[0], mode))
2385 tmp = operands[0];
2386 else
2387 tmp = gen_reg_rtx (mode);
2388
2389 alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0);
2390 if (tmp != operands[0])
2391 emit_move_insn (operands[0], tmp);
2392 }
2393 else if (MEM_P (operands[0]))
2394 {
2395 if (!reg_or_0_operand (operands[1], mode))
2396 operands[1] = force_reg (mode, operands[1]);
2397 alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
2398 }
2399 else
2400 gcc_unreachable ();
2401 }
2402
2403 /* Generate an unsigned DImode to FP conversion. This is the same code
2404 optabs would emit if we didn't have TFmode patterns.
2405
2406 For SFmode, this is the only construction I've found that can pass
2407 gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode
2408 intermediates will work, because you'll get intermediate rounding
2409 that ruins the end result. Some of this could be fixed by turning
2410 on round-to-positive-infinity, but that requires diddling the fpsr,
2411 which kills performance. I tried turning this around and converting
2412 to a negative number, so that I could turn on /m, but either I did
2413 it wrong or there's something else going on, because I wound up with the exact
2414 same single-bit error. There is a branch-less form of this same code:
2415
2416 srl $16,1,$1
2417 and $16,1,$2
2418 cmplt $16,0,$3
2419 or $1,$2,$2
2420 cmovge $16,$16,$2
2421 itoft $3,$f10
2422 itoft $2,$f11
2423 cvtqs $f11,$f11
2424 adds $f11,$f11,$f0
2425 fcmoveq $f10,$f11,$f0
2426
2427 I'm not using it because it's the same number of instructions as
2428 this branch-full form, and it has more serialized long latency
2429 instructions on the critical path.
2430
2431 For DFmode, we can avoid rounding errors by breaking up the word
2432 into two pieces, converting them separately, and adding them back:
2433
2434 LC0: .long 0,0x5f800000
2435
2436 itoft $16,$f11
2437 lda $2,LC0
2438 cmplt $16,0,$1
2439 cpyse $f11,$f31,$f10
2440 cpyse $f31,$f11,$f11
2441 s4addq $1,$2,$1
2442 lds $f12,0($1)
2443 cvtqt $f10,$f10
2444 cvtqt $f11,$f11
2445 addt $f12,$f10,$f0
2446 addt $f0,$f11,$f0
2447
2448 This doesn't seem to be a clear-cut win over the optabs form.
2449 It probably all depends on the distribution of numbers being
2450 converted -- in the optabs form, everything but the high-bit-set case has a
2451 much lower minimum execution time. */
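/* A brief sketch of the branch-full code emitted below: nonnegative
   inputs are converted directly; for negative inputs we compute
   (in >> 1) | (in & 1), so the discarded bit acts as a sticky bit,
   convert that, and double the result, keeping the single rounding
   step correct.  */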
2452
2453 void
2454 alpha_emit_floatuns (rtx operands[2])
2455 {
2456 rtx neglab, donelab, i0, i1, f0, in, out;
2457 machine_mode mode;
2458
2459 out = operands[0];
2460 in = force_reg (DImode, operands[1]);
2461 mode = GET_MODE (out);
2462 neglab = gen_label_rtx ();
2463 donelab = gen_label_rtx ();
2464 i0 = gen_reg_rtx (DImode);
2465 i1 = gen_reg_rtx (DImode);
2466 f0 = gen_reg_rtx (mode);
2467
2468 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
2469
2470 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
2471 emit_jump_insn (gen_jump (donelab));
2472 emit_barrier ();
2473
2474 emit_label (neglab);
2475
2476 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
2477 emit_insn (gen_anddi3 (i1, in, const1_rtx));
2478 emit_insn (gen_iordi3 (i0, i0, i1));
2479 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
2480 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
2481
2482 emit_label (donelab);
2483 }
2484
2485 /* Generate the comparison for a conditional branch. */
2486
2487 void
2488 alpha_emit_conditional_branch (rtx operands[], machine_mode cmp_mode)
2489 {
2490 enum rtx_code cmp_code, branch_code;
2491 machine_mode branch_mode = VOIDmode;
2492 enum rtx_code code = GET_CODE (operands[0]);
2493 rtx op0 = operands[1], op1 = operands[2];
2494 rtx tem;
2495
2496 if (cmp_mode == TFmode)
2497 {
2498 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2499 op1 = const0_rtx;
2500 cmp_mode = DImode;
2501 }
2502
2503 /* The general case: fold the comparison code to the types of compares
2504 that we have, choosing the branch as necessary. */
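  /* For example (illustrative only), an integer branch on a > b folds to
     cmp_code LE with branch_code EQ: we emit a compare computing a <= b
     and branch when its result is zero, which is the same as branching
     when a > b.  */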
2505 switch (code)
2506 {
2507 case EQ: case LE: case LT: case LEU: case LTU:
2508 case UNORDERED:
2509 /* We have these compares. */
2510 cmp_code = code, branch_code = NE;
2511 break;
2512
2513 case NE:
2514 case ORDERED:
2515 /* These must be reversed. */
2516 cmp_code = reverse_condition (code), branch_code = EQ;
2517 break;
2518
2519 case GE: case GT: case GEU: case GTU:
2520 /* For FP, we swap them, for INT, we reverse them. */
2521 if (cmp_mode == DFmode)
2522 {
2523 cmp_code = swap_condition (code);
2524 branch_code = NE;
2525 std::swap (op0, op1);
2526 }
2527 else
2528 {
2529 cmp_code = reverse_condition (code);
2530 branch_code = EQ;
2531 }
2532 break;
2533
2534 default:
2535 gcc_unreachable ();
2536 }
2537
2538 if (cmp_mode == DFmode)
2539 {
2540 if (flag_unsafe_math_optimizations && cmp_code != UNORDERED)
2541 {
2542 /* When we are not as concerned about non-finite values, and we
2543 are comparing against zero, we can branch directly. */
2544 if (op1 == CONST0_RTX (DFmode))
2545 cmp_code = UNKNOWN, branch_code = code;
2546 else if (op0 == CONST0_RTX (DFmode))
2547 {
2548 /* Undo the swap we probably did just above. */
2549 std::swap (op0, op1);
2550 branch_code = swap_condition (cmp_code);
2551 cmp_code = UNKNOWN;
2552 }
2553 }
2554 else
2555 {
2556 /* ??? We mark the branch mode to be CCmode to prevent the
2557 compare and branch from being combined, since the compare
2558 insn follows IEEE rules that the branch does not. */
2559 branch_mode = CCmode;
2560 }
2561 }
2562 else
2563 {
2564 /* The following optimizations are only for signed compares. */
2565 if (code != LEU && code != LTU && code != GEU && code != GTU)
2566 {
2567 /* Whee. Compare and branch against 0 directly. */
2568 if (op1 == const0_rtx)
2569 cmp_code = UNKNOWN, branch_code = code;
2570
2571 /* If the constant doesn't fit into an immediate, but can
2572 be generated by lda/ldah, we adjust the argument and
2573 compare against zero, so we can use beq/bne directly. */
2574 /* ??? Don't do this when comparing against symbols, otherwise
2575 we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will
2576 be declared false out of hand (at least for non-weak). */
2577 else if (CONST_INT_P (op1)
2578 && (code == EQ || code == NE)
2579 && !(symbolic_operand (op0, VOIDmode)
2580 || (REG_P (op0) && REG_POINTER (op0))))
2581 {
2582 rtx n_op1 = GEN_INT (-INTVAL (op1));
2583
2584 if (! satisfies_constraint_I (op1)
2585 && (satisfies_constraint_K (n_op1)
2586 || satisfies_constraint_L (n_op1)))
2587 cmp_code = PLUS, branch_code = code, op1 = n_op1;
2588 }
2589 }
2590
2591 if (!reg_or_0_operand (op0, DImode))
2592 op0 = force_reg (DImode, op0);
2593 if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
2594 op1 = force_reg (DImode, op1);
2595 }
2596
2597 /* Emit an initial compare instruction, if necessary. */
2598 tem = op0;
2599 if (cmp_code != UNKNOWN)
2600 {
2601 tem = gen_reg_rtx (cmp_mode);
2602 emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
2603 }
2604
2605 /* Emit the branch instruction. */
2606 tem = gen_rtx_SET (pc_rtx,
2607 gen_rtx_IF_THEN_ELSE (VOIDmode,
2608 gen_rtx_fmt_ee (branch_code,
2609 branch_mode, tem,
2610 CONST0_RTX (cmp_mode)),
2611 gen_rtx_LABEL_REF (VOIDmode,
2612 operands[3]),
2613 pc_rtx));
2614 emit_jump_insn (tem);
2615 }
2616
2617 /* Certain simplifications can be done to make invalid setcc operations
2618 valid. Emit the final setcc; return false if we can't. */
2619
2620 bool
2621 alpha_emit_setcc (rtx operands[], machine_mode cmp_mode)
2622 {
2623 enum rtx_code cmp_code;
2624 enum rtx_code code = GET_CODE (operands[1]);
2625 rtx op0 = operands[2], op1 = operands[3];
2626 rtx tmp;
2627
2628 if (cmp_mode == TFmode)
2629 {
2630 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2631 op1 = const0_rtx;
2632 cmp_mode = DImode;
2633 }
2634
2635 if (cmp_mode == DFmode && !TARGET_FIX)
2636 return 0;
2637
2638 /* The general case: fold the comparison code to the types of compares
2639 that we have, choosing the branch as necessary. */
2640
2641 cmp_code = UNKNOWN;
2642 switch (code)
2643 {
2644 case EQ: case LE: case LT: case LEU: case LTU:
2645 case UNORDERED:
2646 /* We have these compares. */
2647 if (cmp_mode == DFmode)
2648 cmp_code = code, code = NE;
2649 break;
2650
2651 case NE:
2652 if (cmp_mode == DImode && op1 == const0_rtx)
2653 break;
2654 /* FALLTHRU */
2655
2656 case ORDERED:
2657 cmp_code = reverse_condition (code);
2658 code = EQ;
2659 break;
2660
2661 case GE: case GT: case GEU: case GTU:
2662 /* These normally need swapping, but for integer zero we have
2663 special patterns that recognize swapped operands. */
2664 if (cmp_mode == DImode && op1 == const0_rtx)
2665 break;
2666 code = swap_condition (code);
2667 if (cmp_mode == DFmode)
2668 cmp_code = code, code = NE;
2669 std::swap (op0, op1);
2670 break;
2671
2672 default:
2673 gcc_unreachable ();
2674 }
2675
2676 if (cmp_mode == DImode)
2677 {
2678 if (!register_operand (op0, DImode))
2679 op0 = force_reg (DImode, op0);
2680 if (!reg_or_8bit_operand (op1, DImode))
2681 op1 = force_reg (DImode, op1);
2682 }
2683
2684 /* Emit an initial compare instruction, if necessary. */
2685 if (cmp_code != UNKNOWN)
2686 {
2687 tmp = gen_reg_rtx (cmp_mode);
2688 emit_insn (gen_rtx_SET (tmp, gen_rtx_fmt_ee (cmp_code, cmp_mode,
2689 op0, op1)));
2690
2691 op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp;
2692 op1 = const0_rtx;
2693 }
2694
2695 /* Emit the setcc instruction. */
2696 emit_insn (gen_rtx_SET (operands[0], gen_rtx_fmt_ee (code, DImode,
2697 op0, op1)));
2698 return true;
2699 }
2700
2701
2702 /* Rewrite a comparison against zero CMP of the form
2703 (CODE (cc0) (const_int 0)) so it can be written validly in
2704 a conditional move (if_then_else CMP ...).
2705 If both of the operands that set cc0 are nonzero we must emit
2706 an insn to perform the compare (it can't be done within
2707 the conditional move). */
2708
2709 rtx
2710 alpha_emit_conditional_move (rtx cmp, machine_mode mode)
2711 {
2712 enum rtx_code code = GET_CODE (cmp);
2713 enum rtx_code cmov_code = NE;
2714 rtx op0 = XEXP (cmp, 0);
2715 rtx op1 = XEXP (cmp, 1);
2716 machine_mode cmp_mode
2717 = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0));
2718 machine_mode cmov_mode = VOIDmode;
2719 int local_fast_math = flag_unsafe_math_optimizations;
2720 rtx tem;
2721
2722 if (cmp_mode == TFmode)
2723 {
2724 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2725 op1 = const0_rtx;
2726 cmp_mode = DImode;
2727 }
2728
2729 gcc_assert (cmp_mode == DFmode || cmp_mode == DImode);
2730
2731 if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode))
2732 {
2733 enum rtx_code cmp_code;
2734
2735 if (! TARGET_FIX)
2736 return 0;
2737
2738 /* If we have fp<->int register move instructions, do a cmov by
2739 performing the comparison in fp registers, and move the
2740 zero/nonzero value to integer registers, where we can then
2741 use a normal cmov, or vice-versa. */
2742
2743 switch (code)
2744 {
2745 case EQ: case LE: case LT: case LEU: case LTU:
2746 case UNORDERED:
2747 /* We have these compares. */
2748 cmp_code = code, code = NE;
2749 break;
2750
2751 case NE:
2752 case ORDERED:
2753 /* These must be reversed. */
2754 cmp_code = reverse_condition (code), code = EQ;
2755 break;
2756
2757 case GE: case GT: case GEU: case GTU:
2758 /* These normally need swapping, but for integer zero we have
2759 special patterns that recognize swapped operands. */
2760 if (cmp_mode == DImode && op1 == const0_rtx)
2761 cmp_code = code, code = NE;
2762 else
2763 {
2764 cmp_code = swap_condition (code);
2765 code = NE;
2766 std::swap (op0, op1);
2767 }
2768 break;
2769
2770 default:
2771 gcc_unreachable ();
2772 }
2773
2774 if (cmp_mode == DImode)
2775 {
2776 if (!reg_or_0_operand (op0, DImode))
2777 op0 = force_reg (DImode, op0);
2778 if (!reg_or_8bit_operand (op1, DImode))
2779 op1 = force_reg (DImode, op1);
2780 }
2781
2782 tem = gen_reg_rtx (cmp_mode);
2783 emit_insn (gen_rtx_SET (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode,
2784 op0, op1)));
2785
2786 cmp_mode = cmp_mode == DImode ? E_DFmode : E_DImode;
2787 op0 = gen_lowpart (cmp_mode, tem);
2788 op1 = CONST0_RTX (cmp_mode);
2789 cmp = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2790 local_fast_math = 1;
2791 }
2792
2793 if (cmp_mode == DImode)
2794 {
2795 if (!reg_or_0_operand (op0, DImode))
2796 op0 = force_reg (DImode, op0);
2797 if (!reg_or_8bit_operand (op1, DImode))
2798 op1 = force_reg (DImode, op1);
2799 }
2800
2801 /* We may be able to use a conditional move directly.
2802 This avoids emitting spurious compares. */
2803 if (signed_comparison_operator (cmp, VOIDmode)
2804 && (cmp_mode == DImode || local_fast_math)
2805 && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode)))
2806 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2807
2808 /* We can't put the comparison inside the conditional move;
2809 emit a compare instruction and put that inside the
2810 conditional move. Make sure we emit only comparisons we have;
2811 swap or reverse as necessary. */
2812
2813 if (!can_create_pseudo_p ())
2814 return NULL_RTX;
2815
2816 switch (code)
2817 {
2818 case EQ: case LE: case LT: case LEU: case LTU:
2819 case UNORDERED:
2820 /* We have these compares: */
2821 break;
2822
2823 case NE:
2824 case ORDERED:
2825 /* These must be reversed. */
2826 code = reverse_condition (code);
2827 cmov_code = EQ;
2828 break;
2829
2830 case GE: case GT: case GEU: case GTU:
2831 /* These normally need swapping, but for integer zero we have
2832 special patterns that recognize swapped operands. */
2833 if (cmp_mode == DImode && op1 == const0_rtx)
2834 break;
2835 code = swap_condition (code);
2836 std::swap (op0, op1);
2837 break;
2838
2839 default:
2840 gcc_unreachable ();
2841 }
2842
2843 if (cmp_mode == DImode)
2844 {
2845 if (!reg_or_0_operand (op0, DImode))
2846 op0 = force_reg (DImode, op0);
2847 if (!reg_or_8bit_operand (op1, DImode))
2848 op1 = force_reg (DImode, op1);
2849 }
2850
2851 /* ??? We mark the branch mode to be CCmode to prevent the compare
2852 and cmov from being combined, since the compare insn follows IEEE
2853 rules that the cmov does not. */
2854 if (cmp_mode == DFmode && !local_fast_math)
2855 cmov_mode = CCmode;
2856
2857 tem = gen_reg_rtx (cmp_mode);
2858 emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1));
2859 return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode));
2860 }
2861
2862 /* Simplify a conditional move of two constants into a setcc with
2863 arithmetic. This is done with a splitter since combine would
2864 just undo the work if done during code generation. It also catches
2865 cases we wouldn't have before cse. */
2866
2867 int
2868 alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond,
2869 rtx t_rtx, rtx f_rtx)
2870 {
2871 HOST_WIDE_INT t, f, diff;
2872 machine_mode mode;
2873 rtx target, subtarget, tmp;
2874
2875 mode = GET_MODE (dest);
2876 t = INTVAL (t_rtx);
2877 f = INTVAL (f_rtx);
2878 diff = t - f;
2879
2880 if (((code == NE || code == EQ) && diff < 0)
2881 || (code == GE || code == GT))
2882 {
2883 code = reverse_condition (code);
2884 diff = t, t = f, f = diff;
2885 diff = t - f;
2886 }
2887
2888 subtarget = target = dest;
2889 if (mode != DImode)
2890 {
2891 target = gen_lowpart (DImode, dest);
2892 if (can_create_pseudo_p ())
2893 subtarget = gen_reg_rtx (DImode);
2894 else
2895 subtarget = target;
2896 }
2897 /* Below, we must be careful to use copy_rtx on target and subtarget
2898 in intermediate insns, as they may be a subreg rtx, which may not
2899 be shared. */
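  /* A purely illustrative example: for (cond != 0 ? 8 : 0) we have f == 0
     and t == 8, so the first arm below emits a setcc of cond != 0 followed
     by a left shift by exact_log2 (8) == 3, avoiding the cmove entirely.  */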
2900
2901 if (f == 0 && exact_log2 (diff) > 0
2902 /* On EV6, we've got enough shifters to make non-arithmetic shifts
2903 viable over a longer latency cmove. On EV5, the E0 slot is a
2904 scarce resource, and on EV4 shift has the same latency as a cmove. */
2905 && (diff <= 8 || alpha_tune == PROCESSOR_EV6))
2906 {
2907 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2908 emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2909
2910 tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2911 GEN_INT (exact_log2 (t)));
2912 emit_insn (gen_rtx_SET (target, tmp));
2913 }
2914 else if (f == 0 && t == -1)
2915 {
2916 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2917 emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2918
2919 emit_insn (gen_negdi2 (target, copy_rtx (subtarget)));
2920 }
2921 else if (diff == 1 || diff == 4 || diff == 8)
2922 {
2923 rtx add_op;
2924
2925 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2926 emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2927
2928 if (diff == 1)
2929 emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f)));
2930 else
2931 {
2932 add_op = GEN_INT (f);
2933 if (sext_add_operand (add_op, mode))
2934 {
2935 tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget),
2936 GEN_INT (diff));
2937 tmp = gen_rtx_PLUS (DImode, tmp, add_op);
2938 emit_insn (gen_rtx_SET (target, tmp));
2939 }
2940 else
2941 return 0;
2942 }
2943 }
2944 else
2945 return 0;
2946
2947 return 1;
2948 }
2949 \f
2950 /* Look up the X_floating library function name for the
2951 given operation. */
2952
2953 struct GTY(()) xfloating_op
2954 {
2955 const enum rtx_code code;
2956 const char *const GTY((skip)) osf_func;
2957 const char *const GTY((skip)) vms_func;
2958 rtx libcall;
2959 };
2960
2961 static GTY(()) struct xfloating_op xfloating_ops[] =
2962 {
2963 { PLUS, "_OtsAddX", "OTS$ADD_X", 0 },
2964 { MINUS, "_OtsSubX", "OTS$SUB_X", 0 },
2965 { MULT, "_OtsMulX", "OTS$MUL_X", 0 },
2966 { DIV, "_OtsDivX", "OTS$DIV_X", 0 },
2967 { EQ, "_OtsEqlX", "OTS$EQL_X", 0 },
2968 { NE, "_OtsNeqX", "OTS$NEQ_X", 0 },
2969 { LT, "_OtsLssX", "OTS$LSS_X", 0 },
2970 { LE, "_OtsLeqX", "OTS$LEQ_X", 0 },
2971 { GT, "_OtsGtrX", "OTS$GTR_X", 0 },
2972 { GE, "_OtsGeqX", "OTS$GEQ_X", 0 },
2973 { FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 },
2974 { FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 },
2975 { UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 },
2976 { FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 },
2977 { FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 }
2978 };
2979
2980 static GTY(()) struct xfloating_op vax_cvt_ops[] =
2981 {
2982 { FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 },
2983 { FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 }
2984 };
2985
2986 static rtx
2987 alpha_lookup_xfloating_lib_func (enum rtx_code code)
2988 {
2989 struct xfloating_op *ops = xfloating_ops;
2990 long n = ARRAY_SIZE (xfloating_ops);
2991 long i;
2992
2993 gcc_assert (TARGET_HAS_XFLOATING_LIBS);
2994
2995 /* How irritating. Nothing to key off for the main table. */
2996 if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE))
2997 {
2998 ops = vax_cvt_ops;
2999 n = ARRAY_SIZE (vax_cvt_ops);
3000 }
3001
3002 for (i = 0; i < n; ++i, ++ops)
3003 if (ops->code == code)
3004 {
3005 rtx func = ops->libcall;
3006 if (!func)
3007 {
3008 func = init_one_libfunc (TARGET_ABI_OPEN_VMS
3009 ? ops->vms_func : ops->osf_func);
3010 ops->libcall = func;
3011 }
3012 return func;
3013 }
3014
3015 gcc_unreachable ();
3016 }
3017
3018 /* Most X_floating operations take the rounding mode as an argument.
3019 Compute that here. */
3020
3021 static int
3022 alpha_compute_xfloating_mode_arg (enum rtx_code code,
3023 enum alpha_fp_rounding_mode round)
3024 {
3025 int mode;
3026
3027 switch (round)
3028 {
3029 case ALPHA_FPRM_NORM:
3030 mode = 2;
3031 break;
3032 case ALPHA_FPRM_MINF:
3033 mode = 1;
3034 break;
3035 case ALPHA_FPRM_CHOP:
3036 mode = 0;
3037 break;
3038 case ALPHA_FPRM_DYN:
3039 mode = 4;
3040 break;
3041 default:
3042 gcc_unreachable ();
3043
3044 /* XXX For reference, round to +inf is mode = 3. */
3045 }
3046
3047 if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N)
3048 mode |= 0x10000;
3049
3050 return mode;
3051 }
3052
3053 /* Emit an X_floating library function call.
3054
3055 Note that these functions do not follow normal calling conventions:
3056 TFmode arguments are passed in two integer registers (as opposed to
3057 indirect); TFmode return values appear in R16+R17.
3058
3059 FUNC is the function to call.
3060 TARGET is where the output belongs.
3061 OPERANDS are the inputs.
3062 NOPERANDS is the count of inputs.
3063 EQUIV is the expression equivalent for the function.
3064 */
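/* For example (as laid out by the code below), a two-operand TFmode
   operation plus its rounding-mode argument is passed in $16-$17,
   $18-$19 and $20, and the TFmode result comes back in $16-$17.  */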
3065
3066 static void
3067 alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
3068 int noperands, rtx equiv)
3069 {
3070 rtx usage = NULL_RTX, reg;
3071 int regno = 16, i;
3072
3073 start_sequence ();
3074
3075 for (i = 0; i < noperands; ++i)
3076 {
3077 switch (GET_MODE (operands[i]))
3078 {
3079 case E_TFmode:
3080 reg = gen_rtx_REG (TFmode, regno);
3081 regno += 2;
3082 break;
3083
3084 case E_DFmode:
3085 reg = gen_rtx_REG (DFmode, regno + 32);
3086 regno += 1;
3087 break;
3088
3089 case E_VOIDmode:
3090 gcc_assert (CONST_INT_P (operands[i]));
3091 /* FALLTHRU */
3092 case E_DImode:
3093 reg = gen_rtx_REG (DImode, regno);
3094 regno += 1;
3095 break;
3096
3097 default:
3098 gcc_unreachable ();
3099 }
3100
3101 emit_move_insn (reg, operands[i]);
3102 use_reg (&usage, reg);
3103 }
3104
3105 switch (GET_MODE (target))
3106 {
3107 case E_TFmode:
3108 reg = gen_rtx_REG (TFmode, 16);
3109 break;
3110 case E_DFmode:
3111 reg = gen_rtx_REG (DFmode, 32);
3112 break;
3113 case E_DImode:
3114 reg = gen_rtx_REG (DImode, 0);
3115 break;
3116 default:
3117 gcc_unreachable ();
3118 }
3119
3120 rtx mem = gen_rtx_MEM (QImode, func);
3121 rtx_insn *tmp = emit_call_insn (gen_call_value (reg, mem, const0_rtx,
3122 const0_rtx, const0_rtx));
3123 CALL_INSN_FUNCTION_USAGE (tmp) = usage;
3124 RTL_CONST_CALL_P (tmp) = 1;
3125
3126 tmp = get_insns ();
3127 end_sequence ();
3128
3129 emit_libcall_block (tmp, target, reg, equiv);
3130 }
3131
3132 /* Emit an X_floating library function call for arithmetic (+,-,*,/). */
3133
3134 void
3135 alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[])
3136 {
3137 rtx func;
3138 int mode;
3139 rtx out_operands[3];
3140
3141 func = alpha_lookup_xfloating_lib_func (code);
3142 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3143
3144 out_operands[0] = operands[1];
3145 out_operands[1] = operands[2];
3146 out_operands[2] = GEN_INT (mode);
3147 alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3,
3148 gen_rtx_fmt_ee (code, TFmode, operands[1],
3149 operands[2]));
3150 }
3151
3152 /* Emit an X_floating library function call for a comparison. */
3153
3154 static rtx
3155 alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1)
3156 {
3157 enum rtx_code cmp_code, res_code;
3158 rtx func, out, operands[2], note;
3159
3160 /* X_floating library comparison functions return
3161 -1 unordered
3162 0 false
3163 1 true
3164 Convert the compare against the raw return value. */
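  /* For instance, UNORDERED is handled below by calling the EQ routine
     and testing the raw result with LT: only an unordered comparison
     returns -1, so result < 0 is exactly the unordered predicate.  */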
3165
3166 cmp_code = *pcode;
3167 switch (cmp_code)
3168 {
3169 case UNORDERED:
3170 cmp_code = EQ;
3171 res_code = LT;
3172 break;
3173 case ORDERED:
3174 cmp_code = EQ;
3175 res_code = GE;
3176 break;
3177 case NE:
3178 res_code = NE;
3179 break;
3180 case EQ:
3181 case LT:
3182 case GT:
3183 case LE:
3184 case GE:
3185 res_code = GT;
3186 break;
3187 default:
3188 gcc_unreachable ();
3189 }
3190 *pcode = res_code;
3191
3192 func = alpha_lookup_xfloating_lib_func (cmp_code);
3193
3194 operands[0] = op0;
3195 operands[1] = op1;
3196 out = gen_reg_rtx (DImode);
3197
3198 /* What's actually returned is -1,0,1, not a proper boolean value. */
3199 note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1);
3200 note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE);
3201 alpha_emit_xfloating_libcall (func, out, operands, 2, note);
3202
3203 return out;
3204 }
3205
3206 /* Emit an X_floating library function call for a conversion. */
3207
3208 void
3209 alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[])
3210 {
3211 int noperands = 1, mode;
3212 rtx out_operands[2];
3213 rtx func;
3214 enum rtx_code code = orig_code;
3215
3216 if (code == UNSIGNED_FIX)
3217 code = FIX;
3218
3219 func = alpha_lookup_xfloating_lib_func (code);
3220
3221 out_operands[0] = operands[1];
3222
3223 switch (code)
3224 {
3225 case FIX:
3226 mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP);
3227 out_operands[1] = GEN_INT (mode);
3228 noperands = 2;
3229 break;
3230 case FLOAT_TRUNCATE:
3231 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3232 out_operands[1] = GEN_INT (mode);
3233 noperands = 2;
3234 break;
3235 default:
3236 break;
3237 }
3238
3239 alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands,
3240 gen_rtx_fmt_e (orig_code,
3241 GET_MODE (operands[0]),
3242 operands[1]));
3243 }
3244
3245 /* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of
3246 DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true,
3247 guarantee that the sequence
3248 set (OP[0] OP[2])
3249 set (OP[1] OP[3])
3250 is valid. Naturally, output operand ordering is little-endian.
3251 This is used by *movtf_internal and *movti_internal. */
3252
3253 void
3254 alpha_split_tmode_pair (rtx operands[4], machine_mode mode,
3255 bool fixup_overlap)
3256 {
3257 switch (GET_CODE (operands[1]))
3258 {
3259 case REG:
3260 operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
3261 operands[2] = gen_rtx_REG (DImode, REGNO (operands[1]));
3262 break;
3263
3264 case MEM:
3265 operands[3] = adjust_address (operands[1], DImode, 8);
3266 operands[2] = adjust_address (operands[1], DImode, 0);
3267 break;
3268
3269 CASE_CONST_SCALAR_INT:
3270 case CONST_DOUBLE:
3271 gcc_assert (operands[1] == CONST0_RTX (mode));
3272 operands[2] = operands[3] = const0_rtx;
3273 break;
3274
3275 default:
3276 gcc_unreachable ();
3277 }
3278
3279 switch (GET_CODE (operands[0]))
3280 {
3281 case REG:
3282 operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
3283 operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
3284 break;
3285
3286 case MEM:
3287 operands[1] = adjust_address (operands[0], DImode, 8);
3288 operands[0] = adjust_address (operands[0], DImode, 0);
3289 break;
3290
3291 default:
3292 gcc_unreachable ();
3293 }
3294
3295 if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3]))
3296 {
3297 std::swap (operands[0], operands[1]);
3298 std::swap (operands[2], operands[3]);
3299 }
3300 }
3301
3302 /* Implement negtf2 or abstf2. Op0 is destination, op1 is source,
3303 op2 is a register containing the sign bit, operation is the
3304 logical operation to be performed. */
3305
3306 void
3307 alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx))
3308 {
3309 rtx high_bit = operands[2];
3310 rtx scratch;
3311 int move;
3312
3313 alpha_split_tmode_pair (operands, TFmode, false);
3314
3315 /* Detect three flavors of operand overlap. */
3316 move = 1;
3317 if (rtx_equal_p (operands[0], operands[2]))
3318 move = 0;
3319 else if (rtx_equal_p (operands[1], operands[2]))
3320 {
3321 if (rtx_equal_p (operands[0], high_bit))
3322 move = 2;
3323 else
3324 move = -1;
3325 }
3326
3327 if (move < 0)
3328 emit_move_insn (operands[0], operands[2]);
3329
3330 /* ??? If the destination overlaps both source tf and high_bit, then
3331 assume source tf is dead in its entirety and use the other half
3332 for a scratch register. Otherwise "scratch" is just the proper
3333 destination register. */
3334 scratch = operands[move < 2 ? 1 : 3];
3335
3336 emit_insn ((*operation) (scratch, high_bit, operands[3]));
3337
3338 if (move > 0)
3339 {
3340 emit_move_insn (operands[0], operands[2]);
3341 if (move > 1)
3342 emit_move_insn (operands[1], scratch);
3343 }
3344 }
3345 \f
3346 /* Use ext[wlq][lh] as the Architecture Handbook describes for extracting
3347 unaligned data:
3348
3349 unsigned: signed:
3350 word: ldq_u r1,X(r11) ldq_u r1,X(r11)
3351 ldq_u r2,X+1(r11) ldq_u r2,X+1(r11)
3352 lda r3,X(r11) lda r3,X+2(r11)
3353 extwl r1,r3,r1 extql r1,r3,r1
3354 extwh r2,r3,r2 extqh r2,r3,r2
3355 or r1,r2,r1 or r1,r2,r1
3356 sra r1,48,r1
3357
3358 long: ldq_u r1,X(r11) ldq_u r1,X(r11)
3359 ldq_u r2,X+3(r11) ldq_u r2,X+3(r11)
3360 lda r3,X(r11) lda r3,X(r11)
3361 extll r1,r3,r1 extll r1,r3,r1
3362 extlh r2,r3,r2 extlh r2,r3,r2
3363 or r1,r2,r1 addl r1,r2,r1
3364
3365 quad: ldq_u r1,X(r11)
3366 ldq_u r2,X+7(r11)
3367 lda r3,X(r11)
3368 extql r1,r3,r1
3369 extqh r2,r3,r2
3370 or r1,r2,r1
3371 */
3372
3373 void
3374 alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size,
3375 HOST_WIDE_INT ofs, int sign)
3376 {
3377 rtx meml, memh, addr, extl, exth, tmp, mema;
3378 machine_mode mode;
3379
3380 if (TARGET_BWX && size == 2)
3381 {
3382 meml = adjust_address (mem, QImode, ofs);
3383 memh = adjust_address (mem, QImode, ofs+1);
3384 extl = gen_reg_rtx (DImode);
3385 exth = gen_reg_rtx (DImode);
3386 emit_insn (gen_zero_extendqidi2 (extl, meml));
3387 emit_insn (gen_zero_extendqidi2 (exth, memh));
3388 exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8),
3389 NULL, 1, OPTAB_LIB_WIDEN);
3390 addr = expand_simple_binop (DImode, IOR, extl, exth,
3391 NULL, 1, OPTAB_LIB_WIDEN);
3392
3393 if (sign && GET_MODE (tgt) != HImode)
3394 {
3395 addr = gen_lowpart (HImode, addr);
3396 emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0));
3397 }
3398 else
3399 {
3400 if (GET_MODE (tgt) != DImode)
3401 addr = gen_lowpart (GET_MODE (tgt), addr);
3402 emit_move_insn (tgt, addr);
3403 }
3404 return;
3405 }
3406
3407 meml = gen_reg_rtx (DImode);
3408 memh = gen_reg_rtx (DImode);
3409 addr = gen_reg_rtx (DImode);
3410 extl = gen_reg_rtx (DImode);
3411 exth = gen_reg_rtx (DImode);
3412
3413 mema = XEXP (mem, 0);
3414 if (GET_CODE (mema) == LO_SUM)
3415 mema = force_reg (Pmode, mema);
3416
3417 /* AND addresses cannot be in any alias set, since they may implicitly
3418 alias surrounding code. Ideally we'd have some alias set that
3419 covered all types except those with alignment 8 or higher. */
3420
3421 tmp = change_address (mem, DImode,
3422 gen_rtx_AND (DImode,
3423 plus_constant (DImode, mema, ofs),
3424 GEN_INT (-8)));
3425 set_mem_alias_set (tmp, 0);
3426 emit_move_insn (meml, tmp);
3427
3428 tmp = change_address (mem, DImode,
3429 gen_rtx_AND (DImode,
3430 plus_constant (DImode, mema,
3431 ofs + size - 1),
3432 GEN_INT (-8)));
3433 set_mem_alias_set (tmp, 0);
3434 emit_move_insn (memh, tmp);
3435
3436 if (sign && size == 2)
3437 {
3438 emit_move_insn (addr, plus_constant (Pmode, mema, ofs+2));
3439
3440 emit_insn (gen_extql (extl, meml, addr));
3441 emit_insn (gen_extqh (exth, memh, addr));
3442
3443 /* We must use tgt here for the target. The alpha-vms port fails if we use
3444 addr for the target, because addr is marked as a pointer and combine
3445 knows that pointers are always sign-extended 32-bit values. */
3446 addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
3447 addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48),
3448 addr, 1, OPTAB_WIDEN);
3449 }
3450 else
3451 {
3452 emit_move_insn (addr, plus_constant (Pmode, mema, ofs));
3453 emit_insn (gen_extxl (extl, meml, GEN_INT (size*8), addr));
3454 switch ((int) size)
3455 {
3456 case 2:
3457 emit_insn (gen_extwh (exth, memh, addr));
3458 mode = HImode;
3459 break;
3460 case 4:
3461 emit_insn (gen_extlh (exth, memh, addr));
3462 mode = SImode;
3463 break;
3464 case 8:
3465 emit_insn (gen_extqh (exth, memh, addr));
3466 mode = DImode;
3467 break;
3468 default:
3469 gcc_unreachable ();
3470 }
3471
3472 addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl),
3473 gen_lowpart (mode, exth), gen_lowpart (mode, tgt),
3474 sign, OPTAB_WIDEN);
3475 }
3476
3477 if (addr != tgt)
3478 emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr));
3479 }
3480
3481 /* Similarly, use ins and msk instructions to perform unaligned stores. */
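/* A sketch of the non-BWX path below: load the two quadwords containing
   the destination bytes, build the shifted source bytes with the insXl
   and insXh patterns, clear the bytes being replaced with mskXl and mskXh,
   OR the pieces together, and store the high quadword before the low one.  */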
3482
3483 void
3484 alpha_expand_unaligned_store (rtx dst, rtx src,
3485 HOST_WIDE_INT size, HOST_WIDE_INT ofs)
3486 {
3487 rtx dstl, dsth, addr, insl, insh, meml, memh, dsta;
3488
3489 if (TARGET_BWX && size == 2)
3490 {
3491 if (src != const0_rtx)
3492 {
3493 dstl = gen_lowpart (QImode, src);
3494 dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8),
3495 NULL, 1, OPTAB_LIB_WIDEN);
3496 dsth = gen_lowpart (QImode, dsth);
3497 }
3498 else
3499 dstl = dsth = const0_rtx;
3500
3501 meml = adjust_address (dst, QImode, ofs);
3502 memh = adjust_address (dst, QImode, ofs+1);
3503
3504 emit_move_insn (meml, dstl);
3505 emit_move_insn (memh, dsth);
3506 return;
3507 }
3508
3509 dstl = gen_reg_rtx (DImode);
3510 dsth = gen_reg_rtx (DImode);
3511 insl = gen_reg_rtx (DImode);
3512 insh = gen_reg_rtx (DImode);
3513
3514 dsta = XEXP (dst, 0);
3515 if (GET_CODE (dsta) == LO_SUM)
3516 dsta = force_reg (Pmode, dsta);
3517
3518 /* AND addresses cannot be in any alias set, since they may implicitly
3519 alias surrounding code. Ideally we'd have some alias set that
3520 covered all types except those with alignment 8 or higher. */
3521
3522 meml = change_address (dst, DImode,
3523 gen_rtx_AND (DImode,
3524 plus_constant (DImode, dsta, ofs),
3525 GEN_INT (-8)));
3526 set_mem_alias_set (meml, 0);
3527
3528 memh = change_address (dst, DImode,
3529 gen_rtx_AND (DImode,
3530 plus_constant (DImode, dsta,
3531 ofs + size - 1),
3532 GEN_INT (-8)));
3533 set_mem_alias_set (memh, 0);
3534
3535 emit_move_insn (dsth, memh);
3536 emit_move_insn (dstl, meml);
3537
3538 addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs));
3539
3540 if (src != CONST0_RTX (GET_MODE (src)))
3541 {
3542 emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
3543 GEN_INT (size*8), addr));
3544
3545 switch ((int) size)
3546 {
3547 case 2:
3548 emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr));
3549 break;
3550 case 4:
3551 emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr));
3552 break;
3553 case 8:
3554 emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr));
3555 break;
3556 default:
3557 gcc_unreachable ();
3558 }
3559 }
3560
3561 emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr));
3562
3563 switch ((int) size)
3564 {
3565 case 2:
3566 emit_insn (gen_mskwl (dstl, dstl, addr));
3567 break;
3568 case 4:
3569 emit_insn (gen_mskll (dstl, dstl, addr));
3570 break;
3571 case 8:
3572 emit_insn (gen_mskql (dstl, dstl, addr));
3573 break;
3574 default:
3575 gcc_unreachable ();
3576 }
3577
3578 if (src != CONST0_RTX (GET_MODE (src)))
3579 {
3580 dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN);
3581 dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN);
3582 }
3583
3584 /* Must store high before low for degenerate case of aligned. */
3585 emit_move_insn (memh, dsth);
3586 emit_move_insn (meml, dstl);
3587 }
3588
3589 /* The block move code tries to maximize speed by separating loads and
3590 stores at the expense of register pressure: we load all of the data
3591 before we store it back out. There are two secondary effects worth
3592 mentioning: it speeds copies to and from aligned and unaligned buffers
3593 alike, and it makes the code significantly easier to write. */
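/* For example (purely illustrative), a 14-byte copy from a 64-bit-aligned
   source is read into DImode, SImode and HImode registers and then written
   back out with whatever access sizes the destination alignment allows.  */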
3594
3595 #define MAX_MOVE_WORDS 8
3596
3597 /* Load an integral number of consecutive unaligned quadwords. */
3598
3599 static void
3600 alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem,
3601 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3602 {
3603 rtx const im8 = GEN_INT (-8);
3604 rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1];
3605 rtx sreg, areg, tmp, smema;
3606 HOST_WIDE_INT i;
3607
3608 smema = XEXP (smem, 0);
3609 if (GET_CODE (smema) == LO_SUM)
3610 smema = force_reg (Pmode, smema);
3611
3612 /* Generate all the tmp registers we need. */
3613 for (i = 0; i < words; ++i)
3614 {
3615 data_regs[i] = out_regs[i];
3616 ext_tmps[i] = gen_reg_rtx (DImode);
3617 }
3618 data_regs[words] = gen_reg_rtx (DImode);
3619
3620 if (ofs != 0)
3621 smem = adjust_address (smem, GET_MODE (smem), ofs);
3622
3623 /* Load up all of the source data. */
3624 for (i = 0; i < words; ++i)
3625 {
3626 tmp = change_address (smem, DImode,
3627 gen_rtx_AND (DImode,
3628 plus_constant (DImode, smema, 8*i),
3629 im8));
3630 set_mem_alias_set (tmp, 0);
3631 emit_move_insn (data_regs[i], tmp);
3632 }
3633
3634 tmp = change_address (smem, DImode,
3635 gen_rtx_AND (DImode,
3636 plus_constant (DImode, smema,
3637 8*words - 1),
3638 im8));
3639 set_mem_alias_set (tmp, 0);
3640 emit_move_insn (data_regs[words], tmp);
3641
3642 /* Extract the half-word fragments. Unfortunately DEC decided to make
3643 extxh with offset zero a noop instead of zeroing the register, so
3644 we must take care of that edge condition ourselves with cmov. */
3645
3646 sreg = copy_addr_to_reg (smema);
3647 areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL,
3648 1, OPTAB_WIDEN);
3649 for (i = 0; i < words; ++i)
3650 {
3651 emit_insn (gen_extql (data_regs[i], data_regs[i], sreg));
3652 emit_insn (gen_extqh (ext_tmps[i], data_regs[i+1], sreg));
3653 emit_insn (gen_rtx_SET (ext_tmps[i],
3654 gen_rtx_IF_THEN_ELSE (DImode,
3655 gen_rtx_EQ (DImode, areg,
3656 const0_rtx),
3657 const0_rtx, ext_tmps[i])));
3658 }
3659
3660 /* Merge the half-words into whole words. */
3661 for (i = 0; i < words; ++i)
3662 {
3663 out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i],
3664 ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN);
3665 }
3666 }
3667
3668 /* Store an integral number of consecutive unaligned quadwords. DATA_REGS
3669 may be NULL to store zeros. */
3670
3671 static void
3672 alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem,
3673 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3674 {
3675 rtx const im8 = GEN_INT (-8);
3676 rtx ins_tmps[MAX_MOVE_WORDS];
3677 rtx st_tmp_1, st_tmp_2, dreg;
3678 rtx st_addr_1, st_addr_2, dmema;
3679 HOST_WIDE_INT i;
3680
3681 dmema = XEXP (dmem, 0);
3682 if (GET_CODE (dmema) == LO_SUM)
3683 dmema = force_reg (Pmode, dmema);
3684
3685 /* Generate all the tmp registers we need. */
3686 if (data_regs != NULL)
3687 for (i = 0; i < words; ++i)
3688 ins_tmps[i] = gen_reg_rtx(DImode);
3689 st_tmp_1 = gen_reg_rtx(DImode);
3690 st_tmp_2 = gen_reg_rtx(DImode);
3691
3692 if (ofs != 0)
3693 dmem = adjust_address (dmem, GET_MODE (dmem), ofs);
3694
3695 st_addr_2 = change_address (dmem, DImode,
3696 gen_rtx_AND (DImode,
3697 plus_constant (DImode, dmema,
3698 words*8 - 1),
3699 im8));
3700 set_mem_alias_set (st_addr_2, 0);
3701
3702 st_addr_1 = change_address (dmem, DImode,
3703 gen_rtx_AND (DImode, dmema, im8));
3704 set_mem_alias_set (st_addr_1, 0);
3705
3706 /* Load up the destination end bits. */
3707 emit_move_insn (st_tmp_2, st_addr_2);
3708 emit_move_insn (st_tmp_1, st_addr_1);
3709
3710 /* Shift the input data into place. */
3711 dreg = copy_addr_to_reg (dmema);
3712 if (data_regs != NULL)
3713 {
3714 for (i = words-1; i >= 0; --i)
3715 {
3716 emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg));
3717 emit_insn (gen_insql (data_regs[i], data_regs[i], dreg));
3718 }
3719 for (i = words-1; i > 0; --i)
3720 {
3721 ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i],
3722 ins_tmps[i-1], ins_tmps[i-1], 1,
3723 OPTAB_WIDEN);
3724 }
3725 }
3726
3727 /* Split and merge the ends with the destination data. */
3728 emit_insn (gen_mskqh (st_tmp_2, st_tmp_2, dreg));
3729 emit_insn (gen_mskql (st_tmp_1, st_tmp_1, dreg));
3730
3731 if (data_regs != NULL)
3732 {
3733 st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1],
3734 st_tmp_2, 1, OPTAB_WIDEN);
3735 st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0],
3736 st_tmp_1, 1, OPTAB_WIDEN);
3737 }
3738
3739 /* Store it all. */
3740 emit_move_insn (st_addr_2, st_tmp_2);
3741 for (i = words-1; i > 0; --i)
3742 {
3743 rtx tmp = change_address (dmem, DImode,
3744 gen_rtx_AND (DImode,
3745 plus_constant (DImode,
3746 dmema, i*8),
3747 im8));
3748 set_mem_alias_set (tmp, 0);
3749 emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx);
3750 }
3751 emit_move_insn (st_addr_1, st_tmp_1);
3752 }
3753
3754
3755 /* Expand string/block move operations.
3756
3757 operands[0] is the pointer to the destination.
3758 operands[1] is the pointer to the source.
3759 operands[2] is the number of bytes to move.
3760 operands[3] is the alignment. */
3761
3762 int
3763 alpha_expand_block_move (rtx operands[])
3764 {
3765 rtx bytes_rtx = operands[2];
3766 rtx align_rtx = operands[3];
3767 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
3768 HOST_WIDE_INT bytes = orig_bytes;
3769 HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT;
3770 HOST_WIDE_INT dst_align = src_align;
3771 rtx orig_src = operands[1];
3772 rtx orig_dst = operands[0];
3773 rtx data_regs[2 * MAX_MOVE_WORDS + 16];
3774 rtx tmp;
3775 unsigned int i, words, ofs, nregs = 0;
3776
3777 if (orig_bytes <= 0)
3778 return 1;
3779 else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
3780 return 0;
3781
3782 /* Look for additional alignment information from recorded register info. */
3783
3784 tmp = XEXP (orig_src, 0);
3785 if (REG_P (tmp))
3786 src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3787 else if (GET_CODE (tmp) == PLUS
3788 && REG_P (XEXP (tmp, 0))
3789 && CONST_INT_P (XEXP (tmp, 1)))
3790 {
3791 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3792 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3793
3794 if (a > src_align)
3795 {
3796 if (a >= 64 && c % 8 == 0)
3797 src_align = 64;
3798 else if (a >= 32 && c % 4 == 0)
3799 src_align = 32;
3800 else if (a >= 16 && c % 2 == 0)
3801 src_align = 16;
3802 }
3803 }
3804
3805 tmp = XEXP (orig_dst, 0);
3806 if (REG_P (tmp))
3807 dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3808 else if (GET_CODE (tmp) == PLUS
3809 && REG_P (XEXP (tmp, 0))
3810 && CONST_INT_P (XEXP (tmp, 1)))
3811 {
3812 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3813 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3814
3815 if (a > dst_align)
3816 {
3817 if (a >= 64 && c % 8 == 0)
3818 dst_align = 64;
3819 else if (a >= 32 && c % 4 == 0)
3820 dst_align = 32;
3821 else if (a >= 16 && c % 2 == 0)
3822 dst_align = 16;
3823 }
3824 }
3825
3826 ofs = 0;
3827 if (src_align >= 64 && bytes >= 8)
3828 {
3829 words = bytes / 8;
3830
3831 for (i = 0; i < words; ++i)
3832 data_regs[nregs + i] = gen_reg_rtx (DImode);
3833
3834 for (i = 0; i < words; ++i)
3835 emit_move_insn (data_regs[nregs + i],
3836 adjust_address (orig_src, DImode, ofs + i * 8));
3837
3838 nregs += words;
3839 bytes -= words * 8;
3840 ofs += words * 8;
3841 }
3842
3843 if (src_align >= 32 && bytes >= 4)
3844 {
3845 words = bytes / 4;
3846
3847 for (i = 0; i < words; ++i)
3848 data_regs[nregs + i] = gen_reg_rtx (SImode);
3849
3850 for (i = 0; i < words; ++i)
3851 emit_move_insn (data_regs[nregs + i],
3852 adjust_address (orig_src, SImode, ofs + i * 4));
3853
3854 nregs += words;
3855 bytes -= words * 4;
3856 ofs += words * 4;
3857 }
3858
3859 if (bytes >= 8)
3860 {
3861 words = bytes / 8;
3862
3863 for (i = 0; i < words+1; ++i)
3864 data_regs[nregs + i] = gen_reg_rtx (DImode);
3865
3866 alpha_expand_unaligned_load_words (data_regs + nregs, orig_src,
3867 words, ofs);
3868
3869 nregs += words;
3870 bytes -= words * 8;
3871 ofs += words * 8;
3872 }
3873
3874 if (! TARGET_BWX && bytes >= 4)
3875 {
3876 data_regs[nregs++] = tmp = gen_reg_rtx (SImode);
3877 alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0);
3878 bytes -= 4;
3879 ofs += 4;
3880 }
3881
3882 if (bytes >= 2)
3883 {
3884 if (src_align >= 16)
3885 {
3886 do {
3887 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3888 emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs));
3889 bytes -= 2;
3890 ofs += 2;
3891 } while (bytes >= 2);
3892 }
3893 else if (! TARGET_BWX)
3894 {
3895 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3896 alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0);
3897 bytes -= 2;
3898 ofs += 2;
3899 }
3900 }
3901
3902 while (bytes > 0)
3903 {
3904 data_regs[nregs++] = tmp = gen_reg_rtx (QImode);
3905 emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs));
3906 bytes -= 1;
3907 ofs += 1;
3908 }
3909
3910 gcc_assert (nregs <= ARRAY_SIZE (data_regs));
3911
3912 /* Now save it back out again. */
3913
3914 i = 0, ofs = 0;
3915
3916 /* Write out the data in whatever chunks reading the source allowed. */
3917 if (dst_align >= 64)
3918 {
3919 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3920 {
3921 emit_move_insn (adjust_address (orig_dst, DImode, ofs),
3922 data_regs[i]);
3923 ofs += 8;
3924 i++;
3925 }
3926 }
3927
3928 if (dst_align >= 32)
3929 {
3930 /* If the source has remaining DImode regs, write them out in
3931 two pieces. */
3932 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3933 {
3934 tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32),
3935 NULL_RTX, 1, OPTAB_WIDEN);
3936
3937 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3938 gen_lowpart (SImode, data_regs[i]));
3939 emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4),
3940 gen_lowpart (SImode, tmp));
3941 ofs += 8;
3942 i++;
3943 }
3944
3945 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3946 {
3947 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3948 data_regs[i]);
3949 ofs += 4;
3950 i++;
3951 }
3952 }
3953
3954 if (i < nregs && GET_MODE (data_regs[i]) == DImode)
3955 {
3956 /* Write out a remaining block of words using unaligned methods. */
3957
3958 for (words = 1; i + words < nregs; words++)
3959 if (GET_MODE (data_regs[i + words]) != DImode)
3960 break;
3961
3962 if (words == 1)
3963 alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs);
3964 else
3965 alpha_expand_unaligned_store_words (data_regs + i, orig_dst,
3966 words, ofs);
3967
3968 i += words;
3969 ofs += words * 8;
3970 }
3971
3972 /* Due to the above, this won't be aligned. */
3973 /* ??? If we have more than one of these, consider constructing full
3974 words in registers and using alpha_expand_unaligned_store_words. */
3975 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3976 {
3977 alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
3978 ofs += 4;
3979 i++;
3980 }
3981
3982 if (dst_align >= 16)
3983 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
3984 {
3985 emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]);
3986 i++;
3987 ofs += 2;
3988 }
3989 else
3990 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
3991 {
3992 alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs);
3993 i++;
3994 ofs += 2;
3995 }
3996
3997 /* The remainder must be byte copies. */
3998 while (i < nregs)
3999 {
4000 gcc_assert (GET_MODE (data_regs[i]) == QImode);
4001 emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]);
4002 i++;
4003 ofs += 1;
4004 }
4005
4006 return 1;
4007 }
4008
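/* Expand string/block clear operations.

   operands[0] is the pointer to the destination.
   operands[1] is the number of bytes to clear.
   operands[3] is the alignment.  */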
4009 int
4010 alpha_expand_block_clear (rtx operands[])
4011 {
4012 rtx bytes_rtx = operands[1];
4013 rtx align_rtx = operands[3];
4014 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
4015 HOST_WIDE_INT bytes = orig_bytes;
4016 HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
4017 HOST_WIDE_INT alignofs = 0;
4018 rtx orig_dst = operands[0];
4019 rtx tmp;
4020 int i, words, ofs = 0;
4021
4022 if (orig_bytes <= 0)
4023 return 1;
4024 if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
4025 return 0;
4026
4027 /* Look for stricter alignment. */
4028 tmp = XEXP (orig_dst, 0);
4029 if (REG_P (tmp))
4030 align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp)));
4031 else if (GET_CODE (tmp) == PLUS
4032 && REG_P (XEXP (tmp, 0))
4033 && CONST_INT_P (XEXP (tmp, 1)))
4034 {
4035 HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
4036 int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
4037
4038 if (a > align)
4039 {
4040 if (a >= 64)
4041 align = a, alignofs = 8 - c % 8;
4042 else if (a >= 32)
4043 align = a, alignofs = 4 - c % 4;
4044 else if (a >= 16)
4045 align = a, alignofs = 2 - c % 2;
4046 }
4047 }
4048
4049 /* Handle an unaligned prefix first. */
4050
4051 if (alignofs > 0)
4052 {
4053 /* Given that alignofs is bounded by align, the only time BWX could
4054 generate three stores is for a 7 byte fill. Prefer two individual
4055 stores over a load/mask/store sequence. */
4056 if ((!TARGET_BWX || alignofs == 7)
4057 && align >= 32
4058 && !(alignofs == 4 && bytes >= 4))
4059 {
4060 machine_mode mode = (align >= 64 ? DImode : SImode);
4061 int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs;
4062 rtx mem, tmp;
4063 HOST_WIDE_INT mask;
4064
4065 mem = adjust_address (orig_dst, mode, ofs - inv_alignofs);
4066 set_mem_alias_set (mem, 0);
4067
4068 mask = ~(HOST_WIDE_INT_M1U << (inv_alignofs * 8));
4069 if (bytes < alignofs)
4070 {
4071 mask |= HOST_WIDE_INT_M1U << ((inv_alignofs + bytes) * 8);
4072 ofs += bytes;
4073 bytes = 0;
4074 }
4075 else
4076 {
4077 bytes -= alignofs;
4078 ofs += alignofs;
4079 }
4080 alignofs = 0;
4081
4082 tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
4083 NULL_RTX, 1, OPTAB_WIDEN);
4084
4085 emit_move_insn (mem, tmp);
4086 }
4087
4088 if (TARGET_BWX && (alignofs & 1) && bytes >= 1)
4089 {
4090 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4091 bytes -= 1;
4092 ofs += 1;
4093 alignofs -= 1;
4094 }
4095 if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2)
4096 {
4097 emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx);
4098 bytes -= 2;
4099 ofs += 2;
4100 alignofs -= 2;
4101 }
4102 if (alignofs == 4 && bytes >= 4)
4103 {
4104 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4105 bytes -= 4;
4106 ofs += 4;
4107 alignofs = 0;
4108 }
4109
4110 /* If we've not used the extra lead alignment information by now,
4111 we won't be able to. Downgrade align to match what's left over. */
4112 if (alignofs > 0)
4113 {
4114 alignofs = alignofs & -alignofs;
4115 align = MIN (align, alignofs * BITS_PER_UNIT);
4116 }
4117 }
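/* A worked example of the prefix masking above, under assumed values (not
   from any particular caller, non-BWX target, at least three bytes to
   clear): if the destination register is known 32-bit aligned and the
   constant offset gives c % 4 == 1, then alignofs is 3 and inv_alignofs
   is 1.  The SImode word starting one byte before the region is loaded,
   ANDed with the mask 0x000000ff (keeping only the byte that precedes the
   region), and stored back, zeroing the three prefix bytes with a single
   load/mask/store sequence.  */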
4118
4119 /* Handle a block of contiguous long-words. */
4120
4121 if (align >= 64 && bytes >= 8)
4122 {
4123 words = bytes / 8;
4124
4125 for (i = 0; i < words; ++i)
4126 emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8),
4127 const0_rtx);
4128
4129 bytes -= words * 8;
4130 ofs += words * 8;
4131 }
4132
4133 /* If the block is large and appropriately aligned, emit a single
4134 store followed by a sequence of stq_u insns. */
4135
4136 if (align >= 32 && bytes > 16)
4137 {
4138 rtx orig_dsta;
4139
4140 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4141 bytes -= 4;
4142 ofs += 4;
4143
4144 orig_dsta = XEXP (orig_dst, 0);
4145 if (GET_CODE (orig_dsta) == LO_SUM)
4146 orig_dsta = force_reg (Pmode, orig_dsta);
4147
4148 words = bytes / 8;
4149 for (i = 0; i < words; ++i)
4150 {
4151 rtx mem
4152 = change_address (orig_dst, DImode,
4153 gen_rtx_AND (DImode,
4154 plus_constant (DImode, orig_dsta,
4155 ofs + i*8),
4156 GEN_INT (-8)));
4157 set_mem_alias_set (mem, 0);
4158 emit_move_insn (mem, const0_rtx);
4159 }
4160
4161 /* Depending on the alignment, the first stq_u may have overlapped
4162 with the initial stl, which means that the last stq_u didn't
4163 write as much as it would appear. Leave those questionable bytes
4164 unaccounted for. */
4165 bytes -= words * 8 - 4;
4166 ofs += words * 8 - 4;
4167 }
4168
4169 /* Handle a smaller block of aligned words. */
4170
4171 if ((align >= 64 && bytes == 4)
4172 || (align == 32 && bytes >= 4))
4173 {
4174 words = bytes / 4;
4175
4176 for (i = 0; i < words; ++i)
4177 emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4),
4178 const0_rtx);
4179
4180 bytes -= words * 4;
4181 ofs += words * 4;
4182 }
4183
4184 /* An unaligned block uses stq_u stores for as many whole quadwords as possible. */
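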
4185
4186 if (bytes >= 8)
4187 {
4188 words = bytes / 8;
4189
4190 alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs);
4191
4192 bytes -= words * 8;
4193 ofs += words * 8;
4194 }
4195
4196 /* Next clean up any trailing pieces. */
4197
4198 /* Count the number of bits in BYTES for which aligned stores could
4199 be emitted. */
4200 words = 0;
4201 for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1)
4202 if (bytes & i)
4203 words += 1;
4204
4205 /* If we have appropriate alignment (and it wouldn't take too many
4206 instructions otherwise), mask out the bytes we need. */
4207 if (TARGET_BWX ? words > 2 : bytes > 0)
4208 {
4209 if (align >= 64)
4210 {
4211 rtx mem, tmp;
4212 HOST_WIDE_INT mask;
4213
4214 mem = adjust_address (orig_dst, DImode, ofs);
4215 set_mem_alias_set (mem, 0);
4216
4217 mask = HOST_WIDE_INT_M1U << (bytes * 8);
4218
4219 tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask),
4220 NULL_RTX, 1, OPTAB_WIDEN);
4221
4222 emit_move_insn (mem, tmp);
4223 return 1;
4224 }
4225 else if (align >= 32 && bytes < 4)
4226 {
4227 rtx mem, tmp;
4228 HOST_WIDE_INT mask;
4229
4230 mem = adjust_address (orig_dst, SImode, ofs);
4231 set_mem_alias_set (mem, 0);
4232
4233 mask = HOST_WIDE_INT_M1U << (bytes * 8);
4234
4235 tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask),
4236 NULL_RTX, 1, OPTAB_WIDEN);
4237
4238 emit_move_insn (mem, tmp);
4239 return 1;
4240 }
4241 }
4242
4243 if (!TARGET_BWX && bytes >= 4)
4244 {
4245 alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs);
4246 bytes -= 4;
4247 ofs += 4;
4248 }
4249
4250 if (bytes >= 2)
4251 {
4252 if (align >= 16)
4253 {
4254 do {
4255 emit_move_insn (adjust_address (orig_dst, HImode, ofs),
4256 const0_rtx);
4257 bytes -= 2;
4258 ofs += 2;
4259 } while (bytes >= 2);
4260 }
4261 else if (! TARGET_BWX)
4262 {
4263 alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
4264 bytes -= 2;
4265 ofs += 2;
4266 }
4267 }
4268
4269 while (bytes > 0)
4270 {
4271 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4272 bytes -= 1;
4273 ofs += 1;
4274 }
4275
4276 return 1;
4277 }
4278
4279 /* Returns a mask so that zap(x, value) == x & mask. */
4280
4281 rtx
4282 alpha_expand_zap_mask (HOST_WIDE_INT value)
4283 {
4284 rtx result;
4285 int i;
4286 HOST_WIDE_INT mask = 0;
4287
4288 for (i = 7; i >= 0; --i)
4289 {
4290 mask <<= 8;
4291 if (!((value >> i) & 1))
4292 mask |= 0xff;
4293 }
4294
4295 result = gen_int_mode (mask, DImode);
4296 return result;
4297 }
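/* A worked example of the mask construction above: for VALUE == 0x0f,
   bits 0-3 are set, so bytes 0-3 are to be zapped and bytes 4-7 kept;
   the loop therefore builds the mask 0xffffffff00000000, and indeed
   zap (x, 0x0f) == x & 0xffffffff00000000.  */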
4298
4299 void
4300 alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx),
4301 machine_mode mode,
4302 rtx op0, rtx op1, rtx op2)
4303 {
4304 op0 = gen_lowpart (mode, op0);
4305
4306 if (op1 == const0_rtx)
4307 op1 = CONST0_RTX (mode);
4308 else
4309 op1 = gen_lowpart (mode, op1);
4310
4311 if (op2 == const0_rtx)
4312 op2 = CONST0_RTX (mode);
4313 else
4314 op2 = gen_lowpart (mode, op2);
4315
4316 emit_insn ((*gen) (op0, op1, op2));
4317 }
4318
4319 /* A subroutine of the atomic operation splitters. Jump to LABEL if
4320 COND is true. Mark the jump as unlikely to be taken. */
4321
4322 static void
4323 emit_unlikely_jump (rtx cond, rtx label)
4324 {
4325 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
4326 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
4327 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
4328 }
4329
4330 /* A subroutine of the atomic operation splitters. Emit a load-locked
4331 instruction in MODE. */
4332
4333 static void
4334 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
4335 {
4336 rtx (*fn) (rtx, rtx) = NULL;
4337 if (mode == SImode)
4338 fn = gen_load_locked_si;
4339 else if (mode == DImode)
4340 fn = gen_load_locked_di;
4341 emit_insn (fn (reg, mem));
4342 }
4343
4344 /* A subroutine of the atomic operation splitters. Emit a store-conditional
4345 instruction in MODE. */
4346
4347 static void
4348 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
4349 {
4350 rtx (*fn) (rtx, rtx, rtx) = NULL;
4351 if (mode == SImode)
4352 fn = gen_store_conditional_si;
4353 else if (mode == DImode)
4354 fn = gen_store_conditional_di;
4355 emit_insn (fn (res, mem, val));
4356 }
4357
4358 /* Subroutines of the atomic operation splitters. Emit barriers
4359 as needed for the memory MODEL. */
4360
4361 static void
4362 alpha_pre_atomic_barrier (enum memmodel model)
4363 {
4364 if (need_atomic_barrier_p (model, true))
4365 emit_insn (gen_memory_barrier ());
4366 }
4367
4368 static void
4369 alpha_post_atomic_barrier (enum memmodel model)
4370 {
4371 if (need_atomic_barrier_p (model, false))
4372 emit_insn (gen_memory_barrier ());
4373 }
4374
4375 /* A subroutine of the atomic operation splitters. Emit an insxl
4376 instruction in MODE. */
4377
4378 static rtx
4379 emit_insxl (machine_mode mode, rtx op1, rtx op2)
4380 {
4381 rtx ret = gen_reg_rtx (DImode);
4382 rtx (*fn) (rtx, rtx, rtx);
4383
4384 switch (mode)
4385 {
4386 case E_QImode:
4387 fn = gen_insbl;
4388 break;
4389 case E_HImode:
4390 fn = gen_inswl;
4391 break;
4392 case E_SImode:
4393 fn = gen_insll;
4394 break;
4395 case E_DImode:
4396 fn = gen_insql;
4397 break;
4398 default:
4399 gcc_unreachable ();
4400 }
4401
4402 op1 = force_reg (mode, op1);
4403 emit_insn (fn (ret, op1, op2));
4404
4405 return ret;
4406 }
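/* A descriptive note: the insbl/inswl/insll/insql instructions emitted
   above shift a byte, word, longword or quadword into the byte lanes
   selected by the low three bits of the address operand.  The result is
   later OR-ed into a quadword from which the corresponding lanes have
   been cleared with a mskxl instruction (see the atomic splitters
   below).  */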
4407
4408 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
4409 to perform. MEM is the memory on which to operate. VAL is the second
4410 operand of the binary operator. BEFORE and AFTER are optional locations to
4411 return the value of MEM either before or after the operation. SCRATCH is
4412 a scratch register. */
4413
4414 void
4415 alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before,
4416 rtx after, rtx scratch, enum memmodel model)
4417 {
4418 machine_mode mode = GET_MODE (mem);
4419 rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch));
4420
4421 alpha_pre_atomic_barrier (model);
4422
4423 label = gen_label_rtx ();
4424 emit_label (label);
4425 label = gen_rtx_LABEL_REF (DImode, label);
4426
4427 if (before == NULL)
4428 before = scratch;
4429 emit_load_locked (mode, before, mem);
4430
4431 if (code == NOT)
4432 {
4433 x = gen_rtx_AND (mode, before, val);
4434 emit_insn (gen_rtx_SET (val, x));
4435
4436 x = gen_rtx_NOT (mode, val);
4437 }
4438 else
4439 x = gen_rtx_fmt_ee (code, mode, before, val);
4440 if (after)
4441 emit_insn (gen_rtx_SET (after, copy_rtx (x)));
4442 emit_insn (gen_rtx_SET (scratch, x));
4443
4444 emit_store_conditional (mode, cond, mem, scratch);
4445
4446 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4447 emit_unlikely_jump (x, label);
4448
4449 alpha_post_atomic_barrier (model);
4450 }
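/* As a rough sketch (not the exact RTL emitted above), the splitter
   produces a retry loop of the following shape, with memory barriers
   before and/or after as required by MODEL:

	retry:	ldq_l	scratch, 0(mem)		# load-locked
		<op>	scratch, val, scratch	# the requested operation
		stq_c	scratch, 0(mem)		# store-conditional
		beq	scratch, retry		# retry if the store failed

   (ldl_l/stl_c are used instead for SImode operands.)  */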
4451
4452 /* Expand a compare and swap operation. */
4453
4454 void
4455 alpha_split_compare_and_swap (rtx operands[])
4456 {
4457 rtx cond, retval, mem, oldval, newval;
4458 bool is_weak;
4459 enum memmodel mod_s, mod_f;
4460 machine_mode mode;
4461 rtx label1, label2, x;
4462
4463 cond = operands[0];
4464 retval = operands[1];
4465 mem = operands[2];
4466 oldval = operands[3];
4467 newval = operands[4];
4468 is_weak = (operands[5] != const0_rtx);
4469 mod_s = memmodel_from_int (INTVAL (operands[6]));
4470 mod_f = memmodel_from_int (INTVAL (operands[7]));
4471 mode = GET_MODE (mem);
4472
4473 alpha_pre_atomic_barrier (mod_s);
4474
4475 label1 = NULL_RTX;
4476 if (!is_weak)
4477 {
4478 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4479 emit_label (XEXP (label1, 0));
4480 }
4481 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4482
4483 emit_load_locked (mode, retval, mem);
4484
4485 x = gen_lowpart (DImode, retval);
4486 if (oldval == const0_rtx)
4487 {
4488 emit_move_insn (cond, const0_rtx);
4489 x = gen_rtx_NE (DImode, x, const0_rtx);
4490 }
4491 else
4492 {
4493 x = gen_rtx_EQ (DImode, x, oldval);
4494 emit_insn (gen_rtx_SET (cond, x));
4495 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4496 }
4497 emit_unlikely_jump (x, label2);
4498
4499 emit_move_insn (cond, newval);
4500 emit_store_conditional (mode, cond, mem, gen_lowpart (mode, cond));
4501
4502 if (!is_weak)
4503 {
4504 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4505 emit_unlikely_jump (x, label1);
4506 }
4507
4508 if (!is_mm_relaxed (mod_f))
4509 emit_label (XEXP (label2, 0));
4510
4511 alpha_post_atomic_barrier (mod_s);
4512
4513 if (is_mm_relaxed (mod_f))
4514 emit_label (XEXP (label2, 0));
4515 }
4516
4517 void
4518 alpha_expand_compare_and_swap_12 (rtx operands[])
4519 {
4520 rtx cond, dst, mem, oldval, newval, is_weak, mod_s, mod_f;
4521 machine_mode mode;
4522 rtx addr, align, wdst;
4523 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
4524
4525 cond = operands[0];
4526 dst = operands[1];
4527 mem = operands[2];
4528 oldval = operands[3];
4529 newval = operands[4];
4530 is_weak = operands[5];
4531 mod_s = operands[6];
4532 mod_f = operands[7];
4533 mode = GET_MODE (mem);
4534
4535 /* We forced the address into a register via mem_noofs_operand. */
4536 addr = XEXP (mem, 0);
4537 gcc_assert (register_operand (addr, DImode));
4538
4539 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4540 NULL_RTX, 1, OPTAB_DIRECT);
4541
4542 oldval = convert_modes (DImode, mode, oldval, 1);
4543
4544 if (newval != const0_rtx)
4545 newval = emit_insxl (mode, newval, addr);
4546
4547 wdst = gen_reg_rtx (DImode);
4548 if (mode == QImode)
4549 gen = gen_atomic_compare_and_swapqi_1;
4550 else
4551 gen = gen_atomic_compare_and_swaphi_1;
4552 emit_insn (gen (cond, wdst, mem, oldval, newval, align,
4553 is_weak, mod_s, mod_f));
4554
4555 emit_move_insn (dst, gen_lowpart (mode, wdst));
4556 }
4557
4558 void
4559 alpha_split_compare_and_swap_12 (rtx operands[])
4560 {
4561 rtx cond, dest, orig_mem, oldval, newval, align, scratch;
4562 machine_mode mode;
4563 bool is_weak;
4564 enum memmodel mod_s, mod_f;
4565 rtx label1, label2, mem, addr, width, mask, x;
4566
4567 cond = operands[0];
4568 dest = operands[1];
4569 orig_mem = operands[2];
4570 oldval = operands[3];
4571 newval = operands[4];
4572 align = operands[5];
4573 is_weak = (operands[6] != const0_rtx);
4574 mod_s = memmodel_from_int (INTVAL (operands[7]));
4575 mod_f = memmodel_from_int (INTVAL (operands[8]));
4576 scratch = operands[9];
4577 mode = GET_MODE (orig_mem);
4578 addr = XEXP (orig_mem, 0);
4579
4580 mem = gen_rtx_MEM (DImode, align);
4581 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4582 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4583 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4584
4585 alpha_pre_atomic_barrier (mod_s);
4586
4587 label1 = NULL_RTX;
4588 if (!is_weak)
4589 {
4590 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4591 emit_label (XEXP (label1, 0));
4592 }
4593 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4594
4595 emit_load_locked (DImode, scratch, mem);
4596
4597 width = GEN_INT (GET_MODE_BITSIZE (mode));
4598 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4599 emit_insn (gen_extxl (dest, scratch, width, addr));
4600
4601 if (oldval == const0_rtx)
4602 {
4603 emit_move_insn (cond, const0_rtx);
4604 x = gen_rtx_NE (DImode, dest, const0_rtx);
4605 }
4606 else
4607 {
4608 x = gen_rtx_EQ (DImode, dest, oldval);
4609 emit_insn (gen_rtx_SET (cond, x));
4610 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4611 }
4612 emit_unlikely_jump (x, label2);
4613
4614 emit_insn (gen_mskxl (cond, scratch, mask, addr));
4615
4616 if (newval != const0_rtx)
4617 emit_insn (gen_iordi3 (cond, cond, newval));
4618
4619 emit_store_conditional (DImode, cond, mem, cond);
4620
4621 if (!is_weak)
4622 {
4623 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4624 emit_unlikely_jump (x, label1);
4625 }
4626
4627 if (!is_mm_relaxed (mod_f))
4628 emit_label (XEXP (label2, 0));
4629
4630 alpha_post_atomic_barrier (mod_s);
4631
4632 if (is_mm_relaxed (mod_f))
4633 emit_label (XEXP (label2, 0));
4634 }
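/* A note on the technique above (descriptive only): because Alpha has no
   byte or word load-locked/store-conditional instructions, a QImode or
   HImode compare-and-swap is performed on the containing aligned quadword.
   The ldq_l result is extracted with extbl/extwl (gen_extxl) to obtain the
   old value, the old lanes are cleared with mskbl/mskwl (gen_mskxl), the
   new value (pre-shifted into place by emit_insxl) is OR-ed in, and the
   whole quadword is written back with stq_c.  */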
4635
4636 /* Expand an atomic exchange operation. */
4637
4638 void
4639 alpha_split_atomic_exchange (rtx operands[])
4640 {
4641 rtx retval, mem, val, scratch;
4642 enum memmodel model;
4643 machine_mode mode;
4644 rtx label, x, cond;
4645
4646 retval = operands[0];
4647 mem = operands[1];
4648 val = operands[2];
4649 model = (enum memmodel) INTVAL (operands[3]);
4650 scratch = operands[4];
4651 mode = GET_MODE (mem);
4652 cond = gen_lowpart (DImode, scratch);
4653
4654 alpha_pre_atomic_barrier (model);
4655
4656 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4657 emit_label (XEXP (label, 0));
4658
4659 emit_load_locked (mode, retval, mem);
4660 emit_move_insn (scratch, val);
4661 emit_store_conditional (mode, cond, mem, scratch);
4662
4663 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4664 emit_unlikely_jump (x, label);
4665
4666 alpha_post_atomic_barrier (model);
4667 }
4668
4669 void
4670 alpha_expand_atomic_exchange_12 (rtx operands[])
4671 {
4672 rtx dst, mem, val, model;
4673 machine_mode mode;
4674 rtx addr, align, wdst;
4675 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
4676
4677 dst = operands[0];
4678 mem = operands[1];
4679 val = operands[2];
4680 model = operands[3];
4681 mode = GET_MODE (mem);
4682
4683 /* We forced the address into a register via mem_noofs_operand. */
4684 addr = XEXP (mem, 0);
4685 gcc_assert (register_operand (addr, DImode));
4686
4687 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4688 NULL_RTX, 1, OPTAB_DIRECT);
4689
4690 /* Insert val into the correct byte location within the word. */
4691 if (val != const0_rtx)
4692 val = emit_insxl (mode, val, addr);
4693
4694 wdst = gen_reg_rtx (DImode);
4695 if (mode == QImode)
4696 gen = gen_atomic_exchangeqi_1;
4697 else
4698 gen = gen_atomic_exchangehi_1;
4699 emit_insn (gen (wdst, mem, val, align, model));
4700
4701 emit_move_insn (dst, gen_lowpart (mode, wdst));
4702 }
4703
4704 void
4705 alpha_split_atomic_exchange_12 (rtx operands[])
4706 {
4707 rtx dest, orig_mem, addr, val, align, scratch;
4708 rtx label, mem, width, mask, x;
4709 machine_mode mode;
4710 enum memmodel model;
4711
4712 dest = operands[0];
4713 orig_mem = operands[1];
4714 val = operands[2];
4715 align = operands[3];
4716 model = (enum memmodel) INTVAL (operands[4]);
4717 scratch = operands[5];
4718 mode = GET_MODE (orig_mem);
4719 addr = XEXP (orig_mem, 0);
4720
4721 mem = gen_rtx_MEM (DImode, align);
4722 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4723 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4724 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4725
4726 alpha_pre_atomic_barrier (model);
4727
4728 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4729 emit_label (XEXP (label, 0));
4730
4731 emit_load_locked (DImode, scratch, mem);
4732
4733 width = GEN_INT (GET_MODE_BITSIZE (mode));
4734 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4735 emit_insn (gen_extxl (dest, scratch, width, addr));
4736 emit_insn (gen_mskxl (scratch, scratch, mask, addr));
4737 if (val != const0_rtx)
4738 emit_insn (gen_iordi3 (scratch, scratch, val));
4739
4740 emit_store_conditional (DImode, scratch, mem, scratch);
4741
4742 x = gen_rtx_EQ (DImode, scratch, const0_rtx);
4743 emit_unlikely_jump (x, label);
4744
4745 alpha_post_atomic_barrier (model);
4746 }
4747 \f
4748 /* Adjust the cost of a scheduling dependency. Return the new cost of
4749 a dependency of INSN on DEP_INSN. COST is the current cost. */
4750
4751 static int
4752 alpha_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4753 unsigned int)
4754 {
4755 enum attr_type dep_insn_type;
4756
4757 /* If the dependence is an anti-dependence, there is no cost. For an
4758 output dependence, there is sometimes a cost, but it doesn't seem
4759 worth handling those few cases. */
4760 if (dep_type != 0)
4761 return cost;
4762
4763 /* If we can't recognize the insns, we can't really do anything. */
4764 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
4765 return cost;
4766
4767 dep_insn_type = get_attr_type (dep_insn);
4768
4769 /* Bring in the user-defined memory latency. */
4770 if (dep_insn_type == TYPE_ILD
4771 || dep_insn_type == TYPE_FLD
4772 || dep_insn_type == TYPE_LDSYM)
4773 cost += alpha_memory_latency-1;
4774
4775 /* Everything else handled in DFA bypasses now. */
4776
4777 return cost;
4778 }
4779
4780 /* The number of instructions that can be issued per cycle. */
4781
4782 static int
4783 alpha_issue_rate (void)
4784 {
4785 return (alpha_tune == PROCESSOR_EV4 ? 2 : 4);
4786 }
4787
4788 /* How many alternative schedules to try. This should be as wide as the
4789 scheduling freedom in the DFA, but no wider. Making this value too
4790 large results in extra work for the scheduler.
4791
4792 For EV4, loads can be issued to either IB0 or IB1, thus we have 2
4793 alternative schedules. For EV5, we can choose between E0/E1 and
4794 FA/FM. For EV6, an arithmetic insn can be issued to U0/U1/L0/L1. */
4795
4796 static int
4797 alpha_multipass_dfa_lookahead (void)
4798 {
4799 return (alpha_tune == PROCESSOR_EV6 ? 4 : 2);
4800 }
4801 \f
4802 /* Machine-specific function data. */
4803
4804 struct GTY(()) alpha_links;
4805
4806 struct GTY(()) machine_function
4807 {
4808 /* For flag_reorder_blocks_and_partition. */
4809 rtx gp_save_rtx;
4810
4811 /* For VMS condition handlers. */
4812 bool uses_condition_handler;
4813
4814 /* Linkage entries. */
4815 hash_map<nofree_string_hash, alpha_links *> *links;
4816 };
4817
4818 /* How to allocate a 'struct machine_function'. */
4819
4820 static struct machine_function *
4821 alpha_init_machine_status (void)
4822 {
4823 return ggc_cleared_alloc<machine_function> ();
4824 }
4825
4826 /* Support for frame based VMS condition handlers. */
4827
4828 /* A VMS condition handler may be established for a function with a call to
4829 __builtin_establish_vms_condition_handler, and cancelled with a call to
4830 __builtin_revert_vms_condition_handler.
4831
4832 The VMS Condition Handling Facility knows about the existence of a handler
4833 from the procedure descriptor .handler field. As with the VMS native compilers,
4834 we store the user-specified handler's address at a fixed location in the
4835 stack frame and point the procedure descriptor at a common wrapper which
4836 fetches the real handler's address and issues an indirect call.
4837
4838 The indirection wrapper is "__gcc_shell_handler", provided by libgcc.
4839
4840 We force the procedure kind to PT_STACK, and the fixed frame location is
4841 fp+8, just before the register save area. We use the handler_data field in
4842 the procedure descriptor to state the fp offset at which the installed
4843 handler address can be found. */
4844
4845 #define VMS_COND_HANDLER_FP_OFFSET 8
4846
4847 /* Expand code to store the currently installed user VMS condition handler
4848 into TARGET and install HANDLER as the new condition handler. */
4849
4850 void
4851 alpha_expand_builtin_establish_vms_condition_handler (rtx target, rtx handler)
4852 {
4853 rtx handler_slot_address = plus_constant (Pmode, hard_frame_pointer_rtx,
4854 VMS_COND_HANDLER_FP_OFFSET);
4855
4856 rtx handler_slot
4857 = gen_rtx_MEM (DImode, handler_slot_address);
4858
4859 emit_move_insn (target, handler_slot);
4860 emit_move_insn (handler_slot, handler);
4861
4862 /* Notify the start/prologue/epilogue emitters that the condition handler
4863 slot is needed. In addition to reserving the slot space, this will force
4864 the procedure kind to PT_STACK and thus ensure that the hard_frame_pointer_rtx
4865 use above is correct. */
4866 cfun->machine->uses_condition_handler = true;
4867 }
4868
4869 /* Expand code to store the current VMS condition handler into TARGET and
4870 nullify it. */
4871
4872 void
4873 alpha_expand_builtin_revert_vms_condition_handler (rtx target)
4874 {
4875 /* We implement this by establishing a null condition handler, with the minor
4876 side effect of setting uses_condition_handler. This is slightly pessimistic
4877 if no builtin_establish call is ever issued, but that case is not a real
4878 problem and is not expected to happen anyway. */
4879
4880 alpha_expand_builtin_establish_vms_condition_handler (target, const0_rtx);
4881 }
4882
4883 /* Functions to save and restore alpha_return_addr_rtx. */
4884
4885 /* Start the ball rolling with RETURN_ADDR_RTX. */
4886
4887 rtx
4888 alpha_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4889 {
4890 if (count != 0)
4891 return const0_rtx;
4892
4893 return get_hard_reg_initial_val (Pmode, REG_RA);
4894 }
4895
4896 /* Return or create a memory slot containing the gp value for the current
4897 function. Needed only if TARGET_LD_BUGGY_LDGP. */
4898
4899 rtx
4900 alpha_gp_save_rtx (void)
4901 {
4902 rtx_insn *seq;
4903 rtx m = cfun->machine->gp_save_rtx;
4904
4905 if (m == NULL)
4906 {
4907 start_sequence ();
4908
4909 m = assign_stack_local (DImode, UNITS_PER_WORD, BITS_PER_WORD);
4910 m = validize_mem (m);
4911 emit_move_insn (m, pic_offset_table_rtx);
4912
4913 seq = get_insns ();
4914 end_sequence ();
4915
4916 /* We used to simply emit the sequence after entry_of_function.
4917 However this breaks the CFG if the first instruction in the
4918 first block is not the NOTE_INSN_BASIC_BLOCK, for example a
4919 label. Emit the sequence properly on the edge. We are only
4920 invoked from dw2_build_landing_pads and finish_eh_generation
4921 will call commit_edge_insertions thanks to a kludge. */
4922 insert_insn_on_edge (seq,
4923 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
4924
4925 cfun->machine->gp_save_rtx = m;
4926 }
4927
4928 return m;
4929 }
4930
4931 static void
4932 alpha_instantiate_decls (void)
4933 {
4934 if (cfun->machine->gp_save_rtx != NULL_RTX)
4935 instantiate_decl_rtl (cfun->machine->gp_save_rtx);
4936 }
4937
4938 static int
4939 alpha_ra_ever_killed (void)
4940 {
4941 rtx_insn *top;
4942
4943 if (!has_hard_reg_initial_val (Pmode, REG_RA))
4944 return (int)df_regs_ever_live_p (REG_RA);
4945
4946 push_topmost_sequence ();
4947 top = get_insns ();
4948 pop_topmost_sequence ();
4949
4950 return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL);
4951 }
4952
4953 \f
4954 /* Return the trap mode suffix applicable to the current
4955 instruction, or NULL. */
4956
4957 static const char *
4958 get_trap_mode_suffix (void)
4959 {
4960 enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn);
4961
4962 switch (s)
4963 {
4964 case TRAP_SUFFIX_NONE:
4965 return NULL;
4966
4967 case TRAP_SUFFIX_SU:
4968 if (alpha_fptm >= ALPHA_FPTM_SU)
4969 return "su";
4970 return NULL;
4971
4972 case TRAP_SUFFIX_SUI:
4973 if (alpha_fptm >= ALPHA_FPTM_SUI)
4974 return "sui";
4975 return NULL;
4976
4977 case TRAP_SUFFIX_V_SV:
4978 switch (alpha_fptm)
4979 {
4980 case ALPHA_FPTM_N:
4981 return NULL;
4982 case ALPHA_FPTM_U:
4983 return "v";
4984 case ALPHA_FPTM_SU:
4985 case ALPHA_FPTM_SUI:
4986 return "sv";
4987 default:
4988 gcc_unreachable ();
4989 }
4990
4991 case TRAP_SUFFIX_V_SV_SVI:
4992 switch (alpha_fptm)
4993 {
4994 case ALPHA_FPTM_N:
4995 return NULL;
4996 case ALPHA_FPTM_U:
4997 return "v";
4998 case ALPHA_FPTM_SU:
4999 return "sv";
5000 case ALPHA_FPTM_SUI:
5001 return "svi";
5002 default:
5003 gcc_unreachable ();
5004 }
5005 break;
5006
5007 case TRAP_SUFFIX_U_SU_SUI:
5008 switch (alpha_fptm)
5009 {
5010 case ALPHA_FPTM_N:
5011 return NULL;
5012 case ALPHA_FPTM_U:
5013 return "u";
5014 case ALPHA_FPTM_SU:
5015 return "su";
5016 case ALPHA_FPTM_SUI:
5017 return "sui";
5018 default:
5019 gcc_unreachable ();
5020 }
5021 break;
5022
5023 default:
5024 gcc_unreachable ();
5025 }
5026 gcc_unreachable ();
5027 }
5028
5029 /* Return the rounding mode suffix applicable to the current
5030 instruction, or NULL. */
5031
5032 static const char *
5033 get_round_mode_suffix (void)
5034 {
5035 enum attr_round_suffix s = get_attr_round_suffix (current_output_insn);
5036
5037 switch (s)
5038 {
5039 case ROUND_SUFFIX_NONE:
5040 return NULL;
5041 case ROUND_SUFFIX_NORMAL:
5042 switch (alpha_fprm)
5043 {
5044 case ALPHA_FPRM_NORM:
5045 return NULL;
5046 case ALPHA_FPRM_MINF:
5047 return "m";
5048 case ALPHA_FPRM_CHOP:
5049 return "c";
5050 case ALPHA_FPRM_DYN:
5051 return "d";
5052 default:
5053 gcc_unreachable ();
5054 }
5055 break;
5056
5057 case ROUND_SUFFIX_C:
5058 return "c";
5059
5060 default:
5061 gcc_unreachable ();
5062 }
5063 gcc_unreachable ();
5064 }
5065
5066 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
5067
5068 static bool
5069 alpha_print_operand_punct_valid_p (unsigned char code)
5070 {
5071 return (code == '/' || code == ',' || code == '-' || code == '~'
5072 || code == '#' || code == '*' || code == '&');
5073 }
5074
5075 /* Implement TARGET_PRINT_OPERAND. The alpha-specific
5076 operand codes are documented below. */
5077
5078 static void
5079 alpha_print_operand (FILE *file, rtx x, int code)
5080 {
5081 int i;
5082
5083 switch (code)
5084 {
5085 case '~':
5086 /* Print the assembler name of the current function. */
5087 assemble_name (file, alpha_fnname);
5088 break;
5089
5090 case '&':
5091 if (const char *name = get_some_local_dynamic_name ())
5092 assemble_name (file, name);
5093 else
5094 output_operand_lossage ("'%%&' used without any "
5095 "local dynamic TLS references");
5096 break;
5097
5098 case '/':
5099 /* Generates the instruction suffix. The TRAP_SUFFIX and ROUND_SUFFIX
5100 attributes are examined to determine what is appropriate. */
5101 {
5102 const char *trap = get_trap_mode_suffix ();
5103 const char *round = get_round_mode_suffix ();
5104
5105 if (trap || round)
5106 fprintf (file, "/%s%s", (trap ? trap : ""), (round ? round : ""));
5107 break;
5108 }
5109
5110 case ',':
5111 /* Generates single precision suffix for floating point
5112 instructions (s for IEEE, f for VAX). */
5113 fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file);
5114 break;
5115
5116 case '-':
5117 /* Generates double precision suffix for floating point
5118 instructions (t for IEEE, g for VAX). */
5119 fputc ((TARGET_FLOAT_VAX ? 'g' : 't'), file);
5120 break;
5121
5122 case '#':
5123 if (alpha_this_literal_sequence_number == 0)
5124 alpha_this_literal_sequence_number = alpha_next_sequence_number++;
5125 fprintf (file, "%d", alpha_this_literal_sequence_number);
5126 break;
5127
5128 case '*':
5129 if (alpha_this_gpdisp_sequence_number == 0)
5130 alpha_this_gpdisp_sequence_number = alpha_next_sequence_number++;
5131 fprintf (file, "%d", alpha_this_gpdisp_sequence_number);
5132 break;
5133
5134 case 'J':
5135 {
5136 const char *lituse;
5137
5138 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL)
5139 {
5140 x = XVECEXP (x, 0, 0);
5141 lituse = "lituse_tlsgd";
5142 }
5143 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL)
5144 {
5145 x = XVECEXP (x, 0, 0);
5146 lituse = "lituse_tlsldm";
5147 }
5148 else if (CONST_INT_P (x))
5149 lituse = "lituse_jsr";
5150 else
5151 {
5152 output_operand_lossage ("invalid %%J value");
5153 break;
5154 }
5155
5156 if (x != const0_rtx)
5157 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5158 }
5159 break;
5160
5161 case 'j':
5162 {
5163 const char *lituse;
5164
5165 #ifdef HAVE_AS_JSRDIRECT_RELOCS
5166 lituse = "lituse_jsrdirect";
5167 #else
5168 lituse = "lituse_jsr";
5169 #endif
5170
5171 gcc_assert (INTVAL (x) != 0);
5172 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5173 }
5174 break;
5175 case 'r':
5176 /* If this operand is the constant zero, write it as "$31". */
5177 if (REG_P (x))
5178 fprintf (file, "%s", reg_names[REGNO (x)]);
5179 else if (x == CONST0_RTX (GET_MODE (x)))
5180 fprintf (file, "$31");
5181 else
5182 output_operand_lossage ("invalid %%r value");
5183 break;
5184
5185 case 'R':
5186 /* Similar, but for floating-point. */
5187 if (REG_P (x))
5188 fprintf (file, "%s", reg_names[REGNO (x)]);
5189 else if (x == CONST0_RTX (GET_MODE (x)))
5190 fprintf (file, "$f31");
5191 else
5192 output_operand_lossage ("invalid %%R value");
5193 break;
5194
5195 case 'N':
5196 /* Write the 1's complement of a constant. */
5197 if (!CONST_INT_P (x))
5198 output_operand_lossage ("invalid %%N value");
5199
5200 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
5201 break;
5202
5203 case 'P':
5204 /* Write 1 << C, for a constant C. */
5205 if (!CONST_INT_P (x))
5206 output_operand_lossage ("invalid %%P value");
5207
5208 fprintf (file, HOST_WIDE_INT_PRINT_DEC, HOST_WIDE_INT_1 << INTVAL (x));
5209 break;
5210
5211 case 'h':
5212 /* Write the high-order 16 bits of a constant, sign-extended. */
5213 if (!CONST_INT_P (x))
5214 output_operand_lossage ("invalid %%h value");
5215
5216 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16);
5217 break;
5218
5219 case 'L':
5220 /* Write the low-order 16 bits of a constant, sign-extended. */
5221 if (!CONST_INT_P (x))
5222 output_operand_lossage ("invalid %%L value");
5223
5224 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5225 (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000));
5226 break;
5227
5228 case 'm':
5229 /* Write mask for ZAP insn. */
5230 if (CONST_INT_P (x))
5231 {
5232 HOST_WIDE_INT mask = 0, value = INTVAL (x);
5233
5234 for (i = 0; i < 8; i++, value >>= 8)
5235 if (value & 0xff)
5236 mask |= (1 << i);
5237
5238 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask);
5239 }
5240 else
5241 output_operand_lossage ("invalid %%m value");
5242 break;
5243
5244 case 'M':
5245 /* 'b', 'w', 'l', or 'q' as the value of the constant. */
5246 if (!mode_width_operand (x, VOIDmode))
5247 output_operand_lossage ("invalid %%M value");
5248
5249 fprintf (file, "%s",
5250 (INTVAL (x) == 8 ? "b"
5251 : INTVAL (x) == 16 ? "w"
5252 : INTVAL (x) == 32 ? "l"
5253 : "q"));
5254 break;
5255
5256 case 'U':
5257 /* Similar, except do it from the mask. */
5258 if (CONST_INT_P (x))
5259 {
5260 HOST_WIDE_INT value = INTVAL (x);
5261
5262 if (value == 0xff)
5263 {
5264 fputc ('b', file);
5265 break;
5266 }
5267 if (value == 0xffff)
5268 {
5269 fputc ('w', file);
5270 break;
5271 }
5272 if (value == 0xffffffff)
5273 {
5274 fputc ('l', file);
5275 break;
5276 }
5277 if (value == -1)
5278 {
5279 fputc ('q', file);
5280 break;
5281 }
5282 }
5283
5284 output_operand_lossage ("invalid %%U value");
5285 break;
5286
5287 case 's':
5288 /* Write the constant value divided by 8. */
5289 if (!CONST_INT_P (x)
5290 || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5291 || (INTVAL (x) & 7) != 0)
5292 output_operand_lossage ("invalid %%s value");
5293
5294 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8);
5295 break;
5296
5297 case 'S':
5298 /* Same, except compute (64 - c) / 8. */
5299
5300 if (!CONST_INT_P (x)
5301 || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5302 || (INTVAL (x) & 7) != 0)
5303 output_operand_lossage ("invalid %%S value");
5304
5305 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (64 - INTVAL (x)) / 8);
5306 break;
5307
5308 case 'C': case 'D': case 'c': case 'd':
5309 /* Write out comparison name. */
5310 {
5311 enum rtx_code c = GET_CODE (x);
5312
5313 if (!COMPARISON_P (x))
5314 output_operand_lossage ("invalid %%C value");
5315
5316 else if (code == 'D')
5317 c = reverse_condition (c);
5318 else if (code == 'c')
5319 c = swap_condition (c);
5320 else if (code == 'd')
5321 c = swap_condition (reverse_condition (c));
5322
5323 if (c == LEU)
5324 fprintf (file, "ule");
5325 else if (c == LTU)
5326 fprintf (file, "ult");
5327 else if (c == UNORDERED)
5328 fprintf (file, "un");
5329 else
5330 fprintf (file, "%s", GET_RTX_NAME (c));
5331 }
5332 break;
5333
5334 case 'E':
5335 /* Write the divide or modulus operator. */
5336 switch (GET_CODE (x))
5337 {
5338 case DIV:
5339 fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q");
5340 break;
5341 case UDIV:
5342 fprintf (file, "div%su", GET_MODE (x) == SImode ? "l" : "q");
5343 break;
5344 case MOD:
5345 fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q");
5346 break;
5347 case UMOD:
5348 fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q");
5349 break;
5350 default:
5351 output_operand_lossage ("invalid %%E value");
5352 break;
5353 }
5354 break;
5355
5356 case 'A':
5357 /* Write "_u" for unaligned access. */
5358 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
5359 fprintf (file, "_u");
5360 break;
5361
5362 case 0:
5363 if (REG_P (x))
5364 fprintf (file, "%s", reg_names[REGNO (x)]);
5365 else if (MEM_P (x))
5366 output_address (GET_MODE (x), XEXP (x, 0));
5367 else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC)
5368 {
5369 switch (XINT (XEXP (x, 0), 1))
5370 {
5371 case UNSPEC_DTPREL:
5372 case UNSPEC_TPREL:
5373 output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0));
5374 break;
5375 default:
5376 output_operand_lossage ("unknown relocation unspec");
5377 break;
5378 }
5379 }
5380 else
5381 output_addr_const (file, x);
5382 break;
5383
5384 default:
5385 output_operand_lossage ("invalid %%xn code");
5386 }
5387 }
5388
5389 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
5390
5391 static void
5392 alpha_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
5393 {
5394 int basereg = 31;
5395 HOST_WIDE_INT offset = 0;
5396
5397 if (GET_CODE (addr) == AND)
5398 addr = XEXP (addr, 0);
5399
5400 if (GET_CODE (addr) == PLUS
5401 && CONST_INT_P (XEXP (addr, 1)))
5402 {
5403 offset = INTVAL (XEXP (addr, 1));
5404 addr = XEXP (addr, 0);
5405 }
5406
5407 if (GET_CODE (addr) == LO_SUM)
5408 {
5409 const char *reloc16, *reloclo;
5410 rtx op1 = XEXP (addr, 1);
5411
5412 if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC)
5413 {
5414 op1 = XEXP (op1, 0);
5415 switch (XINT (op1, 1))
5416 {
5417 case UNSPEC_DTPREL:
5418 reloc16 = NULL;
5419 reloclo = (alpha_tls_size == 16 ? "dtprel" : "dtprello");
5420 break;
5421 case UNSPEC_TPREL:
5422 reloc16 = NULL;
5423 reloclo = (alpha_tls_size == 16 ? "tprel" : "tprello");
5424 break;
5425 default:
5426 output_operand_lossage ("unknown relocation unspec");
5427 return;
5428 }
5429
5430 output_addr_const (file, XVECEXP (op1, 0, 0));
5431 }
5432 else
5433 {
5434 reloc16 = "gprel";
5435 reloclo = "gprellow";
5436 output_addr_const (file, op1);
5437 }
5438
5439 if (offset)
5440 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
5441
5442 addr = XEXP (addr, 0);
5443 switch (GET_CODE (addr))
5444 {
5445 case REG:
5446 basereg = REGNO (addr);
5447 break;
5448
5449 case SUBREG:
5450 basereg = subreg_regno (addr);
5451 break;
5452
5453 default:
5454 gcc_unreachable ();
5455 }
5456
5457 fprintf (file, "($%d)\t\t!%s", basereg,
5458 (basereg == 29 ? reloc16 : reloclo));
5459 return;
5460 }
5461
5462 switch (GET_CODE (addr))
5463 {
5464 case REG:
5465 basereg = REGNO (addr);
5466 break;
5467
5468 case SUBREG:
5469 basereg = subreg_regno (addr);
5470 break;
5471
5472 case CONST_INT:
5473 offset = INTVAL (addr);
5474 break;
5475
5476 case SYMBOL_REF:
5477 gcc_assert(TARGET_ABI_OPEN_VMS || this_is_asm_operands);
5478 fprintf (file, "%s", XSTR (addr, 0));
5479 return;
5480
5481 case CONST:
5482 gcc_assert(TARGET_ABI_OPEN_VMS || this_is_asm_operands);
5483 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS
5484 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF);
5485 fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC,
5486 XSTR (XEXP (XEXP (addr, 0), 0), 0),
5487 INTVAL (XEXP (XEXP (addr, 0), 1)));
5488 return;
5489
5490 default:
5491 output_operand_lossage ("invalid operand address");
5492 return;
5493 }
5494
5495 fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg);
5496 }
5497 \f
5498 /* Emit RTL insns to initialize the variable parts of a trampoline at
5499 M_TRAMP. FNDECL is the target function's decl. CHAIN_VALUE is an rtx
5500 for the static chain value for the function. */
5501
5502 static void
5503 alpha_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5504 {
5505 rtx fnaddr, mem, word1, word2;
5506
5507 fnaddr = XEXP (DECL_RTL (fndecl), 0);
5508
5509 #ifdef POINTERS_EXTEND_UNSIGNED
5510 fnaddr = convert_memory_address (Pmode, fnaddr);
5511 chain_value = convert_memory_address (Pmode, chain_value);
5512 #endif
5513
5514 if (TARGET_ABI_OPEN_VMS)
5515 {
5516 const char *fnname;
5517 char *trname;
5518
5519 /* Construct the name of the trampoline entry point. */
5520 fnname = XSTR (fnaddr, 0);
5521 trname = (char *) alloca (strlen (fnname) + 5);
5522 strcpy (trname, fnname);
5523 strcat (trname, "..tr");
5524 fnname = ggc_alloc_string (trname, strlen (trname) + 1);
5525 word2 = gen_rtx_SYMBOL_REF (Pmode, fnname);
5526
5527 /* The trampoline (or "bounded") procedure descriptor is constructed from
5528 the function's procedure descriptor with certain fields zeroed in accordance
5529 with the VMS calling standard. This is stored in the first quadword. */
5530 word1 = force_reg (DImode, gen_const_mem (DImode, fnaddr));
5531 word1 = expand_and (DImode, word1,
5532 GEN_INT (HOST_WIDE_INT_C (0xffff0fff0000fff0)),
5533 NULL);
5534 }
5535 else
5536 {
5537 /* These 4 instructions are:
5538 ldq $1,24($27)
5539 ldq $27,16($27)
5540 jmp $31,($27),0
5541 nop
5542 We don't bother setting the HINT field of the jump; the nop
5543 is merely there for padding. */
5544 word1 = GEN_INT (HOST_WIDE_INT_C (0xa77b0010a43b0018));
5545 word2 = GEN_INT (HOST_WIDE_INT_C (0x47ff041f6bfb0000));
5546 }
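/* For reference (a decoding of the constants above; little-endian
   instruction order, so the low 32 bits execute first):
     word1 = 0xa77b0010a43b0018:  a43b0018 = ldq $1,24($27)
				  a77b0010 = ldq $27,16($27)
     word2 = 0x47ff041f6bfb0000:  6bfb0000 = jmp $31,($27),0
				  47ff041f = nop (bis $31,$31,$31)  */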
5547
5548 /* Store the first two words, as computed above. */
5549 mem = adjust_address (m_tramp, DImode, 0);
5550 emit_move_insn (mem, word1);
5551 mem = adjust_address (m_tramp, DImode, 8);
5552 emit_move_insn (mem, word2);
5553
5554 /* Store function address and static chain value. */
5555 mem = adjust_address (m_tramp, Pmode, 16);
5556 emit_move_insn (mem, fnaddr);
5557 mem = adjust_address (m_tramp, Pmode, 24);
5558 emit_move_insn (mem, chain_value);
5559
5560 if (TARGET_ABI_OSF)
5561 {
5562 emit_insn (gen_imb ());
5563 #ifdef HAVE_ENABLE_EXECUTE_STACK
5564 emit_library_call (init_one_libfunc ("__enable_execute_stack"),
5565 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
5566 #endif
5567 }
5568 }
5569 \f
5570 /* Determine where to put an argument to a function.
5571 Value is zero to push the argument on the stack,
5572 or a hard register in which to store the argument.
5573
5574 MODE is the argument's machine mode.
5575 TYPE is the data type of the argument (as a tree).
5576 This is null for libcalls where that information may
5577 not be available.
5578 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5579 the preceding args and about the function being called.
5580 NAMED is nonzero if this argument is a named parameter
5581 (otherwise it is an extra parameter matching an ellipsis).
5582
5583 On Alpha the first 6 words of args are normally in registers
5584 and the rest are pushed. */
5585
5586 static rtx
5587 alpha_function_arg (cumulative_args_t cum_v, machine_mode mode,
5588 const_tree type, bool named ATTRIBUTE_UNUSED)
5589 {
5590 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5591 int basereg;
5592 int num_args;
5593
5594 /* Don't get confused and pass small structures in FP registers. */
5595 if (type && AGGREGATE_TYPE_P (type))
5596 basereg = 16;
5597 else
5598 {
5599 /* With alpha_split_complex_arg, we shouldn't see any raw complex
5600 values here. */
5601 gcc_checking_assert (!COMPLEX_MODE_P (mode));
5602
5603 /* Set up defaults for FP operands passed in FP registers, and
5604 integral operands passed in integer registers. */
5605 if (TARGET_FPREGS && GET_MODE_CLASS (mode) == MODE_FLOAT)
5606 basereg = 32 + 16;
5607 else
5608 basereg = 16;
5609 }
5610
5611 /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for
5612 the two platforms, so we can't avoid conditional compilation. */
5613 #if TARGET_ABI_OPEN_VMS
5614 {
5615 if (mode == VOIDmode)
5616 return alpha_arg_info_reg_val (*cum);
5617
5618 num_args = cum->num_args;
5619 if (num_args >= 6
5620 || targetm.calls.must_pass_in_stack (mode, type))
5621 return NULL_RTX;
5622 }
5623 #elif TARGET_ABI_OSF
5624 {
5625 if (*cum >= 6)
5626 return NULL_RTX;
5627 num_args = *cum;
5628
5629 /* VOID is passed as a special flag for "last argument". */
5630 if (type == void_type_node)
5631 basereg = 16;
5632 else if (targetm.calls.must_pass_in_stack (mode, type))
5633 return NULL_RTX;
5634 }
5635 #else
5636 #error Unhandled ABI
5637 #endif
5638
5639 return gen_rtx_REG (mode, num_args + basereg);
5640 }
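/* Two assumed examples of the mapping above (OSF ABI): with two argument
   words already used (*cum == 2), a DImode integer argument lands in
   register 16 + 2 = $18, while a DFmode argument in the same slot lands
   in register 48 + 2, i.e. $f18, since FP arguments use basereg
   32 + 16.  */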
5641
5642 /* Update the data in CUM to advance over an argument
5643 of mode MODE and data type TYPE.
5644 (TYPE is null for libcalls where that information may not be available.) */
5645
5646 static void
5647 alpha_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
5648 const_tree type, bool named ATTRIBUTE_UNUSED)
5649 {
5650 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5651 bool onstack = targetm.calls.must_pass_in_stack (mode, type);
5652 int increment = onstack ? 6 : ALPHA_ARG_SIZE (mode, type);
5653
5654 #if TARGET_ABI_OSF
5655 *cum += increment;
5656 #else
5657 if (!onstack && cum->num_args < 6)
5658 cum->atypes[cum->num_args] = alpha_arg_type (mode);
5659 cum->num_args += increment;
5660 #endif
5661 }
5662
5663 static int
5664 alpha_arg_partial_bytes (cumulative_args_t cum_v,
5665 machine_mode mode ATTRIBUTE_UNUSED,
5666 tree type ATTRIBUTE_UNUSED,
5667 bool named ATTRIBUTE_UNUSED)
5668 {
5669 int words = 0;
5670 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED = get_cumulative_args (cum_v);
5671
5672 #if TARGET_ABI_OPEN_VMS
5673 if (cum->num_args < 6
5674 && 6 < cum->num_args + ALPHA_ARG_SIZE (mode, type))
5675 words = 6 - cum->num_args;
5676 #elif TARGET_ABI_OSF
5677 if (*cum < 6 && 6 < *cum + ALPHA_ARG_SIZE (mode, type))
5678 words = 6 - *cum;
5679 #else
5680 #error Unhandled ABI
5681 #endif
5682
5683 return words * UNITS_PER_WORD;
5684 }
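/* An assumed example for the OSF case above: if five argument registers
   are already in use (*cum == 5) and a two-word argument arrives
   (ALPHA_ARG_SIZE == 2), then 6 < 5 + 2, so words = 1 and 8 bytes of the
   argument are passed in the last register while the remainder goes on
   the stack.  */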
5685
5686
5687 /* Return true if TYPE must be returned in memory, instead of in registers. */
5688
5689 static bool
5690 alpha_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
5691 {
5692 machine_mode mode = VOIDmode;
5693 int size;
5694
5695 if (type)
5696 {
5697 mode = TYPE_MODE (type);
5698
5699 /* All aggregates are returned in memory, except on OpenVMS where
5700 records that fit 64 bits should be returned by immediate value
5701 as required by section 3.8.7.1 of the OpenVMS Calling Standard. */
5702 if (TARGET_ABI_OPEN_VMS
5703 && TREE_CODE (type) != ARRAY_TYPE
5704 && (unsigned HOST_WIDE_INT) int_size_in_bytes(type) <= 8)
5705 return false;
5706
5707 if (AGGREGATE_TYPE_P (type))
5708 return true;
5709 }
5710
5711 size = GET_MODE_SIZE (mode);
5712 switch (GET_MODE_CLASS (mode))
5713 {
5714 case MODE_VECTOR_FLOAT:
5715 /* Pass all float vectors in memory, like an aggregate. */
5716 return true;
5717
5718 case MODE_COMPLEX_FLOAT:
5719 /* We judge complex floats on the size of their element,
5720 not the size of the whole type. */
5721 size = GET_MODE_UNIT_SIZE (mode);
5722 break;
5723
5724 case MODE_INT:
5725 case MODE_FLOAT:
5726 case MODE_COMPLEX_INT:
5727 case MODE_VECTOR_INT:
5728 break;
5729
5730 default:
5731 /* ??? We get called on all sorts of random stuff from
5732 aggregate_value_p. We must return something, but it's not
5733 clear what's safe to return. Pretend it's a struct I
5734 guess. */
5735 return true;
5736 }
5737
5738 /* Otherwise types must fit in one register. */
5739 return size > UNITS_PER_WORD;
5740 }
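/* For illustration (assumed cases): a 16-byte struct is returned in
   memory on both ABIs, while on OpenVMS a non-array record of 8 bytes or
   less is returned by value; a complex double (DCmode, two 8-byte parts)
   is judged by its 8-byte element size and so is returned in
   registers.  */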
5741
5742 /* Return true if TYPE should be passed by invisible reference. */
5743
5744 static bool
5745 alpha_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
5746 machine_mode mode,
5747 const_tree type ATTRIBUTE_UNUSED,
5748 bool named)
5749 {
5750 /* Pass float and _Complex float variable arguments by reference.
5751 This avoids a 64-bit store from an FP register to a pretend args save area
5752 and a subsequent 32-bit load from the saved location to an FP register.
5753
5754 Note that 32-bit loads and stores to/from an FP register on alpha reorder
5755 bits to form a canonical 64-bit value in the FP register. This fact
5756 invalidates the compiler's assumption that a 32-bit FP value lives in the
5757 lower 32 bits of the passed 64-bit FP value, so loading the 32-bit value
5758 from the stored 64-bit location using a 32-bit FP load is invalid on alpha.
5759
5760 This introduces a sort of ABI incompatibility, but until _Float32 was
5761 introduced, C-family languages promoted a 32-bit float variable argument
5762 to a 64-bit double, and it was not allowed to pass float as a variable
5763 argument. Passing _Complex float as a variable argument never worked on
5764 alpha. Thus, we have no backward compatibility issues to worry about, and
5765 passing unpromoted _Float32 and _Complex float as a variable argument
5766 will actually work in the future. */
5767
5768 if (mode == SFmode || mode == SCmode)
5769 return !named;
5770
5771 return mode == TFmode || mode == TCmode;
5772 }
5773
5774 /* Define how to find the value returned by a function. VALTYPE is the
5775 data type of the value (as a tree). If the precise function being
5776 called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0.
5777 MODE is set instead of VALTYPE for libcalls.
5778
5779 On Alpha the value is found in $0 for integer functions and
5780 $f0 for floating-point functions. */
5781
5782 static rtx
5783 alpha_function_value_1 (const_tree valtype, const_tree func ATTRIBUTE_UNUSED,
5784 machine_mode mode)
5785 {
5786 unsigned int regnum, dummy ATTRIBUTE_UNUSED;
5787 enum mode_class mclass;
5788
5789 gcc_assert (!valtype || !alpha_return_in_memory (valtype, func));
5790
5791 if (valtype)
5792 mode = TYPE_MODE (valtype);
5793
5794 mclass = GET_MODE_CLASS (mode);
5795 switch (mclass)
5796 {
5797 case MODE_INT:
5798 /* Do the same thing as PROMOTE_MODE except for libcalls on VMS,
5799 where we have them returning both SImode and DImode. */
5800 if (!(TARGET_ABI_OPEN_VMS && valtype && AGGREGATE_TYPE_P (valtype)))
5801 PROMOTE_MODE (mode, dummy, valtype);
5802 /* FALLTHRU */
5803
5804 case MODE_COMPLEX_INT:
5805 case MODE_VECTOR_INT:
5806 regnum = 0;
5807 break;
5808
5809 case MODE_FLOAT:
5810 regnum = 32;
5811 break;
5812
5813 case MODE_COMPLEX_FLOAT:
5814 {
5815 machine_mode cmode = GET_MODE_INNER (mode);
5816
5817 return gen_rtx_PARALLEL
5818 (VOIDmode,
5819 gen_rtvec (2,
5820 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32),
5821 const0_rtx),
5822 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33),
5823 GEN_INT (GET_MODE_SIZE (cmode)))));
5824 }
5825
5826 case MODE_RANDOM:
5827 /* We should only reach here for BLKmode on VMS. */
5828 gcc_assert (TARGET_ABI_OPEN_VMS && mode == BLKmode);
5829 regnum = 0;
5830 break;
5831
5832 default:
5833 gcc_unreachable ();
5834 }
5835
5836 return gen_rtx_REG (mode, regnum);
5837 }
5838
5839 /* Implement TARGET_FUNCTION_VALUE. */
5840
5841 static rtx
5842 alpha_function_value (const_tree valtype, const_tree fn_decl_or_type,
5843 bool /*outgoing*/)
5844 {
5845 return alpha_function_value_1 (valtype, fn_decl_or_type, VOIDmode);
5846 }
5847
5848 /* Implement TARGET_LIBCALL_VALUE. */
5849
5850 static rtx
5851 alpha_libcall_value (machine_mode mode, const_rtx /*fun*/)
5852 {
5853 return alpha_function_value_1 (NULL_TREE, NULL_TREE, mode);
5854 }
5855
5856 /* Implement TARGET_FUNCTION_VALUE_REGNO_P.
5857
5858 On the Alpha, $0, $1 and $f0, $f1 are the only registers thus used. */
5859
5860 static bool
5861 alpha_function_value_regno_p (const unsigned int regno)
5862 {
5863 return (regno == 0 || regno == 1 || regno == 32 || regno == 33);
5864 }
5865
5866 /* TCmode complex values are passed by invisible reference. We
5867 should not split these values. */
5868
5869 static bool
5870 alpha_split_complex_arg (const_tree type)
5871 {
5872 return TYPE_MODE (type) != TCmode;
5873 }
5874
5875 static tree
5876 alpha_build_builtin_va_list (void)
5877 {
5878 tree base, ofs, space, record, type_decl;
5879
5880 if (TARGET_ABI_OPEN_VMS)
5881 return ptr_type_node;
5882
5883 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5884 type_decl = build_decl (BUILTINS_LOCATION,
5885 TYPE_DECL, get_identifier ("__va_list_tag"), record);
5886 TYPE_STUB_DECL (record) = type_decl;
5887 TYPE_NAME (record) = type_decl;
5888
5889 /* C++? SET_IS_AGGR_TYPE (record, 1); */
5890
5891 /* Dummy field to prevent alignment warnings. */
5892 space = build_decl (BUILTINS_LOCATION,
5893 FIELD_DECL, NULL_TREE, integer_type_node);
5894 DECL_FIELD_CONTEXT (space) = record;
5895 DECL_ARTIFICIAL (space) = 1;
5896 DECL_IGNORED_P (space) = 1;
5897
5898 ofs = build_decl (BUILTINS_LOCATION,
5899 FIELD_DECL, get_identifier ("__offset"),
5900 integer_type_node);
5901 DECL_FIELD_CONTEXT (ofs) = record;
5902 DECL_CHAIN (ofs) = space;
5903
5904 base = build_decl (BUILTINS_LOCATION,
5905 FIELD_DECL, get_identifier ("__base"),
5906 ptr_type_node);
5907 DECL_FIELD_CONTEXT (base) = record;
5908 DECL_CHAIN (base) = ofs;
5909
5910 TYPE_FIELDS (record) = base;
5911 layout_type (record);
5912
5913 va_list_gpr_counter_field = ofs;
5914 return record;
5915 }
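/* Roughly speaking (a sketch of the layout built above, assuming the
   usual 64-bit pointer type for __base), the OSF va_list record
   corresponds to:

     struct __va_list_tag {
       void *__base;    -- pointer used to locate the saved arguments
       int __offset;    -- byte offset of the next argument
       int;             -- unnamed dummy field, avoids alignment warnings
     };

   On OpenVMS a plain pointer type is used instead.  */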
5916
5917 #if TARGET_ABI_OSF
5918 /* Helper function for alpha_stdarg_optimize_hook. Skip over casts
5919 and constant additions. */
5920
5921 static gimple *
5922 va_list_skip_additions (tree lhs)
5923 {
5924 gimple *stmt;
5925
5926 for (;;)
5927 {
5928 enum tree_code code;
5929
5930 stmt = SSA_NAME_DEF_STMT (lhs);
5931
5932 if (gimple_code (stmt) == GIMPLE_PHI)
5933 return stmt;
5934
5935 if (!is_gimple_assign (stmt)
5936 || gimple_assign_lhs (stmt) != lhs)
5937 return NULL;
5938
5939 if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME)
5940 return stmt;
5941 code = gimple_assign_rhs_code (stmt);
5942 if (!CONVERT_EXPR_CODE_P (code)
5943 && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR)
5944 || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
5945 || !tree_fits_uhwi_p (gimple_assign_rhs2 (stmt))))
5946 return stmt;
5947
5948 lhs = gimple_assign_rhs1 (stmt);
5949 }
5950 }
5951
5952 /* Check if LHS = RHS statement is
5953 LHS = *(ap.__base + ap.__offset + cst)
5954 or
5955 LHS = *(ap.__base
5956 + ((ap.__offset + cst <= 47)
5957 ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2).
5958 If the former, indicate that GPR registers are needed,
5959 if the latter, indicate that FPR registers are needed.
5960
5961 Also look for LHS = (*ptr).field, where ptr is one of the forms
5962 listed above.
5963
5964 On alpha, cfun->va_list_gpr_size is used as the size of the needed
5965 regs and cfun->va_list_fpr_size is a bitmask: bit 0 is set if GPR
5966 registers are needed and bit 1 is set if FPR registers are needed.
5967 Return true if va_list references should not be scanned for the
5968 current statement. */
5969
5970 static bool
5971 alpha_stdarg_optimize_hook (struct stdarg_info *si, const gimple *stmt)
5972 {
5973 tree base, offset, rhs;
5974 int offset_arg = 1;
5975 gimple *base_stmt;
5976
5977 if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
5978 != GIMPLE_SINGLE_RHS)
5979 return false;
5980
5981 rhs = gimple_assign_rhs1 (stmt);
5982 while (handled_component_p (rhs))
5983 rhs = TREE_OPERAND (rhs, 0);
5984 if (TREE_CODE (rhs) != MEM_REF
5985 || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME)
5986 return false;
5987
5988 stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0));
5989 if (stmt == NULL
5990 || !is_gimple_assign (stmt)
5991 || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR)
5992 return false;
5993
5994 base = gimple_assign_rhs1 (stmt);
5995 if (TREE_CODE (base) == SSA_NAME)
5996 {
5997 base_stmt = va_list_skip_additions (base);
5998 if (base_stmt
5999 && is_gimple_assign (base_stmt)
6000 && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
6001 base = gimple_assign_rhs1 (base_stmt);
6002 }
6003
6004 if (TREE_CODE (base) != COMPONENT_REF
6005 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
6006 {
6007 base = gimple_assign_rhs2 (stmt);
6008 if (TREE_CODE (base) == SSA_NAME)
6009 {
6010 base_stmt = va_list_skip_additions (base);
6011 if (base_stmt
6012 && is_gimple_assign (base_stmt)
6013 && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
6014 base = gimple_assign_rhs1 (base_stmt);
6015 }
6016
6017 if (TREE_CODE (base) != COMPONENT_REF
6018 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
6019 return false;
6020
6021 offset_arg = 0;
6022 }
6023
6024 base = get_base_address (base);
6025 if (TREE_CODE (base) != VAR_DECL
6026 || !bitmap_bit_p (si->va_list_vars, DECL_UID (base) + num_ssa_names))
6027 return false;
6028
6029 offset = gimple_op (stmt, 1 + offset_arg);
6030 if (TREE_CODE (offset) == SSA_NAME)
6031 {
6032 gimple *offset_stmt = va_list_skip_additions (offset);
6033
6034 if (offset_stmt
6035 && gimple_code (offset_stmt) == GIMPLE_PHI)
6036 {
6037 HOST_WIDE_INT sub;
6038 gimple *arg1_stmt, *arg2_stmt;
6039 tree arg1, arg2;
6040 enum tree_code code1, code2;
6041
6042 if (gimple_phi_num_args (offset_stmt) != 2)
6043 goto escapes;
6044
6045 arg1_stmt
6046 = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0));
6047 arg2_stmt
6048 = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1));
6049 if (arg1_stmt == NULL
6050 || !is_gimple_assign (arg1_stmt)
6051 || arg2_stmt == NULL
6052 || !is_gimple_assign (arg2_stmt))
6053 goto escapes;
6054
6055 code1 = gimple_assign_rhs_code (arg1_stmt);
6056 code2 = gimple_assign_rhs_code (arg2_stmt);
6057 if (code1 == COMPONENT_REF
6058 && (code2 == MINUS_EXPR || code2 == PLUS_EXPR))
6059 /* Do nothing. */;
6060 else if (code2 == COMPONENT_REF
6061 && (code1 == MINUS_EXPR || code1 == PLUS_EXPR))
6062 {
6063 gimple *tem = arg1_stmt;
6064 code2 = code1;
6065 arg1_stmt = arg2_stmt;
6066 arg2_stmt = tem;
6067 }
6068 else
6069 goto escapes;
6070
6071 if (!tree_fits_shwi_p (gimple_assign_rhs2 (arg2_stmt)))
6072 goto escapes;
6073
6074 sub = tree_to_shwi (gimple_assign_rhs2 (arg2_stmt));
6075 if (code2 == MINUS_EXPR)
6076 sub = -sub;
6077 if (sub < -48 || sub > -32)
6078 goto escapes;
6079
6080 arg1 = gimple_assign_rhs1 (arg1_stmt);
6081 arg2 = gimple_assign_rhs1 (arg2_stmt);
6082 if (TREE_CODE (arg2) == SSA_NAME)
6083 {
6084 arg2_stmt = va_list_skip_additions (arg2);
6085 if (arg2_stmt == NULL
6086 || !is_gimple_assign (arg2_stmt)
6087 || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF)
6088 goto escapes;
6089 arg2 = gimple_assign_rhs1 (arg2_stmt);
6090 }
6091 if (arg1 != arg2)
6092 goto escapes;
6093
6094 if (TREE_CODE (arg1) != COMPONENT_REF
6095 || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field
6096 || get_base_address (arg1) != base)
6097 goto escapes;
6098
6099 /* Need floating point regs. */
6100 cfun->va_list_fpr_size |= 2;
6101 return false;
6102 }
6103 if (offset_stmt
6104 && is_gimple_assign (offset_stmt)
6105 && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF)
6106 offset = gimple_assign_rhs1 (offset_stmt);
6107 }
6108 if (TREE_CODE (offset) != COMPONENT_REF
6109 || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field
6110 || get_base_address (offset) != base)
6111 goto escapes;
6112 else
6113 /* Need general regs. */
6114 cfun->va_list_fpr_size |= 1;
6115 return false;
6116
6117 escapes:
6118 si->va_list_escapes = true;
6119 return false;
6120 }
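/* A concrete illustration (sketch only) of the forms matched above.  On OSF

     i = va_arg (ap, int);

   gimplifies roughly to

     i = *(int *) (ap.__base + (long) ap.__offset);

   which the hook records by setting bit 0 of cfun->va_list_fpr_size
   (GPR save area needed), while

     d = va_arg (ap, double);

   gimplifies roughly to

     d = *(double *) (ap.__base
                      + (ap.__offset < 48
                         ? (long) ap.__offset - 48
                         : (long) ap.__offset));

   which it records by setting bit 1 (FPR save area needed).  */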
6121 #endif
6122
6123 /* Perform any actions needed for a function that is receiving a
6124 variable number of arguments. */
6125
6126 static void
6127 alpha_setup_incoming_varargs (cumulative_args_t pcum, machine_mode mode,
6128 tree type, int *pretend_size, int no_rtl)
6129 {
6130 CUMULATIVE_ARGS cum = *get_cumulative_args (pcum);
6131
6132 /* Skip the current argument. */
6133 targetm.calls.function_arg_advance (pack_cumulative_args (&cum), mode, type,
6134 true);
6135
6136 #if TARGET_ABI_OPEN_VMS
6137 /* For VMS, we allocate space for all 6 arg registers plus a count.
6138
6139 However, if NO registers need to be saved, don't allocate any space.
6140 This is not only because we won't need the space, but because AP
6141 includes the current_pretend_args_size and we don't want to mess up
6142 any ap-relative addresses already made. */
6143 if (cum.num_args < 6)
6144 {
6145 if (!no_rtl)
6146 {
6147 emit_move_insn (gen_rtx_REG (DImode, 1), virtual_incoming_args_rtx);
6148 emit_insn (gen_arg_home ());
6149 }
6150 *pretend_size = 7 * UNITS_PER_WORD;
6151 }
6152 #else
6153 /* On OSF/1 and friends, we allocate space for all 12 arg registers, but
6154 only push those that are remaining. However, if NO registers need to
6155 be saved, don't allocate any space. This is not only because we won't
6156 need the space, but because AP includes the current_pretend_args_size
6157 and we don't want to mess up any ap-relative addresses already made.
6158
6159 If we are not to use the floating-point registers, save the integer
6160 registers where we would put the floating-point registers. This is
6161 not the most efficient way to implement varargs with just one register
6162 class, but it isn't worth doing anything more efficient in this rare
6163 case. */
6164 if (cum >= 6)
6165 return;
6166
6167 if (!no_rtl)
6168 {
6169 int count;
6170 alias_set_type set = get_varargs_alias_set ();
6171 rtx tmp;
6172
6173 count = cfun->va_list_gpr_size / UNITS_PER_WORD;
6174 if (count > 6 - cum)
6175 count = 6 - cum;
6176
6177 /* Detect whether integer registers or floating-point registers
6178 are needed by the detected va_arg statements. See above for
6179 how these values are computed. Note that the "escape" value
6180 is VA_LIST_MAX_FPR_SIZE, which is 255 and therefore has both of
6181 these bits set.
6182 gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3);
6183
6184 if (cfun->va_list_fpr_size & 1)
6185 {
6186 tmp = gen_rtx_MEM (BLKmode,
6187 plus_constant (Pmode, virtual_incoming_args_rtx,
6188 (cum + 6) * UNITS_PER_WORD));
6189 MEM_NOTRAP_P (tmp) = 1;
6190 set_mem_alias_set (tmp, set);
6191 move_block_from_reg (16 + cum, tmp, count);
6192 }
6193
6194 if (cfun->va_list_fpr_size & 2)
6195 {
6196 tmp = gen_rtx_MEM (BLKmode,
6197 plus_constant (Pmode, virtual_incoming_args_rtx,
6198 cum * UNITS_PER_WORD));
6199 MEM_NOTRAP_P (tmp) = 1;
6200 set_mem_alias_set (tmp, set);
6201 move_block_from_reg (16 + cum + TARGET_FPREGS*32, tmp, count);
6202 }
6203 }
6204 *pretend_size = 12 * UNITS_PER_WORD;
6205 #endif
6206 }
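/* Layout sketch for the OSF case (illustration only, assuming no named
   register arguments, i.e. cum == 0, and all six registers saved):
   relative to virtual_incoming_args_rtx, bytes 0..47 receive the
   floating-point argument registers $f16-$f21 (or copies of the integer
   registers when FP registers are not used, as noted above) and bytes
   48..95 receive the integer argument registers $16-$21.  This is the
   48-byte bias that alpha_va_start and alpha_gimplify_va_arg_1 below
   rely on.  */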
6207
6208 static void
6209 alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6210 {
6211 HOST_WIDE_INT offset;
6212 tree t, offset_field, base_field;
6213
6214 if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK)
6215 return;
6216
6217 /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base
6218 up by 48, storing fp arg registers in the first 48 bytes, and the
6219 integer arg registers in the next 48 bytes. This is only done,
6220 however, if any integer registers need to be stored.
6221
6222 If no integer registers need be stored, then we must subtract 48
6223 in order to account for the integer arg registers which are counted
6224 in argsize above, but which are not actually stored on the stack.
6225 Must further be careful here about structures straddling the last
6226 integer argument register; that futzes with pretend_args_size,
6227 which changes the meaning of AP. */
6228
6229 if (NUM_ARGS < 6)
6230 offset = TARGET_ABI_OPEN_VMS ? UNITS_PER_WORD : 6 * UNITS_PER_WORD;
6231 else
6232 offset = -6 * UNITS_PER_WORD + crtl->args.pretend_args_size;
6233
6234 if (TARGET_ABI_OPEN_VMS)
6235 {
6236 t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6237 t = fold_build_pointer_plus_hwi (t, offset + NUM_ARGS * UNITS_PER_WORD);
6238 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
6239 TREE_SIDE_EFFECTS (t) = 1;
6240 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6241 }
6242 else
6243 {
6244 base_field = TYPE_FIELDS (TREE_TYPE (valist));
6245 offset_field = DECL_CHAIN (base_field);
6246
6247 base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6248 valist, base_field, NULL_TREE);
6249 offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6250 valist, offset_field, NULL_TREE);
6251
6252 t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6253 t = fold_build_pointer_plus_hwi (t, offset);
6254 t = build2 (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t);
6255 TREE_SIDE_EFFECTS (t) = 1;
6256 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6257
6258 t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD);
6259 t = build2 (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t);
6260 TREE_SIDE_EFFECTS (t) = 1;
6261 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6262 }
6263 }
6264
6265 static tree
6266 alpha_gimplify_va_arg_1 (tree type, tree base, tree offset,
6267 gimple_seq *pre_p)
6268 {
6269 tree type_size, ptr_type, addend, t, addr;
6270 gimple_seq internal_post;
6271
6272 /* If the type could not be passed in registers, skip the block
6273 reserved for the registers. */
6274 if (targetm.calls.must_pass_in_stack (TYPE_MODE (type), type))
6275 {
6276 t = build_int_cst (TREE_TYPE (offset), 6*8);
6277 gimplify_assign (offset,
6278 build2 (MAX_EXPR, TREE_TYPE (offset), offset, t),
6279 pre_p);
6280 }
6281
6282 addend = offset;
6283 ptr_type = build_pointer_type_for_mode (type, ptr_mode, true);
6284
6285 if (TREE_CODE (type) == COMPLEX_TYPE)
6286 {
6287 tree real_part, imag_part, real_temp;
6288
6289 real_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6290 offset, pre_p);
6291
6292 /* Copy the value into a new temporary, lest the formal temporary
6293 be reused out from under us. */
6294 real_temp = get_initialized_tmp_var (real_part, pre_p, NULL);
6295
6296 imag_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6297 offset, pre_p);
6298
6299 return build2 (COMPLEX_EXPR, type, real_temp, imag_part);
6300 }
6301 else if (TREE_CODE (type) == REAL_TYPE)
6302 {
6303 tree fpaddend, cond, fourtyeight;
6304
6305 fourtyeight = build_int_cst (TREE_TYPE (addend), 6*8);
6306 fpaddend = fold_build2 (MINUS_EXPR, TREE_TYPE (addend),
6307 addend, fourtyeight);
6308 cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight);
6309 addend = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond,
6310 fpaddend, addend);
6311 }
6312
6313 /* Build the final address and force that value into a temporary. */
6314 addr = fold_build_pointer_plus (fold_convert (ptr_type, base), addend);
6315 internal_post = NULL;
6316 gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue);
6317 gimple_seq_add_seq (pre_p, internal_post);
6318
6319 /* Update the offset field. */
6320 type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type));
6321 if (type_size == NULL || TREE_OVERFLOW (type_size))
6322 t = size_zero_node;
6323 else
6324 {
6325 t = size_binop (PLUS_EXPR, type_size, size_int (7));
6326 t = size_binop (TRUNC_DIV_EXPR, t, size_int (8));
6327 t = size_binop (MULT_EXPR, t, size_int (8));
6328 }
6329 t = fold_convert (TREE_TYPE (offset), t);
6330 gimplify_assign (offset, build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t),
6331 pre_p);
6332
6333 return build_va_arg_indirect_ref (addr);
6334 }
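/* Example (arithmetic only): an argument type whose TYPE_SIZE_UNIT is 12
   advances __offset by ((12 + 7) / 8) * 8 == 16 bytes above.  */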
6335
6336 static tree
6337 alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6338 gimple_seq *post_p)
6339 {
6340 tree offset_field, base_field, offset, base, t, r;
6341 bool indirect;
6342
6343 if (TARGET_ABI_OPEN_VMS)
6344 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6345
6346 base_field = TYPE_FIELDS (va_list_type_node);
6347 offset_field = DECL_CHAIN (base_field);
6348 base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6349 valist, base_field, NULL_TREE);
6350 offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6351 valist, offset_field, NULL_TREE);
6352
6353 /* Pull the fields of the structure out into temporaries. Since we never
6354 modify the base field, we can use a formal temporary. Sign-extend the
6355 offset field so that it's the proper width for pointer arithmetic. */
6356 base = get_formal_tmp_var (base_field, pre_p);
6357
6358 t = fold_convert (build_nonstandard_integer_type (64, 0), offset_field);
6359 offset = get_initialized_tmp_var (t, pre_p, NULL);
6360
6361 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6362 if (indirect)
6363 type = build_pointer_type_for_mode (type, ptr_mode, true);
6364
6365 /* Find the value. Note that this will be a stable indirection, or
6366 a composite of stable indirections in the case of complex. */
6367 r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p);
6368
6369 /* Stuff the offset temporary back into its field. */
6370 gimplify_assign (unshare_expr (offset_field),
6371 fold_convert (TREE_TYPE (offset_field), offset), pre_p);
6372
6373 if (indirect)
6374 r = build_va_arg_indirect_ref (r);
6375
6376 return r;
6377 }
6378 \f
6379 /* Builtins. */
6380
6381 enum alpha_builtin
6382 {
6383 ALPHA_BUILTIN_CMPBGE,
6384 ALPHA_BUILTIN_EXTBL,
6385 ALPHA_BUILTIN_EXTWL,
6386 ALPHA_BUILTIN_EXTLL,
6387 ALPHA_BUILTIN_EXTQL,
6388 ALPHA_BUILTIN_EXTWH,
6389 ALPHA_BUILTIN_EXTLH,
6390 ALPHA_BUILTIN_EXTQH,
6391 ALPHA_BUILTIN_INSBL,
6392 ALPHA_BUILTIN_INSWL,
6393 ALPHA_BUILTIN_INSLL,
6394 ALPHA_BUILTIN_INSQL,
6395 ALPHA_BUILTIN_INSWH,
6396 ALPHA_BUILTIN_INSLH,
6397 ALPHA_BUILTIN_INSQH,
6398 ALPHA_BUILTIN_MSKBL,
6399 ALPHA_BUILTIN_MSKWL,
6400 ALPHA_BUILTIN_MSKLL,
6401 ALPHA_BUILTIN_MSKQL,
6402 ALPHA_BUILTIN_MSKWH,
6403 ALPHA_BUILTIN_MSKLH,
6404 ALPHA_BUILTIN_MSKQH,
6405 ALPHA_BUILTIN_UMULH,
6406 ALPHA_BUILTIN_ZAP,
6407 ALPHA_BUILTIN_ZAPNOT,
6408 ALPHA_BUILTIN_AMASK,
6409 ALPHA_BUILTIN_IMPLVER,
6410 ALPHA_BUILTIN_RPCC,
6411 ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6412 ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER,
6413
6414 /* TARGET_MAX */
6415 ALPHA_BUILTIN_MINUB8,
6416 ALPHA_BUILTIN_MINSB8,
6417 ALPHA_BUILTIN_MINUW4,
6418 ALPHA_BUILTIN_MINSW4,
6419 ALPHA_BUILTIN_MAXUB8,
6420 ALPHA_BUILTIN_MAXSB8,
6421 ALPHA_BUILTIN_MAXUW4,
6422 ALPHA_BUILTIN_MAXSW4,
6423 ALPHA_BUILTIN_PERR,
6424 ALPHA_BUILTIN_PKLB,
6425 ALPHA_BUILTIN_PKWB,
6426 ALPHA_BUILTIN_UNPKBL,
6427 ALPHA_BUILTIN_UNPKBW,
6428
6429 /* TARGET_CIX */
6430 ALPHA_BUILTIN_CTTZ,
6431 ALPHA_BUILTIN_CTLZ,
6432 ALPHA_BUILTIN_CTPOP,
6433
6434 ALPHA_BUILTIN_max
6435 };
6436
6437 static enum insn_code const code_for_builtin[ALPHA_BUILTIN_max] = {
6438 CODE_FOR_builtin_cmpbge,
6439 CODE_FOR_extbl,
6440 CODE_FOR_extwl,
6441 CODE_FOR_extll,
6442 CODE_FOR_extql,
6443 CODE_FOR_extwh,
6444 CODE_FOR_extlh,
6445 CODE_FOR_extqh,
6446 CODE_FOR_builtin_insbl,
6447 CODE_FOR_builtin_inswl,
6448 CODE_FOR_builtin_insll,
6449 CODE_FOR_insql,
6450 CODE_FOR_inswh,
6451 CODE_FOR_inslh,
6452 CODE_FOR_insqh,
6453 CODE_FOR_mskbl,
6454 CODE_FOR_mskwl,
6455 CODE_FOR_mskll,
6456 CODE_FOR_mskql,
6457 CODE_FOR_mskwh,
6458 CODE_FOR_msklh,
6459 CODE_FOR_mskqh,
6460 CODE_FOR_umuldi3_highpart,
6461 CODE_FOR_builtin_zap,
6462 CODE_FOR_builtin_zapnot,
6463 CODE_FOR_builtin_amask,
6464 CODE_FOR_builtin_implver,
6465 CODE_FOR_builtin_rpcc,
6466 CODE_FOR_builtin_establish_vms_condition_handler,
6467 CODE_FOR_builtin_revert_vms_condition_handler,
6468
6469 /* TARGET_MAX */
6470 CODE_FOR_builtin_minub8,
6471 CODE_FOR_builtin_minsb8,
6472 CODE_FOR_builtin_minuw4,
6473 CODE_FOR_builtin_minsw4,
6474 CODE_FOR_builtin_maxub8,
6475 CODE_FOR_builtin_maxsb8,
6476 CODE_FOR_builtin_maxuw4,
6477 CODE_FOR_builtin_maxsw4,
6478 CODE_FOR_builtin_perr,
6479 CODE_FOR_builtin_pklb,
6480 CODE_FOR_builtin_pkwb,
6481 CODE_FOR_builtin_unpkbl,
6482 CODE_FOR_builtin_unpkbw,
6483
6484 /* TARGET_CIX */
6485 CODE_FOR_ctzdi2,
6486 CODE_FOR_clzdi2,
6487 CODE_FOR_popcountdi2
6488 };
6489
6490 struct alpha_builtin_def
6491 {
6492 const char *name;
6493 enum alpha_builtin code;
6494 unsigned int target_mask;
6495 bool is_const;
6496 };
6497
6498 static struct alpha_builtin_def const zero_arg_builtins[] = {
6499 { "__builtin_alpha_implver", ALPHA_BUILTIN_IMPLVER, 0, true },
6500 { "__builtin_alpha_rpcc", ALPHA_BUILTIN_RPCC, 0, false }
6501 };
6502
6503 static struct alpha_builtin_def const one_arg_builtins[] = {
6504 { "__builtin_alpha_amask", ALPHA_BUILTIN_AMASK, 0, true },
6505 { "__builtin_alpha_pklb", ALPHA_BUILTIN_PKLB, MASK_MAX, true },
6506 { "__builtin_alpha_pkwb", ALPHA_BUILTIN_PKWB, MASK_MAX, true },
6507 { "__builtin_alpha_unpkbl", ALPHA_BUILTIN_UNPKBL, MASK_MAX, true },
6508 { "__builtin_alpha_unpkbw", ALPHA_BUILTIN_UNPKBW, MASK_MAX, true },
6509 { "__builtin_alpha_cttz", ALPHA_BUILTIN_CTTZ, MASK_CIX, true },
6510 { "__builtin_alpha_ctlz", ALPHA_BUILTIN_CTLZ, MASK_CIX, true },
6511 { "__builtin_alpha_ctpop", ALPHA_BUILTIN_CTPOP, MASK_CIX, true }
6512 };
6513
6514 static struct alpha_builtin_def const two_arg_builtins[] = {
6515 { "__builtin_alpha_cmpbge", ALPHA_BUILTIN_CMPBGE, 0, true },
6516 { "__builtin_alpha_extbl", ALPHA_BUILTIN_EXTBL, 0, true },
6517 { "__builtin_alpha_extwl", ALPHA_BUILTIN_EXTWL, 0, true },
6518 { "__builtin_alpha_extll", ALPHA_BUILTIN_EXTLL, 0, true },
6519 { "__builtin_alpha_extql", ALPHA_BUILTIN_EXTQL, 0, true },
6520 { "__builtin_alpha_extwh", ALPHA_BUILTIN_EXTWH, 0, true },
6521 { "__builtin_alpha_extlh", ALPHA_BUILTIN_EXTLH, 0, true },
6522 { "__builtin_alpha_extqh", ALPHA_BUILTIN_EXTQH, 0, true },
6523 { "__builtin_alpha_insbl", ALPHA_BUILTIN_INSBL, 0, true },
6524 { "__builtin_alpha_inswl", ALPHA_BUILTIN_INSWL, 0, true },
6525 { "__builtin_alpha_insll", ALPHA_BUILTIN_INSLL, 0, true },
6526 { "__builtin_alpha_insql", ALPHA_BUILTIN_INSQL, 0, true },
6527 { "__builtin_alpha_inswh", ALPHA_BUILTIN_INSWH, 0, true },
6528 { "__builtin_alpha_inslh", ALPHA_BUILTIN_INSLH, 0, true },
6529 { "__builtin_alpha_insqh", ALPHA_BUILTIN_INSQH, 0, true },
6530 { "__builtin_alpha_mskbl", ALPHA_BUILTIN_MSKBL, 0, true },
6531 { "__builtin_alpha_mskwl", ALPHA_BUILTIN_MSKWL, 0, true },
6532 { "__builtin_alpha_mskll", ALPHA_BUILTIN_MSKLL, 0, true },
6533 { "__builtin_alpha_mskql", ALPHA_BUILTIN_MSKQL, 0, true },
6534 { "__builtin_alpha_mskwh", ALPHA_BUILTIN_MSKWH, 0, true },
6535 { "__builtin_alpha_msklh", ALPHA_BUILTIN_MSKLH, 0, true },
6536 { "__builtin_alpha_mskqh", ALPHA_BUILTIN_MSKQH, 0, true },
6537 { "__builtin_alpha_umulh", ALPHA_BUILTIN_UMULH, 0, true },
6538 { "__builtin_alpha_zap", ALPHA_BUILTIN_ZAP, 0, true },
6539 { "__builtin_alpha_zapnot", ALPHA_BUILTIN_ZAPNOT, 0, true },
6540 { "__builtin_alpha_minub8", ALPHA_BUILTIN_MINUB8, MASK_MAX, true },
6541 { "__builtin_alpha_minsb8", ALPHA_BUILTIN_MINSB8, MASK_MAX, true },
6542 { "__builtin_alpha_minuw4", ALPHA_BUILTIN_MINUW4, MASK_MAX, true },
6543 { "__builtin_alpha_minsw4", ALPHA_BUILTIN_MINSW4, MASK_MAX, true },
6544 { "__builtin_alpha_maxub8", ALPHA_BUILTIN_MAXUB8, MASK_MAX, true },
6545 { "__builtin_alpha_maxsb8", ALPHA_BUILTIN_MAXSB8, MASK_MAX, true },
6546 { "__builtin_alpha_maxuw4", ALPHA_BUILTIN_MAXUW4, MASK_MAX, true },
6547 { "__builtin_alpha_maxsw4", ALPHA_BUILTIN_MAXSW4, MASK_MAX, true },
6548 { "__builtin_alpha_perr", ALPHA_BUILTIN_PERR, MASK_MAX, true }
6549 };
6550
6551 static GTY(()) tree alpha_dimode_u;
6552 static GTY(()) tree alpha_v8qi_u;
6553 static GTY(()) tree alpha_v8qi_s;
6554 static GTY(()) tree alpha_v4hi_u;
6555 static GTY(()) tree alpha_v4hi_s;
6556
6557 static GTY(()) tree alpha_builtins[(int) ALPHA_BUILTIN_max];
6558
6559 /* Return the alpha builtin for CODE. */
6560
6561 static tree
6562 alpha_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
6563 {
6564 if (code >= ALPHA_BUILTIN_max)
6565 return error_mark_node;
6566 return alpha_builtins[code];
6567 }
6568
6569 /* Helper function of alpha_init_builtins. Add the built-in specified
6570 by NAME, TYPE, CODE, and ECF. */
6571
6572 static void
6573 alpha_builtin_function (const char *name, tree ftype,
6574 enum alpha_builtin code, unsigned ecf)
6575 {
6576 tree decl = add_builtin_function (name, ftype, (int) code,
6577 BUILT_IN_MD, NULL, NULL_TREE);
6578
6579 if (ecf & ECF_CONST)
6580 TREE_READONLY (decl) = 1;
6581 if (ecf & ECF_NOTHROW)
6582 TREE_NOTHROW (decl) = 1;
6583
6584 alpha_builtins [(int) code] = decl;
6585 }
6586
6587 /* Helper function of alpha_init_builtins. Add the COUNT built-in
6588 functions pointed to by P, with function type FTYPE. */
6589
6590 static void
6591 alpha_add_builtins (const struct alpha_builtin_def *p, size_t count,
6592 tree ftype)
6593 {
6594 size_t i;
6595
6596 for (i = 0; i < count; ++i, ++p)
6597 if ((target_flags & p->target_mask) == p->target_mask)
6598 alpha_builtin_function (p->name, ftype, p->code,
6599 (p->is_const ? ECF_CONST : 0) | ECF_NOTHROW);
6600 }
6601
6602 static void
6603 alpha_init_builtins (void)
6604 {
6605 tree ftype;
6606
6607 alpha_dimode_u = lang_hooks.types.type_for_mode (DImode, 1);
6608 alpha_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8);
6609 alpha_v8qi_s = build_vector_type (intQI_type_node, 8);
6610 alpha_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4);
6611 alpha_v4hi_s = build_vector_type (intHI_type_node, 4);
6612
6613 ftype = build_function_type_list (alpha_dimode_u, NULL_TREE);
6614 alpha_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins), ftype);
6615
6616 ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u, NULL_TREE);
6617 alpha_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins), ftype);
6618
6619 ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u,
6620 alpha_dimode_u, NULL_TREE);
6621 alpha_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins), ftype);
6622
6623 if (TARGET_ABI_OPEN_VMS)
6624 {
6625 ftype = build_function_type_list (ptr_type_node, ptr_type_node,
6626 NULL_TREE);
6627 alpha_builtin_function ("__builtin_establish_vms_condition_handler",
6628 ftype,
6629 ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6630 0);
6631
6632 ftype = build_function_type_list (ptr_type_node, void_type_node,
6633 NULL_TREE);
6634 alpha_builtin_function ("__builtin_revert_vms_condition_handler", ftype,
6635 ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, 0);
6636
6637 vms_patch_builtins ();
6638 }
6639 }
6640
6641 /* Expand an expression EXP that calls a built-in function,
6642 with result going to TARGET if that's convenient
6643 (and in mode MODE if that's convenient).
6644 SUBTARGET may be used as the target for computing one of EXP's operands.
6645 IGNORE is nonzero if the value is to be ignored. */
6646
6647 static rtx
6648 alpha_expand_builtin (tree exp, rtx target,
6649 rtx subtarget ATTRIBUTE_UNUSED,
6650 machine_mode mode ATTRIBUTE_UNUSED,
6651 int ignore ATTRIBUTE_UNUSED)
6652 {
6653 #define MAX_ARGS 2
6654
6655 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6656 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6657 tree arg;
6658 call_expr_arg_iterator iter;
6659 enum insn_code icode;
6660 rtx op[MAX_ARGS], pat;
6661 int arity;
6662 bool nonvoid;
6663
6664 if (fcode >= ALPHA_BUILTIN_max)
6665 internal_error ("bad builtin fcode");
6666 icode = code_for_builtin[fcode];
6667 if (icode == 0)
6668 internal_error ("bad builtin fcode");
6669
6670 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
6671
6672 arity = 0;
6673 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
6674 {
6675 const struct insn_operand_data *insn_op;
6676
6677 if (arg == error_mark_node)
6678 return NULL_RTX;
6679 if (arity >= MAX_ARGS)
6680 return NULL_RTX;
6681
6682 insn_op = &insn_data[icode].operand[arity + nonvoid];
6683
6684 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
6685
6686 if (!(*insn_op->predicate) (op[arity], insn_op->mode))
6687 op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
6688 arity++;
6689 }
6690
6691 if (nonvoid)
6692 {
6693 machine_mode tmode = insn_data[icode].operand[0].mode;
6694 if (!target
6695 || GET_MODE (target) != tmode
6696 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
6697 target = gen_reg_rtx (tmode);
6698 }
6699
6700 switch (arity)
6701 {
6702 case 0:
6703 pat = GEN_FCN (icode) (target);
6704 break;
6705 case 1:
6706 if (nonvoid)
6707 pat = GEN_FCN (icode) (target, op[0]);
6708 else
6709 pat = GEN_FCN (icode) (op[0]);
6710 break;
6711 case 2:
6712 pat = GEN_FCN (icode) (target, op[0], op[1]);
6713 break;
6714 default:
6715 gcc_unreachable ();
6716 }
6717 if (!pat)
6718 return NULL_RTX;
6719 emit_insn (pat);
6720
6721 if (nonvoid)
6722 return target;
6723 else
6724 return const0_rtx;
6725 }
6726
6727 /* Fold the builtin for the CMPBGE instruction. This is a vector comparison
6728 with an 8-bit output vector. OPINT contains the integer operands; bit N
6729 of OP_CONST is set if OPINT[N] is valid. */
6730
6731 static tree
6732 alpha_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const)
6733 {
6734 if (op_const == 3)
6735 {
6736 int i, val;
6737 for (i = 0, val = 0; i < 8; ++i)
6738 {
6739 unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff;
6740 unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff;
6741 if (c0 >= c1)
6742 val |= 1 << i;
6743 }
6744 return build_int_cst (alpha_dimode_u, val);
6745 }
6746 else if (op_const == 2 && opint[1] == 0)
6747 return build_int_cst (alpha_dimode_u, 0xff);
6748 return NULL;
6749 }
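/* Worked example (illustration): with both operands constant,
   __builtin_alpha_cmpbge (0x1122334455667788, 0x1122334455667789)
   compares byte 0 as 0x88 >= 0x89 (false) and bytes 1..7 as equal
   (true), so the loop above folds the call to 0xfe.  */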
6750
6751 /* Fold the builtin for the ZAPNOT instruction. This is essentially a
6752 specialized form of an AND operation. Other byte manipulation instructions
6753 are defined in terms of this instruction, so this is also used as a
6754 subroutine for other builtins.
6755
6756 OP contains the tree operands; OPINT contains the extracted integer values.
6757 Bit N of OP_CONST is set if OPINT[N] is valid. OP may be null if only
6758 OPINT may be considered. */
6759
6760 static tree
6761 alpha_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[],
6762 long op_const)
6763 {
6764 if (op_const & 2)
6765 {
6766 unsigned HOST_WIDE_INT mask = 0;
6767 int i;
6768
6769 for (i = 0; i < 8; ++i)
6770 if ((opint[1] >> i) & 1)
6771 mask |= (unsigned HOST_WIDE_INT)0xff << (i * 8);
6772
6773 if (op_const & 1)
6774 return build_int_cst (alpha_dimode_u, opint[0] & mask);
6775
6776 if (op)
6777 return fold_build2 (BIT_AND_EXPR, alpha_dimode_u, op[0],
6778 build_int_cst (alpha_dimode_u, mask));
6779 }
6780 else if ((op_const & 1) && opint[0] == 0)
6781 return build_int_cst (alpha_dimode_u, 0);
6782 return NULL;
6783 }
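/* For example (sketch): __builtin_alpha_zapnot (x, 0x0f), with only the
   mask operand constant, folds to x & 0x00000000ffffffff here, while
   __builtin_alpha_zap (x, 0x0f) arrives with the mask complemented and
   folds to x & 0xffffffff00000000.  */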
6784
6785 /* Fold the builtins for the EXT family of instructions. */
6786
6787 static tree
6788 alpha_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[],
6789 long op_const, unsigned HOST_WIDE_INT bytemask,
6790 bool is_high)
6791 {
6792 long zap_const = 2;
6793 tree *zap_op = NULL;
6794
6795 if (op_const & 2)
6796 {
6797 unsigned HOST_WIDE_INT loc;
6798
6799 loc = opint[1] & 7;
6800 loc *= BITS_PER_UNIT;
6801
6802 if (loc != 0)
6803 {
6804 if (op_const & 1)
6805 {
6806 unsigned HOST_WIDE_INT temp = opint[0];
6807 if (is_high)
6808 temp <<= loc;
6809 else
6810 temp >>= loc;
6811 opint[0] = temp;
6812 zap_const = 3;
6813 }
6814 }
6815 else
6816 zap_op = op;
6817 }
6818
6819 opint[1] = bytemask;
6820 return alpha_fold_builtin_zapnot (zap_op, opint, zap_const);
6821 }
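/* Worked example (illustration): __builtin_alpha_extwl (x, 3) arrives
   here with bytemask 0x03; for constant operands the value is shifted
   right by 24 bits and masked to a word, so
   extwl (0x1122334455667788, 3) folds to 0x4455.  */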
6822
6823 /* Fold the builtins for the INS family of instructions. */
6824
6825 static tree
6826 alpha_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[],
6827 long op_const, unsigned HOST_WIDE_INT bytemask,
6828 bool is_high)
6829 {
6830 if ((op_const & 1) && opint[0] == 0)
6831 return build_int_cst (alpha_dimode_u, 0);
6832
6833 if (op_const & 2)
6834 {
6835 unsigned HOST_WIDE_INT temp, loc, byteloc;
6836 tree *zap_op = NULL;
6837
6838 loc = opint[1] & 7;
6839 bytemask <<= loc;
6840
6841 temp = opint[0];
6842 if (is_high)
6843 {
6844 byteloc = (64 - (loc * 8)) & 0x3f;
6845 if (byteloc == 0)
6846 zap_op = op;
6847 else
6848 temp >>= byteloc;
6849 bytemask >>= 8;
6850 }
6851 else
6852 {
6853 byteloc = loc * 8;
6854 if (byteloc == 0)
6855 zap_op = op;
6856 else
6857 temp <<= byteloc;
6858 }
6859
6860 opint[0] = temp;
6861 opint[1] = bytemask;
6862 return alpha_fold_builtin_zapnot (zap_op, opint, op_const);
6863 }
6864
6865 return NULL;
6866 }
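/* Worked example (illustration): __builtin_alpha_inswl (0x4455, 3)
   arrives here with bytemask 0x03; the value is shifted left by 24 bits
   and masked to bytes 3..4, so the call folds to 0x0000004455000000.  */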
6867
6868 static tree
6869 alpha_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[],
6870 long op_const, unsigned HOST_WIDE_INT bytemask,
6871 bool is_high)
6872 {
6873 if (op_const & 2)
6874 {
6875 unsigned HOST_WIDE_INT loc;
6876
6877 loc = opint[1] & 7;
6878 bytemask <<= loc;
6879
6880 if (is_high)
6881 bytemask >>= 8;
6882
6883 opint[1] = bytemask ^ 0xff;
6884 }
6885
6886 return alpha_fold_builtin_zapnot (op, opint, op_const);
6887 }
6888
6889 static tree
6890 alpha_fold_vector_minmax (enum tree_code code, tree op[], tree vtype)
6891 {
6892 tree op0 = fold_convert (vtype, op[0]);
6893 tree op1 = fold_convert (vtype, op[1]);
6894 tree val = fold_build2 (code, vtype, op0, op1);
6895 return fold_build1 (VIEW_CONVERT_EXPR, alpha_dimode_u, val);
6896 }
6897
6898 static tree
6899 alpha_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const)
6900 {
6901 unsigned HOST_WIDE_INT temp = 0;
6902 int i;
6903
6904 if (op_const != 3)
6905 return NULL;
6906
6907 for (i = 0; i < 8; ++i)
6908 {
6909 unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff;
6910 unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff;
6911 if (a >= b)
6912 temp += a - b;
6913 else
6914 temp += b - a;
6915 }
6916
6917 return build_int_cst (alpha_dimode_u, temp);
6918 }
6919
6920 static tree
6921 alpha_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const)
6922 {
6923 unsigned HOST_WIDE_INT temp;
6924
6925 if (op_const == 0)
6926 return NULL;
6927
6928 temp = opint[0] & 0xff;
6929 temp |= (opint[0] >> 24) & 0xff00;
6930
6931 return build_int_cst (alpha_dimode_u, temp);
6932 }
6933
6934 static tree
6935 alpha_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const)
6936 {
6937 unsigned HOST_WIDE_INT temp;
6938
6939 if (op_const == 0)
6940 return NULL;
6941
6942 temp = opint[0] & 0xff;
6943 temp |= (opint[0] >> 8) & 0xff00;
6944 temp |= (opint[0] >> 16) & 0xff0000;
6945 temp |= (opint[0] >> 24) & 0xff000000;
6946
6947 return build_int_cst (alpha_dimode_u, temp);
6948 }
6949
6950 static tree
6951 alpha_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const)
6952 {
6953 unsigned HOST_WIDE_INT temp;
6954
6955 if (op_const == 0)
6956 return NULL;
6957
6958 temp = opint[0] & 0xff;
6959 temp |= (opint[0] & 0xff00) << 24;
6960
6961 return build_int_cst (alpha_dimode_u, temp);
6962 }
6963
6964 static tree
6965 alpha_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const)
6966 {
6967 unsigned HOST_WIDE_INT temp;
6968
6969 if (op_const == 0)
6970 return NULL;
6971
6972 temp = opint[0] & 0xff;
6973 temp |= (opint[0] & 0x0000ff00) << 8;
6974 temp |= (opint[0] & 0x00ff0000) << 16;
6975 temp |= (opint[0] & 0xff000000) << 24;
6976
6977 return build_int_cst (alpha_dimode_u, temp);
6978 }
6979
6980 static tree
6981 alpha_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const)
6982 {
6983 unsigned HOST_WIDE_INT temp;
6984
6985 if (op_const == 0)
6986 return NULL;
6987
6988 if (opint[0] == 0)
6989 temp = 64;
6990 else
6991 temp = exact_log2 (opint[0] & -opint[0]);
6992
6993 return build_int_cst (alpha_dimode_u, temp);
6994 }
6995
6996 static tree
6997 alpha_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const)
6998 {
6999 unsigned HOST_WIDE_INT temp;
7000
7001 if (op_const == 0)
7002 return NULL;
7003
7004 if (opint[0] == 0)
7005 temp = 64;
7006 else
7007 temp = 64 - floor_log2 (opint[0]) - 1;
7008
7009 return build_int_cst (alpha_dimode_u, temp);
7010 }
7011
7012 static tree
7013 alpha_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const)
7014 {
7015 unsigned HOST_WIDE_INT temp, op;
7016
7017 if (op_const == 0)
7018 return NULL;
7019
7020 op = opint[0];
7021 temp = 0;
7022 while (op)
7023 temp++, op &= op - 1;
7024
7025 return build_int_cst (alpha_dimode_u, temp);
7026 }
7027
7028 /* Fold one of our builtin functions. */
7029
7030 static tree
7031 alpha_fold_builtin (tree fndecl, int n_args, tree *op,
7032 bool ignore ATTRIBUTE_UNUSED)
7033 {
7034 unsigned HOST_WIDE_INT opint[MAX_ARGS];
7035 long op_const = 0;
7036 int i;
7037
7038 if (n_args > MAX_ARGS)
7039 return NULL;
7040
7041 for (i = 0; i < n_args; i++)
7042 {
7043 tree arg = op[i];
7044 if (arg == error_mark_node)
7045 return NULL;
7046
7047 opint[i] = 0;
7048 if (TREE_CODE (arg) == INTEGER_CST)
7049 {
7050 op_const |= 1L << i;
7051 opint[i] = int_cst_value (arg);
7052 }
7053 }
7054
7055 switch (DECL_FUNCTION_CODE (fndecl))
7056 {
7057 case ALPHA_BUILTIN_CMPBGE:
7058 return alpha_fold_builtin_cmpbge (opint, op_const);
7059
7060 case ALPHA_BUILTIN_EXTBL:
7061 return alpha_fold_builtin_extxx (op, opint, op_const, 0x01, false);
7062 case ALPHA_BUILTIN_EXTWL:
7063 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, false);
7064 case ALPHA_BUILTIN_EXTLL:
7065 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, false);
7066 case ALPHA_BUILTIN_EXTQL:
7067 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, false);
7068 case ALPHA_BUILTIN_EXTWH:
7069 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, true);
7070 case ALPHA_BUILTIN_EXTLH:
7071 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, true);
7072 case ALPHA_BUILTIN_EXTQH:
7073 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, true);
7074
7075 case ALPHA_BUILTIN_INSBL:
7076 return alpha_fold_builtin_insxx (op, opint, op_const, 0x01, false);
7077 case ALPHA_BUILTIN_INSWL:
7078 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, false);
7079 case ALPHA_BUILTIN_INSLL:
7080 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, false);
7081 case ALPHA_BUILTIN_INSQL:
7082 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, false);
7083 case ALPHA_BUILTIN_INSWH:
7084 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, true);
7085 case ALPHA_BUILTIN_INSLH:
7086 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, true);
7087 case ALPHA_BUILTIN_INSQH:
7088 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, true);
7089
7090 case ALPHA_BUILTIN_MSKBL:
7091 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x01, false);
7092 case ALPHA_BUILTIN_MSKWL:
7093 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, false);
7094 case ALPHA_BUILTIN_MSKLL:
7095 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, false);
7096 case ALPHA_BUILTIN_MSKQL:
7097 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, false);
7098 case ALPHA_BUILTIN_MSKWH:
7099 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, true);
7100 case ALPHA_BUILTIN_MSKLH:
7101 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, true);
7102 case ALPHA_BUILTIN_MSKQH:
7103 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, true);
7104
7105 case ALPHA_BUILTIN_ZAP:
7106 opint[1] ^= 0xff;
7107 /* FALLTHRU */
7108 case ALPHA_BUILTIN_ZAPNOT:
7109 return alpha_fold_builtin_zapnot (op, opint, op_const);
7110
7111 case ALPHA_BUILTIN_MINUB8:
7112 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_u);
7113 case ALPHA_BUILTIN_MINSB8:
7114 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_s);
7115 case ALPHA_BUILTIN_MINUW4:
7116 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_u);
7117 case ALPHA_BUILTIN_MINSW4:
7118 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_s);
7119 case ALPHA_BUILTIN_MAXUB8:
7120 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_u);
7121 case ALPHA_BUILTIN_MAXSB8:
7122 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_s);
7123 case ALPHA_BUILTIN_MAXUW4:
7124 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_u);
7125 case ALPHA_BUILTIN_MAXSW4:
7126 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_s);
7127
7128 case ALPHA_BUILTIN_PERR:
7129 return alpha_fold_builtin_perr (opint, op_const);
7130 case ALPHA_BUILTIN_PKLB:
7131 return alpha_fold_builtin_pklb (opint, op_const);
7132 case ALPHA_BUILTIN_PKWB:
7133 return alpha_fold_builtin_pkwb (opint, op_const);
7134 case ALPHA_BUILTIN_UNPKBL:
7135 return alpha_fold_builtin_unpkbl (opint, op_const);
7136 case ALPHA_BUILTIN_UNPKBW:
7137 return alpha_fold_builtin_unpkbw (opint, op_const);
7138
7139 case ALPHA_BUILTIN_CTTZ:
7140 return alpha_fold_builtin_cttz (opint, op_const);
7141 case ALPHA_BUILTIN_CTLZ:
7142 return alpha_fold_builtin_ctlz (opint, op_const);
7143 case ALPHA_BUILTIN_CTPOP:
7144 return alpha_fold_builtin_ctpop (opint, op_const);
7145
7146 case ALPHA_BUILTIN_AMASK:
7147 case ALPHA_BUILTIN_IMPLVER:
7148 case ALPHA_BUILTIN_RPCC:
7149 /* None of these are foldable at compile-time. */
7150 default:
7151 return NULL;
7152 }
7153 }
7154
7155 bool
7156 alpha_gimple_fold_builtin (gimple_stmt_iterator *gsi)
7157 {
7158 bool changed = false;
7159 gimple *stmt = gsi_stmt (*gsi);
7160 tree call = gimple_call_fn (stmt);
7161 gimple *new_stmt = NULL;
7162
7163 if (call)
7164 {
7165 tree fndecl = gimple_call_fndecl (stmt);
7166
7167 if (fndecl)
7168 {
7169 tree arg0, arg1;
7170
7171 switch (DECL_FUNCTION_CODE (fndecl))
7172 {
7173 case ALPHA_BUILTIN_UMULH:
7174 arg0 = gimple_call_arg (stmt, 0);
7175 arg1 = gimple_call_arg (stmt, 1);
7176
7177 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
7178 MULT_HIGHPART_EXPR, arg0, arg1);
7179 break;
7180 default:
7181 break;
7182 }
7183 }
7184 }
7185
7186 if (new_stmt)
7187 {
7188 gsi_replace (gsi, new_stmt, true);
7189 changed = true;
7190 }
7191
7192 return changed;
7193 }
7194 \f
7195 /* This page contains routines that are used to determine what the function
7196 prologue and epilogue code will do and write them out. */
7197
7198 /* Compute the size of the save area in the stack. */
7199
7200 /* These variables are used for communication between the following functions.
7201 They indicate various things about the current function being compiled
7202 that are used to tell what kind of prologue, epilogue and procedure
7203 descriptor to generate. */
7204
7205 /* Nonzero if we need a stack procedure. */
7206 enum alpha_procedure_types {PT_NULL = 0, PT_REGISTER = 1, PT_STACK = 2};
7207 static enum alpha_procedure_types alpha_procedure_type;
7208
7209 /* Register number (either FP or SP) that is used to unwind the frame. */
7210 static int vms_unwind_regno;
7211
7212 /* Register number used to save FP. We need not have one for RA since
7213 we don't modify it for register procedures. This is only defined
7214 for register frame procedures. */
7215 static int vms_save_fp_regno;
7216
7217 /* Register number used to reference objects off our PV. */
7218 static int vms_base_regno;
7219
7220 /* Compute register masks for saved registers. */
7221
7222 static void
7223 alpha_sa_mask (unsigned long *imaskP, unsigned long *fmaskP)
7224 {
7225 unsigned long imask = 0;
7226 unsigned long fmask = 0;
7227 unsigned int i;
7228
7229 /* When outputting a thunk, we don't have valid register life info,
7230 but assemble_start_function wants to output .frame and .mask
7231 directives. */
7232 if (cfun->is_thunk)
7233 {
7234 *imaskP = 0;
7235 *fmaskP = 0;
7236 return;
7237 }
7238
7239 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7240 imask |= (1UL << HARD_FRAME_POINTER_REGNUM);
7241
7242 /* One for every register we have to save. */
7243 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
7244 if (! fixed_regs[i] && ! call_used_regs[i]
7245 && df_regs_ever_live_p (i) && i != REG_RA)
7246 {
7247 if (i < 32)
7248 imask |= (1UL << i);
7249 else
7250 fmask |= (1UL << (i - 32));
7251 }
7252
7253 /* We need to restore these for the handler. */
7254 if (crtl->calls_eh_return)
7255 {
7256 for (i = 0; ; ++i)
7257 {
7258 unsigned regno = EH_RETURN_DATA_REGNO (i);
7259 if (regno == INVALID_REGNUM)
7260 break;
7261 imask |= 1UL << regno;
7262 }
7263 }
7264
7265 /* If any register spilled, then spill the return address also. */
7266 /* ??? This is required by the Digital stack unwind specification
7267 and isn't needed if we're doing Dwarf2 unwinding. */
7268 if (imask || fmask || alpha_ra_ever_killed ())
7269 imask |= (1UL << REG_RA);
7270
7271 *imaskP = imask;
7272 *fmaskP = fmask;
7273 }
7274
7275 int
7276 alpha_sa_size (void)
7277 {
7278 unsigned long mask[2];
7279 int sa_size = 0;
7280 int i, j;
7281
7282 alpha_sa_mask (&mask[0], &mask[1]);
7283
7284 for (j = 0; j < 2; ++j)
7285 for (i = 0; i < 32; ++i)
7286 if ((mask[j] >> i) & 1)
7287 sa_size++;
7288
7289 if (TARGET_ABI_OPEN_VMS)
7290 {
7291 /* Start with a stack procedure if we make any calls (REG_RA used), or
7292 need a frame pointer, with a register procedure if we otherwise need
7293 at least a slot, and with a null procedure in other cases. */
7294 if ((mask[0] >> REG_RA) & 1 || frame_pointer_needed)
7295 alpha_procedure_type = PT_STACK;
7296 else if (get_frame_size() != 0)
7297 alpha_procedure_type = PT_REGISTER;
7298 else
7299 alpha_procedure_type = PT_NULL;
7300
7301 /* Don't reserve space for saving FP & RA yet. Do that later after we've
7302 made the final decision on stack procedure vs register procedure. */
7303 if (alpha_procedure_type == PT_STACK)
7304 sa_size -= 2;
7305
7306 /* Decide whether to refer to objects off our PV via FP or PV.
7307 If we need FP for something else or if we receive a nonlocal
7308 goto (which expects PV to contain the value), we must use PV.
7309 Otherwise, start by assuming we can use FP. */
7310
7311 vms_base_regno
7312 = (frame_pointer_needed
7313 || cfun->has_nonlocal_label
7314 || alpha_procedure_type == PT_STACK
7315 || crtl->outgoing_args_size)
7316 ? REG_PV : HARD_FRAME_POINTER_REGNUM;
7317
7318 /* If we want to copy PV into FP, we need to find some register
7319 in which to save FP. */
7320
7321 vms_save_fp_regno = -1;
7322 if (vms_base_regno == HARD_FRAME_POINTER_REGNUM)
7323 for (i = 0; i < 32; i++)
7324 if (! fixed_regs[i] && call_used_regs[i] && ! df_regs_ever_live_p (i))
7325 vms_save_fp_regno = i;
7326
7327 /* A VMS condition handler requires a stack procedure in our
7328 implementation (this is not required by the calling standard). */
7329 if ((vms_save_fp_regno == -1 && alpha_procedure_type == PT_REGISTER)
7330 || cfun->machine->uses_condition_handler)
7331 vms_base_regno = REG_PV, alpha_procedure_type = PT_STACK;
7332 else if (alpha_procedure_type == PT_NULL)
7333 vms_base_regno = REG_PV;
7334
7335 /* Stack unwinding should be done via FP unless we use it for PV. */
7336 vms_unwind_regno = (vms_base_regno == REG_PV
7337 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
7338
7339 /* If this is a stack procedure, allow space for saving FP, RA and
7340 a condition handler slot if needed. */
7341 if (alpha_procedure_type == PT_STACK)
7342 sa_size += 2 + cfun->machine->uses_condition_handler;
7343 }
7344 else
7345 {
7346 /* Our size must be even (multiple of 16 bytes). */
7347 if (sa_size & 1)
7348 sa_size++;
7349 }
7350
7351 return sa_size * 8;
7352 }
7353
7354 /* Define the offset between two registers, one to be eliminated,
7355 and the other its replacement, at the start of a routine. */
7356
7357 HOST_WIDE_INT
7358 alpha_initial_elimination_offset (unsigned int from,
7359 unsigned int to ATTRIBUTE_UNUSED)
7360 {
7361 HOST_WIDE_INT ret;
7362
7363 ret = alpha_sa_size ();
7364 ret += ALPHA_ROUND (crtl->outgoing_args_size);
7365
7366 switch (from)
7367 {
7368 case FRAME_POINTER_REGNUM:
7369 break;
7370
7371 case ARG_POINTER_REGNUM:
7372 ret += (ALPHA_ROUND (get_frame_size ()
7373 + crtl->args.pretend_args_size)
7374 - crtl->args.pretend_args_size);
7375 break;
7376
7377 default:
7378 gcc_unreachable ();
7379 }
7380
7381 return ret;
7382 }
7383
7384 #if TARGET_ABI_OPEN_VMS
7385
7386 /* Worker function for TARGET_CAN_ELIMINATE. */
7387
7388 static bool
7389 alpha_vms_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
7390 {
7391 /* We need the alpha_procedure_type to decide. Evaluate it now. */
7392 alpha_sa_size ();
7393
7394 switch (alpha_procedure_type)
7395 {
7396 case PT_NULL:
7397 /* NULL procedures have no frame of their own and we only
7398 know how to resolve from the current stack pointer. */
7399 return to == STACK_POINTER_REGNUM;
7400
7401 case PT_REGISTER:
7402 case PT_STACK:
7403 /* We always eliminate except to the stack pointer if there is no
7404 usable frame pointer at hand. */
7405 return (to != STACK_POINTER_REGNUM
7406 || vms_unwind_regno != HARD_FRAME_POINTER_REGNUM);
7407 }
7408
7409 gcc_unreachable ();
7410 }
7411
7412 /* FROM is to be eliminated for TO. Return the offset so that TO+offset
7413 designates the same location as FROM. */
7414
7415 HOST_WIDE_INT
7416 alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to)
7417 {
7418 /* The only possible attempts we ever expect are ARG or FRAME_PTR to
7419 HARD_FRAME or STACK_PTR. We need the alpha_procedure_type to decide
7420 on the proper computations and will need the register save area size
7421 in most cases. */
7422
7423 HOST_WIDE_INT sa_size = alpha_sa_size ();
7424
7425 /* PT_NULL procedures have no frame of their own and we only allow
7426 elimination to the stack pointer, which coincides with the argument
7427 pointer; we resolve the soft frame pointer to it as well. */
7428
7429 if (alpha_procedure_type == PT_NULL)
7430 return 0;
7431
7432 /* For a PT_STACK procedure the frame layout looks as follows
7433
7434 -----> decreasing addresses
7435
7436                <    size rounded up to 16    |         likewise         >
7437 --------------#------------------------------+++--------------+++-------#
7438 incoming args # pretended args | "frame" | regs sa | PV | outgoing args #
7439 --------------#---------------------------------------------------------#
7440               ^                              ^                ^         ^
7441         ARG_PTR                      FRAME_PTR   HARD_FRAME_PTR STACK_PTR
7442
7443
7444 PT_REGISTER procedures are similar in that they may have a frame of their
7445 own. They have no regs-sa/pv/outgoing-args area.
7446
7447 We first compute offset to HARD_FRAME_PTR, then add what we need to get
7448 to STACK_PTR if need be. */
7449
7450 {
7451 HOST_WIDE_INT offset;
7452 HOST_WIDE_INT pv_save_size = alpha_procedure_type == PT_STACK ? 8 : 0;
7453
7454 switch (from)
7455 {
7456 case FRAME_POINTER_REGNUM:
7457 offset = ALPHA_ROUND (sa_size + pv_save_size);
7458 break;
7459 case ARG_POINTER_REGNUM:
7460 offset = (ALPHA_ROUND (sa_size + pv_save_size
7461 + get_frame_size ()
7462 + crtl->args.pretend_args_size)
7463 - crtl->args.pretend_args_size);
7464 break;
7465 default:
7466 gcc_unreachable ();
7467 }
7468
7469 if (to == STACK_POINTER_REGNUM)
7470 offset += ALPHA_ROUND (crtl->outgoing_args_size);
7471
7472 return offset;
7473 }
7474 }
7475
7476 #define COMMON_OBJECT "common_object"
7477
7478 static tree
7479 common_object_handler (tree *node, tree name ATTRIBUTE_UNUSED,
7480 tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED,
7481 bool *no_add_attrs ATTRIBUTE_UNUSED)
7482 {
7483 tree decl = *node;
7484 gcc_assert (DECL_P (decl));
7485
7486 DECL_COMMON (decl) = 1;
7487 return NULL_TREE;
7488 }
7489
7490 static const struct attribute_spec vms_attribute_table[] =
7491 {
7492 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7493 affects_type_identity } */
7494 { COMMON_OBJECT, 0, 1, true, false, false, common_object_handler, false },
7495 { NULL, 0, 0, false, false, false, NULL, false }
7496 };
7497
7498 void
7499 vms_output_aligned_decl_common(FILE *file, tree decl, const char *name,
7500 unsigned HOST_WIDE_INT size,
7501 unsigned int align)
7502 {
7503 tree attr = DECL_ATTRIBUTES (decl);
7504 fprintf (file, "%s", COMMON_ASM_OP);
7505 assemble_name (file, name);
7506 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED, size);
7507 /* ??? Unlike on OSF/1, the alignment factor is not in log units. */
7508 fprintf (file, ",%u", align / BITS_PER_UNIT);
7509 if (attr)
7510 {
7511 attr = lookup_attribute (COMMON_OBJECT, attr);
7512 if (attr)
7513 fprintf (file, ",%s",
7514 IDENTIFIER_POINTER (TREE_VALUE (TREE_VALUE (attr))));
7515 }
7516 fputc ('\n', file);
7517 }
7518
7519 #undef COMMON_OBJECT
7520
7521 #endif
7522
7523 bool
7524 alpha_find_lo_sum_using_gp (rtx insn)
7525 {
7526 subrtx_iterator::array_type array;
7527 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
7528 {
7529 const_rtx x = *iter;
7530 if (GET_CODE (x) == LO_SUM && XEXP (x, 0) == pic_offset_table_rtx)
7531 return true;
7532 }
7533 return false;
7534 }
7535
7536 static int
7537 alpha_does_function_need_gp (void)
7538 {
7539 rtx_insn *insn;
7540
7541 /* The GP being variable is an OSF ABI thing. */
7542 if (! TARGET_ABI_OSF)
7543 return 0;
7544
7545 /* We need the gp to load the address of __mcount. */
7546 if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7547 return 1;
7548
7549 /* The code emitted by alpha_output_mi_thunk_osf uses the gp. */
7550 if (cfun->is_thunk)
7551 return 1;
7552
7553 /* The nonlocal receiver pattern assumes that the gp is valid for
7554 the nested function. Reasonable because it's almost always set
7555 correctly already. For the cases where that's wrong, make sure
7556 the nested function loads its gp on entry. */
7557 if (crtl->has_nonlocal_goto)
7558 return 1;
7559
7560 /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first.
7561 Even if we are a static function, we still need to do this in case
7562 our address is taken and passed to something like qsort. */
7563
7564 push_topmost_sequence ();
7565 insn = get_insns ();
7566 pop_topmost_sequence ();
7567
7568 for (; insn; insn = NEXT_INSN (insn))
7569 if (NONDEBUG_INSN_P (insn)
7570 && GET_CODE (PATTERN (insn)) != USE
7571 && GET_CODE (PATTERN (insn)) != CLOBBER
7572 && get_attr_usegp (insn))
7573 return 1;
7574
7575 return 0;
7576 }
7577
7578 \f
7579 /* Helper function to set RTX_FRAME_RELATED_P on instructions, including
7580 sequences. */
7581
7582 static rtx_insn *
7583 set_frame_related_p (void)
7584 {
7585 rtx_insn *seq = get_insns ();
7586 rtx_insn *insn;
7587
7588 end_sequence ();
7589
7590 if (!seq)
7591 return NULL;
7592
7593 if (INSN_P (seq))
7594 {
7595 insn = seq;
7596 while (insn != NULL_RTX)
7597 {
7598 RTX_FRAME_RELATED_P (insn) = 1;
7599 insn = NEXT_INSN (insn);
7600 }
7601 seq = emit_insn (seq);
7602 }
7603 else
7604 {
7605 seq = emit_insn (seq);
7606 RTX_FRAME_RELATED_P (seq) = 1;
7607 }
7608 return seq;
7609 }
7610
7611 #define FRP(exp) (start_sequence (), exp, set_frame_related_p ())
7612
7613 /* Generates a store with the proper unwind info attached. VALUE is
7614 stored at BASE_REG+BASE_OFS. If FRAME_BIAS is nonzero, then BASE_REG
7615 contains SP+FRAME_BIAS, and that is the unwind info that should be
7616 generated. If FRAME_REG != VALUE, then VALUE is being stored on
7617 behalf of FRAME_REG, and FRAME_REG should be present in the unwind. */
7618
7619 static void
7620 emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias,
7621 HOST_WIDE_INT base_ofs, rtx frame_reg)
7622 {
7623 rtx addr, mem;
7624 rtx_insn *insn;
7625
7626 addr = plus_constant (Pmode, base_reg, base_ofs);
7627 mem = gen_frame_mem (DImode, addr);
7628
7629 insn = emit_move_insn (mem, value);
7630 RTX_FRAME_RELATED_P (insn) = 1;
7631
7632 if (frame_bias || value != frame_reg)
7633 {
7634 if (frame_bias)
7635 {
7636 addr = plus_constant (Pmode, stack_pointer_rtx,
7637 frame_bias + base_ofs);
7638 mem = gen_rtx_MEM (DImode, addr);
7639 }
7640
7641 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7642 gen_rtx_SET (mem, frame_reg));
7643 }
7644 }
7645
7646 static void
7647 emit_frame_store (unsigned int regno, rtx base_reg,
7648 HOST_WIDE_INT frame_bias, HOST_WIDE_INT base_ofs)
7649 {
7650 rtx reg = gen_rtx_REG (DImode, regno);
7651 emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg);
7652 }
7653
7654 /* Compute the frame size. SIZE is the size of the "naked" frame
7655 and SA_SIZE is the size of the register save area. */
7656
7657 static HOST_WIDE_INT
7658 compute_frame_size (HOST_WIDE_INT size, HOST_WIDE_INT sa_size)
7659 {
7660 if (TARGET_ABI_OPEN_VMS)
7661 return ALPHA_ROUND (sa_size
7662 + (alpha_procedure_type == PT_STACK ? 8 : 0)
7663 + size
7664 + crtl->args.pretend_args_size);
7665 else
7666 return ALPHA_ROUND (crtl->outgoing_args_size)
7667 + sa_size
7668 + ALPHA_ROUND (size
7669 + crtl->args.pretend_args_size);
7670 }
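/* Example (sketch, assuming ALPHA_ROUND rounds up to a multiple of 16):
   on OSF with 40 bytes of locals, four saved registers (sa_size == 32)
   and no outgoing or pretend arguments this returns
   0 + 32 + ALPHA_ROUND (40) == 32 + 48 == 80.  */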
7671
7672 /* Write function prologue. */
7673
7674 /* On vms we have two kinds of functions:
7675
7676 - stack frame (PROC_STACK)
7677 these are 'normal' functions with local variables that call
7678 other functions
7679 - register frame (PROC_REGISTER)
7680 keeps all data in registers, needs no stack
7681
7682 We must pass this to the assembler so it can generate the
7683 proper pdsc (procedure descriptor).
7684 This is done with the '.pdesc' command.
7685
7686 On non-VMS targets, we don't really differentiate between the two, as we can
7687 simply allocate stack without saving registers. */
7688
7689 void
7690 alpha_expand_prologue (void)
7691 {
7692 /* Registers to save. */
7693 unsigned long imask = 0;
7694 unsigned long fmask = 0;
7695 /* Stack space needed for pushing registers clobbered by us. */
7696 HOST_WIDE_INT sa_size, sa_bias;
7697 /* Complete stack size needed. */
7698 HOST_WIDE_INT frame_size;
7699 /* Probed stack size; it additionally includes the size of
7700 the "reserve region" if any. */
7701 HOST_WIDE_INT probed_size;
7702 /* Offset from base reg to register save area. */
7703 HOST_WIDE_INT reg_offset;
7704 rtx sa_reg;
7705 int i;
7706
7707 sa_size = alpha_sa_size ();
7708 frame_size = compute_frame_size (get_frame_size (), sa_size);
7709
7710 if (flag_stack_usage_info)
7711 current_function_static_stack_size = frame_size;
7712
7713 if (TARGET_ABI_OPEN_VMS)
7714 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
7715 else
7716 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
7717
7718 alpha_sa_mask (&imask, &fmask);
7719
7720 /* Emit an insn to reload GP, if needed. */
7721 if (TARGET_ABI_OSF)
7722 {
7723 alpha_function_needs_gp = alpha_does_function_need_gp ();
7724 if (alpha_function_needs_gp)
7725 emit_insn (gen_prologue_ldgp ());
7726 }
7727
7728 /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert
7729 the call to mcount ourselves, rather than having the linker do it
7730 magically in response to -pg. Since _mcount has special linkage,
7731 don't represent the call as a call. */
7732 if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7733 emit_insn (gen_prologue_mcount ());
7734
7735 /* Adjust the stack by the frame size. If the frame size is > 4096
7736 bytes, we need to be sure we probe somewhere in the first and last
7737 4096 bytes (we can probably get away without the latter test) and
7738 every 8192 bytes in between. If the frame size is > 32768, we
7739 do this in a loop. Otherwise, we generate the explicit probe
7740 instructions.
7741
7742 Note that we are only allowed to adjust sp once in the prologue. */
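/* For example, with -fstack-check off, no saved registers and a
   20000-byte frame, the code below emits probes at sp-4096, sp-12288
   and sp-20000, followed by the single adjustment of sp by -20000.  */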
7743
7744 probed_size = frame_size;
7745 if (flag_stack_check)
7746 probed_size += STACK_CHECK_PROTECT;
7747
7748 if (probed_size <= 32768)
7749 {
7750 if (probed_size > 4096)
7751 {
7752 int probed;
7753
7754 for (probed = 4096; probed < probed_size; probed += 8192)
7755 emit_insn (gen_probe_stack (GEN_INT (-probed)));
7756
7757 /* We only have to do this probe if we aren't saving registers or
7758 if we are probing beyond the frame because of -fstack-check. */
7759 if ((sa_size == 0 && probed_size > probed - 4096)
7760 || flag_stack_check)
7761 emit_insn (gen_probe_stack (GEN_INT (-probed_size)));
7762 }
7763
7764 if (frame_size != 0)
7765 FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
7766 GEN_INT (-frame_size))));
7767 }
7768 else
7769 {
7770 /* Here we generate code to set R22 to SP + 4096 and set R23 to the
7771 number of 8192 byte blocks to probe. We then probe each block
7772 in the loop and then set SP to the proper location. If the
7773 amount remaining is > 4096, we have to do one more probe if we
7774 are not saving any registers or if we are probing beyond the
7775 frame because of -fstack-check. */
7776
7777 HOST_WIDE_INT blocks = (probed_size + 4096) / 8192;
7778 HOST_WIDE_INT leftover = probed_size + 4096 - blocks * 8192;
7779 rtx ptr = gen_rtx_REG (DImode, 22);
7780 rtx count = gen_rtx_REG (DImode, 23);
7781 rtx seq;
7782
7783 emit_move_insn (count, GEN_INT (blocks));
7784 emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096)));
7785
7786 /* Because of the difficulty in emitting a new basic block this
7787 late in the compilation, generate the loop as a single insn. */
7788 emit_insn (gen_prologue_stack_probe_loop (count, ptr));
7789
7790 if ((leftover > 4096 && sa_size == 0) || flag_stack_check)
7791 {
7792 rtx last = gen_rtx_MEM (DImode,
7793 plus_constant (Pmode, ptr, -leftover));
7794 MEM_VOLATILE_P (last) = 1;
7795 emit_move_insn (last, const0_rtx);
7796 }
7797
7798 if (flag_stack_check)
7799 {
7800 /* If -fstack-check is specified we have to load the entire
7801 constant into a register and subtract from the sp in one go,
7802 because the probed stack size is not equal to the frame size. */
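/* E.g. for frame_size == 0x18000 the split below gives lo == -0x8000
   and hi == 0x20000, so ptr is built as 0x20000 + (-0x8000) == 0x18000
   with one move and one add before the subtraction from sp.  */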
7803 HOST_WIDE_INT lo, hi;
7804 lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
7805 hi = frame_size - lo;
7806
7807 emit_move_insn (ptr, GEN_INT (hi));
7808 emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo)));
7809 seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx,
7810 ptr));
7811 }
7812 else
7813 {
7814 seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr,
7815 GEN_INT (-leftover)));
7816 }
7817
7818 /* This alternative is special, because the DWARF code cannot
7819 possibly intuit through the loop above. So we invent this
7820 note for it to look at instead.
7821 RTX_FRAME_RELATED_P (seq) = 1;
7822 add_reg_note (seq, REG_FRAME_RELATED_EXPR,
7823 gen_rtx_SET (stack_pointer_rtx,
7824 plus_constant (Pmode, stack_pointer_rtx,
7825 -frame_size)));
7826 }
7827
7828 /* Cope with very large offsets to the register save area. */
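/* E.g. with reg_offset == 0x9000 and sa_size == 0x100, low below is
   -0x7000, so sa_bias becomes 0x10000; $24 is set to sp + 0x10000 and
   the saves then use small negative displacements from $24.  */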
7829 sa_bias = 0;
7830 sa_reg = stack_pointer_rtx;
7831 if (reg_offset + sa_size > 0x8000)
7832 {
7833 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
7834 rtx sa_bias_rtx;
7835
7836 if (low + sa_size <= 0x8000)
7837 sa_bias = reg_offset - low, reg_offset = low;
7838 else
7839 sa_bias = reg_offset, reg_offset = 0;
7840
7841 sa_reg = gen_rtx_REG (DImode, 24);
7842 sa_bias_rtx = GEN_INT (sa_bias);
7843
7844 if (add_operand (sa_bias_rtx, DImode))
7845 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx));
7846 else
7847 {
7848 emit_move_insn (sa_reg, sa_bias_rtx);
7849 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg));
7850 }
7851 }
7852
7853 /* Save regs in stack order, beginning with the VMS PV. */
7854 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7855 emit_frame_store (REG_PV, stack_pointer_rtx, 0, 0);
7856
7857 /* Save register RA next. */
7858 if (imask & (1UL << REG_RA))
7859 {
7860 emit_frame_store (REG_RA, sa_reg, sa_bias, reg_offset);
7861 imask &= ~(1UL << REG_RA);
7862 reg_offset += 8;
7863 }
7864
7865 /* Now save any other registers required to be saved. */
7866 for (i = 0; i < 31; i++)
7867 if (imask & (1UL << i))
7868 {
7869 emit_frame_store (i, sa_reg, sa_bias, reg_offset);
7870 reg_offset += 8;
7871 }
7872
7873 for (i = 0; i < 31; i++)
7874 if (fmask & (1UL << i))
7875 {
7876 emit_frame_store (i+32, sa_reg, sa_bias, reg_offset);
7877 reg_offset += 8;
7878 }
7879
7880 if (TARGET_ABI_OPEN_VMS)
7881 {
7882 /* Register frame procedures save the fp. */
7883 if (alpha_procedure_type == PT_REGISTER)
7884 {
7885 rtx_insn *insn =
7886 emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno),
7887 hard_frame_pointer_rtx);
7888 add_reg_note (insn, REG_CFA_REGISTER, NULL);
7889 RTX_FRAME_RELATED_P (insn) = 1;
7890 }
7891
7892 if (alpha_procedure_type != PT_NULL && vms_base_regno != REG_PV)
7893 emit_insn (gen_force_movdi (gen_rtx_REG (DImode, vms_base_regno),
7894 gen_rtx_REG (DImode, REG_PV)));
7895
7896 if (alpha_procedure_type != PT_NULL
7897 && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
7898 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7899
7900 /* If we have to allocate space for outgoing args, do it now. */
7901 if (crtl->outgoing_args_size != 0)
7902 {
7903 rtx_insn *seq
7904 = emit_move_insn (stack_pointer_rtx,
7905 plus_constant
7906 (Pmode, hard_frame_pointer_rtx,
7907 - (ALPHA_ROUND
7908 (crtl->outgoing_args_size))));
7909
7910 /* Only set FRAME_RELATED_P on the stack adjustment we just emitted
7911 if ! frame_pointer_needed. Setting the bit will change the CFA
7912 computation rule to use sp again, which would be wrong if we had
7913 frame_pointer_needed, as this means sp might move unpredictably
7914 later on.
7915
7916 Also, note that
7917 frame_pointer_needed
7918 => vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
7919 and
7920 crtl->outgoing_args_size != 0
7921 => alpha_procedure_type != PT_NULL,
7922
7923 so when we are not setting the bit here, we are guaranteed to
7924 have emitted an FRP frame pointer update just before. */
7925 RTX_FRAME_RELATED_P (seq) = ! frame_pointer_needed;
7926 }
7927 }
7928 else
7929 {
7930 /* If we need a frame pointer, set it from the stack pointer. */
7931 if (frame_pointer_needed)
7932 {
7933 if (TARGET_CAN_FAULT_IN_PROLOGUE)
7934 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7935 else
7936 /* This must always be the last instruction in the
7937 prologue, thus we emit a special move + clobber. */
7938 FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx,
7939 stack_pointer_rtx, sa_reg)));
7940 }
7941 }
7942
7943 /* The ABIs for VMS and OSF/1 say that while we can schedule insns into
7944 the prologue, for exception handling reasons, we cannot do this for
7945 any insn that might fault. We could prevent this for mems with a
7946 (clobber:BLK (scratch)), but this doesn't work for fp insns. So we
7947 have to prevent all such scheduling with a blockage.
7948
7949 Linux, on the other hand, never bothered to implement OSF/1's
7950 exception handling, and so doesn't care about such things. Anyone
7951 planning to use dwarf2 frame-unwind info can also omit the blockage. */
7952
7953 if (! TARGET_CAN_FAULT_IN_PROLOGUE)
7954 emit_insn (gen_blockage ());
7955 }
7956
7957 /* Count the number of .file directives, so that .loc is up to date. */
7958 int num_source_filenames = 0;
7959
7960 /* Output the textual info surrounding the prologue. */
7961
7962 void
7963 alpha_start_function (FILE *file, const char *fnname,
7964 tree decl ATTRIBUTE_UNUSED)
7965 {
7966 unsigned long imask = 0;
7967 unsigned long fmask = 0;
7968 /* Stack space needed for pushing registers clobbered by us. */
7969 HOST_WIDE_INT sa_size;
7970 /* Complete stack size needed. */
7971 unsigned HOST_WIDE_INT frame_size;
7972 /* The maximum debuggable frame size. */
7973 unsigned HOST_WIDE_INT max_frame_size = 1UL << 31;
7974 /* Offset from base reg to register save area. */
7975 HOST_WIDE_INT reg_offset;
7976 char *entry_label = (char *) alloca (strlen (fnname) + 6);
7977 char *tramp_label = (char *) alloca (strlen (fnname) + 6);
7978 int i;
7979
7980 #if TARGET_ABI_OPEN_VMS
7981 vms_start_function (fnname);
7982 #endif
7983
7984 alpha_fnname = fnname;
7985 sa_size = alpha_sa_size ();
7986 frame_size = compute_frame_size (get_frame_size (), sa_size);
7987
7988 if (TARGET_ABI_OPEN_VMS)
7989 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
7990 else
7991 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
7992
7993 alpha_sa_mask (&imask, &fmask);
7994
7995 /* Issue function start and label. */
7996 if (TARGET_ABI_OPEN_VMS || !flag_inhibit_size_directive)
7997 {
7998 fputs ("\t.ent ", file);
7999 assemble_name (file, fnname);
8000 putc ('\n', file);
8001
8002 /* If the function needs GP, we'll write the "..ng" label there.
8003 Otherwise, do it here. */
8004 if (TARGET_ABI_OSF
8005 && ! alpha_function_needs_gp
8006 && ! cfun->is_thunk)
8007 {
8008 putc ('$', file);
8009 assemble_name (file, fnname);
8010 fputs ("..ng:\n", file);
8011 }
8012 }
8013 /* Nested functions on VMS that are potentially called via a trampoline
8014 get a special transfer entry point that loads the called function's
8015 procedure descriptor and static chain. */
8016 if (TARGET_ABI_OPEN_VMS
8017 && !TREE_PUBLIC (decl)
8018 && DECL_CONTEXT (decl)
8019 && !TYPE_P (DECL_CONTEXT (decl))
8020 && TREE_CODE (DECL_CONTEXT (decl)) != TRANSLATION_UNIT_DECL)
8021 {
8022 strcpy (tramp_label, fnname);
8023 strcat (tramp_label, "..tr");
8024 ASM_OUTPUT_LABEL (file, tramp_label);
8025 fprintf (file, "\tldq $1,24($27)\n");
8026 fprintf (file, "\tldq $27,16($27)\n");
8027 }
8028
8029 strcpy (entry_label, fnname);
8030 if (TARGET_ABI_OPEN_VMS)
8031 strcat (entry_label, "..en");
8032
8033 ASM_OUTPUT_LABEL (file, entry_label);
8034 inside_function = TRUE;
8035
8036 if (TARGET_ABI_OPEN_VMS)
8037 fprintf (file, "\t.base $%d\n", vms_base_regno);
8038
8039 if (TARGET_ABI_OSF
8040 && TARGET_IEEE_CONFORMANT
8041 && !flag_inhibit_size_directive)
8042 {
8043 /* Set flags in procedure descriptor to request IEEE-conformant
8044 math-library routines. The value we set it to is PDSC_EXC_IEEE
8045 (/usr/include/pdsc.h). */
8046 fputs ("\t.eflag 48\n", file);
8047 }
8048
8049 /* Set up offsets to the alpha virtual arg/local debugging pointers. */
8050 alpha_auto_offset = -frame_size + crtl->args.pretend_args_size;
8051 alpha_arg_offset = -frame_size + 48;
8052
8053 /* Describe our frame. If the frame size does not fit in a signed
8054 32-bit integer, print it as zero to avoid an assembler error. We won't be
8055 properly describing such a frame, but that's the best we can do. */
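/* For instance, a 64-byte OSF frame with no pretend args and no frame
   pointer is described as ".frame $30,64,$26,0", $30 being the stack
   pointer.  */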
8056 if (TARGET_ABI_OPEN_VMS)
8057 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,"
8058 HOST_WIDE_INT_PRINT_DEC "\n",
8059 vms_unwind_regno,
8060 frame_size >= (1UL << 31) ? 0 : frame_size,
8061 reg_offset);
8062 else if (!flag_inhibit_size_directive)
8063 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n",
8064 (frame_pointer_needed
8065 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM),
8066 frame_size >= max_frame_size ? 0 : frame_size,
8067 crtl->args.pretend_args_size);
8068
8069 /* Describe which registers were spilled. */
8070 if (TARGET_ABI_OPEN_VMS)
8071 {
8072 if (imask)
8073 /* ??? Does VMS care if mask contains ra? The old code didn't
8074 set it, so I don't set it here. */
8075 fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1UL << REG_RA));
8076 if (fmask)
8077 fprintf (file, "\t.fmask 0x%lx,0\n", fmask);
8078 if (alpha_procedure_type == PT_REGISTER)
8079 fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno);
8080 }
8081 else if (!flag_inhibit_size_directive)
8082 {
8083 if (imask)
8084 {
8085 fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask,
8086 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
8087
8088 for (i = 0; i < 32; ++i)
8089 if (imask & (1UL << i))
8090 reg_offset += 8;
8091 }
8092
8093 if (fmask)
8094 fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask,
8095 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
8096 }
8097
8098 #if TARGET_ABI_OPEN_VMS
8099 /* If a user condition handler has been installed at some point, emit
8100 the procedure descriptor bits to point the Condition Handling Facility
8101 at the indirection wrapper, and state the fp offset at which the user
8102 handler may be found. */
8103 if (cfun->machine->uses_condition_handler)
8104 {
8105 fprintf (file, "\t.handler __gcc_shell_handler\n");
8106 fprintf (file, "\t.handler_data %d\n", VMS_COND_HANDLER_FP_OFFSET);
8107 }
8108
8109 #ifdef TARGET_VMS_CRASH_DEBUG
8110 /* Support of minimal traceback info. */
8111 switch_to_section (readonly_data_section);
8112 fprintf (file, "\t.align 3\n");
8113 assemble_name (file, fnname); fputs ("..na:\n", file);
8114 fputs ("\t.ascii \"", file);
8115 assemble_name (file, fnname);
8116 fputs ("\\0\"\n", file);
8117 switch_to_section (text_section);
8118 #endif
8119 #endif /* TARGET_ABI_OPEN_VMS */
8120 }
8121
8122 /* Emit the .prologue note at the scheduled end of the prologue. */
8123
8124 static void
8125 alpha_output_function_end_prologue (FILE *file)
8126 {
8127 if (TARGET_ABI_OPEN_VMS)
8128 fputs ("\t.prologue\n", file);
8129 else if (!flag_inhibit_size_directive)
8130 fprintf (file, "\t.prologue %d\n",
8131 alpha_function_needs_gp || cfun->is_thunk);
8132 }
8133
8134 /* Write function epilogue. */
8135
8136 void
8137 alpha_expand_epilogue (void)
8138 {
8139 /* Registers to save. */
8140 unsigned long imask = 0;
8141 unsigned long fmask = 0;
8142 /* Stack space needed for pushing registers clobbered by us. */
8143 HOST_WIDE_INT sa_size;
8144 /* Complete stack size needed. */
8145 HOST_WIDE_INT frame_size;
8146 /* Offset from base reg to register save area. */
8147 HOST_WIDE_INT reg_offset;
8148 int fp_is_frame_pointer, fp_offset;
8149 rtx sa_reg, sa_reg_exp = NULL;
8150 rtx sp_adj1, sp_adj2, mem, reg, insn;
8151 rtx eh_ofs;
8152 rtx cfa_restores = NULL_RTX;
8153 int i;
8154
8155 sa_size = alpha_sa_size ();
8156 frame_size = compute_frame_size (get_frame_size (), sa_size);
8157
8158 if (TARGET_ABI_OPEN_VMS)
8159 {
8160 if (alpha_procedure_type == PT_STACK)
8161 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
8162 else
8163 reg_offset = 0;
8164 }
8165 else
8166 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
8167
8168 alpha_sa_mask (&imask, &fmask);
8169
8170 fp_is_frame_pointer
8171 = (TARGET_ABI_OPEN_VMS
8172 ? alpha_procedure_type == PT_STACK
8173 : frame_pointer_needed);
8174 fp_offset = 0;
8175 sa_reg = stack_pointer_rtx;
8176
8177 if (crtl->calls_eh_return)
8178 eh_ofs = EH_RETURN_STACKADJ_RTX;
8179 else
8180 eh_ofs = NULL_RTX;
8181
8182 if (sa_size)
8183 {
8184 /* If we have a frame pointer, restore SP from it. */
8185 if (TARGET_ABI_OPEN_VMS
8186 ? vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
8187 : frame_pointer_needed)
8188 emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
8189
8190 /* Cope with very large offsets to the register save area. */
8191 if (reg_offset + sa_size > 0x8000)
8192 {
8193 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
8194 HOST_WIDE_INT bias;
8195
8196 if (low + sa_size <= 0x8000)
8197 bias = reg_offset - low, reg_offset = low;
8198 else
8199 bias = reg_offset, reg_offset = 0;
8200
8201 sa_reg = gen_rtx_REG (DImode, 22);
8202 sa_reg_exp = plus_constant (Pmode, stack_pointer_rtx, bias);
8203
8204 emit_move_insn (sa_reg, sa_reg_exp);
8205 }
8206
8207 /* Restore registers in order, excepting a true frame pointer. */
8208
8209 mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg, reg_offset));
8210 reg = gen_rtx_REG (DImode, REG_RA);
8211 emit_move_insn (reg, mem);
8212 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
8213
8214 reg_offset += 8;
8215 imask &= ~(1UL << REG_RA);
8216
8217 for (i = 0; i < 31; ++i)
8218 if (imask & (1UL << i))
8219 {
8220 if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer)
8221 fp_offset = reg_offset;
8222 else
8223 {
8224 mem = gen_frame_mem (DImode,
8225 plus_constant (Pmode, sa_reg,
8226 reg_offset));
8227 reg = gen_rtx_REG (DImode, i);
8228 emit_move_insn (reg, mem);
8229 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
8230 cfa_restores);
8231 }
8232 reg_offset += 8;
8233 }
8234
8235 for (i = 0; i < 31; ++i)
8236 if (fmask & (1UL << i))
8237 {
8238 mem = gen_frame_mem (DFmode, plus_constant (Pmode, sa_reg,
8239 reg_offset));
8240 reg = gen_rtx_REG (DFmode, i+32);
8241 emit_move_insn (reg, mem);
8242 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
8243 reg_offset += 8;
8244 }
8245 }
8246
8247 if (frame_size || eh_ofs)
8248 {
8249 sp_adj1 = stack_pointer_rtx;
8250
8251 if (eh_ofs)
8252 {
8253 sp_adj1 = gen_rtx_REG (DImode, 23);
8254 emit_move_insn (sp_adj1,
8255 gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs));
8256 }
8257
8258 /* If the stack size is large, begin computation into a temporary
8259 register so as not to interfere with a potential fp restore,
8260 which must be consecutive with an SP restore. */
8261 if (frame_size < 32768 && !cfun->calls_alloca)
8262 sp_adj2 = GEN_INT (frame_size);
8263 else if (frame_size < 0x40007fffL)
8264 {
8265 int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
8266
8267 sp_adj2 = plus_constant (Pmode, sp_adj1, frame_size - low);
8268 if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2))
8269 sp_adj1 = sa_reg;
8270 else
8271 {
8272 sp_adj1 = gen_rtx_REG (DImode, 23);
8273 emit_move_insn (sp_adj1, sp_adj2);
8274 }
8275 sp_adj2 = GEN_INT (low);
8276 }
8277 else
8278 {
8279 rtx tmp = gen_rtx_REG (DImode, 23);
8280 sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size, 3, false);
8281 if (!sp_adj2)
8282 {
8283 /* We can't drop new things to memory this late, afaik,
8284 so build it up by pieces. */
8285 sp_adj2 = alpha_emit_set_long_const (tmp, frame_size);
8286 gcc_assert (sp_adj2);
8287 }
8288 }
8289
8290 /* From now on, things must be in order. So emit blockages. */
8291
8292 /* Restore the frame pointer. */
8293 if (fp_is_frame_pointer)
8294 {
8295 emit_insn (gen_blockage ());
8296 mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg,
8297 fp_offset));
8298 emit_move_insn (hard_frame_pointer_rtx, mem);
8299 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8300 hard_frame_pointer_rtx, cfa_restores);
8301 }
8302 else if (TARGET_ABI_OPEN_VMS)
8303 {
8304 emit_insn (gen_blockage ());
8305 emit_move_insn (hard_frame_pointer_rtx,
8306 gen_rtx_REG (DImode, vms_save_fp_regno));
8307 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8308 hard_frame_pointer_rtx, cfa_restores);
8309 }
8310
8311 /* Restore the stack pointer. */
8312 emit_insn (gen_blockage ());
8313 if (sp_adj2 == const0_rtx)
8314 insn = emit_move_insn (stack_pointer_rtx, sp_adj1);
8315 else
8316 insn = emit_move_insn (stack_pointer_rtx,
8317 gen_rtx_PLUS (DImode, sp_adj1, sp_adj2));
8318 REG_NOTES (insn) = cfa_restores;
8319 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
8320 RTX_FRAME_RELATED_P (insn) = 1;
8321 }
8322 else
8323 {
8324 gcc_assert (cfa_restores == NULL);
8325
8326 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_REGISTER)
8327 {
8328 emit_insn (gen_blockage ());
8329 insn = emit_move_insn (hard_frame_pointer_rtx,
8330 gen_rtx_REG (DImode, vms_save_fp_regno));
8331 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8332 RTX_FRAME_RELATED_P (insn) = 1;
8333 }
8334 }
8335 }
8336 \f
8337 /* Output the rest of the textual info surrounding the epilogue. */
8338
8339 void
8340 alpha_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED)
8341 {
8342 rtx_insn *insn;
8343
8344 /* We output a nop after noreturn calls at the very end of the function to
8345 ensure that the return address always remains in the caller's code range,
8346 as not doing so might confuse unwinding engines. */
8347 insn = get_last_insn ();
8348 if (!INSN_P (insn))
8349 insn = prev_active_insn (insn);
8350 if (insn && CALL_P (insn))
8351 output_asm_insn (get_insn_template (CODE_FOR_nop, NULL), NULL);
8352
8353 #if TARGET_ABI_OPEN_VMS
8354 /* Write the linkage entries. */
8355 alpha_write_linkage (file, fnname);
8356 #endif
8357
8358 /* End the function. */
8359 if (TARGET_ABI_OPEN_VMS
8360 || !flag_inhibit_size_directive)
8361 {
8362 fputs ("\t.end ", file);
8363 assemble_name (file, fnname);
8364 putc ('\n', file);
8365 }
8366 inside_function = FALSE;
8367 }
8368
8369 #if TARGET_ABI_OSF
8370 /* Emit a tail call to FUNCTION after adjusting THIS by DELTA.
8371
8372 In order to avoid the hordes of differences between generated code
8373 with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating
8374 lots of code loading up large constants, generate rtl and emit it
8375 instead of going straight to text.
8376
8377 Not sure why this idea hasn't been explored before... */
8378
8379 static void
8380 alpha_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8381 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8382 tree function)
8383 {
8384 HOST_WIDE_INT hi, lo;
8385 rtx this_rtx, funexp;
8386 rtx_insn *insn;
8387
8388 /* We always require a valid GP. */
8389 emit_insn (gen_prologue_ldgp ());
8390 emit_note (NOTE_INSN_PROLOGUE_END);
8391
8392 /* Find the "this" pointer. If the function returns a structure,
8393 the structure return pointer is in $16. */
8394 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8395 this_rtx = gen_rtx_REG (Pmode, 17);
8396 else
8397 this_rtx = gen_rtx_REG (Pmode, 16);
8398
8399 /* Add DELTA. When possible we use ldah+lda. Otherwise load the
8400 entire constant for the add. */
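/* E.g. delta == 0x12345 splits into lo == 0x2345 and hi == 0x10000 and
   is added with an ldah/lda pair; when hi + lo no longer equals delta
   (a delta too large for the 32-bit split), the full constant is built
   with alpha_emit_set_long_const instead.  */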
8401 lo = ((delta & 0xffff) ^ 0x8000) - 0x8000;
8402 hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8403 if (hi + lo == delta)
8404 {
8405 if (hi)
8406 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (hi)));
8407 if (lo)
8408 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (lo)));
8409 }
8410 else
8411 {
8412 rtx tmp = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 0), delta);
8413 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8414 }
8415
8416 /* Add a delta stored in the vtable at VCALL_OFFSET. */
8417 if (vcall_offset)
8418 {
8419 rtx tmp, tmp2;
8420
8421 tmp = gen_rtx_REG (Pmode, 0);
8422 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
8423
8424 lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000;
8425 hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8426 if (hi + lo == vcall_offset)
8427 {
8428 if (hi)
8429 emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi)));
8430 }
8431 else
8432 {
8433 tmp2 = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 1),
8434 vcall_offset);
8435 emit_insn (gen_adddi3 (tmp, tmp, tmp2));
8436 lo = 0;
8437 }
8438 if (lo)
8439 tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo));
8440 else
8441 tmp2 = tmp;
8442 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2));
8443
8444 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8445 }
8446
8447 /* Generate a tail call to the target function. */
8448 if (! TREE_USED (function))
8449 {
8450 assemble_external (function);
8451 TREE_USED (function) = 1;
8452 }
8453 funexp = XEXP (DECL_RTL (function), 0);
8454 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8455 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx));
8456 SIBLING_CALL_P (insn) = 1;
8457
8458 /* Run just enough of rest_of_compilation to get the insns emitted.
8459 There's not really enough bulk here to make other passes such as
8460 instruction scheduling worthwhile. Note that use_thunk calls
8461 assemble_start_function and assemble_end_function. */
8462 insn = get_insns ();
8463 shorten_branches (insn);
8464 final_start_function (insn, file, 1);
8465 final (insn, file, 1);
8466 final_end_function ();
8467 }
8468 #endif /* TARGET_ABI_OSF */
8469 \f
8470 /* Debugging support. */
8471
8472 #include "gstab.h"
8473
8474 /* Name of the file containing the current function. */
8475
8476 static const char *current_function_file = "";
8477
8478 /* Offsets to alpha virtual arg/local debugging pointers. */
8479
8480 long alpha_arg_offset;
8481 long alpha_auto_offset;
8482 \f
8483 /* Emit a new filename to a stream. */
8484
8485 void
8486 alpha_output_filename (FILE *stream, const char *name)
8487 {
8488 static int first_time = TRUE;
8489
8490 if (first_time)
8491 {
8492 first_time = FALSE;
8493 ++num_source_filenames;
8494 current_function_file = name;
8495 fprintf (stream, "\t.file\t%d ", num_source_filenames);
8496 output_quoted_string (stream, name);
8497 fprintf (stream, "\n");
8498 }
8499
8500 else if (name != current_function_file
8501 && strcmp (name, current_function_file) != 0)
8502 {
8503 ++num_source_filenames;
8504 current_function_file = name;
8505 fprintf (stream, "\t.file\t%d ", num_source_filenames);
8506
8507 output_quoted_string (stream, name);
8508 fprintf (stream, "\n");
8509 }
8510 }
8511 \f
8512 /* Structure to show the current status of registers and memory. */
8513
8514 struct shadow_summary
8515 {
8516 struct {
8517 unsigned int i : 31; /* Mask of int regs */
8518 unsigned int fp : 31; /* Mask of fp regs */
8519 unsigned int mem : 1; /* mem == imem | fpmem */
8520 } used, defd;
8521 };
8522
8523 /* Summarize the effects of expression X on the machine. Update SUM, a pointer
8524 to the summary structure. SET is nonzero if the insn is setting the
8525 object, otherwise zero. */
8526
8527 static void
8528 summarize_insn (rtx x, struct shadow_summary *sum, int set)
8529 {
8530 const char *format_ptr;
8531 int i, j;
8532
8533 if (x == 0)
8534 return;
8535
8536 switch (GET_CODE (x))
8537 {
8538 /* ??? Note that this case would be incorrect if the Alpha had a
8539 ZERO_EXTRACT in SET_DEST. */
8540 case SET:
8541 summarize_insn (SET_SRC (x), sum, 0);
8542 summarize_insn (SET_DEST (x), sum, 1);
8543 break;
8544
8545 case CLOBBER:
8546 summarize_insn (XEXP (x, 0), sum, 1);
8547 break;
8548
8549 case USE:
8550 summarize_insn (XEXP (x, 0), sum, 0);
8551 break;
8552
8553 case ASM_OPERANDS:
8554 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
8555 summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0);
8556 break;
8557
8558 case PARALLEL:
8559 for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
8560 summarize_insn (XVECEXP (x, 0, i), sum, 0);
8561 break;
8562
8563 case SUBREG:
8564 summarize_insn (SUBREG_REG (x), sum, 0);
8565 break;
8566
8567 case REG:
8568 {
8569 int regno = REGNO (x);
8570 unsigned long mask = ((unsigned long) 1) << (regno % 32);
8571
8572 if (regno == 31 || regno == 63)
8573 break;
8574
8575 if (set)
8576 {
8577 if (regno < 32)
8578 sum->defd.i |= mask;
8579 else
8580 sum->defd.fp |= mask;
8581 }
8582 else
8583 {
8584 if (regno < 32)
8585 sum->used.i |= mask;
8586 else
8587 sum->used.fp |= mask;
8588 }
8589 }
8590 break;
8591
8592 case MEM:
8593 if (set)
8594 sum->defd.mem = 1;
8595 else
8596 sum->used.mem = 1;
8597
8598 /* Find the regs used in memory address computation: */
8599 summarize_insn (XEXP (x, 0), sum, 0);
8600 break;
8601
8602 case CONST_INT: case CONST_WIDE_INT: case CONST_DOUBLE:
8603 case SYMBOL_REF: case LABEL_REF: case CONST:
8604 case SCRATCH: case ASM_INPUT:
8605 break;
8606
8607 /* Handle common unary and binary ops for efficiency. */
8608 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
8609 case MOD: case UDIV: case UMOD: case AND: case IOR:
8610 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
8611 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
8612 case NE: case EQ: case GE: case GT: case LE:
8613 case LT: case GEU: case GTU: case LEU: case LTU:
8614 summarize_insn (XEXP (x, 0), sum, 0);
8615 summarize_insn (XEXP (x, 1), sum, 0);
8616 break;
8617
8618 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
8619 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
8620 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
8621 case SQRT: case FFS:
8622 summarize_insn (XEXP (x, 0), sum, 0);
8623 break;
8624
8625 default:
8626 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
8627 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8628 switch (format_ptr[i])
8629 {
8630 case 'e':
8631 summarize_insn (XEXP (x, i), sum, 0);
8632 break;
8633
8634 case 'E':
8635 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8636 summarize_insn (XVECEXP (x, i, j), sum, 0);
8637 break;
8638
8639 case 'i':
8640 break;
8641
8642 default:
8643 gcc_unreachable ();
8644 }
8645 }
8646 }
8647
8648 /* Ensure a sufficient number of `trapb' insns are in the code when
8649 the user requests code with a trap precision of functions or
8650 instructions.
8651
8652 In naive mode, when the user requests a trap-precision of
8653 "instruction", a trapb is needed after every instruction that may
8654 generate a trap. This ensures that the code is resumption-safe, but
8655 it is also slow.
8656
8657 When optimizations are turned on, we delay issuing a trapb as long
8658 as possible. In this context, a trap shadow is the sequence of
8659 instructions that starts with a (potentially) trap generating
8660 instruction and extends to the next trapb or call_pal instruction
8661 (but GCC never generates call_pal by itself). We can delay (and
8662 therefore sometimes omit) a trapb subject to the following
8663 conditions:
8664
8665 (a) On entry to the trap shadow, if any Alpha register or memory
8666 location contains a value that is used as an operand value by some
8667 instruction in the trap shadow (live on entry), then no instruction
8668 in the trap shadow may modify the register or memory location.
8669
8670 (b) Within the trap shadow, the computation of the base register
8671 for a memory load or store instruction may not involve using the
8672 result of an instruction that might generate an UNPREDICTABLE
8673 result.
8674
8675 (c) Within the trap shadow, no register may be used more than once
8676 as a destination register. (This is to make life easier for the
8677 trap-handler.)
8678
8679 (d) The trap shadow may not include any branch instructions. */
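/* For example, after a trapping "addt $f1,$f2,$f0" the shadow extends
   to the next trapb; within it rule (a) forbids overwriting $f1 or $f2,
   and rule (c) forbids reusing $f0 as a destination.  */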
8680
8681 static void
8682 alpha_handle_trap_shadows (void)
8683 {
8684 struct shadow_summary shadow;
8685 int trap_pending, exception_nesting;
8686 rtx_insn *i, *n;
8687
8688 trap_pending = 0;
8689 exception_nesting = 0;
8690 shadow.used.i = 0;
8691 shadow.used.fp = 0;
8692 shadow.used.mem = 0;
8693 shadow.defd = shadow.used;
8694
8695 for (i = get_insns (); i ; i = NEXT_INSN (i))
8696 {
8697 if (NOTE_P (i))
8698 {
8699 switch (NOTE_KIND (i))
8700 {
8701 case NOTE_INSN_EH_REGION_BEG:
8702 exception_nesting++;
8703 if (trap_pending)
8704 goto close_shadow;
8705 break;
8706
8707 case NOTE_INSN_EH_REGION_END:
8708 exception_nesting--;
8709 if (trap_pending)
8710 goto close_shadow;
8711 break;
8712
8713 case NOTE_INSN_EPILOGUE_BEG:
8714 if (trap_pending && alpha_tp >= ALPHA_TP_FUNC)
8715 goto close_shadow;
8716 break;
8717 }
8718 }
8719 else if (trap_pending)
8720 {
8721 if (alpha_tp == ALPHA_TP_FUNC)
8722 {
8723 if (JUMP_P (i)
8724 && GET_CODE (PATTERN (i)) == RETURN)
8725 goto close_shadow;
8726 }
8727 else if (alpha_tp == ALPHA_TP_INSN)
8728 {
8729 if (optimize > 0)
8730 {
8731 struct shadow_summary sum;
8732
8733 sum.used.i = 0;
8734 sum.used.fp = 0;
8735 sum.used.mem = 0;
8736 sum.defd = sum.used;
8737
8738 switch (GET_CODE (i))
8739 {
8740 case INSN:
8741 /* Annoyingly, get_attr_trap will die on these. */
8742 if (GET_CODE (PATTERN (i)) == USE
8743 || GET_CODE (PATTERN (i)) == CLOBBER)
8744 break;
8745
8746 summarize_insn (PATTERN (i), &sum, 0);
8747
8748 if ((sum.defd.i & shadow.defd.i)
8749 || (sum.defd.fp & shadow.defd.fp))
8750 {
8751 /* (c) would be violated */
8752 goto close_shadow;
8753 }
8754
8755 /* Combine shadow with summary of current insn: */
8756 shadow.used.i |= sum.used.i;
8757 shadow.used.fp |= sum.used.fp;
8758 shadow.used.mem |= sum.used.mem;
8759 shadow.defd.i |= sum.defd.i;
8760 shadow.defd.fp |= sum.defd.fp;
8761 shadow.defd.mem |= sum.defd.mem;
8762
8763 if ((sum.defd.i & shadow.used.i)
8764 || (sum.defd.fp & shadow.used.fp)
8765 || (sum.defd.mem & shadow.used.mem))
8766 {
8767 /* (a) would be violated (also takes care of (b)) */
8768 gcc_assert (get_attr_trap (i) != TRAP_YES
8769 || (!(sum.defd.i & sum.used.i)
8770 && !(sum.defd.fp & sum.used.fp)));
8771
8772 goto close_shadow;
8773 }
8774 break;
8775
8776 case BARRIER:
8777 /* __builtin_unreachable can expand to no code at all,
8778 leaving (barrier) RTXes in the instruction stream. */
8779 goto close_shadow_notrapb;
8780
8781 case JUMP_INSN:
8782 case CALL_INSN:
8783 case CODE_LABEL:
8784 goto close_shadow;
8785
8786 default:
8787 gcc_unreachable ();
8788 }
8789 }
8790 else
8791 {
8792 close_shadow:
8793 n = emit_insn_before (gen_trapb (), i);
8794 PUT_MODE (n, TImode);
8795 PUT_MODE (i, TImode);
8796 close_shadow_notrapb:
8797 trap_pending = 0;
8798 shadow.used.i = 0;
8799 shadow.used.fp = 0;
8800 shadow.used.mem = 0;
8801 shadow.defd = shadow.used;
8802 }
8803 }
8804 }
8805
8806 if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC)
8807 && NONJUMP_INSN_P (i)
8808 && GET_CODE (PATTERN (i)) != USE
8809 && GET_CODE (PATTERN (i)) != CLOBBER
8810 && get_attr_trap (i) == TRAP_YES)
8811 {
8812 if (optimize && !trap_pending)
8813 summarize_insn (PATTERN (i), &shadow, 0);
8814 trap_pending = 1;
8815 }
8816 }
8817 }
8818 \f
8819 /* Alpha can only issue instruction groups simultaneously if they are
8820 suitably aligned. This is very processor-specific. */
8821 /* There are a number of entries in alphaev4_insn_pipe and alphaev5_insn_pipe
8822 that are marked "fake". These instructions do not exist on that target,
8823 but it is possible to see these insns with deranged combinations of
8824 command-line options, such as "-mtune=ev4 -mmax". Instead of aborting,
8825 choose a result at random. */
8826
8827 enum alphaev4_pipe {
8828 EV4_STOP = 0,
8829 EV4_IB0 = 1,
8830 EV4_IB1 = 2,
8831 EV4_IBX = 4
8832 };
8833
8834 enum alphaev5_pipe {
8835 EV5_STOP = 0,
8836 EV5_NONE = 1,
8837 EV5_E01 = 2,
8838 EV5_E0 = 4,
8839 EV5_E1 = 8,
8840 EV5_FAM = 16,
8841 EV5_FA = 32,
8842 EV5_FM = 64
8843 };
8844
8845 static enum alphaev4_pipe
8846 alphaev4_insn_pipe (rtx_insn *insn)
8847 {
8848 if (recog_memoized (insn) < 0)
8849 return EV4_STOP;
8850 if (get_attr_length (insn) != 4)
8851 return EV4_STOP;
8852
8853 switch (get_attr_type (insn))
8854 {
8855 case TYPE_ILD:
8856 case TYPE_LDSYM:
8857 case TYPE_FLD:
8858 case TYPE_LD_L:
8859 return EV4_IBX;
8860
8861 case TYPE_IADD:
8862 case TYPE_ILOG:
8863 case TYPE_ICMOV:
8864 case TYPE_ICMP:
8865 case TYPE_FST:
8866 case TYPE_SHIFT:
8867 case TYPE_IMUL:
8868 case TYPE_FBR:
8869 case TYPE_MVI: /* fake */
8870 return EV4_IB0;
8871
8872 case TYPE_IST:
8873 case TYPE_MISC:
8874 case TYPE_IBR:
8875 case TYPE_JSR:
8876 case TYPE_CALLPAL:
8877 case TYPE_FCPYS:
8878 case TYPE_FCMOV:
8879 case TYPE_FADD:
8880 case TYPE_FDIV:
8881 case TYPE_FMUL:
8882 case TYPE_ST_C:
8883 case TYPE_MB:
8884 case TYPE_FSQRT: /* fake */
8885 case TYPE_FTOI: /* fake */
8886 case TYPE_ITOF: /* fake */
8887 return EV4_IB1;
8888
8889 default:
8890 gcc_unreachable ();
8891 }
8892 }
8893
8894 static enum alphaev5_pipe
8895 alphaev5_insn_pipe (rtx_insn *insn)
8896 {
8897 if (recog_memoized (insn) < 0)
8898 return EV5_STOP;
8899 if (get_attr_length (insn) != 4)
8900 return EV5_STOP;
8901
8902 switch (get_attr_type (insn))
8903 {
8904 case TYPE_ILD:
8905 case TYPE_FLD:
8906 case TYPE_LDSYM:
8907 case TYPE_IADD:
8908 case TYPE_ILOG:
8909 case TYPE_ICMOV:
8910 case TYPE_ICMP:
8911 return EV5_E01;
8912
8913 case TYPE_IST:
8914 case TYPE_FST:
8915 case TYPE_SHIFT:
8916 case TYPE_IMUL:
8917 case TYPE_MISC:
8918 case TYPE_MVI:
8919 case TYPE_LD_L:
8920 case TYPE_ST_C:
8921 case TYPE_MB:
8922 case TYPE_FTOI: /* fake */
8923 case TYPE_ITOF: /* fake */
8924 return EV5_E0;
8925
8926 case TYPE_IBR:
8927 case TYPE_JSR:
8928 case TYPE_CALLPAL:
8929 return EV5_E1;
8930
8931 case TYPE_FCPYS:
8932 return EV5_FAM;
8933
8934 case TYPE_FBR:
8935 case TYPE_FCMOV:
8936 case TYPE_FADD:
8937 case TYPE_FDIV:
8938 case TYPE_FSQRT: /* fake */
8939 return EV5_FA;
8940
8941 case TYPE_FMUL:
8942 return EV5_FM;
8943
8944 default:
8945 gcc_unreachable ();
8946 }
8947 }
8948
8949 /* IN_USE is a mask of the slots currently filled within the insn group.
8950 The mask bits come from alphaev4_pipe above. If EV4_IBX is set, then
8951 the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1.
8952
8953 LEN is, of course, the length of the group in bytes. */
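/* E.g. a group starting with an ILD (EV4_IBX) followed by an IST
   (EV4_IB1) ends up with IN_USE == IB0|IBX|IB1 and LEN == 8, so any
   third insn starts a new group.  */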
8954
8955 static rtx_insn *
8956 alphaev4_next_group (rtx_insn *insn, int *pin_use, int *plen)
8957 {
8958 int len, in_use;
8959
8960 len = in_use = 0;
8961
8962 if (! INSN_P (insn)
8963 || GET_CODE (PATTERN (insn)) == CLOBBER
8964 || GET_CODE (PATTERN (insn)) == USE)
8965 goto next_and_done;
8966
8967 while (1)
8968 {
8969 enum alphaev4_pipe pipe;
8970
8971 pipe = alphaev4_insn_pipe (insn);
8972 switch (pipe)
8973 {
8974 case EV4_STOP:
8975 /* Force complex instructions to start new groups. */
8976 if (in_use)
8977 goto done;
8978
8979 /* If this is a completely unrecognized insn, it's an asm.
8980 We don't know how long it is, so record length as -1 to
8981 signal a needed realignment. */
8982 if (recog_memoized (insn) < 0)
8983 len = -1;
8984 else
8985 len = get_attr_length (insn);
8986 goto next_and_done;
8987
8988 case EV4_IBX:
8989 if (in_use & EV4_IB0)
8990 {
8991 if (in_use & EV4_IB1)
8992 goto done;
8993 in_use |= EV4_IB1;
8994 }
8995 else
8996 in_use |= EV4_IB0 | EV4_IBX;
8997 break;
8998
8999 case EV4_IB0:
9000 if (in_use & EV4_IB0)
9001 {
9002 if (!(in_use & EV4_IBX) || (in_use & EV4_IB1))
9003 goto done;
9004 in_use |= EV4_IB1;
9005 }
9006 in_use |= EV4_IB0;
9007 break;
9008
9009 case EV4_IB1:
9010 if (in_use & EV4_IB1)
9011 goto done;
9012 in_use |= EV4_IB1;
9013 break;
9014
9015 default:
9016 gcc_unreachable ();
9017 }
9018 len += 4;
9019
9020 /* Haifa doesn't do well scheduling branches. */
9021 if (JUMP_P (insn))
9022 goto next_and_done;
9023
9024 next:
9025 insn = next_nonnote_insn (insn);
9026
9027 if (!insn || ! INSN_P (insn))
9028 goto done;
9029
9030 /* Let Haifa tell us where it thinks insn group boundaries are. */
9031 if (GET_MODE (insn) == TImode)
9032 goto done;
9033
9034 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9035 goto next;
9036 }
9037
9038 next_and_done:
9039 insn = next_nonnote_insn (insn);
9040
9041 done:
9042 *plen = len;
9043 *pin_use = in_use;
9044 return insn;
9045 }
9046
9047 /* IN_USE is a mask of the slots currently filled within the insn group.
9048 The mask bits come from alphaev5_pipe above. If EV5_E01 is set, then
9049 the insn in EV5_E0 can be swapped by the hardware into EV5_E1.
9050
9051 LEN is, of course, the length of the group in bytes. */
9052
9053 static rtx_insn *
9054 alphaev5_next_group (rtx_insn *insn, int *pin_use, int *plen)
9055 {
9056 int len, in_use;
9057
9058 len = in_use = 0;
9059
9060 if (! INSN_P (insn)
9061 || GET_CODE (PATTERN (insn)) == CLOBBER
9062 || GET_CODE (PATTERN (insn)) == USE)
9063 goto next_and_done;
9064
9065 while (1)
9066 {
9067 enum alphaev5_pipe pipe;
9068
9069 pipe = alphaev5_insn_pipe (insn);
9070 switch (pipe)
9071 {
9072 case EV5_STOP:
9073 /* Force complex instructions to start new groups. */
9074 if (in_use)
9075 goto done;
9076
9077 /* If this is a completely unrecognized insn, it's an asm.
9078 We don't know how long it is, so record length as -1 to
9079 signal a needed realignment. */
9080 if (recog_memoized (insn) < 0)
9081 len = -1;
9082 else
9083 len = get_attr_length (insn);
9084 goto next_and_done;
9085
9086 /* ??? In most of the places below, we would like to assert that this
9087 never happens, as it would indicate an error either in Haifa or
9088 in the scheduling description. Unfortunately, Haifa never
9089 schedules the last instruction of the BB, so we don't have
9090 an accurate TI bit to go by. */
9091 case EV5_E01:
9092 if (in_use & EV5_E0)
9093 {
9094 if (in_use & EV5_E1)
9095 goto done;
9096 in_use |= EV5_E1;
9097 }
9098 else
9099 in_use |= EV5_E0 | EV5_E01;
9100 break;
9101
9102 case EV5_E0:
9103 if (in_use & EV5_E0)
9104 {
9105 if (!(in_use & EV5_E01) || (in_use & EV5_E1))
9106 goto done;
9107 in_use |= EV5_E1;
9108 }
9109 in_use |= EV5_E0;
9110 break;
9111
9112 case EV5_E1:
9113 if (in_use & EV5_E1)
9114 goto done;
9115 in_use |= EV5_E1;
9116 break;
9117
9118 case EV5_FAM:
9119 if (in_use & EV5_FA)
9120 {
9121 if (in_use & EV5_FM)
9122 goto done;
9123 in_use |= EV5_FM;
9124 }
9125 else
9126 in_use |= EV5_FA | EV5_FAM;
9127 break;
9128
9129 case EV5_FA:
9130 if (in_use & EV5_FA)
9131 goto done;
9132 in_use |= EV5_FA;
9133 break;
9134
9135 case EV5_FM:
9136 if (in_use & EV5_FM)
9137 goto done;
9138 in_use |= EV5_FM;
9139 break;
9140
9141 case EV5_NONE:
9142 break;
9143
9144 default:
9145 gcc_unreachable ();
9146 }
9147 len += 4;
9148
9149 /* Haifa doesn't do well scheduling branches. */
9150 /* ??? If this is predicted not-taken, slotting continues, except
9151 that no more IBR, FBR, or JSR insns may be slotted. */
9152 if (JUMP_P (insn))
9153 goto next_and_done;
9154
9155 next:
9156 insn = next_nonnote_insn (insn);
9157
9158 if (!insn || ! INSN_P (insn))
9159 goto done;
9160
9161 /* Let Haifa tell us where it thinks insn group boundaries are. */
9162 if (GET_MODE (insn) == TImode)
9163 goto done;
9164
9165 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9166 goto next;
9167 }
9168
9169 next_and_done:
9170 insn = next_nonnote_insn (insn);
9171
9172 done:
9173 *plen = len;
9174 *pin_use = in_use;
9175 return insn;
9176 }
9177
9178 static rtx
9179 alphaev4_next_nop (int *pin_use)
9180 {
9181 int in_use = *pin_use;
9182 rtx nop;
9183
9184 if (!(in_use & EV4_IB0))
9185 {
9186 in_use |= EV4_IB0;
9187 nop = gen_nop ();
9188 }
9189 else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX)
9190 {
9191 in_use |= EV4_IB1;
9192 nop = gen_nop ();
9193 }
9194 else if (TARGET_FP && !(in_use & EV4_IB1))
9195 {
9196 in_use |= EV4_IB1;
9197 nop = gen_fnop ();
9198 }
9199 else
9200 nop = gen_unop ();
9201
9202 *pin_use = in_use;
9203 return nop;
9204 }
9205
9206 static rtx
9207 alphaev5_next_nop (int *pin_use)
9208 {
9209 int in_use = *pin_use;
9210 rtx nop;
9211
9212 if (!(in_use & EV5_E1))
9213 {
9214 in_use |= EV5_E1;
9215 nop = gen_nop ();
9216 }
9217 else if (TARGET_FP && !(in_use & EV5_FA))
9218 {
9219 in_use |= EV5_FA;
9220 nop = gen_fnop ();
9221 }
9222 else if (TARGET_FP && !(in_use & EV5_FM))
9223 {
9224 in_use |= EV5_FM;
9225 nop = gen_fnop ();
9226 }
9227 else
9228 nop = gen_unop ();
9229
9230 *pin_use = in_use;
9231 return nop;
9232 }
9233
9234 /* The instruction group alignment main loop. */
9235
9236 static void
9237 alpha_align_insns_1 (unsigned int max_align,
9238 rtx_insn *(*next_group) (rtx_insn *, int *, int *),
9239 rtx (*next_nop) (int *))
9240 {
9241 /* ALIGN is the known alignment for the insn group. */
9242 unsigned int align;
9243 /* OFS is the offset of the current insn in the insn group. */
9244 int ofs;
9245 int prev_in_use, in_use, len, ldgp;
9246 rtx_insn *i, *next;
9247
9248 /* Let shorten_branches take care of assigning alignments to code labels. */
9249 shorten_branches (get_insns ());
9250
9251 if (align_functions < 4)
9252 align = 4;
9253 else if ((unsigned int) align_functions < max_align)
9254 align = align_functions;
9255 else
9256 align = max_align;
9257
9258 ofs = prev_in_use = 0;
9259 i = get_insns ();
9260 if (NOTE_P (i))
9261 i = next_nonnote_insn (i);
9262
9263 ldgp = alpha_function_needs_gp ? 8 : 0;
9264
9265 while (i)
9266 {
9267 next = (*next_group) (i, &in_use, &len);
9268
9269 /* When we see a label, resync alignment etc. */
9270 if (LABEL_P (i))
9271 {
9272 unsigned int new_align = 1 << label_to_alignment (i);
9273
9274 if (new_align >= align)
9275 {
9276 align = new_align < max_align ? new_align : max_align;
9277 ofs = 0;
9278 }
9279
9280 else if (ofs & (new_align-1))
9281 ofs = (ofs | (new_align-1)) + 1;
9282 gcc_assert (!len);
9283 }
9284
9285 /* Handle complex instructions specially. */
9286 else if (in_use == 0)
9287 {
9288 /* Asms will have length < 0. This is a signal that we have
9289 lost alignment knowledge. Assume, however, that the asm
9290 will not mis-align instructions. */
9291 if (len < 0)
9292 {
9293 ofs = 0;
9294 align = 4;
9295 len = 0;
9296 }
9297 }
9298
9299 /* If the known alignment is smaller than the recognized insn group,
9300 realign the output. */
9301 else if ((int) align < len)
9302 {
9303 unsigned int new_log_align = len > 8 ? 4 : 3;
9304 rtx_insn *prev, *where;
9305
9306 where = prev = prev_nonnote_insn (i);
9307 if (!where || !LABEL_P (where))
9308 where = i;
9309
9310 /* Can't realign between a call and its gp reload. */
9311 if (! (TARGET_EXPLICIT_RELOCS
9312 && prev && CALL_P (prev)))
9313 {
9314 emit_insn_before (gen_realign (GEN_INT (new_log_align)), where);
9315 align = 1 << new_log_align;
9316 ofs = 0;
9317 }
9318 }
9319
9320 /* We may not insert padding inside the initial ldgp sequence. */
9321 else if (ldgp > 0)
9322 ldgp -= len;
9323
9324 /* If the group won't fit in the same INT16 as the previous,
9325 we need to add padding to keep the group together. Rather
9326 than simply leaving the insn filling to the assembler, we
9327 can make use of the knowledge of what sorts of instructions
9328 were issued in the previous group to make sure that all of
9329 the added nops are really free. */
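/* E.g. with ALIGN == 16, OFS == 12 and an 8-byte group, one nop
   ((16 - 12) / 4) is emitted so that the group starts at the next
   16-byte boundary.  */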
9330 else if (ofs + len > (int) align)
9331 {
9332 int nop_count = (align - ofs) / 4;
9333 rtx_insn *where;
9334
9335 /* Insert nops before labels, branches, and calls to truly merge
9336 the execution of the nops with the previous instruction group. */
9337 where = prev_nonnote_insn (i);
9338 if (where)
9339 {
9340 if (LABEL_P (where))
9341 {
9342 rtx_insn *where2 = prev_nonnote_insn (where);
9343 if (where2 && JUMP_P (where2))
9344 where = where2;
9345 }
9346 else if (NONJUMP_INSN_P (where))
9347 where = i;
9348 }
9349 else
9350 where = i;
9351
9352 do
9353 emit_insn_before ((*next_nop)(&prev_in_use), where);
9354 while (--nop_count);
9355 ofs = 0;
9356 }
9357
9358 ofs = (ofs + len) & (align - 1);
9359 prev_in_use = in_use;
9360 i = next;
9361 }
9362 }
9363
9364 static void
9365 alpha_align_insns (void)
9366 {
9367 if (alpha_tune == PROCESSOR_EV4)
9368 alpha_align_insns_1 (8, alphaev4_next_group, alphaev4_next_nop);
9369 else if (alpha_tune == PROCESSOR_EV5)
9370 alpha_align_insns_1 (16, alphaev5_next_group, alphaev5_next_nop);
9371 else
9372 gcc_unreachable ();
9373 }
9374
9375 /* Insert an unop between a sibcall or noreturn function call and the GP load. */
9376
9377 static void
9378 alpha_pad_function_end (void)
9379 {
9380 rtx_insn *insn, *next;
9381
9382 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9383 {
9384 if (!CALL_P (insn)
9385 || !(SIBLING_CALL_P (insn)
9386 || find_reg_note (insn, REG_NORETURN, NULL_RTX)))
9387 continue;
9388
9389 /* Make sure we do not split a call and its corresponding
9390 CALL_ARG_LOCATION note. */
9391 next = NEXT_INSN (insn);
9392 if (next == NULL)
9393 continue;
9394 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
9395 insn = next;
9396
9397 next = next_active_insn (insn);
9398 if (next)
9399 {
9400 rtx pat = PATTERN (next);
9401
9402 if (GET_CODE (pat) == SET
9403 && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
9404 && XINT (SET_SRC (pat), 1) == UNSPECV_LDGP1)
9405 emit_insn_after (gen_unop (), insn);
9406 }
9407 }
9408 }
9409 \f
9410 /* Machine dependent reorg pass. */
9411
9412 static void
9413 alpha_reorg (void)
9414 {
9415 /* Workaround for a linker error that triggers when an exception
9416 handler immediately follows a sibcall or a noreturn function.
9417
9418 In the sibcall case:
9419
9420 The instruction stream from an object file:
9421
9422 1d8: 00 00 fb 6b jmp (t12)
9423 1dc: 00 00 ba 27 ldah gp,0(ra)
9424 1e0: 00 00 bd 23 lda gp,0(gp)
9425 1e4: 00 00 7d a7 ldq t12,0(gp)
9426 1e8: 00 40 5b 6b jsr ra,(t12),1ec <__funcZ+0x1ec>
9427
9428 was converted in the final link pass to:
9429
9430 12003aa88: 67 fa ff c3 br 120039428 <...>
9431 12003aa8c: 00 00 fe 2f unop
9432 12003aa90: 00 00 fe 2f unop
9433 12003aa94: 48 83 7d a7 ldq t12,-31928(gp)
9434 12003aa98: 00 40 5b 6b jsr ra,(t12),12003aa9c <__func+0x1ec>
9435
9436 And in the noreturn case:
9437
9438 The instruction stream from an object file:
9439
9440 54: 00 40 5b 6b jsr ra,(t12),58 <__func+0x58>
9441 58: 00 00 ba 27 ldah gp,0(ra)
9442 5c: 00 00 bd 23 lda gp,0(gp)
9443 60: 00 00 7d a7 ldq t12,0(gp)
9444 64: 00 40 5b 6b jsr ra,(t12),68 <__func+0x68>
9445
9446 was converted in the final link pass to:
9447
9448 fdb24: a0 03 40 d3 bsr ra,fe9a8 <_called_func+0x8>
9449 fdb28: 00 00 fe 2f unop
9450 fdb2c: 00 00 fe 2f unop
9451 fdb30: 30 82 7d a7 ldq t12,-32208(gp)
9452 fdb34: 00 40 5b 6b jsr ra,(t12),fdb38 <__func+0x68>
9453
9454 GP load instructions were wrongly cleared by the linker relaxation
9455 pass. This workaround prevents removal of GP loads by inserting
9456 an unop instruction between a sibcall or noreturn function call and
9457 the exception handler prologue. */
9458
9459 if (current_function_has_exception_handlers ())
9460 alpha_pad_function_end ();
9461
9462 /* The CALL_PAL that implements the trap insn updates the program counter
9463 to point after the insn. In case the trap is the last insn in the function,
9464 emit a NOP to guarantee that the PC remains inside the function boundaries.
9465 This workaround is needed to get reliable backtraces. */
9466
9467 rtx_insn *insn = prev_active_insn (get_last_insn ());
9468
9469 if (insn && NONJUMP_INSN_P (insn))
9470 {
9471 rtx pat = PATTERN (insn);
9472 if (GET_CODE (pat) == PARALLEL)
9473 {
9474 rtx vec = XVECEXP (pat, 0, 0);
9475 if (GET_CODE (vec) == TRAP_IF
9476 && XEXP (vec, 0) == const1_rtx)
9477 emit_insn_after (gen_unop (), insn);
9478 }
9479 }
9480 }
9481 \f
9482 static void
9483 alpha_file_start (void)
9484 {
9485 default_file_start ();
9486
9487 fputs ("\t.set noreorder\n", asm_out_file);
9488 fputs ("\t.set volatile\n", asm_out_file);
9489 if (TARGET_ABI_OSF)
9490 fputs ("\t.set noat\n", asm_out_file);
9491 if (TARGET_EXPLICIT_RELOCS)
9492 fputs ("\t.set nomacro\n", asm_out_file);
9493 if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX)
9494 {
9495 const char *arch;
9496
9497 if (alpha_cpu == PROCESSOR_EV6 || TARGET_FIX || TARGET_CIX)
9498 arch = "ev6";
9499 else if (TARGET_MAX)
9500 arch = "pca56";
9501 else if (TARGET_BWX)
9502 arch = "ev56";
9503 else if (alpha_cpu == PROCESSOR_EV5)
9504 arch = "ev5";
9505 else
9506 arch = "ev4";
9507
9508 fprintf (asm_out_file, "\t.arch %s\n", arch);
9509 }
9510 }
9511
9512 /* Since we don't have a .dynbss section, we should not allow global
9513 relocations in the .rodata section. */
9514
9515 static int
9516 alpha_elf_reloc_rw_mask (void)
9517 {
9518 return flag_pic ? 3 : 2;
9519 }
9520
9521 /* Return a section for X. The only special thing we do here is to
9522 honor small data. */
9523
9524 static section *
9525 alpha_elf_select_rtx_section (machine_mode mode, rtx x,
9526 unsigned HOST_WIDE_INT align)
9527 {
9528 if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value)
9529 /* ??? Consider using mergeable sdata sections. */
9530 return sdata_section;
9531 else
9532 return default_elf_select_rtx_section (mode, x, align);
9533 }
9534
9535 static unsigned int
9536 alpha_elf_section_type_flags (tree decl, const char *name, int reloc)
9537 {
9538 unsigned int flags = 0;
9539
9540 if (strcmp (name, ".sdata") == 0
9541 || strncmp (name, ".sdata.", 7) == 0
9542 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
9543 || strcmp (name, ".sbss") == 0
9544 || strncmp (name, ".sbss.", 6) == 0
9545 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
9546 flags = SECTION_SMALL;
9547
9548 flags |= default_section_type_flags (decl, name, reloc);
9549 return flags;
9550 }
9551 \f
9552 /* Structure to collect function names for final output in link section. */
9553 /* Note that items marked with GTY can't be ifdef'ed out. */
9554
9555 enum reloc_kind
9556 {
9557 KIND_LINKAGE,
9558 KIND_CODEADDR
9559 };
9560
9561 struct GTY(()) alpha_links
9562 {
9563 rtx func;
9564 rtx linkage;
9565 enum reloc_kind rkind;
9566 };
9567
9568 #if TARGET_ABI_OPEN_VMS
9569
9570 /* Return the VMS argument type corresponding to MODE. */
9571
9572 enum avms_arg_type
9573 alpha_arg_type (machine_mode mode)
9574 {
9575 switch (mode)
9576 {
9577 case E_SFmode:
9578 return TARGET_FLOAT_VAX ? FF : FS;
9579 case E_DFmode:
9580 return TARGET_FLOAT_VAX ? FD : FT;
9581 default:
9582 return I64;
9583 }
9584 }
9585
9586 /* Return an rtx for an integer representing the VMS Argument Information
9587 register value. */
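/* E.g. for two arguments the low bits hold the count (2), while bits
   8..10 and 11..13 hold the types of the first and second argument
   respectively (three bits per argument, as encoded below).  */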
9588
9589 rtx
9590 alpha_arg_info_reg_val (CUMULATIVE_ARGS cum)
9591 {
9592 unsigned HOST_WIDE_INT regval = cum.num_args;
9593 int i;
9594
9595 for (i = 0; i < 6; i++)
9596 regval |= ((int) cum.atypes[i]) << (i * 3 + 8);
9597
9598 return GEN_INT (regval);
9599 }
9600 \f
9601
9602 /* Return a SYMBOL_REF representing the reference to the .linkage entry
9603 of function FUNC built for calls made from CFUNDECL. LFLAG is 1 if
9604 this is the reference to the linkage pointer value, 0 if this is the
9605 reference to the function entry value. RFLAG is 1 if this is a reduced
9606 reference (code address only), 0 if this is a full reference. */
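/* The linkage symbol is named "$<funcdef_no>..<name>..lk"; e.g. a call
   to "foo" from the function with funcdef_no 5 uses "$5..foo..lk".  */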
9607
9608 rtx
9609 alpha_use_linkage (rtx func, bool lflag, bool rflag)
9610 {
9611 struct alpha_links *al = NULL;
9612 const char *name = XSTR (func, 0);
9613
9614 if (cfun->machine->links)
9615 {
9616 /* Is this name already defined? */
9617 alpha_links **slot = cfun->machine->links->get (name);
9618 if (slot)
9619 al = *slot;
9620 }
9621 else
9622 cfun->machine->links
9623 = hash_map<nofree_string_hash, alpha_links *>::create_ggc (64);
9624
9625 if (al == NULL)
9626 {
9627 size_t buf_len;
9628 char *linksym;
9629 tree id;
9630
9631 if (name[0] == '*')
9632 name++;
9633
9634 /* Follow transparent alias, as this is used for CRTL translations. */
9635 id = maybe_get_identifier (name);
9636 if (id)
9637 {
9638 while (IDENTIFIER_TRANSPARENT_ALIAS (id))
9639 id = TREE_CHAIN (id);
9640 name = IDENTIFIER_POINTER (id);
9641 }
9642
9643 buf_len = strlen (name) + 8 + 9;
9644 linksym = (char *) alloca (buf_len);
9645 snprintf (linksym, buf_len, "$%d..%s..lk", cfun->funcdef_no, name);
9646
9647 al = ggc_alloc<alpha_links> ();
9648 al->func = func;
9649 al->linkage = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (linksym));
9650
9651 cfun->machine->links->put (ggc_strdup (name), al);
9652 }
9653
9654 al->rkind = rflag ? KIND_CODEADDR : KIND_LINKAGE;
9655
9656 if (lflag)
9657 return gen_rtx_MEM (Pmode, plus_constant (Pmode, al->linkage, 8));
9658 else
9659 return al->linkage;
9660 }
9661
9662 static int
9663 alpha_write_one_linkage (const char *name, alpha_links *link, FILE *stream)
9664 {
9665 ASM_OUTPUT_INTERNAL_LABEL (stream, XSTR (link->linkage, 0));
9666 if (link->rkind == KIND_CODEADDR)
9667 {
9668 /* External and used, request code address. */
9669 fprintf (stream, "\t.code_address ");
9670 }
9671 else
9672 {
9673 if (!SYMBOL_REF_EXTERNAL_P (link->func)
9674 && SYMBOL_REF_LOCAL_P (link->func))
9675 {
9676 /* Locally defined, build linkage pair. */
9677 fprintf (stream, "\t.quad %s..en\n", name);
9678 fprintf (stream, "\t.quad ");
9679 }
9680 else
9681 {
9682 /* External, request linkage pair. */
9683 fprintf (stream, "\t.linkage ");
9684 }
9685 }
9686 assemble_name (stream, name);
9687 fputs ("\n", stream);
9688
9689 return 0;
9690 }
9691
9692 static void
9693 alpha_write_linkage (FILE *stream, const char *funname)
9694 {
9695 fprintf (stream, "\t.link\n");
9696 fprintf (stream, "\t.align 3\n");
9697 in_section = NULL;
9698
9699 #ifdef TARGET_VMS_CRASH_DEBUG
9700 fputs ("\t.name ", stream);
9701 assemble_name (stream, funname);
9702 fputs ("..na\n", stream);
9703 #endif
9704
9705 ASM_OUTPUT_LABEL (stream, funname);
9706 fprintf (stream, "\t.pdesc ");
9707 assemble_name (stream, funname);
9708 fprintf (stream, "..en,%s\n",
9709 alpha_procedure_type == PT_STACK ? "stack"
9710 : alpha_procedure_type == PT_REGISTER ? "reg" : "null");
9711
9712 if (cfun->machine->links)
9713 {
9714 hash_map<nofree_string_hash, alpha_links *>::iterator iter
9715 = cfun->machine->links->begin ();
9716 for (; iter != cfun->machine->links->end (); ++iter)
9717 alpha_write_one_linkage ((*iter).first, (*iter).second, stream);
9718 }
9719 }
9720
9721 /* Switch to an arbitrary section NAME with attributes as specified
9722 by FLAGS. ALIGN specifies any known alignment requirements for
9723 the section; 0 if the default should be used. */
9724
9725 static void
9726 vms_asm_named_section (const char *name, unsigned int flags,
9727 tree decl ATTRIBUTE_UNUSED)
9728 {
9729 fputc ('\n', asm_out_file);
9730 fprintf (asm_out_file, ".section\t%s", name);
9731
9732 if (flags & SECTION_DEBUG)
9733 fprintf (asm_out_file, ",NOWRT");
9734
9735 fputc ('\n', asm_out_file);
9736 }
9737
9738 /* Record an element in the table of global constructors. SYMBOL is
9739 a SYMBOL_REF of the function to be called; PRIORITY is a number
9740 between 0 and MAX_INIT_PRIORITY.
9741
9742 Differs from default_ctors_section_asm_out_constructor in that the
9743 width of the .ctors entry is always 64 bits, rather than the 32 bits
9744 used by a normal pointer. */
9745
9746 static void
9747 vms_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9748 {
9749 switch_to_section (ctors_section);
9750 assemble_align (BITS_PER_WORD);
9751 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9752 }
9753
9754 static void
9755 vms_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9756 {
9757 switch_to_section (dtors_section);
9758 assemble_align (BITS_PER_WORD);
9759 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9760 }
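
/* For illustration (constructor name is made up): UNITS_PER_WORD is 8
   and BITS_PER_WORD is 64 on Alpha, so each routine above records a
   constructor or destructor FOO as an aligned 64-bit entry, roughly

       .quad foo

   in the .ctors or .dtors section, instead of the 32-bit entry that a
   default (short) pointer would occupy.  */
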
9761 #else
9762 rtx
9763 alpha_use_linkage (rtx func ATTRIBUTE_UNUSED,
9764 bool lflag ATTRIBUTE_UNUSED,
9765 bool rflag ATTRIBUTE_UNUSED)
9766 {
9767 return NULL_RTX;
9768 }
9769
9770 #endif /* TARGET_ABI_OPEN_VMS */
9771 \f
9772 static void
9773 alpha_init_libfuncs (void)
9774 {
9775 if (TARGET_ABI_OPEN_VMS)
9776 {
9777 /* Use the VMS runtime library functions for division and
9778 remainder. */
9779 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
9780 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
9781 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
9782 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
9783 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
9784 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
9785 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
9786 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
9787 #ifdef MEM_LIBFUNCS_INIT
9788 MEM_LIBFUNCS_INIT;
9789 #endif
9790 }
9791 }
9792
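/* For illustration (hypothetical source expressions): Alpha has no
   integer divide instruction, so with the table above a VMS
   compilation expands, e.g.,

       a 64-bit signed division   a / b   as a call to OTS$DIV_L
       a 64-bit signed remainder  a % b   as a call to OTS$REM_L

   and the 32-bit and unsigned variants likewise call the matching
   OTS$ routines instead of the default library names.  */
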
9793 /* On the Alpha, we use this to disable the floating-point registers
9794 when they don't exist. */
9795
9796 static void
9797 alpha_conditional_register_usage (void)
9798 {
9799 int i;
9800 if (! TARGET_FPREGS)
9801 for (i = 32; i < 63; i++)
9802 fixed_regs[i] = call_used_regs[i] = 1;
9803 }
9804
9805 /* Canonicalize a comparison from one we don't have to one we do have. */
9806
9807 static void
9808 alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
9809 bool op0_preserve_value)
9810 {
9811 if (!op0_preserve_value
9812 && (*code == GE || *code == GT || *code == GEU || *code == GTU)
9813 && (REG_P (*op1) || *op1 == const0_rtx))
9814 {
9815 rtx tem = *op0;
9816 *op0 = *op1;
9817 *op1 = tem;
9818 *code = (int)swap_condition ((enum rtx_code)*code);
9819 }
9820
9821 if ((*code == LT || *code == LTU)
9822 && CONST_INT_P (*op1) && INTVAL (*op1) == 256)
9823 {
9824 *code = *code == LT ? LE : LEU;
9825 *op1 = GEN_INT (255);
9826 }
9827 }
9828
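/* Illustration only -- not part of the compiler.  A standalone sketch
   of the two rewrites performed above: a GT/GE-style comparison is
   mirrored into its LT/LE form with the operands swapped, and
   "x < 256" becomes the equivalent "x <= 255", presumably so that the
   constant fits the 8-bit literal field of the compare instructions.  */
#if 0
#include <assert.h>

static void
sketch_canonicalize (long x, long y)
{
  /* x > y is the same test as y < x with the operands swapped.  */
  assert ((x > y) == (y < x));

  /* For integral x, x < 256 and x <= 255 are equivalent.  */
  assert ((x < 256) == (x <= 255));
}
#endif
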
9829 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
9830
9831 static void
9832 alpha_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
9833 {
9834 const unsigned HOST_WIDE_INT SWCR_STATUS_MASK = (0x3fUL << 17);
9835
9836 tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
9837 tree new_fenv_var, reload_fenv, restore_fnenv;
9838 tree update_call, atomic_feraiseexcept, hold_fnclex;
9839
9840 /* Assume OSF/1 compatible interfaces. */
9841 if (!TARGET_ABI_OSF)
9842 return;
9843
9844 /* Generate the equivalent of:
9845 unsigned long fenv_var;
9846 fenv_var = __ieee_get_fp_control ();
9847
9848 unsigned long masked_fenv;
9849 masked_fenv = fenv_var & mask;
9850
9851 __ieee_set_fp_control (masked_fenv); */
9852
9853 fenv_var = create_tmp_var_raw (long_unsigned_type_node);
9854 get_fpscr
9855 = build_fn_decl ("__ieee_get_fp_control",
9856 build_function_type_list (long_unsigned_type_node, NULL));
9857 set_fpscr
9858 = build_fn_decl ("__ieee_set_fp_control",
9859 build_function_type_list (void_type_node, NULL));
9860 mask = build_int_cst (long_unsigned_type_node, ~SWCR_STATUS_MASK);
9861 ld_fenv = build2 (MODIFY_EXPR, long_unsigned_type_node,
9862 fenv_var, build_call_expr (get_fpscr, 0));
9863 masked_fenv = build2 (BIT_AND_EXPR, long_unsigned_type_node, fenv_var, mask);
9864 hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
9865 *hold = build2 (COMPOUND_EXPR, void_type_node,
9866 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
9867 hold_fnclex);
9868
9869 /* Store the value of masked_fenv to clear the exceptions:
9870 __ieee_set_fp_control (masked_fenv); */
9871
9872 *clear = build_call_expr (set_fpscr, 1, masked_fenv);
9873
9874 /* Generate the equivalent of:
9875 unsigned long new_fenv_var;
9876 new_fenv_var = __ieee_get_fp_control ();
9877
9878 __ieee_set_fp_control (fenv_var);
9879
9880 __atomic_feraiseexcept (new_fenv_var); */
9881
9882 new_fenv_var = create_tmp_var_raw (long_unsigned_type_node);
9883 reload_fenv = build2 (MODIFY_EXPR, long_unsigned_type_node, new_fenv_var,
9884 build_call_expr (get_fpscr, 0));
9885 restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
9886 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
9887 update_call
9888 = build_call_expr (atomic_feraiseexcept, 1,
9889 fold_convert (integer_type_node, new_fenv_var));
9890 *update = build2 (COMPOUND_EXPR, void_type_node,
9891 build2 (COMPOUND_EXPR, void_type_node,
9892 reload_fenv, restore_fnenv), update_call);
9893 }
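
/* For context (illustrative only): the three sequences built above are
   consumed when expanding a C11 atomic compound assignment on a
   floating-point object, roughly as

       HOLD;                     save the FP environment, clear status
     retry:
       ... compute the new value ...
       if the compare-and-swap fails:  CLEAR;  goto retry;
       UPDATE;                   restore the environment and re-raise
                                 any exceptions the operation set  */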
9894 \f
9895 /* Initialize the GCC target structure. */
9896 #if TARGET_ABI_OPEN_VMS
9897 # undef TARGET_ATTRIBUTE_TABLE
9898 # define TARGET_ATTRIBUTE_TABLE vms_attribute_table
9899 # undef TARGET_CAN_ELIMINATE
9900 # define TARGET_CAN_ELIMINATE alpha_vms_can_eliminate
9901 #endif
9902
9903 #undef TARGET_IN_SMALL_DATA_P
9904 #define TARGET_IN_SMALL_DATA_P alpha_in_small_data_p
9905
9906 #undef TARGET_ASM_ALIGNED_HI_OP
9907 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
9908 #undef TARGET_ASM_ALIGNED_DI_OP
9909 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
9910
9911 /* Default unaligned ops are provided for ELF systems. To get unaligned
9912 data for non-ELF systems, we have to turn off auto alignment. */
9913 #if TARGET_ABI_OPEN_VMS
9914 #undef TARGET_ASM_UNALIGNED_HI_OP
9915 #define TARGET_ASM_UNALIGNED_HI_OP "\t.align 0\n\t.word\t"
9916 #undef TARGET_ASM_UNALIGNED_SI_OP
9917 #define TARGET_ASM_UNALIGNED_SI_OP "\t.align 0\n\t.long\t"
9918 #undef TARGET_ASM_UNALIGNED_DI_OP
9919 #define TARGET_ASM_UNALIGNED_DI_OP "\t.align 0\n\t.quad\t"
9920 #endif
9921
9922 #undef TARGET_ASM_RELOC_RW_MASK
9923 #define TARGET_ASM_RELOC_RW_MASK alpha_elf_reloc_rw_mask
9924 #undef TARGET_ASM_SELECT_RTX_SECTION
9925 #define TARGET_ASM_SELECT_RTX_SECTION alpha_elf_select_rtx_section
9926 #undef TARGET_SECTION_TYPE_FLAGS
9927 #define TARGET_SECTION_TYPE_FLAGS alpha_elf_section_type_flags
9928
9929 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
9930 #define TARGET_ASM_FUNCTION_END_PROLOGUE alpha_output_function_end_prologue
9931
9932 #undef TARGET_INIT_LIBFUNCS
9933 #define TARGET_INIT_LIBFUNCS alpha_init_libfuncs
9934
9935 #undef TARGET_LEGITIMIZE_ADDRESS
9936 #define TARGET_LEGITIMIZE_ADDRESS alpha_legitimize_address
9937 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
9938 #define TARGET_MODE_DEPENDENT_ADDRESS_P alpha_mode_dependent_address_p
9939
9940 #undef TARGET_ASM_FILE_START
9941 #define TARGET_ASM_FILE_START alpha_file_start
9942
9943 #undef TARGET_SCHED_ADJUST_COST
9944 #define TARGET_SCHED_ADJUST_COST alpha_adjust_cost
9945 #undef TARGET_SCHED_ISSUE_RATE
9946 #define TARGET_SCHED_ISSUE_RATE alpha_issue_rate
9947 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
9948 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
9949 alpha_multipass_dfa_lookahead
9950
9951 #undef TARGET_HAVE_TLS
9952 #define TARGET_HAVE_TLS HAVE_AS_TLS
9953
9954 #undef TARGET_BUILTIN_DECL
9955 #define TARGET_BUILTIN_DECL alpha_builtin_decl
9956 #undef TARGET_INIT_BUILTINS
9957 #define TARGET_INIT_BUILTINS alpha_init_builtins
9958 #undef TARGET_EXPAND_BUILTIN
9959 #define TARGET_EXPAND_BUILTIN alpha_expand_builtin
9960 #undef TARGET_FOLD_BUILTIN
9961 #define TARGET_FOLD_BUILTIN alpha_fold_builtin
9962 #undef TARGET_GIMPLE_FOLD_BUILTIN
9963 #define TARGET_GIMPLE_FOLD_BUILTIN alpha_gimple_fold_builtin
9964
9965 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
9966 #define TARGET_FUNCTION_OK_FOR_SIBCALL alpha_function_ok_for_sibcall
9967 #undef TARGET_CANNOT_COPY_INSN_P
9968 #define TARGET_CANNOT_COPY_INSN_P alpha_cannot_copy_insn_p
9969 #undef TARGET_LEGITIMATE_CONSTANT_P
9970 #define TARGET_LEGITIMATE_CONSTANT_P alpha_legitimate_constant_p
9971 #undef TARGET_CANNOT_FORCE_CONST_MEM
9972 #define TARGET_CANNOT_FORCE_CONST_MEM alpha_cannot_force_const_mem
9973
9974 #if TARGET_ABI_OSF
9975 #undef TARGET_ASM_OUTPUT_MI_THUNK
9976 #define TARGET_ASM_OUTPUT_MI_THUNK alpha_output_mi_thunk_osf
9977 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
9978 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
9979 #undef TARGET_STDARG_OPTIMIZE_HOOK
9980 #define TARGET_STDARG_OPTIMIZE_HOOK alpha_stdarg_optimize_hook
9981 #endif
9982
9983 #undef TARGET_PRINT_OPERAND
9984 #define TARGET_PRINT_OPERAND alpha_print_operand
9985 #undef TARGET_PRINT_OPERAND_ADDRESS
9986 #define TARGET_PRINT_OPERAND_ADDRESS alpha_print_operand_address
9987 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
9988 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P alpha_print_operand_punct_valid_p
9989
9990 /* Use 16-bit section anchors. */
9991 #undef TARGET_MIN_ANCHOR_OFFSET
9992 #define TARGET_MIN_ANCHOR_OFFSET -0x7fff - 1
9993 #undef TARGET_MAX_ANCHOR_OFFSET
9994 #define TARGET_MAX_ANCHOR_OFFSET 0x7fff
9995 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9996 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
9997
9998 #undef TARGET_REGISTER_MOVE_COST
9999 #define TARGET_REGISTER_MOVE_COST alpha_register_move_cost
10000 #undef TARGET_MEMORY_MOVE_COST
10001 #define TARGET_MEMORY_MOVE_COST alpha_memory_move_cost
10002 #undef TARGET_RTX_COSTS
10003 #define TARGET_RTX_COSTS alpha_rtx_costs
10004 #undef TARGET_ADDRESS_COST
10005 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
10006
10007 #undef TARGET_MACHINE_DEPENDENT_REORG
10008 #define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg
10009
10010 #undef TARGET_PROMOTE_FUNCTION_MODE
10011 #define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
10012 #undef TARGET_PROMOTE_PROTOTYPES
10013 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false
10014
10015 #undef TARGET_FUNCTION_VALUE
10016 #define TARGET_FUNCTION_VALUE alpha_function_value
10017 #undef TARGET_LIBCALL_VALUE
10018 #define TARGET_LIBCALL_VALUE alpha_libcall_value
10019 #undef TARGET_FUNCTION_VALUE_REGNO_P
10020 #define TARGET_FUNCTION_VALUE_REGNO_P alpha_function_value_regno_p
10021 #undef TARGET_RETURN_IN_MEMORY
10022 #define TARGET_RETURN_IN_MEMORY alpha_return_in_memory
10023 #undef TARGET_PASS_BY_REFERENCE
10024 #define TARGET_PASS_BY_REFERENCE alpha_pass_by_reference
10025 #undef TARGET_SETUP_INCOMING_VARARGS
10026 #define TARGET_SETUP_INCOMING_VARARGS alpha_setup_incoming_varargs
10027 #undef TARGET_STRICT_ARGUMENT_NAMING
10028 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
10029 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
10030 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
10031 #undef TARGET_SPLIT_COMPLEX_ARG
10032 #define TARGET_SPLIT_COMPLEX_ARG alpha_split_complex_arg
10033 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
10034 #define TARGET_GIMPLIFY_VA_ARG_EXPR alpha_gimplify_va_arg
10035 #undef TARGET_ARG_PARTIAL_BYTES
10036 #define TARGET_ARG_PARTIAL_BYTES alpha_arg_partial_bytes
10037 #undef TARGET_FUNCTION_ARG
10038 #define TARGET_FUNCTION_ARG alpha_function_arg
10039 #undef TARGET_FUNCTION_ARG_ADVANCE
10040 #define TARGET_FUNCTION_ARG_ADVANCE alpha_function_arg_advance
10041 #undef TARGET_TRAMPOLINE_INIT
10042 #define TARGET_TRAMPOLINE_INIT alpha_trampoline_init
10043
10044 #undef TARGET_INSTANTIATE_DECLS
10045 #define TARGET_INSTANTIATE_DECLS alpha_instantiate_decls
10046
10047 #undef TARGET_SECONDARY_RELOAD
10048 #define TARGET_SECONDARY_RELOAD alpha_secondary_reload
10049
10050 #undef TARGET_SCALAR_MODE_SUPPORTED_P
10051 #define TARGET_SCALAR_MODE_SUPPORTED_P alpha_scalar_mode_supported_p
10052 #undef TARGET_VECTOR_MODE_SUPPORTED_P
10053 #define TARGET_VECTOR_MODE_SUPPORTED_P alpha_vector_mode_supported_p
10054
10055 #undef TARGET_BUILD_BUILTIN_VA_LIST
10056 #define TARGET_BUILD_BUILTIN_VA_LIST alpha_build_builtin_va_list
10057
10058 #undef TARGET_EXPAND_BUILTIN_VA_START
10059 #define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start
10060
10061 #undef TARGET_OPTION_OVERRIDE
10062 #define TARGET_OPTION_OVERRIDE alpha_option_override
10063
10064 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
10065 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
10066 alpha_override_options_after_change
10067
10068 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
10069 #undef TARGET_MANGLE_TYPE
10070 #define TARGET_MANGLE_TYPE alpha_mangle_type
10071 #endif
10072
10073 #undef TARGET_LRA_P
10074 #define TARGET_LRA_P hook_bool_void_false
10075
10076 #undef TARGET_LEGITIMATE_ADDRESS_P
10077 #define TARGET_LEGITIMATE_ADDRESS_P alpha_legitimate_address_p
10078
10079 #undef TARGET_CONDITIONAL_REGISTER_USAGE
10080 #define TARGET_CONDITIONAL_REGISTER_USAGE alpha_conditional_register_usage
10081
10082 #undef TARGET_CANONICALIZE_COMPARISON
10083 #define TARGET_CANONICALIZE_COMPARISON alpha_canonicalize_comparison
10084
10085 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
10086 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV alpha_atomic_assign_expand_fenv
10087
10088 struct gcc_target targetm = TARGET_INITIALIZER;
10089
10090 \f
10091 #include "gt-alpha.h"