1 /* Subroutines used for code generation on the DEC Alpha.
2 Copyright (C) 1992-2024 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21
22 #define IN_TARGET_CODE 1
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "stringpool.h"
32 #include "attribs.h"
33 #include "memmodel.h"
34 #include "gimple.h"
35 #include "df.h"
36 #include "predict.h"
37 #include "tm_p.h"
38 #include "ssa.h"
39 #include "expmed.h"
40 #include "optabs.h"
41 #include "regs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "explow.h"
53 #include "expr.h"
54 #include "reload.h"
55 #include "except.h"
56 #include "common/common-target.h"
57 #include "debug.h"
58 #include "langhooks.h"
59 #include "cfgrtl.h"
60 #include "tree-pass.h"
61 #include "context.h"
62 #include "gimple-iterator.h"
63 #include "gimplify.h"
64 #include "tree-stdarg.h"
65 #include "tm-constrs.h"
66 #include "libfuncs.h"
67 #include "builtins.h"
68 #include "rtl-iter.h"
69 #include "flags.h"
70 #include "opts.h"
71
72 /* This file should be included last. */
73 #include "target-def.h"
74
75 /* Specify which cpu to schedule for. */
76 enum processor_type alpha_tune;
77
78 /* Which cpu we're generating code for. */
79 enum processor_type alpha_cpu;
80
81 static const char * const alpha_cpu_name[] =
82 {
83 "ev4", "ev5", "ev6"
84 };
85
86 /* Specify how accurate floating-point traps need to be. */
87
88 enum alpha_trap_precision alpha_tp;
89
90 /* Specify the floating-point rounding mode. */
91
92 enum alpha_fp_rounding_mode alpha_fprm;
93
94 /* Specify which things cause traps. */
95
96 enum alpha_fp_trap_mode alpha_fptm;
97
98 /* Nonzero if inside of a function, because the Alpha asm can't
99 handle .files inside of functions. */
100
101 static int inside_function = FALSE;
102
103 /* The number of cycles of latency we should assume on memory reads. */
104
105 static int alpha_memory_latency = 3;
106
107 /* Whether the function needs the GP. */
108
109 static int alpha_function_needs_gp;
110
111 /* The assembler name of the current function. */
112
113 static const char *alpha_fnname;
114
115 /* The next explicit relocation sequence number. */
116 extern GTY(()) int alpha_next_sequence_number;
117 int alpha_next_sequence_number = 1;
118
119 /* The literal and gpdisp sequence numbers for this insn, as printed
120 by %# and %* respectively. */
121 extern GTY(()) int alpha_this_literal_sequence_number;
122 extern GTY(()) int alpha_this_gpdisp_sequence_number;
123 int alpha_this_literal_sequence_number;
124 int alpha_this_gpdisp_sequence_number;
125
126 /* Costs of various operations on the different architectures. */
127
128 struct alpha_rtx_cost_data
129 {
130 unsigned char fp_add;
131 unsigned char fp_mult;
132 unsigned char fp_div_sf;
133 unsigned char fp_div_df;
134 unsigned char int_mult_si;
135 unsigned char int_mult_di;
136 unsigned char int_shift;
137 unsigned char int_cmov;
138 unsigned short int_div;
139 };
140
141 static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
142 {
143 { /* EV4 */
144 COSTS_N_INSNS (6), /* fp_add */
145 COSTS_N_INSNS (6), /* fp_mult */
146 COSTS_N_INSNS (34), /* fp_div_sf */
147 COSTS_N_INSNS (63), /* fp_div_df */
148 COSTS_N_INSNS (23), /* int_mult_si */
149 COSTS_N_INSNS (23), /* int_mult_di */
150 COSTS_N_INSNS (2), /* int_shift */
151 COSTS_N_INSNS (2), /* int_cmov */
152 COSTS_N_INSNS (97), /* int_div */
153 },
154 { /* EV5 */
155 COSTS_N_INSNS (4), /* fp_add */
156 COSTS_N_INSNS (4), /* fp_mult */
157 COSTS_N_INSNS (15), /* fp_div_sf */
158 COSTS_N_INSNS (22), /* fp_div_df */
159 COSTS_N_INSNS (8), /* int_mult_si */
160 COSTS_N_INSNS (12), /* int_mult_di */
161 COSTS_N_INSNS (1) + 1, /* int_shift */
162 COSTS_N_INSNS (1), /* int_cmov */
163 COSTS_N_INSNS (83), /* int_div */
164 },
165 { /* EV6 */
166 COSTS_N_INSNS (4), /* fp_add */
167 COSTS_N_INSNS (4), /* fp_mult */
168 COSTS_N_INSNS (12), /* fp_div_sf */
169 COSTS_N_INSNS (15), /* fp_div_df */
170 COSTS_N_INSNS (7), /* int_mult_si */
171 COSTS_N_INSNS (7), /* int_mult_di */
172 COSTS_N_INSNS (1), /* int_shift */
173 COSTS_N_INSNS (2), /* int_cmov */
174 COSTS_N_INSNS (86), /* int_div */
175 },
176 };
177
178 /* Similar but tuned for code size instead of execution latency. The
179 extra +N is fractional cost tuning based on latency. It's used to
180 encourage use of cheaper insns like shift, but only if there's just
181 one of them. */
182
183 static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
184 {
185 COSTS_N_INSNS (1), /* fp_add */
186 COSTS_N_INSNS (1), /* fp_mult */
187 COSTS_N_INSNS (1), /* fp_div_sf */
188 COSTS_N_INSNS (1) + 1, /* fp_div_df */
189 COSTS_N_INSNS (1) + 1, /* int_mult_si */
190 COSTS_N_INSNS (1) + 2, /* int_mult_di */
191 COSTS_N_INSNS (1), /* int_shift */
192 COSTS_N_INSNS (1), /* int_cmov */
193 COSTS_N_INSNS (6), /* int_div */
194 };
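
/* For illustration: COSTS_N_INSNS (N) expands to N * 4, so the "+ 1" and
   "+ 2" adjustments above act as quarter- and half-insn penalties.  E.g.
   int_mult_di in the size table is COSTS_N_INSNS (1) + 2 == 6, nominally
   1.5 insns, which mildly discourages a multiply relative to a single
   1-insn shift when optimizing for size.  */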
195
196 /* Get the number of args of a function in one of two ways. */
197 #if TARGET_ABI_OPEN_VMS
198 #define NUM_ARGS crtl->args.info.num_args
199 #else
200 #define NUM_ARGS crtl->args.info
201 #endif
202
203 #define REG_PV 27
204 #define REG_RA 26
205
206 /* Declarations of static functions. */
207 static struct machine_function *alpha_init_machine_status (void);
208 static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
209 static void alpha_handle_trap_shadows (void);
210 static void alpha_align_insns (void);
211 static void alpha_override_options_after_change (void);
212
213 #if TARGET_ABI_OPEN_VMS
214 static void alpha_write_linkage (FILE *, const char *);
215 static bool vms_valid_pointer_mode (scalar_int_mode);
216 #else
217 #define vms_patch_builtins() gcc_unreachable()
218 #endif
219 \f
220 static unsigned int
221 rest_of_handle_trap_shadows (void)
222 {
223 alpha_handle_trap_shadows ();
224 return 0;
225 }
226
227 namespace {
228
229 const pass_data pass_data_handle_trap_shadows =
230 {
231 RTL_PASS,
232 "trap_shadows", /* name */
233 OPTGROUP_NONE, /* optinfo_flags */
234 TV_NONE, /* tv_id */
235 0, /* properties_required */
236 0, /* properties_provided */
237 0, /* properties_destroyed */
238 0, /* todo_flags_start */
239 TODO_df_finish, /* todo_flags_finish */
240 };
241
242 class pass_handle_trap_shadows : public rtl_opt_pass
243 {
244 public:
245 pass_handle_trap_shadows(gcc::context *ctxt)
246 : rtl_opt_pass(pass_data_handle_trap_shadows, ctxt)
247 {}
248
249 /* opt_pass methods: */
250 virtual bool gate (function *)
251 {
252 return alpha_tp != ALPHA_TP_PROG || flag_exceptions;
253 }
254
255 virtual unsigned int execute (function *)
256 {
257 return rest_of_handle_trap_shadows ();
258 }
259
260 }; // class pass_handle_trap_shadows
261
262 } // anon namespace
263
264 rtl_opt_pass *
265 make_pass_handle_trap_shadows (gcc::context *ctxt)
266 {
267 return new pass_handle_trap_shadows (ctxt);
268 }
269
270 static unsigned int
271 rest_of_align_insns (void)
272 {
273 alpha_align_insns ();
274 return 0;
275 }
276
277 namespace {
278
279 const pass_data pass_data_align_insns =
280 {
281 RTL_PASS,
282 "align_insns", /* name */
283 OPTGROUP_NONE, /* optinfo_flags */
284 TV_NONE, /* tv_id */
285 0, /* properties_required */
286 0, /* properties_provided */
287 0, /* properties_destroyed */
288 0, /* todo_flags_start */
289 TODO_df_finish, /* todo_flags_finish */
290 };
291
292 class pass_align_insns : public rtl_opt_pass
293 {
294 public:
295 pass_align_insns(gcc::context *ctxt)
296 : rtl_opt_pass(pass_data_align_insns, ctxt)
297 {}
298
299 /* opt_pass methods: */
300 virtual bool gate (function *)
301 {
302 /* Due to the number of extra trapb insns, don't bother fixing up
303 alignment when trap precision is instruction. Moreover, we can
304 only do our job when sched2 is run. */
305 return ((alpha_tune == PROCESSOR_EV4
306 || alpha_tune == PROCESSOR_EV5)
307 && optimize && !optimize_size
308 && alpha_tp != ALPHA_TP_INSN
309 && flag_schedule_insns_after_reload);
310 }
311
312 virtual unsigned int execute (function *)
313 {
314 return rest_of_align_insns ();
315 }
316
317 }; // class pass_align_insns
318
319 } // anon namespace
320
321 rtl_opt_pass *
322 make_pass_align_insns (gcc::context *ctxt)
323 {
324 return new pass_align_insns (ctxt);
325 }
326
327 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
328 /* Implement TARGET_MANGLE_TYPE. */
329
330 static const char *
331 alpha_mangle_type (const_tree type)
332 {
333 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
334 && TARGET_LONG_DOUBLE_128)
335 return "g";
336
337 /* For all other types, use normal C++ mangling. */
338 return NULL;
339 }
340 #endif
341
342 /* Parse target option strings. */
343
344 static void
345 alpha_option_override (void)
346 {
347 static const struct cpu_table {
348 const char *const name;
349 const enum processor_type processor;
350 const int flags;
351 const unsigned short line_size; /* in bytes */
352 const unsigned short l1_size; /* in kb. */
353 const unsigned short l2_size; /* in kb. */
354 } cpu_table[] = {
355 /* EV4/LCA45 had 8k L1 caches; EV45 had 16k L1 caches.
356 EV4/EV45 had 128k to 16M 32-byte direct Bcache. LCA45
357 had 64k to 8M 8-byte direct Bcache. */
358 { "ev4", PROCESSOR_EV4, 0, 32, 8, 8*1024 },
359 { "21064", PROCESSOR_EV4, 0, 32, 8, 8*1024 },
360 { "ev45", PROCESSOR_EV4, 0, 32, 16, 16*1024 },
361
362 /* EV5 or EV56 had 8k 32 byte L1, 96k 32 or 64 byte L2,
363 and 1M to 16M 64 byte L3 (not modeled).
364 PCA56 had 16k 64-byte cache; PCA57 had 32k Icache.
365 PCA56 had 8k 64-byte cache; PCA57 had 16k Dcache. */
366 { "ev5", PROCESSOR_EV5, 0, 32, 8, 96 },
367 { "21164", PROCESSOR_EV5, 0, 32, 8, 96 },
368 { "ev56", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
369 { "21164a", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
370 { "pca56", PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
371 { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
372 { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
373
374 /* EV6 had 64k 64 byte L1, 1M to 16M Bcache. */
375 { "ev6", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
376 { "21264", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
377 { "ev67", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
378 64, 64, 16*1024 },
379 { "21264a", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
380 64, 64, 16*1024 }
381 };
382
383 int const ct_size = ARRAY_SIZE (cpu_table);
384 int line_size = 0, l1_size = 0, l2_size = 0;
385 int i;
386
387 #ifdef SUBTARGET_OVERRIDE_OPTIONS
388 SUBTARGET_OVERRIDE_OPTIONS;
389 #endif
390
391 /* Default to full IEEE compliance mode for Go language. */
392 if (strcmp (lang_hooks.name, "GNU Go") == 0
393 && !(target_flags_explicit & MASK_IEEE))
394 target_flags |= MASK_IEEE;
395
396 alpha_fprm = ALPHA_FPRM_NORM;
397 alpha_tp = ALPHA_TP_PROG;
398 alpha_fptm = ALPHA_FPTM_N;
399
400 if (TARGET_IEEE)
401 {
402 alpha_tp = ALPHA_TP_INSN;
403 alpha_fptm = ALPHA_FPTM_SU;
404 }
405 if (TARGET_IEEE_WITH_INEXACT)
406 {
407 alpha_tp = ALPHA_TP_INSN;
408 alpha_fptm = ALPHA_FPTM_SUI;
409 }
410
411 if (alpha_tp_string)
412 {
413 if (! strcmp (alpha_tp_string, "p"))
414 alpha_tp = ALPHA_TP_PROG;
415 else if (! strcmp (alpha_tp_string, "f"))
416 alpha_tp = ALPHA_TP_FUNC;
417 else if (! strcmp (alpha_tp_string, "i"))
418 alpha_tp = ALPHA_TP_INSN;
419 else
420 error ("bad value %qs for %<-mtrap-precision%> switch",
421 alpha_tp_string);
422 }
423
424 if (alpha_fprm_string)
425 {
426 if (! strcmp (alpha_fprm_string, "n"))
427 alpha_fprm = ALPHA_FPRM_NORM;
428 else if (! strcmp (alpha_fprm_string, "m"))
429 alpha_fprm = ALPHA_FPRM_MINF;
430 else if (! strcmp (alpha_fprm_string, "c"))
431 alpha_fprm = ALPHA_FPRM_CHOP;
432 else if (! strcmp (alpha_fprm_string,"d"))
433 alpha_fprm = ALPHA_FPRM_DYN;
434 else
435 error ("bad value %qs for %<-mfp-rounding-mode%> switch",
436 alpha_fprm_string);
437 }
438
439 if (alpha_fptm_string)
440 {
441 if (strcmp (alpha_fptm_string, "n") == 0)
442 alpha_fptm = ALPHA_FPTM_N;
443 else if (strcmp (alpha_fptm_string, "u") == 0)
444 alpha_fptm = ALPHA_FPTM_U;
445 else if (strcmp (alpha_fptm_string, "su") == 0)
446 alpha_fptm = ALPHA_FPTM_SU;
447 else if (strcmp (alpha_fptm_string, "sui") == 0)
448 alpha_fptm = ALPHA_FPTM_SUI;
449 else
450 error ("bad value %qs for %<-mfp-trap-mode%> switch",
451 alpha_fptm_string);
452 }
453
454 if (alpha_cpu_string)
455 {
456 for (i = 0; i < ct_size; i++)
457 if (! strcmp (alpha_cpu_string, cpu_table [i].name))
458 {
459 alpha_tune = alpha_cpu = cpu_table[i].processor;
460 line_size = cpu_table[i].line_size;
461 l1_size = cpu_table[i].l1_size;
462 l2_size = cpu_table[i].l2_size;
463 target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
464 target_flags |= cpu_table[i].flags;
465 break;
466 }
467 if (i == ct_size)
468 error ("bad value %qs for %<-mcpu%> switch", alpha_cpu_string);
469 }
470
471 if (alpha_tune_string)
472 {
473 for (i = 0; i < ct_size; i++)
474 if (! strcmp (alpha_tune_string, cpu_table [i].name))
475 {
476 alpha_tune = cpu_table[i].processor;
477 line_size = cpu_table[i].line_size;
478 l1_size = cpu_table[i].l1_size;
479 l2_size = cpu_table[i].l2_size;
480 break;
481 }
482 if (i == ct_size)
483 error ("bad value %qs for %<-mtune%> switch", alpha_tune_string);
484 }
485
486 if (line_size)
487 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
488 param_l1_cache_line_size, line_size);
489 if (l1_size)
490 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
491 param_l1_cache_size, l1_size);
492 if (l2_size)
493 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
494 param_l2_cache_size, l2_size);
495
496 /* Do some sanity checks on the above options. */
497
498 if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
499 && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
500 {
501 warning (0, "fp software completion requires %<-mtrap-precision=i%>");
502 alpha_tp = ALPHA_TP_INSN;
503 }
504
505 if (alpha_cpu == PROCESSOR_EV6)
506 {
507 /* Except for EV6 pass 1 (not released), we always have precise
508 arithmetic traps. Which means we can do software completion
509 without minding trap shadows. */
510 alpha_tp = ALPHA_TP_PROG;
511 }
512
513 if (TARGET_FLOAT_VAX)
514 {
515 if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
516 {
517 warning (0, "rounding mode not supported for VAX floats");
518 alpha_fprm = ALPHA_FPRM_NORM;
519 }
520 if (alpha_fptm == ALPHA_FPTM_SUI)
521 {
522 warning (0, "trap mode not supported for VAX floats");
523 alpha_fptm = ALPHA_FPTM_SU;
524 }
525 if (target_flags_explicit & MASK_LONG_DOUBLE_128)
526 warning (0, "128-bit %<long double%> not supported for VAX floats");
527 target_flags &= ~MASK_LONG_DOUBLE_128;
528 }
529
530 {
531 char *end;
532 int lat;
533
534 if (!alpha_mlat_string)
535 alpha_mlat_string = "L1";
536
537 if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
538 && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
539 ;
540 else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
541 && ISDIGIT ((unsigned char)alpha_mlat_string[1])
542 && alpha_mlat_string[2] == '\0')
543 {
544 static int const cache_latency[][4] =
545 {
546 { 3, 30, -1 }, /* ev4 -- Bcache is a guess */
547 { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */
548 { 3, 12, 30 }, /* ev6 -- Bcache from DS20 LMbench. */
549 };
550
551 lat = alpha_mlat_string[1] - '0';
552 if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
553 {
554 warning (0, "L%d cache latency unknown for %s",
555 lat, alpha_cpu_name[alpha_tune]);
556 lat = 3;
557 }
558 else
559 lat = cache_latency[alpha_tune][lat-1];
560 }
561 else if (! strcmp (alpha_mlat_string, "main"))
562 {
563 /* Most current memories have about 370ns latency. This is
564 a reasonable guess for a fast cpu. */
565 lat = 150;
566 }
567 else
568 {
569 warning (0, "bad value %qs for %<-mmemory-latency%>",
570 alpha_mlat_string);
571 lat = 3;
572 }
573
574 alpha_memory_latency = lat;
575 }
576
577 /* Default the definition of "small data" to 8 bytes. */
578 if (!OPTION_SET_P (g_switch_value))
579 g_switch_value = 8;
580
581 /* Infer TARGET_SMALL_DATA from -fpic/-fPIC. */
582 if (flag_pic == 1)
583 target_flags |= MASK_SMALL_DATA;
584 else if (flag_pic == 2)
585 target_flags &= ~MASK_SMALL_DATA;
586
587 alpha_override_options_after_change ();
588
589 /* Register variables and functions with the garbage collector. */
590
591 /* Set up function hooks. */
592 init_machine_status = alpha_init_machine_status;
593
594 /* Tell the compiler when we're using VAX floating point. */
595 if (TARGET_FLOAT_VAX)
596 {
597 REAL_MODE_FORMAT (SFmode) = &vax_f_format;
598 REAL_MODE_FORMAT (DFmode) = &vax_g_format;
599 REAL_MODE_FORMAT (TFmode) = NULL;
600 }
601
602 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
603 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
604 target_flags |= MASK_LONG_DOUBLE_128;
605 #endif
606
607 }
608
609 /* Implement targetm.override_options_after_change. */
610
611 static void
612 alpha_override_options_after_change (void)
613 {
614 /* Align labels and loops for optimal branching. */
615 /* ??? Kludge these by not doing anything if we don't optimize. */
616 if (optimize > 0)
617 {
618 if (flag_align_loops && !str_align_loops)
619 str_align_loops = "16";
620 if (flag_align_jumps && !str_align_jumps)
621 str_align_jumps = "16";
622 }
623 if (flag_align_functions && !str_align_functions)
624 str_align_functions = "16";
625 }
626 \f
627 /* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */
628
629 int
630 zap_mask (HOST_WIDE_INT value)
631 {
632 int i;
633
634 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
635 i++, value >>= 8)
636 if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
637 return 0;
638
639 return 1;
640 }
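
/* Worked example: zap_mask (0x00000000ffff0000) returns 1, since every
   byte of the value is either 0x00 or 0xff and the value can therefore
   act as a byte mask for a ZAP/ZAPNOT-style operation; zap_mask
   (0x00f00000) returns 0 because the 0xf0 byte is neither all zeros nor
   all ones.  */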
641
642 /* Return true if OP is valid for a particular TLS relocation.
643 We are already guaranteed that OP is a CONST. */
644
645 int
646 tls_symbolic_operand_1 (rtx op, int size, int unspec)
647 {
648 op = XEXP (op, 0);
649
650 if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
651 return 0;
652 op = XVECEXP (op, 0, 0);
653
654 if (GET_CODE (op) != SYMBOL_REF)
655 return 0;
656
657 switch (SYMBOL_REF_TLS_MODEL (op))
658 {
659 case TLS_MODEL_LOCAL_DYNAMIC:
660 return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
661 case TLS_MODEL_INITIAL_EXEC:
662 return unspec == UNSPEC_TPREL && size == 64;
663 case TLS_MODEL_LOCAL_EXEC:
664 return unspec == UNSPEC_TPREL && size == alpha_tls_size;
665 default:
666 gcc_unreachable ();
667 }
668 }
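
/* For illustration, the CONSTs accepted here have the shape
     (const (unspec [(symbol_ref "foo")] UNSPEC_TPREL))
   or the UNSPEC_DTPREL equivalent; anything else, or a symbol whose TLS
   model does not match SIZE and UNSPEC, is rejected.  */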
669
670 /* Used by aligned_memory_operand and unaligned_memory_operand to
671 resolve what reload is going to do with OP if it's a register. */
672
673 rtx
674 resolve_reload_operand (rtx op)
675 {
676 if (reload_in_progress)
677 {
678 rtx tmp = op;
679 if (SUBREG_P (tmp))
680 tmp = SUBREG_REG (tmp);
681 if (REG_P (tmp)
682 && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
683 {
684 op = reg_equiv_memory_loc (REGNO (tmp));
685 if (op == 0)
686 return 0;
687 }
688 }
689 return op;
690 }
691
692 /* The scalar modes supported differ from the default check-what-c-supports
693 version in that sometimes TFmode is available even when long double
694 indicates only DFmode. */
695
696 static bool
697 alpha_scalar_mode_supported_p (scalar_mode mode)
698 {
699 switch (mode)
700 {
701 case E_QImode:
702 case E_HImode:
703 case E_SImode:
704 case E_DImode:
705 case E_TImode: /* via optabs.cc */
706 return true;
707
708 case E_SFmode:
709 case E_DFmode:
710 return true;
711
712 case E_TFmode:
713 return TARGET_HAS_XFLOATING_LIBS;
714
715 default:
716 return false;
717 }
718 }
719
720 /* Alpha implements a couple of integer vector mode operations when
721 TARGET_MAX is enabled. We do not check TARGET_MAX here, however,
722 which allows the vectorizer to operate on e.g. move instructions,
723 or when expand_vector_operations can do something useful. */
724
725 static bool
726 alpha_vector_mode_supported_p (machine_mode mode)
727 {
728 return mode == V8QImode || mode == V4HImode || mode == V2SImode;
729 }
730
731 /* Return the TLS model to use for SYMBOL. */
732
733 static enum tls_model
734 tls_symbolic_operand_type (rtx symbol)
735 {
736 enum tls_model model;
737
738 if (GET_CODE (symbol) != SYMBOL_REF)
739 return TLS_MODEL_NONE;
740 model = SYMBOL_REF_TLS_MODEL (symbol);
741
742 /* Local-exec with a 64-bit size is the same code as initial-exec. */
743 if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
744 model = TLS_MODEL_INITIAL_EXEC;
745
746 return model;
747 }
748 \f
749 /* Return true if the function DECL will share the same GP as any
750 function in the current unit of translation. */
751
752 static bool
753 decl_has_samegp (const_tree decl)
754 {
755 /* Functions that are not local can be overridden, and thus may
756 not share the same gp. */
757 if (!(*targetm.binds_local_p) (decl))
758 return false;
759
760 /* If -msmall-data is in effect, assume that there is only one GP
761 for the module, and so any local symbol has this property. We
762 need explicit relocations to be able to enforce this for symbols
763 not defined in this unit of translation, however. */
764 if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
765 return true;
766
767 /* Functions that are not external are defined in this UoT. */
768 /* ??? Irritatingly, static functions not yet emitted are still
769 marked "external". Apply this to non-static functions only. */
770 return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
771 }
772
773 /* Return true if EXP should be placed in the small data section. */
774
775 static bool
776 alpha_in_small_data_p (const_tree exp)
777 {
778 /* We want to merge strings, so we never consider them small data. */
779 if (TREE_CODE (exp) == STRING_CST)
780 return false;
781
782 /* Functions are never in the small data area. Duh. */
783 if (TREE_CODE (exp) == FUNCTION_DECL)
784 return false;
785
786 /* COMMON symbols are never small data. */
787 if (VAR_P (exp) && DECL_COMMON (exp))
788 return false;
789
790 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
791 {
792 const char *section = DECL_SECTION_NAME (exp);
793 if (strcmp (section, ".sdata") == 0
794 || strcmp (section, ".sbss") == 0)
795 return true;
796 }
797 else
798 {
799 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
800
801 /* If this is an incomplete type with size 0, then we can't put it
802 in sdata because it might be too big when completed. */
803 if (size > 0 && size <= g_switch_value)
804 return true;
805 }
806
807 return false;
808 }
809
810 #if TARGET_ABI_OPEN_VMS
811 static bool
812 vms_valid_pointer_mode (scalar_int_mode mode)
813 {
814 return (mode == SImode || mode == DImode);
815 }
816
817 static bool
818 alpha_linkage_symbol_p (const char *symname)
819 {
820 int symlen = strlen (symname);
821
822 if (symlen > 4)
823 return strcmp (&symname [symlen - 4], "..lk") == 0;
824
825 return false;
826 }
827
828 #define LINKAGE_SYMBOL_REF_P(X) \
829 ((GET_CODE (X) == SYMBOL_REF \
830 && alpha_linkage_symbol_p (XSTR (X, 0))) \
831 || (GET_CODE (X) == CONST \
832 && GET_CODE (XEXP (X, 0)) == PLUS \
833 && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
834 && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
835 #endif
836
837 /* legitimate_address_p recognizes an RTL expression that is a valid
838 memory address for an instruction. The MODE argument is the
839 machine mode for the MEM expression that wants to use this address.
840
841 For Alpha, we have either a constant address or the sum of a
842 register and a constant address, or just a register. For DImode,
843 any of those forms can be surrounded with an AND that clears the
844 low-order three bits; this is an "unaligned" access. */
845
846 static bool
847 alpha_legitimate_address_p (machine_mode mode, rtx x, bool strict,
848 code_helper = ERROR_MARK)
849 {
850 /* If this is an ldq_u type address, discard the outer AND. */
851 if (mode == DImode
852 && GET_CODE (x) == AND
853 && CONST_INT_P (XEXP (x, 1))
854 && INTVAL (XEXP (x, 1)) == -8)
855 x = XEXP (x, 0);
856
857 /* Discard non-paradoxical subregs. */
858 if (SUBREG_P (x)
859 && (GET_MODE_SIZE (GET_MODE (x))
860 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
861 x = SUBREG_REG (x);
862
863 /* Unadorned general registers are valid. */
864 if (REG_P (x)
865 && (strict
866 ? STRICT_REG_OK_FOR_BASE_P (x)
867 : NONSTRICT_REG_OK_FOR_BASE_P (x)))
868 return true;
869
870 /* Constant addresses (i.e. +/- 32k) are valid. */
871 if (CONSTANT_ADDRESS_P (x))
872 return true;
873
874 #if TARGET_ABI_OPEN_VMS
875 if (LINKAGE_SYMBOL_REF_P (x))
876 return true;
877 #endif
878
879 /* Register plus a small constant offset is valid. */
880 if (GET_CODE (x) == PLUS)
881 {
882 rtx ofs = XEXP (x, 1);
883 x = XEXP (x, 0);
884
885 /* Discard non-paradoxical subregs. */
886 if (SUBREG_P (x)
887 && (GET_MODE_SIZE (GET_MODE (x))
888 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
889 x = SUBREG_REG (x);
890
891 if (REG_P (x))
892 {
893 if (! strict
894 && NONSTRICT_REG_OK_FP_BASE_P (x)
895 && CONST_INT_P (ofs))
896 return true;
897 if ((strict
898 ? STRICT_REG_OK_FOR_BASE_P (x)
899 : NONSTRICT_REG_OK_FOR_BASE_P (x))
900 && CONSTANT_ADDRESS_P (ofs))
901 return true;
902 }
903 }
904
905 /* If we're managing explicit relocations, LO_SUM is valid, as are small
906 data symbols. Avoid explicit relocations of modes larger than word
907 mode since e.g. $LC0+8($1) can fold around +/- 32k offset. */
908 else if (TARGET_EXPLICIT_RELOCS
909 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
910 {
911 if (small_symbolic_operand (x, Pmode))
912 return true;
913
914 if (GET_CODE (x) == LO_SUM)
915 {
916 rtx ofs = XEXP (x, 1);
917 x = XEXP (x, 0);
918
919 /* Discard non-paradoxical subregs. */
920 if (SUBREG_P (x)
921 && (GET_MODE_SIZE (GET_MODE (x))
922 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
923 x = SUBREG_REG (x);
924
925 /* Must have a valid base register. */
926 if (! (REG_P (x)
927 && (strict
928 ? STRICT_REG_OK_FOR_BASE_P (x)
929 : NONSTRICT_REG_OK_FOR_BASE_P (x))))
930 return false;
931
932 /* The symbol must be local. */
933 if (local_symbolic_operand (ofs, Pmode)
934 || dtp32_symbolic_operand (ofs, Pmode)
935 || tp32_symbolic_operand (ofs, Pmode))
936 return true;
937 }
938 }
939
940 return false;
941 }
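
/* Some concrete DImode examples of the forms accepted above, assuming
   $1 is usable as a base register in the current (strict or nonstrict)
   context:
     (reg $1)                                plain register
     (plus (reg $1) (const_int 8))           register plus small offset
     (and (plus (reg $1) (const_int 8))
          (const_int -8))                    ldq_u-style unaligned access
     (lo_sum (reg $1) (symbol_ref))          explicit-relocation low part,
                                             for a local symbol  */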
942
943 /* Build the SYMBOL_REF for __tls_get_addr. */
944
945 static GTY(()) rtx tls_get_addr_libfunc;
946
947 static rtx
948 get_tls_get_addr (void)
949 {
950 if (!tls_get_addr_libfunc)
951 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
952 return tls_get_addr_libfunc;
953 }
954
955 /* Try machine-dependent ways of modifying an illegitimate address
956 to be legitimate. If we find one, return the new, valid address. */
957
958 static rtx
959 alpha_legitimize_address_1 (rtx x, rtx scratch, machine_mode mode)
960 {
961 HOST_WIDE_INT addend;
962
963 /* If the address is (plus reg const_int) and the CONST_INT is not a
964 valid offset, compute the high part of the constant and add it to
965 the register. Then our address is (plus temp low-part-const). */
966 if (GET_CODE (x) == PLUS
967 && REG_P (XEXP (x, 0))
968 && CONST_INT_P (XEXP (x, 1))
969 && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
970 {
971 addend = INTVAL (XEXP (x, 1));
972 x = XEXP (x, 0);
973 goto split_addend;
974 }
975
976 /* If the address is (const (plus FOO const_int)), find the low-order
977 part of the CONST_INT. Then load FOO plus any high-order part of the
978 CONST_INT into a register. Our address is (plus reg low-part-const).
979 This is done to reduce the number of GOT entries. */
980 if (can_create_pseudo_p ()
981 && GET_CODE (x) == CONST
982 && GET_CODE (XEXP (x, 0)) == PLUS
983 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
984 {
985 addend = INTVAL (XEXP (XEXP (x, 0), 1));
986 x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
987 goto split_addend;
988 }
989
990 /* If we have a (plus reg const), emit the load as in (2), then add
991 the two registers, and finally generate (plus reg low-part-const) as
992 our address. */
993 if (can_create_pseudo_p ()
994 && GET_CODE (x) == PLUS
995 && REG_P (XEXP (x, 0))
996 && GET_CODE (XEXP (x, 1)) == CONST
997 && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
998 && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1)))
999 {
1000 addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
1001 x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
1002 XEXP (XEXP (XEXP (x, 1), 0), 0),
1003 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1004 goto split_addend;
1005 }
1006
1007 /* If this is a local symbol, split the address into HIGH/LO_SUM parts.
1008 Avoid modes larger than word mode since e.g. $LC0+8($1) can fold
1009 around +/- 32k offset. */
1010 if (TARGET_EXPLICIT_RELOCS
1011 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
1012 && symbolic_operand (x, Pmode))
1013 {
1014 rtx r0, r16, eqv, tga, tp, dest, seq;
1015 rtx_insn *insn;
1016
1017 switch (tls_symbolic_operand_type (x))
1018 {
1019 case TLS_MODEL_NONE:
1020 break;
1021
1022 case TLS_MODEL_GLOBAL_DYNAMIC:
1023 {
1024 start_sequence ();
1025
1026 r0 = gen_rtx_REG (Pmode, 0);
1027 r16 = gen_rtx_REG (Pmode, 16);
1028 tga = get_tls_get_addr ();
1029 dest = gen_reg_rtx (Pmode);
1030 seq = GEN_INT (alpha_next_sequence_number++);
1031
1032 emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
1033 rtx val = gen_call_value_osf_tlsgd (r0, tga, seq);
1034 insn = emit_call_insn (val);
1035 RTL_CONST_CALL_P (insn) = 1;
1036 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1037
1038 insn = get_insns ();
1039 end_sequence ();
1040
1041 emit_libcall_block (insn, dest, r0, x);
1042 return dest;
1043 }
1044
1045 case TLS_MODEL_LOCAL_DYNAMIC:
1046 {
1047 start_sequence ();
1048
1049 r0 = gen_rtx_REG (Pmode, 0);
1050 r16 = gen_rtx_REG (Pmode, 16);
1051 tga = get_tls_get_addr ();
1052 scratch = gen_reg_rtx (Pmode);
1053 seq = GEN_INT (alpha_next_sequence_number++);
1054
1055 emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
1056 rtx val = gen_call_value_osf_tlsldm (r0, tga, seq);
1057 insn = emit_call_insn (val);
1058 RTL_CONST_CALL_P (insn) = 1;
1059 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1060
1061 insn = get_insns ();
1062 end_sequence ();
1063
1064 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1065 UNSPEC_TLSLDM_CALL);
1066 emit_libcall_block (insn, scratch, r0, eqv);
1067
1068 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
1069 eqv = gen_rtx_CONST (Pmode, eqv);
1070
1071 if (alpha_tls_size == 64)
1072 {
1073 dest = gen_reg_rtx (Pmode);
1074 emit_insn (gen_rtx_SET (dest, eqv));
1075 emit_insn (gen_adddi3 (dest, dest, scratch));
1076 return dest;
1077 }
1078 if (alpha_tls_size == 32)
1079 {
1080 rtx temp = gen_rtx_HIGH (Pmode, eqv);
1081 temp = gen_rtx_PLUS (Pmode, scratch, temp);
1082 scratch = gen_reg_rtx (Pmode);
1083 emit_insn (gen_rtx_SET (scratch, temp));
1084 }
1085 return gen_rtx_LO_SUM (Pmode, scratch, eqv);
1086 }
1087
1088 case TLS_MODEL_INITIAL_EXEC:
1089 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1090 eqv = gen_rtx_CONST (Pmode, eqv);
1091 tp = gen_reg_rtx (Pmode);
1092 scratch = gen_reg_rtx (Pmode);
1093 dest = gen_reg_rtx (Pmode);
1094
1095 emit_insn (gen_get_thread_pointerdi (tp));
1096 emit_insn (gen_rtx_SET (scratch, eqv));
1097 emit_insn (gen_adddi3 (dest, tp, scratch));
1098 return dest;
1099
1100 case TLS_MODEL_LOCAL_EXEC:
1101 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1102 eqv = gen_rtx_CONST (Pmode, eqv);
1103 tp = gen_reg_rtx (Pmode);
1104
1105 emit_insn (gen_get_thread_pointerdi (tp));
1106 if (alpha_tls_size == 32)
1107 {
1108 rtx temp = gen_rtx_HIGH (Pmode, eqv);
1109 temp = gen_rtx_PLUS (Pmode, tp, temp);
1110 tp = gen_reg_rtx (Pmode);
1111 emit_insn (gen_rtx_SET (tp, temp));
1112 }
1113 return gen_rtx_LO_SUM (Pmode, tp, eqv);
1114
1115 default:
1116 gcc_unreachable ();
1117 }
1118
1119 if (local_symbolic_operand (x, Pmode))
1120 {
1121 if (small_symbolic_operand (x, Pmode))
1122 return x;
1123 else
1124 {
1125 if (can_create_pseudo_p ())
1126 scratch = gen_reg_rtx (Pmode);
1127 emit_insn (gen_rtx_SET (scratch, gen_rtx_HIGH (Pmode, x)));
1128 return gen_rtx_LO_SUM (Pmode, scratch, x);
1129 }
1130 }
1131 }
1132
1133 return NULL;
1134
1135 split_addend:
1136 {
1137 HOST_WIDE_INT low, high;
1138
1139 low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
1140 addend -= low;
1141 high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
1142 addend -= high;
1143
1144 if (addend)
1145 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
1146 (!can_create_pseudo_p () ? scratch : NULL_RTX),
1147 1, OPTAB_LIB_WIDEN);
1148 if (high)
1149 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
1150 (!can_create_pseudo_p () ? scratch : NULL_RTX),
1151 1, OPTAB_LIB_WIDEN);
1152
1153 return plus_constant (Pmode, x, low);
1154 }
1155 }
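
/* Worked example of the split_addend arithmetic above: for
   (plus (reg $1) (const_int 0x1234abcd)), LOW sign-extends the bottom 16
   bits to -0x5433, leaving 0x12350000 as the HIGH part to be added into
   a register, and the returned address is (plus (reg tmp) -0x5433).
   Note that the high part rounds up to 0x1235 precisely because LOW is
   negative.  */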
1156
1157
1158 /* Try machine-dependent ways of modifying an illegitimate address
1159 to be legitimate. Return X or the new, valid address. */
1160
1161 static rtx
1162 alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1163 machine_mode mode)
1164 {
1165 rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode);
1166 return new_x ? new_x : x;
1167 }
1168
1169 /* Return true if ADDR has an effect that depends on the machine mode it
1170 is used for. On the Alpha this is true only for the unaligned modes.
1171 We can simplify the test since we know that the address must be valid. */
1172
1173 static bool
1174 alpha_mode_dependent_address_p (const_rtx addr,
1175 addr_space_t as ATTRIBUTE_UNUSED)
1176 {
1177 return GET_CODE (addr) == AND;
1178 }
1179
1180 /* Primarily this is required for TLS symbols, but given that our move
1181 patterns *ought* to be able to handle any symbol at any time, we
1182 should never be spilling symbolic operands to the constant pool, ever. */
1183
1184 static bool
1185 alpha_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1186 {
1187 enum rtx_code code = GET_CODE (x);
1188 return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
1189 }
1190
1191 /* We do not allow indirect calls to be optimized into sibling calls, nor
1192 can we allow a call to a function with a different GP to be optimized
1193 into a sibcall. */
1194
1195 static bool
1196 alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1197 {
1198 /* Can't do indirect tail calls, since we don't know if the target
1199 uses the same GP. */
1200 if (!decl)
1201 return false;
1202
1203 /* Otherwise, we can make a tail call if the target function shares
1204 the same GP. */
1205 return decl_has_samegp (decl);
1206 }
1207
1208 bool
1209 some_small_symbolic_operand_int (rtx x)
1210 {
1211 subrtx_var_iterator::array_type array;
1212 FOR_EACH_SUBRTX_VAR (iter, array, x, ALL)
1213 {
1214 rtx x = *iter;
1215 /* Don't re-split. */
1216 if (GET_CODE (x) == LO_SUM)
1217 iter.skip_subrtxes ();
1218 else if (small_symbolic_operand (x, Pmode))
1219 return true;
1220 }
1221 return false;
1222 }
1223
1224 rtx
1225 split_small_symbolic_operand (rtx x)
1226 {
1227 x = copy_insn (x);
1228 subrtx_ptr_iterator::array_type array;
1229 FOR_EACH_SUBRTX_PTR (iter, array, &x, ALL)
1230 {
1231 rtx *ptr = *iter;
1232 rtx x = *ptr;
1233 /* Don't re-split. */
1234 if (GET_CODE (x) == LO_SUM)
1235 iter.skip_subrtxes ();
1236 else if (small_symbolic_operand (x, Pmode))
1237 {
1238 *ptr = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
1239 iter.skip_subrtxes ();
1240 }
1241 }
1242 return x;
1243 }
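
/* Illustrative before/after for the rewrite above: a reference such as
   (mem (symbol_ref "small_var")), where the symbol is known to live in
   the small data area, becomes
   (mem (lo_sum (reg $29) (symbol_ref "small_var"))),
   i.e. a 16-bit gp-relative displacement off pic_offset_table_rtx
   (the GP register, $29, on Alpha).  */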
1244
1245 /* Indicate that INSN cannot be duplicated. This is true for any insn
1246 that we've marked with gpdisp relocs, since those have to stay in
1247 1-1 correspondence with one another.
1248
1249 Technically we could copy them if we could set up a mapping from one
1250 sequence number to another, across the set of insns to be duplicated.
1251 This seems overly complicated and error-prone since interblock motion
1252 from sched-ebb could move one of the pair of insns to a different block.
1253
1254 Also cannot allow jsr insns to be duplicated. If they throw exceptions,
1255 then they'll be in a different block from their ldgp. Which could lead
1256 the bb reorder code to think that it would be ok to copy just the block
1257 containing the call and branch to the block containing the ldgp. */
1258
1259 static bool
1260 alpha_cannot_copy_insn_p (rtx_insn *insn)
1261 {
1262 if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
1263 return false;
1264 if (recog_memoized (insn) >= 0)
1265 return get_attr_cannot_copy (insn);
1266 else
1267 return false;
1268 }
1269
1270
1271 /* Try a machine-dependent way of reloading an illegitimate address
1272 operand. If we find one, push the reload and return the new rtx. */
1273
1274 rtx
1275 alpha_legitimize_reload_address (rtx x,
1276 machine_mode mode ATTRIBUTE_UNUSED,
1277 int opnum, int type,
1278 int ind_levels ATTRIBUTE_UNUSED)
1279 {
1280 /* We must recognize output that we have already generated ourselves. */
1281 if (GET_CODE (x) == PLUS
1282 && GET_CODE (XEXP (x, 0)) == PLUS
1283 && REG_P (XEXP (XEXP (x, 0), 0))
1284 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
1285 && CONST_INT_P (XEXP (x, 1)))
1286 {
1287 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1288 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1289 opnum, (enum reload_type) type);
1290 return x;
1291 }
1292
1293 /* We wish to handle large displacements off a base register by
1294 splitting the addend across an ldah and the mem insn. This
1295 cuts the number of extra insns needed from 3 to 1. */
1296 if (GET_CODE (x) == PLUS
1297 && REG_P (XEXP (x, 0))
1298 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
1299 && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
1300 && CONST_INT_P (XEXP (x, 1)))
1301 {
1302 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
1303 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
1304 HOST_WIDE_INT high
1305 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
1306
1307 /* Check for 32-bit overflow. */
1308 if (high + low != val)
1309 return NULL_RTX;
1310
1311 /* Reload the high part into a base reg; leave the low part
1312 in the mem directly. */
1313 x = gen_rtx_PLUS (GET_MODE (x),
1314 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
1315 GEN_INT (high)),
1316 GEN_INT (low));
1317
1318 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1319 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1320 opnum, (enum reload_type) type);
1321 return x;
1322 }
1323
1324 return NULL_RTX;
1325 }
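
/* The split above mirrors split_addend in alpha_legitimize_address_1:
   e.g. a displacement of 0x1234abcd becomes HIGH = 0x12350000 in the
   reloaded base register and LOW = -0x5433 left in the mem.  The
   overflow check rejects values such as 0x80000000, where HIGH
   sign-extends to -0x80000000 and HIGH + LOW no longer reproduces the
   original displacement.  */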
1326 \f
1327 /* Return the cost of moving between registers of various classes. Moving
1328 between FLOAT_REGS and anything else except float regs is expensive.
1329 In fact, we make it quite expensive because we really don't want to
1330 do these moves unless it is clearly worth it. Optimizations may
1331 reduce the impact of not being able to allocate a pseudo to a
1332 hard register. */
1333
1334 static int
1335 alpha_register_move_cost (machine_mode /*mode*/,
1336 reg_class_t from, reg_class_t to)
1337 {
1338 if ((from == FLOAT_REGS) == (to == FLOAT_REGS))
1339 return 2;
1340
1341 if (TARGET_FIX)
1342 return (from == FLOAT_REGS) ? 6 : 8;
1343
1344 return 4 + 2 * alpha_memory_latency;
1345 }
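
/* For illustration, with the default alpha_memory_latency of 3 a GP<->FP
   move costs 4 + 2*3 = 10 when the FIX extension (ftoit/itoft) is not
   available and the copy has to bounce through memory, versus 6 or 8
   with FIX, while a move within either register file costs just 2.  */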
1346
1347 /* Return the cost of moving data of MODE from a register to
1348 or from memory. On the Alpha, bump this up a bit. */
1349
1350 static int
1351 alpha_memory_move_cost (machine_mode /*mode*/, reg_class_t /*regclass*/,
1352 bool /*in*/)
1353 {
1354 return 2 * alpha_memory_latency;
1355 }
1356
1357 /* Compute a (partial) cost for rtx X. Return true if the complete
1358 cost has been computed, and false if subexpressions should be
1359 scanned. In either case, *TOTAL contains the cost result. */
1360
1361 static bool
1362 alpha_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno, int *total,
1363 bool speed)
1364 {
1365 int code = GET_CODE (x);
1366 bool float_mode_p = FLOAT_MODE_P (mode);
1367 const struct alpha_rtx_cost_data *cost_data;
1368
1369 if (!speed)
1370 cost_data = &alpha_rtx_cost_size;
1371 else
1372 cost_data = &alpha_rtx_cost_data[alpha_tune];
1373
1374 switch (code)
1375 {
1376 case CONST_INT:
1377 /* If this is an 8-bit constant, return zero since it can be used
1378 nearly anywhere with no cost. If it is a valid operand for an
1379 ADD or AND, likewise return 0 if we know it will be used in that
1380 context. Otherwise, return 2 since it might be used there later.
1381 All other constants take at least two insns. */
1382 if (INTVAL (x) >= 0 && INTVAL (x) < 256)
1383 {
1384 *total = 0;
1385 return true;
1386 }
1387 /* FALLTHRU */
1388
1389 case CONST_DOUBLE:
1390 case CONST_WIDE_INT:
1391 if (x == CONST0_RTX (mode))
1392 *total = 0;
1393 else if ((outer_code == PLUS && add_operand (x, VOIDmode))
1394 || (outer_code == AND && and_operand (x, VOIDmode)))
1395 *total = 0;
1396 else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
1397 *total = 2;
1398 else
1399 *total = COSTS_N_INSNS (2);
1400 return true;
1401
1402 case CONST:
1403 case SYMBOL_REF:
1404 case LABEL_REF:
1405 if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
1406 *total = COSTS_N_INSNS (outer_code != MEM);
1407 else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
1408 *total = COSTS_N_INSNS (1 + (outer_code != MEM));
1409 else if (tls_symbolic_operand_type (x))
1410 /* Estimate of cost for call_pal rduniq. */
1411 /* ??? How many insns do we emit here? More than one... */
1412 *total = COSTS_N_INSNS (15);
1413 else
1414 /* Otherwise we do a load from the GOT. */
1415 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1416 return true;
1417
1418 case HIGH:
1419 /* This is effectively an add_operand. */
1420 *total = 2;
1421 return true;
1422
1423 case PLUS:
1424 case MINUS:
1425 if (float_mode_p)
1426 *total = cost_data->fp_add;
1427 else if (GET_CODE (XEXP (x, 0)) == ASHIFT
1428 && const23_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
1429 {
1430 *total = (rtx_cost (XEXP (XEXP (x, 0), 0), mode,
1431 (enum rtx_code) outer_code, opno, speed)
1432 + rtx_cost (XEXP (x, 1), mode,
1433 (enum rtx_code) outer_code, opno, speed)
1434 + COSTS_N_INSNS (1));
1435 return true;
1436 }
1437 return false;
1438
1439 case MULT:
1440 if (float_mode_p)
1441 *total = cost_data->fp_mult;
1442 else if (mode == DImode)
1443 *total = cost_data->int_mult_di;
1444 else
1445 *total = cost_data->int_mult_si;
1446 return false;
1447
1448 case ASHIFT:
1449 if (CONST_INT_P (XEXP (x, 1))
1450 && INTVAL (XEXP (x, 1)) <= 3)
1451 {
1452 *total = COSTS_N_INSNS (1);
1453 return false;
1454 }
1455 /* FALLTHRU */
1456
1457 case ASHIFTRT:
1458 case LSHIFTRT:
1459 *total = cost_data->int_shift;
1460 return false;
1461
1462 case IF_THEN_ELSE:
1463 if (float_mode_p)
1464 *total = cost_data->fp_add;
1465 else
1466 *total = cost_data->int_cmov;
1467 return false;
1468
1469 case DIV:
1470 case UDIV:
1471 case MOD:
1472 case UMOD:
1473 if (!float_mode_p)
1474 *total = cost_data->int_div;
1475 else if (mode == SFmode)
1476 *total = cost_data->fp_div_sf;
1477 else
1478 *total = cost_data->fp_div_df;
1479 return false;
1480
1481 case MEM:
1482 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1483 return true;
1484
1485 case NEG:
1486 if (! float_mode_p)
1487 {
1488 *total = COSTS_N_INSNS (1);
1489 return false;
1490 }
1491 /* FALLTHRU */
1492
1493 case ABS:
1494 if (! float_mode_p)
1495 {
1496 *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
1497 return false;
1498 }
1499 /* FALLTHRU */
1500
1501 case FLOAT:
1502 case UNSIGNED_FLOAT:
1503 case FIX:
1504 case UNSIGNED_FIX:
1505 case FLOAT_TRUNCATE:
1506 *total = cost_data->fp_add;
1507 return false;
1508
1509 case FLOAT_EXTEND:
1510 if (MEM_P (XEXP (x, 0)))
1511 *total = 0;
1512 else
1513 *total = cost_data->fp_add;
1514 return false;
1515
1516 default:
1517 return false;
1518 }
1519 }
1520 \f
1521 /* REF is an alignable memory location. Place an aligned SImode
1522 reference into *PALIGNED_MEM and the number of bits to shift into
1523 *PBITNUM. SCRATCH is a free register for use in reloading out
1524 of range stack slots. */
1525
1526 void
1527 get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
1528 {
1529 rtx base;
1530 HOST_WIDE_INT disp, offset;
1531
1532 gcc_assert (MEM_P (ref));
1533
1534 if (reload_in_progress)
1535 {
1536 base = find_replacement (&XEXP (ref, 0));
1537 gcc_assert (memory_address_p (GET_MODE (ref), base));
1538 }
1539 else
1540 base = XEXP (ref, 0);
1541
1542 if (GET_CODE (base) == PLUS)
1543 disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1544 else
1545 disp = 0;
1546
1547 /* Find the byte offset within an aligned word. If the memory itself is
1548 claimed to be aligned, believe it. Otherwise, aligned_memory_operand
1549 will have examined the base register and determined it is aligned, and
1550 thus displacements from it are naturally alignable. */
1551 if (MEM_ALIGN (ref) >= 32)
1552 offset = 0;
1553 else
1554 offset = disp & 3;
1555
1556 /* The location should not cross aligned word boundary. */
1557 gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref))
1558 <= GET_MODE_SIZE (SImode));
1559
1560 /* Access the entire aligned word. */
1561 *paligned_mem = widen_memory_access (ref, SImode, -offset);
1562
1563 /* Convert the byte offset within the word to a bit offset. */
1564 offset *= BITS_PER_UNIT;
1565 *pbitnum = GEN_INT (offset);
1566 }
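
/* Worked example: for a HImode reference at 6(reg) whose MEM carries no
   alignment promise, DISP is 6 and OFFSET is 6 & 3 = 2, so *PALIGNED_MEM
   becomes the SImode word at 4(reg) and *PBITNUM is 16, i.e. the
   halfword occupies bits 16..31 of the aligned longword (Alpha is
   little-endian).  */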
1567
1568 /* Similar, but just get the address.  Handle the two reload cases
1569    as get_aligned_mem does.  */
1570
1571 rtx
1572 get_unaligned_address (rtx ref)
1573 {
1574 rtx base;
1575 HOST_WIDE_INT offset = 0;
1576
1577 gcc_assert (MEM_P (ref));
1578
1579 if (reload_in_progress)
1580 {
1581 base = find_replacement (&XEXP (ref, 0));
1582 gcc_assert (memory_address_p (GET_MODE (ref), base));
1583 }
1584 else
1585 base = XEXP (ref, 0);
1586
1587 if (GET_CODE (base) == PLUS)
1588 offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1589
1590 return plus_constant (Pmode, base, offset);
1591 }
1592
1593 /* Compute a value X, such that X & 7 == (ADDR + OFS) & 7.
1594 X is always returned in a register. */
1595
1596 rtx
1597 get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
1598 {
1599 if (GET_CODE (addr) == PLUS)
1600 {
1601 ofs += INTVAL (XEXP (addr, 1));
1602 addr = XEXP (addr, 0);
1603 }
1604
1605 return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7),
1606 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1607 }
1608
1609 /* On the Alpha, all (non-symbolic) constants except zero go into
1610 a floating-point register via memory. Note that we cannot
1611 return anything that is not a subset of RCLASS, and that some
1612 symbolic constants cannot be dropped to memory. */
1613
1614 enum reg_class
1615 alpha_preferred_reload_class(rtx x, enum reg_class rclass)
1616 {
1617 /* Zero is present in any register class. */
1618 if (x == CONST0_RTX (GET_MODE (x)))
1619 return rclass;
1620
1621 /* These sorts of constants we can easily drop to memory. */
1622 if (CONST_SCALAR_INT_P (x)
1623 || CONST_DOUBLE_P (x)
1624 || GET_CODE (x) == CONST_VECTOR)
1625 {
1626 if (rclass == FLOAT_REGS)
1627 return NO_REGS;
1628 if (rclass == ALL_REGS)
1629 return GENERAL_REGS;
1630 return rclass;
1631 }
1632
1633 /* All other kinds of constants should not (and in the case of HIGH
1634 cannot) be dropped to memory -- instead we use a GENERAL_REGS
1635 secondary reload. */
1636 if (CONSTANT_P (x))
1637 return (rclass == ALL_REGS ? GENERAL_REGS : rclass);
1638
1639 return rclass;
1640 }
1641
1642 /* Inform reload about cases where moving X with a mode MODE to a register in
1643 RCLASS requires an extra scratch or immediate register. Return the class
1644 needed for the immediate register. */
1645
1646 static reg_class_t
1647 alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
1648 machine_mode mode, secondary_reload_info *sri)
1649 {
1650 enum reg_class rclass = (enum reg_class) rclass_i;
1651
1652 /* Loading and storing HImode or QImode values to and from memory
1653 usually requires a scratch register. */
1654 if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode))
1655 {
1656 if (any_memory_operand (x, mode))
1657 {
1658 if (in_p)
1659 {
1660 if (!aligned_memory_operand (x, mode))
1661 sri->icode = direct_optab_handler (reload_in_optab, mode);
1662 }
1663 else
1664 sri->icode = direct_optab_handler (reload_out_optab, mode);
1665 return NO_REGS;
1666 }
1667 }
1668
1669 /* We also cannot do integral arithmetic into FP regs, as might result
1670 from register elimination into a DImode fp register. */
1671 if (rclass == FLOAT_REGS)
1672 {
1673 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
1674 return GENERAL_REGS;
1675 if (in_p && INTEGRAL_MODE_P (mode)
1676 && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x))
1677 return GENERAL_REGS;
1678 }
1679
1680 return NO_REGS;
1681 }
1682
1683 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
1684
1685 If we are copying between general and FP registers, we need a memory
1686 location unless the FIX extension is available. */
1687
1688 static bool
1689 alpha_secondary_memory_needed (machine_mode, reg_class_t class1,
1690 reg_class_t class2)
1691 {
1692 return (!TARGET_FIX
1693 && ((class1 == FLOAT_REGS && class2 != FLOAT_REGS)
1694 || (class2 == FLOAT_REGS && class1 != FLOAT_REGS)));
1695 }
1696
1697 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. If MODE is
1698 floating-point, use it. Otherwise, widen to a word like the default.
1699 This is needed because we always store integers in FP registers in
1700 quadword format. This whole area is very tricky! */
1701
1702 static machine_mode
1703 alpha_secondary_memory_needed_mode (machine_mode mode)
1704 {
1705 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1706 return mode;
1707 if (GET_MODE_SIZE (mode) >= 4)
1708 return mode;
1709 return mode_for_size (BITS_PER_WORD, GET_MODE_CLASS (mode), 0).require ();
1710 }
1711 \f
1712 /* Given SEQ, which is an INSN list, look for any MEMs in either
1713 a SET_DEST or a SET_SRC and copy the in-struct, unchanging, and
1714 volatile flags from REF into each of the MEMs found. If REF is not
1715 a MEM, don't do anything. */
1716
1717 void
1718 alpha_set_memflags (rtx seq, rtx ref)
1719 {
1720 rtx_insn *insn;
1721
1722 if (!MEM_P (ref))
1723 return;
1724
1725 /* This is only called from alpha.md, after having had something
1726 generated from one of the insn patterns. So if everything is
1727 zero, the pattern is already up-to-date. */
1728 if (!MEM_VOLATILE_P (ref)
1729 && !MEM_NOTRAP_P (ref)
1730 && !MEM_READONLY_P (ref))
1731 return;
1732
1733 subrtx_var_iterator::array_type array;
1734 for (insn = as_a <rtx_insn *> (seq); insn; insn = NEXT_INSN (insn))
1735 if (INSN_P (insn))
1736 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST)
1737 {
1738 rtx x = *iter;
1739 if (MEM_P (x))
1740 {
1741 MEM_VOLATILE_P (x) = MEM_VOLATILE_P (ref);
1742 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (ref);
1743 MEM_READONLY_P (x) = MEM_READONLY_P (ref);
1744 /* Sadly, we cannot use alias sets because the extra
1745 aliasing produced by the AND interferes. Given that
1746 two-byte quantities are the only thing we would be
1747 able to differentiate anyway, there does not seem to
1748 be any point in convoluting the early out of the
1749 alias check. */
1750 iter.skip_subrtxes ();
1751 }
1752 }
1753 else
1754 gcc_unreachable ();
1755 }
1756 \f
1757 static rtx alpha_emit_set_const (rtx, machine_mode, HOST_WIDE_INT,
1758 int, bool);
1759
1760 /* Internal routine for alpha_emit_set_const to check for N or below insns.
1761 If NO_OUTPUT is true, then we only check to see if N insns are possible,
1762 and return pc_rtx if successful. */
1763
1764 static rtx
1765 alpha_emit_set_const_1 (rtx target, machine_mode mode,
1766 HOST_WIDE_INT c, int n, bool no_output)
1767 {
1768 HOST_WIDE_INT new_const;
1769 int i, bits;
1770 /* Use a pseudo if highly optimizing and still generating RTL. */
1771 rtx subtarget
1772 = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target);
1773 rtx temp, insn;
1774
1775 /* If this is a sign-extended 32-bit constant, we can do this in at most
1776 three insns, so do it if we have enough insns left. */
1777
1778 if (c >> 31 == -1 || c >> 31 == 0)
1779 {
1780 HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
1781 HOST_WIDE_INT tmp1 = c - low;
1782 HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
1783 HOST_WIDE_INT extra = 0;
1784
1785 /* If HIGH will be interpreted as negative but the constant is
1786 positive, we must adjust it to do two ldha insns. */
1787
1788 if ((high & 0x8000) != 0 && c >= 0)
1789 {
1790 extra = 0x4000;
1791 tmp1 -= 0x40000000;
1792 high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
1793 }
1794
1795 if (c == low || (low == 0 && extra == 0))
1796 {
1797 /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
1798 but that meant that we can't handle INT_MIN on 32-bit machines
1799 (like NT/Alpha), because we recurse indefinitely through
1800 emit_move_insn to gen_movdi. So instead, since we know exactly
1801 what we want, create it explicitly. */
1802
1803 if (no_output)
1804 return pc_rtx;
1805 if (target == NULL)
1806 target = gen_reg_rtx (mode);
1807 emit_insn (gen_rtx_SET (target, GEN_INT (c)));
1808 return target;
1809 }
1810 else if (n >= 2 + (extra != 0))
1811 {
1812 if (no_output)
1813 return pc_rtx;
1814 if (!can_create_pseudo_p ())
1815 {
1816 emit_insn (gen_rtx_SET (target, GEN_INT (high << 16)));
1817 temp = target;
1818 }
1819 else
1820 temp = copy_to_suggested_reg (GEN_INT (high << 16),
1821 subtarget, mode);
1822
1823 /* As of 2002-02-23, addsi3 is only available when not optimizing.
1824 This means that if we go through expand_binop, we'll try to
1825 generate extensions, etc, which will require new pseudos, which
1826 will fail during some split phases. The SImode add patterns
1827 still exist, but are not named. So build the insns by hand. */
1828
1829 if (extra != 0)
1830 {
1831 if (! subtarget)
1832 subtarget = gen_reg_rtx (mode);
1833 insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
1834 insn = gen_rtx_SET (subtarget, insn);
1835 emit_insn (insn);
1836 temp = subtarget;
1837 }
1838
1839 if (target == NULL)
1840 target = gen_reg_rtx (mode);
1841 insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1842 insn = gen_rtx_SET (target, insn);
1843 emit_insn (insn);
1844 return target;
1845 }
1846 }
1847
1848 /* If we couldn't do it that way, try some other methods. But if we have
1849 no instructions left, don't bother. Likewise, if this is SImode and
1850 we can't make pseudos, we can't do anything since the expand_binop
1851 and expand_unop calls will widen and try to make pseudos. */
1852
1853 if (n == 1 || (mode == SImode && !can_create_pseudo_p ()))
1854 return 0;
1855
1856 /* Next, see if we can load a related constant and then shift and possibly
1857 negate it to get the constant we want. Try this once each increasing
1858 numbers of insns. */
1859
1860 for (i = 1; i < n; i++)
1861 {
1862 /* First, see if minus some low bits, we've an easy load of
1863 high bits. */
1864
1865 new_const = ((c & 0xffff) ^ 0x8000) - 0x8000;
1866 if (new_const != 0)
1867 {
1868 temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output);
1869 if (temp)
1870 {
1871 if (no_output)
1872 return temp;
1873 return expand_binop (mode, add_optab, temp, GEN_INT (new_const),
1874 target, 0, OPTAB_WIDEN);
1875 }
1876 }
1877
1878 /* Next try complementing. */
1879 temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
1880 if (temp)
1881 {
1882 if (no_output)
1883 return temp;
1884 return expand_unop (mode, one_cmpl_optab, temp, target, 0);
1885 }
1886
1887 /* Next try to form a constant and do a left shift. We can do this
1888 if some low-order bits are zero; the exact_log2 call below tells
1889 us that information. The bits we are shifting out could be any
1890 value, but here we'll just try the 0- and sign-extended forms of
1891 the constant. To try to increase the chance of having the same
1892 constant in more than one insn, start at the highest number of
1893 bits to shift, but try all possibilities in case a ZAPNOT will
1894 be useful. */
1895
1896 bits = exact_log2 (c & -c);
1897 if (bits > 0)
1898 for (; bits > 0; bits--)
1899 {
1900 new_const = c >> bits;
1901 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1902 if (!temp && c < 0)
1903 {
1904 new_const = (unsigned HOST_WIDE_INT)c >> bits;
1905 temp = alpha_emit_set_const (subtarget, mode, new_const,
1906 i, no_output);
1907 }
1908 if (temp)
1909 {
1910 if (no_output)
1911 return temp;
1912 return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
1913 target, 0, OPTAB_WIDEN);
1914 }
1915 }
1916
1917 /* Now try high-order zero bits. Here we try the shifted-in bits as
1918 all zero and all ones. Be careful to avoid shifting outside the
1919 mode and to avoid shifting outside the host wide int size. */
1920
1921 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1922 - floor_log2 (c) - 1);
1923 if (bits > 0)
1924 for (; bits > 0; bits--)
1925 {
1926 new_const = c << bits;
1927 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1928 if (!temp)
1929 {
1930 new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1);
1931 temp = alpha_emit_set_const (subtarget, mode, new_const,
1932 i, no_output);
1933 }
1934 if (temp)
1935 {
1936 if (no_output)
1937 return temp;
1938 return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
1939 target, 1, OPTAB_WIDEN);
1940 }
1941 }
1942
1943 /* Now try high-order 1 bits. We get that with a sign-extension.
1944 But one bit isn't enough here. Be careful to avoid shifting outside
1945 the mode and to avoid shifting outside the host wide int size. */
1946
1947 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1948 - floor_log2 (~ c) - 2);
1949 if (bits > 0)
1950 for (; bits > 0; bits--)
1951 {
1952 new_const = c << bits;
1953 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1954 if (!temp)
1955 {
1956 new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1);
1957 temp = alpha_emit_set_const (subtarget, mode, new_const,
1958 i, no_output);
1959 }
1960 if (temp)
1961 {
1962 if (no_output)
1963 return temp;
1964 return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
1965 target, 0, OPTAB_WIDEN);
1966 }
1967 }
1968 }
1969
1970 /* Finally, see if we can load a value into the target that is the same as the
1971 constant except that all bytes that are 0 are changed to be 0xff. If we
1972 can, then we can do a ZAPNOT to obtain the desired constant. */
1973
1974 new_const = c;
1975 for (i = 0; i < 64; i += 8)
1976 if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0)
1977 new_const |= (HOST_WIDE_INT) 0xff << i;
1978
1979 /* We are only called for SImode and DImode. If this is SImode, ensure that
1980 we are sign extended to a full word. */
1981
1982 if (mode == SImode)
1983 new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000;
1984
1985 if (new_const != c)
1986 {
1987 temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output);
1988 if (temp)
1989 {
1990 if (no_output)
1991 return temp;
1992 return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const),
1993 target, 0, OPTAB_WIDEN);
1994 }
1995 }
1996
1997 return 0;
1998 }
1999
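/* As a minimal host-side illustration (not part of the build; the helper
   name is made up) of the ZAPNOT fallback at the end of the function
   above: every all-zero byte of C is filled with 0xff, and the original
   value is recovered by ANDing with a byte mask, which Alpha can apply
   with a single zapnot.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
zapnot_fallback_model (uint64_t c)
{
  uint64_t filled = c, byte_mask;
  int i;

  /* Mirror the loop above: force each all-zero byte to 0xff.  */
  for (i = 0; i < 64; i += 8)
    if ((filled & ((uint64_t) 0xff << i)) == 0)
      filled |= (uint64_t) 0xff << i;

  /* c | ~filled is 0xff exactly on the bytes that were nonzero, so it is
     a valid zapnot byte mask, and masking FILLED with it recovers C.  */
  byte_mask = c | ~filled;
  assert ((filled & byte_mask) == c);
}
#endif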
2000 /* Try to output insns to set TARGET equal to the constant C if it can be
2001 done in less than N insns. Do all computations in MODE. Returns the place
2002 where the output has been placed if it can be done and the insns have been
2003 emitted. If it would take more than N insns, zero is returned and no
2004 insns are emitted. */
2005
2006 static rtx
2007 alpha_emit_set_const (rtx target, machine_mode mode,
2008 HOST_WIDE_INT c, int n, bool no_output)
2009 {
2010 machine_mode orig_mode = mode;
2011 rtx orig_target = target;
2012 rtx result = 0;
2013 int i;
2014
2015 /* If we can't make any pseudos, TARGET is an SImode hard register, and we
2016 can't load this constant in one insn, do this in DImode. */
2017 if (!can_create_pseudo_p () && mode == SImode
2018 && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER)
2019 {
2020 result = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
2021 if (result)
2022 return result;
2023
2024 target = no_output ? NULL : gen_lowpart (DImode, target);
2025 mode = DImode;
2026 }
2027 else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
2028 {
2029 target = no_output ? NULL : gen_lowpart (DImode, target);
2030 mode = DImode;
2031 }
2032
2033 /* Try 1 insn, then 2, then up to N. */
2034 for (i = 1; i <= n; i++)
2035 {
2036 result = alpha_emit_set_const_1 (target, mode, c, i, no_output);
2037 if (result)
2038 {
2039 rtx_insn *insn;
2040 rtx set;
2041
2042 if (no_output)
2043 return result;
2044
2045 insn = get_last_insn ();
2046 set = single_set (insn);
2047 if (! CONSTANT_P (SET_SRC (set)))
2048 set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c));
2049 break;
2050 }
2051 }
2052
2053 /* Allow for the case where we changed the mode of TARGET. */
2054 if (result)
2055 {
2056 if (result == target)
2057 result = orig_target;
2058 else if (mode != orig_mode)
2059 result = gen_lowpart (orig_mode, result);
2060 }
2061
2062 return result;
2063 }
2064
2065 /* Having failed to find a 3 insn sequence in alpha_emit_set_const,
2066 fall back to a straightforward decomposition. We do this to avoid
2067 exponential run times encountered when looking for longer sequences
2068 with alpha_emit_set_const. */
2069
2070 static rtx
2071 alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1)
2072 {
2073 HOST_WIDE_INT d1, d2, d3, d4;
2074 machine_mode mode = GET_MODE (target);
2075 rtx orig_target = target;
2076
2077 /* Decompose the entire word */
2078
2079 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2080 c1 -= d1;
2081 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2082 c1 = (c1 - d2) >> 32;
2083 d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2084 c1 -= d3;
2085 d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2086 gcc_assert (c1 == d4);
2087
2088 if (mode != DImode)
2089 target = gen_lowpart (DImode, target);
2090
2091 /* Construct the high word */
2092 if (d4)
2093 {
2094 emit_move_insn (target, GEN_INT (d4));
2095 if (d3)
2096 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
2097 }
2098 else
2099 emit_move_insn (target, GEN_INT (d3));
2100
2101 /* Shift it into place */
2102 emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
2103
2104 /* Add in the low bits. */
2105 if (d2)
2106 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
2107 if (d1)
2108 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
2109
2110 return orig_target;
2111 }
2112
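/* A host-side sketch (illustrative only; assumes a 64-bit HOST_WIDE_INT
   and an arithmetic right shift, as the function above does) of the
   four-piece decomposition.  The pieces always satisfy
   C1 == ((d4 + d3) << 32) + d2 + d1 modulo 2**64, which is exactly the
   sequence emitted: build d4 + d3, shift left by 32, add d2, add d1.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
long_const_model (int64_t c1)
{
  int64_t d1, d2, d3, d4, c = c1;

  d1 = ((c & 0xffff) ^ 0x8000) - 0x8000;             /* sign-extend 16 */
  c -= d1;
  d2 = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000; /* sign-extend 32 */
  c = (c - d2) >> 32;
  d3 = ((c & 0xffff) ^ 0x8000) - 0x8000;
  c -= d3;
  d4 = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
  assert (c == d4);

  /* E.g. 0x123456789abcdef0 splits into d1 = -0x2110, d2 = -0x65430000,
     d3 = 0x5679, d4 = 0x12340000.  */
  assert ((uint64_t) c1
          == ((uint64_t) (d4 + d3) << 32) + (uint64_t) d2 + (uint64_t) d1);
}
#endif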
2113 /* Given an integral CONST_INT or CONST_VECTOR, return the low 64 bits. */
2114
2115 static HOST_WIDE_INT
2116 alpha_extract_integer (rtx x)
2117 {
2118 if (GET_CODE (x) == CONST_VECTOR)
2119 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
2120
2121 gcc_assert (CONST_INT_P (x));
2122
2123 return INTVAL (x);
2124 }
2125
2126 /* Implement TARGET_LEGITIMATE_CONSTANT_P. This is all constants for which
2127 we are willing to load the value into a register via a move pattern.
2128 Normally this is all symbolic constants, integral constants that
2129 take three or fewer instructions, and floating-point zero. */
2130
2131 bool
2132 alpha_legitimate_constant_p (machine_mode mode, rtx x)
2133 {
2134 HOST_WIDE_INT i0;
2135
2136 switch (GET_CODE (x))
2137 {
2138 case LABEL_REF:
2139 case HIGH:
2140 return true;
2141
2142 case CONST:
2143 if (GET_CODE (XEXP (x, 0)) == PLUS
2144 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2145 x = XEXP (XEXP (x, 0), 0);
2146 else
2147 return true;
2148
2149 if (GET_CODE (x) != SYMBOL_REF)
2150 return true;
2151 /* FALLTHRU */
2152
2153 case SYMBOL_REF:
2154 /* TLS symbols are never valid. */
2155 return SYMBOL_REF_TLS_MODEL (x) == 0;
2156
2157 case CONST_WIDE_INT:
2158 if (TARGET_BUILD_CONSTANTS)
2159 return true;
2160 if (x == CONST0_RTX (mode))
2161 return true;
2162 mode = DImode;
2163 gcc_assert (CONST_WIDE_INT_NUNITS (x) == 2);
2164 i0 = CONST_WIDE_INT_ELT (x, 1);
2165 if (alpha_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) == NULL)
2166 return false;
2167 i0 = CONST_WIDE_INT_ELT (x, 0);
2168 goto do_integer;
2169
2170 case CONST_DOUBLE:
2171 if (x == CONST0_RTX (mode))
2172 return true;
2173 return false;
2174
2175 case CONST_VECTOR:
2176 if (x == CONST0_RTX (mode))
2177 return true;
2178 if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
2179 return false;
2180 if (GET_MODE_SIZE (mode) != 8)
2181 return false;
2182 /* FALLTHRU */
2183
2184 case CONST_INT:
2185 if (TARGET_BUILD_CONSTANTS)
2186 return true;
2187 i0 = alpha_extract_integer (x);
2188 do_integer:
2189 return alpha_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) != NULL;
2190
2191 default:
2192 return false;
2193 }
2194 }
2195
2196 /* Operand 1 is known to be a constant, and should require more than one
2197 instruction to load. Emit that multi-part load. */
2198
2199 bool
2200 alpha_split_const_mov (machine_mode mode, rtx *operands)
2201 {
2202 HOST_WIDE_INT i0;
2203 rtx temp = NULL_RTX;
2204
2205 i0 = alpha_extract_integer (operands[1]);
2206
2207 temp = alpha_emit_set_const (operands[0], mode, i0, 3, false);
2208
2209 if (!temp && TARGET_BUILD_CONSTANTS)
2210 temp = alpha_emit_set_long_const (operands[0], i0);
2211
2212 if (temp)
2213 {
2214 if (!rtx_equal_p (operands[0], temp))
2215 emit_move_insn (operands[0], temp);
2216 return true;
2217 }
2218
2219 return false;
2220 }
2221
2222 /* Expand a move instruction; return true if all work is done.
2223 We don't handle non-bwx subword loads here. */
2224
2225 bool
2226 alpha_expand_mov (machine_mode mode, rtx *operands)
2227 {
2228 rtx tmp;
2229
2230 /* If the output is not a register, the input must be. */
2231 if (MEM_P (operands[0])
2232 && ! reg_or_0_operand (operands[1], mode))
2233 operands[1] = force_reg (mode, operands[1]);
2234
2235 /* Allow legitimize_address to perform some simplifications. */
2236 if (mode == Pmode && symbolic_operand (operands[1], mode))
2237 {
2238 tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode);
2239 if (tmp)
2240 {
2241 if (tmp == operands[0])
2242 return true;
2243 operands[1] = tmp;
2244 return false;
2245 }
2246 }
2247
2248 /* Early out for non-constants and valid constants. */
2249 if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode))
2250 return false;
2251
2252 /* Split large integers. */
2253 if (CONST_INT_P (operands[1])
2254 || GET_CODE (operands[1]) == CONST_VECTOR)
2255 {
2256 if (alpha_split_const_mov (mode, operands))
2257 return true;
2258 }
2259
2260 /* Otherwise we've nothing left but to drop the thing to memory. */
2261 tmp = force_const_mem (mode, operands[1]);
2262
2263 if (tmp == NULL_RTX)
2264 return false;
2265
2266 if (reload_in_progress)
2267 {
2268 emit_move_insn (operands[0], XEXP (tmp, 0));
2269 operands[1] = replace_equiv_address (tmp, operands[0]);
2270 }
2271 else
2272 operands[1] = validize_mem (tmp);
2273 return false;
2274 }
2275
2276 /* Expand a non-bwx QImode or HImode move instruction;
2277 return true if all work is done. */
2278
2279 bool
2280 alpha_expand_mov_nobwx (machine_mode mode, rtx *operands)
2281 {
2282 rtx seq;
2283
2284 /* If the output is not a register, the input must be. */
2285 if (MEM_P (operands[0]))
2286 operands[1] = force_reg (mode, operands[1]);
2287
2288 /* Handle four memory cases, unaligned and aligned for either the input
2289 or the output. The only case where we can be called during reload is
2290 for aligned loads; all other cases require temporaries. */
2291
2292 if (any_memory_operand (operands[1], mode))
2293 {
2294 if (aligned_memory_operand (operands[1], mode))
2295 {
2296 if (reload_in_progress)
2297 {
2298 seq = gen_reload_in_aligned (mode, operands[0], operands[1]);
2299 emit_insn (seq);
2300 }
2301 else
2302 {
2303 rtx aligned_mem, bitnum;
2304 rtx scratch = gen_reg_rtx (SImode);
2305 rtx subtarget;
2306 bool copyout;
2307
2308 get_aligned_mem (operands[1], &aligned_mem, &bitnum);
2309
2310 subtarget = operands[0];
2311 if (REG_P (subtarget))
2312 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2313 else
2314 subtarget = gen_reg_rtx (DImode), copyout = true;
2315
2316 if (mode == QImode)
2317 seq = gen_aligned_loadqi (subtarget, aligned_mem,
2318 bitnum, scratch);
2319 else
2320 seq = gen_aligned_loadhi (subtarget, aligned_mem,
2321 bitnum, scratch);
2322 emit_insn (seq);
2323
2324 if (copyout)
2325 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2326 }
2327 }
2328 else
2329 {
2330 /* Don't pass these as parameters since that makes the generated
2331 code depend on parameter evaluation order which will cause
2332 bootstrap failures. */
2333
2334 rtx temp1, temp2, subtarget, ua;
2335 bool copyout;
2336
2337 temp1 = gen_reg_rtx (DImode);
2338 temp2 = gen_reg_rtx (DImode);
2339
2340 subtarget = operands[0];
2341 if (REG_P (subtarget))
2342 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2343 else
2344 subtarget = gen_reg_rtx (DImode), copyout = true;
2345
2346 ua = get_unaligned_address (operands[1]);
2347 if (mode == QImode)
2348 seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2);
2349 else
2350 seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2);
2351
2352 alpha_set_memflags (seq, operands[1]);
2353 emit_insn (seq);
2354
2355 if (copyout)
2356 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2357 }
2358 return true;
2359 }
2360
2361 if (any_memory_operand (operands[0], mode))
2362 {
2363 if (aligned_memory_operand (operands[0], mode))
2364 {
2365 rtx aligned_mem, bitnum;
2366 rtx temp1 = gen_reg_rtx (SImode);
2367 rtx temp2 = gen_reg_rtx (SImode);
2368
2369 get_aligned_mem (operands[0], &aligned_mem, &bitnum);
2370
2371 emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
2372 temp1, temp2));
2373 }
2374 else
2375 {
2376 rtx temp1 = gen_reg_rtx (DImode);
2377 rtx temp2 = gen_reg_rtx (DImode);
2378 rtx temp3 = gen_reg_rtx (DImode);
2379 rtx ua = get_unaligned_address (operands[0]);
2380
2381 seq = gen_unaligned_store
2382 (mode, ua, operands[1], temp1, temp2, temp3);
2383
2384 alpha_set_memflags (seq, operands[0]);
2385 emit_insn (seq);
2386 }
2387 return true;
2388 }
2389
2390 return false;
2391 }
2392
2393 /* Implement the movmisalign patterns. One of the operands is a memory
2394 that is not naturally aligned. Emit instructions to load it. */
2395
2396 void
2397 alpha_expand_movmisalign (machine_mode mode, rtx *operands)
2398 {
2399 /* Honor misaligned loads; these are the accesses we promised to handle. */
2400 if (MEM_P (operands[1]))
2401 {
2402 rtx tmp;
2403
2404 if (register_operand (operands[0], mode))
2405 tmp = operands[0];
2406 else
2407 tmp = gen_reg_rtx (mode);
2408
2409 alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0);
2410 if (tmp != operands[0])
2411 emit_move_insn (operands[0], tmp);
2412 }
2413 else if (MEM_P (operands[0]))
2414 {
2415 if (!reg_or_0_operand (operands[1], mode))
2416 operands[1] = force_reg (mode, operands[1]);
2417 alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
2418 }
2419 else
2420 gcc_unreachable ();
2421 }
2422
2423 /* Generate an unsigned DImode to FP conversion. This is the same code
2424 optabs would emit if we didn't have TFmode patterns.
2425
2426 For SFmode, this is the only construction I've found that can pass
2427 gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode
2428 intermediates will work, because you'll get intermediate rounding
2429 that ruins the end result. Some of this could be fixed by turning
2430 on round-to-positive-infinity, but that requires diddling the fpsr,
2431 which kills performance. I tried turning this around and converting
2432 to a negative number, so that I could turn on /m, but either I did
2433 it wrong or there's something else, because I wound up with the exact
2434 same single-bit error. There is a branch-less form of this same code:
2435
2436 srl $16,1,$1
2437 and $16,1,$2
2438 cmplt $16,0,$3
2439 or $1,$2,$2
2440 cmovge $16,$16,$2
2441 itoft $3,$f10
2442 itoft $2,$f11
2443 cvtqs $f11,$f11
2444 adds $f11,$f11,$f0
2445 fcmoveq $f10,$f11,$f0
2446
2447 I'm not using it because it's the same number of instructions as
2448 this branch-full form, and it has more serialized long latency
2449 instructions on the critical path.
2450
2451 For DFmode, we can avoid rounding errors by breaking up the word
2452 into two pieces, converting them separately, and adding them back:
2453
2454 LC0: .long 0,0x5f800000
2455
2456 itoft $16,$f11
2457 lda $2,LC0
2458 cmplt $16,0,$1
2459 cpyse $f11,$f31,$f10
2460 cpyse $f31,$f11,$f11
2461 s4addq $1,$2,$1
2462 lds $f12,0($1)
2463 cvtqt $f10,$f10
2464 cvtqt $f11,$f11
2465 addt $f12,$f10,$f0
2466 addt $f0,$f11,$f0
2467
2468 This doesn't seem to be a clear-cut win over the optabs form.
2469 It probably all depends on the distribution of numbers being
2470 converted -- in the optabs form, all but high-bit-set has a
2471 much lower minimum execution time. */
2472
2473 void
2474 alpha_emit_floatuns (rtx operands[2])
2475 {
2476 rtx neglab, donelab, i0, i1, f0, in, out;
2477 machine_mode mode;
2478
2479 out = operands[0];
2480 in = force_reg (DImode, operands[1]);
2481 mode = GET_MODE (out);
2482 neglab = gen_label_rtx ();
2483 donelab = gen_label_rtx ();
2484 i0 = gen_reg_rtx (DImode);
2485 i1 = gen_reg_rtx (DImode);
2486 f0 = gen_reg_rtx (mode);
2487
2488 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
2489
2490 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
2491 emit_jump_insn (gen_jump (donelab));
2492 emit_barrier ();
2493
2494 emit_label (neglab);
2495
2496 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
2497 emit_insn (gen_anddi3 (i1, in, const1_rtx));
2498 emit_insn (gen_iordi3 (i0, i0, i1));
2499 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
2500 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
2501
2502 emit_label (donelab);
2503 }
2504
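/* A standalone sketch (illustrative only, shown for double; the helper
   name is made up) of the trick used on the negative path above: halve
   the value, OR the discarded low bit back in as a sticky bit so no
   rounding information is lost, convert as a signed number, then double
   the result.  */
#if 0
#include <stdint.h>

static double
floatuns_model (uint64_t x)
{
  if ((int64_t) x >= 0)
    return (double) (int64_t) x;       /* high bit clear: plain cvtqt  */

  uint64_t i0 = (x >> 1) | (x & 1);    /* the srl/and/or on the branch */
  double f0 = (double) (int64_t) i0;   /* cvtqt                        */
  return f0 + f0;                      /* doubling recovers X          */
}
#endif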
2505 /* Generate the comparison for a conditional branch. */
2506
2507 void
2508 alpha_emit_conditional_branch (rtx operands[], machine_mode cmp_mode)
2509 {
2510 enum rtx_code cmp_code, branch_code;
2511 machine_mode branch_mode = VOIDmode;
2512 enum rtx_code code = GET_CODE (operands[0]);
2513 rtx op0 = operands[1], op1 = operands[2];
2514 rtx tem;
2515
2516 if (cmp_mode == TFmode)
2517 {
2518 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2519 op1 = const0_rtx;
2520 cmp_mode = DImode;
2521 }
2522
2523 /* The general case: fold the comparison code to the types of compares
2524 that we have, choosing the branch as necessary. */
2525 switch (code)
2526 {
2527 case EQ: case LE: case LT: case LEU: case LTU:
2528 case UNORDERED:
2529 /* We have these compares. */
2530 cmp_code = code, branch_code = NE;
2531 break;
2532
2533 case NE:
2534 case ORDERED:
2535 /* These must be reversed. */
2536 cmp_code = reverse_condition (code), branch_code = EQ;
2537 break;
2538
2539 case GE: case GT: case GEU: case GTU:
2540 /* For FP, we swap them, for INT, we reverse them. */
2541 if (cmp_mode == DFmode)
2542 {
2543 cmp_code = swap_condition (code);
2544 branch_code = NE;
2545 std::swap (op0, op1);
2546 }
2547 else
2548 {
2549 cmp_code = reverse_condition (code);
2550 branch_code = EQ;
2551 }
2552 break;
2553
2554 default:
2555 gcc_unreachable ();
2556 }
2557
2558 if (cmp_mode == DFmode)
2559 {
2560 if (flag_unsafe_math_optimizations && cmp_code != UNORDERED)
2561 {
2562 /* When we are not as concerned about non-finite values, and we
2563 are comparing against zero, we can branch directly. */
2564 if (op1 == CONST0_RTX (DFmode))
2565 cmp_code = UNKNOWN, branch_code = code;
2566 else if (op0 == CONST0_RTX (DFmode))
2567 {
2568 /* Undo the swap we probably did just above. */
2569 std::swap (op0, op1);
2570 branch_code = swap_condition (cmp_code);
2571 cmp_code = UNKNOWN;
2572 }
2573 }
2574 else
2575 {
2576 /* ??? We mark the branch mode to be CCmode to prevent the
2577 compare and branch from being combined, since the compare
2578 insn follows IEEE rules that the branch does not. */
2579 branch_mode = CCmode;
2580 }
2581 }
2582 else
2583 {
2584 /* The following optimizations are only for signed compares. */
2585 if (code != LEU && code != LTU && code != GEU && code != GTU)
2586 {
2587 /* Whee. Compare and branch against 0 directly. */
2588 if (op1 == const0_rtx)
2589 cmp_code = UNKNOWN, branch_code = code;
2590
2591 /* If the constant doesn't fit into an immediate, but can
2592 be generated by lda/ldah, we adjust the argument and
2593 compare against zero, so we can use beq/bne directly. */
2594 /* ??? Don't do this when comparing against symbols, otherwise
2595 we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will
2596 be declared false out of hand (at least for non-weak). */
2597 else if (CONST_INT_P (op1)
2598 && (code == EQ || code == NE)
2599 && !(symbolic_operand (op0, VOIDmode)
2600 || (REG_P (op0) && REG_POINTER (op0))))
2601 {
2602 rtx n_op1 = GEN_INT (-INTVAL (op1));
2603
2604 if (! satisfies_constraint_I (op1)
2605 && (satisfies_constraint_K (n_op1)
2606 || satisfies_constraint_L (n_op1)))
2607 cmp_code = PLUS, branch_code = code, op1 = n_op1;
2608 }
2609 }
2610
2611 if (!reg_or_0_operand (op0, DImode))
2612 op0 = force_reg (DImode, op0);
2613 if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
2614 op1 = force_reg (DImode, op1);
2615 }
2616
2617 /* Emit an initial compare instruction, if necessary. */
2618 tem = op0;
2619 if (cmp_code != UNKNOWN)
2620 {
2621 tem = gen_reg_rtx (cmp_mode);
2622 emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
2623 }
2624
2625 /* Emit the branch instruction. */
2626 tem = gen_rtx_SET (pc_rtx,
2627 gen_rtx_IF_THEN_ELSE (VOIDmode,
2628 gen_rtx_fmt_ee (branch_code,
2629 branch_mode, tem,
2630 CONST0_RTX (cmp_mode)),
2631 gen_rtx_LABEL_REF (VOIDmode,
2632 operands[3]),
2633 pc_rtx));
2634 emit_jump_insn (tem);
2635 }
2636
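/* Illustrative only (made-up helper name): the integer GT/GE/GTU/GEU
   reversal above in scalar terms.  Alpha has cmplt/cmple but no cmpgt,
   so "branch if a > b" is emitted as the reversed compare followed by a
   branch on the result being zero.  */
#if 0
static int
branch_if_gt_model (long a, long b)
{
  long t = (a <= b) ? 1 : 0;  /* cmple $a,$b,$t -- the reversed compare   */
  return t == 0;              /* beq $t,label   -- taken exactly if a > b */
}
#endif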
2637 /* Certain simplifications can be done to make invalid setcc operations
2638 valid. Return true if we emitted the setcc, or false if we can't work. */
2639
2640 bool
2641 alpha_emit_setcc (rtx operands[], machine_mode cmp_mode)
2642 {
2643 enum rtx_code cmp_code;
2644 enum rtx_code code = GET_CODE (operands[1]);
2645 rtx op0 = operands[2], op1 = operands[3];
2646 rtx tmp;
2647
2648 if (cmp_mode == TFmode)
2649 {
2650 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2651 op1 = const0_rtx;
2652 cmp_mode = DImode;
2653 }
2654
2655 if (cmp_mode == DFmode && !TARGET_FIX)
2656 return 0;
2657
2658 /* The general case: fold the comparison code to the types of compares
2659 that we have, choosing the branch as necessary. */
2660
2661 cmp_code = UNKNOWN;
2662 switch (code)
2663 {
2664 case EQ: case LE: case LT: case LEU: case LTU:
2665 case UNORDERED:
2666 /* We have these compares. */
2667 if (cmp_mode == DFmode)
2668 cmp_code = code, code = NE;
2669 break;
2670
2671 case NE:
2672 if (cmp_mode == DImode && op1 == const0_rtx)
2673 break;
2674 /* FALLTHRU */
2675
2676 case ORDERED:
2677 cmp_code = reverse_condition (code);
2678 code = EQ;
2679 break;
2680
2681 case GE: case GT: case GEU: case GTU:
2682 /* These normally need swapping, but for integer zero we have
2683 special patterns that recognize swapped operands. */
2684 if (cmp_mode == DImode && op1 == const0_rtx)
2685 break;
2686 code = swap_condition (code);
2687 if (cmp_mode == DFmode)
2688 cmp_code = code, code = NE;
2689 std::swap (op0, op1);
2690 break;
2691
2692 default:
2693 gcc_unreachable ();
2694 }
2695
2696 if (cmp_mode == DImode)
2697 {
2698 if (!register_operand (op0, DImode))
2699 op0 = force_reg (DImode, op0);
2700 if (!reg_or_8bit_operand (op1, DImode))
2701 op1 = force_reg (DImode, op1);
2702 }
2703
2704 /* Emit an initial compare instruction, if necessary. */
2705 if (cmp_code != UNKNOWN)
2706 {
2707 tmp = gen_reg_rtx (cmp_mode);
2708 emit_insn (gen_rtx_SET (tmp, gen_rtx_fmt_ee (cmp_code, cmp_mode,
2709 op0, op1)));
2710
2711 op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp;
2712 op1 = const0_rtx;
2713 }
2714
2715 /* Emit the setcc instruction. */
2716 emit_insn (gen_rtx_SET (operands[0], gen_rtx_fmt_ee (code, DImode,
2717 op0, op1)));
2718 return true;
2719 }
2720
2721
2722 /* Rewrite a comparison against zero CMP of the form
2723 (CODE (cc0) (const_int 0)) so it can be written validly in
2724 a conditional move (if_then_else CMP ...).
2725 If both of the operands that set cc0 are nonzero we must emit
2726 an insn to perform the compare (it can't be done within
2727 the conditional move). */
2728
2729 rtx
2730 alpha_emit_conditional_move (rtx cmp, machine_mode mode)
2731 {
2732 enum rtx_code code = GET_CODE (cmp);
2733 enum rtx_code cmov_code = NE;
2734 rtx op0 = XEXP (cmp, 0);
2735 rtx op1 = XEXP (cmp, 1);
2736 machine_mode cmp_mode
2737 = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0));
2738 machine_mode cmov_mode = VOIDmode;
2739 int local_fast_math = flag_unsafe_math_optimizations;
2740 rtx tem;
2741
2742 if (cmp_mode == TFmode)
2743 {
2744 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2745 op1 = const0_rtx;
2746 cmp_mode = DImode;
2747 }
2748
2749 gcc_assert (cmp_mode == DFmode || cmp_mode == DImode);
2750
2751 if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode))
2752 {
2753 enum rtx_code cmp_code;
2754
2755 if (! TARGET_FIX)
2756 return 0;
2757
2758 /* If we have fp<->int register move instructions, do a cmov by
2759 performing the comparison in fp registers, and move the
2760 zero/nonzero value to integer registers, where we can then
2761 use a normal cmov, or vice-versa. */
2762
2763 switch (code)
2764 {
2765 case EQ: case LE: case LT: case LEU: case LTU:
2766 case UNORDERED:
2767 /* We have these compares. */
2768 cmp_code = code, code = NE;
2769 break;
2770
2771 case NE:
2772 case ORDERED:
2773 /* These must be reversed. */
2774 cmp_code = reverse_condition (code), code = EQ;
2775 break;
2776
2777 case GE: case GT: case GEU: case GTU:
2778 /* These normally need swapping, but for integer zero we have
2779 special patterns that recognize swapped operands. */
2780 if (cmp_mode == DImode && op1 == const0_rtx)
2781 cmp_code = code, code = NE;
2782 else
2783 {
2784 cmp_code = swap_condition (code);
2785 code = NE;
2786 std::swap (op0, op1);
2787 }
2788 break;
2789
2790 default:
2791 gcc_unreachable ();
2792 }
2793
2794 if (cmp_mode == DImode)
2795 {
2796 if (!reg_or_0_operand (op0, DImode))
2797 op0 = force_reg (DImode, op0);
2798 if (!reg_or_8bit_operand (op1, DImode))
2799 op1 = force_reg (DImode, op1);
2800 }
2801
2802 tem = gen_reg_rtx (cmp_mode);
2803 emit_insn (gen_rtx_SET (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode,
2804 op0, op1)));
2805
2806 cmp_mode = cmp_mode == DImode ? E_DFmode : E_DImode;
2807 op0 = gen_lowpart (cmp_mode, tem);
2808 op1 = CONST0_RTX (cmp_mode);
2809 cmp = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2810 local_fast_math = 1;
2811 }
2812
2813 if (cmp_mode == DImode)
2814 {
2815 if (!reg_or_0_operand (op0, DImode))
2816 op0 = force_reg (DImode, op0);
2817 if (!reg_or_8bit_operand (op1, DImode))
2818 op1 = force_reg (DImode, op1);
2819 }
2820
2821 /* We may be able to use a conditional move directly.
2822 This avoids emitting spurious compares. */
2823 if (signed_comparison_operator (cmp, VOIDmode)
2824 && (cmp_mode == DImode || local_fast_math)
2825 && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode)))
2826 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2827
2828 /* We can't put the comparison inside the conditional move;
2829 emit a compare instruction and put that inside the
2830 conditional move. Make sure we emit only comparisons we have;
2831 swap or reverse as necessary. */
2832
2833 if (!can_create_pseudo_p ())
2834 return NULL_RTX;
2835
2836 switch (code)
2837 {
2838 case EQ: case LE: case LT: case LEU: case LTU:
2839 case UNORDERED:
2840 /* We have these compares: */
2841 break;
2842
2843 case NE:
2844 case ORDERED:
2845 /* These must be reversed. */
2846 code = reverse_condition (code);
2847 cmov_code = EQ;
2848 break;
2849
2850 case GE: case GT: case GEU: case GTU:
2851 /* These normally need swapping, but for integer zero we have
2852 special patterns that recognize swapped operands. */
2853 if (cmp_mode == DImode && op1 == const0_rtx)
2854 break;
2855 code = swap_condition (code);
2856 std::swap (op0, op1);
2857 break;
2858
2859 default:
2860 gcc_unreachable ();
2861 }
2862
2863 if (cmp_mode == DImode)
2864 {
2865 if (!reg_or_0_operand (op0, DImode))
2866 op0 = force_reg (DImode, op0);
2867 if (!reg_or_8bit_operand (op1, DImode))
2868 op1 = force_reg (DImode, op1);
2869 }
2870
2871 /* ??? We mark the branch mode to be CCmode to prevent the compare
2872 and cmov from being combined, since the compare insn follows IEEE
2873 rules that the cmov does not. */
2874 if (cmp_mode == DFmode && !local_fast_math)
2875 cmov_mode = CCmode;
2876
2877 tem = gen_reg_rtx (cmp_mode);
2878 emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1));
2879 return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode));
2880 }
2881
2882 /* Simplify a conditional move of two constants into a setcc with
2883 arithmetic. This is done with a splitter since combine would
2884 just undo the work if done during code generation. It also catches
2885 cases we wouldn't have before cse. */
2886
2887 int
2888 alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond,
2889 rtx t_rtx, rtx f_rtx)
2890 {
2891 HOST_WIDE_INT t, f, diff;
2892 machine_mode mode;
2893 rtx target, subtarget, tmp;
2894
2895 mode = GET_MODE (dest);
2896 t = INTVAL (t_rtx);
2897 f = INTVAL (f_rtx);
2898 diff = t - f;
2899
2900 if (((code == NE || code == EQ) && diff < 0)
2901 || (code == GE || code == GT))
2902 {
2903 code = reverse_condition (code);
2904 std::swap (t, f);
2905 diff = -diff;
2906 }
2907
2908 subtarget = target = dest;
2909 if (mode != DImode)
2910 {
2911 target = gen_lowpart (DImode, dest);
2912 if (can_create_pseudo_p ())
2913 subtarget = gen_reg_rtx (DImode);
2914 else
2915 subtarget = target;
2916 }
2917 /* Below, we must be careful to use copy_rtx on target and subtarget
2918 in intermediate insns, as they may be a subreg rtx, which may not
2919 be shared. */
2920
2921 if (f == 0 && exact_log2 (diff) > 0
2922 /* On EV6, we've got enough shifters to make non-arithmetic shifts
2923 viable over a longer latency cmove. On EV5, the E0 slot is a
2924 scarce resource, and on EV4 shift has the same latency as a cmove. */
2925 && (diff <= 8 || alpha_tune == PROCESSOR_EV6))
2926 {
2927 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2928 emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2929
2930 tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2931 GEN_INT (exact_log2 (t)));
2932 emit_insn (gen_rtx_SET (target, tmp));
2933 }
2934 else if (f == 0 && t == -1)
2935 {
2936 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2937 emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2938
2939 emit_insn (gen_negdi2 (target, copy_rtx (subtarget)));
2940 }
2941 else if (diff == 1 || diff == 4 || diff == 8)
2942 {
2943 rtx add_op;
2944
2945 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2946 emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2947
2948 if (diff == 1)
2949 emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f)));
2950 else
2951 {
2952 add_op = GEN_INT (f);
2953 if (sext_add_operand (add_op, mode))
2954 {
2955 tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2956 GEN_INT (exact_log2 (diff)));
2957 tmp = gen_rtx_PLUS (DImode, tmp, add_op);
2958 emit_insn (gen_rtx_SET (target, tmp));
2959 }
2960 else
2961 return 0;
2962 }
2963 }
2964 else
2965 return 0;
2966
2967 return 1;
2968 }
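/* Host-level sketch (illustrative only; helper names are made up) of two
   of the splitter's cases: with constants {t, 0} and t a power of two,
   the conditional move becomes a setcc plus a shift; with t - f in
   {1, 4, 8} it becomes a setcc plus an add (addq, s4addq or s8addq).  */
#if 0
static long
cmov_zero_pow2_model (int cond, long t)        /* f == 0, t == 2**k        */
{
  long flag = cond ? 1 : 0;                    /* setcc into a temp        */
  return flag << __builtin_ctzl (t);           /* shift the 0/1 into place */
}

static long
cmov_small_diff_model (int cond, long f, long diff)  /* diff is 1, 4 or 8  */
{
  long flag = cond ? 1 : 0;                    /* setcc                    */
  return flag * diff + f;                      /* addq / s4addq / s8addq   */
}
#endif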
2969 \f
2970 /* Look up the X_floating library function name for the
2971 given operation. */
2972
2973 struct GTY(()) xfloating_op
2974 {
2975 const enum rtx_code code;
2976 const char *const GTY((skip)) osf_func;
2977 const char *const GTY((skip)) vms_func;
2978 rtx libcall;
2979 };
2980
2981 static GTY(()) struct xfloating_op xfloating_ops[] =
2982 {
2983 { PLUS, "_OtsAddX", "OTS$ADD_X", 0 },
2984 { MINUS, "_OtsSubX", "OTS$SUB_X", 0 },
2985 { MULT, "_OtsMulX", "OTS$MUL_X", 0 },
2986 { DIV, "_OtsDivX", "OTS$DIV_X", 0 },
2987 { EQ, "_OtsEqlX", "OTS$EQL_X", 0 },
2988 { NE, "_OtsNeqX", "OTS$NEQ_X", 0 },
2989 { LT, "_OtsLssX", "OTS$LSS_X", 0 },
2990 { LE, "_OtsLeqX", "OTS$LEQ_X", 0 },
2991 { GT, "_OtsGtrX", "OTS$GTR_X", 0 },
2992 { GE, "_OtsGeqX", "OTS$GEQ_X", 0 },
2993 { FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 },
2994 { FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 },
2995 { UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 },
2996 { FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 },
2997 { FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 }
2998 };
2999
3000 static GTY(()) struct xfloating_op vax_cvt_ops[] =
3001 {
3002 { FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 },
3003 { FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 }
3004 };
3005
3006 static rtx
3007 alpha_lookup_xfloating_lib_func (enum rtx_code code)
3008 {
3009 struct xfloating_op *ops = xfloating_ops;
3010 long n = ARRAY_SIZE (xfloating_ops);
3011 long i;
3012
3013 gcc_assert (TARGET_HAS_XFLOATING_LIBS);
3014
3015 /* How irritating. Nothing to key off for the main table. */
3016 if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE))
3017 {
3018 ops = vax_cvt_ops;
3019 n = ARRAY_SIZE (vax_cvt_ops);
3020 }
3021
3022 for (i = 0; i < n; ++i, ++ops)
3023 if (ops->code == code)
3024 {
3025 rtx func = ops->libcall;
3026 if (!func)
3027 {
3028 func = init_one_libfunc (TARGET_ABI_OPEN_VMS
3029 ? ops->vms_func : ops->osf_func);
3030 ops->libcall = func;
3031 }
3032 return func;
3033 }
3034
3035 gcc_unreachable ();
3036 }
3037
3038 /* Most X_floating operations take the rounding mode as an argument.
3039 Compute that here. */
3040
3041 static int
3042 alpha_compute_xfloating_mode_arg (enum rtx_code code,
3043 enum alpha_fp_rounding_mode round)
3044 {
3045 int mode;
3046
3047 switch (round)
3048 {
3049 case ALPHA_FPRM_NORM:
3050 mode = 2;
3051 break;
3052 case ALPHA_FPRM_MINF:
3053 mode = 1;
3054 break;
3055 case ALPHA_FPRM_CHOP:
3056 mode = 0;
3057 break;
3058 case ALPHA_FPRM_DYN:
3059 mode = 4;
3060 break;
3061 default:
3062 gcc_unreachable ();
3063
3064 /* XXX For reference, round to +inf is mode = 3. */
3065 }
3066
3067 if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N)
3068 mode |= 0x10000;
3069
3070 return mode;
3071 }
3072
3073 /* Emit an X_floating library function call.
3074
3075 Note that these functions do not follow normal calling conventions:
3076 TFmode arguments are passed in two integer registers (as opposed to
3077 indirect); TFmode return values appear in R16+R17.
3078
3079 FUNC is the function to call.
3080 TARGET is where the output belongs.
3081 OPERANDS are the inputs.
3082 NOPERANDS is the count of inputs.
3083 EQUIV is the expression equivalent for the function.
3084 */
3085
3086 static void
3087 alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
3088 int noperands, rtx equiv)
3089 {
3090 rtx usage = NULL_RTX, reg;
3091 int regno = 16, i;
3092
3093 start_sequence ();
3094
3095 for (i = 0; i < noperands; ++i)
3096 {
3097 switch (GET_MODE (operands[i]))
3098 {
3099 case E_TFmode:
3100 reg = gen_rtx_REG (TFmode, regno);
3101 regno += 2;
3102 break;
3103
3104 case E_DFmode:
3105 reg = gen_rtx_REG (DFmode, regno + 32);
3106 regno += 1;
3107 break;
3108
3109 case E_VOIDmode:
3110 gcc_assert (CONST_INT_P (operands[i]));
3111 /* FALLTHRU */
3112 case E_DImode:
3113 reg = gen_rtx_REG (DImode, regno);
3114 regno += 1;
3115 break;
3116
3117 default:
3118 gcc_unreachable ();
3119 }
3120
3121 emit_move_insn (reg, operands[i]);
3122 use_reg (&usage, reg);
3123 }
3124
3125 switch (GET_MODE (target))
3126 {
3127 case E_TFmode:
3128 reg = gen_rtx_REG (TFmode, 16);
3129 break;
3130 case E_DFmode:
3131 reg = gen_rtx_REG (DFmode, 32);
3132 break;
3133 case E_DImode:
3134 reg = gen_rtx_REG (DImode, 0);
3135 break;
3136 default:
3137 gcc_unreachable ();
3138 }
3139
3140 rtx mem = gen_rtx_MEM (QImode, func);
3141 rtx_insn *tmp = emit_call_insn (gen_call_value (reg, mem, const0_rtx,
3142 const0_rtx, const0_rtx));
3143 CALL_INSN_FUNCTION_USAGE (tmp) = usage;
3144 RTL_CONST_CALL_P (tmp) = 1;
3145
3146 tmp = get_insns ();
3147 end_sequence ();
3148
3149 emit_libcall_block (tmp, target, reg, equiv);
3150 }
3151
3152 /* Emit an X_floating library function call for arithmetic (+,-,*,/). */
3153
3154 void
3155 alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[])
3156 {
3157 rtx func;
3158 int mode;
3159 rtx out_operands[3];
3160
3161 func = alpha_lookup_xfloating_lib_func (code);
3162 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3163
3164 out_operands[0] = operands[1];
3165 out_operands[1] = operands[2];
3166 out_operands[2] = GEN_INT (mode);
3167 alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3,
3168 gen_rtx_fmt_ee (code, TFmode, operands[1],
3169 operands[2]));
3170 }
3171
3172 /* Emit an X_floating library function call for a comparison. */
3173
3174 static rtx
3175 alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1)
3176 {
3177 enum rtx_code cmp_code, res_code;
3178 rtx func, out, operands[2], note;
3179
3180 /* X_floating library comparison functions return
3181 -1 unordered
3182 0 false
3183 1 true
3184 Convert the compare against the raw return value. */
3185
3186 cmp_code = *pcode;
3187 switch (cmp_code)
3188 {
3189 case UNORDERED:
3190 cmp_code = EQ;
3191 res_code = LT;
3192 break;
3193 case ORDERED:
3194 cmp_code = EQ;
3195 res_code = GE;
3196 break;
3197 case NE:
3198 res_code = NE;
3199 break;
3200 case EQ:
3201 case LT:
3202 case GT:
3203 case LE:
3204 case GE:
3205 res_code = GT;
3206 break;
3207 default:
3208 gcc_unreachable ();
3209 }
3210 *pcode = res_code;
3211
3212 func = alpha_lookup_xfloating_lib_func (cmp_code);
3213
3214 operands[0] = op0;
3215 operands[1] = op1;
3216 out = gen_reg_rtx (DImode);
3217
3218 /* What's actually returned is -1,0,1, not a proper boolean value. */
3219 note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1);
3220 note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE);
3221 alpha_emit_xfloating_libcall (func, out, operands, 2, note);
3222
3223 return out;
3224 }
3225
3226 /* Emit an X_floating library function call for a conversion. */
3227
3228 void
3229 alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[])
3230 {
3231 int noperands = 1, mode;
3232 rtx out_operands[2];
3233 rtx func;
3234 enum rtx_code code = orig_code;
3235
3236 if (code == UNSIGNED_FIX)
3237 code = FIX;
3238
3239 func = alpha_lookup_xfloating_lib_func (code);
3240
3241 out_operands[0] = operands[1];
3242
3243 switch (code)
3244 {
3245 case FIX:
3246 mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP);
3247 out_operands[1] = GEN_INT (mode);
3248 noperands = 2;
3249 break;
3250 case FLOAT_TRUNCATE:
3251 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3252 out_operands[1] = GEN_INT (mode);
3253 noperands = 2;
3254 break;
3255 default:
3256 break;
3257 }
3258
3259 alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands,
3260 gen_rtx_fmt_e (orig_code,
3261 GET_MODE (operands[0]),
3262 operands[1]));
3263 }
3264
3265 /* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of
3266 DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true,
3267 guarantee that the sequence
3268 set (OP[0] OP[2])
3269 set (OP[1] OP[3])
3270 is valid. Naturally, output operand ordering is little-endian.
3271 This is used by *movtf_internal and *movti_internal. */
3272
3273 void
3274 alpha_split_tmode_pair (rtx operands[4], machine_mode mode,
3275 bool fixup_overlap)
3276 {
3277 switch (GET_CODE (operands[1]))
3278 {
3279 case REG:
3280 operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
3281 operands[2] = gen_rtx_REG (DImode, REGNO (operands[1]));
3282 break;
3283
3284 case MEM:
3285 operands[3] = adjust_address (operands[1], DImode, 8);
3286 operands[2] = adjust_address (operands[1], DImode, 0);
3287 break;
3288
3289 CASE_CONST_SCALAR_INT:
3290 case CONST_DOUBLE:
3291 gcc_assert (operands[1] == CONST0_RTX (mode));
3292 operands[2] = operands[3] = const0_rtx;
3293 break;
3294
3295 default:
3296 gcc_unreachable ();
3297 }
3298
3299 switch (GET_CODE (operands[0]))
3300 {
3301 case REG:
3302 operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
3303 operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
3304 break;
3305
3306 case MEM:
3307 operands[1] = adjust_address (operands[0], DImode, 8);
3308 operands[0] = adjust_address (operands[0], DImode, 0);
3309 break;
3310
3311 default:
3312 gcc_unreachable ();
3313 }
3314
3315 if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3]))
3316 {
3317 std::swap (operands[0], operands[1]);
3318 std::swap (operands[2], operands[3]);
3319 }
3320 }
3321
3322 /* Implement negtf2 or abstf2. Op0 is destination, op1 is source,
3323 op2 is a register containing the sign bit, operation is the
3324 logical operation to be performed. */
3325
3326 void
3327 alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx))
3328 {
3329 rtx high_bit = operands[2];
3330 rtx scratch;
3331 int move;
3332
3333 alpha_split_tmode_pair (operands, TFmode, false);
3334
3335 /* Detect three flavors of operand overlap. */
3336 move = 1;
3337 if (rtx_equal_p (operands[0], operands[2]))
3338 move = 0;
3339 else if (rtx_equal_p (operands[1], operands[2]))
3340 {
3341 if (rtx_equal_p (operands[0], high_bit))
3342 move = 2;
3343 else
3344 move = -1;
3345 }
3346
3347 if (move < 0)
3348 emit_move_insn (operands[0], operands[2]);
3349
3350 /* ??? If the destination overlaps both source tf and high_bit, then
3351 assume source tf is dead in its entirety and use the other half
3352 for a scratch register. Otherwise "scratch" is just the proper
3353 destination register. */
3354 scratch = operands[move < 2 ? 1 : 3];
3355
3356 emit_insn ((*operation) (scratch, high_bit, operands[3]));
3357
3358 if (move > 0)
3359 {
3360 emit_move_insn (operands[0], operands[2]);
3361 if (move > 1)
3362 emit_move_insn (operands[1], scratch);
3363 }
3364 }
3365 \f
3366 /* Use ext[wlq][lh] as the Architecture Handbook describes for extracting
3367 unaligned data:
3368
3369 unsigned: signed:
3370 word: ldq_u r1,X(r11) ldq_u r1,X(r11)
3371 ldq_u r2,X+1(r11) ldq_u r2,X+1(r11)
3372 lda r3,X(r11) lda r3,X+2(r11)
3373 extwl r1,r3,r1 extql r1,r3,r1
3374 extwh r2,r3,r2 extqh r2,r3,r2
3375 or r1,r2,r1 or r1,r2,r1
3376 sra r1,48,r1
3377
3378 long: ldq_u r1,X(r11) ldq_u r1,X(r11)
3379 ldq_u r2,X+3(r11) ldq_u r2,X+3(r11)
3380 lda r3,X(r11) lda r3,X(r11)
3381 extll r1,r3,r1 extll r1,r3,r1
3382 extlh r2,r3,r2 extlh r2,r3,r2
3383 or r1,r2,r1 addl r1,r2,r1
3384
3385 quad: ldq_u r1,X(r11)
3386 ldq_u r2,X+7(r11)
3387 lda r3,X(r11)
3388 extql r1,r3,r1
3389 extqh r2,r3,r2
3390 or r1,r2,r1
3391 */
3392
3393 void
3394 alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size,
3395 HOST_WIDE_INT ofs, int sign)
3396 {
3397 rtx meml, memh, addr, extl, exth, tmp, mema;
3398 machine_mode mode;
3399
3400 if (TARGET_BWX && size == 2)
3401 {
3402 meml = adjust_address (mem, QImode, ofs);
3403 memh = adjust_address (mem, QImode, ofs+1);
3404 extl = gen_reg_rtx (DImode);
3405 exth = gen_reg_rtx (DImode);
3406 emit_insn (gen_zero_extendqidi2 (extl, meml));
3407 emit_insn (gen_zero_extendqidi2 (exth, memh));
3408 exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8),
3409 NULL, 1, OPTAB_LIB_WIDEN);
3410 addr = expand_simple_binop (DImode, IOR, extl, exth,
3411 NULL, 1, OPTAB_LIB_WIDEN);
3412
3413 if (sign && GET_MODE (tgt) != HImode)
3414 {
3415 addr = gen_lowpart (HImode, addr);
3416 emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0));
3417 }
3418 else
3419 {
3420 if (GET_MODE (tgt) != DImode)
3421 addr = gen_lowpart (GET_MODE (tgt), addr);
3422 emit_move_insn (tgt, addr);
3423 }
3424 return;
3425 }
3426
3427 meml = gen_reg_rtx (DImode);
3428 memh = gen_reg_rtx (DImode);
3429 addr = gen_reg_rtx (DImode);
3430 extl = gen_reg_rtx (DImode);
3431 exth = gen_reg_rtx (DImode);
3432
3433 mema = XEXP (mem, 0);
3434 if (GET_CODE (mema) == LO_SUM)
3435 mema = force_reg (Pmode, mema);
3436
3437 /* AND addresses cannot be in any alias set, since they may implicitly
3438 alias surrounding code. Ideally we'd have some alias set that
3439 covered all types except those with alignment 8 or higher. */
3440
3441 tmp = change_address (mem, DImode,
3442 gen_rtx_AND (DImode,
3443 plus_constant (DImode, mema, ofs),
3444 GEN_INT (-8)));
3445 set_mem_alias_set (tmp, 0);
3446 emit_move_insn (meml, tmp);
3447
3448 tmp = change_address (mem, DImode,
3449 gen_rtx_AND (DImode,
3450 plus_constant (DImode, mema,
3451 ofs + size - 1),
3452 GEN_INT (-8)));
3453 set_mem_alias_set (tmp, 0);
3454 emit_move_insn (memh, tmp);
3455
3456 if (sign && size == 2)
3457 {
3458 emit_move_insn (addr, plus_constant (Pmode, mema, ofs+2));
3459
3460 emit_insn (gen_extql (extl, meml, addr));
3461 emit_insn (gen_extqh (exth, memh, addr));
3462
3463 /* We must use tgt here for the target. Alpha-vms port fails if we use
3464 addr for the target, because addr is marked as a pointer and combine
3465 knows that pointers are always sign-extended 32-bit values. */
3466 addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
3467 addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48),
3468 addr, 1, OPTAB_WIDEN);
3469 }
3470 else
3471 {
3472 emit_move_insn (addr, plus_constant (Pmode, mema, ofs));
3473 emit_insn (gen_extxl (extl, meml, GEN_INT (size*8), addr));
3474 switch ((int) size)
3475 {
3476 case 2:
3477 emit_insn (gen_extwh (exth, memh, addr));
3478 mode = HImode;
3479 break;
3480 case 4:
3481 emit_insn (gen_extlh (exth, memh, addr));
3482 mode = SImode;
3483 break;
3484 case 8:
3485 emit_insn (gen_extqh (exth, memh, addr));
3486 mode = DImode;
3487 break;
3488 default:
3489 gcc_unreachable ();
3490 }
3491
3492 addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl),
3493 gen_lowpart (mode, exth), gen_lowpart (mode, tgt),
3494 sign, OPTAB_WIDEN);
3495 }
3496
3497 if (addr != tgt)
3498 emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr));
3499 }
3500
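/* A host-side model (illustrative only; little-endian, quadword case) of
   the extql/extqh merge above: the unaligned value is rebuilt from the
   two aligned quadwords that contain it.  As the comment before the cmov
   fixup in alpha_expand_unaligned_load_words below notes, a real extqh
   with byte offset zero does not zero the register; the model simply
   returns a zero high fragment in that case, which is what the cmov
   arranges.  */
#if 0
#include <stdint.h>

static uint64_t
unaligned_load_model (const uint64_t *aligned, unsigned byte_ofs)
{
  unsigned shift = 8 * (byte_ofs & 7);
  uint64_t lo = aligned[0];                        /* ldq_u r1,X(r11)   */
  uint64_t hi = shift ? aligned[1] : 0;            /* ldq_u r2,X+7(r11) */

  uint64_t extql = lo >> shift;                    /* low fragment      */
  uint64_t extqh = shift ? hi << (64 - shift) : 0; /* high fragment     */
  return extql | extqh;                            /* or r1,r2,r1       */
}
#endif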
3501 /* Similarly, use ins and msk instructions to perform unaligned stores. */
3502
3503 void
3504 alpha_expand_unaligned_store (rtx dst, rtx src,
3505 HOST_WIDE_INT size, HOST_WIDE_INT ofs)
3506 {
3507 rtx dstl, dsth, addr, insl, insh, meml, memh, dsta;
3508
3509 if (TARGET_BWX && size == 2)
3510 {
3511 if (src != const0_rtx)
3512 {
3513 dstl = gen_lowpart (QImode, src);
3514 dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8),
3515 NULL, 1, OPTAB_LIB_WIDEN);
3516 dsth = gen_lowpart (QImode, dsth);
3517 }
3518 else
3519 dstl = dsth = const0_rtx;
3520
3521 meml = adjust_address (dst, QImode, ofs);
3522 memh = adjust_address (dst, QImode, ofs+1);
3523
3524 emit_move_insn (meml, dstl);
3525 emit_move_insn (memh, dsth);
3526 return;
3527 }
3528
3529 dstl = gen_reg_rtx (DImode);
3530 dsth = gen_reg_rtx (DImode);
3531 insl = gen_reg_rtx (DImode);
3532 insh = gen_reg_rtx (DImode);
3533
3534 dsta = XEXP (dst, 0);
3535 if (GET_CODE (dsta) == LO_SUM)
3536 dsta = force_reg (Pmode, dsta);
3537
3538 /* AND addresses cannot be in any alias set, since they may implicitly
3539 alias surrounding code. Ideally we'd have some alias set that
3540 covered all types except those with alignment 8 or higher. */
3541
3542 meml = change_address (dst, DImode,
3543 gen_rtx_AND (DImode,
3544 plus_constant (DImode, dsta, ofs),
3545 GEN_INT (-8)));
3546 set_mem_alias_set (meml, 0);
3547
3548 memh = change_address (dst, DImode,
3549 gen_rtx_AND (DImode,
3550 plus_constant (DImode, dsta,
3551 ofs + size - 1),
3552 GEN_INT (-8)));
3553 set_mem_alias_set (memh, 0);
3554
3555 emit_move_insn (dsth, memh);
3556 emit_move_insn (dstl, meml);
3557
3558 addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs));
3559
3560 if (src != CONST0_RTX (GET_MODE (src)))
3561 {
3562 emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
3563 GEN_INT (size*8), addr));
3564
3565 switch ((int) size)
3566 {
3567 case 2:
3568 emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr));
3569 break;
3570 case 4:
3571 emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr));
3572 break;
3573 case 8:
3574 emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr));
3575 break;
3576 default:
3577 gcc_unreachable ();
3578 }
3579 }
3580
3581 emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr));
3582
3583 switch ((int) size)
3584 {
3585 case 2:
3586 emit_insn (gen_mskwl (dstl, dstl, addr));
3587 break;
3588 case 4:
3589 emit_insn (gen_mskll (dstl, dstl, addr));
3590 break;
3591 case 8:
3592 emit_insn (gen_mskql (dstl, dstl, addr));
3593 break;
3594 default:
3595 gcc_unreachable ();
3596 }
3597
3598 if (src != CONST0_RTX (GET_MODE (src)))
3599 {
3600 dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN);
3601 dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN);
3602 }
3603
3604 /* Must store high before low for degenerate case of aligned. */
3605 emit_move_insn (memh, dsth);
3606 emit_move_insn (meml, dstl);
3607 }
3608
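/* The matching host-side model (illustrative only; quadword, nonzero
   source) of the insql/insqh plus mskql/mskqh read-modify-write above:
   position the source bytes, clear the destination bytes they will
   occupy, OR the halves in and store back.  The real sequence stores the
   high quadword before the low one so that the fully aligned case, where
   both halves are the same quadword, comes out right; the model simply
   skips the high half then.  */
#if 0
#include <stdint.h>

static void
unaligned_store_model (uint64_t *aligned, unsigned byte_ofs, uint64_t src)
{
  unsigned shift = 8 * (byte_ofs & 7);

  uint64_t insl = src << shift;                          /* insql         */
  uint64_t insh = shift ? src >> (64 - shift) : 0;       /* insqh         */
  uint64_t mskl = shift ? ~(~(uint64_t) 0 << shift) : 0; /* keep low end  */
  uint64_t mskh = ~(uint64_t) 0 << shift;                /* keep high end */

  if (shift)
    aligned[1] = (aligned[1] & mskh) | insh;             /* stq_u high    */
  aligned[0] = (aligned[0] & mskl) | insl;               /* stq_u low     */
}
#endif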
3609 /* The block move code tries to maximize speed by separating loads and
3610 stores at the expense of register pressure: we load all of the data
3611 before we store it back out. There are two secondary effects worth
3612 mentioning: it speeds copying to/from aligned and unaligned buffers,
3613 and it makes the code significantly easier to write. */
3614
3615 #define MAX_MOVE_WORDS 8
3616
3617 /* Load an integral number of consecutive unaligned quadwords. */
3618
3619 static void
3620 alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem,
3621 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3622 {
3623 rtx const im8 = GEN_INT (-8);
3624 rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1];
3625 rtx sreg, areg, tmp, smema;
3626 HOST_WIDE_INT i;
3627
3628 smema = XEXP (smem, 0);
3629 if (GET_CODE (smema) == LO_SUM)
3630 smema = force_reg (Pmode, smema);
3631
3632 /* Generate all the tmp registers we need. */
3633 for (i = 0; i < words; ++i)
3634 {
3635 data_regs[i] = out_regs[i];
3636 ext_tmps[i] = gen_reg_rtx (DImode);
3637 }
3638 data_regs[words] = gen_reg_rtx (DImode);
3639
3640 if (ofs != 0)
3641 smem = adjust_address (smem, GET_MODE (smem), ofs);
3642
3643 /* Load up all of the source data. */
3644 for (i = 0; i < words; ++i)
3645 {
3646 tmp = change_address (smem, DImode,
3647 gen_rtx_AND (DImode,
3648 plus_constant (DImode, smema, 8*i),
3649 im8));
3650 set_mem_alias_set (tmp, 0);
3651 emit_move_insn (data_regs[i], tmp);
3652 }
3653
3654 tmp = change_address (smem, DImode,
3655 gen_rtx_AND (DImode,
3656 plus_constant (DImode, smema,
3657 8*words - 1),
3658 im8));
3659 set_mem_alias_set (tmp, 0);
3660 emit_move_insn (data_regs[words], tmp);
3661
3662 /* Extract the half-word fragments. Unfortunately DEC decided to make
3663 extxh with offset zero a noop instead of zeroing the register, so
3664 we must take care of that edge condition ourselves with cmov. */
3665
3666 sreg = copy_addr_to_reg (smema);
3667 areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL,
3668 1, OPTAB_WIDEN);
3669 for (i = 0; i < words; ++i)
3670 {
3671 emit_insn (gen_extql (data_regs[i], data_regs[i], sreg));
3672 emit_insn (gen_extqh (ext_tmps[i], data_regs[i+1], sreg));
3673 emit_insn (gen_rtx_SET (ext_tmps[i],
3674 gen_rtx_IF_THEN_ELSE (DImode,
3675 gen_rtx_EQ (DImode, areg,
3676 const0_rtx),
3677 const0_rtx, ext_tmps[i])));
3678 }
3679
3680 /* Merge the half-words into whole words. */
3681 for (i = 0; i < words; ++i)
3682 {
3683 out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i],
3684 ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN);
3685 }
3686 }
3687
3688 /* Store an integral number of consecutive unaligned quadwords. DATA_REGS
3689 may be NULL to store zeros. */
3690
3691 static void
3692 alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem,
3693 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3694 {
3695 rtx const im8 = GEN_INT (-8);
3696 rtx ins_tmps[MAX_MOVE_WORDS];
3697 rtx st_tmp_1, st_tmp_2, dreg;
3698 rtx st_addr_1, st_addr_2, dmema;
3699 HOST_WIDE_INT i;
3700
3701 dmema = XEXP (dmem, 0);
3702 if (GET_CODE (dmema) == LO_SUM)
3703 dmema = force_reg (Pmode, dmema);
3704
3705 /* Generate all the tmp registers we need. */
3706 if (data_regs != NULL)
3707 for (i = 0; i < words; ++i)
3708 ins_tmps[i] = gen_reg_rtx(DImode);
3709 st_tmp_1 = gen_reg_rtx(DImode);
3710 st_tmp_2 = gen_reg_rtx(DImode);
3711
3712 if (ofs != 0)
3713 dmem = adjust_address (dmem, GET_MODE (dmem), ofs);
3714
3715 st_addr_2 = change_address (dmem, DImode,
3716 gen_rtx_AND (DImode,
3717 plus_constant (DImode, dmema,
3718 words*8 - 1),
3719 im8));
3720 set_mem_alias_set (st_addr_2, 0);
3721
3722 st_addr_1 = change_address (dmem, DImode,
3723 gen_rtx_AND (DImode, dmema, im8));
3724 set_mem_alias_set (st_addr_1, 0);
3725
3726 /* Load up the destination end bits. */
3727 emit_move_insn (st_tmp_2, st_addr_2);
3728 emit_move_insn (st_tmp_1, st_addr_1);
3729
3730 /* Shift the input data into place. */
3731 dreg = copy_addr_to_reg (dmema);
3732 if (data_regs != NULL)
3733 {
3734 for (i = words-1; i >= 0; --i)
3735 {
3736 emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg));
3737 emit_insn (gen_insql (data_regs[i], data_regs[i], dreg));
3738 }
3739 for (i = words-1; i > 0; --i)
3740 {
3741 ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i],
3742 ins_tmps[i-1], ins_tmps[i-1], 1,
3743 OPTAB_WIDEN);
3744 }
3745 }
3746
3747 /* Split and merge the ends with the destination data. */
3748 emit_insn (gen_mskqh (st_tmp_2, st_tmp_2, dreg));
3749 emit_insn (gen_mskql (st_tmp_1, st_tmp_1, dreg));
3750
3751 if (data_regs != NULL)
3752 {
3753 st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1],
3754 st_tmp_2, 1, OPTAB_WIDEN);
3755 st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0],
3756 st_tmp_1, 1, OPTAB_WIDEN);
3757 }
3758
3759 /* Store it all. */
3760 emit_move_insn (st_addr_2, st_tmp_2);
3761 for (i = words-1; i > 0; --i)
3762 {
3763 rtx tmp = change_address (dmem, DImode,
3764 gen_rtx_AND (DImode,
3765 plus_constant (DImode,
3766 dmema, i*8),
3767 im8));
3768 set_mem_alias_set (tmp, 0);
3769 emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx);
3770 }
3771 emit_move_insn (st_addr_1, st_tmp_1);
3772 }
3773
3774
3775 /* Expand string/block move operations.
3776
3777 operands[0] is the pointer to the destination.
3778 operands[1] is the pointer to the source.
3779 operands[2] is the number of bytes to move.
3780 operands[3] is the alignment. */
3781
3782 int
3783 alpha_expand_block_move (rtx operands[])
3784 {
3785 rtx bytes_rtx = operands[2];
3786 rtx align_rtx = operands[3];
3787 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
3788 HOST_WIDE_INT bytes = orig_bytes;
3789 HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT;
3790 HOST_WIDE_INT dst_align = src_align;
3791 rtx orig_src = operands[1];
3792 rtx orig_dst = operands[0];
3793 rtx data_regs[2 * MAX_MOVE_WORDS + 16];
3794 rtx tmp;
3795 unsigned int i, words, ofs, nregs = 0;
3796
3797 if (orig_bytes <= 0)
3798 return 1;
3799 else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
3800 return 0;
3801
3802 /* Look for additional alignment information from recorded register info. */
3803
3804 tmp = XEXP (orig_src, 0);
3805 if (REG_P (tmp))
3806 src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3807 else if (GET_CODE (tmp) == PLUS
3808 && REG_P (XEXP (tmp, 0))
3809 && CONST_INT_P (XEXP (tmp, 1)))
3810 {
3811 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3812 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3813
3814 if (a > src_align)
3815 {
3816 if (a >= 64 && c % 8 == 0)
3817 src_align = 64;
3818 else if (a >= 32 && c % 4 == 0)
3819 src_align = 32;
3820 else if (a >= 16 && c % 2 == 0)
3821 src_align = 16;
3822 }
3823 }
3824
3825 tmp = XEXP (orig_dst, 0);
3826 if (REG_P (tmp))
3827 dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3828 else if (GET_CODE (tmp) == PLUS
3829 && REG_P (XEXP (tmp, 0))
3830 && CONST_INT_P (XEXP (tmp, 1)))
3831 {
3832 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3833 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3834
3835 if (a > dst_align)
3836 {
3837 if (a >= 64 && c % 8 == 0)
3838 dst_align = 64;
3839 else if (a >= 32 && c % 4 == 0)
3840 dst_align = 32;
3841 else if (a >= 16 && c % 2 == 0)
3842 dst_align = 16;
3843 }
3844 }
3845
3846 ofs = 0;
3847 if (src_align >= 64 && bytes >= 8)
3848 {
3849 words = bytes / 8;
3850
3851 for (i = 0; i < words; ++i)
3852 data_regs[nregs + i] = gen_reg_rtx (DImode);
3853
3854 for (i = 0; i < words; ++i)
3855 emit_move_insn (data_regs[nregs + i],
3856 adjust_address (orig_src, DImode, ofs + i * 8));
3857
3858 nregs += words;
3859 bytes -= words * 8;
3860 ofs += words * 8;
3861 }
3862
3863 if (src_align >= 32 && bytes >= 4)
3864 {
3865 words = bytes / 4;
3866
3867 for (i = 0; i < words; ++i)
3868 data_regs[nregs + i] = gen_reg_rtx (SImode);
3869
3870 for (i = 0; i < words; ++i)
3871 emit_move_insn (data_regs[nregs + i],
3872 adjust_address (orig_src, SImode, ofs + i * 4));
3873
3874 nregs += words;
3875 bytes -= words * 4;
3876 ofs += words * 4;
3877 }
3878
3879 if (bytes >= 8)
3880 {
3881 words = bytes / 8;
3882
3883 for (i = 0; i < words+1; ++i)
3884 data_regs[nregs + i] = gen_reg_rtx (DImode);
3885
3886 alpha_expand_unaligned_load_words (data_regs + nregs, orig_src,
3887 words, ofs);
3888
3889 nregs += words;
3890 bytes -= words * 8;
3891 ofs += words * 8;
3892 }
3893
3894 if (! TARGET_BWX && bytes >= 4)
3895 {
3896 data_regs[nregs++] = tmp = gen_reg_rtx (SImode);
3897 alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0);
3898 bytes -= 4;
3899 ofs += 4;
3900 }
3901
3902 if (bytes >= 2)
3903 {
3904 if (src_align >= 16)
3905 {
3906 do {
3907 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3908 emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs));
3909 bytes -= 2;
3910 ofs += 2;
3911 } while (bytes >= 2);
3912 }
3913 else if (! TARGET_BWX)
3914 {
3915 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3916 alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0);
3917 bytes -= 2;
3918 ofs += 2;
3919 }
3920 }
3921
3922 while (bytes > 0)
3923 {
3924 data_regs[nregs++] = tmp = gen_reg_rtx (QImode);
3925 emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs));
3926 bytes -= 1;
3927 ofs += 1;
3928 }
3929
3930 gcc_assert (nregs <= ARRAY_SIZE (data_regs));
3931
3932 /* Now save it back out again. */
3933
3934 i = 0, ofs = 0;
3935
3936 /* Write out the data in whatever chunks reading the source allowed. */
3937 if (dst_align >= 64)
3938 {
3939 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3940 {
3941 emit_move_insn (adjust_address (orig_dst, DImode, ofs),
3942 data_regs[i]);
3943 ofs += 8;
3944 i++;
3945 }
3946 }
3947
3948 if (dst_align >= 32)
3949 {
3950 /* If the source has remaining DImode regs, write them out in
3951 two pieces. */
3952 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3953 {
3954 tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32),
3955 NULL_RTX, 1, OPTAB_WIDEN);
3956
3957 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3958 gen_lowpart (SImode, data_regs[i]));
3959 emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4),
3960 gen_lowpart (SImode, tmp));
3961 ofs += 8;
3962 i++;
3963 }
3964
3965 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3966 {
3967 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3968 data_regs[i]);
3969 ofs += 4;
3970 i++;
3971 }
3972 }
3973
3974 if (i < nregs && GET_MODE (data_regs[i]) == DImode)
3975 {
3976 /* Write out a remaining block of words using unaligned methods. */
3977
3978 for (words = 1; i + words < nregs; words++)
3979 if (GET_MODE (data_regs[i + words]) != DImode)
3980 break;
3981
3982 if (words == 1)
3983 alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs);
3984 else
3985 alpha_expand_unaligned_store_words (data_regs + i, orig_dst,
3986 words, ofs);
3987
3988 i += words;
3989 ofs += words * 8;
3990 }
3991
3992   /* Due to the above, any SImode data left over here won't be at an aligned offset.  */
3993 /* ??? If we have more than one of these, consider constructing full
3994 words in registers and using alpha_expand_unaligned_store_words. */
3995 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3996 {
3997 alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
3998 ofs += 4;
3999 i++;
4000 }
4001
4002 if (dst_align >= 16)
4003 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
4004 {
4005 emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]);
4006 i++;
4007 ofs += 2;
4008 }
4009 else
4010 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
4011 {
4012 alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs);
4013 i++;
4014 ofs += 2;
4015 }
4016
4017 /* The remainder must be byte copies. */
4018 while (i < nregs)
4019 {
4020 gcc_assert (GET_MODE (data_regs[i]) == QImode);
4021 emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]);
4022 i++;
4023 ofs += 1;
4024 }
4025
4026 return 1;
4027 }
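
/* A minimal usage sketch (illustrative only, not the machine-description
   expander itself): the caller supplies destination and source MEMs plus
   a byte count and a byte alignment as CONST_INTs, and falls back to a
   library call when the expander declines:

	rtx ops[4] = { dst_mem, src_mem, GEN_INT (nbytes), GEN_INT (align) };
	if (!alpha_expand_block_move (ops))
	  ...emit a call to memcpy instead...  */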
4028
4029 int
4030 alpha_expand_block_clear (rtx operands[])
4031 {
4032 rtx bytes_rtx = operands[1];
4033 rtx align_rtx = operands[3];
4034 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
4035 HOST_WIDE_INT bytes = orig_bytes;
4036 HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
4037 HOST_WIDE_INT alignofs = 0;
4038 rtx orig_dst = operands[0];
4039 rtx tmp;
4040 int i, words, ofs = 0;
4041
4042 if (orig_bytes <= 0)
4043 return 1;
4044 if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
4045 return 0;
4046
4047 /* Look for stricter alignment. */
4048 tmp = XEXP (orig_dst, 0);
4049 if (REG_P (tmp))
4050 align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp)));
4051 else if (GET_CODE (tmp) == PLUS
4052 && REG_P (XEXP (tmp, 0))
4053 && CONST_INT_P (XEXP (tmp, 1)))
4054 {
4055 HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
4056 int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
4057
4058 if (a > align)
4059 {
4060 if (a >= 64)
4061 align = a, alignofs = 8 - c % 8;
4062 else if (a >= 32)
4063 align = a, alignofs = 4 - c % 4;
4064 else if (a >= 16)
4065 align = a, alignofs = 2 - c % 2;
4066 }
4067 }
4068
4069 /* Handle an unaligned prefix first. */
4070
4071 if (alignofs > 0)
4072 {
4073 /* Given that alignofs is bounded by align, the only time BWX could
4074 generate three stores is for a 7 byte fill. Prefer two individual
4075 stores over a load/mask/store sequence. */
4076 if ((!TARGET_BWX || alignofs == 7)
4077 && align >= 32
4078 && !(alignofs == 4 && bytes >= 4))
4079 {
4080 machine_mode mode = (align >= 64 ? DImode : SImode);
4081 int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs;
4082 rtx mem, tmp;
4083 HOST_WIDE_INT mask;
4084
4085 mem = adjust_address (orig_dst, mode, ofs - inv_alignofs);
4086 set_mem_alias_set (mem, 0);
4087
4088 mask = ~(HOST_WIDE_INT_M1U << (inv_alignofs * 8));
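	  /* Worked example (illustrative): with align == 32 and
	     alignofs == 3, inv_alignofs is 1 and MEM is the aligned
	     longword holding the one byte that precedes the fill region.
	     The mask is then 0xff, preserving that low byte while the AND
	     below zeroes the upper three bytes; if only two bytes were
	     requested, the adjustment below also keeps the top byte
	     (mask 0xff0000ff in SImode).  */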
4089 if (bytes < alignofs)
4090 {
4091 mask |= HOST_WIDE_INT_M1U << ((inv_alignofs + bytes) * 8);
4092 ofs += bytes;
4093 bytes = 0;
4094 }
4095 else
4096 {
4097 bytes -= alignofs;
4098 ofs += alignofs;
4099 }
4100 alignofs = 0;
4101
4102 tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
4103 NULL_RTX, 1, OPTAB_WIDEN);
4104
4105 emit_move_insn (mem, tmp);
4106 }
4107
4108 if (TARGET_BWX && (alignofs & 1) && bytes >= 1)
4109 {
4110 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4111 bytes -= 1;
4112 ofs += 1;
4113 alignofs -= 1;
4114 }
4115 if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2)
4116 {
4117 emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx);
4118 bytes -= 2;
4119 ofs += 2;
4120 alignofs -= 2;
4121 }
4122 if (alignofs == 4 && bytes >= 4)
4123 {
4124 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4125 bytes -= 4;
4126 ofs += 4;
4127 alignofs = 0;
4128 }
4129
4130 /* If we've not used the extra lead alignment information by now,
4131 we won't be able to. Downgrade align to match what's left over. */
4132 if (alignofs > 0)
4133 {
4134 alignofs = alignofs & -alignofs;
4135 align = MIN (align, alignofs * BITS_PER_UNIT);
4136 }
4137 }
4138
4139 /* Handle a block of contiguous long-words. */
4140
4141 if (align >= 64 && bytes >= 8)
4142 {
4143 words = bytes / 8;
4144
4145 for (i = 0; i < words; ++i)
4146 emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8),
4147 const0_rtx);
4148
4149 bytes -= words * 8;
4150 ofs += words * 8;
4151 }
4152
4153 /* If the block is large and appropriately aligned, emit a single
4154 store followed by a sequence of stq_u insns. */
4155
4156 if (align >= 32 && bytes > 16)
4157 {
4158 rtx orig_dsta;
4159
4160 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4161 bytes -= 4;
4162 ofs += 4;
4163
4164 orig_dsta = XEXP (orig_dst, 0);
4165 if (GET_CODE (orig_dsta) == LO_SUM)
4166 orig_dsta = force_reg (Pmode, orig_dsta);
4167
4168 words = bytes / 8;
4169 for (i = 0; i < words; ++i)
4170 {
4171 rtx mem
4172 = change_address (orig_dst, DImode,
4173 gen_rtx_AND (DImode,
4174 plus_constant (DImode, orig_dsta,
4175 ofs + i*8),
4176 GEN_INT (-8)));
4177 set_mem_alias_set (mem, 0);
4178 emit_move_insn (mem, const0_rtx);
4179 }
4180
4181 /* Depending on the alignment, the first stq_u may have overlapped
4182 with the initial stl, which means that the last stq_u didn't
4183 write as much as it would appear. Leave those questionable bytes
4184 unaccounted for. */
4185 bytes -= words * 8 - 4;
4186 ofs += words * 8 - 4;
4187 }
4188
4189 /* Handle a smaller block of aligned words. */
4190
4191 if ((align >= 64 && bytes == 4)
4192 || (align == 32 && bytes >= 4))
4193 {
4194 words = bytes / 4;
4195
4196 for (i = 0; i < words; ++i)
4197 emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4),
4198 const0_rtx);
4199
4200 bytes -= words * 4;
4201 ofs += words * 4;
4202 }
4203
4204   /* An unaligned block uses stq_u stores for as many whole quadwords as possible. */
4205
4206 if (bytes >= 8)
4207 {
4208 words = bytes / 8;
4209
4210 alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs);
4211
4212 bytes -= words * 8;
4213 ofs += words * 8;
4214 }
4215
4216 /* Next clean up any trailing pieces. */
4217
4218   /* Count the number of set bits in BYTES for which aligned stores could
4219 be emitted. */
4220 words = 0;
4221 for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1)
4222 if (bytes & i)
4223 words += 1;
4224
4225 /* If we have appropriate alignment (and it wouldn't take too many
4226 instructions otherwise), mask out the bytes we need. */
4227 if (TARGET_BWX ? words > 2 : bytes > 0)
4228 {
4229 if (align >= 64)
4230 {
4231 rtx mem, tmp;
4232 HOST_WIDE_INT mask;
4233
4234 mem = adjust_address (orig_dst, DImode, ofs);
4235 set_mem_alias_set (mem, 0);
4236
4237 mask = HOST_WIDE_INT_M1U << (bytes * 8);
4238
4239 tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask),
4240 NULL_RTX, 1, OPTAB_WIDEN);
4241
4242 emit_move_insn (mem, tmp);
4243 return 1;
4244 }
4245 else if (align >= 32 && bytes < 4)
4246 {
4247 rtx mem, tmp;
4248 HOST_WIDE_INT mask;
4249
4250 mem = adjust_address (orig_dst, SImode, ofs);
4251 set_mem_alias_set (mem, 0);
4252
4253 mask = HOST_WIDE_INT_M1U << (bytes * 8);
4254
4255 tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask),
4256 NULL_RTX, 1, OPTAB_WIDEN);
4257
4258 emit_move_insn (mem, tmp);
4259 return 1;
4260 }
4261 }
4262
4263 if (!TARGET_BWX && bytes >= 4)
4264 {
4265 alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs);
4266 bytes -= 4;
4267 ofs += 4;
4268 }
4269
4270 if (bytes >= 2)
4271 {
4272 if (align >= 16)
4273 {
4274 do {
4275 emit_move_insn (adjust_address (orig_dst, HImode, ofs),
4276 const0_rtx);
4277 bytes -= 2;
4278 ofs += 2;
4279 } while (bytes >= 2);
4280 }
4281 else if (! TARGET_BWX)
4282 {
4283 alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
4284 bytes -= 2;
4285 ofs += 2;
4286 }
4287 }
4288
4289 while (bytes > 0)
4290 {
4291 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4292 bytes -= 1;
4293 ofs += 1;
4294 }
4295
4296 return 1;
4297 }
4298
4299 /* Returns a mask so that zap(x, value) == x & mask. */
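/* For example, a ZAP byte-select value of 0x11 zeroes bytes 0 and 4 of
   the quadword, so the mask returned here is 0xffffff00ffffff00: 0xff in
   every byte position whose select bit is clear, 0x00 where it is set.  */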
4300
4301 rtx
4302 alpha_expand_zap_mask (HOST_WIDE_INT value)
4303 {
4304 rtx result;
4305 int i;
4306 HOST_WIDE_INT mask = 0;
4307
4308 for (i = 7; i >= 0; --i)
4309 {
4310 mask <<= 8;
4311 if (!((value >> i) & 1))
4312 mask |= 0xff;
4313 }
4314
4315 result = gen_int_mode (mask, DImode);
4316 return result;
4317 }
4318
4319 void
4320 alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx),
4321 machine_mode mode,
4322 rtx op0, rtx op1, rtx op2)
4323 {
4324 op0 = gen_lowpart (mode, op0);
4325
4326 if (op1 == const0_rtx)
4327 op1 = CONST0_RTX (mode);
4328 else
4329 op1 = gen_lowpart (mode, op1);
4330
4331 if (op2 == const0_rtx)
4332 op2 = CONST0_RTX (mode);
4333 else
4334 op2 = gen_lowpart (mode, op2);
4335
4336 emit_insn ((*gen) (op0, op1, op2));
4337 }
4338
4339 /* A subroutine of the atomic operation splitters. Jump to LABEL if
4340 COND is true. Mark the jump as unlikely to be taken. */
4341
4342 static void
4343 emit_unlikely_jump (rtx cond, rtx label)
4344 {
4345 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
4346 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
4347 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
4348 }
4349
4350 /* Subroutines of the atomic operation splitters. Emit barriers
4351 as needed for the memory MODEL. */
4352
4353 static void
4354 alpha_pre_atomic_barrier (enum memmodel model)
4355 {
4356 if (need_atomic_barrier_p (model, true))
4357 emit_insn (gen_memory_barrier ());
4358 }
4359
4360 static void
4361 alpha_post_atomic_barrier (enum memmodel model)
4362 {
4363 if (need_atomic_barrier_p (model, false))
4364 emit_insn (gen_memory_barrier ());
4365 }
4366
4367 /* A subroutine of the atomic operation splitters. Emit an insxl
4368 instruction in MODE. */
4369
4370 static rtx
4371 emit_insxl (machine_mode mode, rtx op1, rtx op2)
4372 {
4373 rtx ret = gen_reg_rtx (DImode);
4374 rtx (*fn) (rtx, rtx, rtx);
4375
4376 switch (mode)
4377 {
4378 case E_QImode:
4379 fn = gen_insbl;
4380 break;
4381 case E_HImode:
4382 fn = gen_inswl;
4383 break;
4384 case E_SImode:
4385 fn = gen_insll;
4386 break;
4387 case E_DImode:
4388 fn = gen_insql;
4389 break;
4390 default:
4391 gcc_unreachable ();
4392 }
4393
4394 op1 = force_reg (mode, op1);
4395 emit_insn (fn (ret, op1, op2));
4396
4397 return ret;
4398 }
4399
4400 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
4401 to perform. MEM is the memory on which to operate. VAL is the second
4402 operand of the binary operator. BEFORE and AFTER are optional locations to
4403    return the value of MEM either before or after the operation.  SCRATCH is
4404 a scratch register. */
4405
4406 void
4407 alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before,
4408 rtx after, rtx scratch, enum memmodel model)
4409 {
4410 machine_mode mode = GET_MODE (mem);
4411 rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch));
4412
4413 alpha_pre_atomic_barrier (model);
4414
4415 label = gen_label_rtx ();
4416 emit_label (label);
4417 label = gen_rtx_LABEL_REF (DImode, label);
4418
4419 if (before == NULL)
4420 before = scratch;
4421 emit_insn (gen_load_locked (mode, before, mem));
4422
4423 if (code == NOT)
4424 {
4425 x = gen_rtx_AND (mode, before, val);
4426 emit_insn (gen_rtx_SET (val, x));
4427
4428 x = gen_rtx_NOT (mode, val);
4429 }
4430 else
4431 x = gen_rtx_fmt_ee (code, mode, before, val);
4432 if (after)
4433 emit_insn (gen_rtx_SET (after, copy_rtx (x)));
4434 emit_insn (gen_rtx_SET (scratch, x));
4435
4436 emit_insn (gen_store_conditional (mode, cond, mem, scratch));
4437
4438 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4439 emit_unlikely_jump (x, label);
4440
4441 alpha_post_atomic_barrier (model);
4442 }
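
/* For reference, the emitted sequence has the usual load-locked /
   store-conditional shape (a sketch for a DImode fetch-and-add; memory
   barriers implied by MODEL are placed around the loop):

	1:	ldq_l	scratch,0(mem)
		addq	scratch,val,scratch
		stq_c	scratch,0(mem)
		beq	scratch,1b  */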
4443
4444 /* Expand a compare and swap operation. */
4445
4446 void
4447 alpha_split_compare_and_swap (rtx operands[])
4448 {
4449 rtx cond, retval, mem, oldval, newval;
4450 bool is_weak;
4451 enum memmodel mod_s, mod_f;
4452 machine_mode mode;
4453 rtx label1, label2, x;
4454
4455 cond = operands[0];
4456 retval = operands[1];
4457 mem = operands[2];
4458 oldval = operands[3];
4459 newval = operands[4];
4460 is_weak = (operands[5] != const0_rtx);
4461 mod_s = memmodel_from_int (INTVAL (operands[6]));
4462 mod_f = memmodel_from_int (INTVAL (operands[7]));
4463 mode = GET_MODE (mem);
4464
4465 alpha_pre_atomic_barrier (mod_s);
4466
4467 label1 = NULL_RTX;
4468 if (!is_weak)
4469 {
4470 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4471 emit_label (XEXP (label1, 0));
4472 }
4473 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4474
4475 emit_insn (gen_load_locked (mode, retval, mem));
4476
4477 x = gen_lowpart (DImode, retval);
4478 if (oldval == const0_rtx)
4479 {
4480 emit_move_insn (cond, const0_rtx);
4481 x = gen_rtx_NE (DImode, x, const0_rtx);
4482 }
4483 else
4484 {
4485 x = gen_rtx_EQ (DImode, x, oldval);
4486 emit_insn (gen_rtx_SET (cond, x));
4487 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4488 }
4489 emit_unlikely_jump (x, label2);
4490
4491 emit_move_insn (cond, newval);
4492 emit_insn (gen_store_conditional
4493 (mode, cond, mem, gen_lowpart (mode, cond)));
4494
4495 if (!is_weak)
4496 {
4497 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4498 emit_unlikely_jump (x, label1);
4499 }
4500
4501 if (!is_mm_relaxed (mod_f))
4502 emit_label (XEXP (label2, 0));
4503
4504 alpha_post_atomic_barrier (mod_s);
4505
4506 if (is_mm_relaxed (mod_f))
4507 emit_label (XEXP (label2, 0));
4508 }
4509
4510 void
4511 alpha_expand_compare_and_swap_12 (rtx operands[])
4512 {
4513 rtx cond, dst, mem, oldval, newval, is_weak, mod_s, mod_f;
4514 machine_mode mode;
4515 rtx addr, align, wdst;
4516
4517 cond = operands[0];
4518 dst = operands[1];
4519 mem = operands[2];
4520 oldval = operands[3];
4521 newval = operands[4];
4522 is_weak = operands[5];
4523 mod_s = operands[6];
4524 mod_f = operands[7];
4525 mode = GET_MODE (mem);
4526
4527 /* We forced the address into a register via mem_noofs_operand. */
4528 addr = XEXP (mem, 0);
4529 gcc_assert (register_operand (addr, DImode));
4530
4531 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4532 NULL_RTX, 1, OPTAB_DIRECT);
4533
4534 oldval = convert_modes (DImode, mode, oldval, 1);
4535
4536 if (newval != const0_rtx)
4537 newval = emit_insxl (mode, newval, addr);
4538
4539 wdst = gen_reg_rtx (DImode);
4540 emit_insn (gen_atomic_compare_and_swap_1
4541 (mode, cond, wdst, mem, oldval, newval, align,
4542 is_weak, mod_s, mod_f));
4543
4544 emit_move_insn (dst, gen_lowpart (mode, wdst));
4545 }
4546
4547 void
4548 alpha_split_compare_and_swap_12 (rtx operands[])
4549 {
4550 rtx cond, dest, orig_mem, oldval, newval, align, scratch;
4551 machine_mode mode;
4552 bool is_weak;
4553 enum memmodel mod_s, mod_f;
4554 rtx label1, label2, mem, addr, width, mask, x;
4555
4556 cond = operands[0];
4557 dest = operands[1];
4558 orig_mem = operands[2];
4559 oldval = operands[3];
4560 newval = operands[4];
4561 align = operands[5];
4562 is_weak = (operands[6] != const0_rtx);
4563 mod_s = memmodel_from_int (INTVAL (operands[7]));
4564 mod_f = memmodel_from_int (INTVAL (operands[8]));
4565 scratch = operands[9];
4566 mode = GET_MODE (orig_mem);
4567 addr = XEXP (orig_mem, 0);
4568
4569 mem = gen_rtx_MEM (DImode, align);
4570 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4571 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4572 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4573
4574 alpha_pre_atomic_barrier (mod_s);
4575
4576 label1 = NULL_RTX;
4577 if (!is_weak)
4578 {
4579 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4580 emit_label (XEXP (label1, 0));
4581 }
4582 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4583
4584 emit_insn (gen_load_locked (DImode, scratch, mem));
4585
4586 width = GEN_INT (GET_MODE_BITSIZE (mode));
4587 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4588 emit_insn (gen_extxl (dest, scratch, width, addr));
4589
4590 if (oldval == const0_rtx)
4591 {
4592 emit_move_insn (cond, const0_rtx);
4593 x = gen_rtx_NE (DImode, dest, const0_rtx);
4594 }
4595 else
4596 {
4597 x = gen_rtx_EQ (DImode, dest, oldval);
4598 emit_insn (gen_rtx_SET (cond, x));
4599 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4600 }
4601 emit_unlikely_jump (x, label2);
4602
4603 emit_insn (gen_mskxl (cond, scratch, mask, addr));
4604
4605 if (newval != const0_rtx)
4606 emit_insn (gen_iordi3 (cond, cond, newval));
4607
4608 emit_insn (gen_store_conditional (DImode, cond, mem, cond));
4609
4610 if (!is_weak)
4611 {
4612 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4613 emit_unlikely_jump (x, label1);
4614 }
4615
4616 if (!is_mm_relaxed (mod_f))
4617 emit_label (XEXP (label2, 0));
4618
4619 alpha_post_atomic_barrier (mod_s);
4620
4621 if (is_mm_relaxed (mod_f))
4622 emit_label (XEXP (label2, 0));
4623 }
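
/* Roughly, the generated QImode sequence looks like this sketch (strong
   compare-and-swap, relaxed failure ordering; NEWVAL was already shifted
   into its byte lane by the insbl emitted in
   alpha_expand_compare_and_swap_12):

	1:	ldq_l	scratch,0(align)
		extbl	scratch,addr,dest
		cmpeq	dest,oldval,cond
		beq	cond,2f
		mskbl	scratch,addr,cond
		bis	cond,newval,cond
		stq_c	cond,0(align)
		beq	cond,1b
	2:  */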
4624
4625 /* Expand an atomic exchange operation. */
4626
4627 void
4628 alpha_split_atomic_exchange (rtx operands[])
4629 {
4630 rtx retval, mem, val, scratch;
4631 enum memmodel model;
4632 machine_mode mode;
4633 rtx label, x, cond;
4634
4635 retval = operands[0];
4636 mem = operands[1];
4637 val = operands[2];
4638 model = (enum memmodel) INTVAL (operands[3]);
4639 scratch = operands[4];
4640 mode = GET_MODE (mem);
4641 cond = gen_lowpart (DImode, scratch);
4642
4643 alpha_pre_atomic_barrier (model);
4644
4645 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4646 emit_label (XEXP (label, 0));
4647
4648 emit_insn (gen_load_locked (mode, retval, mem));
4649 emit_move_insn (scratch, val);
4650 emit_insn (gen_store_conditional (mode, cond, mem, scratch));
4651
4652 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4653 emit_unlikely_jump (x, label);
4654
4655 alpha_post_atomic_barrier (model);
4656 }
4657
4658 void
4659 alpha_expand_atomic_exchange_12 (rtx operands[])
4660 {
4661 rtx dst, mem, val, model;
4662 machine_mode mode;
4663 rtx addr, align, wdst;
4664
4665 dst = operands[0];
4666 mem = operands[1];
4667 val = operands[2];
4668 model = operands[3];
4669 mode = GET_MODE (mem);
4670
4671 /* We forced the address into a register via mem_noofs_operand. */
4672 addr = XEXP (mem, 0);
4673 gcc_assert (register_operand (addr, DImode));
4674
4675 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4676 NULL_RTX, 1, OPTAB_DIRECT);
4677
4678 /* Insert val into the correct byte location within the word. */
4679 if (val != const0_rtx)
4680 val = emit_insxl (mode, val, addr);
4681
4682 wdst = gen_reg_rtx (DImode);
4683 emit_insn (gen_atomic_exchange_1 (mode, wdst, mem, val, align, model));
4684
4685 emit_move_insn (dst, gen_lowpart (mode, wdst));
4686 }
4687
4688 void
4689 alpha_split_atomic_exchange_12 (rtx operands[])
4690 {
4691 rtx dest, orig_mem, addr, val, align, scratch;
4692 rtx label, mem, width, mask, x;
4693 machine_mode mode;
4694 enum memmodel model;
4695
4696 dest = operands[0];
4697 orig_mem = operands[1];
4698 val = operands[2];
4699 align = operands[3];
4700 model = (enum memmodel) INTVAL (operands[4]);
4701 scratch = operands[5];
4702 mode = GET_MODE (orig_mem);
4703 addr = XEXP (orig_mem, 0);
4704
4705 mem = gen_rtx_MEM (DImode, align);
4706 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4707 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4708 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4709
4710 alpha_pre_atomic_barrier (model);
4711
4712 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4713 emit_label (XEXP (label, 0));
4714
4715 emit_insn (gen_load_locked (DImode, scratch, mem));
4716
4717 width = GEN_INT (GET_MODE_BITSIZE (mode));
4718 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4719 emit_insn (gen_extxl (dest, scratch, width, addr));
4720 emit_insn (gen_mskxl (scratch, scratch, mask, addr));
4721 if (val != const0_rtx)
4722 emit_insn (gen_iordi3 (scratch, scratch, val));
4723
4724 emit_insn (gen_store_conditional (DImode, scratch, mem, scratch));
4725
4726 x = gen_rtx_EQ (DImode, scratch, const0_rtx);
4727 emit_unlikely_jump (x, label);
4728
4729 alpha_post_atomic_barrier (model);
4730 }
4731 \f
4732 /* Adjust the cost of a scheduling dependency. Return the new cost of
4733    the dependency of INSN on DEP_INSN.  COST is the current cost.  */
4734
4735 static int
4736 alpha_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4737 unsigned int)
4738 {
4739 enum attr_type dep_insn_type;
4740
4741 /* If the dependence is an anti-dependence, there is no cost. For an
4742 output dependence, there is sometimes a cost, but it doesn't seem
4743 worth handling those few cases. */
4744 if (dep_type != 0)
4745 return cost;
4746
4747 /* If we can't recognize the insns, we can't really do anything. */
4748 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
4749 return cost;
4750
4751 dep_insn_type = get_attr_type (dep_insn);
4752
4753 /* Bring in the user-defined memory latency. */
4754 if (dep_insn_type == TYPE_ILD
4755 || dep_insn_type == TYPE_FLD
4756 || dep_insn_type == TYPE_LDSYM)
4757 cost += alpha_memory_latency-1;
4758
4759 /* Everything else handled in DFA bypasses now. */
4760
4761 return cost;
4762 }
4763
4764 /* The number of instructions that can be issued per cycle. */
4765
4766 static int
4767 alpha_issue_rate (void)
4768 {
4769 return (alpha_tune == PROCESSOR_EV4 ? 2 : 4);
4770 }
4771
4772 /* How many alternative schedules to try. This should be as wide as the
4773 scheduling freedom in the DFA, but no wider. Making this value too
4774    large results in extra work for the scheduler.
4775
4776 For EV4, loads can be issued to either IB0 or IB1, thus we have 2
4777 alternative schedules. For EV5, we can choose between E0/E1 and
4778 FA/FM. For EV6, an arithmetic insn can be issued to U0/U1/L0/L1. */
4779
4780 static int
4781 alpha_multipass_dfa_lookahead (void)
4782 {
4783 return (alpha_tune == PROCESSOR_EV6 ? 4 : 2);
4784 }
4785 \f
4786 /* Machine-specific function data. */
4787
4788 struct GTY(()) alpha_links;
4789
4790 struct GTY(()) machine_function
4791 {
4792 unsigned HOST_WIDE_INT sa_mask;
4793 HOST_WIDE_INT sa_size;
4794 HOST_WIDE_INT frame_size;
4795
4796 /* For flag_reorder_blocks_and_partition. */
4797 rtx gp_save_rtx;
4798
4799 /* For VMS condition handlers. */
4800 bool uses_condition_handler;
4801
4802 /* Linkage entries. */
4803 hash_map<nofree_string_hash, alpha_links *> *links;
4804 };
4805
4806 /* How to allocate a 'struct machine_function'. */
4807
4808 static struct machine_function *
4809 alpha_init_machine_status (void)
4810 {
4811 return ggc_cleared_alloc<machine_function> ();
4812 }
4813
4814 /* Support for frame based VMS condition handlers. */
4815
4816 /* A VMS condition handler may be established for a function with a call to
4817 __builtin_establish_vms_condition_handler, and cancelled with a call to
4818 __builtin_revert_vms_condition_handler.
4819
4820 The VMS Condition Handling Facility knows about the existence of a handler
4821    from the procedure descriptor .handler field.  Like the VMS native compilers,
4822    we store the user-specified handler's address at a fixed location in the
4823 stack frame and point the procedure descriptor at a common wrapper which
4824 fetches the real handler's address and issues an indirect call.
4825
4826 The indirection wrapper is "__gcc_shell_handler", provided by libgcc.
4827
4828 We force the procedure kind to PT_STACK, and the fixed frame location is
4829 fp+8, just before the register save area. We use the handler_data field in
4830 the procedure descriptor to state the fp offset at which the installed
4831 handler address can be found. */
4832
4833 #define VMS_COND_HANDLER_FP_OFFSET 8
4834
4835 /* Expand code to store the currently installed user VMS condition handler
4836 into TARGET and install HANDLER as the new condition handler. */
4837
4838 void
4839 alpha_expand_builtin_establish_vms_condition_handler (rtx target, rtx handler)
4840 {
4841 rtx handler_slot_address = plus_constant (Pmode, hard_frame_pointer_rtx,
4842 VMS_COND_HANDLER_FP_OFFSET);
4843
4844 rtx handler_slot
4845 = gen_rtx_MEM (DImode, handler_slot_address);
4846
4847 emit_move_insn (target, handler_slot);
4848 emit_move_insn (handler_slot, handler);
4849
4850 /* Notify the start/prologue/epilogue emitters that the condition handler
4851 slot is needed. In addition to reserving the slot space, this will force
4852      the procedure kind to PT_STACK, ensuring that the hard_frame_pointer_rtx
4853 use above is correct. */
4854 cfun->machine->uses_condition_handler = true;
4855 }
4856
4857 /* Expand code to store the current VMS condition handler into TARGET and
4858 nullify it. */
4859
4860 void
4861 alpha_expand_builtin_revert_vms_condition_handler (rtx target)
4862 {
4863 /* We implement this by establishing a null condition handler, with the tiny
4864      side effect of setting uses_condition_handler.  This is slightly
4865      pessimistic if no builtin_establish call is ever issued, but that case
4866      is harmless and not expected to happen anyway.  */
4867
4868 alpha_expand_builtin_establish_vms_condition_handler (target, const0_rtx);
4869 }
4870
4871 /* Functions to save and restore alpha_return_addr_rtx. */
4872
4873 /* Start the ball rolling with RETURN_ADDR_RTX. */
4874
4875 rtx
4876 alpha_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4877 {
4878 if (count != 0)
4879 return const0_rtx;
4880
4881 return get_hard_reg_initial_val (Pmode, REG_RA);
4882 }
4883
4884 /* Return or create a memory slot containing the gp value for the current
4885 function. Needed only if TARGET_LD_BUGGY_LDGP. */
4886
4887 rtx
4888 alpha_gp_save_rtx (void)
4889 {
4890 rtx_insn *seq;
4891 rtx m = cfun->machine->gp_save_rtx;
4892
4893 if (m == NULL)
4894 {
4895 start_sequence ();
4896
4897 m = assign_stack_local (DImode, UNITS_PER_WORD, BITS_PER_WORD);
4898 m = validize_mem (m);
4899 emit_move_insn (m, pic_offset_table_rtx);
4900
4901 seq = get_insns ();
4902 end_sequence ();
4903
4904 /* We used to simply emit the sequence after entry_of_function.
4905 However this breaks the CFG if the first instruction in the
4906 first block is not the NOTE_INSN_BASIC_BLOCK, for example a
4907 label. Emit the sequence properly on the edge. We are only
4908 invoked from dw2_build_landing_pads and finish_eh_generation
4909 will call commit_edge_insertions thanks to a kludge. */
4910 insert_insn_on_edge (seq,
4911 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
4912
4913 cfun->machine->gp_save_rtx = m;
4914 }
4915
4916 return m;
4917 }
4918
4919 static void
4920 alpha_instantiate_decls (void)
4921 {
4922 if (cfun->machine->gp_save_rtx != NULL_RTX)
4923 instantiate_decl_rtl (cfun->machine->gp_save_rtx);
4924 }
4925
4926 static int
4927 alpha_ra_ever_killed (void)
4928 {
4929 rtx_insn *top;
4930
4931 if (!has_hard_reg_initial_val (Pmode, REG_RA))
4932 return (int)df_regs_ever_live_p (REG_RA);
4933
4934 push_topmost_sequence ();
4935 top = get_insns ();
4936 pop_topmost_sequence ();
4937
4938 return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL);
4939 }
4940
4941 \f
4942 /* Return the trap mode suffix applicable to the current
4943 instruction, or NULL. */
4944
4945 static const char *
4946 get_trap_mode_suffix (void)
4947 {
4948 enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn);
4949
4950 switch (s)
4951 {
4952 case TRAP_SUFFIX_NONE:
4953 return NULL;
4954
4955 case TRAP_SUFFIX_SU:
4956 if (alpha_fptm >= ALPHA_FPTM_SU)
4957 return "su";
4958 return NULL;
4959
4960 case TRAP_SUFFIX_SUI:
4961 if (alpha_fptm >= ALPHA_FPTM_SUI)
4962 return "sui";
4963 return NULL;
4964
4965 case TRAP_SUFFIX_V_SV:
4966 switch (alpha_fptm)
4967 {
4968 case ALPHA_FPTM_N:
4969 return NULL;
4970 case ALPHA_FPTM_U:
4971 return "v";
4972 case ALPHA_FPTM_SU:
4973 case ALPHA_FPTM_SUI:
4974 return "sv";
4975 default:
4976 gcc_unreachable ();
4977 }
4978
4979 case TRAP_SUFFIX_V_SV_SVI:
4980 switch (alpha_fptm)
4981 {
4982 case ALPHA_FPTM_N:
4983 return NULL;
4984 case ALPHA_FPTM_U:
4985 return "v";
4986 case ALPHA_FPTM_SU:
4987 return "sv";
4988 case ALPHA_FPTM_SUI:
4989 return "svi";
4990 default:
4991 gcc_unreachable ();
4992 }
4993 break;
4994
4995 case TRAP_SUFFIX_U_SU_SUI:
4996 switch (alpha_fptm)
4997 {
4998 case ALPHA_FPTM_N:
4999 return NULL;
5000 case ALPHA_FPTM_U:
5001 return "u";
5002 case ALPHA_FPTM_SU:
5003 return "su";
5004 case ALPHA_FPTM_SUI:
5005 return "sui";
5006 default:
5007 gcc_unreachable ();
5008 }
5009 break;
5010
5011 default:
5012 gcc_unreachable ();
5013 }
5014 gcc_unreachable ();
5015 }
5016
5017 /* Return the rounding mode suffix applicable to the current
5018 instruction, or NULL. */
5019
5020 static const char *
5021 get_round_mode_suffix (void)
5022 {
5023 enum attr_round_suffix s = get_attr_round_suffix (current_output_insn);
5024
5025 switch (s)
5026 {
5027 case ROUND_SUFFIX_NONE:
5028 return NULL;
5029 case ROUND_SUFFIX_NORMAL:
5030 switch (alpha_fprm)
5031 {
5032 case ALPHA_FPRM_NORM:
5033 return NULL;
5034 case ALPHA_FPRM_MINF:
5035 return "m";
5036 case ALPHA_FPRM_CHOP:
5037 return "c";
5038 case ALPHA_FPRM_DYN:
5039 return "d";
5040 default:
5041 gcc_unreachable ();
5042 }
5043 break;
5044
5045 case ROUND_SUFFIX_C:
5046 return "c";
5047
5048 default:
5049 gcc_unreachable ();
5050 }
5051 gcc_unreachable ();
5052 }
5053
5054 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
5055
5056 static bool
5057 alpha_print_operand_punct_valid_p (unsigned char code)
5058 {
5059 return (code == '/' || code == ',' || code == '-' || code == '~'
5060 || code == '#' || code == '*' || code == '&');
5061 }
5062
5063 /* Implement TARGET_PRINT_OPERAND. The alpha-specific
5064 operand codes are documented below. */
5065
5066 static void
5067 alpha_print_operand (FILE *file, rtx x, int code)
5068 {
5069 int i;
5070
5071 switch (code)
5072 {
5073 case '~':
5074 /* Print the assembler name of the current function. */
5075 assemble_name (file, alpha_fnname);
5076 break;
5077
5078 case '&':
5079 if (const char *name = get_some_local_dynamic_name ())
5080 assemble_name (file, name);
5081 else
5082 output_operand_lossage ("'%%&' used without any "
5083 "local dynamic TLS references");
5084 break;
5085
5086 case '/':
5087 /* Generates the instruction suffix. The TRAP_SUFFIX and ROUND_SUFFIX
5088 attributes are examined to determine what is appropriate. */
5089 {
5090 const char *trap = get_trap_mode_suffix ();
5091 const char *round = get_round_mode_suffix ();
5092
5093 if (trap || round)
5094 fprintf (file, "/%s%s", (trap ? trap : ""), (round ? round : ""));
5095 break;
5096 }
5097
5098 case ',':
5099 /* Generates single precision suffix for floating point
5100 instructions (s for IEEE, f for VAX). */
5101 fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file);
5102 break;
5103
5104 case '-':
5105 /* Generates double precision suffix for floating point
5106 instructions (t for IEEE, g for VAX). */
5107 fputc ((TARGET_FLOAT_VAX ? 'g' : 't'), file);
5108 break;
5109
5110 case '#':
5111 if (alpha_this_literal_sequence_number == 0)
5112 alpha_this_literal_sequence_number = alpha_next_sequence_number++;
5113 fprintf (file, "%d", alpha_this_literal_sequence_number);
5114 break;
5115
5116 case '*':
5117 if (alpha_this_gpdisp_sequence_number == 0)
5118 alpha_this_gpdisp_sequence_number = alpha_next_sequence_number++;
5119 fprintf (file, "%d", alpha_this_gpdisp_sequence_number);
5120 break;
5121
5122 case 'J':
5123 {
5124 const char *lituse;
5125
5126 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL)
5127 {
5128 x = XVECEXP (x, 0, 0);
5129 lituse = "lituse_tlsgd";
5130 }
5131 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL)
5132 {
5133 x = XVECEXP (x, 0, 0);
5134 lituse = "lituse_tlsldm";
5135 }
5136 else if (CONST_INT_P (x))
5137 lituse = "lituse_jsr";
5138 else
5139 {
5140 output_operand_lossage ("invalid %%J value");
5141 break;
5142 }
5143
5144 if (x != const0_rtx)
5145 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5146 }
5147 break;
5148
5149 case 'j':
5150 {
5151 const char *lituse;
5152
5153 #ifdef HAVE_AS_JSRDIRECT_RELOCS
5154 lituse = "lituse_jsrdirect";
5155 #else
5156 lituse = "lituse_jsr";
5157 #endif
5158
5159 gcc_assert (INTVAL (x) != 0);
5160 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5161 }
5162 break;
5163 case 'r':
5164 /* If this operand is the constant zero, write it as "$31". */
5165 if (REG_P (x))
5166 fprintf (file, "%s", reg_names[REGNO (x)]);
5167 else if (x == CONST0_RTX (GET_MODE (x)))
5168 fprintf (file, "$31");
5169 else
5170 output_operand_lossage ("invalid %%r value");
5171 break;
5172
5173 case 'R':
5174 /* Similar, but for floating-point. */
5175 if (REG_P (x))
5176 fprintf (file, "%s", reg_names[REGNO (x)]);
5177 else if (x == CONST0_RTX (GET_MODE (x)))
5178 fprintf (file, "$f31");
5179 else
5180 output_operand_lossage ("invalid %%R value");
5181 break;
5182
5183 case 'N':
5184 /* Write the 1's complement of a constant. */
5185 if (!CONST_INT_P (x))
5186 output_operand_lossage ("invalid %%N value");
5187
5188 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
5189 break;
5190
5191 case 'P':
5192 /* Write 1 << C, for a constant C. */
5193 if (!CONST_INT_P (x))
5194 output_operand_lossage ("invalid %%P value");
5195
5196 fprintf (file, HOST_WIDE_INT_PRINT_DEC, HOST_WIDE_INT_1 << INTVAL (x));
5197 break;
5198
5199 case 'h':
5200 /* Write the high-order 16 bits of a constant, sign-extended. */
5201 if (!CONST_INT_P (x))
5202 output_operand_lossage ("invalid %%h value");
5203
5204 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16);
5205 break;
5206
5207 case 'L':
5208 /* Write the low-order 16 bits of a constant, sign-extended. */
5209 if (!CONST_INT_P (x))
5210 output_operand_lossage ("invalid %%L value");
5211
5212 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5213 (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000));
5214 break;
5215
5216 case 'm':
5217 /* Write mask for ZAP insn. */
5218 if (CONST_INT_P (x))
5219 {
5220 HOST_WIDE_INT mask = 0, value = INTVAL (x);
5221
5222 for (i = 0; i < 8; i++, value >>= 8)
5223 if (value & 0xff)
5224 mask |= (1 << i);
5225
5226 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask);
5227 }
5228 else
5229 output_operand_lossage ("invalid %%m value");
5230 break;
5231
5232 case 'M':
5233 /* 'b', 'w', 'l', or 'q' as the value of the constant. */
5234 if (!mode_width_operand (x, VOIDmode))
5235 output_operand_lossage ("invalid %%M value");
5236
5237 fprintf (file, "%s",
5238 (INTVAL (x) == 8 ? "b"
5239 : INTVAL (x) == 16 ? "w"
5240 : INTVAL (x) == 32 ? "l"
5241 : "q"));
5242 break;
5243
5244 case 'U':
5245 /* Similar, except do it from the mask. */
5246 if (CONST_INT_P (x))
5247 {
5248 HOST_WIDE_INT value = INTVAL (x);
5249
5250 if (value == 0xff)
5251 {
5252 fputc ('b', file);
5253 break;
5254 }
5255 if (value == 0xffff)
5256 {
5257 fputc ('w', file);
5258 break;
5259 }
5260 if (value == 0xffffffff)
5261 {
5262 fputc ('l', file);
5263 break;
5264 }
5265 if (value == -1)
5266 {
5267 fputc ('q', file);
5268 break;
5269 }
5270 }
5271
5272 output_operand_lossage ("invalid %%U value");
5273 break;
5274
5275 case 's':
5276 /* Write the constant value divided by 8. */
5277 if (!CONST_INT_P (x)
5278 || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5279 || (INTVAL (x) & 7) != 0)
5280 output_operand_lossage ("invalid %%s value");
5281
5282 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8);
5283 break;
5284
5285 case 'C': case 'D': case 'c': case 'd':
5286 /* Write out comparison name. */
5287 {
5288 enum rtx_code c = GET_CODE (x);
5289
5290 if (!COMPARISON_P (x))
5291 output_operand_lossage ("invalid %%C value");
5292
5293 else if (code == 'D')
5294 c = reverse_condition (c);
5295 else if (code == 'c')
5296 c = swap_condition (c);
5297 else if (code == 'd')
5298 c = swap_condition (reverse_condition (c));
5299
5300 if (c == LEU)
5301 fprintf (file, "ule");
5302 else if (c == LTU)
5303 fprintf (file, "ult");
5304 else if (c == UNORDERED)
5305 fprintf (file, "un");
5306 else
5307 fprintf (file, "%s", GET_RTX_NAME (c));
5308 }
5309 break;
5310
5311 case 'E':
5312 /* Write the divide or modulus operator. */
5313 switch (GET_CODE (x))
5314 {
5315 case DIV:
5316 fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q");
5317 break;
5318 case UDIV:
5319 fprintf (file, "div%su", GET_MODE (x) == SImode ? "l" : "q");
5320 break;
5321 case MOD:
5322 fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q");
5323 break;
5324 case UMOD:
5325 fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q");
5326 break;
5327 default:
5328 output_operand_lossage ("invalid %%E value");
5329 break;
5330 }
5331 break;
5332
5333 case 'A':
5334 /* Write "_u" for unaligned access. */
5335 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
5336 fprintf (file, "_u");
5337 break;
5338
5339 case 0:
5340 if (REG_P (x))
5341 fprintf (file, "%s", reg_names[REGNO (x)]);
5342 else if (MEM_P (x))
5343 output_address (GET_MODE (x), XEXP (x, 0));
5344 else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC)
5345 {
5346 switch (XINT (XEXP (x, 0), 1))
5347 {
5348 case UNSPEC_DTPREL:
5349 case UNSPEC_TPREL:
5350 output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0));
5351 break;
5352 default:
5353 output_operand_lossage ("unknown relocation unspec");
5354 break;
5355 }
5356 }
5357 else
5358 output_addr_const (file, x);
5359 break;
5360
5361 default:
5362 output_operand_lossage ("invalid %%xn code");
5363 }
5364 }
5365
5366 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
5367
5368 static void
5369 alpha_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
5370 {
5371 int basereg = 31;
5372 HOST_WIDE_INT offset = 0;
5373
5374 if (GET_CODE (addr) == AND)
5375 addr = XEXP (addr, 0);
5376
5377 if (GET_CODE (addr) == PLUS
5378 && CONST_INT_P (XEXP (addr, 1)))
5379 {
5380 offset = INTVAL (XEXP (addr, 1));
5381 addr = XEXP (addr, 0);
5382 }
5383
5384 if (GET_CODE (addr) == LO_SUM)
5385 {
5386 const char *reloc16, *reloclo;
5387 rtx op1 = XEXP (addr, 1);
5388
5389 if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC)
5390 {
5391 op1 = XEXP (op1, 0);
5392 switch (XINT (op1, 1))
5393 {
5394 case UNSPEC_DTPREL:
5395 reloc16 = NULL;
5396 reloclo = (alpha_tls_size == 16 ? "dtprel" : "dtprello");
5397 break;
5398 case UNSPEC_TPREL:
5399 reloc16 = NULL;
5400 reloclo = (alpha_tls_size == 16 ? "tprel" : "tprello");
5401 break;
5402 default:
5403 output_operand_lossage ("unknown relocation unspec");
5404 return;
5405 }
5406
5407 output_addr_const (file, XVECEXP (op1, 0, 0));
5408 }
5409 else
5410 {
5411 reloc16 = "gprel";
5412 reloclo = "gprellow";
5413 output_addr_const (file, op1);
5414 }
5415
5416 if (offset)
5417 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
5418
5419 addr = XEXP (addr, 0);
5420 switch (GET_CODE (addr))
5421 {
5422 case REG:
5423 basereg = REGNO (addr);
5424 break;
5425
5426 case SUBREG:
5427 basereg = subreg_regno (addr);
5428 break;
5429
5430 default:
5431 gcc_unreachable ();
5432 }
5433
5434 fprintf (file, "($%d)\t\t!%s", basereg,
5435 (basereg == 29 ? reloc16 : reloclo));
5436 return;
5437 }
5438
5439 switch (GET_CODE (addr))
5440 {
5441 case REG:
5442 basereg = REGNO (addr);
5443 break;
5444
5445 case SUBREG:
5446 basereg = subreg_regno (addr);
5447 break;
5448
5449 case CONST_INT:
5450 offset = INTVAL (addr);
5451 break;
5452
5453 case SYMBOL_REF:
5454 gcc_assert(TARGET_ABI_OPEN_VMS || this_is_asm_operands);
5455 fprintf (file, "%s", XSTR (addr, 0));
5456 return;
5457
5458 case CONST:
5459 gcc_assert(TARGET_ABI_OPEN_VMS || this_is_asm_operands);
5460 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS
5461 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF);
5462 fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC,
5463 XSTR (XEXP (XEXP (addr, 0), 0), 0),
5464 INTVAL (XEXP (XEXP (addr, 0), 1)));
5465 return;
5466
5467 default:
5468 output_operand_lossage ("invalid operand address");
5469 return;
5470 }
5471
5472 fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg);
5473 }
5474 \f
5475 /* Emit RTL insns to initialize the variable parts of a trampoline at
5476 M_TRAMP. FNDECL is target function's decl. CHAIN_VALUE is an rtx
5477 for the static chain value for the function. */
5478
5479 static void
5480 alpha_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5481 {
5482 rtx fnaddr, mem, word1, word2;
5483
5484 fnaddr = XEXP (DECL_RTL (fndecl), 0);
5485
5486 #ifdef POINTERS_EXTEND_UNSIGNED
5487 fnaddr = convert_memory_address (Pmode, fnaddr);
5488 chain_value = convert_memory_address (Pmode, chain_value);
5489 #endif
5490
5491 if (TARGET_ABI_OPEN_VMS)
5492 {
5493 const char *fnname;
5494 char *trname;
5495
5496 /* Construct the name of the trampoline entry point. */
5497 fnname = XSTR (fnaddr, 0);
5498 trname = (char *) alloca (strlen (fnname) + 5);
5499 strcpy (trname, fnname);
5500 strcat (trname, "..tr");
5501 fnname = ggc_alloc_string (trname, strlen (trname) + 1);
5502 word2 = gen_rtx_SYMBOL_REF (Pmode, fnname);
5503
5504 /* Trampoline (or "bounded") procedure descriptor is constructed from
5505 the function's procedure descriptor with certain fields zeroed IAW
5506 the VMS calling standard. This is stored in the first quadword. */
5507 word1 = force_reg (DImode, gen_const_mem (DImode, fnaddr));
5508 word1 = expand_and (DImode, word1,
5509 GEN_INT (HOST_WIDE_INT_C (0xffff0fff0000fff0)),
5510 NULL);
5511 }
5512 else
5513 {
5514 /* These 4 instructions are:
5515 ldq $1,24($27)
5516 ldq $27,16($27)
5517 jmp $31,($27),0
5518 nop
5519 We don't bother setting the HINT field of the jump; the nop
5520 is merely there for padding. */
5521 word1 = GEN_INT (HOST_WIDE_INT_C (0xa77b0010a43b0018));
5522 word2 = GEN_INT (HOST_WIDE_INT_C (0x47ff041f6bfb0000));
5523 }
5524
5525 /* Store the first two words, as computed above. */
5526 mem = adjust_address (m_tramp, DImode, 0);
5527 emit_move_insn (mem, word1);
5528 mem = adjust_address (m_tramp, DImode, 8);
5529 emit_move_insn (mem, word2);
5530
5531 /* Store function address and static chain value. */
5532 mem = adjust_address (m_tramp, Pmode, 16);
5533 emit_move_insn (mem, fnaddr);
5534 mem = adjust_address (m_tramp, Pmode, 24);
5535 emit_move_insn (mem, chain_value);
5536
5537 if (TARGET_ABI_OSF)
5538 {
5539 emit_insn (gen_imb ());
5540 #ifdef HAVE_ENABLE_EXECUTE_STACK
5541 emit_library_call (init_one_libfunc ("__enable_execute_stack"),
5542 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
5543 #endif
5544 }
5545 }
5546 \f
5547 /* Determine where to put an argument to a function.
5548 Value is zero to push the argument on the stack,
5549 or a hard register in which to store the argument.
5550
5551 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5552 the preceding args and about the function being called.
5553 ARG is a description of the argument.
5554
5555 On Alpha the first 6 words of args are normally in registers
5556 and the rest are pushed. */
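/* Concretely (an illustrative mapping, OSF ABI): the integer argument
   registers are $16-$21 and the FP argument registers $f16-$f21, so with
   two argument words already consumed a DImode argument lands in $18
   (basereg 16 + 2), while a DFmode argument lands in $f18 (basereg 48 + 2
   in GCC's register numbering).  */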
5557
5558 static rtx
5559 alpha_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
5560 {
5561 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5562 int basereg;
5563 int num_args;
5564
5565 /* Don't get confused and pass small structures in FP registers. */
5566 if (arg.aggregate_type_p ())
5567 basereg = 16;
5568 else
5569 {
5570 /* With alpha_split_complex_arg, we shouldn't see any raw complex
5571 values here. */
5572 gcc_checking_assert (!COMPLEX_MODE_P (arg.mode));
5573
5574 /* Set up defaults for FP operands passed in FP registers, and
5575 integral operands passed in integer registers. */
5576 if (TARGET_FPREGS && GET_MODE_CLASS (arg.mode) == MODE_FLOAT)
5577 basereg = 32 + 16;
5578 else
5579 basereg = 16;
5580 }
5581
5582 /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for
5583 the two platforms, so we can't avoid conditional compilation. */
5584 #if TARGET_ABI_OPEN_VMS
5585 {
5586 if (arg.end_marker_p ())
5587 return alpha_arg_info_reg_val (*cum);
5588
5589 num_args = cum->num_args;
5590 if (num_args >= 6
5591 || targetm.calls.must_pass_in_stack (arg))
5592 return NULL_RTX;
5593 }
5594 #elif TARGET_ABI_OSF
5595 {
5596 if (*cum >= 6)
5597 return NULL_RTX;
5598 num_args = *cum;
5599
5600 if (arg.end_marker_p ())
5601 basereg = 16;
5602 else if (targetm.calls.must_pass_in_stack (arg))
5603 return NULL_RTX;
5604 }
5605 #else
5606 #error Unhandled ABI
5607 #endif
5608
5609 return gen_rtx_REG (arg.mode, num_args + basereg);
5610 }
5611
5612 /* Update the data in CUM to advance over argument ARG. */
5613
5614 static void
5615 alpha_function_arg_advance (cumulative_args_t cum_v,
5616 const function_arg_info &arg)
5617 {
5618 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5619 bool onstack = targetm.calls.must_pass_in_stack (arg);
5620 int increment = onstack ? 6 : ALPHA_ARG_SIZE (arg.mode, arg.type);
5621
5622 #if TARGET_ABI_OSF
5623 *cum += increment;
5624 #else
5625 if (!onstack && cum->num_args < 6)
5626 cum->atypes[cum->num_args] = alpha_arg_type (arg.mode);
5627 cum->num_args += increment;
5628 #endif
5629 }
5630
5631 static int
5632 alpha_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
5633 {
5634 int words = 0;
5635 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED = get_cumulative_args (cum_v);
5636
5637 #if TARGET_ABI_OPEN_VMS
5638 if (cum->num_args < 6
5639 && 6 < cum->num_args + ALPHA_ARG_SIZE (arg.mode, arg.type))
5640 words = 6 - cum->num_args;
5641 #elif TARGET_ABI_OSF
5642 if (*cum < 6 && 6 < *cum + ALPHA_ARG_SIZE (arg.mode, arg.type))
5643 words = 6 - *cum;
5644 #else
5645 #error Unhandled ABI
5646 #endif
5647
5648 return words * UNITS_PER_WORD;
5649 }
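
/* For example (OSF, illustrative): with five argument words already used,
   a 16-byte (two-word) argument passes its first eight bytes in $21 and
   the rest on the stack, so this hook reports 8 bytes passed in
   registers.  */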
5650
5651
5652 /* Return true if TYPE must be returned in memory, instead of in registers. */
5653
5654 static bool
5655 alpha_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
5656 {
5657 machine_mode mode = VOIDmode;
5658 int size;
5659
5660 if (type)
5661 {
5662 mode = TYPE_MODE (type);
5663
5664 /* All aggregates are returned in memory, except on OpenVMS where
5665 records that fit 64 bits should be returned by immediate value
5666 as required by section 3.8.7.1 of the OpenVMS Calling Standard. */
5667 if (TARGET_ABI_OPEN_VMS
5668 && TREE_CODE (type) != ARRAY_TYPE
5669 && (unsigned HOST_WIDE_INT) int_size_in_bytes(type) <= 8)
5670 return false;
5671
5672 if (AGGREGATE_TYPE_P (type))
5673 return true;
5674 }
5675
5676 size = GET_MODE_SIZE (mode);
5677 switch (GET_MODE_CLASS (mode))
5678 {
5679 case MODE_VECTOR_FLOAT:
5680 /* Pass all float vectors in memory, like an aggregate. */
5681 return true;
5682
5683 case MODE_COMPLEX_FLOAT:
5684 /* We judge complex floats on the size of their element,
5685 not the size of the whole type. */
5686 size = GET_MODE_UNIT_SIZE (mode);
5687 break;
5688
5689 case MODE_INT:
5690 case MODE_FLOAT:
5691 case MODE_COMPLEX_INT:
5692 case MODE_VECTOR_INT:
5693 break;
5694
5695 default:
5696 /* ??? We get called on all sorts of random stuff from
5697 aggregate_value_p. We must return something, but it's not
5698 clear what's safe to return. Pretend it's a struct I
5699 guess. */
5700 return true;
5701 }
5702
5703 /* Otherwise types must fit in one register. */
5704 return size > UNITS_PER_WORD;
5705 }
5706
5707 /* Return true if ARG should be passed by invisible reference. */
5708
5709 static bool
5710 alpha_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
5711 {
5712 /* Pass float and _Complex float variable arguments by reference.
5713 This avoids 64-bit store from a FP register to a pretend args save area
5714 and subsequent 32-bit load from the saved location to a FP register.
5715
5716 Note that 32-bit loads and stores to/from a FP register on alpha reorder
5717 bits to form a canonical 64-bit value in the FP register. This fact
5718 invalidates compiler assumption that 32-bit FP value lives in the lower
5719 32-bits of the passed 64-bit FP value, so loading the 32-bit value from
5720 the stored 64-bit location using 32-bit FP load is invalid on alpha.
5721
5722    This introduces a sort of ABI incompatibility, but until _Float32 was
5723    introduced, C-family languages promoted a 32-bit float variable argument
5724    to a 64-bit double, and it was not allowed to pass float as a variable
5725 argument. Passing _Complex float as a variable argument never
5726 worked on alpha. Thus, we have no backward compatibility issues
5727 to worry about, and passing unpromoted _Float32 and _Complex float
5728 as a variable argument will actually work in the future. */
5729
5730 if (arg.mode == SFmode || arg.mode == SCmode)
5731 return !arg.named;
5732
5733 return arg.mode == TFmode || arg.mode == TCmode;
5734 }
5735
5736 /* Define how to find the value returned by a function. VALTYPE is the
5737 data type of the value (as a tree). If the precise function being
5738 called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0.
5739 MODE is set instead of VALTYPE for libcalls.
5740
5741 On Alpha the value is found in $0 for integer functions and
5742 $f0 for floating-point functions. */
5743
5744 static rtx
5745 alpha_function_value_1 (const_tree valtype, const_tree func ATTRIBUTE_UNUSED,
5746 machine_mode mode)
5747 {
5748 unsigned int regnum, dummy ATTRIBUTE_UNUSED;
5749 enum mode_class mclass;
5750
5751 gcc_assert (!valtype || !alpha_return_in_memory (valtype, func));
5752
5753 if (valtype)
5754 mode = TYPE_MODE (valtype);
5755
5756 mclass = GET_MODE_CLASS (mode);
5757 switch (mclass)
5758 {
5759 case MODE_INT:
5760 /* Do the same thing as PROMOTE_MODE except for libcalls on VMS,
5761 where we have them returning both SImode and DImode. */
5762 if (!(TARGET_ABI_OPEN_VMS && valtype && AGGREGATE_TYPE_P (valtype)))
5763 PROMOTE_MODE (mode, dummy, valtype);
5764 /* FALLTHRU */
5765
5766 case MODE_COMPLEX_INT:
5767 case MODE_VECTOR_INT:
5768 regnum = 0;
5769 break;
5770
5771 case MODE_FLOAT:
5772 regnum = 32;
5773 break;
5774
5775 case MODE_COMPLEX_FLOAT:
5776 {
5777 machine_mode cmode = GET_MODE_INNER (mode);
5778
5779 return gen_rtx_PARALLEL
5780 (VOIDmode,
5781 gen_rtvec (2,
5782 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32),
5783 const0_rtx),
5784 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33),
5785 GEN_INT (GET_MODE_SIZE (cmode)))));
5786 }
5787
5788 case MODE_RANDOM:
5789 /* We should only reach here for BLKmode on VMS. */
5790 gcc_assert (TARGET_ABI_OPEN_VMS && mode == BLKmode);
5791 regnum = 0;
5792 break;
5793
5794 default:
5795 gcc_unreachable ();
5796 }
5797
5798 return gen_rtx_REG (mode, regnum);
5799 }
5800
5801 /* Implement TARGET_FUNCTION_VALUE. */
5802
5803 static rtx
5804 alpha_function_value (const_tree valtype, const_tree fn_decl_or_type,
5805 bool /*outgoing*/)
5806 {
5807 return alpha_function_value_1 (valtype, fn_decl_or_type, VOIDmode);
5808 }
5809
5810 /* Implement TARGET_LIBCALL_VALUE. */
5811
5812 static rtx
5813 alpha_libcall_value (machine_mode mode, const_rtx /*fun*/)
5814 {
5815 return alpha_function_value_1 (NULL_TREE, NULL_TREE, mode);
5816 }
5817
5818 /* Implement TARGET_FUNCTION_VALUE_REGNO_P.
5819
5820    On the Alpha, $0, $1 and $f0, $f1 are the only registers thus used.  */
5821
5822 static bool
5823 alpha_function_value_regno_p (const unsigned int regno)
5824 {
5825 return (regno == 0 || regno == 1 || regno == 32 || regno == 33);
5826 }
5827
5828 /* TCmode complex values are passed by invisible reference. We
5829 should not split these values. */
5830
5831 static bool
5832 alpha_split_complex_arg (const_tree type)
5833 {
5834 return TYPE_MODE (type) != TCmode;
5835 }
5836
5837 static tree
5838 alpha_build_builtin_va_list (void)
5839 {
5840 tree base, ofs, space, record, type_decl;
5841
5842 if (TARGET_ABI_OPEN_VMS)
5843 return ptr_type_node;
5844
5845 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5846 type_decl = build_decl (BUILTINS_LOCATION,
5847 TYPE_DECL, get_identifier ("__va_list_tag"), record);
5848 TYPE_STUB_DECL (record) = type_decl;
5849 TYPE_NAME (record) = type_decl;
5850
5851 /* C++? SET_IS_AGGR_TYPE (record, 1); */
5852
5853 /* Dummy field to prevent alignment warnings. */
5854 space = build_decl (BUILTINS_LOCATION,
5855 FIELD_DECL, NULL_TREE, integer_type_node);
5856 DECL_FIELD_CONTEXT (space) = record;
5857 DECL_ARTIFICIAL (space) = 1;
5858 DECL_IGNORED_P (space) = 1;
5859
5860 ofs = build_decl (BUILTINS_LOCATION,
5861 FIELD_DECL, get_identifier ("__offset"),
5862 integer_type_node);
5863 DECL_FIELD_CONTEXT (ofs) = record;
5864 DECL_CHAIN (ofs) = space;
5865
5866 base = build_decl (BUILTINS_LOCATION,
5867 FIELD_DECL, get_identifier ("__base"),
5868 ptr_type_node);
5869 DECL_FIELD_CONTEXT (base) = record;
5870 DECL_CHAIN (base) = ofs;
5871
5872 TYPE_FIELDS (record) = base;
5873 layout_type (record);
5874
5875 va_list_gpr_counter_field = ofs;
5876 return record;
5877 }
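
/* The record built above is layout-equivalent to this C sketch; the
   trailing int stands for the unnamed dummy field that exists only to
   prevent alignment warnings:

	struct __va_list_tag {
	  void *__base;
	  int __offset;
	  int __dummy;
	};  */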
5878
5879 #if TARGET_ABI_OSF
5880 /* Helper function for alpha_stdarg_optimize_hook. Skip over casts
5881 and constant additions. */
5882
5883 static gimple *
5884 va_list_skip_additions (tree lhs)
5885 {
5886 gimple *stmt;
5887
5888 for (;;)
5889 {
5890 enum tree_code code;
5891
5892 stmt = SSA_NAME_DEF_STMT (lhs);
5893
5894 if (gimple_code (stmt) == GIMPLE_PHI)
5895 return stmt;
5896
5897 if (!is_gimple_assign (stmt)
5898 || gimple_assign_lhs (stmt) != lhs)
5899 return NULL;
5900
5901 if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME)
5902 return stmt;
5903 code = gimple_assign_rhs_code (stmt);
5904 if (!CONVERT_EXPR_CODE_P (code)
5905 && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR)
5906 || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
5907 || !tree_fits_uhwi_p (gimple_assign_rhs2 (stmt))))
5908 return stmt;
5909
5910 lhs = gimple_assign_rhs1 (stmt);
5911 }
5912 }
5913
5914 /* Check if LHS = RHS statement is
5915 LHS = *(ap.__base + ap.__offset + cst)
5916 or
5917 LHS = *(ap.__base
5918 + ((ap.__offset + cst <= 47)
5919 ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2).
5920 If the former, indicate that GPR registers are needed,
5921 if the latter, indicate that FPR registers are needed.
5922
5923 Also look for LHS = (*ptr).field, where ptr is one of the forms
5924 listed above.
5925
5926 On alpha, cfun->va_list_gpr_size is used as size of the needed
5927 regs and cfun->va_list_fpr_size is a bitmask, bit 0 set if GPR
5928 registers are needed and bit 1 set if FPR registers are needed.
5929 Return true if va_list references should not be scanned for the
5930 current statement. */
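/* For example, in an illustrative user function (not compiler code) such as

     int
     use_va (int n, ...)
     {
       va_list ap;
       va_start (ap, n);
       int i = va_arg (ap, int);        -- matches the first form; bit 0
       double d = va_arg (ap, double);  -- matches the second form; bit 1
       va_end (ap);
       return i + (int) d;
     }

   the integer fetch marks GPRs as needed and the double fetch marks FPRs
   as needed, so alpha_setup_incoming_varargs below only spills the
   register banks that are actually referenced.  */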
5931
5932 static bool
5933 alpha_stdarg_optimize_hook (struct stdarg_info *si, const gimple *stmt)
5934 {
5935 tree base, offset, rhs;
5936 int offset_arg = 1;
5937 gimple *base_stmt;
5938
5939 if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
5940 != GIMPLE_SINGLE_RHS)
5941 return false;
5942
5943 rhs = gimple_assign_rhs1 (stmt);
5944 while (handled_component_p (rhs))
5945 rhs = TREE_OPERAND (rhs, 0);
5946 if (TREE_CODE (rhs) != MEM_REF
5947 || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME)
5948 return false;
5949
5950 stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0));
5951 if (stmt == NULL
5952 || !is_gimple_assign (stmt)
5953 || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR)
5954 return false;
5955
5956 base = gimple_assign_rhs1 (stmt);
5957 if (TREE_CODE (base) == SSA_NAME)
5958 {
5959 base_stmt = va_list_skip_additions (base);
5960 if (base_stmt
5961 && is_gimple_assign (base_stmt)
5962 && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
5963 base = gimple_assign_rhs1 (base_stmt);
5964 }
5965
5966 if (TREE_CODE (base) != COMPONENT_REF
5967 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
5968 {
5969 base = gimple_assign_rhs2 (stmt);
5970 if (TREE_CODE (base) == SSA_NAME)
5971 {
5972 base_stmt = va_list_skip_additions (base);
5973 if (base_stmt
5974 && is_gimple_assign (base_stmt)
5975 && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
5976 base = gimple_assign_rhs1 (base_stmt);
5977 }
5978
5979 if (TREE_CODE (base) != COMPONENT_REF
5980 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
5981 return false;
5982
5983 offset_arg = 0;
5984 }
5985
5986 base = get_base_address (base);
5987 if (TREE_CODE (base) != VAR_DECL
5988 || !bitmap_bit_p (si->va_list_vars, DECL_UID (base) + num_ssa_names))
5989 return false;
5990
5991 offset = gimple_op (stmt, 1 + offset_arg);
5992 if (TREE_CODE (offset) == SSA_NAME)
5993 {
5994 gimple *offset_stmt = va_list_skip_additions (offset);
5995
5996 if (offset_stmt
5997 && gimple_code (offset_stmt) == GIMPLE_PHI)
5998 {
5999 HOST_WIDE_INT sub;
6000 gimple *arg1_stmt, *arg2_stmt;
6001 tree arg1, arg2;
6002 enum tree_code code1, code2;
6003
6004 if (gimple_phi_num_args (offset_stmt) != 2)
6005 goto escapes;
6006
6007 arg1_stmt
6008 = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0));
6009 arg2_stmt
6010 = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1));
6011 if (arg1_stmt == NULL
6012 || !is_gimple_assign (arg1_stmt)
6013 || arg2_stmt == NULL
6014 || !is_gimple_assign (arg2_stmt))
6015 goto escapes;
6016
6017 code1 = gimple_assign_rhs_code (arg1_stmt);
6018 code2 = gimple_assign_rhs_code (arg2_stmt);
6019 if (code1 == COMPONENT_REF
6020 && (code2 == MINUS_EXPR || code2 == PLUS_EXPR))
6021 /* Do nothing. */;
6022 else if (code2 == COMPONENT_REF
6023 && (code1 == MINUS_EXPR || code1 == PLUS_EXPR))
6024 {
6025 std::swap (arg1_stmt, arg2_stmt);
6026 code2 = code1;
6027 }
6028 else
6029 goto escapes;
6030
6031 if (!tree_fits_shwi_p (gimple_assign_rhs2 (arg2_stmt)))
6032 goto escapes;
6033
6034 sub = tree_to_shwi (gimple_assign_rhs2 (arg2_stmt));
6035 if (code2 == MINUS_EXPR)
6036 sub = -sub;
6037 if (sub < -48 || sub > -32)
6038 goto escapes;
6039
6040 arg1 = gimple_assign_rhs1 (arg1_stmt);
6041 arg2 = gimple_assign_rhs1 (arg2_stmt);
6042 if (TREE_CODE (arg2) == SSA_NAME)
6043 {
6044 arg2_stmt = va_list_skip_additions (arg2);
6045 if (arg2_stmt == NULL
6046 || !is_gimple_assign (arg2_stmt)
6047 || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF)
6048 goto escapes;
6049 arg2 = gimple_assign_rhs1 (arg2_stmt);
6050 }
6051 if (arg1 != arg2)
6052 goto escapes;
6053
6054 if (TREE_CODE (arg1) != COMPONENT_REF
6055 || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field
6056 || get_base_address (arg1) != base)
6057 goto escapes;
6058
6059 /* Need floating point regs. */
6060 cfun->va_list_fpr_size |= 2;
6061 return false;
6062 }
6063 if (offset_stmt
6064 && is_gimple_assign (offset_stmt)
6065 && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF)
6066 offset = gimple_assign_rhs1 (offset_stmt);
6067 }
6068 if (TREE_CODE (offset) != COMPONENT_REF
6069 || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field
6070 || get_base_address (offset) != base)
6071 goto escapes;
6072 else
6073 /* Need general regs. */
6074 cfun->va_list_fpr_size |= 1;
6075 return false;
6076
6077 escapes:
6078 si->va_list_escapes = true;
6079 return false;
6080 }
6081 #endif
6082
6083 /* Perform any actions needed for a function that is receiving a
6084 variable number of arguments. */
6085
6086 static void
6087 alpha_setup_incoming_varargs (cumulative_args_t pcum,
6088 const function_arg_info &arg,
6089 int *pretend_size, int no_rtl)
6090 {
6091 CUMULATIVE_ARGS cum = *get_cumulative_args (pcum);
6092
6093 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl)))
6094 /* Skip the current argument. */
6095 targetm.calls.function_arg_advance (pack_cumulative_args (&cum), arg);
6096
6097 #if TARGET_ABI_OPEN_VMS
6098 /* For VMS, we allocate space for all 6 arg registers plus a count.
6099
6100 However, if NO registers need to be saved, don't allocate any space.
6101 This is not only because we won't need the space, but because AP
6102 includes the current_pretend_args_size and we don't want to mess up
6103 any ap-relative addresses already made. */
6104 if (cum.num_args < 6)
6105 {
6106 if (!no_rtl)
6107 {
6108 emit_move_insn (gen_rtx_REG (DImode, 1), virtual_incoming_args_rtx);
6109 emit_insn (gen_arg_home ());
6110 }
6111 *pretend_size = 7 * UNITS_PER_WORD;
6112 }
6113 #else
6114 /* On OSF/1 and friends, we allocate space for all 12 arg registers, but
6115 only push those that are remaining. However, if NO registers need to
6116 be saved, don't allocate any space. This is not only because we won't
6117 need the space, but because AP includes the current_pretend_args_size
6118 and we don't want to mess up any ap-relative addresses already made.
6119
6120 If we are not to use the floating-point registers, save the integer
6121 registers where we would put the floating-point registers. This is
6122 not the most efficient way to implement varargs with just one register
6123 class, but it isn't worth doing anything more efficient in this rare
6124 case. */
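/* For example, for a function declared  int f (int a, ...)  CUM is 1 at
   this point, so up to five integer argument registers ($17-$21) and/or
   the matching FP argument registers ($f17-$f21), as flagged by
   alpha_stdarg_optimize_hook above, are dumped into the 12-slot
   pretend-args area allocated below.  */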
6125 if (cum >= 6)
6126 return;
6127
6128 if (!no_rtl)
6129 {
6130 int count;
6131 alias_set_type set = get_varargs_alias_set ();
6132 rtx tmp;
6133
6134 count = cfun->va_list_gpr_size / UNITS_PER_WORD;
6135 if (count > 6 - cum)
6136 count = 6 - cum;
6137
6138 /* Detect whether integer registers or floating-point registers
6139 are needed by the detected va_arg statements. See above for
6140 how these values are computed. Note that the "escape" value
6141 is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of
6142 these bits set. */
6143 gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3);
6144
6145 if (cfun->va_list_fpr_size & 1)
6146 {
6147 tmp = gen_rtx_MEM (BLKmode,
6148 plus_constant (Pmode, virtual_incoming_args_rtx,
6149 (cum + 6) * UNITS_PER_WORD));
6150 MEM_NOTRAP_P (tmp) = 1;
6151 set_mem_alias_set (tmp, set);
6152 move_block_from_reg (16 + cum, tmp, count);
6153 }
6154
6155 if (cfun->va_list_fpr_size & 2)
6156 {
6157 tmp = gen_rtx_MEM (BLKmode,
6158 plus_constant (Pmode, virtual_incoming_args_rtx,
6159 cum * UNITS_PER_WORD));
6160 MEM_NOTRAP_P (tmp) = 1;
6161 set_mem_alias_set (tmp, set);
6162 move_block_from_reg (16 + cum + TARGET_FPREGS*32, tmp, count);
6163 }
6164 }
6165 *pretend_size = 12 * UNITS_PER_WORD;
6166 #endif
6167 }
6168
6169 static void
6170 alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6171 {
6172 HOST_WIDE_INT offset;
6173 tree t, offset_field, base_field;
6174
6175 if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK)
6176 return;
6177
6178 /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base
6179 up by 48, storing fp arg registers in the first 48 bytes, and the
6180 integer arg registers in the next 48 bytes. This is only done,
6181 however, if any integer registers need to be stored.
6182
6183 If no integer registers need be stored, then we must subtract 48
6184 in order to account for the integer arg registers which are counted
6185 in argsize above, but which are not actually stored on the stack.
6186 Must further be careful here about structures straddling the last
6187 integer argument register; that futzes with pretend_args_size,
6188 which changes the meaning of AP. */
6189
6190 if (NUM_ARGS < 6)
6191 offset = TARGET_ABI_OPEN_VMS ? UNITS_PER_WORD : 6 * UNITS_PER_WORD;
6192 else
6193 offset = -6 * UNITS_PER_WORD + crtl->args.pretend_args_size;
6194
6195 if (TARGET_ABI_OPEN_VMS)
6196 {
6197 t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6198 t = fold_build_pointer_plus_hwi (t, offset + NUM_ARGS * UNITS_PER_WORD);
6199 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
6200 TREE_SIDE_EFFECTS (t) = 1;
6201 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6202 }
6203 else
6204 {
6205 base_field = TYPE_FIELDS (TREE_TYPE (valist));
6206 offset_field = DECL_CHAIN (base_field);
6207
6208 base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6209 valist, base_field, NULL_TREE);
6210 offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6211 valist, offset_field, NULL_TREE);
6212
6213 t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6214 t = fold_build_pointer_plus_hwi (t, offset);
6215 t = build2 (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t);
6216 TREE_SIDE_EFFECTS (t) = 1;
6217 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6218
6219 t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD);
6220 t = build2 (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t);
6221 TREE_SIDE_EFFECTS (t) = 1;
6222 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6223 }
6224 }
6225
6226 static tree
6227 alpha_gimplify_va_arg_1 (tree type, tree base, tree offset,
6228 gimple_seq *pre_p)
6229 {
6230 tree type_size, ptr_type, addend, t, addr;
6231 gimple_seq internal_post;
6232
6233 /* If the type could not be passed in registers, skip the block
6234 reserved for the registers. */
6235 if (must_pass_va_arg_in_stack (type))
6236 {
6237 t = build_int_cst (TREE_TYPE (offset), 6*8);
6238 gimplify_assign (offset,
6239 build2 (MAX_EXPR, TREE_TYPE (offset), offset, t),
6240 pre_p);
6241 }
6242
6243 addend = offset;
6244 ptr_type = build_pointer_type_for_mode (type, ptr_mode, true);
6245
6246 if (TREE_CODE (type) == COMPLEX_TYPE)
6247 {
6248 tree real_part, imag_part, real_temp;
6249
6250 real_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6251 offset, pre_p);
6252
6253 /* Copy the value into a new temporary, lest the formal temporary
6254 be reused out from under us. */
6255 real_temp = get_initialized_tmp_var (real_part, pre_p, NULL);
6256
6257 imag_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6258 offset, pre_p);
6259
6260 return build2 (COMPLEX_EXPR, type, real_temp, imag_part);
6261 }
6262 else if (SCALAR_FLOAT_TYPE_P (type))
6263 {
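/* The FP save slot for a given argument register sits 6*8 bytes below
   its integer save slot (the FP registers occupy the 48 bytes just below
   __base; see alpha_va_start above), so bias the offset down by 48 while
   it still lies within the register save area.  Offsets of 48 or more
   already refer to arguments passed on the stack and are used as is.  */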
6264 tree fpaddend, cond, fourtyeight;
6265
6266 fourtyeight = build_int_cst (TREE_TYPE (addend), 6*8);
6267 fpaddend = fold_build2 (MINUS_EXPR, TREE_TYPE (addend),
6268 addend, fourtyeight);
6269 cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight);
6270 addend = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond,
6271 fpaddend, addend);
6272 }
6273
6274 /* Build the final address and force that value into a temporary. */
6275 addr = fold_build_pointer_plus (fold_convert (ptr_type, base), addend);
6276 internal_post = NULL;
6277 gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue);
6278 gimple_seq_add_seq (pre_p, internal_post);
6279
6280 /* Update the offset field. */
6281 type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type));
6282 if (type_size == NULL || TREE_OVERFLOW (type_size))
6283 t = size_zero_node;
6284 else
6285 {
6286 t = size_binop (PLUS_EXPR, type_size, size_int (7));
6287 t = size_binop (TRUNC_DIV_EXPR, t, size_int (8));
6288 t = size_binop (MULT_EXPR, t, size_int (8));
6289 }
6290 t = fold_convert (TREE_TYPE (offset), t);
6291 gimplify_assign (offset, build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t),
6292 pre_p);
6293
6294 return build_va_arg_indirect_ref (addr);
6295 }
6296
6297 static tree
6298 alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6299 gimple_seq *post_p)
6300 {
6301 tree offset_field, base_field, offset, base, t, r;
6302 bool indirect;
6303
6304 if (TARGET_ABI_OPEN_VMS)
6305 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6306
6307 base_field = TYPE_FIELDS (va_list_type_node);
6308 offset_field = DECL_CHAIN (base_field);
6309 base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6310 valist, base_field, NULL_TREE);
6311 offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6312 valist, offset_field, NULL_TREE);
6313
6314 /* Pull the fields of the structure out into temporaries. Since we never
6315 modify the base field, we can use a formal temporary. Sign-extend the
6316 offset field so that it's the proper width for pointer arithmetic. */
6317 base = get_formal_tmp_var (base_field, pre_p);
6318
6319 t = fold_convert (build_nonstandard_integer_type (64, 0), offset_field);
6320 offset = get_initialized_tmp_var (t, pre_p, NULL);
6321
6322 indirect = pass_va_arg_by_reference (type);
6323
6324 if (indirect)
6325 {
6326 if (TREE_CODE (type) == COMPLEX_TYPE
6327 && targetm.calls.split_complex_arg (type))
6328 {
6329 tree real_part, imag_part, real_temp;
6330
6331 tree ptr_type = build_pointer_type_for_mode (TREE_TYPE (type),
6332 ptr_mode, true);
6333
6334 real_part = alpha_gimplify_va_arg_1 (ptr_type, base,
6335 offset, pre_p);
6336 real_part = build_va_arg_indirect_ref (real_part);
6337
6338 /* Copy the value into a new temporary, lest the formal temporary
6339 be reused out from under us. */
6340 real_temp = get_initialized_tmp_var (real_part, pre_p, NULL);
6341
6342 imag_part = alpha_gimplify_va_arg_1 (ptr_type, base,
6343 offset, pre_p);
6344 imag_part = build_va_arg_indirect_ref (imag_part);
6345
6346 r = build2 (COMPLEX_EXPR, type, real_temp, imag_part);
6347
6348 /* Stuff the offset temporary back into its field. */
6349 gimplify_assign (unshare_expr (offset_field),
6350 fold_convert (TREE_TYPE (offset_field), offset),
6351 pre_p);
6352 return r;
6353 }
6354 else
6355 type = build_pointer_type_for_mode (type, ptr_mode, true);
6356 }
6357
6358 /* Find the value. Note that this will be a stable indirection, or
6359 a composite of stable indirections in the case of complex. */
6360 r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p);
6361
6362 /* Stuff the offset temporary back into its field. */
6363 gimplify_assign (unshare_expr (offset_field),
6364 fold_convert (TREE_TYPE (offset_field), offset), pre_p);
6365
6366 if (indirect)
6367 r = build_va_arg_indirect_ref (r);
6368
6369 return r;
6370 }
6371 \f
6372 /* Builtins. */
6373
6374 enum alpha_builtin
6375 {
6376 ALPHA_BUILTIN_CMPBGE,
6377 ALPHA_BUILTIN_EXTBL,
6378 ALPHA_BUILTIN_EXTWL,
6379 ALPHA_BUILTIN_EXTLL,
6380 ALPHA_BUILTIN_EXTQL,
6381 ALPHA_BUILTIN_EXTWH,
6382 ALPHA_BUILTIN_EXTLH,
6383 ALPHA_BUILTIN_EXTQH,
6384 ALPHA_BUILTIN_INSBL,
6385 ALPHA_BUILTIN_INSWL,
6386 ALPHA_BUILTIN_INSLL,
6387 ALPHA_BUILTIN_INSQL,
6388 ALPHA_BUILTIN_INSWH,
6389 ALPHA_BUILTIN_INSLH,
6390 ALPHA_BUILTIN_INSQH,
6391 ALPHA_BUILTIN_MSKBL,
6392 ALPHA_BUILTIN_MSKWL,
6393 ALPHA_BUILTIN_MSKLL,
6394 ALPHA_BUILTIN_MSKQL,
6395 ALPHA_BUILTIN_MSKWH,
6396 ALPHA_BUILTIN_MSKLH,
6397 ALPHA_BUILTIN_MSKQH,
6398 ALPHA_BUILTIN_UMULH,
6399 ALPHA_BUILTIN_ZAP,
6400 ALPHA_BUILTIN_ZAPNOT,
6401 ALPHA_BUILTIN_AMASK,
6402 ALPHA_BUILTIN_IMPLVER,
6403 ALPHA_BUILTIN_RPCC,
6404 ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6405 ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER,
6406
6407 /* TARGET_MAX */
6408 ALPHA_BUILTIN_MINUB8,
6409 ALPHA_BUILTIN_MINSB8,
6410 ALPHA_BUILTIN_MINUW4,
6411 ALPHA_BUILTIN_MINSW4,
6412 ALPHA_BUILTIN_MAXUB8,
6413 ALPHA_BUILTIN_MAXSB8,
6414 ALPHA_BUILTIN_MAXUW4,
6415 ALPHA_BUILTIN_MAXSW4,
6416 ALPHA_BUILTIN_PERR,
6417 ALPHA_BUILTIN_PKLB,
6418 ALPHA_BUILTIN_PKWB,
6419 ALPHA_BUILTIN_UNPKBL,
6420 ALPHA_BUILTIN_UNPKBW,
6421
6422 /* TARGET_CIX */
6423 ALPHA_BUILTIN_CTTZ,
6424 ALPHA_BUILTIN_CTLZ,
6425 ALPHA_BUILTIN_CTPOP,
6426
6427 ALPHA_BUILTIN_max
6428 };
6429
6430 static enum insn_code const code_for_builtin[ALPHA_BUILTIN_max] = {
6431 CODE_FOR_builtin_cmpbge,
6432 CODE_FOR_extbl,
6433 CODE_FOR_extwl,
6434 CODE_FOR_extll,
6435 CODE_FOR_extql,
6436 CODE_FOR_extwh,
6437 CODE_FOR_extlh,
6438 CODE_FOR_extqh,
6439 CODE_FOR_builtin_insbl,
6440 CODE_FOR_builtin_inswl,
6441 CODE_FOR_builtin_insll,
6442 CODE_FOR_insql,
6443 CODE_FOR_inswh,
6444 CODE_FOR_inslh,
6445 CODE_FOR_insqh,
6446 CODE_FOR_mskbl,
6447 CODE_FOR_mskwl,
6448 CODE_FOR_mskll,
6449 CODE_FOR_mskql,
6450 CODE_FOR_mskwh,
6451 CODE_FOR_msklh,
6452 CODE_FOR_mskqh,
6453 CODE_FOR_umuldi3_highpart,
6454 CODE_FOR_builtin_zap,
6455 CODE_FOR_builtin_zapnot,
6456 CODE_FOR_builtin_amask,
6457 CODE_FOR_builtin_implver,
6458 CODE_FOR_builtin_rpcc,
6459 CODE_FOR_builtin_establish_vms_condition_handler,
6460 CODE_FOR_builtin_revert_vms_condition_handler,
6461
6462 /* TARGET_MAX */
6463 CODE_FOR_builtin_minub8,
6464 CODE_FOR_builtin_minsb8,
6465 CODE_FOR_builtin_minuw4,
6466 CODE_FOR_builtin_minsw4,
6467 CODE_FOR_builtin_maxub8,
6468 CODE_FOR_builtin_maxsb8,
6469 CODE_FOR_builtin_maxuw4,
6470 CODE_FOR_builtin_maxsw4,
6471 CODE_FOR_builtin_perr,
6472 CODE_FOR_builtin_pklb,
6473 CODE_FOR_builtin_pkwb,
6474 CODE_FOR_builtin_unpkbl,
6475 CODE_FOR_builtin_unpkbw,
6476
6477 /* TARGET_CIX */
6478 CODE_FOR_ctzdi2,
6479 CODE_FOR_clzdi2,
6480 CODE_FOR_popcountdi2
6481 };
6482
6483 struct alpha_builtin_def
6484 {
6485 const char *name;
6486 enum alpha_builtin code;
6487 unsigned int target_mask;
6488 bool is_const;
6489 };
6490
6491 static struct alpha_builtin_def const zero_arg_builtins[] = {
6492 { "__builtin_alpha_implver", ALPHA_BUILTIN_IMPLVER, 0, true },
6493 { "__builtin_alpha_rpcc", ALPHA_BUILTIN_RPCC, 0, false }
6494 };
6495
6496 static struct alpha_builtin_def const one_arg_builtins[] = {
6497 { "__builtin_alpha_amask", ALPHA_BUILTIN_AMASK, 0, true },
6498 { "__builtin_alpha_pklb", ALPHA_BUILTIN_PKLB, MASK_MAX, true },
6499 { "__builtin_alpha_pkwb", ALPHA_BUILTIN_PKWB, MASK_MAX, true },
6500 { "__builtin_alpha_unpkbl", ALPHA_BUILTIN_UNPKBL, MASK_MAX, true },
6501 { "__builtin_alpha_unpkbw", ALPHA_BUILTIN_UNPKBW, MASK_MAX, true },
6502 { "__builtin_alpha_cttz", ALPHA_BUILTIN_CTTZ, MASK_CIX, true },
6503 { "__builtin_alpha_ctlz", ALPHA_BUILTIN_CTLZ, MASK_CIX, true },
6504 { "__builtin_alpha_ctpop", ALPHA_BUILTIN_CTPOP, MASK_CIX, true }
6505 };
6506
6507 static struct alpha_builtin_def const two_arg_builtins[] = {
6508 { "__builtin_alpha_cmpbge", ALPHA_BUILTIN_CMPBGE, 0, true },
6509 { "__builtin_alpha_extbl", ALPHA_BUILTIN_EXTBL, 0, true },
6510 { "__builtin_alpha_extwl", ALPHA_BUILTIN_EXTWL, 0, true },
6511 { "__builtin_alpha_extll", ALPHA_BUILTIN_EXTLL, 0, true },
6512 { "__builtin_alpha_extql", ALPHA_BUILTIN_EXTQL, 0, true },
6513 { "__builtin_alpha_extwh", ALPHA_BUILTIN_EXTWH, 0, true },
6514 { "__builtin_alpha_extlh", ALPHA_BUILTIN_EXTLH, 0, true },
6515 { "__builtin_alpha_extqh", ALPHA_BUILTIN_EXTQH, 0, true },
6516 { "__builtin_alpha_insbl", ALPHA_BUILTIN_INSBL, 0, true },
6517 { "__builtin_alpha_inswl", ALPHA_BUILTIN_INSWL, 0, true },
6518 { "__builtin_alpha_insll", ALPHA_BUILTIN_INSLL, 0, true },
6519 { "__builtin_alpha_insql", ALPHA_BUILTIN_INSQL, 0, true },
6520 { "__builtin_alpha_inswh", ALPHA_BUILTIN_INSWH, 0, true },
6521 { "__builtin_alpha_inslh", ALPHA_BUILTIN_INSLH, 0, true },
6522 { "__builtin_alpha_insqh", ALPHA_BUILTIN_INSQH, 0, true },
6523 { "__builtin_alpha_mskbl", ALPHA_BUILTIN_MSKBL, 0, true },
6524 { "__builtin_alpha_mskwl", ALPHA_BUILTIN_MSKWL, 0, true },
6525 { "__builtin_alpha_mskll", ALPHA_BUILTIN_MSKLL, 0, true },
6526 { "__builtin_alpha_mskql", ALPHA_BUILTIN_MSKQL, 0, true },
6527 { "__builtin_alpha_mskwh", ALPHA_BUILTIN_MSKWH, 0, true },
6528 { "__builtin_alpha_msklh", ALPHA_BUILTIN_MSKLH, 0, true },
6529 { "__builtin_alpha_mskqh", ALPHA_BUILTIN_MSKQH, 0, true },
6530 { "__builtin_alpha_umulh", ALPHA_BUILTIN_UMULH, 0, true },
6531 { "__builtin_alpha_zap", ALPHA_BUILTIN_ZAP, 0, true },
6532 { "__builtin_alpha_zapnot", ALPHA_BUILTIN_ZAPNOT, 0, true },
6533 { "__builtin_alpha_minub8", ALPHA_BUILTIN_MINUB8, MASK_MAX, true },
6534 { "__builtin_alpha_minsb8", ALPHA_BUILTIN_MINSB8, MASK_MAX, true },
6535 { "__builtin_alpha_minuw4", ALPHA_BUILTIN_MINUW4, MASK_MAX, true },
6536 { "__builtin_alpha_minsw4", ALPHA_BUILTIN_MINSW4, MASK_MAX, true },
6537 { "__builtin_alpha_maxub8", ALPHA_BUILTIN_MAXUB8, MASK_MAX, true },
6538 { "__builtin_alpha_maxsb8", ALPHA_BUILTIN_MAXSB8, MASK_MAX, true },
6539 { "__builtin_alpha_maxuw4", ALPHA_BUILTIN_MAXUW4, MASK_MAX, true },
6540 { "__builtin_alpha_maxsw4", ALPHA_BUILTIN_MAXSW4, MASK_MAX, true },
6541 { "__builtin_alpha_perr", ALPHA_BUILTIN_PERR, MASK_MAX, true }
6542 };
6543
6544 static GTY(()) tree alpha_dimode_u;
6545 static GTY(()) tree alpha_v8qi_u;
6546 static GTY(()) tree alpha_v8qi_s;
6547 static GTY(()) tree alpha_v4hi_u;
6548 static GTY(()) tree alpha_v4hi_s;
6549
6550 static GTY(()) tree alpha_builtins[(int) ALPHA_BUILTIN_max];
6551
6552 /* Return the alpha builtin for CODE. */
6553
6554 static tree
6555 alpha_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
6556 {
6557 if (code >= ALPHA_BUILTIN_max)
6558 return error_mark_node;
6559 return alpha_builtins[code];
6560 }
6561
6562 /* Helper function of alpha_init_builtins. Add the built-in specified
6563 by NAME, TYPE, CODE, and ECF. */
6564
6565 static void
6566 alpha_builtin_function (const char *name, tree ftype,
6567 enum alpha_builtin code, unsigned ecf)
6568 {
6569 tree decl = add_builtin_function (name, ftype, (int) code,
6570 BUILT_IN_MD, NULL, NULL_TREE);
6571
6572 if (ecf & ECF_CONST)
6573 TREE_READONLY (decl) = 1;
6574 if (ecf & ECF_NOTHROW)
6575 TREE_NOTHROW (decl) = 1;
6576
6577 alpha_builtins [(int) code] = decl;
6578 }
6579
6580 /* Helper function of alpha_init_builtins. Add the COUNT built-in
6581 functions pointed to by P, with function type FTYPE. */
6582
6583 static void
6584 alpha_add_builtins (const struct alpha_builtin_def *p, size_t count,
6585 tree ftype)
6586 {
6587 size_t i;
6588
6589 for (i = 0; i < count; ++i, ++p)
6590 if ((target_flags & p->target_mask) == p->target_mask)
6591 alpha_builtin_function (p->name, ftype, p->code,
6592 (p->is_const ? ECF_CONST : 0) | ECF_NOTHROW);
6593 }
6594
6595 static void
6596 alpha_init_builtins (void)
6597 {
6598 tree ftype;
6599
6600 alpha_dimode_u = lang_hooks.types.type_for_mode (DImode, 1);
6601 alpha_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8);
6602 alpha_v8qi_s = build_vector_type (intQI_type_node, 8);
6603 alpha_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4);
6604 alpha_v4hi_s = build_vector_type (intHI_type_node, 4);
6605
6606 ftype = build_function_type_list (alpha_dimode_u, NULL_TREE);
6607 alpha_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins), ftype);
6608
6609 ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u, NULL_TREE);
6610 alpha_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins), ftype);
6611
6612 ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u,
6613 alpha_dimode_u, NULL_TREE);
6614 alpha_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins), ftype);
6615
6616 if (TARGET_ABI_OPEN_VMS)
6617 {
6618 ftype = build_function_type_list (ptr_type_node, ptr_type_node,
6619 NULL_TREE);
6620 alpha_builtin_function ("__builtin_establish_vms_condition_handler",
6621 ftype,
6622 ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6623 0);
6624
6625 ftype = build_function_type_list (ptr_type_node, void_type_node,
6626 NULL_TREE);
6627 alpha_builtin_function ("__builtin_revert_vms_condition_handler", ftype,
6628 ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, 0);
6629
6630 vms_patch_builtins ();
6631 }
6632 }
6633
6634 /* Expand an expression EXP that calls a built-in function,
6635 with result going to TARGET if that's convenient
6636 (and in mode MODE if that's convenient).
6637 SUBTARGET may be used as the target for computing one of EXP's operands.
6638 IGNORE is nonzero if the value is to be ignored. */
6639
6640 static rtx
6641 alpha_expand_builtin (tree exp, rtx target,
6642 rtx subtarget ATTRIBUTE_UNUSED,
6643 machine_mode mode ATTRIBUTE_UNUSED,
6644 int ignore ATTRIBUTE_UNUSED)
6645 {
6646 #define MAX_ARGS 2
6647
6648 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6649 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
6650 tree arg;
6651 call_expr_arg_iterator iter;
6652 enum insn_code icode;
6653 rtx op[MAX_ARGS], pat;
6654 int arity;
6655 bool nonvoid;
6656
6657 if (fcode >= ALPHA_BUILTIN_max)
6658 internal_error ("bad builtin fcode");
6659 icode = code_for_builtin[fcode];
6660 if (icode == 0)
6661 internal_error ("bad builtin fcode");
6662
6663 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
6664
6665 arity = 0;
6666 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
6667 {
6668 const struct insn_operand_data *insn_op;
6669
6670 if (arg == error_mark_node)
6671 return NULL_RTX;
6672 if (arity >= MAX_ARGS)
6673 return NULL_RTX;
6674
6675 insn_op = &insn_data[icode].operand[arity + nonvoid];
6676
6677 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
6678
6679 if (!(*insn_op->predicate) (op[arity], insn_op->mode))
6680 op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
6681 arity++;
6682 }
6683
6684 if (nonvoid)
6685 {
6686 machine_mode tmode = insn_data[icode].operand[0].mode;
6687 if (!target
6688 || GET_MODE (target) != tmode
6689 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
6690 target = gen_reg_rtx (tmode);
6691 }
6692
6693 switch (arity)
6694 {
6695 case 0:
6696 pat = GEN_FCN (icode) (target);
6697 break;
6698 case 1:
6699 if (nonvoid)
6700 pat = GEN_FCN (icode) (target, op[0]);
6701 else
6702 pat = GEN_FCN (icode) (op[0]);
6703 break;
6704 case 2:
6705 pat = GEN_FCN (icode) (target, op[0], op[1]);
6706 break;
6707 default:
6708 gcc_unreachable ();
6709 }
6710 if (!pat)
6711 return NULL_RTX;
6712 emit_insn (pat);
6713
6714 if (nonvoid)
6715 return target;
6716 else
6717 return const0_rtx;
6718 }
6719
6720 /* Fold the builtin for the CMPBGE instruction. This is a vector comparison
6721 with an 8-bit output vector. OPINT contains the integer operands; bit N
6722 of OP_CONST is set if OPINT[N] is valid. */
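/* As a worked example (operand values chosen purely for illustration),
   __builtin_alpha_cmpbge (0x0011223344556677, 0x1111111111111111) compares
   the eight unsigned byte pairs: bytes 0 through 6 of the first operand
   (0x77 ... 0x11) are >= 0x11, byte 7 (0x00) is not, so the call folds
   to 0x7f.  */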
6723
6724 static tree
6725 alpha_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const)
6726 {
6727 if (op_const == 3)
6728 {
6729 int i, val;
6730 for (i = 0, val = 0; i < 8; ++i)
6731 {
6732 unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff;
6733 unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff;
6734 if (c0 >= c1)
6735 val |= 1 << i;
6736 }
6737 return build_int_cst (alpha_dimode_u, val);
6738 }
6739 else if (op_const == 2 && opint[1] == 0)
6740 return build_int_cst (alpha_dimode_u, 0xff);
6741 return NULL;
6742 }
6743
6744 /* Fold the builtin for the ZAPNOT instruction. This is essentially a
6745 specialized form of an AND operation. Other byte manipulation instructions
6746 are defined in terms of this instruction, so this is also used as a
6747 subroutine for other builtins.
6748
6749 OP contains the tree operands; OPINT contains the extracted integer values.
6750 Bit N of OP_CONST is set if OPINT[N] is valid. OP may be null if only
6751 OPINT is to be considered. */
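/* For instance, __builtin_alpha_zapnot (x, 0x0f) folds to
   x & 0x00000000ffffffff (keep bytes 0-3), while __builtin_alpha_zap (x, 0x0f)
   folds to x & 0xffffffff00000000, since alpha_fold_builtin below inverts
   the byte mask before dispatching here.  */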
6752
6753 static tree
6754 alpha_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[],
6755 long op_const)
6756 {
6757 if (op_const & 2)
6758 {
6759 unsigned HOST_WIDE_INT mask = 0;
6760 int i;
6761
6762 for (i = 0; i < 8; ++i)
6763 if ((opint[1] >> i) & 1)
6764 mask |= (unsigned HOST_WIDE_INT)0xff << (i * 8);
6765
6766 if (op_const & 1)
6767 return build_int_cst (alpha_dimode_u, opint[0] & mask);
6768
6769 if (op)
6770 return fold_build2 (BIT_AND_EXPR, alpha_dimode_u, op[0],
6771 build_int_cst (alpha_dimode_u, mask));
6772 }
6773 else if ((op_const & 1) && opint[0] == 0)
6774 return build_int_cst (alpha_dimode_u, 0);
6775 return NULL;
6776 }
6777
6778 /* Fold the builtins for the EXT family of instructions. */
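/* For example, __builtin_alpha_extbl (0x1122334455667788, 2) extracts
   byte 2 into the low byte of the result and folds to 0x66; the WL/LL/QL
   variants use the wider byte masks 0x03/0x0f/0xff, and the *H variants
   shift left rather than right.  */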
6779
6780 static tree
6781 alpha_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[],
6782 long op_const, unsigned HOST_WIDE_INT bytemask,
6783 bool is_high)
6784 {
6785 long zap_const = 2;
6786 tree *zap_op = NULL;
6787
6788 if (op_const & 2)
6789 {
6790 unsigned HOST_WIDE_INT loc;
6791
6792 loc = opint[1] & 7;
6793 loc *= BITS_PER_UNIT;
6794
6795 if (loc != 0)
6796 {
6797 if (op_const & 1)
6798 {
6799 unsigned HOST_WIDE_INT temp = opint[0];
6800 if (is_high)
6801 temp <<= loc;
6802 else
6803 temp >>= loc;
6804 opint[0] = temp;
6805 zap_const = 3;
6806 }
6807 }
6808 else
6809 zap_op = op;
6810 }
6811
6812 opint[1] = bytemask;
6813 return alpha_fold_builtin_zapnot (zap_op, opint, zap_const);
6814 }
6815
6816 /* Fold the builtins for the INS family of instructions. */
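/* For example, __builtin_alpha_inswl (0x1234, 2) places the low word of
   the first operand at byte position 2 and folds to 0x12340000; the *H
   variants instead produce the bytes that would be shifted out past
   bit 63.  */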
6817
6818 static tree
6819 alpha_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[],
6820 long op_const, unsigned HOST_WIDE_INT bytemask,
6821 bool is_high)
6822 {
6823 if ((op_const & 1) && opint[0] == 0)
6824 return build_int_cst (alpha_dimode_u, 0);
6825
6826 if (op_const & 2)
6827 {
6828 unsigned HOST_WIDE_INT temp, loc, byteloc;
6829 tree *zap_op = NULL;
6830
6831 loc = opint[1] & 7;
6832 bytemask <<= loc;
6833
6834 temp = opint[0];
6835 if (is_high)
6836 {
6837 byteloc = (64 - (loc * 8)) & 0x3f;
6838 if (byteloc == 0)
6839 zap_op = op;
6840 else
6841 temp >>= byteloc;
6842 bytemask >>= 8;
6843 }
6844 else
6845 {
6846 byteloc = loc * 8;
6847 if (byteloc == 0)
6848 zap_op = op;
6849 else
6850 temp <<= byteloc;
6851 }
6852
6853 opint[0] = temp;
6854 opint[1] = bytemask;
6855 return alpha_fold_builtin_zapnot (zap_op, opint, op_const);
6856 }
6857
6858 return NULL;
6859 }
6860
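/* Fold the builtins for the MSK family of instructions, which clear the
   selected bytes.  For example, __builtin_alpha_mskbl (x, 5) clears byte 5
   and folds to a ZAPNOT with mask 0xdf, i.e. x & 0xffff00ffffffffff.  */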
6861 static tree
6862 alpha_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[],
6863 long op_const, unsigned HOST_WIDE_INT bytemask,
6864 bool is_high)
6865 {
6866 if (op_const & 2)
6867 {
6868 unsigned HOST_WIDE_INT loc;
6869
6870 loc = opint[1] & 7;
6871 bytemask <<= loc;
6872
6873 if (is_high)
6874 bytemask >>= 8;
6875
6876 opint[1] = bytemask ^ 0xff;
6877 }
6878
6879 return alpha_fold_builtin_zapnot (op, opint, op_const);
6880 }
6881
6882 static tree
6883 alpha_fold_vector_minmax (enum tree_code code, tree op[], tree vtype)
6884 {
6885 tree op0 = fold_convert (vtype, op[0]);
6886 tree op1 = fold_convert (vtype, op[1]);
6887 tree val = fold_build2 (code, vtype, op0, op1);
6888 return fold_build1 (VIEW_CONVERT_EXPR, alpha_dimode_u, val);
6889 }
6890
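/* Fold the builtin for the PERR instruction, which sums the absolute
   differences of the eight unsigned byte pairs; for example
   __builtin_alpha_perr (0, 0x0101010101010101) folds to 8.  */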
6891 static tree
6892 alpha_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const)
6893 {
6894 unsigned HOST_WIDE_INT temp = 0;
6895 int i;
6896
6897 if (op_const != 3)
6898 return NULL;
6899
6900 for (i = 0; i < 8; ++i)
6901 {
6902 unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff;
6903 unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff;
6904 if (a >= b)
6905 temp += a - b;
6906 else
6907 temp += b - a;
6908 }
6909
6910 return build_int_cst (alpha_dimode_u, temp);
6911 }
6912
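/* Fold the pack/unpack builtins (PKLB/PKWB/UNPKBL/UNPKBW).  For example,
   __builtin_alpha_pklb (0x0000004400000011) folds to 0x4411, and
   __builtin_alpha_unpkbl (0x4411) folds back to 0x0000004400000011.  */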
6913 static tree
6914 alpha_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const)
6915 {
6916 unsigned HOST_WIDE_INT temp;
6917
6918 if (op_const == 0)
6919 return NULL;
6920
6921 temp = opint[0] & 0xff;
6922 temp |= (opint[0] >> 24) & 0xff00;
6923
6924 return build_int_cst (alpha_dimode_u, temp);
6925 }
6926
6927 static tree
6928 alpha_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const)
6929 {
6930 unsigned HOST_WIDE_INT temp;
6931
6932 if (op_const == 0)
6933 return NULL;
6934
6935 temp = opint[0] & 0xff;
6936 temp |= (opint[0] >> 8) & 0xff00;
6937 temp |= (opint[0] >> 16) & 0xff0000;
6938 temp |= (opint[0] >> 24) & 0xff000000;
6939
6940 return build_int_cst (alpha_dimode_u, temp);
6941 }
6942
6943 static tree
6944 alpha_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const)
6945 {
6946 unsigned HOST_WIDE_INT temp;
6947
6948 if (op_const == 0)
6949 return NULL;
6950
6951 temp = opint[0] & 0xff;
6952 temp |= (opint[0] & 0xff00) << 24;
6953
6954 return build_int_cst (alpha_dimode_u, temp);
6955 }
6956
6957 static tree
6958 alpha_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const)
6959 {
6960 unsigned HOST_WIDE_INT temp;
6961
6962 if (op_const == 0)
6963 return NULL;
6964
6965 temp = opint[0] & 0xff;
6966 temp |= (opint[0] & 0x0000ff00) << 8;
6967 temp |= (opint[0] & 0x00ff0000) << 16;
6968 temp |= (opint[0] & 0xff000000) << 24;
6969
6970 return build_int_cst (alpha_dimode_u, temp);
6971 }
6972
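/* Fold the CIX count builtins.  For example, __builtin_alpha_cttz (0x50)
   folds to 4 and __builtin_alpha_ctlz (0x50) to 57; a zero operand folds
   to 64 for either, as in hardware.  */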
6973 static tree
6974 alpha_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const)
6975 {
6976 unsigned HOST_WIDE_INT temp;
6977
6978 if (op_const == 0)
6979 return NULL;
6980
6981 if (opint[0] == 0)
6982 temp = 64;
6983 else
6984 temp = exact_log2 (opint[0] & -opint[0]);
6985
6986 return build_int_cst (alpha_dimode_u, temp);
6987 }
6988
6989 static tree
6990 alpha_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const)
6991 {
6992 unsigned HOST_WIDE_INT temp;
6993
6994 if (op_const == 0)
6995 return NULL;
6996
6997 if (opint[0] == 0)
6998 temp = 64;
6999 else
7000 temp = 64 - floor_log2 (opint[0]) - 1;
7001
7002 return build_int_cst (alpha_dimode_u, temp);
7003 }
7004
7005 static tree
7006 alpha_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const)
7007 {
7008 unsigned HOST_WIDE_INT temp, op;
7009
7010 if (op_const == 0)
7011 return NULL;
7012
7013 op = opint[0];
7014 temp = 0;
7015 while (op)
7016 temp++, op &= op - 1;
7017
7018 return build_int_cst (alpha_dimode_u, temp);
7019 }
7020
7021 /* Fold one of our builtin functions. */
7022
7023 static tree
7024 alpha_fold_builtin (tree fndecl, int n_args, tree *op,
7025 bool ignore ATTRIBUTE_UNUSED)
7026 {
7027 unsigned HOST_WIDE_INT opint[MAX_ARGS];
7028 long op_const = 0;
7029 int i;
7030
7031 if (n_args > MAX_ARGS)
7032 return NULL;
7033
7034 for (i = 0; i < n_args; i++)
7035 {
7036 tree arg = op[i];
7037 if (arg == error_mark_node)
7038 return NULL;
7039
7040 opint[i] = 0;
7041 if (TREE_CODE (arg) == INTEGER_CST)
7042 {
7043 op_const |= 1L << i;
7044 opint[i] = int_cst_value (arg);
7045 }
7046 }
7047
7048 switch (DECL_MD_FUNCTION_CODE (fndecl))
7049 {
7050 case ALPHA_BUILTIN_CMPBGE:
7051 return alpha_fold_builtin_cmpbge (opint, op_const);
7052
7053 case ALPHA_BUILTIN_EXTBL:
7054 return alpha_fold_builtin_extxx (op, opint, op_const, 0x01, false);
7055 case ALPHA_BUILTIN_EXTWL:
7056 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, false);
7057 case ALPHA_BUILTIN_EXTLL:
7058 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, false);
7059 case ALPHA_BUILTIN_EXTQL:
7060 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, false);
7061 case ALPHA_BUILTIN_EXTWH:
7062 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, true);
7063 case ALPHA_BUILTIN_EXTLH:
7064 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, true);
7065 case ALPHA_BUILTIN_EXTQH:
7066 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, true);
7067
7068 case ALPHA_BUILTIN_INSBL:
7069 return alpha_fold_builtin_insxx (op, opint, op_const, 0x01, false);
7070 case ALPHA_BUILTIN_INSWL:
7071 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, false);
7072 case ALPHA_BUILTIN_INSLL:
7073 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, false);
7074 case ALPHA_BUILTIN_INSQL:
7075 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, false);
7076 case ALPHA_BUILTIN_INSWH:
7077 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, true);
7078 case ALPHA_BUILTIN_INSLH:
7079 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, true);
7080 case ALPHA_BUILTIN_INSQH:
7081 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, true);
7082
7083 case ALPHA_BUILTIN_MSKBL:
7084 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x01, false);
7085 case ALPHA_BUILTIN_MSKWL:
7086 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, false);
7087 case ALPHA_BUILTIN_MSKLL:
7088 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, false);
7089 case ALPHA_BUILTIN_MSKQL:
7090 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, false);
7091 case ALPHA_BUILTIN_MSKWH:
7092 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, true);
7093 case ALPHA_BUILTIN_MSKLH:
7094 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, true);
7095 case ALPHA_BUILTIN_MSKQH:
7096 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, true);
7097
7098 case ALPHA_BUILTIN_ZAP:
7099 opint[1] ^= 0xff;
7100 /* FALLTHRU */
7101 case ALPHA_BUILTIN_ZAPNOT:
7102 return alpha_fold_builtin_zapnot (op, opint, op_const);
7103
7104 case ALPHA_BUILTIN_MINUB8:
7105 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_u);
7106 case ALPHA_BUILTIN_MINSB8:
7107 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_s);
7108 case ALPHA_BUILTIN_MINUW4:
7109 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_u);
7110 case ALPHA_BUILTIN_MINSW4:
7111 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_s);
7112 case ALPHA_BUILTIN_MAXUB8:
7113 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_u);
7114 case ALPHA_BUILTIN_MAXSB8:
7115 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_s);
7116 case ALPHA_BUILTIN_MAXUW4:
7117 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_u);
7118 case ALPHA_BUILTIN_MAXSW4:
7119 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_s);
7120
7121 case ALPHA_BUILTIN_PERR:
7122 return alpha_fold_builtin_perr (opint, op_const);
7123 case ALPHA_BUILTIN_PKLB:
7124 return alpha_fold_builtin_pklb (opint, op_const);
7125 case ALPHA_BUILTIN_PKWB:
7126 return alpha_fold_builtin_pkwb (opint, op_const);
7127 case ALPHA_BUILTIN_UNPKBL:
7128 return alpha_fold_builtin_unpkbl (opint, op_const);
7129 case ALPHA_BUILTIN_UNPKBW:
7130 return alpha_fold_builtin_unpkbw (opint, op_const);
7131
7132 case ALPHA_BUILTIN_CTTZ:
7133 return alpha_fold_builtin_cttz (opint, op_const);
7134 case ALPHA_BUILTIN_CTLZ:
7135 return alpha_fold_builtin_ctlz (opint, op_const);
7136 case ALPHA_BUILTIN_CTPOP:
7137 return alpha_fold_builtin_ctpop (opint, op_const);
7138
7139 case ALPHA_BUILTIN_AMASK:
7140 case ALPHA_BUILTIN_IMPLVER:
7141 case ALPHA_BUILTIN_RPCC:
7142 /* None of these are foldable at compile-time. */
7143 default:
7144 return NULL;
7145 }
7146 }
7147
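/* Fold Alpha builtins at the GIMPLE level.  Currently only
   __builtin_alpha_umulh is handled: the call is replaced by a
   MULT_HIGHPART_EXPR, i.e. the high 64 bits of the unsigned 128-bit
   product of its operands.  */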
7148 bool
7149 alpha_gimple_fold_builtin (gimple_stmt_iterator *gsi)
7150 {
7151 bool changed = false;
7152 gimple *stmt = gsi_stmt (*gsi);
7153 tree call = gimple_call_fn (stmt);
7154 gimple *new_stmt = NULL;
7155
7156 if (call)
7157 {
7158 tree fndecl = gimple_call_fndecl (stmt);
7159
7160 if (fndecl)
7161 {
7162 tree arg0, arg1;
7163
7164 switch (DECL_MD_FUNCTION_CODE (fndecl))
7165 {
7166 case ALPHA_BUILTIN_UMULH:
7167 arg0 = gimple_call_arg (stmt, 0);
7168 arg1 = gimple_call_arg (stmt, 1);
7169
7170 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
7171 MULT_HIGHPART_EXPR, arg0, arg1);
7172 break;
7173 default:
7174 break;
7175 }
7176 }
7177 }
7178
7179 if (new_stmt)
7180 {
7181 gsi_replace (gsi, new_stmt, true);
7182 changed = true;
7183 }
7184
7185 return changed;
7186 }
7187 \f
7188 /* This page contains routines that are used to determine what the function
7189 prologue and epilogue code will do and write them out. */
7190
7191 /* Compute the size of the save area in the stack. */
7192
7193 /* These variables are used for communication between the following functions.
7194 They indicate various things about the current function being compiled
7195 that are used to tell what kind of prologue, epilogue and procedure
7196 descriptor to generate. */
7197
7198 /* The kind of procedure (null, register frame or stack frame) we need. */
7199 enum alpha_procedure_types {PT_NULL = 0, PT_REGISTER = 1, PT_STACK = 2};
7200 static enum alpha_procedure_types alpha_procedure_type;
7201
7202 /* Register number (either FP or SP) that is used to unwind the frame. */
7203 static int vms_unwind_regno;
7204
7205 /* Register number used to save FP. We need not have one for RA since
7206 we don't modify it for register procedures. This is only defined
7207 for register frame procedures. */
7208 static int vms_save_fp_regno;
7209
7210 /* Register number used to reference objects off our PV. */
7211 static int vms_base_regno;
7212
7213 /* Compute register masks for saved registers, register save area size,
7214 and total frame size. */
7215 static void
7216 alpha_compute_frame_layout (void)
7217 {
7218 unsigned HOST_WIDE_INT sa_mask = 0;
7219 HOST_WIDE_INT frame_size;
7220 int sa_size;
7221
7222 /* When outputting a thunk, we don't have valid register life info,
7223 but assemble_start_function wants to output .frame and .mask
7224 directives. */
7225 if (!cfun->is_thunk)
7226 {
7227 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7228 sa_mask |= HOST_WIDE_INT_1U << HARD_FRAME_POINTER_REGNUM;
7229
7230 /* One for every register we have to save. */
7231 for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
7232 if (! call_used_or_fixed_reg_p (i)
7233 && df_regs_ever_live_p (i) && i != REG_RA)
7234 sa_mask |= HOST_WIDE_INT_1U << i;
7235
7236 /* We need to restore these for the handler. */
7237 if (crtl->calls_eh_return)
7238 {
7239 for (unsigned i = 0; ; ++i)
7240 {
7241 unsigned regno = EH_RETURN_DATA_REGNO (i);
7242 if (regno == INVALID_REGNUM)
7243 break;
7244 sa_mask |= HOST_WIDE_INT_1U << regno;
7245 }
7246 }
7247
7248 /* If any register spilled, then spill the return address also. */
7249 /* ??? This is required by the Digital stack unwind specification
7250 and isn't needed if we're doing Dwarf2 unwinding. */
7251 if (sa_mask || alpha_ra_ever_killed ())
7252 sa_mask |= HOST_WIDE_INT_1U << REG_RA;
7253 }
7254
7255 sa_size = popcount_hwi (sa_mask);
7256 frame_size = get_frame_size ();
7257
7258 if (TARGET_ABI_OPEN_VMS)
7259 {
7260 /* Start with a stack procedure if we make any calls (REG_RA used), or
7261 need a frame pointer, with a register procedure if we otherwise need
7262 at least a slot, and with a null procedure in other cases. */
7263 if ((sa_mask >> REG_RA) & 1 || frame_pointer_needed)
7264 alpha_procedure_type = PT_STACK;
7265 else if (frame_size != 0)
7266 alpha_procedure_type = PT_REGISTER;
7267 else
7268 alpha_procedure_type = PT_NULL;
7269
7270 /* Don't reserve space for saving FP & RA yet. Do that later after we've
7271 made the final decision on stack procedure vs register procedure. */
7272 if (alpha_procedure_type == PT_STACK)
7273 sa_size -= 2;
7274
7275 /* Decide whether to refer to objects off our PV via FP or PV.
7276 If we need FP for something else or if we receive a nonlocal
7277 goto (which expects PV to contain the value), we must use PV.
7278 Otherwise, start by assuming we can use FP. */
7279
7280 vms_base_regno
7281 = (frame_pointer_needed
7282 || cfun->has_nonlocal_label
7283 || alpha_procedure_type == PT_STACK
7284 || crtl->outgoing_args_size)
7285 ? REG_PV : HARD_FRAME_POINTER_REGNUM;
7286
7287 /* If we want to copy PV into FP, we need to find some register
7288 in which to save FP. */
7289 vms_save_fp_regno = -1;
7290 if (vms_base_regno == HARD_FRAME_POINTER_REGNUM)
7291 for (unsigned i = 0; i < 32; i++)
7292 if (! fixed_regs[i] && call_used_or_fixed_reg_p (i)
7293 && ! df_regs_ever_live_p (i))
7294 {
7295 vms_save_fp_regno = i;
7296 break;
7297 }
7298
7299 /* A VMS condition handler requires a stack procedure in our
7300 implementation, though the calling standard does not require one. */
7301 if ((vms_save_fp_regno == -1 && alpha_procedure_type == PT_REGISTER)
7302 || cfun->machine->uses_condition_handler)
7303 vms_base_regno = REG_PV, alpha_procedure_type = PT_STACK;
7304 else if (alpha_procedure_type == PT_NULL)
7305 vms_base_regno = REG_PV;
7306
7307 /* Stack unwinding should be done via FP unless we use it for PV. */
7308 vms_unwind_regno = (vms_base_regno == REG_PV
7309 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
7310
7311 /* If this is a stack procedure, allow space for saving FP, RA and
7312 a condition handler slot if needed. */
7313 if (alpha_procedure_type == PT_STACK)
7314 sa_size += 2 + cfun->machine->uses_condition_handler;
7315 }
7316 else
7317 {
7318 /* Our size must be even (multiple of 16 bytes). */
7319 if (sa_size & 1)
7320 sa_size++;
7321 }
7322 sa_size *= 8;
7323
7324 if (TARGET_ABI_OPEN_VMS)
7325 frame_size = ALPHA_ROUND (sa_size
7326 + (alpha_procedure_type == PT_STACK ? 8 : 0)
7327 + frame_size
7328 + crtl->args.pretend_args_size);
7329 else
7330 frame_size = (ALPHA_ROUND (crtl->outgoing_args_size)
7331 + sa_size
7332 + ALPHA_ROUND (frame_size + crtl->args.pretend_args_size));
7333
7334 cfun->machine->sa_mask = sa_mask;
7335 cfun->machine->sa_size = sa_size;
7336 cfun->machine->frame_size = frame_size;
7337 }
7338
7339 #undef TARGET_COMPUTE_FRAME_LAYOUT
7340 #define TARGET_COMPUTE_FRAME_LAYOUT alpha_compute_frame_layout
7341
7342 /* Return true if this function can directly return via $26. */
7343
7344 bool
7345 direct_return (void)
7346 {
7347 return (TARGET_ABI_OSF
7348 && reload_completed
7349 && cfun->machine->frame_size == 0);
7350 }
7351
7352 /* Define the offset between two registers, one to be eliminated,
7353 and the other its replacement, at the start of a routine. */
7354
7355 HOST_WIDE_INT
7356 alpha_initial_elimination_offset (unsigned int from,
7357 unsigned int to ATTRIBUTE_UNUSED)
7358 {
7359 HOST_WIDE_INT ret;
7360
7361 ret = cfun->machine->sa_size;
7362 ret += ALPHA_ROUND (crtl->outgoing_args_size);
7363
7364 switch (from)
7365 {
7366 case FRAME_POINTER_REGNUM:
7367 break;
7368
7369 case ARG_POINTER_REGNUM:
7370 ret += (ALPHA_ROUND (get_frame_size ()
7371 + crtl->args.pretend_args_size)
7372 - crtl->args.pretend_args_size);
7373 break;
7374
7375 default:
7376 gcc_unreachable ();
7377 }
7378
7379 return ret;
7380 }
7381
7382 #if TARGET_ABI_OPEN_VMS
7383
7384 /* Worker function for TARGET_CAN_ELIMINATE. */
7385
7386 static bool
7387 alpha_vms_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
7388 {
7389 switch (alpha_procedure_type)
7390 {
7391 case PT_NULL:
7392 /* NULL procedures have no frame of their own and we only
7393 know how to resolve from the current stack pointer. */
7394 return to == STACK_POINTER_REGNUM;
7395
7396 case PT_REGISTER:
7397 case PT_STACK:
7398 /* We always eliminate except to the stack pointer if there is no
7399 usable frame pointer at hand. */
7400 return (to != STACK_POINTER_REGNUM
7401 || vms_unwind_regno != HARD_FRAME_POINTER_REGNUM);
7402 }
7403
7404 gcc_unreachable ();
7405 }
7406
7407 /* FROM is to be eliminated for TO. Return the offset so that TO+offset
7408 designates the same location as FROM. */
7409
7410 HOST_WIDE_INT
7411 alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to)
7412 {
7413 /* The only possible attempts we ever expect are ARG or FRAME_PTR to
7414 HARD_FRAME or STACK_PTR. We need the alpha_procedure_type to decide
7415 on the proper computations and will need the register save area size
7416 in most cases. */
7417
7418 HOST_WIDE_INT sa_size = cfun->machine->sa_size;
7419
7420 /* PT_NULL procedures have no frame of their own and we only allow
7421 elimination to the stack pointer. This is the argument pointer and we
7422 resolve the soft frame pointer to that as well. */
7423
7424 if (alpha_procedure_type == PT_NULL)
7425 return 0;
7426
7427 /* For a PT_STACK procedure the frame layout looks as follows
7428
7429                        -----> decreasing addresses
7430
7431                 < size rounded up to 16  |          likewise           >
7432 --------------#------------------------------+++--------------+++-------#
7433 incoming args # pretended args | "frame" | regs sa | PV | outgoing args #
7434 --------------#---------------------------------------------------------#
7435               ^                          ^              ^               ^
7436         ARG_PTR                  FRAME_PTR HARD_FRAME_PTR       STACK_PTR
7437
7438
7439 PT_REGISTER procedures are similar in that they may have a frame of their
7440 own. They have no regs-sa/pv/outgoing-args area.
7441
7442 We first compute offset to HARD_FRAME_PTR, then add what we need to get
7443 to STACK_PTR if need be. */
7444
7445 {
7446 HOST_WIDE_INT offset;
7447 HOST_WIDE_INT pv_save_size = alpha_procedure_type == PT_STACK ? 8 : 0;
7448
7449 switch (from)
7450 {
7451 case FRAME_POINTER_REGNUM:
7452 offset = ALPHA_ROUND (sa_size + pv_save_size);
7453 break;
7454 case ARG_POINTER_REGNUM:
7455 offset = (ALPHA_ROUND (sa_size + pv_save_size
7456 + get_frame_size ()
7457 + crtl->args.pretend_args_size)
7458 - crtl->args.pretend_args_size);
7459 break;
7460 default:
7461 gcc_unreachable ();
7462 }
7463
7464 if (to == STACK_POINTER_REGNUM)
7465 offset += ALPHA_ROUND (crtl->outgoing_args_size);
7466
7467 return offset;
7468 }
7469 }
7470
7471 #define COMMON_OBJECT "common_object"
7472
7473 static tree
7474 common_object_handler (tree *node, tree name ATTRIBUTE_UNUSED,
7475 tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED,
7476 bool *no_add_attrs ATTRIBUTE_UNUSED)
7477 {
7478 tree decl = *node;
7479 gcc_assert (DECL_P (decl));
7480
7481 DECL_COMMON (decl) = 1;
7482 return NULL_TREE;
7483 }
7484
7485 TARGET_GNU_ATTRIBUTES (vms_attribute_table,
7486 {
7487 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
7488 affects_type_identity, handler, exclude } */
7489 { COMMON_OBJECT, 0, 1, true, false, false, false, common_object_handler,
7490 NULL }
7491 });
7492
7493 void
7494 vms_output_aligned_decl_common(FILE *file, tree decl, const char *name,
7495 unsigned HOST_WIDE_INT size,
7496 unsigned int align)
7497 {
7498 tree attr = DECL_ATTRIBUTES (decl);
7499 fprintf (file, "%s", COMMON_ASM_OP);
7500 assemble_name (file, name);
7501 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED, size);
7502 /* ??? Unlike on OSF/1, the alignment factor is not in log units. */
7503 fprintf (file, ",%u", align / BITS_PER_UNIT);
7504 if (attr)
7505 {
7506 attr = lookup_attribute (COMMON_OBJECT, attr);
7507 if (attr)
7508 fprintf (file, ",%s",
7509 IDENTIFIER_POINTER (TREE_VALUE (TREE_VALUE (attr))));
7510 }
7511 fputc ('\n', file);
7512 }
7513
7514 #undef COMMON_OBJECT
7515
7516 #endif
7517
7518 bool
7519 alpha_find_lo_sum_using_gp (rtx insn)
7520 {
7521 subrtx_iterator::array_type array;
7522 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
7523 {
7524 const_rtx x = *iter;
7525 if (GET_CODE (x) == LO_SUM && XEXP (x, 0) == pic_offset_table_rtx)
7526 return true;
7527 }
7528 return false;
7529 }
7530
7531 static int
7532 alpha_does_function_need_gp (void)
7533 {
7534 rtx_insn *insn;
7535
7536 /* The GP being variable is an OSF abi thing. */
7537 if (! TARGET_ABI_OSF)
7538 return 0;
7539
7540 /* We need the gp to load the address of __mcount. */
7541 if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7542 return 1;
7543
7544 /* The code emitted by alpha_output_mi_thunk_osf uses the gp. */
7545 if (cfun->is_thunk)
7546 return 1;
7547
7548 /* The nonlocal receiver pattern assumes that the gp is valid for
7549 the nested function. Reasonable because it's almost always set
7550 correctly already. For the cases where that's wrong, make sure
7551 the nested function loads its gp on entry. */
7552 if (crtl->has_nonlocal_goto)
7553 return 1;
7554
7555 /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first.
7556 Even if we are a static function, we still need to do this in case
7557 our address is taken and passed to something like qsort. */
7558
7559 push_topmost_sequence ();
7560 insn = get_insns ();
7561 pop_topmost_sequence ();
7562
7563 for (; insn; insn = NEXT_INSN (insn))
7564 if (NONDEBUG_INSN_P (insn)
7565 && GET_CODE (PATTERN (insn)) != USE
7566 && GET_CODE (PATTERN (insn)) != CLOBBER
7567 && get_attr_usegp (insn))
7568 return 1;
7569
7570 return 0;
7571 }
7572
7573 /* Helper function for alpha_store_data_bypass_p, handle just a single SET
7574 IN_SET. */
7575
7576 static bool
7577 alpha_store_data_bypass_p_1 (rtx_insn *out_insn, rtx in_set)
7578 {
7579 if (!MEM_P (SET_DEST (in_set)))
7580 return false;
7581
7582 rtx out_set = single_set (out_insn);
7583 if (out_set)
7584 return !reg_mentioned_p (SET_DEST (out_set), SET_DEST (in_set));
7585
7586 rtx out_pat = PATTERN (out_insn);
7587 if (GET_CODE (out_pat) != PARALLEL)
7588 return false;
7589
7590 for (int i = 0; i < XVECLEN (out_pat, 0); i++)
7591 {
7592 rtx out_exp = XVECEXP (out_pat, 0, i);
7593
7594 if (GET_CODE (out_exp) == CLOBBER || GET_CODE (out_exp) == USE
7595 || GET_CODE (out_exp) == TRAP_IF)
7596 continue;
7597
7598 gcc_assert (GET_CODE (out_exp) == SET);
7599
7600 if (reg_mentioned_p (SET_DEST (out_exp), SET_DEST (in_set)))
7601 return false;
7602 }
7603
7604 return true;
7605 }
7606
7607 /* True if the dependency between OUT_INSN and IN_INSN is on the store
7608 data not the address operand(s) of the store. IN_INSN and OUT_INSN
7609 must be either a single_set or a PARALLEL with SETs inside.
7610
7611 This alpha-specific version of store_data_bypass_p ignores TRAP_IF
7612 that would result in assertion failure (and internal compiler error)
7613 in the generic store_data_bypass_p function. */
7614
7615 int
7616 alpha_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
7617 {
7618 rtx in_set = single_set (in_insn);
7619 if (in_set)
7620 return alpha_store_data_bypass_p_1 (out_insn, in_set);
7621
7622 rtx in_pat = PATTERN (in_insn);
7623 if (GET_CODE (in_pat) != PARALLEL)
7624 return false;
7625
7626 for (int i = 0; i < XVECLEN (in_pat, 0); i++)
7627 {
7628 rtx in_exp = XVECEXP (in_pat, 0, i);
7629
7630 if (GET_CODE (in_exp) == CLOBBER || GET_CODE (in_exp) == USE
7631 || GET_CODE (in_exp) == TRAP_IF)
7632 continue;
7633
7634 gcc_assert (GET_CODE (in_exp) == SET);
7635
7636 if (!alpha_store_data_bypass_p_1 (out_insn, in_exp))
7637 return false;
7638 }
7639
7640 return true;
7641 }
7642 \f
7643 /* Helper function to set RTX_FRAME_RELATED_P on instructions, including
7644 sequences. */
7645
7646 static rtx_insn *
7647 set_frame_related_p (void)
7648 {
7649 rtx_insn *seq = get_insns ();
7650 rtx_insn *insn;
7651
7652 end_sequence ();
7653
7654 if (!seq)
7655 return NULL;
7656
7657 if (INSN_P (seq))
7658 {
7659 insn = seq;
7660 while (insn != NULL_RTX)
7661 {
7662 RTX_FRAME_RELATED_P (insn) = 1;
7663 insn = NEXT_INSN (insn);
7664 }
7665 seq = emit_insn (seq);
7666 }
7667 else
7668 {
7669 seq = emit_insn (seq);
7670 RTX_FRAME_RELATED_P (seq) = 1;
7671 }
7672 return seq;
7673 }
7674
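/* Emit EXP within its own insn sequence and mark everything it generates
   as frame related; used below as, e.g.,
   FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, ...))) for the single
   stack adjustment in the prologue.  */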
7675 #define FRP(exp) (start_sequence (), exp, set_frame_related_p ())
7676
7677 /* Generates a store with the proper unwind info attached. VALUE is
7678 stored at BASE_REG+BASE_OFS. If FRAME_BIAS is nonzero, then BASE_REG
7679 contains SP+FRAME_BIAS, and that is the unwind info that should be
7680 generated. If FRAME_REG != VALUE, then VALUE is being stored on
7681 behalf of FRAME_REG, and FRAME_REG should be present in the unwind. */
7682
7683 static void
7684 emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias,
7685 HOST_WIDE_INT base_ofs, rtx frame_reg)
7686 {
7687 rtx addr, mem;
7688 rtx_insn *insn;
7689
7690 addr = plus_constant (Pmode, base_reg, base_ofs);
7691 mem = gen_frame_mem (DImode, addr);
7692
7693 insn = emit_move_insn (mem, value);
7694 RTX_FRAME_RELATED_P (insn) = 1;
7695
7696 if (frame_bias || value != frame_reg)
7697 {
7698 if (frame_bias)
7699 {
7700 addr = plus_constant (Pmode, stack_pointer_rtx,
7701 frame_bias + base_ofs);
7702 mem = gen_rtx_MEM (DImode, addr);
7703 }
7704
7705 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7706 gen_rtx_SET (mem, frame_reg));
7707 }
7708 }
7709
7710 static void
7711 emit_frame_store (unsigned int regno, rtx base_reg,
7712 HOST_WIDE_INT frame_bias, HOST_WIDE_INT base_ofs)
7713 {
7714 rtx reg = gen_rtx_REG (DImode, regno);
7715 emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg);
7716 }
7717
7718 /* Write function prologue. */
7719
7720 /* On VMS we have two kinds of functions:
7721
7722 - stack frame (PROC_STACK)
7723 these are 'normal' functions with local vars and which are
7724 calling other functions
7725 - register frame (PROC_REGISTER)
7726 keeps all data in registers, needs no stack
7727
7728 We must pass this to the assembler so it can generate the
7729 proper pdsc (procedure descriptor).
7730 This is done with the '.pdesc' command.
7731
7732 On non-VMS targets, we don't really differentiate between the two, as we can
7733 simply allocate stack without saving registers. */
7734
7735 void
7736 alpha_expand_prologue (void)
7737 {
7738 /* Registers to save. */
7739 unsigned HOST_WIDE_INT sa_mask = cfun->machine->sa_mask;
7740 /* Stack space needed for pushing registers clobbered by us. */
7741 HOST_WIDE_INT sa_size = cfun->machine->sa_size;
7742 /* Complete stack size needed. */
7743 HOST_WIDE_INT frame_size = cfun->machine->frame_size;
7744 /* Probed stack size; it additionally includes the size of
7745 the "reserve region" if any. */
7746 HOST_WIDE_INT probed_size, sa_bias;
7747 /* Offset from base reg to register save area. */
7748 HOST_WIDE_INT reg_offset;
7749 rtx sa_reg;
7750
7751 if (flag_stack_usage_info)
7752 current_function_static_stack_size = frame_size;
7753
7754 if (TARGET_ABI_OPEN_VMS)
7755 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
7756 else
7757 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
7758
7759 /* Emit an insn to reload GP, if needed. */
7760 if (TARGET_ABI_OSF)
7761 {
7762 alpha_function_needs_gp = alpha_does_function_need_gp ();
7763 if (alpha_function_needs_gp)
7764 emit_insn (gen_prologue_ldgp ());
7765 }
7766
7767 /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert
7768 the call to mcount ourselves, rather than having the linker do it
7769 magically in response to -pg. Since _mcount has special linkage,
7770 don't represent the call as a call. */
7771 if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7772 emit_insn (gen_prologue_mcount ());
7773
7774 /* Adjust the stack by the frame size. If the frame size is > 4096
7775 bytes, we need to be sure we probe somewhere in the first and last
7776 4096 bytes (we can probably get away without the latter test) and
7777 every 8192 bytes in between. If the frame size is > 32768, we
7778 do this in a loop. Otherwise, we generate the explicit probe
7779 instructions.
7780
7781 Note that we are only allowed to adjust sp once in the prologue. */
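/* Worked example: with a 20000-byte frame and no -fstack-check, the
   code below probes at SP-4096 and SP-12288 and, when no registers
   are being saved, once more at SP-20000 before the single SP
   adjustment.  A 100000-byte frame takes the loop path below with

     blocks = (100000 + 4096) / 8192 = 12
     leftover = 104096 - 12 * 8192 = 5792.  */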
7782
7783 probed_size = frame_size;
7784 if (flag_stack_check || flag_stack_clash_protection)
7785 probed_size += get_stack_check_protect ();
7786
7787 if (probed_size <= 32768)
7788 {
7789 if (probed_size > 4096)
7790 {
7791 int probed;
7792
7793 for (probed = 4096; probed < probed_size; probed += 8192)
7794 emit_insn (gen_stack_probe_internal (GEN_INT (-probed)));
7795
7796 /* We only have to do this probe if we aren't saving registers or
7797 if we are probing beyond the frame because of -fstack-check. */
7798 if ((sa_size == 0 && probed_size > probed - 4096)
7799 || flag_stack_check || flag_stack_clash_protection)
7800 emit_insn (gen_stack_probe_internal (GEN_INT (-probed_size)));
7801 }
7802
7803 if (frame_size != 0)
7804 FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
7805 GEN_INT (-frame_size))));
7806 }
7807 else
7808 {
7809 /* Here we generate code to set R22 to SP + 4096 and set R23 to the
7810 number of 8192 byte blocks to probe. We then probe each block
7811 in the loop and then set SP to the proper location. If the
7812 amount remaining is > 4096, we have to do one more probe if we
7813 are not saving any registers or if we are probing beyond the
7814 frame because of -fstack-check. */
7815
7816 HOST_WIDE_INT blocks = (probed_size + 4096) / 8192;
7817 HOST_WIDE_INT leftover = probed_size + 4096 - blocks * 8192;
7818 rtx ptr = gen_rtx_REG (DImode, 22);
7819 rtx count = gen_rtx_REG (DImode, 23);
7820 rtx seq;
7821
7822 emit_move_insn (count, GEN_INT (blocks));
7823 emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096)));
7824
7825 /* Because of the difficulty in emitting a new basic block this
7826 late in the compilation, generate the loop as a single insn. */
7827 emit_insn (gen_prologue_stack_probe_loop (count, ptr));
7828
7829 if ((leftover > 4096 && sa_size == 0)
7830 || flag_stack_check || flag_stack_clash_protection)
7831 {
7832 rtx last = gen_rtx_MEM (DImode,
7833 plus_constant (Pmode, ptr, -leftover));
7834 MEM_VOLATILE_P (last) = 1;
7835 emit_move_insn (last, const0_rtx);
7836 }
7837
7838 if (flag_stack_check || flag_stack_clash_protection)
7839 {
7840 /* If -fstack-check is specified we have to load the entire
7841 constant into a register and subtract from the sp in one go,
7842 because the probed stack size is not equal to the frame size. */
7843 HOST_WIDE_INT lo, hi;
7844 lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
7845 hi = frame_size - lo;
7846
7847 emit_move_insn (ptr, GEN_INT (hi));
7848 emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo)));
7849 seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx,
7850 ptr));
7851 }
7852 else
7853 {
7854 seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr,
7855 GEN_INT (-leftover)));
7856 }
7857
7858 /* This alternative is special, because the DWARF code cannot
7859 possibly intuit through the loop above. So we invent this
7860 note for it to look at instead. */
7861 RTX_FRAME_RELATED_P (seq) = 1;
7862 add_reg_note (seq, REG_FRAME_RELATED_EXPR,
7863 gen_rtx_SET (stack_pointer_rtx,
7864 plus_constant (Pmode, stack_pointer_rtx,
7865 -frame_size)));
7866 }
7867
7868 /* Cope with very large offsets to the register save area. */
7869 sa_bias = 0;
7870 sa_reg = stack_pointer_rtx;
7871 if (reg_offset + sa_size > 0x8000)
7872 {
7873 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
7874 rtx sa_bias_rtx;
7875
7876 if (low + sa_size <= 0x8000)
7877 sa_bias = reg_offset - low, reg_offset = low;
7878 else
7879 sa_bias = reg_offset, reg_offset = 0;
7880
7881 sa_reg = gen_rtx_REG (DImode, 24);
7882 sa_bias_rtx = GEN_INT (sa_bias);
7883
7884 if (add_operand (sa_bias_rtx, DImode))
7885 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx));
7886 else
7887 {
7888 emit_move_insn (sa_reg, sa_bias_rtx);
7889 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg));
7890 }
7891 }
7892
7893 /* Save regs in stack order. Beginning with VMS PV. */
7894 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7895 emit_frame_store (REG_PV, stack_pointer_rtx, 0, 0);
7896
7897 /* Save register RA next, followed by any other registers
7898 that need to be saved. */
7899 for (unsigned i = REG_RA; sa_mask != 0; i = ctz_hwi (sa_mask))
7900 {
7901 emit_frame_store (i, sa_reg, sa_bias, reg_offset);
7902 reg_offset += 8;
7903 sa_mask &= ~(HOST_WIDE_INT_1U << i);
7904 }
7905
7906 if (TARGET_ABI_OPEN_VMS)
7907 {
7908 /* Register frame procedures save the fp. */
7909 if (alpha_procedure_type == PT_REGISTER)
7910 {
7911 rtx_insn *insn =
7912 emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno),
7913 hard_frame_pointer_rtx);
7914 add_reg_note (insn, REG_CFA_REGISTER, NULL);
7915 RTX_FRAME_RELATED_P (insn) = 1;
7916 }
7917
7918 if (alpha_procedure_type != PT_NULL && vms_base_regno != REG_PV)
7919 emit_insn (gen_force_movdi (gen_rtx_REG (DImode, vms_base_regno),
7920 gen_rtx_REG (DImode, REG_PV)));
7921
7922 if (alpha_procedure_type != PT_NULL
7923 && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
7924 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7925
7926 /* If we have to allocate space for outgoing args, do it now. */
7927 if (crtl->outgoing_args_size != 0)
7928 {
7929 rtx_insn *seq
7930 = emit_move_insn (stack_pointer_rtx,
7931 plus_constant
7932 (Pmode, hard_frame_pointer_rtx,
7933 - (ALPHA_ROUND
7934 (crtl->outgoing_args_size))));
7935
7936 /* Only set FRAME_RELATED_P on the stack adjustment we just emitted
7937 if ! frame_pointer_needed. Setting the bit will change the CFA
7938 computation rule to use sp again, which would be wrong if we had
7939 frame_pointer_needed, as this means sp might move unpredictably
7940 later on.
7941
7942 Also, note that
7943 frame_pointer_needed
7944 => vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
7945 and
7946 crtl->outgoing_args_size != 0
7947 => alpha_procedure_type != PT_NULL,
7948
7949 so when we are not setting the bit here, we are guaranteed to
7950 have emitted an FRP frame pointer update just before. */
7951 RTX_FRAME_RELATED_P (seq) = ! frame_pointer_needed;
7952 }
7953 }
7954 else
7955 {
7956 /* If we need a frame pointer, set it from the stack pointer. */
7957 if (frame_pointer_needed)
7958 {
7959 if (TARGET_CAN_FAULT_IN_PROLOGUE)
7960 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7961 else
7962 /* This must always be the last instruction in the
7963 prologue, thus we emit a special move + clobber. */
7964 FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx,
7965 stack_pointer_rtx, sa_reg)));
7966 }
7967 }
7968
7969 /* The ABIs for VMS and OSF/1 say that while we can schedule insns into
7970 the prologue, for exception handling reasons, we cannot do this for
7971 any insn that might fault. We could prevent this for mems with a
7972 (clobber:BLK (scratch)), but this doesn't work for fp insns. So we
7973 have to prevent all such scheduling with a blockage.
7974
7975 Linux, on the other hand, never bothered to implement OSF/1's
7976 exception handling, and so doesn't care about such things. Anyone
7977 planning to use dwarf2 frame-unwind info can also omit the blockage. */
7978
7979 if (! TARGET_CAN_FAULT_IN_PROLOGUE)
7980 emit_insn (gen_blockage ());
7981 }
7982
7983 /* Count the number of .file directives, so that .loc is up to date. */
7984 int num_source_filenames = 0;
7985
7986 /* Output the textual info surrounding the prologue. */
7987
7988 void
7989 alpha_start_function (FILE *file, const char *fnname,
7990 tree decl ATTRIBUTE_UNUSED)
7991 {
7992 unsigned long imask, fmask;
7993 /* Complete stack size needed. */
7994 HOST_WIDE_INT frame_size = cfun->machine->frame_size;
7995 /* The maximum debuggable frame size. */
7996 const HOST_WIDE_INT max_frame_size = HOST_WIDE_INT_1 << 31;
7997 /* Offset from base reg to register save area. */
7998 HOST_WIDE_INT reg_offset;
7999 char *entry_label = (char *) alloca (strlen (fnname) + 6);
8000 char *tramp_label = (char *) alloca (strlen (fnname) + 6);
8001 int i;
8002
8003 #if TARGET_ABI_OPEN_VMS
8004 vms_start_function (fnname);
8005 #endif
8006
8007 alpha_fnname = fnname;
8008
8009 if (TARGET_ABI_OPEN_VMS)
8010 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
8011 else
8012 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
8013
8014 imask = cfun->machine->sa_mask & 0xffffffffu;
8015 fmask = cfun->machine->sa_mask >> 32;
8016
8017 /* Issue function start and label. */
8018 if (TARGET_ABI_OPEN_VMS || !flag_inhibit_size_directive)
8019 {
8020 fputs ("\t.ent ", file);
8021 assemble_name (file, fnname);
8022 putc ('\n', file);
8023
8024 /* If the function needs GP, we'll write the "..ng" label there.
8025 Otherwise, do it here. */
8026 if (TARGET_ABI_OSF
8027 && ! alpha_function_needs_gp
8028 && ! cfun->is_thunk)
8029 {
8030 putc ('$', file);
8031 assemble_name (file, fnname);
8032 fputs ("..ng:\n", file);
8033 }
8034 }
8035 /* Nested functions on VMS that are potentially called via trampoline
8036 get a special transfer entry point that loads the called function's
8037 procedure descriptor and static chain. */
8038 if (TARGET_ABI_OPEN_VMS
8039 && !TREE_PUBLIC (decl)
8040 && DECL_CONTEXT (decl)
8041 && !TYPE_P (DECL_CONTEXT (decl))
8042 && TREE_CODE (DECL_CONTEXT (decl)) != TRANSLATION_UNIT_DECL)
8043 {
8044 strcpy (tramp_label, fnname);
8045 strcat (tramp_label, "..tr");
8046 ASM_OUTPUT_LABEL (file, tramp_label);
8047 fprintf (file, "\tldq $1,24($27)\n");
8048 fprintf (file, "\tldq $27,16($27)\n");
8049 }
8050
8051 strcpy (entry_label, fnname);
8052 if (TARGET_ABI_OPEN_VMS)
8053 strcat (entry_label, "..en");
8054
8055 ASM_OUTPUT_LABEL (file, entry_label);
8056 inside_function = TRUE;
8057
8058 if (TARGET_ABI_OPEN_VMS)
8059 fprintf (file, "\t.base $%d\n", vms_base_regno);
8060
8061 if (TARGET_ABI_OSF
8062 && TARGET_IEEE_CONFORMANT
8063 && !flag_inhibit_size_directive)
8064 {
8065 /* Set flags in procedure descriptor to request IEEE-conformant
8066 math-library routines. The value we set it to is PDSC_EXC_IEEE
8067 (/usr/include/pdsc.h). */
8068 fputs ("\t.eflag 48\n", file);
8069 }
8070
8071 /* Set up offsets to alpha virtual arg/local debugging pointer. */
8072 alpha_auto_offset = -frame_size + crtl->args.pretend_args_size;
8073 alpha_arg_offset = -frame_size + 48;
8074
8075 /* Describe our frame. If the frame size is larger than an integer,
8076 print it as zero to avoid an assembler error. We won't be
8077 properly describing such a frame, but that's the best we can do. */
8078 if (TARGET_ABI_OPEN_VMS)
8079 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,"
8080 HOST_WIDE_INT_PRINT_DEC "\n",
8081 vms_unwind_regno,
8082 frame_size >= max_frame_size ? 0 : frame_size,
8083 reg_offset);
8084 else if (!flag_inhibit_size_directive)
8085 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n",
8086 (frame_pointer_needed
8087 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM),
8088 frame_size >= max_frame_size ? 0 : frame_size,
8089 crtl->args.pretend_args_size);
8090
8091 /* Describe which registers were spilled. */
8092 if (TARGET_ABI_OPEN_VMS)
8093 {
8094 if (imask)
8095 /* ??? Does VMS care if mask contains ra? The old code didn't
8096 set it, so I don't here. */
8097 fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1UL << REG_RA));
8098 if (fmask)
8099 fprintf (file, "\t.fmask 0x%lx,0\n", fmask);
8100 if (alpha_procedure_type == PT_REGISTER)
8101 fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno);
8102 }
8103 else if (!flag_inhibit_size_directive)
8104 {
8105 if (imask)
8106 {
8107 fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask,
8108 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
8109
8110 for (i = 0; i < 32; ++i)
8111 if (imask & (1UL << i))
8112 reg_offset += 8;
8113 }
8114
8115 if (fmask)
8116 fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask,
8117 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
8118 }
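
/* Illustrative output, assuming an OSF function with a 96-byte frame,
   no outgoing-args area, and saves of $9 and $26; the directives above
   would then come out as

     .frame $30,96,$26,0
     .mask 0x4000200,-96

   (0x4000200 is (1 << 9) | (1 << 26); -96 is reg_offset - frame_size).  */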
8119
8120 #if TARGET_ABI_OPEN_VMS
8121 /* If a user condition handler has been installed at some point, emit
8122 the procedure descriptor bits to point the Condition Handling Facility
8123 at the indirection wrapper, and state the fp offset at which the user
8124 handler may be found. */
8125 if (cfun->machine->uses_condition_handler)
8126 {
8127 fprintf (file, "\t.handler __gcc_shell_handler\n");
8128 fprintf (file, "\t.handler_data %d\n", VMS_COND_HANDLER_FP_OFFSET);
8129 }
8130
8131 #ifdef TARGET_VMS_CRASH_DEBUG
8132 /* Support of minimal traceback info. */
8133 switch_to_section (readonly_data_section);
8134 fprintf (file, "\t.align 3\n");
8135 assemble_name (file, fnname); fputs ("..na:\n", file);
8136 fputs ("\t.ascii \"", file);
8137 assemble_name (file, fnname);
8138 fputs ("\\0\"\n", file);
8139 switch_to_section (text_section);
8140 #endif
8141 #endif /* TARGET_ABI_OPEN_VMS */
8142 }
8143
8144 /* Emit the .prologue note at the scheduled end of the prologue. */
8145
8146 static void
8147 alpha_output_function_end_prologue (FILE *file)
8148 {
8149 if (TARGET_ABI_OPEN_VMS)
8150 fputs ("\t.prologue\n", file);
8151 else if (!flag_inhibit_size_directive)
8152 fprintf (file, "\t.prologue %d\n",
8153 alpha_function_needs_gp || cfun->is_thunk);
8154 }
8155
8156 /* Write function epilogue. */
8157
8158 void
8159 alpha_expand_epilogue (void)
8160 {
8161 /* Registers to save. */
8162 unsigned HOST_WIDE_INT sa_mask = cfun->machine->sa_mask;
8163 /* Stack space needed for pushing registers clobbered by us. */
8164 HOST_WIDE_INT sa_size = cfun->machine->sa_size;
8165 /* Complete stack size needed. */
8166 HOST_WIDE_INT frame_size = cfun->machine->frame_size;
8167 /* Offset from base reg to register save area. */
8168 HOST_WIDE_INT reg_offset;
8169 int fp_is_frame_pointer, fp_offset;
8170 rtx sa_reg, sa_reg_exp = NULL;
8171 rtx sp_adj1, sp_adj2, mem, reg, insn;
8172 rtx eh_ofs;
8173 rtx cfa_restores = NULL_RTX;
8174
8175 if (TARGET_ABI_OPEN_VMS)
8176 {
8177 if (alpha_procedure_type == PT_STACK)
8178 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
8179 else
8180 reg_offset = 0;
8181 }
8182 else
8183 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
8184
8185 fp_is_frame_pointer
8186 = (TARGET_ABI_OPEN_VMS
8187 ? alpha_procedure_type == PT_STACK
8188 : frame_pointer_needed);
8189 fp_offset = 0;
8190 sa_reg = stack_pointer_rtx;
8191
8192 if (crtl->calls_eh_return)
8193 eh_ofs = EH_RETURN_STACKADJ_RTX;
8194 else
8195 eh_ofs = NULL_RTX;
8196
8197 if (sa_size)
8198 {
8199 /* If we have a frame pointer, restore SP from it. */
8200 if (TARGET_ABI_OPEN_VMS
8201 ? vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
8202 : frame_pointer_needed)
8203 emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
8204
8205 /* Cope with very large offsets to the register save area. */
8206 if (reg_offset + sa_size > 0x8000)
8207 {
8208 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
8209 HOST_WIDE_INT bias;
8210
8211 if (low + sa_size <= 0x8000)
8212 bias = reg_offset - low, reg_offset = low;
8213 else
8214 bias = reg_offset, reg_offset = 0;
8215
8216 sa_reg = gen_rtx_REG (DImode, 22);
8217 sa_reg_exp = plus_constant (Pmode, stack_pointer_rtx, bias);
8218
8219 emit_move_insn (sa_reg, sa_reg_exp);
8220 }
8221
8222 /* Restore registers in order, excepting a true frame pointer. */
8223 for (unsigned i = REG_RA; sa_mask != 0; i = ctz_hwi (sa_mask))
8224 {
8225 if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer)
8226 fp_offset = reg_offset;
8227 else
8228 {
8229 mem = gen_frame_mem (DImode,
8230 plus_constant (Pmode, sa_reg,
8231 reg_offset));
8232 reg = gen_rtx_REG (DImode, i);
8233 emit_move_insn (reg, mem);
8234 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
8235 cfa_restores);
8236 }
8237 reg_offset += 8;
8238 sa_mask &= ~(HOST_WIDE_INT_1U << i);
8239 }
8240 }
8241
8242 if (frame_size || eh_ofs)
8243 {
8244 sp_adj1 = stack_pointer_rtx;
8245
8246 if (eh_ofs)
8247 {
8248 sp_adj1 = gen_rtx_REG (DImode, 23);
8249 emit_move_insn (sp_adj1,
8250 gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs));
8251 }
8252
8253 /* If the stack size is large, begin computation into a temporary
8254 register so as not to interfere with a potential fp restore,
8255 which must be consecutive with an SP restore. */
8256 if (frame_size < 32768 && !cfun->calls_alloca)
8257 sp_adj2 = GEN_INT (frame_size);
8258 else if (frame_size < 0x40007fffL)
8259 {
8260 int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
8261
8262 sp_adj2 = plus_constant (Pmode, sp_adj1, frame_size - low);
8263 if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2))
8264 sp_adj1 = sa_reg;
8265 else
8266 {
8267 sp_adj1 = gen_rtx_REG (DImode, 23);
8268 emit_move_insn (sp_adj1, sp_adj2);
8269 }
8270 sp_adj2 = GEN_INT (low);
8271 }
8272 else
8273 {
8274 rtx tmp = gen_rtx_REG (DImode, 23);
8275 sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size, 3, false);
8276 if (!sp_adj2)
8277 {
8278 /* We can't drop new things to memory this late, afaik,
8279 so build it up by pieces. */
8280 sp_adj2 = alpha_emit_set_long_const (tmp, frame_size);
8281 gcc_assert (sp_adj2);
8282 }
8283 }
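
/* Worked example: a 0x12345-byte frame (with no EH stack adjustment)
   takes the middle branch above: low = 0x2345, so, assuming the biased
   address does not match sa_reg_exp, $23 is loaded with SP + 0x10000
   and the final restore below adds the remaining 0x2345, keeping the
   FP restore and SP restore consecutive as the comment above requires.  */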
8284
8285 /* From now on, things must be in order. So emit blockages. */
8286
8287 /* Restore the frame pointer. */
8288 if (fp_is_frame_pointer)
8289 {
8290 emit_insn (gen_blockage ());
8291 mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg,
8292 fp_offset));
8293 emit_move_insn (hard_frame_pointer_rtx, mem);
8294 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8295 hard_frame_pointer_rtx, cfa_restores);
8296 }
8297 else if (TARGET_ABI_OPEN_VMS)
8298 {
8299 emit_insn (gen_blockage ());
8300 emit_move_insn (hard_frame_pointer_rtx,
8301 gen_rtx_REG (DImode, vms_save_fp_regno));
8302 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8303 hard_frame_pointer_rtx, cfa_restores);
8304 }
8305
8306 /* Restore the stack pointer. */
8307 emit_insn (gen_blockage ());
8308 if (sp_adj2 == const0_rtx)
8309 insn = emit_move_insn (stack_pointer_rtx, sp_adj1);
8310 else
8311 insn = emit_move_insn (stack_pointer_rtx,
8312 gen_rtx_PLUS (DImode, sp_adj1, sp_adj2));
8313 REG_NOTES (insn) = cfa_restores;
8314 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
8315 RTX_FRAME_RELATED_P (insn) = 1;
8316 }
8317 else
8318 {
8319 gcc_assert (cfa_restores == NULL);
8320
8321 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_REGISTER)
8322 {
8323 emit_insn (gen_blockage ());
8324 insn = emit_move_insn (hard_frame_pointer_rtx,
8325 gen_rtx_REG (DImode, vms_save_fp_regno));
8326 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8327 RTX_FRAME_RELATED_P (insn) = 1;
8328 }
8329 }
8330 }
8331 \f
8332 /* Output the rest of the textual info surrounding the epilogue. */
8333
8334 void
8335 alpha_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED)
8336 {
8337 rtx_insn *insn;
8338
8339 /* We output a nop after noreturn calls at the very end of the function to
8340 ensure that the return address always remains in the caller's code range,
8341 as not doing so might confuse unwinding engines. */
8342 insn = get_last_insn ();
8343 if (!INSN_P (insn))
8344 insn = prev_active_insn (insn);
8345 if (insn && CALL_P (insn))
8346 output_asm_insn (get_insn_template (CODE_FOR_nop, NULL), NULL);
8347
8348 #if TARGET_ABI_OPEN_VMS
8349 /* Write the linkage entries. */
8350 alpha_write_linkage (file, fnname);
8351 #endif
8352
8353 /* End the function. */
8354 if (TARGET_ABI_OPEN_VMS
8355 || !flag_inhibit_size_directive)
8356 {
8357 fputs ("\t.end ", file);
8358 assemble_name (file, fnname);
8359 putc ('\n', file);
8360 }
8361 inside_function = FALSE;
8362 }
8363
8364 #if TARGET_ABI_OSF
8365 /* Emit a tail call to FUNCTION after adjusting THIS by DELTA.
8366
8367 In order to avoid the hordes of differences between generated code
8368 with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating
8369 lots of code loading up large constants, generate rtl and emit it
8370 instead of going straight to text.
8371
8372 Not sure why this idea hasn't been explored before... */
8373
8374 static void
8375 alpha_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8376 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8377 tree function)
8378 {
8379 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
8380 HOST_WIDE_INT hi, lo;
8381 rtx this_rtx, funexp;
8382 rtx_insn *insn;
8383
8384 /* We always require a valid GP. */
8385 emit_insn (gen_prologue_ldgp ());
8386 emit_note (NOTE_INSN_PROLOGUE_END);
8387
8388 /* Find the "this" pointer. If the function returns a structure,
8389 the structure return pointer is in $16. */
8390 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8391 this_rtx = gen_rtx_REG (Pmode, 17);
8392 else
8393 this_rtx = gen_rtx_REG (Pmode, 16);
8394
8395 /* Add DELTA. When possible we use ldah+lda. Otherwise load the
8396 entire constant for the add. */
8397 lo = ((delta & 0xffff) ^ 0x8000) - 0x8000;
8398 hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8399 if (hi + lo == delta)
8400 {
8401 if (hi)
8402 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (hi)));
8403 if (lo)
8404 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (lo)));
8405 }
8406 else
8407 {
8408 rtx tmp = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 0), delta);
8409 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8410 }
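
/* Worked example: for DELTA = 0x9000 the split above gives
   lo = ((0x9000 ^ 0x8000) - 0x8000) = -0x7000 and hi = 0x10000, so
   hi + lo == DELTA and the adjustment becomes an ldah-sized add of
   65536 followed by an lda-sized add of -28672.  The XOR/subtract
   idiom simply sign-extends the low 16 bits so that lo always fits a
   signed 16-bit displacement.  */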
8411
8412 /* Add a delta stored in the vtable at VCALL_OFFSET. */
8413 if (vcall_offset)
8414 {
8415 rtx tmp, tmp2;
8416
8417 tmp = gen_rtx_REG (Pmode, 0);
8418 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
8419
8420 lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000;
8421 hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8422 if (hi + lo == vcall_offset)
8423 {
8424 if (hi)
8425 emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi)));
8426 }
8427 else
8428 {
8429 tmp2 = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 1),
8430 vcall_offset);
8431 emit_insn (gen_adddi3 (tmp, tmp, tmp2));
8432 lo = 0;
8433 }
8434 if (lo)
8435 tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo));
8436 else
8437 tmp2 = tmp;
8438 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2));
8439
8440 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8441 }
8442
8443 /* Generate a tail call to the target function. */
8444 if (! TREE_USED (function))
8445 {
8446 assemble_external (function);
8447 TREE_USED (function) = 1;
8448 }
8449 funexp = XEXP (DECL_RTL (function), 0);
8450 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8451 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx));
8452 SIBLING_CALL_P (insn) = 1;
8453
8454 /* Run just enough of rest_of_compilation to get the insns emitted.
8455 There's not really enough bulk here to make other passes such as
8456 instruction scheduling worthwhile. */
8457 insn = get_insns ();
8458 shorten_branches (insn);
8459 assemble_start_function (thunk_fndecl, fnname);
8460 final_start_function (insn, file, 1);
8461 final (insn, file, 1);
8462 final_end_function ();
8463 assemble_end_function (thunk_fndecl, fnname);
8464 }
8465 #endif /* TARGET_ABI_OSF */
8466 \f
8467 /* Name of the file containing the current function. */
8468
8469 static const char *current_function_file = "";
8470
8471 /* Offsets to alpha virtual arg/local debugging pointers. */
8472
8473 long alpha_arg_offset;
8474 long alpha_auto_offset;
8475 \f
8476 /* Emit a new filename to a stream. */
8477
8478 void
8479 alpha_output_filename (FILE *stream, const char *name)
8480 {
8481 static int first_time = TRUE;
8482
8483 if (first_time)
8484 {
8485 first_time = FALSE;
8486 ++num_source_filenames;
8487 current_function_file = name;
8488 fprintf (stream, "\t.file\t%d ", num_source_filenames);
8489 output_quoted_string (stream, name);
8490 fprintf (stream, "\n");
8491 }
8492
8493 else if (name != current_function_file
8494 && strcmp (name, current_function_file) != 0)
8495 {
8496 ++num_source_filenames;
8497 current_function_file = name;
8498 fprintf (stream, "\t.file\t%d ", num_source_filenames);
8499
8500 output_quoted_string (stream, name);
8501 fprintf (stream, "\n");
8502 }
8503 }
8504 \f
8505 /* Structure to show the current status of registers and memory. */
8506
8507 struct shadow_summary
8508 {
8509 struct {
8510 unsigned int i : 31; /* Mask of int regs */
8511 unsigned int fp : 31; /* Mask of fp regs */
8512 unsigned int mem : 1; /* mem == imem | fpmem */
8513 } used, defd;
8514 };
8515
8516 /* Summarize the effects of expression X on the machine. Update SUM, a pointer
8517 to the summary structure. SET is nonzero if the insn is setting the
8518 object, otherwise zero. */
8519
8520 static void
8521 summarize_insn (rtx x, struct shadow_summary *sum, int set)
8522 {
8523 const char *format_ptr;
8524 int i, j;
8525
8526 if (x == 0)
8527 return;
8528
8529 switch (GET_CODE (x))
8530 {
8531 /* ??? Note that this case would be incorrect if the Alpha had a
8532 ZERO_EXTRACT in SET_DEST. */
8533 case SET:
8534 summarize_insn (SET_SRC (x), sum, 0);
8535 summarize_insn (SET_DEST (x), sum, 1);
8536 break;
8537
8538 case CLOBBER:
8539 summarize_insn (XEXP (x, 0), sum, 1);
8540 break;
8541
8542 case USE:
8543 summarize_insn (XEXP (x, 0), sum, 0);
8544 break;
8545
8546 case ASM_OPERANDS:
8547 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
8548 summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0);
8549 break;
8550
8551 case PARALLEL:
8552 for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
8553 summarize_insn (XVECEXP (x, 0, i), sum, 0);
8554 break;
8555
8556 case SUBREG:
8557 summarize_insn (SUBREG_REG (x), sum, 0);
8558 break;
8559
8560 case REG:
8561 {
8562 int regno = REGNO (x);
8563 unsigned long mask = ((unsigned long) 1) << (regno % 32);
8564
8565 if (regno == 31 || regno == 63)
8566 break;
8567
8568 if (set)
8569 {
8570 if (regno < 32)
8571 sum->defd.i |= mask;
8572 else
8573 sum->defd.fp |= mask;
8574 }
8575 else
8576 {
8577 if (regno < 32)
8578 sum->used.i |= mask;
8579 else
8580 sum->used.fp |= mask;
8581 }
8582 }
8583 break;
8584
8585 case MEM:
8586 if (set)
8587 sum->defd.mem = 1;
8588 else
8589 sum->used.mem = 1;
8590
8591 /* Find the regs used in memory address computation: */
8592 summarize_insn (XEXP (x, 0), sum, 0);
8593 break;
8594
8595 case CONST_INT: case CONST_WIDE_INT: case CONST_DOUBLE:
8596 case SYMBOL_REF: case LABEL_REF: case CONST:
8597 case SCRATCH: case ASM_INPUT:
8598 break;
8599
8600 /* Handle common unary and binary ops for efficiency. */
8601 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
8602 case MOD: case UDIV: case UMOD: case AND: case IOR:
8603 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
8604 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
8605 case NE: case EQ: case GE: case GT: case LE:
8606 case LT: case GEU: case GTU: case LEU: case LTU:
8607 summarize_insn (XEXP (x, 0), sum, 0);
8608 summarize_insn (XEXP (x, 1), sum, 0);
8609 break;
8610
8611 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
8612 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
8613 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
8614 case SQRT: case FFS:
8615 summarize_insn (XEXP (x, 0), sum, 0);
8616 break;
8617
8618 default:
8619 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
8620 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8621 switch (format_ptr[i])
8622 {
8623 case 'e':
8624 summarize_insn (XEXP (x, i), sum, 0);
8625 break;
8626
8627 case 'E':
8628 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8629 summarize_insn (XVECEXP (x, i, j), sum, 0);
8630 break;
8631
8632 case 'i':
8633 break;
8634
8635 default:
8636 gcc_unreachable ();
8637 }
8638 }
8639 }
8640
8641 /* Ensure a sufficient number of `trapb' insns are in the code when
8642 the user requests code with a trap precision of functions or
8643 instructions.
8644
8645 In naive mode, when the user requests a trap-precision of
8646 "instruction", a trapb is needed after every instruction that may
8647 generate a trap. This ensures that the code is resumption safe but
8648 it is also slow.
8649
8650 When optimizations are turned on, we delay issuing a trapb as long
8651 as possible. In this context, a trap shadow is the sequence of
8652 instructions that starts with a (potentially) trap generating
8653 instruction and extends to the next trapb or call_pal instruction
8654 (but GCC never generates call_pal by itself). We can delay (and
8655 therefore sometimes omit) a trapb subject to the following
8656 conditions:
8657
8658 (a) On entry to the trap shadow, if any Alpha register or memory
8659 location contains a value that is used as an operand value by some
8660 instruction in the trap shadow (live on entry), then no instruction
8661 in the trap shadow may modify the register or memory location.
8662
8663 (b) Within the trap shadow, the computation of the base register
8664 for a memory load or store instruction may not involve using the
8665 result of an instruction that might generate an UNPREDICTABLE
8666 result.
8667
8668 (c) Within the trap shadow, no register may be used more than once
8669 as a destination register. (This is to make life easier for the
8670 trap-handler.)
8671
8672 (d) The trap shadow may not include any branch instructions. */
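/* Illustrative note: conditions (a) and (c) above reduce to bitmask
   intersections on the shadow_summary masks defined earlier.  In
   alpha_handle_trap_shadows below, the summary SUM of a new insn
   violates (c) when

     (sum.defd.i & shadow.defd.i) || (sum.defd.fp & shadow.defd.fp)

   and violates (a) (which also covers (b)) when

     (sum.defd.i & shadow.used.i) || (sum.defd.fp & shadow.used.fp)
     || (sum.defd.mem & shadow.used.mem)

   In either case a trapb is emitted and the shadow is closed.  */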
8673
8674 static void
8675 alpha_handle_trap_shadows (void)
8676 {
8677 struct shadow_summary shadow;
8678 int trap_pending, exception_nesting;
8679 rtx_insn *i, *n;
8680
8681 trap_pending = 0;
8682 exception_nesting = 0;
8683 shadow.used.i = 0;
8684 shadow.used.fp = 0;
8685 shadow.used.mem = 0;
8686 shadow.defd = shadow.used;
8687
8688 for (i = get_insns (); i ; i = NEXT_INSN (i))
8689 {
8690 if (NOTE_P (i))
8691 {
8692 switch (NOTE_KIND (i))
8693 {
8694 case NOTE_INSN_EH_REGION_BEG:
8695 exception_nesting++;
8696 if (trap_pending)
8697 goto close_shadow;
8698 break;
8699
8700 case NOTE_INSN_EH_REGION_END:
8701 exception_nesting--;
8702 if (trap_pending)
8703 goto close_shadow;
8704 break;
8705
8706 case NOTE_INSN_EPILOGUE_BEG:
8707 if (trap_pending && alpha_tp >= ALPHA_TP_FUNC)
8708 goto close_shadow;
8709 break;
8710 }
8711 }
8712 else if (trap_pending)
8713 {
8714 if (alpha_tp == ALPHA_TP_FUNC)
8715 {
8716 if (JUMP_P (i)
8717 && GET_CODE (PATTERN (i)) == RETURN)
8718 goto close_shadow;
8719 }
8720 else if (alpha_tp == ALPHA_TP_INSN)
8721 {
8722 if (optimize > 0)
8723 {
8724 struct shadow_summary sum;
8725
8726 sum.used.i = 0;
8727 sum.used.fp = 0;
8728 sum.used.mem = 0;
8729 sum.defd = sum.used;
8730
8731 switch (GET_CODE (i))
8732 {
8733 case INSN:
8734 /* Annoyingly, get_attr_trap will die on these. */
8735 if (GET_CODE (PATTERN (i)) == USE
8736 || GET_CODE (PATTERN (i)) == CLOBBER)
8737 break;
8738
8739 summarize_insn (PATTERN (i), &sum, 0);
8740
8741 if ((sum.defd.i & shadow.defd.i)
8742 || (sum.defd.fp & shadow.defd.fp))
8743 {
8744 /* (c) would be violated */
8745 goto close_shadow;
8746 }
8747
8748 /* Combine shadow with summary of current insn: */
8749 shadow.used.i |= sum.used.i;
8750 shadow.used.fp |= sum.used.fp;
8751 shadow.used.mem |= sum.used.mem;
8752 shadow.defd.i |= sum.defd.i;
8753 shadow.defd.fp |= sum.defd.fp;
8754 shadow.defd.mem |= sum.defd.mem;
8755
8756 if ((sum.defd.i & shadow.used.i)
8757 || (sum.defd.fp & shadow.used.fp)
8758 || (sum.defd.mem & shadow.used.mem))
8759 {
8760 /* (a) would be violated (also takes care of (b)) */
8761 gcc_assert (get_attr_trap (i) != TRAP_YES
8762 || (!(sum.defd.i & sum.used.i)
8763 && !(sum.defd.fp & sum.used.fp)));
8764
8765 goto close_shadow;
8766 }
8767 break;
8768
8769 case BARRIER:
8770 /* __builtin_unreachable can expand to no code at all,
8771 leaving (barrier) RTXes in the instruction stream. */
8772 goto close_shadow_notrapb;
8773
8774 case JUMP_INSN:
8775 case CALL_INSN:
8776 case CODE_LABEL:
8777 goto close_shadow;
8778
8779 case DEBUG_INSN:
8780 break;
8781
8782 default:
8783 gcc_unreachable ();
8784 }
8785 }
8786 else
8787 {
8788 close_shadow:
8789 n = emit_insn_before (gen_trapb (), i);
8790 PUT_MODE (n, TImode);
8791 PUT_MODE (i, TImode);
8792 close_shadow_notrapb:
8793 trap_pending = 0;
8794 shadow.used.i = 0;
8795 shadow.used.fp = 0;
8796 shadow.used.mem = 0;
8797 shadow.defd = shadow.used;
8798 }
8799 }
8800 }
8801
8802 if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC)
8803 && NONJUMP_INSN_P (i)
8804 && GET_CODE (PATTERN (i)) != USE
8805 && GET_CODE (PATTERN (i)) != CLOBBER
8806 && get_attr_trap (i) == TRAP_YES)
8807 {
8808 if (optimize && !trap_pending)
8809 summarize_insn (PATTERN (i), &shadow, 0);
8810 trap_pending = 1;
8811 }
8812 }
8813 }
8814 \f
8815 /* Alpha can only issue instruction groups simultaneously if they are
8816 suitably aligned. This is very processor-specific. */
8817 /* There are a number of entries in alphaev4_insn_pipe and alphaev5_insn_pipe
8818 that are marked "fake". These instructions do not exist on that target,
8819 but it is possible to see these insns with deranged combinations of
8820 command-line options, such as "-mtune=ev4 -mmax". Instead of aborting,
8821 choose a result at random. */
8822
8823 enum alphaev4_pipe {
8824 EV4_STOP = 0,
8825 EV4_IB0 = 1,
8826 EV4_IB1 = 2,
8827 EV4_IBX = 4
8828 };
8829
8830 enum alphaev5_pipe {
8831 EV5_STOP = 0,
8832 EV5_NONE = 1,
8833 EV5_E01 = 2,
8834 EV5_E0 = 4,
8835 EV5_E1 = 8,
8836 EV5_FAM = 16,
8837 EV5_FA = 32,
8838 EV5_FM = 64
8839 };
8840
8841 static enum alphaev4_pipe
8842 alphaev4_insn_pipe (rtx_insn *insn)
8843 {
8844 if (recog_memoized (insn) < 0)
8845 return EV4_STOP;
8846 if (get_attr_length (insn) != 4)
8847 return EV4_STOP;
8848
8849 switch (get_attr_type (insn))
8850 {
8851 case TYPE_ILD:
8852 case TYPE_LDSYM:
8853 case TYPE_FLD:
8854 case TYPE_LD_L:
8855 return EV4_IBX;
8856
8857 case TYPE_IADD:
8858 case TYPE_ILOG:
8859 case TYPE_ICMOV:
8860 case TYPE_ICMP:
8861 case TYPE_FST:
8862 case TYPE_SHIFT:
8863 case TYPE_IMUL:
8864 case TYPE_FBR:
8865 case TYPE_MVI: /* fake */
8866 return EV4_IB0;
8867
8868 case TYPE_IST:
8869 case TYPE_MISC:
8870 case TYPE_IBR:
8871 case TYPE_JSR:
8872 case TYPE_CALLPAL:
8873 case TYPE_FCPYS:
8874 case TYPE_FCMOV:
8875 case TYPE_FADD:
8876 case TYPE_FDIV:
8877 case TYPE_FMUL:
8878 case TYPE_ST_C:
8879 case TYPE_MB:
8880 case TYPE_FSQRT: /* fake */
8881 case TYPE_FTOI: /* fake */
8882 case TYPE_ITOF: /* fake */
8883 return EV4_IB1;
8884
8885 default:
8886 gcc_unreachable ();
8887 }
8888 }
8889
8890 static enum alphaev5_pipe
8891 alphaev5_insn_pipe (rtx_insn *insn)
8892 {
8893 if (recog_memoized (insn) < 0)
8894 return EV5_STOP;
8895 if (get_attr_length (insn) != 4)
8896 return EV5_STOP;
8897
8898 switch (get_attr_type (insn))
8899 {
8900 case TYPE_ILD:
8901 case TYPE_FLD:
8902 case TYPE_LDSYM:
8903 case TYPE_IADD:
8904 case TYPE_ILOG:
8905 case TYPE_ICMOV:
8906 case TYPE_ICMP:
8907 return EV5_E01;
8908
8909 case TYPE_IST:
8910 case TYPE_FST:
8911 case TYPE_SHIFT:
8912 case TYPE_IMUL:
8913 case TYPE_MISC:
8914 case TYPE_MVI:
8915 case TYPE_LD_L:
8916 case TYPE_ST_C:
8917 case TYPE_MB:
8918 case TYPE_FTOI: /* fake */
8919 case TYPE_ITOF: /* fake */
8920 return EV5_E0;
8921
8922 case TYPE_IBR:
8923 case TYPE_JSR:
8924 case TYPE_CALLPAL:
8925 return EV5_E1;
8926
8927 case TYPE_FCPYS:
8928 return EV5_FAM;
8929
8930 case TYPE_FBR:
8931 case TYPE_FCMOV:
8932 case TYPE_FADD:
8933 case TYPE_FDIV:
8934 case TYPE_FSQRT: /* fake */
8935 return EV5_FA;
8936
8937 case TYPE_FMUL:
8938 return EV5_FM;
8939
8940 default:
8941 gcc_unreachable ();
8942 }
8943 }
8944
8945 /* IN_USE is a mask of the slots currently filled within the insn group.
8946 The mask bits come from alphaev4_pipe above. If EV4_IBX is set, then
8947 the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1.
8948
8949 LEN is, of course, the length of the group in bytes. */
8950
8951 static rtx_insn *
8952 alphaev4_next_group (rtx_insn *insn, int *pin_use, int *plen)
8953 {
8954 int len, in_use;
8955
8956 len = in_use = 0;
8957
8958 if (! INSN_P (insn)
8959 || GET_CODE (PATTERN (insn)) == CLOBBER
8960 || GET_CODE (PATTERN (insn)) == USE)
8961 goto next_and_done;
8962
8963 while (1)
8964 {
8965 enum alphaev4_pipe pipe;
8966
8967 pipe = alphaev4_insn_pipe (insn);
8968 switch (pipe)
8969 {
8970 case EV4_STOP:
8971 /* Force complex instructions to start new groups. */
8972 if (in_use)
8973 goto done;
8974
8975 /* If this is a completely unrecognized insn, it's an asm.
8976 We don't know how long it is, so record length as -1 to
8977 signal a needed realignment. */
8978 if (recog_memoized (insn) < 0)
8979 len = -1;
8980 else
8981 len = get_attr_length (insn);
8982 goto next_and_done;
8983
8984 case EV4_IBX:
8985 if (in_use & EV4_IB0)
8986 {
8987 if (in_use & EV4_IB1)
8988 goto done;
8989 in_use |= EV4_IB1;
8990 }
8991 else
8992 in_use |= EV4_IB0 | EV4_IBX;
8993 break;
8994
8995 case EV4_IB0:
8996 if (in_use & EV4_IB0)
8997 {
8998 if (!(in_use & EV4_IBX) || (in_use & EV4_IB1))
8999 goto done;
9000 in_use |= EV4_IB1;
9001 }
9002 in_use |= EV4_IB0;
9003 break;
9004
9005 case EV4_IB1:
9006 if (in_use & EV4_IB1)
9007 goto done;
9008 in_use |= EV4_IB1;
9009 break;
9010
9011 default:
9012 gcc_unreachable ();
9013 }
9014 len += 4;
9015
9016 /* Haifa doesn't do well scheduling branches. */
9017 if (JUMP_P (insn))
9018 goto next_and_done;
9019
9020 next:
9021 insn = next_nonnote_insn (insn);
9022
9023 if (!insn || ! INSN_P (insn))
9024 goto done;
9025
9026 /* Let Haifa tell us where it thinks insn group boundaries are. */
9027 if (GET_MODE (insn) == TImode)
9028 goto done;
9029
9030 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9031 goto next;
9032 }
9033
9034 next_and_done:
9035 insn = next_nonnote_insn (insn);
9036
9037 done:
9038 *plen = len;
9039 *pin_use = in_use;
9040 return insn;
9041 }
9042
9043 /* IN_USE is a mask of the slots currently filled within the insn group.
9044 The mask bits come from alphaev5_pipe above. If EV5_E01 is set, then
9045 the insn in EV5_E0 can be swapped by the hardware into EV5_E1.
9046
9047 LEN is, of course, the length of the group in bytes. */
9048
9049 static rtx_insn *
9050 alphaev5_next_group (rtx_insn *insn, int *pin_use, int *plen)
9051 {
9052 int len, in_use;
9053
9054 len = in_use = 0;
9055
9056 if (! INSN_P (insn)
9057 || GET_CODE (PATTERN (insn)) == CLOBBER
9058 || GET_CODE (PATTERN (insn)) == USE)
9059 goto next_and_done;
9060
9061 while (1)
9062 {
9063 enum alphaev5_pipe pipe;
9064
9065 pipe = alphaev5_insn_pipe (insn);
9066 switch (pipe)
9067 {
9068 case EV5_STOP:
9069 /* Force complex instructions to start new groups. */
9070 if (in_use)
9071 goto done;
9072
9073 /* If this is a completely unrecognized insn, it's an asm.
9074 We don't know how long it is, so record length as -1 to
9075 signal a needed realignment. */
9076 if (recog_memoized (insn) < 0)
9077 len = -1;
9078 else
9079 len = get_attr_length (insn);
9080 goto next_and_done;
9081
9082 /* ??? In most of the places below, we would like to assert that this
9083 can never happen, as it would indicate an error either in Haifa, or
9084 in the scheduling description. Unfortunately, Haifa never
9085 schedules the last instruction of the BB, so we don't have
9086 an accurate TI bit to go off. */
9087 case EV5_E01:
9088 if (in_use & EV5_E0)
9089 {
9090 if (in_use & EV5_E1)
9091 goto done;
9092 in_use |= EV5_E1;
9093 }
9094 else
9095 in_use |= EV5_E0 | EV5_E01;
9096 break;
9097
9098 case EV5_E0:
9099 if (in_use & EV5_E0)
9100 {
9101 if (!(in_use & EV5_E01) || (in_use & EV5_E1))
9102 goto done;
9103 in_use |= EV5_E1;
9104 }
9105 in_use |= EV5_E0;
9106 break;
9107
9108 case EV5_E1:
9109 if (in_use & EV5_E1)
9110 goto done;
9111 in_use |= EV5_E1;
9112 break;
9113
9114 case EV5_FAM:
9115 if (in_use & EV5_FA)
9116 {
9117 if (in_use & EV5_FM)
9118 goto done;
9119 in_use |= EV5_FM;
9120 }
9121 else
9122 in_use |= EV5_FA | EV5_FAM;
9123 break;
9124
9125 case EV5_FA:
9126 if (in_use & EV5_FA)
9127 goto done;
9128 in_use |= EV5_FA;
9129 break;
9130
9131 case EV5_FM:
9132 if (in_use & EV5_FM)
9133 goto done;
9134 in_use |= EV5_FM;
9135 break;
9136
9137 case EV5_NONE:
9138 break;
9139
9140 default:
9141 gcc_unreachable ();
9142 }
9143 len += 4;
9144
9145 /* Haifa doesn't do well scheduling branches. */
9146 /* ??? If this is predicted not-taken, slotting continues, except
9147 that no more IBR, FBR, or JSR insns may be slotted. */
9148 if (JUMP_P (insn))
9149 goto next_and_done;
9150
9151 next:
9152 insn = next_nonnote_insn (insn);
9153
9154 if (!insn || ! INSN_P (insn))
9155 goto done;
9156
9157 /* Let Haifa tell us where it thinks insn group boundaries are. */
9158 if (GET_MODE (insn) == TImode)
9159 goto done;
9160
9161 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9162 goto next;
9163 }
9164
9165 next_and_done:
9166 insn = next_nonnote_insn (insn);
9167
9168 done:
9169 *plen = len;
9170 *pin_use = in_use;
9171 return insn;
9172 }
9173
9174 static rtx
9175 alphaev4_next_nop (int *pin_use)
9176 {
9177 int in_use = *pin_use;
9178 rtx nop;
9179
9180 if (!(in_use & EV4_IB0))
9181 {
9182 in_use |= EV4_IB0;
9183 nop = gen_nop ();
9184 }
9185 else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX)
9186 {
9187 in_use |= EV4_IB1;
9188 nop = gen_nop ();
9189 }
9190 else if (TARGET_FP && !(in_use & EV4_IB1))
9191 {
9192 in_use |= EV4_IB1;
9193 nop = gen_fnop ();
9194 }
9195 else
9196 nop = gen_unop ();
9197
9198 *pin_use = in_use;
9199 return nop;
9200 }
9201
9202 static rtx
9203 alphaev5_next_nop (int *pin_use)
9204 {
9205 int in_use = *pin_use;
9206 rtx nop;
9207
9208 if (!(in_use & EV5_E1))
9209 {
9210 in_use |= EV5_E1;
9211 nop = gen_nop ();
9212 }
9213 else if (TARGET_FP && !(in_use & EV5_FA))
9214 {
9215 in_use |= EV5_FA;
9216 nop = gen_fnop ();
9217 }
9218 else if (TARGET_FP && !(in_use & EV5_FM))
9219 {
9220 in_use |= EV5_FM;
9221 nop = gen_fnop ();
9222 }
9223 else
9224 nop = gen_unop ();
9225
9226 *pin_use = in_use;
9227 return nop;
9228 }
9229
9230 /* The instruction group alignment main loop. */
9231
9232 static void
9233 alpha_align_insns_1 (unsigned int max_align,
9234 rtx_insn *(*next_group) (rtx_insn *, int *, int *),
9235 rtx (*next_nop) (int *))
9236 {
9237 /* ALIGN is the known alignment for the insn group. */
9238 unsigned int align;
9239 /* OFS is the offset of the current insn in the insn group. */
9240 int ofs;
9241 int prev_in_use, in_use, len, ldgp;
9242 rtx_insn *i, *next;
9243
9244 /* Let shorten branches care for assigning alignments to code labels. */
9245 shorten_branches (get_insns ());
9246
9247 unsigned int option_alignment = align_functions.levels[0].get_value ();
9248 if (option_alignment < 4)
9249 align = 4;
9250 else if ((unsigned int) option_alignment < max_align)
9251 align = option_alignment;
9252 else
9253 align = max_align;
9254
9255 ofs = prev_in_use = 0;
9256 i = get_insns ();
9257 if (NOTE_P (i))
9258 i = next_nonnote_insn (i);
9259
9260 ldgp = alpha_function_needs_gp ? 8 : 0;
9261
9262 while (i)
9263 {
9264 next = (*next_group) (i, &in_use, &len);
9265
9266 /* When we see a label, resync alignment etc. */
9267 if (LABEL_P (i))
9268 {
9269 unsigned int new_align
9270 = label_to_alignment (i).levels[0].get_value ();
9271
9272 if (new_align >= align)
9273 {
9274 align = new_align < max_align ? new_align : max_align;
9275 ofs = 0;
9276 }
9277
9278 else if (ofs & (new_align-1))
9279 ofs = (ofs | (new_align-1)) + 1;
9280 gcc_assert (!len);
9281 }
9282
9283 /* Handle complex instructions specially. */
9284 else if (in_use == 0)
9285 {
9286 /* Asms will have length < 0. This is a signal that we have
9287 lost alignment knowledge. Assume, however, that the asm
9288 will not mis-align instructions. */
9289 if (len < 0)
9290 {
9291 ofs = 0;
9292 align = 4;
9293 len = 0;
9294 }
9295 }
9296
9297 /* If the known alignment is smaller than the recognized insn group,
9298 realign the output. */
9299 else if ((int) align < len)
9300 {
9301 unsigned int new_log_align = len > 8 ? 4 : 3;
9302 rtx_insn *prev, *where;
9303
9304 where = prev = prev_nonnote_insn (i);
9305 if (!where || !LABEL_P (where))
9306 where = i;
9307
9308 /* Can't realign between a call and its gp reload. */
9309 if (! (TARGET_EXPLICIT_RELOCS
9310 && prev && CALL_P (prev)))
9311 {
9312 emit_insn_before (gen_realign (GEN_INT (new_log_align)), where);
9313 align = 1 << new_log_align;
9314 ofs = 0;
9315 }
9316 }
9317
9318 /* We may not insert padding inside the initial ldgp sequence. */
9319 else if (ldgp > 0)
9320 ldgp -= len;
9321
9322 /* If the group won't fit in the same INT16 as the previous,
9323 we need to add padding to keep the group together. Rather
9324 than simply leaving the insn filling to the assembler, we
9325 can make use of the knowledge of what sorts of instructions
9326 were issued in the previous group to make sure that all of
9327 the added nops are really free. */
9328 else if (ofs + len > (int) align)
9329 {
9330 int nop_count = (align - ofs) / 4;
9331 rtx_insn *where;
9332
9333 /* Insert nops before labels, branches, and calls to truly merge
9334 the execution of the nops with the previous instruction group. */
9335 where = prev_nonnote_insn (i);
9336 if (where)
9337 {
9338 if (LABEL_P (where))
9339 {
9340 rtx_insn *where2 = prev_nonnote_insn (where);
9341 if (where2 && JUMP_P (where2))
9342 where = where2;
9343 }
9344 else if (NONJUMP_INSN_P (where))
9345 where = i;
9346 }
9347 else
9348 where = i;
9349
9350 do
9351 emit_insn_before ((*next_nop)(&prev_in_use), where);
9352 while (--nop_count);
9353 ofs = 0;
9354 }
9355
9356 ofs = (ofs + len) & (align - 1);
9357 prev_in_use = in_use;
9358 i = next;
9359 }
9360 }
9361
9362 static void
9363 alpha_align_insns (void)
9364 {
9365 if (alpha_tune == PROCESSOR_EV4)
9366 alpha_align_insns_1 (8, alphaev4_next_group, alphaev4_next_nop);
9367 else if (alpha_tune == PROCESSOR_EV5)
9368 alpha_align_insns_1 (16, alphaev5_next_group, alphaev5_next_nop);
9369 else
9370 gcc_unreachable ();
9371 }
9372
9373 /* Insert an unop between a sibcall or noreturn function call and the GP load. */
9374
9375 static void
9376 alpha_pad_function_end (void)
9377 {
9378 rtx_insn *insn, *next;
9379
9380 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9381 {
9382 if (!CALL_P (insn)
9383 || !(SIBLING_CALL_P (insn)
9384 || find_reg_note (insn, REG_NORETURN, NULL_RTX)))
9385 continue;
9386
9387 next = next_active_insn (insn);
9388 if (next)
9389 {
9390 rtx pat = PATTERN (next);
9391
9392 if (GET_CODE (pat) == SET
9393 && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
9394 && XINT (SET_SRC (pat), 1) == UNSPECV_LDGP1)
9395 emit_insn_after (gen_unop (), insn);
9396 }
9397 }
9398 }
9399 \f
9400 /* Machine dependent reorg pass. */
9401
9402 static void
9403 alpha_reorg (void)
9404 {
9405 /* Workaround for a linker error that triggers when an exception
9406 handler immediately follows a sibcall or a noreturn function.
9407
9408 In the sibcall case:
9409
9410 The instruction stream from an object file:
9411
9412 1d8: 00 00 fb 6b jmp (t12)
9413 1dc: 00 00 ba 27 ldah gp,0(ra)
9414 1e0: 00 00 bd 23 lda gp,0(gp)
9415 1e4: 00 00 7d a7 ldq t12,0(gp)
9416 1e8: 00 40 5b 6b jsr ra,(t12),1ec <__funcZ+0x1ec>
9417
9418 was converted in the final link pass to:
9419
9420 12003aa88: 67 fa ff c3 br 120039428 <...>
9421 12003aa8c: 00 00 fe 2f unop
9422 12003aa90: 00 00 fe 2f unop
9423 12003aa94: 48 83 7d a7 ldq t12,-31928(gp)
9424 12003aa98: 00 40 5b 6b jsr ra,(t12),12003aa9c <__func+0x1ec>
9425
9426 And in the noreturn case:
9427
9428 The instruction stream from an object file:
9429
9430 54: 00 40 5b 6b jsr ra,(t12),58 <__func+0x58>
9431 58: 00 00 ba 27 ldah gp,0(ra)
9432 5c: 00 00 bd 23 lda gp,0(gp)
9433 60: 00 00 7d a7 ldq t12,0(gp)
9434 64: 00 40 5b 6b jsr ra,(t12),68 <__func+0x68>
9435
9436 was converted in the final link pass to:
9437
9438 fdb24: a0 03 40 d3 bsr ra,fe9a8 <_called_func+0x8>
9439 fdb28: 00 00 fe 2f unop
9440 fdb2c: 00 00 fe 2f unop
9441 fdb30: 30 82 7d a7 ldq t12,-32208(gp)
9442 fdb34: 00 40 5b 6b jsr ra,(t12),fdb38 <__func+0x68>
9443
9444 GP load instructions were wrongly cleared by the linker relaxation
9445 pass. This workaround prevents removal of GP loads by inserting
9446 an unop instruction between a sibcall or noreturn function call and
9447 exception handler prologue. */
9448
9449 if (current_function_has_exception_handlers ())
9450 alpha_pad_function_end ();
9451
9452 /* The CALL_PAL that implements the trap insn updates the program counter
9453 to point after the insn. If the trap is the last insn in the function,
9454 emit a NOP to guarantee that the PC remains inside the function boundaries.
9455 This workaround is needed to get reliable backtraces. */
9456
9457 rtx_insn *insn = prev_active_insn (get_last_insn ());
9458
9459 if (insn && NONJUMP_INSN_P (insn))
9460 {
9461 rtx pat = PATTERN (insn);
9462 if (GET_CODE (pat) == PARALLEL)
9463 {
9464 rtx vec = XVECEXP (pat, 0, 0);
9465 if (GET_CODE (vec) == TRAP_IF
9466 && XEXP (vec, 0) == const1_rtx)
9467 emit_insn_after (gen_unop (), insn);
9468 }
9469 }
9470 }
9471 \f
9472 static void
9473 alpha_file_start (void)
9474 {
9475 default_file_start ();
9476
9477 fputs ("\t.set noreorder\n", asm_out_file);
9478 fputs ("\t.set volatile\n", asm_out_file);
9479 if (TARGET_ABI_OSF)
9480 fputs ("\t.set noat\n", asm_out_file);
9481 if (TARGET_EXPLICIT_RELOCS)
9482 fputs ("\t.set nomacro\n", asm_out_file);
9483 if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX)
9484 {
9485 const char *arch;
9486
9487 if (alpha_cpu == PROCESSOR_EV6 || TARGET_FIX || TARGET_CIX)
9488 arch = "ev6";
9489 else if (TARGET_MAX)
9490 arch = "pca56";
9491 else if (TARGET_BWX)
9492 arch = "ev56";
9493 else if (alpha_cpu == PROCESSOR_EV5)
9494 arch = "ev5";
9495 else
9496 arch = "ev4";
9497
9498 fprintf (asm_out_file, "\t.arch %s\n", arch);
9499 }
9500 }
9501
9502 /* Since we don't have a .dynbss section, we should not allow global
9503 relocations in the .rodata section. */
9504
9505 static int
9506 alpha_elf_reloc_rw_mask (void)
9507 {
9508 return flag_pic ? 3 : 2;
9509 }
9510
9511 /* Return a section for X. The only special thing we do here is to
9512 honor small data. */
9513
9514 static section *
9515 alpha_elf_select_rtx_section (machine_mode mode, rtx x,
9516 unsigned HOST_WIDE_INT align)
9517 {
9518 if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value)
9519 /* ??? Consider using mergeable sdata sections. */
9520 return sdata_section;
9521 else
9522 return default_elf_select_rtx_section (mode, x, align);
9523 }
9524
9525 static unsigned int
9526 alpha_elf_section_type_flags (tree decl, const char *name, int reloc)
9527 {
9528 unsigned int flags = 0;
9529
9530 if (strcmp (name, ".sdata") == 0
9531 || startswith (name, ".sdata.")
9532 || startswith (name, ".gnu.linkonce.s.")
9533 || strcmp (name, ".sbss") == 0
9534 || startswith (name, ".sbss.")
9535 || startswith (name, ".gnu.linkonce.sb."))
9536 flags = SECTION_SMALL;
9537
9538 flags |= default_section_type_flags (decl, name, reloc);
9539 return flags;
9540 }
9541 \f
9542 /* Structure to collect function names for final output in link section. */
9543 /* Note that items marked with GTY can't be ifdef'ed out. */
9544
9545 enum reloc_kind
9546 {
9547 KIND_LINKAGE,
9548 KIND_CODEADDR
9549 };
9550
9551 struct GTY(()) alpha_links
9552 {
9553 rtx func;
9554 rtx linkage;
9555 enum reloc_kind rkind;
9556 };
9557
9558 #if TARGET_ABI_OPEN_VMS
9559
9560 /* Return the VMS argument type corresponding to MODE. */
9561
9562 enum avms_arg_type
9563 alpha_arg_type (machine_mode mode)
9564 {
9565 switch (mode)
9566 {
9567 case E_SFmode:
9568 return TARGET_FLOAT_VAX ? FF : FS;
9569 case E_DFmode:
9570 return TARGET_FLOAT_VAX ? FD : FT;
9571 default:
9572 return I64;
9573 }
9574 }
9575
9576 /* Return an rtx for an integer representing the VMS Argument Information
9577 register value. */
9578
9579 rtx
9580 alpha_arg_info_reg_val (CUMULATIVE_ARGS cum)
9581 {
9582 unsigned HOST_WIDE_INT regval = cum.num_args;
9583 int i;
9584
9585 for (i = 0; i < 6; i++)
9586 regval |= ((int) cum.atypes[i]) << (i * 3 + 8);
9587
9588 return GEN_INT (regval);
9589 }
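
/* For example, with two register arguments the loop above yields

     regval = 2 | (atypes[0] << 8) | (atypes[1] << 11);

   the remaining slots are OR-ed in the same way at bit positions 14,
   17, 20 and 23 (i * 3 + 8 for i = 2..5).  */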
9590 \f
9591
9592 /* Return a SYMBOL_REF representing the reference to the .linkage entry
9593 of function FUNC built for calls made from CFUNDECL. LFLAG is 1 if
9594 this is the reference to the linkage pointer value, 0 if this is the
9595 reference to the function entry value. RFLAG is 1 if this is a reduced
9596 reference (code address only), 0 if this is a full reference. */
9597
9598 rtx
9599 alpha_use_linkage (rtx func, bool lflag, bool rflag)
9600 {
9601 struct alpha_links *al = NULL;
9602 const char *name = XSTR (func, 0);
9603
9604 if (cfun->machine->links)
9605 {
9606 /* Is this name already defined? */
9607 alpha_links **slot = cfun->machine->links->get (name);
9608 if (slot)
9609 al = *slot;
9610 }
9611 else
9612 cfun->machine->links
9613 = hash_map<nofree_string_hash, alpha_links *>::create_ggc (64);
9614
9615 if (al == NULL)
9616 {
9617 size_t buf_len;
9618 char *linksym;
9619 tree id;
9620
9621 if (name[0] == '*')
9622 name++;
9623
9624 /* Follow transparent alias, as this is used for CRTL translations. */
9625 id = maybe_get_identifier (name);
9626 if (id)
9627 {
9628 while (IDENTIFIER_TRANSPARENT_ALIAS (id))
9629 id = TREE_CHAIN (id);
9630 name = IDENTIFIER_POINTER (id);
9631 }
9632
9633 buf_len = strlen (name) + 8 + 9;
9634 linksym = (char *) alloca (buf_len);
9635 snprintf (linksym, buf_len, "$%d..%s..lk", cfun->funcdef_no, name);
9636
9637 al = ggc_alloc<alpha_links> ();
9638 al->func = func;
9639 al->linkage = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (linksym));
9640
9641 cfun->machine->links->put (ggc_strdup (name), al);
9642 }
9643
9644 al->rkind = rflag ? KIND_CODEADDR : KIND_LINKAGE;
9645
9646 if (lflag)
9647 return gen_rtx_MEM (Pmode, plus_constant (Pmode, al->linkage, 8));
9648 else
9649 return al->linkage;
9650 }
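
/* For example (illustrative names only), a reference to FOO from the
   12th function compiled creates the linkage symbol "$12..FOO..lk";
   with LFLAG set the caller is handed a MEM at that symbol plus 8,
   i.e. the second quadword of the linkage pair that
   alpha_write_one_linkage below writes out.  */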
9651
9652 static int
9653 alpha_write_one_linkage (const char *name, alpha_links *link, FILE *stream)
9654 {
9655 ASM_OUTPUT_INTERNAL_LABEL (stream, XSTR (link->linkage, 0));
9656 if (link->rkind == KIND_CODEADDR)
9657 {
9658 /* External and used, request code address. */
9659 fprintf (stream, "\t.code_address ");
9660 }
9661 else
9662 {
9663 if (!SYMBOL_REF_EXTERNAL_P (link->func)
9664 && SYMBOL_REF_LOCAL_P (link->func))
9665 {
9666 /* Locally defined, build linkage pair. */
9667 fprintf (stream, "\t.quad %s..en\n", name);
9668 fprintf (stream, "\t.quad ");
9669 }
9670 else
9671 {
9672 /* External, request linkage pair. */
9673 fprintf (stream, "\t.linkage ");
9674 }
9675 }
9676 assemble_name (stream, name);
9677 fputs ("\n", stream);
9678
9679 return 0;
9680 }
9681
9682 static void
9683 alpha_write_linkage (FILE *stream, const char *funname)
9684 {
9685 fprintf (stream, "\t.link\n");
9686 fprintf (stream, "\t.align 3\n");
9687 in_section = NULL;
9688
9689 #ifdef TARGET_VMS_CRASH_DEBUG
9690 fputs ("\t.name ", stream);
9691 assemble_name (stream, funname);
9692 fputs ("..na\n", stream);
9693 #endif
9694
9695 ASM_OUTPUT_LABEL (stream, funname);
9696 fprintf (stream, "\t.pdesc ");
9697 assemble_name (stream, funname);
9698 fprintf (stream, "..en,%s\n",
9699 alpha_procedure_type == PT_STACK ? "stack"
9700 : alpha_procedure_type == PT_REGISTER ? "reg" : "null");
9701
9702 if (cfun->machine->links)
9703 {
9704 hash_map<nofree_string_hash, alpha_links *>::iterator iter
9705 = cfun->machine->links->begin ();
9706 for (; iter != cfun->machine->links->end (); ++iter)
9707 alpha_write_one_linkage ((*iter).first, (*iter).second, stream);
9708 }
9709 }
9710
9711 /* Switch to an arbitrary section NAME with attributes as specified
9712 by FLAGS.  DECL is the VAR_DECL or FUNCTION_DECL with which the
9713 section is associated, if any.  */
9714
9715 static void
9716 vms_asm_named_section (const char *name, unsigned int flags,
9717 tree decl ATTRIBUTE_UNUSED)
9718 {
9719 fputc ('\n', asm_out_file);
9720 fprintf (asm_out_file, ".section\t%s", name);
9721
9722 if (flags & SECTION_DEBUG)
9723 fprintf (asm_out_file, ",NOWRT");
9724
9725 fputc ('\n', asm_out_file);
9726 }
9727
9728 /* Record an element in the table of global constructors. SYMBOL is
9729 a SYMBOL_REF of the function to be called; PRIORITY is a number
9730 between 0 and MAX_INIT_PRIORITY.
9731
9732 Differs from default_ctors_section_asm_out_constructor in that the
9733 width of the .ctors entry is always 64 bits, rather than the 32 bits
9734 used by a normal pointer. */
9735
9736 static void
9737 vms_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9738 {
9739 switch_to_section (ctors_section);
9740 assemble_align (BITS_PER_WORD);
9741 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9742 }
9743
9744 static void
9745 vms_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9746 {
9747 switch_to_section (dtors_section);
9748 assemble_align (BITS_PER_WORD);
9749 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9750 }
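/* Illustrative output: each entry switches to the constructor (or
   destructor) section, aligns to 64 bits and emits a full 64-bit pointer,
   presumably via the "\t.quad\t" aligned-DI op defined further below,
   roughly

       .align 3
       .quad __hypothetical_ctor

   rather than the 32-bit entry a normal pointer would produce.  */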
9751 #else
9752 rtx
9753 alpha_use_linkage (rtx func ATTRIBUTE_UNUSED,
9754 bool lflag ATTRIBUTE_UNUSED,
9755 bool rflag ATTRIBUTE_UNUSED)
9756 {
9757 return NULL_RTX;
9758 }
9759
9760 #endif /* TARGET_ABI_OPEN_VMS */
9761 \f
9762 static void
9763 alpha_init_libfuncs (void)
9764 {
9765 if (TARGET_ABI_OPEN_VMS)
9766 {
9767 /* Use the VMS runtime library functions for division and
9768 remainder. */
9769 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
9770 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
9771 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
9772 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
9773 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
9774 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
9775 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
9776 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
9777 #ifdef MEM_LIBFUNCS_INIT
9778 MEM_LIBFUNCS_INIT;
9779 #endif
9780 }
9781 }
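/* For example (illustrative only): with the table above in effect, a
   32-bit signed division in VMS code, say

       int quot (int a, int b) { return a / b; }

   compiles to a call to OTS$DIV_I rather than the default library routine,
   since Alpha has no integer divide instruction.  */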
9782
9783 /* On the Alpha, we use this to disable the floating-point registers
9784 when they don't exist. */
9785
9786 static void
9787 alpha_conditional_register_usage (void)
9788 {
9789 int i;
9790 if (! TARGET_FPREGS)
9791 for (i = 32; i < 63; i++)
9792 fixed_regs[i] = call_used_regs[i] = 1;
9793 }
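/* Note: the loop above stops at register 62 because hard register 63
   ($f31) is the floating-point zero register and is presumably already
   fixed regardless of -mfp-regs.  */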
9794
9795 /* Canonicalize a comparison from one we don't have to one we do have. */
9796
9797 static void
9798 alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
9799 bool op0_preserve_value)
9800 {
9801 if (!op0_preserve_value
9802 && (*code == GE || *code == GT || *code == GEU || *code == GTU)
9803 && (REG_P (*op1) || *op1 == const0_rtx))
9804 {
9805 std::swap (*op0, *op1);
9806 *code = (int)swap_condition ((enum rtx_code)*code);
9807 }
9808
9809 if ((*code == LT || *code == LTU)
9810 && CONST_INT_P (*op1) && INTVAL (*op1) == 256)
9811 {
9812 *code = *code == LT ? LE : LEU;
9813 *op1 = GEN_INT (255);
9814 }
9815 }
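/* Worked example (illustrative): with !OP0_PRESERVE_VALUE, a comparison
   such as

       (gt:DI (reg x) (reg y))   becomes   (lt:DI (reg y) (reg x))

   and an unsigned "x < 256" becomes "x <= 255", presumably so that the
   constant fits the 8-bit literal field of the Alpha compare insns.  */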
9816
9817 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
9818
9819 static void
9820 alpha_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
9821 {
9822 const unsigned HOST_WIDE_INT SWCR_STATUS_MASK = (0x3fUL << 17);
9823
9824 tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
9825 tree new_fenv_var, reload_fenv, restore_fnenv;
9826 tree update_call, atomic_feraiseexcept, hold_fnclex;
9827
9828 /* Assume OSF/1 compatible interfaces. */
9829 if (!TARGET_ABI_OSF)
9830 return;
9831
9832 /* Generate the equivalent of:
9833 unsigned long fenv_var;
9834 fenv_var = __ieee_get_fp_control ();
9835
9836 unsigned long masked_fenv;
9837 masked_fenv = fenv_var & mask;
9838
9839 __ieee_set_fp_control (masked_fenv); */
9840
9841 fenv_var = create_tmp_var_raw (long_unsigned_type_node);
9842 get_fpscr
9843 = build_fn_decl ("__ieee_get_fp_control",
9844 build_function_type_list (long_unsigned_type_node, NULL));
9845 set_fpscr
9846 = build_fn_decl ("__ieee_set_fp_control",
9847 build_function_type_list (void_type_node, NULL));
9848 mask = build_int_cst (long_unsigned_type_node, ~SWCR_STATUS_MASK);
9849 ld_fenv = build4 (TARGET_EXPR, long_unsigned_type_node, fenv_var,
9850 build_call_expr (get_fpscr, 0), NULL_TREE, NULL_TREE);
9851 masked_fenv = build2 (BIT_AND_EXPR, long_unsigned_type_node, fenv_var, mask);
9852 hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
9853 *hold = build2 (COMPOUND_EXPR, void_type_node,
9854 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
9855 hold_fnclex);
9856
9857 /* Store the value of masked_fenv to clear the exceptions:
9858 __ieee_set_fp_control (masked_fenv); */
9859
9860 *clear = build_call_expr (set_fpscr, 1, masked_fenv);
9861
9862 /* Generate the equivalent of:
9863 unsigned long new_fenv_var;
9864 new_fenv_var = __ieee_get_fp_control ();
9865
9866 __ieee_set_fp_control (fenv_var);
9867
9868 __atomic_feraiseexcept (new_fenv_var); */
9869
9870 new_fenv_var = create_tmp_var_raw (long_unsigned_type_node);
9871 reload_fenv = build4 (TARGET_EXPR, long_unsigned_type_node, new_fenv_var,
9872 build_call_expr (get_fpscr, 0), NULL_TREE, NULL_TREE);
9873 restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
9874 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
9875 update_call
9876 = build_call_expr (atomic_feraiseexcept, 1,
9877 fold_convert (integer_type_node, new_fenv_var));
9878 *update = build2 (COMPOUND_EXPR, void_type_node,
9879 build2 (COMPOUND_EXPR, void_type_node,
9880 reload_fenv, restore_fnenv), update_call);
9881 }
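/* A hedged sketch of how the three sequences are meant to be used (this
   follows the generic contract of the hook, nothing Alpha-specific): they
   play the roles of the fenv.h calls around an atomic compound assignment
   such as "_Atomic double x; x += y;", roughly

       feholdexcept (&env);     // *hold:   save env, mask status bits
       ... compare-and-swap loop, with a feclearexcept-like *clear
           before retrying ...
       feupdateenv (&env);      // *update: restore env, raise exceptions

   implemented here with __ieee_get_fp_control/__ieee_set_fp_control and
   __atomic_feraiseexcept instead of the libm functions.  */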
9882
9883 /* Implement TARGET_HARD_REGNO_MODE_OK. On Alpha, the integer registers
9884 can hold any mode. The floating-point registers can hold 64-bit
9885 integers as well, but not smaller values. */
9886
9887 static bool
9888 alpha_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
9889 {
9890 if (IN_RANGE (regno, 32, 62))
9891 return (mode == SFmode
9892 || mode == DFmode
9893 || mode == DImode
9894 || mode == SCmode
9895 || mode == DCmode);
9896 return true;
9897 }
9898
9899 /* Implement TARGET_MODES_TIEABLE_P.  This asymmetric test fails (the modes
9900 are not tieable) exactly when MODE1 could go in an FP register but MODE2 could not. */
9901
9902 static bool
9903 alpha_modes_tieable_p (machine_mode mode1, machine_mode mode2)
9904 {
9905 return (alpha_hard_regno_mode_ok (32, mode1)
9906 ? alpha_hard_regno_mode_ok (32, mode2)
9907 : true);
9908 }
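/* Illustrative consequences of the two predicates above:

       alpha_hard_regno_mode_ok (40, DImode)   -> true   (FP reg, 64-bit)
       alpha_hard_regno_mode_ok (40, SImode)   -> false  (FP reg, 32-bit)
       alpha_modes_tieable_p (SImode, DImode)  -> true
       alpha_modes_tieable_p (DImode, SImode)  -> false

   (register 40 is just an arbitrary FP register number in 32..62).  */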
9909
9910 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
9911
9912 static bool
9913 alpha_can_change_mode_class (machine_mode from, machine_mode to,
9914 reg_class_t rclass)
9915 {
9916 return (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
9917 || !reg_classes_intersect_p (FLOAT_REGS, rclass));
9918 }
9919 \f
9920 /* Initialize the GCC target structure. */
9921 #if TARGET_ABI_OPEN_VMS
9922 # undef TARGET_ATTRIBUTE_TABLE
9923 # define TARGET_ATTRIBUTE_TABLE vms_attribute_table
9924 # undef TARGET_CAN_ELIMINATE
9925 # define TARGET_CAN_ELIMINATE alpha_vms_can_eliminate
9926 #endif
9927
9928 #undef TARGET_IN_SMALL_DATA_P
9929 #define TARGET_IN_SMALL_DATA_P alpha_in_small_data_p
9930
9931 #undef TARGET_ASM_ALIGNED_HI_OP
9932 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
9933 #undef TARGET_ASM_ALIGNED_DI_OP
9934 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
9935
9936 /* Default unaligned ops are provided for ELF systems. To get unaligned
9937 data for non-ELF systems, we have to turn off auto alignment. */
9938 #if TARGET_ABI_OPEN_VMS
9939 #undef TARGET_ASM_UNALIGNED_HI_OP
9940 #define TARGET_ASM_UNALIGNED_HI_OP "\t.align 0\n\t.word\t"
9941 #undef TARGET_ASM_UNALIGNED_SI_OP
9942 #define TARGET_ASM_UNALIGNED_SI_OP "\t.align 0\n\t.long\t"
9943 #undef TARGET_ASM_UNALIGNED_DI_OP
9944 #define TARGET_ASM_UNALIGNED_DI_OP "\t.align 0\n\t.quad\t"
9945 #endif
9946
9947 #undef TARGET_ASM_RELOC_RW_MASK
9948 #define TARGET_ASM_RELOC_RW_MASK alpha_elf_reloc_rw_mask
9949 #undef TARGET_ASM_SELECT_RTX_SECTION
9950 #define TARGET_ASM_SELECT_RTX_SECTION alpha_elf_select_rtx_section
9951 #undef TARGET_SECTION_TYPE_FLAGS
9952 #define TARGET_SECTION_TYPE_FLAGS alpha_elf_section_type_flags
9953
9954 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
9955 #define TARGET_ASM_FUNCTION_END_PROLOGUE alpha_output_function_end_prologue
9956
9957 #undef TARGET_INIT_LIBFUNCS
9958 #define TARGET_INIT_LIBFUNCS alpha_init_libfuncs
9959
9960 #undef TARGET_LEGITIMIZE_ADDRESS
9961 #define TARGET_LEGITIMIZE_ADDRESS alpha_legitimize_address
9962 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
9963 #define TARGET_MODE_DEPENDENT_ADDRESS_P alpha_mode_dependent_address_p
9964
9965 #undef TARGET_ASM_FILE_START
9966 #define TARGET_ASM_FILE_START alpha_file_start
9967
9968 #undef TARGET_SCHED_ADJUST_COST
9969 #define TARGET_SCHED_ADJUST_COST alpha_adjust_cost
9970 #undef TARGET_SCHED_ISSUE_RATE
9971 #define TARGET_SCHED_ISSUE_RATE alpha_issue_rate
9972 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
9973 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
9974 alpha_multipass_dfa_lookahead
9975
9976 #undef TARGET_HAVE_TLS
9977 #define TARGET_HAVE_TLS HAVE_AS_TLS
9978
9979 #undef TARGET_BUILTIN_DECL
9980 #define TARGET_BUILTIN_DECL alpha_builtin_decl
9981 #undef TARGET_INIT_BUILTINS
9982 #define TARGET_INIT_BUILTINS alpha_init_builtins
9983 #undef TARGET_EXPAND_BUILTIN
9984 #define TARGET_EXPAND_BUILTIN alpha_expand_builtin
9985 #undef TARGET_FOLD_BUILTIN
9986 #define TARGET_FOLD_BUILTIN alpha_fold_builtin
9987 #undef TARGET_GIMPLE_FOLD_BUILTIN
9988 #define TARGET_GIMPLE_FOLD_BUILTIN alpha_gimple_fold_builtin
9989
9990 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
9991 #define TARGET_FUNCTION_OK_FOR_SIBCALL alpha_function_ok_for_sibcall
9992 #undef TARGET_CANNOT_COPY_INSN_P
9993 #define TARGET_CANNOT_COPY_INSN_P alpha_cannot_copy_insn_p
9994 #undef TARGET_LEGITIMATE_CONSTANT_P
9995 #define TARGET_LEGITIMATE_CONSTANT_P alpha_legitimate_constant_p
9996 #undef TARGET_CANNOT_FORCE_CONST_MEM
9997 #define TARGET_CANNOT_FORCE_CONST_MEM alpha_cannot_force_const_mem
9998
9999 #if TARGET_ABI_OSF
10000 #undef TARGET_ASM_OUTPUT_MI_THUNK
10001 #define TARGET_ASM_OUTPUT_MI_THUNK alpha_output_mi_thunk_osf
10002 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
10003 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
10004 #undef TARGET_STDARG_OPTIMIZE_HOOK
10005 #define TARGET_STDARG_OPTIMIZE_HOOK alpha_stdarg_optimize_hook
10006 #endif
10007
10008 #undef TARGET_PRINT_OPERAND
10009 #define TARGET_PRINT_OPERAND alpha_print_operand
10010 #undef TARGET_PRINT_OPERAND_ADDRESS
10011 #define TARGET_PRINT_OPERAND_ADDRESS alpha_print_operand_address
10012 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
10013 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P alpha_print_operand_punct_valid_p
10014
10015 /* Use 16-bit anchors. */
10016 #undef TARGET_MIN_ANCHOR_OFFSET
10017 #define TARGET_MIN_ANCHOR_OFFSET -0x7fff - 1
10018 #undef TARGET_MAX_ANCHOR_OFFSET
10019 #define TARGET_MAX_ANCHOR_OFFSET 0x7fff
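/* The +/-0x8000 range above presumably mirrors the signed 16-bit
   displacement of Alpha memory-format instructions, so an anchored
   reference such as

       ldq $1, 4660($2)	# $2 = anchor, 4660 = offset within range

   needs only a single load (register numbers and offset are hypothetical). */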
10020 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
10021 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
10022
10023 #undef TARGET_REGISTER_MOVE_COST
10024 #define TARGET_REGISTER_MOVE_COST alpha_register_move_cost
10025 #undef TARGET_MEMORY_MOVE_COST
10026 #define TARGET_MEMORY_MOVE_COST alpha_memory_move_cost
10027 #undef TARGET_RTX_COSTS
10028 #define TARGET_RTX_COSTS alpha_rtx_costs
10029 #undef TARGET_ADDRESS_COST
10030 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
10031
10032 #undef TARGET_MACHINE_DEPENDENT_REORG
10033 #define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg
10034
10035 #undef TARGET_PROMOTE_FUNCTION_MODE
10036 #define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
10037 #undef TARGET_PROMOTE_PROTOTYPES
10038 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false
10039
10040 #undef TARGET_FUNCTION_VALUE
10041 #define TARGET_FUNCTION_VALUE alpha_function_value
10042 #undef TARGET_LIBCALL_VALUE
10043 #define TARGET_LIBCALL_VALUE alpha_libcall_value
10044 #undef TARGET_FUNCTION_VALUE_REGNO_P
10045 #define TARGET_FUNCTION_VALUE_REGNO_P alpha_function_value_regno_p
10046 #undef TARGET_RETURN_IN_MEMORY
10047 #define TARGET_RETURN_IN_MEMORY alpha_return_in_memory
10048 #undef TARGET_PASS_BY_REFERENCE
10049 #define TARGET_PASS_BY_REFERENCE alpha_pass_by_reference
10050 #undef TARGET_SETUP_INCOMING_VARARGS
10051 #define TARGET_SETUP_INCOMING_VARARGS alpha_setup_incoming_varargs
10052 #undef TARGET_STRICT_ARGUMENT_NAMING
10053 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
10054 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
10055 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
10056 #undef TARGET_SPLIT_COMPLEX_ARG
10057 #define TARGET_SPLIT_COMPLEX_ARG alpha_split_complex_arg
10058 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
10059 #define TARGET_GIMPLIFY_VA_ARG_EXPR alpha_gimplify_va_arg
10060 #undef TARGET_ARG_PARTIAL_BYTES
10061 #define TARGET_ARG_PARTIAL_BYTES alpha_arg_partial_bytes
10062 #undef TARGET_FUNCTION_ARG
10063 #define TARGET_FUNCTION_ARG alpha_function_arg
10064 #undef TARGET_FUNCTION_ARG_ADVANCE
10065 #define TARGET_FUNCTION_ARG_ADVANCE alpha_function_arg_advance
10066 #undef TARGET_TRAMPOLINE_INIT
10067 #define TARGET_TRAMPOLINE_INIT alpha_trampoline_init
10068
10069 #undef TARGET_INSTANTIATE_DECLS
10070 #define TARGET_INSTANTIATE_DECLS alpha_instantiate_decls
10071
10072 #undef TARGET_SECONDARY_RELOAD
10073 #define TARGET_SECONDARY_RELOAD alpha_secondary_reload
10074 #undef TARGET_SECONDARY_MEMORY_NEEDED
10075 #define TARGET_SECONDARY_MEMORY_NEEDED alpha_secondary_memory_needed
10076 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
10077 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE alpha_secondary_memory_needed_mode
10078
10079 #undef TARGET_SCALAR_MODE_SUPPORTED_P
10080 #define TARGET_SCALAR_MODE_SUPPORTED_P alpha_scalar_mode_supported_p
10081 #undef TARGET_VECTOR_MODE_SUPPORTED_P
10082 #define TARGET_VECTOR_MODE_SUPPORTED_P alpha_vector_mode_supported_p
10083
10084 #undef TARGET_BUILD_BUILTIN_VA_LIST
10085 #define TARGET_BUILD_BUILTIN_VA_LIST alpha_build_builtin_va_list
10086
10087 #undef TARGET_EXPAND_BUILTIN_VA_START
10088 #define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start
10089
10090 #undef TARGET_OPTION_OVERRIDE
10091 #define TARGET_OPTION_OVERRIDE alpha_option_override
10092
10093 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
10094 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
10095 alpha_override_options_after_change
10096
10097 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
10098 #undef TARGET_MANGLE_TYPE
10099 #define TARGET_MANGLE_TYPE alpha_mangle_type
10100 #endif
10101
10102 #undef TARGET_LRA_P
10103 #define TARGET_LRA_P hook_bool_void_false
10104
10105 #undef TARGET_LEGITIMATE_ADDRESS_P
10106 #define TARGET_LEGITIMATE_ADDRESS_P alpha_legitimate_address_p
10107
10108 #undef TARGET_CONDITIONAL_REGISTER_USAGE
10109 #define TARGET_CONDITIONAL_REGISTER_USAGE alpha_conditional_register_usage
10110
10111 #undef TARGET_CANONICALIZE_COMPARISON
10112 #define TARGET_CANONICALIZE_COMPARISON alpha_canonicalize_comparison
10113
10114 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
10115 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV alpha_atomic_assign_expand_fenv
10116
10117 #undef TARGET_HARD_REGNO_MODE_OK
10118 #define TARGET_HARD_REGNO_MODE_OK alpha_hard_regno_mode_ok
10119
10120 #undef TARGET_MODES_TIEABLE_P
10121 #define TARGET_MODES_TIEABLE_P alpha_modes_tieable_p
10122
10123 #undef TARGET_CAN_CHANGE_MODE_CLASS
10124 #define TARGET_CAN_CHANGE_MODE_CLASS alpha_can_change_mode_class
10125
10126 struct gcc_target targetm = TARGET_INITIALIZER;
10127
10128 \f
10129 #include "gt-alpha.h"